Merge tag 'kvm-s390-next-6.9-1' of https://git.kernel.org/pub/scm/linux/kernel/git...

author Paolo Bonzini <pbonzini@redhat.com>

Thu, 14 Mar 2024 18:47:56 +0000 (14:47 -0400)

committer Paolo Bonzini <pbonzini@redhat.com>

Thu, 14 Mar 2024 18:47:56 +0000 (14:47 -0400)
author Paolo Bonzini <pbonzini@redhat.com>
Thu, 14 Mar 2024 18:47:56 +0000 (14:47 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 14 Mar 2024 18:47:56 +0000 (14:47 -0400)
diff --git a/.mailmap b/.mailmap

index 04998f7bda81816b4bb1dd321f80393a80ae9a67..08f28f2999f0dc5d64cb5d04a77c7c3eab78130a 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -191,10 +191,11 @@ Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com>
  Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com>
  Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com>
  Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com>
-Geliang Tang <geliang.tang@linux.dev> <geliang.tang@suse.com>
-Geliang Tang <geliang.tang@linux.dev> <geliangtang@xiaomi.com>
-Geliang Tang <geliang.tang@linux.dev> <geliangtang@gmail.com>
-Geliang Tang <geliang.tang@linux.dev> <geliangtang@163.com>
+Geliang Tang <geliang@kernel.org> <geliang.tang@linux.dev>
+Geliang Tang <geliang@kernel.org> <geliang.tang@suse.com>
+Geliang Tang <geliang@kernel.org> <geliangtang@xiaomi.com>
+Geliang Tang <geliang@kernel.org> <geliangtang@gmail.com>
+Geliang Tang <geliang@kernel.org> <geliangtang@163.com>
  Georgi Djakov <djakov@kernel.org> <georgi.djakov@linaro.org>
  Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com>
  Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com>
@@ -289,6 +290,7 @@ Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
  John Crispin <john@phrozen.org> <blogic@openwrt.org>
  John Fastabend <john.fastabend@gmail.com> <john.r.fastabend@intel.com>
  John Keeping <john@keeping.me.uk> <john@metanate.com>
+John Moon <john@jmoon.dev> <quic_johmoo@quicinc.com>
  John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
  John Stultz <johnstul@us.ibm.com>
  <jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
@@ -344,6 +346,7 @@ Leonid I Ananiev <leonid.i.ananiev@intel.com>
  Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
  Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
  Leon Romanovsky <leon@kernel.org> <leonro@nvidia.com>
+Leo Yan <leo.yan@linux.dev> <leo.yan@linaro.org>
  Liam Mark <quic_lmark@quicinc.com> <lmark@codeaurora.org>
  Linas Vepstas <linas@austin.ibm.com>
  Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
@@ -550,6 +553,7 @@ Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org>
  Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com>
  Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com>
  Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
+Shakeel Butt <shakeel.butt@linux.dev> <shakeelb@google.com>
  Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
  Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com>
  Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com>
diff --git a/CREDITS b/CREDITS

index 5797e8f7e92b06f8736c01c6c191815c4802b6fd..df8d6946739f68655a8b077f0ebcc4bf4612944b 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2161,6 +2161,19 @@ N: Mike Kravetz
  E: mike.kravetz@oracle.com
  D: Maintenance and development of the hugetlb subsystem
  
+N: Seth Jennings
+E: sjenning@redhat.com
+D: Creation and maintenance of zswap
+
+N: Dan Streetman
+E: ddstreet@ieee.org
+D: Maintenance and development of zswap
+D: Creation and maintenance of the zpool API
+
+N: Vitaly Wool
+E: vitaly.wool@konsulko.com
+D: Maintenance and development of zswap
+
  N: Andreas S. Krebs
  E: akrebs@altavista.net
  D: CYPRESS CY82C693 chipset IDE, Digital's PC-Alpha 164SX boards
diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues

index 906ff3ca928ac1389567a5f02bdc4e06c3980b38..5bff64d256c207c8a7d2c915e0e8affac191913c 100644 (file)
--- a/Documentation/ABI/testing/sysfs-class-net-queues
+++ b/Documentation/ABI/testing/sysfs-class-net-queues
@@ -1,4 +1,4 @@
-What:          /sys/class/<iface>/queues/rx-<queue>/rps_cpus
+What:          /sys/class/net/<iface>/queues/rx-<queue>/rps_cpus
  Date:          March 2010
  KernelVersion: 2.6.35
  Contact:       netdev@vger.kernel.org
@@ -8,7 +8,7 @@ Description:
                 network device queue. Possible values depend on the number
                 of available CPU(s) in the system.
  
-What:          /sys/class/<iface>/queues/rx-<queue>/rps_flow_cnt
+What:          /sys/class/net/<iface>/queues/rx-<queue>/rps_flow_cnt
  Date:          April 2010
  KernelVersion: 2.6.35
  Contact:       netdev@vger.kernel.org
@@ -16,7 +16,7 @@ Description:
                 Number of Receive Packet Steering flows being currently
                 processed by this particular network device receive queue.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/tx_timeout
+What:          /sys/class/net/<iface>/queues/tx-<queue>/tx_timeout
  Date:          November 2011
  KernelVersion: 3.3
  Contact:       netdev@vger.kernel.org
@@ -24,7 +24,7 @@ Description:
                 Indicates the number of transmit timeout events seen by this
                 network interface transmit queue.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/tx_maxrate
+What:          /sys/class/net/<iface>/queues/tx-<queue>/tx_maxrate
  Date:          March 2015
  KernelVersion: 4.1
  Contact:       netdev@vger.kernel.org
@@ -32,7 +32,7 @@ Description:
                 A Mbps max-rate set for the queue, a value of zero means disabled,
                 default is disabled.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/xps_cpus
+What:          /sys/class/net/<iface>/queues/tx-<queue>/xps_cpus
  Date:          November 2010
  KernelVersion: 2.6.38
  Contact:       netdev@vger.kernel.org
@@ -42,7 +42,7 @@ Description:
                 network device transmit queue. Possible values depend on the
                 number of available CPU(s) in the system.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/xps_rxqs
+What:          /sys/class/net/<iface>/queues/tx-<queue>/xps_rxqs
  Date:          June 2018
  KernelVersion: 4.18.0
  Contact:       netdev@vger.kernel.org
@@ -53,7 +53,7 @@ Description:
                 number of available receive queue(s) in the network device.
                 Default is disabled.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
+What:          /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
  Date:          November 2011
  KernelVersion: 3.3
  Contact:       netdev@vger.kernel.org
@@ -62,7 +62,7 @@ Description:
                 of this particular network device transmit queue.
                 Default value is 1000.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
+What:          /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
  Date:          November 2011
  KernelVersion: 3.3
  Contact:       netdev@vger.kernel.org
@@ -70,7 +70,7 @@ Description:
                 Indicates the number of bytes (objects) in flight on this
                 network device transmit queue.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit
+What:          /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit
  Date:          November 2011
  KernelVersion: 3.3
  Contact:       netdev@vger.kernel.org
@@ -79,7 +79,7 @@ Description:
                 on this network device transmit queue. This value is clamped
                 to be within the bounds defined by limit_max and limit_min.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
+What:          /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
  Date:          November 2011
  KernelVersion: 3.3
  Contact:       netdev@vger.kernel.org
@@ -88,7 +88,7 @@ Description:
                 queued on this network device transmit queue. See
                 include/linux/dynamic_queue_limits.h for the default value.
  
-What:          /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
+What:          /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
  Date:          November 2011
  KernelVersion: 3.3
  Contact:       netdev@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics

index 55db27815361b2d9ad511e026bee60a615ae03d5..53e508c6936a515216ad3af96ddfb170f6e50cf7 100644 (file)
--- a/Documentation/ABI/testing/sysfs-class-net-statistics
+++ b/Documentation/ABI/testing/sysfs-class-net-statistics
@@ -1,4 +1,4 @@
-What:          /sys/class/<iface>/statistics/collisions
+What:          /sys/class/net/<iface>/statistics/collisions
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -6,7 +6,7 @@ Description:
                 Indicates the number of collisions seen by this network device.
                 This value might not be relevant with all MAC layers.
  
-What:          /sys/class/<iface>/statistics/multicast
+What:          /sys/class/net/<iface>/statistics/multicast
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -14,7 +14,7 @@ Description:
                 Indicates the number of multicast packets received by this
                 network device.
  
-What:          /sys/class/<iface>/statistics/rx_bytes
+What:          /sys/class/net/<iface>/statistics/rx_bytes
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -23,7 +23,7 @@ Description:
                 See the network driver for the exact meaning of when this
                 value is incremented.
  
-What:          /sys/class/<iface>/statistics/rx_compressed
+What:          /sys/class/net/<iface>/statistics/rx_compressed
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -32,7 +32,7 @@ Description:
                 network device. This value might only be relevant for interfaces
                 that support packet compression (e.g: PPP).
  
-What:          /sys/class/<iface>/statistics/rx_crc_errors
+What:          /sys/class/net/<iface>/statistics/rx_crc_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -41,7 +41,7 @@ Description:
                 by this network device. Note that the specific meaning might
                 depend on the MAC layer used by the interface.
  
-What:          /sys/class/<iface>/statistics/rx_dropped
+What:          /sys/class/net/<iface>/statistics/rx_dropped
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -51,7 +51,7 @@ Description:
                 packet processing. See the network driver for the exact
                 meaning of this value.
  
-What:          /sys/class/<iface>/statistics/rx_errors
+What:          /sys/class/net/<iface>/statistics/rx_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -59,7 +59,7 @@ Description:
                 Indicates the number of receive errors on this network device.
                 See the network driver for the exact meaning of this value.
  
-What:          /sys/class/<iface>/statistics/rx_fifo_errors
+What:          /sys/class/net/<iface>/statistics/rx_fifo_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -68,7 +68,7 @@ Description:
                 network device. See the network driver for the exact
                 meaning of this value.
  
-What:          /sys/class/<iface>/statistics/rx_frame_errors
+What:          /sys/class/net/<iface>/statistics/rx_frame_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -78,7 +78,7 @@ Description:
                 on the MAC layer protocol used. See the network driver for
                 the exact meaning of this value.
  
-What:          /sys/class/<iface>/statistics/rx_length_errors
+What:          /sys/class/net/<iface>/statistics/rx_length_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -87,7 +87,7 @@ Description:
                 error, oversized or undersized. See the network driver for the
                 exact meaning of this value.
  
-What:          /sys/class/<iface>/statistics/rx_missed_errors
+What:          /sys/class/net/<iface>/statistics/rx_missed_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -96,7 +96,7 @@ Description:
                 due to lack of capacity in the receive side. See the network
                 driver for the exact meaning of this value.
  
-What:          /sys/class/<iface>/statistics/rx_nohandler
+What:          /sys/class/net/<iface>/statistics/rx_nohandler
  Date:          February 2016
  KernelVersion: 4.6
  Contact:       netdev@vger.kernel.org
@@ -104,7 +104,7 @@ Description:
                 Indicates the number of received packets that were dropped on
                 an inactive device by the network core.
  
-What:          /sys/class/<iface>/statistics/rx_over_errors
+What:          /sys/class/net/<iface>/statistics/rx_over_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -114,7 +114,7 @@ Description:
                 (e.g: larger than MTU). See the network driver for the exact
                 meaning of this value.
  
-What:          /sys/class/<iface>/statistics/rx_packets
+What:          /sys/class/net/<iface>/statistics/rx_packets
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -122,7 +122,7 @@ Description:
                 Indicates the total number of good packets received by this
                 network device.
  
-What:          /sys/class/<iface>/statistics/tx_aborted_errors
+What:          /sys/class/net/<iface>/statistics/tx_aborted_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -132,7 +132,7 @@ Description:
                 a medium collision). See the network driver for the exact
                 meaning of this value.
  
-What:          /sys/class/<iface>/statistics/tx_bytes
+What:          /sys/class/net/<iface>/statistics/tx_bytes
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -143,7 +143,7 @@ Description:
                 transmitted packets or all packets that have been queued for
                 transmission.
  
-What:          /sys/class/<iface>/statistics/tx_carrier_errors
+What:          /sys/class/net/<iface>/statistics/tx_carrier_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -152,7 +152,7 @@ Description:
                 because of carrier errors (e.g: physical link down). See the
                 network driver for the exact meaning of this value.
  
-What:          /sys/class/<iface>/statistics/tx_compressed
+What:          /sys/class/net/<iface>/statistics/tx_compressed
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -161,7 +161,7 @@ Description:
                 this might only be relevant for devices that support
                 compression (e.g: PPP).
  
-What:          /sys/class/<iface>/statistics/tx_dropped
+What:          /sys/class/net/<iface>/statistics/tx_dropped
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -170,7 +170,7 @@ Description:
                 See the driver for the exact reasons as to why the packets were
                 dropped.
  
-What:          /sys/class/<iface>/statistics/tx_errors
+What:          /sys/class/net/<iface>/statistics/tx_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -179,7 +179,7 @@ Description:
                 a network device. See the driver for the exact reasons as to
                 why the packets were dropped.
  
-What:          /sys/class/<iface>/statistics/tx_fifo_errors
+What:          /sys/class/net/<iface>/statistics/tx_fifo_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -188,7 +188,7 @@ Description:
                 FIFO error. See the driver for the exact reasons as to why the
                 packets were dropped.
  
-What:          /sys/class/<iface>/statistics/tx_heartbeat_errors
+What:          /sys/class/net/<iface>/statistics/tx_heartbeat_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -197,7 +197,7 @@ Description:
                 reported as heartbeat errors. See the driver for the exact
                 reasons as to why the packets were dropped.
  
-What:          /sys/class/<iface>/statistics/tx_packets
+What:          /sys/class/net/<iface>/statistics/tx_packets
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
@@ -206,7 +206,7 @@ Description:
                 device. See the driver for whether this reports the number of all
                 attempted or successful transmissions.
  
-What:          /sys/class/<iface>/statistics/tx_window_errors
+What:          /sys/class/net/<iface>/statistics/tx_window_errors
  Date:          April 2005
  KernelVersion: 2.6.12
  Contact:       netdev@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon

index 8d7d8f05f6cd0a3d7bb9fa06f0a40730f5bae99b..92fe7c5c5ac1d1d981562d1b441f32a6bafdc1aa 100644 (file)
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -1,4 +1,4 @@
-What:          /sys/devices/.../hwmon/hwmon<i>/in0_input
+What:          /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/in0_input
  Date:          February 2023
  KernelVersion: 6.2
  Contact:       intel-gfx@lists.freedesktop.org
@@ -6,7 +6,7 @@ Description:    RO. Current Voltage in millivolt.
  
                 Only supported for particular Intel i915 graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/power1_max
+What:          /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_max
  Date:          February 2023
  KernelVersion: 6.2
  Contact:       intel-gfx@lists.freedesktop.org
@@ -20,7 +20,7 @@ Description:  RW. Card reactive sustained  (PL1/Tau) power limit in microwatts.
  
                 Only supported for particular Intel i915 graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+What:          /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_rated_max
  Date:          February 2023
  KernelVersion: 6.2
  Contact:       intel-gfx@lists.freedesktop.org
@@ -28,7 +28,7 @@ Description:  RO. Card default power limit (default TDP setting).
  
                 Only supported for particular Intel i915 graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+What:          /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_max_interval
  Date:          February 2023
  KernelVersion: 6.2
  Contact:       intel-gfx@lists.freedesktop.org
@@ -37,7 +37,7 @@ Description:  RW. Sustained power limit interval (Tau in PL1/Tau) in
  
                 Only supported for particular Intel i915 graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/power1_crit
+What:          /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_crit
  Date:          February 2023
  KernelVersion: 6.2
  Contact:       intel-gfx@lists.freedesktop.org
@@ -50,7 +50,7 @@ Description:  RW. Card reactive critical (I1) power limit in microwatts.
  
                 Only supported for particular Intel i915 graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/curr1_crit
+What:          /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/curr1_crit
  Date:          February 2023
  KernelVersion: 6.2
  Contact:       intel-gfx@lists.freedesktop.org
@@ -63,7 +63,7 @@ Description:  RW. Card reactive critical (I1) power limit in milliamperes.
  
                 Only supported for particular Intel i915 graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/energy1_input
+What:          /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/energy1_input
  Date:          February 2023
  KernelVersion: 6.2
  Contact:       intel-gfx@lists.freedesktop.org
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon

index 8c321bc9dc04401e5b25fb2e4c2e509f0d2eba14..023fd82de3f70a61fb9c58c973690bc0fff38e12 100644 (file)
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -1,4 +1,4 @@
-What:          /sys/devices/.../hwmon/hwmon<i>/power1_max
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max
  Date:          September 2023
  KernelVersion: 6.5
  Contact:       intel-xe@lists.freedesktop.org
@@ -12,7 +12,7 @@ Description:  RW. Card reactive sustained  (PL1) power limit in microwatts.
  
                 Only supported for particular Intel xe graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_rated_max
  Date:          September 2023
  KernelVersion: 6.5
  Contact:       intel-xe@lists.freedesktop.org
@@ -20,7 +20,7 @@ Description:  RO. Card default power limit (default TDP setting).
  
                 Only supported for particular Intel xe graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/power1_crit
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_crit
  Date:          September 2023
  KernelVersion: 6.5
  Contact:       intel-xe@lists.freedesktop.org
@@ -33,7 +33,7 @@ Description:  RW. Card reactive critical (I1) power limit in microwatts.
  
                 Only supported for particular Intel xe graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/curr1_crit
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/curr1_crit
  Date:          September 2023
  KernelVersion: 6.5
  Contact:       intel-xe@lists.freedesktop.org
@@ -44,7 +44,7 @@ Description:  RW. Card reactive critical (I1) power limit in milliamperes.
                 the operating frequency if the power averaged over a window
                 exceeds this limit.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/in0_input
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/in0_input
  Date:          September 2023
  KernelVersion: 6.5
  Contact:       intel-xe@lists.freedesktop.org
@@ -52,7 +52,7 @@ Description:  RO. Current Voltage in millivolt.
  
                 Only supported for particular Intel xe graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/energy1_input
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy1_input
  Date:          September 2023
  KernelVersion: 6.5
  Contact:       intel-xe@lists.freedesktop.org
@@ -60,7 +60,7 @@ Description:  RO. Energy input of device in microjoules.
  
                 Only supported for particular Intel xe graphics platforms.
  
-What:          /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max_interval
  Date:          October 2023
  KernelVersion: 6.6
  Contact:       intel-xe@lists.freedesktop.org
diff --git a/Documentation/ABI/testing/sysfs-nvmem-cells b/Documentation/ABI/testing/sysfs-nvmem-cells

index 7af70adf3690e3b0b3a3c148a087ecb3f788d54a..c7c9444f92a880ff3f9971fc52f71a0e2756d5f1 100644 (file)
--- a/Documentation/ABI/testing/sysfs-nvmem-cells
+++ b/Documentation/ABI/testing/sysfs-nvmem-cells
@@ -4,18 +4,18 @@ KernelVersion:        6.5
  Contact:       Miquel Raynal <miquel.raynal@bootlin.com>
  Description:
                 The "cells" folder contains one file per cell exposed by the
-               NVMEM device. The name of the file is: <name>@<where>, with
-               <name> being the cell name and <where> its location in the NVMEM
-               device, in hexadecimal (without the '0x' prefix, to mimic device
-               tree node names). The length of the file is the size of the cell
-               (when known). The content of the file is the binary content of
-               the cell (may sometimes be ASCII, likely without trailing
-               character).
+               NVMEM device. The name of the file is: "<name>@<byte>,<bit>",
+               with <name> being the cell name and <where> its location in
+               the NVMEM device, in hexadecimal bytes and bits (without the
+               '0x' prefix, to mimic device tree node names). The length of
+               the file is the size of the cell (when known). The content of
+               the file is the binary content of the cell (may sometimes be
+               ASCII, likely without trailing character).
                 Note: This file is only present if CONFIG_NVMEM_SYSFS
                 is enabled.
  
                 Example::
  
-                 hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d
+                 hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d,0
                   00000000  54 4e 34 38 4d 2d 50 2d  44 4e         |TN48M-P-DN|
                   0000000a
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst

index e8c2ce1f9df68df5976b7cc536d3f48c0501ba4b..45a7f4932fe07f295cd452313bfcb64c59809218 100644 (file)
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -243,3 +243,10 @@ stable kernels.
  +----------------+-----------------+-----------------+-----------------------------+
  | ASR            | ASR8601         | #8601001        | N/A                         |
  +----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft      | Azure Cobalt 100| #2139208        | ARM64_ERRATUM_2139208       |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft      | Azure Cobalt 100| #2067961        | ARM64_ERRATUM_2067961       |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft      | Azure Cobalt 100| #2253138        | ARM64_ERRATUM_2253138       |
++----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst

index e73fdff62c0aa10a0d6de89e6a62f8b2185920a7..c58c72362911cd0a10be8e96eba4cb9940d3b576 100644 (file)
--- a/Documentation/arch/x86/mds.rst
+++ b/Documentation/arch/x86/mds.rst
@@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing:
  
      mds_clear_cpu_buffers()
  
+Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
+Other than CFLAGS.ZF, this macro doesn't clobber any registers.
+
  The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
  (idle) transitions.
  
@@ -138,17 +141,30 @@ Mitigation points
  
     When transitioning from kernel to user space the CPU buffers are flushed
     on affected CPUs when the mitigation is not disabled on the kernel
-   command line. The migitation is enabled through the static key
-   mds_user_clear.
-
-   The mitigation is invoked in prepare_exit_to_usermode() which covers
-   all but one of the kernel to user space transitions.  The exception
-   is when we return from a Non Maskable Interrupt (NMI), which is
-   handled directly in do_nmi().
-
-   (The reason that NMI is special is that prepare_exit_to_usermode() can
-    enable IRQs.  In NMI context, NMIs are blocked, and we don't want to
-    enable IRQs with NMIs blocked.)
+   command line. The mitigation is enabled through the feature flag
+   X86_FEATURE_CLEAR_CPU_BUF.
+
+   The mitigation is invoked just before transitioning to userspace after
+   user registers are restored. This is done to minimize the window in
+   which kernel data could be accessed after VERW e.g. via an NMI after
+   VERW.
+
+   **Corner case not handled**
+   Interrupts returning to kernel don't clear CPUs buffers since the
+   exit-to-user path is expected to do that anyways. But, there could be
+   a case when an NMI is generated in kernel after the exit-to-user path
+   has cleared the buffers. This case is not handled and NMI returning to
+   kernel don't clear CPU buffers because:
+
+   1. It is rare to get an NMI after VERW, but before returning to userspace.
+   2. For an unprivileged user, there is no known way to make that NMI
+      less rare or target it.
+   3. It would take a large number of these precisely-timed NMIs to mount
+      an actual attack.  There's presumably not enough bandwidth.
+   4. The NMI in question occurs after a VERW, i.e. when user state is
+      restored and most interesting data is already scrubbed. Whats left
+      is only the data that NMI touches, and that may or may not be of
+      any interest.
  
  
  2. C-State transition
diff --git a/Documentation/conf.py b/Documentation/conf.py

index 5830b01c56429d38f18e12778ebce543605b3296..da64c9fb7e072378c53423b1f7b575ef124b6834 100644 (file)
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -388,6 +388,12 @@ latex_elements = {
          verbatimhintsturnover=false,
      ''',
  
+    #
+    # Some of our authors are fond of deep nesting; tell latex to
+    # cope.
+    #
+    'maxlistdepth': '10',
+
      # For CJK One-half spacing, need to be in front of hyperref
      'extrapackages': r'\usepackage{setspace}',
  
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst

index a9efab50eed83e06a89549aeb1fb4da1b2eba1d9..22955d56b3799bfc3f3b92874b638aa24c1edaa6 100644 (file)
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -671,8 +671,23 @@ Testing Static Functions
  ------------------------
  
  If we do not want to expose functions or variables for testing, one option is to
-conditionally ``#include`` the test file at the end of your .c file. For
-example:
+conditionally export the used symbol. For example:
+
+.. code-block:: c
+
+       /* In my_file.c */
+
+       VISIBLE_IF_KUNIT int do_interesting_thing();
+       EXPORT_SYMBOL_IF_KUNIT(do_interesting_thing);
+
+       /* In my_file.h */
+
+       #if IS_ENABLED(CONFIG_KUNIT)
+               int do_interesting_thing(void);
+       #endif
+
+Alternatively, you could conditionally ``#include`` the test file at the end of
+your .c file. For example:
  
  .. code-block:: c
  
diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile

index 2323fd5b7cdae1ebe440275d8f67649354a6f448..129cf698fa8a66fd2be5111074319da545f4cc98 100644 (file)
--- a/Documentation/devicetree/bindings/Makefile
+++ b/Documentation/devicetree/bindings/Makefile
@@ -28,7 +28,10 @@ $(obj)/%.example.dts: $(src)/%.yaml check_dtschema_version FORCE
  find_all_cmd = find $(srctree)/$(src) \( -name '*.yaml' ! \
                 -name 'processed-schema*' \)
  
-find_cmd = $(find_all_cmd) | sed 's|^$(srctree)/$(src)/||' | grep -F -e "$(subst :," -e ",$(DT_SCHEMA_FILES))" | sed 's|^|$(srctree)/$(src)/|'
+find_cmd = $(find_all_cmd) | \
+               sed 's|^$(srctree)/||' | \
+               grep -F -e "$(subst :," -e ",$(DT_SCHEMA_FILES))" | \
+               sed 's|^|$(srctree)/|'
  CHK_DT_DOCS := $(shell $(find_cmd))
  
  quiet_cmd_yamllint = LINT    $(src)
diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml

index b29ce598f9aaea327bcd177dc6bf143ee8693ebf..9952e0ef77674c11d115dab50a904841410e148a 100644 (file)
--- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
+++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
  title: Ceva AHCI SATA Controller
  
  maintainers:
-  - Piyush Mehta <piyush.mehta@amd.com>
+  - Mubin Sayyed <mubin.sayyed@amd.com>
+  - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
  
  description: |
    The Ceva SATA controller mostly conforms to the AHCI interface with some
diff --git a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml

index 3eebc03a309be24fca83f928ffeab18fed09b13b..ca7fdada3ff2487c3c678bc3aa8b40381d04d12e 100644 (file)
--- a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml
@@ -85,8 +85,8 @@ allOf:
  
          clock-names:
            items:
-            - const: dout_cmu_misc_bus
-            - const: dout_cmu_misc_sss
+            - const: bus
+            - const: sss
  
  additionalProperties: false
  
diff --git a/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml b/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml

index 21d995f29a1e3068be328506cf01d8f0f5d3d383..b8e9cf6ce4e61145bb6a30d90396b982449b2f08 100644 (file)
--- a/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml
@@ -29,19 +29,22 @@ properties:
  
    audio-ports:
      description:
-      Array of 8-bit values, 2 values per DAI (Documentation/sound/soc/dai.rst).
+      Array of 2 values per DAI (Documentation/sound/soc/dai.rst).
        The implementation allows one or two DAIs.
        If two DAIs are defined, they must be of different type.
      $ref: /schemas/types.yaml#/definitions/uint32-matrix
+    minItems: 1
+    maxItems: 2
      items:
-      minItems: 1
        items:
          - description: |
              The first value defines the DAI type: TDA998x_SPDIF or TDA998x_I2S
              (see include/dt-bindings/display/tda998x.h).
+          enum: [ 1, 2 ]
          - description:
              The second value defines the tda998x AP_ENA reg content when the
              DAI in question is used.
+          maximum: 0xff
  
    '#sound-dai-cells':
      enum: [ 0, 1 ]
diff --git a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml

index b1fd632718d49659483fd7e6773377adeb66d938..bb93baa888794b83d1613cecca79a383b528914a 100644 (file)
--- a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
+++ b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
@@ -12,7 +12,8 @@ description:
    PS_MODE). Every pin can be configured as input/output.
  
  maintainers:
-  - Piyush Mehta <piyush.mehta@amd.com>
+  - Mubin Sayyed <mubin.sayyed@amd.com>
+  - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
  
  properties:
    compatible:
diff --git a/Documentation/devicetree/bindings/net/marvell,prestera.yaml b/Documentation/devicetree/bindings/net/marvell,prestera.yaml

index 5ea8b73663a50c3f55999fb8cc911af491d46086..16ff892f7bbd0aa8f965d602e5ccbd3b18ec9253 100644 (file)
--- a/Documentation/devicetree/bindings/net/marvell,prestera.yaml
+++ b/Documentation/devicetree/bindings/net/marvell,prestera.yaml
@@ -78,8 +78,8 @@ examples:
      pcie@0 {
          #address-cells = <3>;
          #size-cells = <2>;
-        ranges = <0x0 0x0 0x0 0x0 0x0 0x0>;
-        reg = <0x0 0x0 0x0 0x0 0x0 0x0>;
+        ranges = <0x02000000 0x0 0x100000 0x10000000 0x0 0x0>;
+        reg = <0x0 0x1000>;
          device_type = "pci";
  
          switch@0,0 {
diff --git a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml

index 49db668014297040f85b628137769951663992ed..1f1b42dde94d5086020f0a89d183eafa1ea17589 100644 (file)
--- a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml
+++ b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
  title: Zynq UltraScale+ MPSoC and Versal reset
  
  maintainers:
-  - Piyush Mehta <piyush.mehta@amd.com>
+  - Mubin Sayyed <mubin.sayyed@amd.com>
+  - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
  
  description: |
    The Zynq UltraScale+ MPSoC and Versal has several different resets.
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml

index 8108c564dd78a84a1d869a60b975dcb51e6480ff..aa32dc950e72ccdaf7fb1ac7f759d57a855fc9b6 100644 (file)
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
@@ -22,6 +22,7 @@ properties:
        - const: allwinner,sun6i-a31-spdif
        - const: allwinner,sun8i-h3-spdif
        - const: allwinner,sun50i-h6-spdif
+      - const: allwinner,sun50i-h616-spdif
        - items:
            - const: allwinner,sun8i-a83t-spdif
            - const: allwinner,sun8i-h3-spdif
@@ -62,6 +63,8 @@ allOf:
              enum:
                - allwinner,sun6i-a31-spdif
                - allwinner,sun8i-h3-spdif
+              - allwinner,sun50i-h6-spdif
+              - allwinner,sun50i-h616-spdif
  
      then:
        required:
@@ -73,7 +76,7 @@ allOf:
            contains:
              enum:
                - allwinner,sun8i-h3-spdif
-              - allwinner,sun50i-h6-spdif
+              - allwinner,sun50i-h616-spdif
  
      then:
        properties:
diff --git a/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml b/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml

index ec4b6e547ca6efad4b77697c567e30da74707261..cdcd7c6f21eb241663c44fd8d066dc1700a8994b 100644 (file)
--- a/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml
+++ b/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml
@@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
  title: Google SC7280-Herobrine ASoC sound card driver
  
  maintainers:
-  - Srinivasa Rao Mandadapu <srivasam@codeaurora.org>
    - Judy Hsiao <judyhsiao@chromium.org>
  
  description:
diff --git a/Documentation/devicetree/bindings/tpm/tpm-common.yaml b/Documentation/devicetree/bindings/tpm/tpm-common.yaml

index 90390624a8be5e7abc0b18374f19705db999a97d..3c1241b2a43f99d361e0af89aed61f4318c9b914 100644 (file)
--- a/Documentation/devicetree/bindings/tpm/tpm-common.yaml
+++ b/Documentation/devicetree/bindings/tpm/tpm-common.yaml
@@ -42,7 +42,7 @@ properties:
  
    resets:
      description: Reset controller to reset the TPM
-    $ref: /schemas/types.yaml#/definitions/phandle
+    maxItems: 1
  
    reset-gpios:
      description: Output GPIO pin to reset the TPM
diff --git a/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml

index 88cc1e3a0c887c367c7ed83ff2a0835398a20b93..b2b509b3944d85714316c8f91b042054373416a4 100644 (file)
--- a/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml
+++ b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml
@@ -55,9 +55,12 @@ properties:
  
    samsung,sysreg:
      $ref: /schemas/types.yaml#/definitions/phandle-array
-    description: Should be phandle/offset pair. The phandle to the syscon node
-                 which indicates the FSYSx sysreg interface and the offset of
-                 the control register for UFS io coherency setting.
+    items:
+      - items:
+          - description: phandle to FSYSx sysreg node
+          - description: offset of the control register for UFS io coherency setting
+    description:
+      Phandle and offset to the FSYSx sysreg for UFS io coherency setting.
  
    dma-coherent: true
  
diff --git a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml

index bb373eb025a5f92b085d62b21354d94eaa002e65..00f87a558c7dd3b8af7392f87448ac8a00fbcd95 100644 (file)
--- a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml
+++ b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
  title: Xilinx SuperSpeed DWC3 USB SoC controller
  
  maintainers:
-  - Piyush Mehta <piyush.mehta@amd.com>
+  - Mubin Sayyed <mubin.sayyed@amd.com>
+  - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
  
  properties:
    compatible:
diff --git a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml

index 6d4cfd943f5847ff43cbccd13e5f210a95448c1c..445183d9d6db1adaa1ab9d04cb4271eadbe22ffc 100644 (file)
--- a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
+++ b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
@@ -16,8 +16,9 @@ description:
    USB 2.0 traffic.
  
  maintainers:
-  - Piyush Mehta <piyush.mehta@amd.com>
    - Michal Simek <michal.simek@amd.com>
+  - Mubin Sayyed <mubin.sayyed@amd.com>
+  - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
  
  properties:
    compatible:
diff --git a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml

index 868dffe314bcba9123a4e99b9966de738b0ea8f3..a7f75fe366652bb2dcec6bf6e87c5879d31f1fce 100644 (file)
--- a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml
+++ b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
  title: Xilinx udc controller
  
  maintainers:
-  - Piyush Mehta <piyush.mehta@amd.com>
+  - Mubin Sayyed <mubin.sayyed@amd.com>
+  - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
  
  properties:
    compatible:
diff --git a/Documentation/kbuild/Kconfig.recursion-issue-01 b/Documentation/kbuild/Kconfig.recursion-issue-01

index e8877db0461fb45ed939bd859df9d7bc0ef7077e..ac49836d8ecf8909ef75e8e04fbab45d89d1f888 100644 (file)
--- a/Documentation/kbuild/Kconfig.recursion-issue-01
+++ b/Documentation/kbuild/Kconfig.recursion-issue-01
@@ -16,13 +16,13 @@
  # that are possible for CORE. So for example if CORE_BELL_A_ADVANCED is 'y',
  # CORE must be 'y' too.
  #
-#  * What influences CORE_BELL_A_ADVANCED ?
+#  * What influences CORE_BELL_A_ADVANCED?
  #
  # As the name implies CORE_BELL_A_ADVANCED is an advanced feature of
  # CORE_BELL_A so naturally it depends on CORE_BELL_A. So if CORE_BELL_A is 'y'
  # we know CORE_BELL_A_ADVANCED can be 'y' too.
  #
-#   * What influences CORE_BELL_A ?
+#   * What influences CORE_BELL_A?
  #
  # CORE_BELL_A depends on CORE, so CORE influences CORE_BELL_A.
  #
@@ -34,7 +34,7 @@
  # the "recursive dependency detected" error.
  #
  # Reading the Documentation/kbuild/Kconfig.recursion-issue-01 file it may be
-# obvious that an easy to solution to this problem should just be the removal
+# obvious that an easy solution to this problem should just be the removal
  # of the "select CORE" from CORE_BELL_A_ADVANCED as that is implicit already
  # since CORE_BELL_A depends on CORE. Recursive dependency issues are not always
  # so trivial to resolve, we provide another example below of practical
diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml

index b14aed18065f43ce24e9217eefd455814c953efc..3dcc9ece272aad6842a6297c6d5bf2cca2c2acc3 100644 (file)
--- a/Documentation/netlink/specs/dpll.yaml
+++ b/Documentation/netlink/specs/dpll.yaml
@@ -384,8 +384,6 @@ operations:
              - type
  
        dump:
-        pre: dpll-lock-dumpit
-        post: dpll-unlock-dumpit
          reply: *dev-attrs
  
      -
@@ -473,8 +471,6 @@ operations:
              - fractional-frequency-offset
  
        dump:
-        pre: dpll-lock-dumpit
-        post: dpll-unlock-dumpit
          request:
            attributes:
              - id
diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml

index 1ad01d52a8638dcf6ee8a1c6c3d58698abd0d8e4..8e4d19adee8cd17eae831db73692c9237b5e0ad1 100644 (file)
--- a/Documentation/netlink/specs/rt_link.yaml
+++ b/Documentation/netlink/specs/rt_link.yaml
@@ -942,6 +942,10 @@ attribute-sets:
        -
          name: gro-ipv4-max-size
          type: u32
+      -
+        name: dpll-pin
+        type: nest
+        nested-attributes: link-dpll-pin-attrs
    -
      name: af-spec-attrs
      attributes:
@@ -1627,6 +1631,12 @@ attribute-sets:
        -
          name: used
          type: u8
+  -
+    name: link-dpll-pin-attrs
+    attributes:
+      -
+        name: id
+        type: u32
  
  sub-messages:
    -
diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst

index e33ad2401ad70c8a678fec93ebfda3ca4909f9db..562f46b41274493c77176a1e563269fc9e4b2b85 100644 (file)
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -126,7 +126,7 @@ Users may also set the RoCE capability of the function using
  `devlink port function set roce` command.
  
  Users may also set the function as migratable using
-'devlink port function set migratable' command.
+`devlink port function set migratable` command.
  
  Users may also set the IPsec crypto capability of the function using
  `devlink port function set ipsec_crypto` command.
diff --git a/Documentation/networking/net_cachelines/inet_sock.rst b/Documentation/networking/net_cachelines/inet_sock.rst

index a2babd0d7954e6729ed8533518dbef039f5fdeac..595d7ef5fc8b090788e7a3439843c060951d1098 100644 (file)
--- a/Documentation/networking/net_cachelines/inet_sock.rst
+++ b/Documentation/networking/net_cachelines/inet_sock.rst
@@ -1,9 +1,9 @@
  .. SPDX-License-Identifier: GPL-2.0
  .. Copyright (C) 2023 Google LLC
  
-=====================================================
-inet_connection_sock struct fast path usage breakdown
-=====================================================
+==========================================
+inet_sock struct fast path usage breakdown
+==========================================
  
  Type                    Name                  fastpath_tx_access  fastpath_rx_access  comment
  ..struct                ..inet_sock                                                     
diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst

index e75a53593bb9606f1c0595d8f7227881ec932b9c..dceb49d56a91158232543e920c7ed23bed74106e 100644 (file)
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -136,8 +136,8 @@ struct_netpoll_info*                npinfo                  -
  possible_net_t                      nd_net                  -                   read_mostly         (dev_net)napi_busy_loop,tcp_v(4/6)_rcv,ip(v6)_rcv,ip(6)_input,ip(6)_input_finish
  void*                               ml_priv                                                         
  enum_netdev_ml_priv_type            ml_priv_type                                                    
-struct_pcpu_lstats__percpu*         lstats                                                          
-struct_pcpu_sw_netstats__percpu*    tstats                                                          
+struct_pcpu_lstats__percpu*         lstats                  read_mostly                             dev_lstats_add()
+struct_pcpu_sw_netstats__percpu*    tstats                  read_mostly                             dev_sw_netstats_tx_add()
  struct_pcpu_dstats__percpu*         dstats                                                          
  struct_garp_port*                   garp_port                                                       
  struct_mrp_port*                    mrp_port                                                        
diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst

index 97d7a5c8e01c02658c7f445ed92a2d1f7cc61d31..1c154cbd18487e385c8ae7a1e39d3b5f5ab086a2 100644 (file)
--- a/Documentation/networking/net_cachelines/tcp_sock.rst
+++ b/Documentation/networking/net_cachelines/tcp_sock.rst
@@ -38,13 +38,13 @@ u32                           max_window              read_mostly         -
  u32                           mss_cache               read_mostly         read_mostly         tcp_rate_check_app_limited,tcp_current_mss,tcp_sync_mss,tcp_sndbuf_expand,tcp_tso_should_defer(tx);tcp_update_pacing_rate,tcp_clean_rtx_queue(rx)
  u32                           window_clamp            read_mostly         read_write          tcp_rcv_space_adjust,__tcp_select_window
  u32                           rcv_ssthresh            read_mostly         -                   __tcp_select_window
-u82                           scaling_ratio                                                   
+u8                            scaling_ratio           read_mostly         read_mostly         tcp_win_from_space
  struct                        tcp_rack                                                        
  u16                           advmss                  -                   read_mostly         tcp_rcv_space_adjust
  u8                            compressed_ack                                                  
  u8:2                          dup_ack_counter                                                 
  u8:1                          tlp_retrans                                                     
-u8:1                          tcp_usec_ts                                                     
+u8:1                          tcp_usec_ts             read_mostly         read_mostly
  u32                           chrono_start            read_write          -                   tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data)
  u32[3]                        chrono_stat             read_write          -                   tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data)
  u8:2                          chrono_type             read_write          -                   tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data)
diff --git a/Documentation/process/cve.rst b/Documentation/process/cve.rst

new file mode 100644 (file)

index 0000000..5e2753e
--- /dev/null
+++ b/Documentation/process/cve.rst
@@ -0,0 +1,121 @@
+====
+CVEs
+====
+
+Common Vulnerabilities and Exposure (CVE®) numbers were developed as an
+unambiguous way to identify, define, and catalog publicly disclosed
+security vulnerabilities.  Over time, their usefulness has declined with
+regards to the kernel project, and CVE numbers were very often assigned
+in inappropriate ways and for inappropriate reasons.  Because of this,
+the kernel development community has tended to avoid them.  However, the
+combination of continuing pressure to assign CVEs and other forms of
+security identifiers, and ongoing abuses by individuals and companies
+outside of the kernel community has made it clear that the kernel
+community should have control over those assignments.
+
+The Linux kernel developer team does have the ability to assign CVEs for
+potential Linux kernel security issues.  This assignment is independent
+of the :doc:`normal Linux kernel security bug reporting
+process<../process/security-bugs>`.
+
+A list of all assigned CVEs for the Linux kernel can be found in the
+archives of the linux-cve mailing list, as seen on
+https://lore.kernel.org/linux-cve-announce/.  To get notice of the
+assigned CVEs, please `subscribe
+<https://subspace.kernel.org/subscribing.html>`_ to that mailing list.
+
+Process
+=======
+
+As part of the normal stable release process, kernel changes that are
+potentially security issues are identified by the developers responsible
+for CVE number assignments and have CVE numbers automatically assigned
+to them.  These assignments are published on the linux-cve-announce
+mailing list as announcements on a frequent basis.
+
+Note, due to the layer at which the Linux kernel is in a system, almost
+any bug might be exploitable to compromise the security of the kernel,
+but the possibility of exploitation is often not evident when the bug is
+fixed.  Because of this, the CVE assignment team is overly cautious and
+assign CVE numbers to any bugfix that they identify.  This
+explains the seemingly large number of CVEs that are issued by the Linux
+kernel team.
+
+If the CVE assignment team misses a specific fix that any user feels
+should have a CVE assigned to it, please email them at <cve@kernel.org>
+and the team there will work with you on it.  Note that no potential
+security issues should be sent to this alias, it is ONLY for assignment
+of CVEs for fixes that are already in released kernel trees.  If you
+feel you have found an unfixed security issue, please follow the
+:doc:`normal Linux kernel security bug reporting
+process<../process/security-bugs>`.
+
+No CVEs will be automatically assigned for unfixed security issues in
+the Linux kernel; assignment will only automatically happen after a fix
+is available and applied to a stable kernel tree, and it will be tracked
+that way by the git commit id of the original fix.  If anyone wishes to
+have a CVE assigned before an issue is resolved with a commit, please
+contact the kernel CVE assignment team at <cve@kernel.org> to get an
+identifier assigned from their batch of reserved identifiers.
+
+No CVEs will be assigned for any issue found in a version of the kernel
+that is not currently being actively supported by the Stable/LTS kernel
+team.  A list of the currently supported kernel branches can be found at
+https://kernel.org/releases.html
+
+Disputes of assigned CVEs
+=========================
+
+The authority to dispute or modify an assigned CVE for a specific kernel
+change lies solely with the maintainers of the relevant subsystem
+affected.  This principle ensures a high degree of accuracy and
+accountability in vulnerability reporting.  Only those individuals with
+deep expertise and intimate knowledge of the subsystem can effectively
+assess the validity and scope of a reported vulnerability and determine
+its appropriate CVE designation.  Any attempt to modify or dispute a CVE
+outside of this designated authority could lead to confusion, inaccurate
+reporting, and ultimately, compromised systems.
+
+Invalid CVEs
+============
+
+If a security issue is found in a Linux kernel that is only supported by
+a Linux distribution due to the changes that have been made by that
+distribution, or due to the distribution supporting a kernel version
+that is no longer one of the kernel.org supported releases, then a CVE
+can not be assigned by the Linux kernel CVE team, and must be asked for
+from that Linux distribution itself.
+
+Any CVE that is assigned against the Linux kernel for an actively
+supported kernel version, by any group other than the kernel assignment
+CVE team should not be treated as a valid CVE.  Please notify the
+kernel CVE assignment team at <cve@kernel.org> so that they can work to
+invalidate such entries through the CNA remediation process.
+
+Applicability of specific CVEs
+==============================
+
+As the Linux kernel can be used in many different ways, with many
+different ways of accessing it by external users, or no access at all,
+the applicability of any specific CVE is up to the user of Linux to
+determine, it is not up to the CVE assignment team.  Please do not
+contact us to attempt to determine the applicability of any specific
+CVE.
+
+Also, as the source tree is so large, and any one system only uses a
+small subset of the source tree, any users of Linux should be aware that
+large numbers of assigned CVEs are not relevant for their systems.
+
+In short, we do not know your use case, and we do not know what portions
+of the kernel that you use, so there is no way for us to determine if a
+specific CVE is relevant for your system.
+
+As always, it is best to take all released kernel changes, as they are
+tested together in a unified whole by many community members, and not as
+individual cherry-picked changes.  Also note that for many bugs, the
+solution to the overall problem is not found in a single change, but by
+the sum of many fixes on top of each other.  Ideally CVEs will be
+assigned to all fixes for all issues, but sometimes we will fail to
+notice fixes, therefore assume that some changes without a CVE assigned
+might be relevant to take.
+
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst

index 6cb732dfcc72245639e93638083c60fdb7141195..de9cbb7bd7eb2b3a064a93ca2b025bdaf63a42c7 100644 (file)
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -81,6 +81,7 @@ of special classes of bugs: regressions and security problems.
  
     handling-regressions
     security-bugs
+   cve
     embargoed-hardware-issues
  
  Maintainer information
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst

index 84ee60fceef24cbf1ba9e090ac91c94abd4064b5..fd96e4a3cef9c09382e34419ec3f8ac1c5514cf4 100644 (file)
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -431,7 +431,7 @@ patchwork checks
  Checks in patchwork are mostly simple wrappers around existing kernel
  scripts, the sources are available at:
  
-https://github.com/kuba-moo/nipa/tree/master/tests
+https://github.com/linux-netdev/nipa/tree/master/tests
  
  **Do not** post your patches just to run them through the checks.
  You must ensure that your patches are ready by testing them locally
diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst

index 692a3ba56cca83742f77edc5161167060d76f414..56c560a00b37a6a3e99a7d9edaa45a103d7398bb 100644 (file)
--- a/Documentation/process/security-bugs.rst
+++ b/Documentation/process/security-bugs.rst
@@ -99,9 +99,8 @@ CVE assignment
  The security team does not assign CVEs, nor do we require them for
  reports or fixes, as this can needlessly complicate the process and may
  delay the bug handling.  If a reporter wishes to have a CVE identifier
-assigned, they should find one by themselves, for example by contacting
-MITRE directly.  However under no circumstances will a patch inclusion
-be delayed to wait for a CVE identifier to arrive.
+assigned for a confirmed issue, they can contact the :doc:`kernel CVE
+assignment team<../process/cve>` to obtain one.
  
  Non-disclosure agreements
  -------------------------
diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py

index b9df61eb45013872ca82b463048494e041b3f127..03ace5f01b5c021e12adba23b83b8cb074c949ba 100644 (file)
--- a/Documentation/sphinx/kernel_feat.py
+++ b/Documentation/sphinx/kernel_feat.py
@@ -109,7 +109,7 @@ class KernelFeat(Directive):
              else:
                  out_lines += line + "\n"
  
-        nodeList = self.nestedParse(out_lines, fname)
+        nodeList = self.nestedParse(out_lines, self.arguments[0])
          return nodeList
  
      def nestedParse(self, lines, fname):
diff --git a/Documentation/sphinx/translations.py b/Documentation/sphinx/translations.py

index 47161e6eba9976fa8e67a14905f284ea05d82f21..32c2b32b2b5ee91a27abacfa0332620e208b8723 100644 (file)
--- a/Documentation/sphinx/translations.py
+++ b/Documentation/sphinx/translations.py
@@ -29,10 +29,7 @@ all_languages = {
  }
  
  class LanguagesNode(nodes.Element):
-    def __init__(self, current_language, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        self.current_language = current_language
+    pass
  
  class TranslationsTransform(Transform):
      default_priority = 900
@@ -49,7 +46,8 @@ class TranslationsTransform(Transform):
              # normalize docname to be the untranslated one
              docname = os.path.join(*components[2:])
  
-        new_nodes = LanguagesNode(all_languages[this_lang_code])
+        new_nodes = LanguagesNode()
+        new_nodes['current_language'] = all_languages[this_lang_code]
  
          for lang_code, lang_name in all_languages.items():
              if lang_code == this_lang_code:
@@ -84,7 +82,7 @@ def process_languages(app, doctree, docname):
  
          html_content = app.builder.templates.render('translations.html',
              context={
-                'current_language': node.current_language,
+                'current_language': node['current_language'],
                  'languages': languages,
              })
  
diff --git a/Documentation/usb/gadget-testing.rst b/Documentation/usb/gadget-testing.rst

index 8cd62c466d20aac597fa5aa15ecfb2930c15c252..077dfac7ed98f7911d731312eb09631e41c63772 100644 (file)
--- a/Documentation/usb/gadget-testing.rst
+++ b/Documentation/usb/gadget-testing.rst
@@ -448,17 +448,17 @@ Function-specific configfs interface
  The function name to use when creating the function directory is "ncm".
  The NCM function provides these attributes in its function directory:
  
-       ===============   ==================================================
-       ifname            network device interface name associated with this
-                         function instance
-       qmult             queue length multiplier for high and super speed
-       host_addr         MAC address of host's end of this
-                         Ethernet over USB link
-       dev_addr          MAC address of device's end of this
-                         Ethernet over USB link
-       max_segment_size  Segment size required for P2P connections. This
-                         will set MTU to (max_segment_size - 14 bytes)
-       ===============   ==================================================
+       ======================= ==================================================
+       ifname                  network device interface name associated with this
+                               function instance
+       qmult                   queue length multiplier for high and super speed
+       host_addr               MAC address of host's end of this
+                               Ethernet over USB link
+       dev_addr                MAC address of device's end of this
+                               Ethernet over USB link
+       max_segment_size        Segment size required for P2P connections. This
+                               will set MTU to 14 bytes
+       ======================= ==================================================
  
  and after creating the functions/ncm.<instance name> they contain default
  values: qmult is 5, dev_addr and host_addr are randomly selected.
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst

index 3ec0b7a455a0cf489b93683a49b5362cded0b570..0b5a33ee71eea11e5dabe09ad77e62426a72fd11 100644 (file)
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -372,7 +372,7 @@ The bits in the dirty bitmap are cleared before the ioctl returns, unless
  KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled.  For more information,
  see the description of the capability.
  
-Note that the Xen shared info page, if configured, shall always be assumed
+Note that the Xen shared_info page, if configured, shall always be assumed
  to be dirty. KVM will not explicitly mark it such.
  
  
@@ -5487,8 +5487,9 @@ KVM_PV_ASYNC_CLEANUP_PERFORM
                 __u8 long_mode;
                 __u8 vector;
                 __u8 runstate_update_flag;
-               struct {
+               union {
                         __u64 gfn;
+                       __u64 hva;
                 } shared_info;
                 struct {
                         __u32 send_port;
@@ -5516,19 +5517,20 @@ type values:
  
  KVM_XEN_ATTR_TYPE_LONG_MODE
    Sets the ABI mode of the VM to 32-bit or 64-bit (long mode). This
-  determines the layout of the shared info pages exposed to the VM.
+  determines the layout of the shared_info page exposed to the VM.
  
  KVM_XEN_ATTR_TYPE_SHARED_INFO
-  Sets the guest physical frame number at which the Xen "shared info"
+  Sets the guest physical frame number at which the Xen shared_info
    page resides. Note that although Xen places vcpu_info for the first
    32 vCPUs in the shared_info page, KVM does not automatically do so
-  and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used
-  explicitly even when the vcpu_info for a given vCPU resides at the
-  "default" location in the shared_info page. This is because KVM may
-  not be aware of the Xen CPU id which is used as the index into the
-  vcpu_info[] array, so may know the correct default location.
-
-  Note that the shared info page may be constantly written to by KVM;
+  and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO or
+  KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA be used explicitly even when
+  the vcpu_info for a given vCPU resides at the "default" location
+  in the shared_info page. This is because KVM may not be aware of
+  the Xen CPU id which is used as the index into the vcpu_info[]
+  array, so may know the correct default location.
+
+  Note that the shared_info page may be constantly written to by KVM;
    it contains the event channel bitmap used to deliver interrupts to
    a Xen guest, amongst other things. It is exempt from dirty tracking
    mechanisms — KVM will not explicitly mark the page as dirty each
@@ -5537,9 +5539,21 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
    any vCPU has been running or any event channel interrupts can be
    routed to the guest.
  
-  Setting the gfn to KVM_XEN_INVALID_GFN will disable the shared info
+  Setting the gfn to KVM_XEN_INVALID_GFN will disable the shared_info
    page.
  
+KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA
+  If the KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA flag is also set in the
+  Xen capabilities, then this attribute may be used to set the
+  userspace address at which the shared_info page resides, which
+  will always be fixed in the VMM regardless of where it is mapped
+  in guest physical address space. This attribute should be used in
+  preference to KVM_XEN_ATTR_TYPE_SHARED_INFO as it avoids
+  unnecessary invalidation of an internal cache when the page is
+  re-mapped in guest physcial address space.
+
+  Setting the hva to zero will disable the shared_info page.
+
  KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
    Sets the exception vector used to deliver Xen event channel upcalls.
    This is the HVM-wide vector injected directly by the hypervisor
@@ -5636,6 +5650,21 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
    on dirty logging. Setting the gpa to KVM_XEN_INVALID_GPA will disable
    the vcpu_info.
  
+KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA
+  If the KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA flag is also set in the
+  Xen capabilities, then this attribute may be used to set the
+  userspace address of the vcpu_info for a given vCPU. It should
+  only be used when the vcpu_info resides at the "default" location
+  in the shared_info page. In this case it is safe to assume the
+  userspace address will not change, because the shared_info page is
+  an overlay on guest memory and remains at a fixed host address
+  regardless of where it is mapped in guest physical address space
+  and hence unnecessary invalidation of an internal cache may be
+  avoided if the guest memory layout is modified.
+  If the vcpu_info does not reside at the "default" location then
+  it is not guaranteed to remain at the same host address and
+  hence the aforementioned cache invalidation is required.
+
  KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
    Sets the guest physical address of an additional pvclock structure
    for a given vCPU. This is typically used for guest vsyscall support.
@@ -8791,6 +8820,11 @@ means the VM type with value @n is supported.  Possible values of @n are::
    #define KVM_X86_DEFAULT_VM   0
    #define KVM_X86_SW_PROTECTED_VM      1
  
+Note, KVM_X86_SW_PROTECTED_VM is currently only for development and testing.
+Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in
+production.  The behavior and effective ABI for software-protected VMs is
+unstable.
+
  9. Known KVM API problems
  =========================
  
diff --git a/MAINTAINERS b/MAINTAINERS

index 8999497011a263595f481191b419c9ca14765e9e..4f298c4187fbc7374d189289783422a5d803c35e 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1395,6 +1395,7 @@ F:        drivers/hwmon/max31760.c
  
  ANALOGBITS PLL LIBRARIES
  M:     Paul Walmsley <paul.walmsley@sifive.com>
+M:     Samuel Holland <samuel.holland@sifive.com>
  S:     Supported
  F:     drivers/clk/analogbits/*
  F:     include/linux/clk/analogbits*
@@ -4169,14 +4170,14 @@ F:      drivers/firmware/broadcom/tee_bnxt_fw.c
  F:     drivers/net/ethernet/broadcom/bnxt/
  F:     include/linux/firmware/broadcom/tee_bnxt_fw.h
  
-BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER
-M:     Arend van Spriel <aspriel@gmail.com>
-M:     Franky Lin <franky.lin@broadcom.com>
-M:     Hante Meuleman <hante.meuleman@broadcom.com>
+BROADCOM BRCM80211 IEEE802.11 WIRELESS DRIVERS
+M:     Arend van Spriel <arend.vanspriel@broadcom.com>
  L:     linux-wireless@vger.kernel.org
+L:     brcm80211@lists.linux.dev
  L:     brcm80211-dev-list.pdl@broadcom.com
  S:     Supported
  F:     drivers/net/wireless/broadcom/brcm80211/
+F:     include/linux/platform_data/brcmfmac.h
  
  BROADCOM BRCMSTB GPIO DRIVER
  M:     Doug Berger <opendmb@gmail.com>
@@ -5378,7 +5379,7 @@ CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
  M:     Johannes Weiner <hannes@cmpxchg.org>
  M:     Michal Hocko <mhocko@kernel.org>
  M:     Roman Gushchin <roman.gushchin@linux.dev>
-M:     Shakeel Butt <shakeelb@google.com>
+M:     Shakeel Butt <shakeel.butt@linux.dev>
  R:     Muchun Song <muchun.song@linux.dev>
  L:     cgroups@vger.kernel.org
  L:     linux-mm@kvack.org
@@ -5610,6 +5611,11 @@ S:       Maintained
  F:     Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml
  F:     drivers/net/can/ctucanfd/
  
+CVE ASSIGNMENT CONTACT
+M:     CVE Assignment Team <cve@kernel.org>
+S:     Maintained
+F:     Documentation/process/cve.rst
+
  CW1200 WLAN driver
  S:     Orphan
  F:     drivers/net/wireless/st/cw1200/
@@ -10091,7 +10097,7 @@ L:      linux-i2c@vger.kernel.org
  S:     Maintained
  W:     https://i2c.wiki.kernel.org/
  Q:     https://patchwork.ozlabs.org/project/linux-i2c/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git
  F:     Documentation/devicetree/bindings/i2c/
  F:     drivers/i2c/algos/
  F:     drivers/i2c/busses/
@@ -10283,7 +10289,7 @@ F:      drivers/scsi/ibmvscsi/ibmvscsi*
  F:     include/scsi/viosrp.h
  
  IBM Power Virtual SCSI Device Target Driver
-M:     Michael Cyr <mikecyr@linux.ibm.com>
+M:     Tyrel Datwyler <tyreld@linux.ibm.com>
  L:     linux-scsi@vger.kernel.org
  L:     target-devel@vger.kernel.org
  S:     Supported
@@ -10801,11 +10807,11 @@ F:    drivers/gpio/gpio-tangier.h
  
  INTEL GVT-g DRIVERS (Intel GPU Virtualization)
  M:     Zhenyu Wang <zhenyuw@linux.intel.com>
-M:     Zhi Wang <zhi.a.wang@intel.com>
+M:     Zhi Wang <zhi.wang.linux@gmail.com>
  L:     intel-gvt-dev@lists.freedesktop.org
  L:     intel-gfx@lists.freedesktop.org
  S:     Supported
-W:     https://01.org/igvt-g
+W:     https://github.com/intel/gvt-linux/wiki
  T:     git https://github.com/intel/gvt-linux.git
  F:     drivers/gpu/drm/i915/gvt/
  
@@ -11127,7 +11133,6 @@ S:      Supported
  F:     drivers/net/wireless/intel/iwlegacy/
  
  INTEL WIRELESS WIFI LINK (iwlwifi)
-M:     Gregory Greenman <gregory.greenman@intel.com>
  M:     Miri Korenblit <miriam.rachel.korenblit@intel.com>
  L:     linux-wireless@vger.kernel.org
  S:     Supported
@@ -11725,6 +11730,7 @@ F:      fs/smb/server/
  KERNEL UNIT TESTING FRAMEWORK (KUnit)
  M:     Brendan Higgins <brendanhiggins@google.com>
  M:     David Gow <davidgow@google.com>
+R:     Rae Moar <rmoar@google.com>
  L:     linux-kselftest@vger.kernel.org
  L:     kunit-dev@googlegroups.com
  S:     Maintained
@@ -12903,6 +12909,8 @@ M:      Alejandro Colomar <alx@kernel.org>
  L:     linux-man@vger.kernel.org
  S:     Maintained
  W:     http://www.kernel.org/doc/man-pages
+T:     git git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
+T:     git git://www.alejandro-colomar.es/src/alx/linux/man-pages/man-pages.git
  
  MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP)
  M:     Jeremy Kerr <jk@codeconstruct.com.au>
@@ -14104,6 +14112,17 @@ F:     mm/
  F:     tools/mm/
  F:     tools/testing/selftests/mm/
  
+MEMORY MAPPING
+M:     Andrew Morton <akpm@linux-foundation.org>
+R:     Liam R. Howlett <Liam.Howlett@oracle.com>
+R:     Vlastimil Babka <vbabka@suse.cz>
+R:     Lorenzo Stoakes <lstoakes@gmail.com>
+L:     linux-mm@kvack.org
+S:     Maintained
+W:     http://www.linux-mm.org
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F:     mm/mmap.c
+
  MEMORY TECHNOLOGY DEVICES (MTD)
  M:     Miquel Raynal <miquel.raynal@bootlin.com>
  M:     Richard Weinberger <richard@nod.at>
@@ -14362,7 +14381,7 @@ MICROCHIP MCP16502 PMIC DRIVER
  M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:     Supported
-F:     Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt
+F:     Documentation/devicetree/bindings/regulator/microchip,mcp16502.yaml
  F:     drivers/regulator/mcp16502.c
  
  MICROCHIP MCP3564 ADC DRIVER
@@ -15178,6 +15197,7 @@ F:      Documentation/networking/net_cachelines/net_device.rst
  F:     drivers/connector/
  F:     drivers/net/
  F:     include/dt-bindings/net/
+F:     include/linux/cn_proc.h
  F:     include/linux/etherdevice.h
  F:     include/linux/fcdevice.h
  F:     include/linux/fddidevice.h
@@ -15185,6 +15205,7 @@ F:      include/linux/hippidevice.h
  F:     include/linux/if_*
  F:     include/linux/inetdevice.h
  F:     include/linux/netdevice.h
+F:     include/uapi/linux/cn_proc.h
  F:     include/uapi/linux/if_*
  F:     include/uapi/linux/netdevice.h
  X:     drivers/net/wireless/
@@ -15233,6 +15254,8 @@ F:      Documentation/networking/
  F:     Documentation/networking/net_cachelines/
  F:     Documentation/process/maintainer-netdev.rst
  F:     Documentation/userspace-api/netlink/
+F:     include/linux/framer/framer-provider.h
+F:     include/linux/framer/framer.h
  F:     include/linux/in.h
  F:     include/linux/indirect_call_wrapper.h
  F:     include/linux/net.h
@@ -15320,7 +15343,7 @@ K:      \bmdo_
  NETWORKING [MPTCP]
  M:     Matthieu Baerts <matttbe@kernel.org>
  M:     Mat Martineau <martineau@kernel.org>
-R:     Geliang Tang <geliang.tang@linux.dev>
+R:     Geliang Tang <geliang@kernel.org>
  L:     netdev@vger.kernel.org
  L:     mptcp@lists.linux.dev
  S:     Maintained
@@ -16721,6 +16744,7 @@ F:      drivers/pci/controller/dwc/*layerscape*
  PCI DRIVER FOR FU740
  M:     Paul Walmsley <paul.walmsley@sifive.com>
  M:     Greentime Hu <greentime.hu@sifive.com>
+M:     Samuel Holland <samuel.holland@sifive.com>
  L:     linux-pci@vger.kernel.org
  S:     Maintained
  F:     Documentation/devicetree/bindings/pci/sifive,fu740-pcie.yaml
@@ -16833,6 +16857,7 @@ F:      drivers/pci/controller/dwc/*designware*
  
  PCI DRIVER FOR TI DRA7XX/J721E
  M:     Vignesh Raghavendra <vigneshr@ti.com>
+R:     Siddharth Vadapalli <s-vadapalli@ti.com>
  L:     linux-omap@vger.kernel.org
  L:     linux-pci@vger.kernel.org
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -16857,9 +16882,8 @@ F:      Documentation/devicetree/bindings/pci/xilinx-versal-cpm.yaml
  F:     drivers/pci/controller/pcie-xilinx-cpm.c
  
  PCI ENDPOINT SUBSYSTEM
-M:     Lorenzo Pieralisi <lpieralisi@kernel.org>
+M:     Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
  M:     Krzysztof Wilczyński <kw@linux.com>
-R:     Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
  R:     Kishon Vijay Abraham I <kishon@kernel.org>
  L:     linux-pci@vger.kernel.org
  S:     Supported
@@ -17179,7 +17203,7 @@ R:      John Garry <john.g.garry@oracle.com>
  R:     Will Deacon <will@kernel.org>
  R:     James Clark <james.clark@arm.com>
  R:     Mike Leach <mike.leach@linaro.org>
-R:     Leo Yan <leo.yan@linaro.org>
+R:     Leo Yan <leo.yan@linux.dev>
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:     Supported
  F:     tools/build/feature/test-libopencsd.c
@@ -17973,33 +17997,34 @@ F:    drivers/media/tuners/qt1010*
  
  QUALCOMM ATH12K WIRELESS DRIVER
  M:     Kalle Valo <kvalo@kernel.org>
-M:     Jeff Johnson <quic_jjohnson@quicinc.com>
+M:     Jeff Johnson <jjohnson@kernel.org>
  L:     ath12k@lists.infradead.org
  S:     Supported
  W:     https://wireless.wiki.kernel.org/en/users/Drivers/ath12k
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
  F:     drivers/net/wireless/ath/ath12k/
+N:     ath12k
  
  QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
  M:     Kalle Valo <kvalo@kernel.org>
-M:     Jeff Johnson <quic_jjohnson@quicinc.com>
+M:     Jeff Johnson <jjohnson@kernel.org>
  L:     ath10k@lists.infradead.org
  S:     Supported
  W:     https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
-F:     Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
  F:     drivers/net/wireless/ath/ath10k/
+N:     ath10k
  
  QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
  M:     Kalle Valo <kvalo@kernel.org>
-M:     Jeff Johnson <quic_jjohnson@quicinc.com>
+M:     Jeff Johnson <jjohnson@kernel.org>
  L:     ath11k@lists.infradead.org
  S:     Supported
  W:     https://wireless.wiki.kernel.org/en/users/Drivers/ath11k
  B:     https://wireless.wiki.kernel.org/en/users/Drivers/ath11k/bugreport
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
-F:     Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
  F:     drivers/net/wireless/ath/ath11k/
+N:     ath11k
  
  QUALCOMM ATHEROS ATH9K WIRELESS DRIVER
  M:     Toke Høiland-Jørgensen <toke@toke.dk>
@@ -18082,7 +18107,6 @@ F:      drivers/net/ethernet/qualcomm/emac/
  
  QUALCOMM ETHQOS ETHERNET DRIVER
  M:     Vinod Koul <vkoul@kernel.org>
-R:     Bhupesh Sharma <bhupesh.sharma@linaro.org>
  L:     netdev@vger.kernel.org
  L:     linux-arm-msm@vger.kernel.org
  S:     Maintained
@@ -18429,7 +18453,7 @@ S:      Supported
  F:     drivers/infiniband/sw/rdmavt
  
  RDS - RELIABLE DATAGRAM SOCKETS
-M:     Santosh Shilimkar <santosh.shilimkar@oracle.com>
+M:     Allison Henderson <allison.henderson@oracle.com>
  L:     netdev@vger.kernel.org
  L:     linux-rdma@vger.kernel.org
  L:     rds-devel@oss.oracle.com (moderated for non-subscribers)
@@ -19966,36 +19990,15 @@ S:    Maintained
  F:     drivers/watchdog/simatic-ipc-wdt.c
  
  SIFIVE DRIVERS
-M:     Palmer Dabbelt <palmer@dabbelt.com>
  M:     Paul Walmsley <paul.walmsley@sifive.com>
+M:     Samuel Holland <samuel.holland@sifive.com>
  L:     linux-riscv@lists.infradead.org
  S:     Supported
+F:     drivers/dma/sf-pdma/
  N:     sifive
+K:     fu[57]40
  K:     [^@]sifive
  
-SIFIVE CACHE DRIVER
-M:     Conor Dooley <conor@kernel.org>
-L:     linux-riscv@lists.infradead.org
-S:     Maintained
-F:     Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
-F:     drivers/cache/sifive_ccache.c
-
-SIFIVE FU540 SYSTEM-ON-CHIP
-M:     Paul Walmsley <paul.walmsley@sifive.com>
-M:     Palmer Dabbelt <palmer@dabbelt.com>
-L:     linux-riscv@lists.infradead.org
-S:     Supported
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git
-N:     fu540
-K:     fu540
-
-SIFIVE PDMA DRIVER
-M:     Green Wan <green.wan@sifive.com>
-S:     Maintained
-F:     Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
-F:     drivers/dma/sf-pdma/
-
-
  SILEAD TOUCHSCREEN DRIVER
  M:     Hans de Goede <hdegoede@redhat.com>
  L:     linux-input@vger.kernel.org
@@ -22007,6 +22010,14 @@ F:     Documentation/devicetree/bindings/media/i2c/ti,ds90*
  F:     drivers/media/i2c/ds90*
  F:     include/media/i2c/ds90*
  
+TI HDC302X HUMIDITY DRIVER
+M:     Javier Carrasco <javier.carrasco.cruz@gmail.com>
+M:     Li peiyu <579lpy@gmail.com>
+L:     linux-iio@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/iio/humidity/ti,hdc3020.yaml
+F:     drivers/iio/humidity/hdc3020.c
+
  TI ICSSG ETHERNET DRIVER (ICSSG)
  R:     MD Danish Anwar <danishanwar@ti.com>
  R:     Roger Quadros <rogerq@kernel.org>
@@ -22862,9 +22873,8 @@ S:      Maintained
  F:     drivers/usb/typec/mux/pi3usb30532.c
  
  USB TYPEC PORT CONTROLLER DRIVERS
-M:     Guenter Roeck <linux@roeck-us.net>
  L:     linux-usb@vger.kernel.org
-S:     Maintained
+S:     Orphan
  F:     drivers/usb/typec/tcpm/
  
  USB UHCI DRIVER
@@ -24341,13 +24351,6 @@ T:     git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git
  F:     Documentation/filesystems/zonefs.rst
  F:     fs/zonefs/
  
-ZPOOL COMPRESSED PAGE STORAGE API
-M:     Dan Streetman <ddstreet@ieee.org>
-L:     linux-mm@kvack.org
-S:     Maintained
-F:     include/linux/zpool.h
-F:     mm/zpool.c
-
  ZR36067 VIDEO FOR LINUX DRIVER
  M:     Corentin Labbe <clabbe@baylibre.com>
  L:     mjpeg-users@lists.sourceforge.net
@@ -24399,7 +24402,9 @@ M:      Nhat Pham <nphamcs@gmail.com>
  L:     linux-mm@kvack.org
  S:     Maintained
  F:     Documentation/admin-guide/mm/zswap.rst
+F:     include/linux/zpool.h
  F:     include/linux/zswap.h
+F:     mm/zpool.c
  F:     mm/zswap.c
  
  THE REST
diff --git a/Makefile b/Makefile

index 6c0a4d294444cb41b174651ca2b66c856a8f1a55..0e36eff146088a0edf59ab2d90e18ff5105626f8 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
  VERSION = 6
  PATCHLEVEL = 8
  SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc7
  NAME = Hurr durr I'ma ninja sloth
  
  # *DOCUMENTATION*
@@ -294,15 +294,15 @@ may-sync-config   := 1
  single-build   :=
  
  ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),)
-       ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),)
+    ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),)
                 need-config :=
-       endif
+    endif
  endif
  
  ifneq ($(filter $(no-sync-config-targets), $(MAKECMDGOALS)),)
-       ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),)
+    ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),)
                 may-sync-config :=
-       endif
+    endif
  endif
  
  need-compiler := $(may-sync-config)
@@ -323,9 +323,9 @@ endif
  # We cannot build single targets and the others at the same time
  ifneq ($(filter $(single-targets), $(MAKECMDGOALS)),)
         single-build := 1
-       ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),)
+    ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),)
                 mixed-build := 1
-       endif
+    endif
  endif
  
  # For "make -j clean all", "make -j mrproper defconfig all", etc.
@@ -1666,7 +1666,7 @@ help:
         @echo  '                       (sparse by default)'
         @echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
         @echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
-       @echo  '  make W=n   [targets] Enable extra build checks, n=1,2,3 where'
+       @echo  '  make W=n   [targets] Enable extra build checks, n=1,2,3,c,e where'
         @echo  '                1: warnings which may be relevant and do not occur too often'
         @echo  '                2: warnings which occur quite often but may still be relevant'
         @echo  '                3: more obscure warnings, can most likely be ignored'
diff --git a/arch/Kconfig b/arch/Kconfig

index c91917b508736d1fa0d37d5bf3b1e4bf5550e211..a5af0edd3eb8f3b64e6e51bffb2ac491cb31bc26 100644 (file)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -673,6 +673,7 @@ config SHADOW_CALL_STACK
         bool "Shadow Call Stack"
         depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
         depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
+       depends on MMU
         help
           This option enables the compiler's Shadow Call Stack, which
           uses a shadow stack to protect function return addresses from
diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h

index 9d96180797396bba26ace54f047f7a47bf82dd5f..a339223d9e052b35ea678d6a3e60faf6e5673671 100644 (file)
--- a/arch/arc/include/asm/jump_label.h
+++ b/arch/arc/include/asm/jump_label.h
@@ -31,7 +31,7 @@
  static __always_inline bool arch_static_branch(struct static_key *key,
                                                bool branch)
  {
-       asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"   \n"
+       asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"            \n"
                  "1:                                                    \n"
                  "nop                                                   \n"
                  ".pushsection __jump_table, \"aw\"                     \n"
@@ -47,7 +47,7 @@ l_yes:
  static __always_inline bool arch_static_branch_jump(struct static_key *key,
                                                     bool branch)
  {
-       asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"   \n"
+       asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)"            \n"
                  "1:                                                    \n"
                  "b %l[l_yes]                                           \n"
                  ".pushsection __jump_table, \"aw\"                     \n"
diff --git a/arch/arm/boot/dts/amazon/alpine.dtsi b/arch/arm/boot/dts/amazon/alpine.dtsi

index ff68dfb4eb7874a00d398bf7dfc2d242385c5620..90bd12feac010108def3f68756edf4e2d76c2e84 100644 (file)
--- a/arch/arm/boot/dts/amazon/alpine.dtsi
+++ b/arch/arm/boot/dts/amazon/alpine.dtsi
@@ -167,7 +167,6 @@
                 msix: msix@fbe00000 {
                         compatible = "al,alpine-msix";
                         reg = <0x0 0xfbe00000 0x0 0x100000>;
-                       interrupt-controller;
                         msi-controller;
                         al,msi-base-spi = <96>;
                         al,msi-num-spis = <64>;
diff --git a/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi

index 530491ae5eb26060f68802cf3318914f7fb2d361..857cb26ed6d7e8acd13c5695daa9fb3b8699c3c1 100644 (file)
--- a/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi
+++ b/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi
@@ -466,7 +466,6 @@
         i2c0: i2c-bus@40 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x40 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -482,7 +481,6 @@
         i2c1: i2c-bus@80 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x80 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -498,7 +496,6 @@
         i2c2: i2c-bus@c0 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0xc0 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -515,7 +512,6 @@
         i2c3: i2c-bus@100 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x100 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -532,7 +528,6 @@
         i2c4: i2c-bus@140 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x140 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -549,7 +544,6 @@
         i2c5: i2c-bus@180 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x180 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -566,7 +560,6 @@
         i2c6: i2c-bus@1c0 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x1c0 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -583,7 +576,6 @@
         i2c7: i2c-bus@300 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x300 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -600,7 +592,6 @@
         i2c8: i2c-bus@340 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x340 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -617,7 +608,6 @@
         i2c9: i2c-bus@380 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x380 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -634,7 +624,6 @@
         i2c10: i2c-bus@3c0 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x3c0 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -651,7 +640,6 @@
         i2c11: i2c-bus@400 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x400 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -668,7 +656,6 @@
         i2c12: i2c-bus@440 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x440 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
@@ -685,7 +672,6 @@
         i2c13: i2c-bus@480 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x480 0x40>;
                 compatible = "aspeed,ast2400-i2c-bus";
diff --git a/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi

index 04f98d1dbb97c84c318c7e6a133fbf4572237c47..e6f3cf3c721e574f8b9975254cdcc79e3ce3b725 100644 (file)
--- a/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi
+++ b/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi
@@ -363,6 +363,7 @@
                                 interrupts = <40>;
                                 reg = <0x1e780200 0x0100>;
                                 clocks = <&syscon ASPEED_CLK_APB>;
+                               #interrupt-cells = <2>;
                                 interrupt-controller;
                                 bus-frequency = <12000000>;
                                 pinctrl-names = "default";
@@ -594,7 +595,6 @@
         i2c0: i2c-bus@40 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x40 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -610,7 +610,6 @@
         i2c1: i2c-bus@80 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x80 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -626,7 +625,6 @@
         i2c2: i2c-bus@c0 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0xc0 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -643,7 +641,6 @@
         i2c3: i2c-bus@100 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x100 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -660,7 +657,6 @@
         i2c4: i2c-bus@140 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x140 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -677,7 +673,6 @@
         i2c5: i2c-bus@180 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x180 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -694,7 +689,6 @@
         i2c6: i2c-bus@1c0 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x1c0 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -711,7 +705,6 @@
         i2c7: i2c-bus@300 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x300 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -728,7 +721,6 @@
         i2c8: i2c-bus@340 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x340 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -745,7 +737,6 @@
         i2c9: i2c-bus@380 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x380 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -762,7 +753,6 @@
         i2c10: i2c-bus@3c0 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x3c0 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -779,7 +769,6 @@
         i2c11: i2c-bus@400 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x400 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -796,7 +785,6 @@
         i2c12: i2c-bus@440 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x440 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
@@ -813,7 +801,6 @@
         i2c13: i2c-bus@480 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
  
                 reg = <0x480 0x40>;
                 compatible = "aspeed,ast2500-i2c-bus";
diff --git a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi

index c4d1faade8be33d52c91f797f3fedaa0b22566a2..29f94696d8b189cba0113e7a65bbb25611358710 100644 (file)
--- a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi
@@ -474,6 +474,7 @@
                                 reg = <0x1e780500 0x100>;
                                 interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_HIGH>;
                                 clocks = <&syscon ASPEED_CLK_APB2>;
+                               #interrupt-cells = <2>;
                                 interrupt-controller;
                                 bus-frequency = <12000000>;
                                 pinctrl-names = "default";
@@ -488,6 +489,7 @@
                                 reg = <0x1e780600 0x100>;
                                 interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
                                 clocks = <&syscon ASPEED_CLK_APB2>;
+                               #interrupt-cells = <2>;
                                 interrupt-controller;
                                 bus-frequency = <12000000>;
                                 pinctrl-names = "default";
@@ -902,7 +904,6 @@
         i2c0: i2c-bus@80 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x80 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -917,7 +918,6 @@
         i2c1: i2c-bus@100 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x100 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -932,7 +932,6 @@
         i2c2: i2c-bus@180 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x180 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -947,7 +946,6 @@
         i2c3: i2c-bus@200 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x200 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -962,7 +960,6 @@
         i2c4: i2c-bus@280 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x280 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -977,7 +974,6 @@
         i2c5: i2c-bus@300 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x300 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -992,7 +988,6 @@
         i2c6: i2c-bus@380 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x380 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1007,7 +1002,6 @@
         i2c7: i2c-bus@400 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x400 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1022,7 +1016,6 @@
         i2c8: i2c-bus@480 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x480 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1037,7 +1030,6 @@
         i2c9: i2c-bus@500 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x500 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1052,7 +1044,6 @@
         i2c10: i2c-bus@580 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x580 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1067,7 +1058,6 @@
         i2c11: i2c-bus@600 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x600 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1082,7 +1072,6 @@
         i2c12: i2c-bus@680 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x680 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1097,7 +1086,6 @@
         i2c13: i2c-bus@700 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x700 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1112,7 +1100,6 @@
         i2c14: i2c-bus@780 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x780 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
@@ -1127,7 +1114,6 @@
         i2c15: i2c-bus@800 {
                 #address-cells = <1>;
                 #size-cells = <0>;
-               #interrupt-cells = <1>;
                 reg = <0x800 0x80>;
                 compatible = "aspeed,ast2600-i2c-bus";
                 clocks = <&syscon ASPEED_CLK_APB2>;
diff --git a/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi b/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi

index f9f79ed825181b7e71b12f87d7ba21ade0fd6d4d..07ca0d993c9fdb27ef50e3c450f3472ebe67f858 100644 (file)
--- a/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi
+++ b/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi
@@ -167,6 +167,7 @@
                         #gpio-cells = <2>;
                         gpio-controller;
                         interrupt-controller;
+                       #interrupt-cells = <2>;
                         interrupt-parent = <&mailbox>;
                         interrupts = <0>;
                 };
@@ -247,6 +248,7 @@
                         gpio-controller;
                         interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
                         interrupt-controller;
+                       #interrupt-cells = <2>;
                 };
  
                 i2c1: i2c@1800b000 {
@@ -518,6 +520,7 @@
                         gpio-controller;
  
                         interrupt-controller;
+                       #interrupt-cells = <2>;
                         interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
                         gpio-ranges = <&pinctrl 0 42 1>,
                                         <&pinctrl 1 44 3>,
diff --git a/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi b/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi

index 788a6806191a33a04aa326a0645d5af06365571d..75545b10ef2fa69570f42422e15a2341d4cfaf92 100644 (file)
--- a/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi
+++ b/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi
@@ -200,6 +200,7 @@
                         gpio-controller;
                         ngpios = <4>;
                         interrupt-controller;
+                       #interrupt-cells = <2>;
                         interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
                 };
  
diff --git a/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi b/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi

index 9d20ba3b1ffb13d4983f28e66de7ae140af528be..6a4482c9316741d89eb67371ac13a3670783b8fc 100644 (file)
--- a/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi
+++ b/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi
@@ -180,6 +180,7 @@
                         gpio-controller;
                         ngpios = <32>;
                         interrupt-controller;
+                       #interrupt-cells = <2>;
                         interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
                         gpio-ranges = <&pinctrl 0 0 32>;
                 };
@@ -352,6 +353,7 @@
                         gpio-controller;
                         ngpios = <4>;
                         interrupt-controller;
+                       #interrupt-cells = <2>;
                         interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
                 };
  
diff --git a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts

index 4d70f6afd13ab5ee5df7ea621b56f81f4e642d41..6d5e69035f94dcaa3f323c833c1edd064d4f7dfd 100644 (file)
--- a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts
+++ b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts
@@ -60,6 +60,8 @@
                          * We have slots (IDSEL) 1 and 2 with one assigned IRQ
                          * each handling all IRQs.
                          */
+                       #interrupt-cells = <1>;
+                       interrupt-map-mask = <0xf800 0 0 7>;
                         interrupt-map =
                         /* IDSEL 1 */
                         <0x0800 0 0 1 &gpio0 11 IRQ_TYPE_LEVEL_LOW>, /* INT A on slot 1 is irq 11 */
diff --git a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts

index 9ec0169bacf8c2098814ec6c1399e41c910df464..5f4c849915db71390ab3050b7277b7893b075307 100644 (file)
--- a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts
+++ b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts
@@ -89,6 +89,8 @@
                          * The slots have Ethernet, Ethernet, NEC and MPCI.
                          * The IDSELs are 11, 12, 13, 14.
                          */
+                       #interrupt-cells = <1>;
+                       interrupt-map-mask = <0xf800 0 0 7>;
                         interrupt-map =
                         /* IDSEL 11 - Ethernet A */
                         <0x5800 0 0 1 &gpio0 4 IRQ_TYPE_LEVEL_LOW>, /* INT A on slot 11 is irq 4 */
diff --git a/arch/arm/boot/dts/marvell/kirkwood-l-50.dts b/arch/arm/boot/dts/marvell/kirkwood-l-50.dts

index dffb9f84e67c50c63ba5268a9975c62b93e75157..c841eb8e7fb1d0404301f4f8b21899fb60b77a25 100644 (file)
--- a/arch/arm/boot/dts/marvell/kirkwood-l-50.dts
+++ b/arch/arm/boot/dts/marvell/kirkwood-l-50.dts
@@ -65,6 +65,7 @@
                         gpio2: gpio-expander@20 {
                                 #gpio-cells = <2>;
                                 #interrupt-cells = <2>;
+                               interrupt-controller;
                                 compatible = "semtech,sx1505q";
                                 reg = <0x20>;
  
@@ -79,6 +80,7 @@
                         gpio3: gpio-expander@21 {
                                 #gpio-cells = <2>;
                                 #interrupt-cells = <2>;
+                               interrupt-controller;
                                 compatible = "semtech,sx1505q";
                                 reg = <0x21>;
  
diff --git a/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi b/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi

index fd671c7a1e5d64c6eafb0a7434c7d14b19f4d1b6..6e1f0f164cb4f511d19774a8c39a9a3090d85b9d 100644 (file)
--- a/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi
+++ b/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi
@@ -120,6 +120,7 @@
                                 interrupts = <2 IRQ_TYPE_LEVEL_HIGH>,
                                              <3 IRQ_TYPE_LEVEL_HIGH>,
                                              <4 IRQ_TYPE_LEVEL_HIGH>;
+                               #interrupt-cells = <2>;
                                 interrupt-controller;
                         };
  
@@ -128,6 +129,7 @@
                                 gpio-controller;
                                 #gpio-cells = <2>;
                                 interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+                               #interrupt-cells = <2>;
                                 interrupt-controller;
                         };
  
diff --git a/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi b/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi

index 1640763fd4af2216c225b95e60e954afd5255fb5..ff0d684622f74d13eb1b4b2c7178c38e93ab4293 100644 (file)
--- a/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi
+++ b/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi
@@ -997,7 +997,6 @@
                         compatible = "st,stmpe811";
                         reg = <0x41>;
                         irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>;
-                       interrupt-controller;
                         id = <0>;
                         blocks = <0x5>;
                         irq-trigger = <0x1>;
diff --git a/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi b/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi

index 3b6fad273cabf17a6ddff7ede1d72de13079ed1f..d38f1dd38a9068371c25ddf82f4c284a555ffb03 100644 (file)
--- a/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi
+++ b/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi
@@ -980,7 +980,6 @@
                         compatible = "st,stmpe811";
                         reg = <0x41>;
                         irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>;
-                       interrupt-controller;
                         id = <0>;
                         blocks = <0x5>;
                         irq-trigger = <0x1>;
diff --git a/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi b/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi

index 4eb526fe9c55888d6a595d68d3a95616bb913404..81c8a5fd92ccea33b3673d61302d39397e8fa72f 100644 (file)
--- a/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi
+++ b/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi
@@ -861,7 +861,6 @@
                         compatible = "st,stmpe811";
                         reg = <0x41>;
                         irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>;
-                       interrupt-controller;
                         id = <0>;
                         blocks = <0x5>;
                         irq-trigger = <0x1>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts b/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts

index db8c332df6a1d53f1b3eff6572a9f080ac10fe0a..cad112e054758f7ce364f2346eb4e1e291086a61 100644 (file)
--- a/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts
@@ -227,7 +227,6 @@
  
                 #address-cells = <3>;
                 #size-cells = <2>;
-               #interrupt-cells = <1>;
  
                 bridge@2,1 {
                         compatible = "pci10b5,8605";
@@ -235,7 +234,6 @@
  
                         #address-cells = <3>;
                         #size-cells = <2>;
-                       #interrupt-cells = <1>;
  
                         /* Intel Corporation I210 Gigabit Network Connection */
                         ethernet@3,0 {
@@ -250,7 +248,6 @@
  
                         #address-cells = <3>;
                         #size-cells = <2>;
-                       #interrupt-cells = <1>;
  
                         /* Intel Corporation I210 Gigabit Network Connection */
                         switch_nic: ethernet@4,0 {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi

index 99f4f6ac71d4a18f6f6eb2f0476c47280ba844b7..c1ae7c47b44227c2438d4e7c73fbafd6eaa269b9 100644 (file)
--- a/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi
@@ -245,6 +245,7 @@
                                 reg = <0x74>;
                                 gpio-controller;
                                 #gpio-cells = <2>;
+                               #interrupt-cells = <2>;
                                 interrupt-controller;
                                 interrupt-parent = <&gpio2>;
                                 interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
@@ -390,7 +391,6 @@
  
                 #address-cells = <3>;
                 #size-cells = <2>;
-               #interrupt-cells = <1>;
         };
  };
  
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi

index 2ae93f57fe5acac1f3f437b082e258ed81a391e0..ea40623d12e5fddc11b2af150ca6a80af93510a3 100644 (file)
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi
@@ -626,7 +626,6 @@
                 blocks = <0x5>;
                 id = <0>;
                 interrupts = <10 IRQ_TYPE_LEVEL_LOW>;
-               interrupt-controller;
                 interrupt-parent = <&gpio4>;
                 irq-trigger = <0x1>;
                 pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi

index 55c90f6393ad5e1176b5f8af6ca94bcf9c368477..d3a7a6eeb8e09edff6963de86527e13899e3c956 100644 (file)
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi
@@ -550,7 +550,6 @@
                 blocks = <0x5>;
                 interrupts = <20 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-parent = <&gpio6>;
-               interrupt-controller;
                 id = <0>;
                 irq-trigger = <0x1>;
                 pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi

index a63e73adc1fc532175d8cd1baca8ede060f4d2f8..42b2ba23aefc9e26ddb3a8e0317013e30602fdbe 100644 (file)
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi
@@ -225,7 +225,6 @@
                 pinctrl-0 = <&pinctrl_pmic>;
                 interrupt-parent = <&gpio2>;
                 interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
-               interrupt-controller;
  
                 onkey {
                         compatible = "dlg,da9063-onkey";
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi

index 113974520d544b72ff3397629935037c1d1cae53..c0c47adc5866e3ea157b499f15d8edf8b2d1fcde 100644 (file)
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi
@@ -124,6 +124,7 @@
                 reg = <0x58>;
                 interrupt-parent = <&gpio2>;
                 interrupts = <9 IRQ_TYPE_LEVEL_LOW>; /* active-low GPIO2_9 */
+               #interrupt-cells = <2>;
                 interrupt-controller;
  
                 regulators {
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi

index 86b4269e0e0117b3906b625537444533c28510fb..85e278eb201610a1c851c4093025bb205e02a3b3 100644 (file)
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi
@@ -100,6 +100,7 @@
                 interrupt-parent = <&gpio1>;
                 interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
                 gpio-controller;
                 #gpio-cells = <2>;
  
diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts

index 12361fcbe24aff98a70482f2a7885c6ce28cb3b2..1b965652291bfaf5d6bad76ac3eaf10974eac6ea 100644 (file)
--- a/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts
@@ -63,6 +63,7 @@
                 gpio-controller;
                 #gpio-cells = <2>;
                 #interrupt-cells = <2>;
+               interrupt-controller;
                 reg = <0x25>;
         };
  
diff --git a/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts

index b0ed68af0546702d9413c492da6796194208c347..029f49be40e373f706f7f67c34358ba9272ea0af 100644 (file)
--- a/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts
+++ b/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts
@@ -338,6 +338,7 @@
                 reg = <0x22>;
                 gpio-controller;
                 #gpio-cells = <2>;
+               #interrupt-cells = <2>;
                 interrupt-controller;
                 interrupt-parent = <&gpio3>;
                 interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi

index 2045fc779f887030735f9310982bdef228f8a481..27429d0fedfba8ac6f144c55dbd49d295f5cec29 100644 (file)
--- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
+++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi
@@ -340,10 +340,10 @@
                                           "msi8";
                         #interrupt-cells = <1>;
                         interrupt-map-mask = <0 0 0 0x7>;
-                       interrupt-map = <0 0 0 1 &intc 0 0 0 141 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
-                                       <0 0 0 2 &intc 0 0 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
-                                       <0 0 0 3 &intc 0 0 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
-                                       <0 0 0 4 &intc 0 0 0 144 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
+                       interrupt-map = <0 0 0 1 &intc 0 141 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
+                                       <0 0 0 2 &intc 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
+                                       <0 0 0 3 &intc 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
+                                       <0 0 0 4 &intc 0 144 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
  
                         clocks = <&gcc GCC_PCIE_PIPE_CLK>,
                                  <&gcc GCC_PCIE_AUX_CLK>,
diff --git a/arch/arm/boot/dts/renesas/r8a7790-lager.dts b/arch/arm/boot/dts/renesas/r8a7790-lager.dts

index 2fba4d084001b9646ee012eb967e96a27695bfa6..8590981245a62057c2b61370e57a7627f36496e8 100644 (file)
--- a/arch/arm/boot/dts/renesas/r8a7790-lager.dts
+++ b/arch/arm/boot/dts/renesas/r8a7790-lager.dts
@@ -447,6 +447,7 @@
                         interrupt-parent = <&irqc0>;
                         interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
                         interrupt-controller;
+                       #interrupt-cells = <2>;
  
                         rtc {
                                 compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7790-stout.dts b/arch/arm/boot/dts/renesas/r8a7790-stout.dts

index f9bc5b4f019d02136aa99631c1b2e8c67e9651de..683f7395fab0b6961e5f00a3985fc9b690469237 100644 (file)
--- a/arch/arm/boot/dts/renesas/r8a7790-stout.dts
+++ b/arch/arm/boot/dts/renesas/r8a7790-stout.dts
@@ -347,6 +347,7 @@
                 interrupt-parent = <&irqc0>;
                 interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
  
                 onkey {
                         compatible = "dlg,da9063-onkey";
diff --git a/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts b/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts

index e9c13bb03772af44eada731a13b5ee88a2e3de7c..0efd9f98c75aced03009396d1c6e6ac023d84c4a 100644 (file)
--- a/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts
+++ b/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts
@@ -819,6 +819,7 @@
                 interrupt-parent = <&irqc0>;
                 interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
  
                 rtc {
                         compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7791-porter.dts b/arch/arm/boot/dts/renesas/r8a7791-porter.dts

index 7e8bc06715f6564badf502267a33c3737c206cf9..93c86e9216455577271652dcbeb8623faba69885 100644 (file)
--- a/arch/arm/boot/dts/renesas/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/renesas/r8a7791-porter.dts
@@ -413,6 +413,7 @@
                 interrupt-parent = <&irqc0>;
                 interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
  
                 watchdog {
                         compatible = "dlg,da9063-watchdog";
diff --git a/arch/arm/boot/dts/renesas/r8a7792-blanche.dts b/arch/arm/boot/dts/renesas/r8a7792-blanche.dts

index 4f9838cf97ee4fb608b27bfc3d637edee39f3c95..540a9ad28f28ac1a08c7b4f5d3e6a23bcfc262e0 100644 (file)
--- a/arch/arm/boot/dts/renesas/r8a7792-blanche.dts
+++ b/arch/arm/boot/dts/renesas/r8a7792-blanche.dts
@@ -381,6 +381,7 @@
                 interrupt-parent = <&irqc>;
                 interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
  
                 rtc {
                         compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7793-gose.dts b/arch/arm/boot/dts/renesas/r8a7793-gose.dts

index 1744fdbf9e0ce08d2a30180e1462dd46a18152f9..1ea6c757893bc0bf5ae4d7c6a6c91854939f9b3f 100644 (file)
--- a/arch/arm/boot/dts/renesas/r8a7793-gose.dts
+++ b/arch/arm/boot/dts/renesas/r8a7793-gose.dts
@@ -759,6 +759,7 @@
                 interrupt-parent = <&irqc0>;
                 interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
  
                 rtc {
                         compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7794-alt.dts b/arch/arm/boot/dts/renesas/r8a7794-alt.dts

index c0d067df22a03d4e2590333965c7c8d7a6f539d6..b5ecafbb2e4de582e4449e7abba6217d4e35dcdb 100644 (file)
--- a/arch/arm/boot/dts/renesas/r8a7794-alt.dts
+++ b/arch/arm/boot/dts/renesas/r8a7794-alt.dts
@@ -453,6 +453,7 @@
                 interrupt-parent = <&gpio3>;
                 interrupts = <31 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
  
                 rtc {
                         compatible = "dlg,da9063-rtc";
diff --git a/arch/arm/boot/dts/renesas/r8a7794-silk.dts b/arch/arm/boot/dts/renesas/r8a7794-silk.dts

index 43d480a7f3eacc21636788f15e2b27ce3d4dec43..595e074085eb4cd3cf9ad84d59b138051302ef5e 100644 (file)
--- a/arch/arm/boot/dts/renesas/r8a7794-silk.dts
+++ b/arch/arm/boot/dts/renesas/r8a7794-silk.dts
@@ -439,6 +439,7 @@
                 interrupt-parent = <&gpio3>;
                 interrupts = <31 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
  
                 onkey {
                         compatible = "dlg,da9063-onkey";
diff --git a/arch/arm/boot/dts/rockchip/rv1108.dtsi b/arch/arm/boot/dts/rockchip/rv1108.dtsi

index abf3006f0a842435b9d56750e805fe93261649c6..f3291f3bbc6fd2b480e975632847f9310c082225 100644 (file)
--- a/arch/arm/boot/dts/rockchip/rv1108.dtsi
+++ b/arch/arm/boot/dts/rockchip/rv1108.dtsi
@@ -196,7 +196,6 @@
         pwm4: pwm@10280000 {
                 compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
                 reg = <0x10280000 0x10>;
-               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
@@ -208,7 +207,6 @@
         pwm5: pwm@10280010 {
                 compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
                 reg = <0x10280010 0x10>;
-               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
@@ -220,7 +218,6 @@
         pwm6: pwm@10280020 {
                 compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
                 reg = <0x10280020 0x10>;
-               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
@@ -232,7 +229,6 @@
         pwm7: pwm@10280030 {
                 compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
                 reg = <0x10280030 0x10>;
-               interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
@@ -386,7 +382,6 @@
         pwm0: pwm@20040000 {
                 compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
                 reg = <0x20040000 0x10>;
-               interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
@@ -398,7 +393,6 @@
         pwm1: pwm@20040010 {
                 compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
                 reg = <0x20040010 0x10>;
-               interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
@@ -410,7 +404,6 @@
         pwm2: pwm@20040020 {
                 compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
                 reg = <0x20040020 0x10>;
-               interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
@@ -422,7 +415,6 @@
         pwm3: pwm@20040030 {
                 compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm";
                 reg = <0x20040030 0x10>;
-               interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/st/stm32429i-eval.dts b/arch/arm/boot/dts/st/stm32429i-eval.dts

index 576235ec3c516ee2136dd2b4a9c95a2ded61a3b3..afa417b34b25ffd7351885071e72989dd635b382 100644 (file)
--- a/arch/arm/boot/dts/st/stm32429i-eval.dts
+++ b/arch/arm/boot/dts/st/stm32429i-eval.dts
@@ -222,7 +222,6 @@
                 reg = <0x42>;
                 interrupts = <8 3>;
                 interrupt-parent = <&gpioi>;
-               interrupt-controller;
                 wakeup-source;
  
                 stmpegpio: stmpe_gpio {
diff --git a/arch/arm/boot/dts/st/stm32mp157c-dk2.dts b/arch/arm/boot/dts/st/stm32mp157c-dk2.dts

index 510cca5acb79ca449dc11ba043475cfc43becc4c..7a701f7ef0c70467181e71719f17712ca4341562 100644 (file)
--- a/arch/arm/boot/dts/st/stm32mp157c-dk2.dts
+++ b/arch/arm/boot/dts/st/stm32mp157c-dk2.dts
@@ -64,7 +64,6 @@
                 reg = <0x38>;
                 interrupts = <2 2>;
                 interrupt-parent = <&gpiof>;
-               interrupt-controller;
                 touchscreen-size-x = <480>;
                 touchscreen-size-y = <800>;
                 status = "okay";
diff --git a/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts b/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts

index c8e55642f9c6e5acc43a741a769f798be6cccb37..3e834fc7e3707d4573b75cbfd89a49423c3ec6a5 100644 (file)
--- a/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts
+++ b/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts
@@ -415,7 +415,6 @@
                 reg = <0x41>;
                 interrupts = <30 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-parent = <&gpio2>;
-               interrupt-controller;
                 id = <0>;
                 blocks = <0x5>;
                 irq-trigger = <0x1>;
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h

index e12d7d096fc034058bfaa094bf9b314a2a7a983d..e4eb54f6cd9fef41fecad56e25c4136e75455756 100644 (file)
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -11,7 +11,7 @@
  
  static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                  WASM(nop) "\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
                  ".word 1b, %l[l_yes], %c0\n\t"
@@ -25,7 +25,7 @@ l_yes:
  
  static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                  WASM(b) " %l[l_yes]\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
                  ".word 1b, %l[l_yes], %c0\n\t"
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c

index 71b1139764204c506bae31fc31c23f7a51bf61a3..8b1ec60a9a467abcc3bc80c07be12bf734d7c236 100644 (file)
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = {
                                 GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
                 GPIO_LOOKUP_IDX("G", 0, NULL, 1,
                                 GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
+               { }
         },
  };
  
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c

index e96fb40b9cc32a64b5a0379e5ee42bd9e38f1aa5..07565b593ed681b0f1675f8ef7a934c1ae53dc51 100644 (file)
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -298,6 +298,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
                 goto done;
         }
         count_vm_vma_lock_event(VMA_LOCK_RETRY);
+       if (fault & VM_FAULT_MAJOR)
+               flags |= FAULT_FLAG_TRIED;
  
         /* Quick path to respond to signals */
         if (fault_signal_pending(fault, regs)) {
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

index aa7c1d435139684d7b56f96f3f93945d331d64d6..f86383d9c7712abe312d56b59724858dd090d1a1 100644 (file)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -216,7 +216,6 @@ config ARM64
         select HAVE_HW_BREAKPOINT if PERF_EVENTS
         select HAVE_IOREMAP_PROT
         select HAVE_IRQ_TIME_ACCOUNTING
-       select HAVE_KVM
         select HAVE_MOD_ARCH_SPECIFIC
         select HAVE_NMI
         select HAVE_PERF_EVENTS
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile

index 47ecc4cff9d25b7752c94df9ab574ec52cbabd28..a88cdf91068713ebefc031f438b3b22a0247f943 100644 (file)
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -195,7 +195,7 @@ vdso_prepare: prepare0
         include/generated/vdso-offsets.h arch/arm64/kernel/vdso/vdso.so
  ifdef CONFIG_COMPAT_VDSO
         $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 \
-       include/generated/vdso32-offsets.h arch/arm64/kernel/vdso32/vdso.so
+       arch/arm64/kernel/vdso32/vdso.so
  endif
  endif
  
diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi

index dccbba6e7f98e49f572b57c86415dced108fee2d..dbf2dce8d1d68a5225311bf330704e9f6d1ead40 100644 (file)
--- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi
+++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi
@@ -145,7 +145,6 @@
                 msix: msix@fbe00000 {
                         compatible = "al,alpine-msix";
                         reg = <0x0 0xfbe00000 0x0 0x100000>;
-                       interrupt-controller;
                         msi-controller;
                         al,msi-base-spi = <160>;
                         al,msi-num-spis = <160>;
diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi

index 39481d7fd7d4da806fe1ab1e4b2320cc732f37d5..3ea178acdddfe2072352283f47318f0f75808c4f 100644 (file)
--- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi
+++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi
@@ -355,7 +355,6 @@
                 msix: msix@fbe00000 {
                         compatible = "al,alpine-msix";
                         reg = <0x0 0xfbe00000 0x0 0x100000>;
-                       interrupt-controller;
                         msi-controller;
                         al,msi-base-spi = <336>;
                         al,msi-num-spis = <959>;
diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi

index 9dcd25ec2c04183fb90f160452142c2f5a790136..896d1f33b5b6173e3b4b701d4e08f4ad277856e0 100644 (file)
--- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
+++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
@@ -586,6 +586,7 @@
                         #gpio-cells = <2>;
                         gpio-controller;
                         interrupt-controller;
+                       #interrupt-cells = <2>;
                         interrupts = <GIC_SPI 400 IRQ_TYPE_LEVEL_HIGH>;
                 };
  
diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi

index f049687d6b96d23fb0383401ef9c19e50af34148..d8516ec0dae7450e2c5e81f0bddf8ffdeba2bb5e 100644 (file)
--- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
+++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
@@ -450,6 +450,7 @@
                         #gpio-cells = <2>;
                         gpio-controller;
                         interrupt-controller;
+                       #interrupt-cells = <2>;
                         interrupts = <GIC_SPI 183 IRQ_TYPE_LEVEL_HIGH>;
                         gpio-ranges = <&pinmux 0 0 16>,
                                         <&pinmux 16 71 2>,
diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile

index 2e027675d7bbe16300b91be4b6f5522b245dea12..2cb0212b63c6eda77567f90d7960ce89825bd114 100644 (file)
--- a/arch/arm64/boot/dts/freescale/Makefile
+++ b/arch/arm64/boot/dts/freescale/Makefile
@@ -20,23 +20,41 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-frwy.dtb
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-qds.dtb
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-rdb.dtb
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-tqmls1046a-mbls10xxa.dtb
+DTC_FLAGS_fsl-ls1088a-qds := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-qds.dtb
+DTC_FLAGS_fsl-ls1088a-rdb := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-rdb.dtb
+DTC_FLAGS_fsl-ls1088a-ten64 := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-ten64.dtb
+DTC_FLAGS_fsl-ls1088a-tqmls1088a-mbls10xxa := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-tqmls1088a-mbls10xxa.dtb
+DTC_FLAGS_fsl-ls2080a-qds := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-qds.dtb
+DTC_FLAGS_fsl-ls2080a-rdb := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-rdb.dtb
+DTC_FLAGS_fsl-ls2081a-rdb := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2081a-rdb.dtb
+DTC_FLAGS_fsl-ls2080a-simu := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-simu.dtb
+DTC_FLAGS_fsl-ls2088a-qds := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2088a-qds.dtb
+DTC_FLAGS_fsl-ls2088a-rdb := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2088a-rdb.dtb
+DTC_FLAGS_fsl-lx2160a-bluebox3 := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-bluebox3.dtb
+DTC_FLAGS_fsl-lx2160a-bluebox3-rev-a := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-bluebox3-rev-a.dtb
+DTC_FLAGS_fsl-lx2160a-clearfog-cx := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-clearfog-cx.dtb
+DTC_FLAGS_fsl-lx2160a-honeycomb := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-honeycomb.dtb
+DTC_FLAGS_fsl-lx2160a-qds := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-qds.dtb
+DTC_FLAGS_fsl-lx2160a-rdb := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-rdb.dtb
+DTC_FLAGS_fsl-lx2162a-clearfog := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2162a-clearfog.dtb
+DTC_FLAGS_fsl-lx2162a-qds := -Wno-interrupt_map
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2162a-qds.dtb
  
  fsl-ls1028a-qds-13bb-dtbs := fsl-ls1028a-qds.dtb fsl-ls1028a-qds-13bb.dtbo
@@ -53,6 +71,7 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-85bb.dtb
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-899b.dtb
  dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-9999.dtb
  
+DTC_FLAGS_fsl-lx2160a-tqmlx2160a-mblx2160a := -Wno-interrupt_map
  fsl-lx2160a-tqmlx2160a-mblx2160a-12-11-x-dtbs := fsl-lx2160a-tqmlx2160a-mblx2160a.dtb \
         fsl-lx2160a-tqmlx2160a-mblx2160a_12_x_x.dtbo \
         fsl-lx2160a-tqmlx2160a-mblx2160a_x_11_x.dtbo
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts b/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts

index f38ee2266b25dd811e1a8f29c7380aed337a1337..a6b94d1957c92ac6bcc18667b477ca05eda8b1bc 100644 (file)
--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts
@@ -128,14 +128,9 @@
                 pinctrl-0 = <&pinctrl_ptn5150>;
                 status = "okay";
  
-               connector {
-                       compatible = "usb-c-connector";
-                       label = "USB-C";
-
-                       port {
-                               typec1_dr_sw: endpoint {
-                                       remote-endpoint = <&usb1_drd_sw>;
-                               };
+               port {
+                       typec1_dr_sw: endpoint {
+                               remote-endpoint = <&usb1_drd_sw>;
                         };
                 };
         };
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts

index d98a040860a48a3ff2c6592420853a0dacc9b48a..5828c9d7821de1eab50967972cf406f8f6359da5 100644 (file)
--- a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts
@@ -486,7 +486,7 @@
  &uart4 {
         pinctrl-names = "default";
         pinctrl-0 = <&pinctrl_uart4>;
-       status = "okay";
+       status = "disabled";
  };
  
  &usb3_phy0 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts

index fea67a9282f033121323ef2c86e200deac9463d4..b749e28e5ede5cf85f309f2f7903ebee44b41f98 100644 (file)
--- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts
@@ -175,14 +175,10 @@
                                 pinctrl-names = "default";
                                 pinctrl-0 = <&pinctrl_ptn5150>;
  
-                               connector {
-                                       compatible = "usb-c-connector";
-                                       label = "USB-C";
-
-                                       port {
-                                               ptn5150_out_ep: endpoint {
-                                                       remote-endpoint = <&dwc3_0_ep>;
-                                               };
+                               port {
+
+                                       ptn5150_out_ep: endpoint {
+                                               remote-endpoint = <&dwc3_0_ep>;
                                         };
                                 };
                         };
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts

index a2d5d19b2de0cb8b69a8ce55fbaeb0c6ba410907..86d3da36e4f3eecf64c0168c825baee86dfdab3f 100644 (file)
--- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
@@ -184,6 +184,13 @@
                 enable-active-high;
         };
  
+       reg_vcc_1v8: regulator-1v8 {
+               compatible = "regulator-fixed";
+               regulator-name = "VCC_1V8";
+               regulator-min-microvolt = <1800000>;
+               regulator-max-microvolt = <1800000>;
+       };
+
         reg_vcc_3v3: regulator-3v3 {
                 compatible = "regulator-fixed";
                 regulator-name = "VCC_3V3";
@@ -480,7 +487,7 @@
                 clock-names = "mclk";
                 clocks = <&audio_blk_ctrl IMX8MP_CLK_AUDIOMIX_SAI3_MCLK1>;
                 reset-gpios = <&gpio4 29 GPIO_ACTIVE_LOW>;
-               iov-supply = <&reg_vcc_3v3>;
+               iov-supply = <&reg_vcc_1v8>;
                 ldoin-supply = <&reg_vcc_3v3>;
         };
  
diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi

index 48ec4ebec0a83e65bb4978e2f2ffa9cb7aba873c..b864ffa74ea8b6ff72afbd698eab4d30ad990a37 100644 (file)
--- a/arch/arm64/boot/dts/lg/lg1312.dtsi
+++ b/arch/arm64/boot/dts/lg/lg1312.dtsi
@@ -126,7 +126,6 @@
         amba {
                 #address-cells = <2>;
                 #size-cells = <1>;
-               #interrupt-cells = <3>;
  
                 compatible = "simple-bus";
                 interrupt-parent = <&gic>;
diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi

index 3869460aa5dcb5da3a3fb32f8e0df6903b88862c..996fb39bb50c1f2074ddd5ac03f191091920c96b 100644 (file)
--- a/arch/arm64/boot/dts/lg/lg1313.dtsi
+++ b/arch/arm64/boot/dts/lg/lg1313.dtsi
@@ -126,7 +126,6 @@
         amba {
                 #address-cells = <2>;
                 #size-cells = <1>;
-               #interrupt-cells = <3>;
  
                 compatible = "simple-bus";
                 interrupt-parent = <&gic>;
diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi

index 2c920e22cec2b52dd983f2d20812e7fe80a0c379..7ec7c789d87eff436c4f7362e417c71e2033a5b1 100644 (file)
--- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi
@@ -138,7 +138,6 @@
  
                         odmi: odmi@300000 {
                                 compatible = "marvell,odmi-controller";
-                               interrupt-controller;
                                 msi-controller;
                                 marvell,odmi-frames = <4>;
                                 reg = <0x300000 0x4000>,
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts

index 69c7f3954ae59a8008a257807d31f227ba1cd2a8..4127cb84eba41a39f0fbff423a43de827dbea695 100644 (file)
--- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
@@ -128,6 +128,7 @@
                 compatible = "mediatek,mt6360";
                 reg = <0x34>;
                 interrupt-controller;
+               #interrupt-cells = <1>;
                 interrupts-extended = <&pio 101 IRQ_TYPE_EDGE_FALLING>;
                 interrupt-names = "IRQB";
  
diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi

index 5e1277fea7250b4132039efb18f1cfaafdc5257e..61c8fd49c96678740684696397eb15118d83e1b9 100644 (file)
--- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
@@ -830,10 +830,10 @@
  
                         #interrupt-cells = <1>;
                         interrupt-map-mask = <0 0 0 0x7>;
-                       interrupt-map = <0 0 0 1 &intc 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
-                                       <0 0 0 2 &intc 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
-                                       <0 0 0 3 &intc 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
-                                       <0 0 0 4 &intc 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
+                       interrupt-map = <0 0 0 1 &intc 0 0 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
+                                       <0 0 0 2 &intc 0 0 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
+                                       <0 0 0 3 &intc 0 0 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
+                                       <0 0 0 4 &intc 0 0 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
  
                         clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>,
                                  <&gcc GCC_PCIE0_AXI_M_CLK>,
diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi

index cf295bed32998087cee60bd0ce61d0cf587d2c0a..26441447c866f6095aa26d48bb15c79f73bdd6c8 100644 (file)
--- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
@@ -814,13 +814,13 @@
                         interrupt-names = "msi";
                         #interrupt-cells = <1>;
                         interrupt-map-mask = <0 0 0 0x7>;
-                       interrupt-map = <0 0 0 1 &intc 0 142
+                       interrupt-map = <0 0 0 1 &intc 0 0 142
                                          IRQ_TYPE_LEVEL_HIGH>, /* int_a */
-                                       <0 0 0 2 &intc 0 143
+                                       <0 0 0 2 &intc 0 0 143
                                          IRQ_TYPE_LEVEL_HIGH>, /* int_b */
-                                       <0 0 0 3 &intc 0 144
+                                       <0 0 0 3 &intc 0 0 144
                                          IRQ_TYPE_LEVEL_HIGH>, /* int_c */
-                                       <0 0 0 4 &intc 0 145
+                                       <0 0 0 4 &intc 0 0 145
                                          IRQ_TYPE_LEVEL_HIGH>; /* int_d */
  
                         clocks = <&gcc GCC_SYS_NOC_PCIE1_AXI_CLK>,
@@ -876,13 +876,13 @@
                         interrupt-names = "msi";
                         #interrupt-cells = <1>;
                         interrupt-map-mask = <0 0 0 0x7>;
-                       interrupt-map = <0 0 0 1 &intc 0 75
+                       interrupt-map = <0 0 0 1 &intc 0 0 75
                                          IRQ_TYPE_LEVEL_HIGH>, /* int_a */
-                                       <0 0 0 2 &intc 0 78
+                                       <0 0 0 2 &intc 0 0 78
                                          IRQ_TYPE_LEVEL_HIGH>, /* int_b */
-                                       <0 0 0 3 &intc 0 79
+                                       <0 0 0 3 &intc 0 0 79
                                          IRQ_TYPE_LEVEL_HIGH>, /* int_c */
-                                       <0 0 0 4 &intc 0 83
+                                       <0 0 0 4 &intc 0 0 83
                                          IRQ_TYPE_LEVEL_HIGH>; /* int_d */
  
                         clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>,
diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi

index 3885ef3454ff6e92d8f0d00509d0f935e7e40fa6..50de17e4fb3f25ed0ad490d9b4e593cab2b2cc5a 100644 (file)
--- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi
+++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi
@@ -234,6 +234,7 @@
                 gpio-controller;
                 #gpio-cells = <2>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
                 interrupt-parent = <&gpio6>;
                 interrupts = <8 IRQ_TYPE_EDGE_FALLING>;
  
@@ -294,6 +295,7 @@
                 gpio-controller;
                 #gpio-cells = <2>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
                 interrupt-parent = <&gpio6>;
                 interrupts = <4 IRQ_TYPE_EDGE_FALLING>;
         };
@@ -314,6 +316,7 @@
                 gpio-controller;
                 #gpio-cells = <2>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
                 interrupt-parent = <&gpio7>;
                 interrupts = <3 IRQ_TYPE_EDGE_FALLING>;
         };
@@ -324,6 +327,7 @@
                 gpio-controller;
                 #gpio-cells = <2>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
                 interrupt-parent = <&gpio5>;
                 interrupts = <9 IRQ_TYPE_EDGE_FALLING>;
         };
diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi

index d0905515399bb00b8562305b7fa5cf8a0eee65b2..9137dd76e72cedb0cfbf1995032e5852cab80f96 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/px30.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30.dtsi
@@ -631,6 +631,7 @@
                 clock-names = "spiclk", "apb_pclk";
                 dmas = <&dmac 12>, <&dmac 13>;
                 dma-names = "tx", "rx";
+               num-cs = <2>;
                 pinctrl-names = "default";
                 pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>;
                 #address-cells = <1>;
@@ -646,6 +647,7 @@
                 clock-names = "spiclk", "apb_pclk";
                 dmas = <&dmac 14>, <&dmac 15>;
                 dma-names = "tx", "rx";
+               num-cs = <2>;
                 pinctrl-names = "default";
                 pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>;
                 #address-cells = <1>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi

index fb5dcf6e93272180bfd60b8e251a61e61f0e9155..7b4c15c4a9c319da2e92a19ca902884a788e9514 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -488,7 +488,6 @@
         pwm3: pwm@ff1b0030 {
                 compatible = "rockchip,rk3328-pwm";
                 reg = <0x0 0xff1b0030 0x0 0x10>;
-               interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
                 clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>;
                 clock-names = "pwm", "pclk";
                 pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts

index d4c70835e0fe28639548ed4bf4579439a5f92308..a4946cdc3bb34ef7bc084f74ae0a4ac8424994df 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts
@@ -72,7 +72,7 @@
                 vin-supply = <&vcc3v3_sys>;
         };
  
-       vcc5v0_usb30_host: vcc5v0-usb30-host-regulator {
+       vcc5v0_usb_host1: vcc5v0_usb_host2: vcc5v0-usb-host-regulator {
                 compatible = "regulator-fixed";
                 regulator-name = "vcc5v0_host";
                 regulator-boot-on;
@@ -114,6 +114,7 @@
         status = "okay";
  };
  
+/* Standard pcie */
  &pcie3x2 {
         reset-gpios = <&gpio3 RK_PB0 GPIO_ACTIVE_HIGH>;
         vpcie3v3-supply = <&vcc3v3_sys>;
@@ -122,6 +123,7 @@
  
  /* M.2 M-Key ssd */
  &pcie3x4 {
+       num-lanes = <2>;
         reset-gpios = <&gpio4 RK_PB6 GPIO_ACTIVE_HIGH>;
         vpcie3v3-supply = <&vcc3v3_sys>;
         status = "okay";
@@ -188,12 +190,12 @@
  };
  
  &u2phy2_host {
-       phy-supply = <&vcc5v0_usb30_host>;
+       phy-supply = <&vcc5v0_usb_host1>;
         status = "okay";
  };
  
  &u2phy3_host {
-       phy-supply = <&vcc5v0_usb30_host>;
+       phy-supply = <&vcc5v0_usb_host2>;
         status = "okay";
  };
  
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi

index 0b02f4d6e00331d4731e60251240748b5415b660..cce1c8e835877c4341d90f2fe80da7c57dde8d0c 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
@@ -16,8 +16,8 @@
  
         aliases {
                 mmc0 = &sdhci;
-               mmc1 = &sdio;
-               mmc2 = &sdmmc;
+               mmc1 = &sdmmc;
+               mmc2 = &sdio;
                 serial2 = &uart2;
         };
  
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts

index ac7c677b0fb9c3d6af9e7b8bcd399d9d24ef0b84..de30c2632b8e5fc8cc6d89272269353676b1e1a3 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -448,6 +448,7 @@
                             <&rk806_dvs2_null>, <&rk806_dvs3_null>;
                 pinctrl-names = "default";
                 spi-max-frequency = <1000000>;
+               system-power-controller;
  
                 vcc1-supply = <&vcc5v0_sys>;
                 vcc2-supply = <&vcc5v0_sys>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts b/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts

index 4ce70fb75a307ba34fdd8ad5a72d56401de0118e..39d65002add1e11e81bb0d660fd7f5ff90e4cdf7 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts
@@ -62,7 +62,6 @@
                 compatible = "gpio-leds";
                 pinctrl-names = "default";
                 pinctrl-0 = <&led1_pin>;
-               status = "okay";
  
                 /* LED1 on PCB */
                 led-1 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts

index d7722772ecd8a0afb7e844ffb168fa9a7462cb03..997b516c2533c1d1fe2db05f2b9df2ad5588e278 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts
@@ -189,19 +189,19 @@
         cpu-supply = <&vdd_cpu_lit_s0>;
  };
  
-&cpu_b0{
+&cpu_b0 {
         cpu-supply = <&vdd_cpu_big0_s0>;
  };
  
-&cpu_b1{
+&cpu_b1 {
         cpu-supply = <&vdd_cpu_big0_s0>;
  };
  
-&cpu_b2{
+&cpu_b2 {
         cpu-supply = <&vdd_cpu_big1_s0>;
  };
  
-&cpu_b3{
+&cpu_b3 {
         cpu-supply = <&vdd_cpu_big1_s0>;
  };
  
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts

index ef4f058c20ff1565cb67e5c2c495f0f337ab2a1c..e037bf9db75af0402dccd26b82b50922823fe9f7 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
@@ -19,8 +19,8 @@
  
         aliases {
                 mmc0 = &sdhci;
-               mmc1 = &sdio;
-               mmc2 = &sdmmc;
+               mmc1 = &sdmmc;
+               mmc2 = &sdio;
         };
  
         analog-sound {
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts

index dc677f29a9c7fca2359cf0d28b3ec3c9e97dda30..3c227888685192456ec7b4e9d348f187f3259063 100644 (file)
--- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
@@ -195,13 +195,13 @@
  
  &gpio1 {
         gpio-line-names = /* GPIO1 A0-A7 */
-                         "HEADER_27_3v3", "HEADER_28_3v3", "", "",
+                         "HEADER_27_3v3", "", "", "",
                           "HEADER_29_1v8", "", "HEADER_7_1v8", "",
                           /* GPIO1 B0-B7 */
                           "", "HEADER_31_1v8", "HEADER_33_1v8", "",
                           "HEADER_11_1v8", "HEADER_13_1v8", "", "",
                           /* GPIO1 C0-C7 */
-                         "", "", "", "",
+                         "", "HEADER_28_3v3", "", "",
                           "", "", "", "",
                           /* GPIO1 D0-D7 */
                           "", "", "", "",
@@ -225,11 +225,11 @@
  
  &gpio4 {
         gpio-line-names = /* GPIO4 A0-A7 */
-                         "", "", "HEADER_37_3v3", "HEADER_32_3v3",
-                         "HEADER_36_3v3", "", "HEADER_35_3v3", "HEADER_38_3v3",
+                         "", "", "HEADER_37_3v3", "HEADER_8_3v3",
+                         "HEADER_10_3v3", "", "HEADER_32_3v3", "HEADER_35_3v3",
                           /* GPIO4 B0-B7 */
                           "", "", "", "HEADER_40_3v3",
-                         "HEADER_8_3v3", "HEADER_10_3v3", "", "",
+                         "HEADER_38_3v3", "HEADER_36_3v3", "", "",
                           /* GPIO4 C0-C7 */
                           "", "", "", "",
                           "", "", "", "",
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c

index bac4cabef6073e5b0c652d0ed031ea7cce97c72f..467ac2f768ac2bb423b92eb797dce8bde697f259 100644 (file)
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req)
                         src += blocks * AES_BLOCK_SIZE;
                 }
                 if (nbytes && walk.nbytes == walk.total) {
+                       u8 buf[AES_BLOCK_SIZE];
+                       u8 *d = dst;
+
+                       if (unlikely(nbytes < AES_BLOCK_SIZE))
+                               src = dst = memcpy(buf + sizeof(buf) - nbytes,
+                                                  src, nbytes);
+
                         neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
                                              nbytes, walk.iv);
+
+                       if (unlikely(nbytes < AES_BLOCK_SIZE))
+                               memcpy(d, dst, nbytes);
+
                         nbytes = 0;
                 }
                 kernel_neon_end();
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h

index 210bb43cff2c7d020fdf9dc1c8f2a99ca49415a0..d328f549b1a60a26bff884bccebe448fc0936f76 100644 (file)
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -229,7 +229,7 @@ alternative_has_cap_likely(const unsigned long cpucap)
         if (!cpucap_is_possible(cpucap))
                 return false;
  
-       asm_volatile_goto(
+       asm goto(
         ALTERNATIVE_CB("b       %l[l_no]", %[cpucap], alt_cb_patch_nops)
         :
         : [cpucap] "i" (cpucap)
@@ -247,7 +247,7 @@ alternative_has_cap_unlikely(const unsigned long cpucap)
         if (!cpucap_is_possible(cpucap))
                 return false;
  
-       asm_volatile_goto(
+       asm goto(
         ALTERNATIVE("nop", "b   %l[l_yes]", %[cpucap])
         :
         : [cpucap] "i" (cpucap)
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h

index b1e43f56ee461eb8f36222412057e90c42d6f6bf..6c13fd47e170c6510553ba718354d07cf057a809 100644 (file)
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -56,6 +56,7 @@ struct cpuinfo_arm64 {
         u64             reg_id_aa64mmfr1;
         u64             reg_id_aa64mmfr2;
         u64             reg_id_aa64mmfr3;
+       u64             reg_id_aa64mmfr4;
         u64             reg_id_aa64pfr0;
         u64             reg_id_aa64pfr1;
         u64             reg_id_aa64zfr0;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h

index 21c824edf8ce4a6fa1c208b7a882df4b4e31eeb7..d7c6988188d9a39623e11b8566322652a1fb2bf8 100644 (file)
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -83,7 +83,7 @@ struct arm64_ftr_bits {
   * to full-0 denotes that this field has no override
   *
   * A @mask field set to full-0 with the corresponding @val field set
- * to full-1 denotes thath this field has an invalid override.
+ * to full-1 denotes that this field has an invalid override.
   */
  struct arm64_ftr_override {
         u64             val;
@@ -363,6 +363,7 @@ struct arm64_cpu_capabilities {
                         u8 field_pos;
                         u8 field_width;
                         u8 min_field_value;
+                       u8 max_field_value;
                         u8 hwcap_type;
                         bool sign;
                         unsigned long hwcap;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h

index 7c7493cb571f97bf98b0b4841aeb756d43990718..52f076afeb96006c42dfee6edefcf348048af96b 100644 (file)
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -61,6 +61,7 @@
  #define ARM_CPU_IMP_HISI               0x48
  #define ARM_CPU_IMP_APPLE              0x61
  #define ARM_CPU_IMP_AMPERE             0xC0
+#define ARM_CPU_IMP_MICROSOFT          0x6D
  
  #define ARM_CPU_PART_AEM_V8            0xD0F
  #define ARM_CPU_PART_FOUNDATION                0xD00
@@ -135,6 +136,8 @@
  
  #define AMPERE_CPU_PART_AMPERE1                0xAC3
  
+#define MICROSOFT_CPU_PART_AZURE_COBALT_100    0xD49 /* Based on r0p0 of ARM Neoverse N2 */
+
  #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
  #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
  #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -193,6 +196,7 @@
  #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
  #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
  #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
  
  /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
  #define MIDR_FUJITSU_ERRATUM_010001            MIDR_FUJITSU_A64FX
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h

index 50e5f25d3024ced8b3c107de352a69e65b6ab7f6..b67b89c54e1c83644cfdd7c63a4807dd24b8d06d 100644 (file)
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -62,13 +62,13 @@ static inline void cpacr_restore(unsigned long cpacr)
   * When we defined the maximum SVE vector length we defined the ABI so
   * that the maximum vector length included all the reserved for future
   * expansion bits in ZCR rather than those just currently defined by
- * the architecture. While SME follows a similar pattern the fact that
- * it includes a square matrix means that any allocations that attempt
- * to cover the maximum potential vector length (such as happen with
- * the regset used for ptrace) end up being extremely large. Define
- * the much lower actual limit for use in such situations.
+ * the architecture.  Using this length to allocate worst size buffers
+ * results in excessively large allocations, and this effect is even
+ * more pronounced for SME due to ZA.  Define more suitable VLs for
+ * these situations.
   */
-#define SME_VQ_MAX     16
+#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1)
+#define SME_VQ_MAX     ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1)
  
  struct task_struct;
  
@@ -386,6 +386,7 @@ extern void sme_alloc(struct task_struct *task, bool flush);
  extern unsigned int sme_get_vl(void);
  extern int sme_set_current_vl(unsigned long arg);
  extern int sme_get_current_vl(void);
+extern void sme_suspend_exit(void);
  
  /*
   * Return how many bytes of memory are required to store the full SME
@@ -421,6 +422,7 @@ static inline int sme_max_vl(void) { return 0; }
  static inline int sme_max_virtualisable_vl(void) { return 0; }
  static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
  static inline int sme_get_current_vl(void) { return -EINVAL; }
+static inline void sme_suspend_exit(void) { }
  
  static inline size_t sme_state_size(struct task_struct const *task)
  {
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h

index 48ddc0f45d2283f2c5ba0de62f5d3231999a91f3..6aafbb7899916e631eab9241c39c1313a7c93707 100644 (file)
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -18,7 +18,7 @@
  static __always_inline bool arch_static_branch(struct static_key * const key,
                                                const bool branch)
  {
-       asm_volatile_goto(
+       asm goto(
                 "1:     nop                                     \n\t"
                  "      .pushsection    __jump_table, \"aw\"    \n\t"
                  "      .align          3                       \n\t"
@@ -35,7 +35,7 @@ l_yes:
  static __always_inline bool arch_static_branch_jump(struct static_key * const key,
                                                     const bool branch)
  {
-       asm_volatile_goto(
+       asm goto(
                 "1:     b               %l[l_yes]               \n\t"
                  "      .pushsection    __jump_table, \"aw\"    \n\t"
                  "      .align          3                       \n\t"
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h

index 3c6f8ba1e47927cea13fd4ff7f25c46c846052cf..a1769e415d728ad9354c99ed7686a2e912e962ad 100644 (file)
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -102,9 +102,7 @@
  #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
  #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
  
-#define HCRX_GUEST_FLAGS \
-       (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
-        (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
+#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En)
  #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
  
  /* TCR_EL2 Registers bits */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h

index b804fe832184466d68f597533858b90a6ffbc781..debc3753d2ef6d946f05c7f80c36dfdd6ddea6ac 100644 (file)
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -209,7 +209,8 @@ static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
  
  static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
  {
-       return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H;
+       return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
+               (ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H));
  }
  
  static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h

index 21c57b812569f22532bd57c7fb17af669d3eb370..6883963bbc3ac8d641c61ef8baf52048bef7c1d7 100644 (file)
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -238,9 +238,32 @@ static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
         return index;
  }
  
+struct kvm_sysreg_masks;
+
+enum fgt_group_id {
+       __NO_FGT_GROUP__,
+       HFGxTR_GROUP,
+       HDFGRTR_GROUP,
+       HDFGWTR_GROUP = HDFGRTR_GROUP,
+       HFGITR_GROUP,
+       HAFGRTR_GROUP,
+
+       /* Must be last */
+       __NR_FGT_GROUP_IDS__
+};
+
  struct kvm_arch {
         struct kvm_s2_mmu mmu;
  
+       /*
+        * Fine-Grained UNDEF, mimicking the FGT layout defined by the
+        * architecture. We track them globally, as we present the
+        * same feature-set to all vcpus.
+        *
+        * Index 0 is currently spare.
+        */
+       u64 fgu[__NR_FGT_GROUP_IDS__];
+
         /* Interrupt controller */
         struct vgic_dist        vgic;
  
@@ -274,6 +297,8 @@ struct kvm_arch {
  #define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE             6
         /* Initial ID reg values loaded */
  #define KVM_ARCH_FLAG_ID_REGS_INITIALIZED              7
+       /* Fine-Grained UNDEF initialised */
+#define KVM_ARCH_FLAG_FGU_INITIALIZED                  8
         unsigned long flags;
  
         /* VM-wide vCPU feature set */
@@ -294,6 +319,9 @@ struct kvm_arch {
         /* PMCR_EL0.N value for the guest */
         u8 pmcr_n;
  
+       /* Iterator for idreg debugfs */
+       u8      idreg_debugfs_iter;
+
         /* Hypercall features firmware registers' descriptor */
         struct kvm_smccc_features smccc_feat;
         struct maple_tree smccc_filter;
@@ -312,6 +340,9 @@ struct kvm_arch {
  #define KVM_ARM_ID_REG_NUM     (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
         u64 id_regs[KVM_ARM_ID_REG_NUM];
  
+       /* Masks for VNCR-baked sysregs */
+       struct kvm_sysreg_masks *sysreg_masks;
+
         /*
          * For an untrusted host VM, 'pkvm.handle' is used to lookup
          * the associated pKVM instance in the hypervisor.
@@ -474,6 +505,13 @@ enum vcpu_sysreg {
         NR_SYS_REGS     /* Nothing after this line! */
  };
  
+struct kvm_sysreg_masks {
+       struct {
+               u64     res0;
+               u64     res1;
+       } mask[NR_SYS_REGS - __VNCR_START__];
+};
+
  struct kvm_cpu_context {
         struct user_pt_regs regs;       /* sp = sp_el0 */
  
@@ -549,6 +587,7 @@ struct kvm_vcpu_arch {
  
         /* Values of trap registers for the guest. */
         u64 hcr_el2;
+       u64 hcrx_el2;
         u64 mdcr_el2;
         u64 cptr_el2;
  
@@ -868,7 +907,15 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
  
  #define ctxt_sys_reg(c,r)      (*__ctxt_sys_reg(c,r))
  
-#define __vcpu_sys_reg(v,r)    (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
+u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
+#define __vcpu_sys_reg(v,r)                                            \
+       (*({                                                            \
+               const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt;   \
+               u64 *__r = __ctxt_sys_reg(ctxt, (r));                   \
+               if (vcpu_has_nv((v)) && (r) >= __VNCR_START__)          \
+                       *__r = kvm_vcpu_sanitise_vncr_reg((v), (r));    \
+               __r;                                                    \
+       }))
  
  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
  void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
@@ -1055,14 +1102,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
  int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
  int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
  
+void kvm_sys_regs_create_debugfs(struct kvm *kvm);
  void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
  
  int __init kvm_sys_reg_table_init(void);
+struct sys_reg_desc;
+int __init populate_sysreg_config(const struct sys_reg_desc *sr,
+                                 unsigned int idx);
  int __init populate_nv_trap_config(void);
  
  bool lock_all_vcpus(struct kvm *kvm);
  void unlock_all_vcpus(struct kvm *kvm);
  
+void kvm_init_sysreg(struct kvm_vcpu *);
+
  /* MMIO helpers */
  void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
  unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
@@ -1233,4 +1286,48 @@ static inline void kvm_hyp_reserve(void) { }
  void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
  bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
  
+#define __expand_field_sign_unsigned(id, fld, val)                     \
+       ((u64)SYS_FIELD_VALUE(id, fld, val))
+
+#define __expand_field_sign_signed(id, fld, val)                       \
+       ({                                                              \
+               u64 __val = SYS_FIELD_VALUE(id, fld, val);              \
+               sign_extend64(__val, id##_##fld##_WIDTH - 1);           \
+       })
+
+#define expand_field_sign(id, fld, val)                                        \
+       (id##_##fld##_SIGNED ?                                          \
+        __expand_field_sign_signed(id, fld, val) :                     \
+        __expand_field_sign_unsigned(id, fld, val))
+
+#define get_idreg_field_unsigned(kvm, id, fld)                         \
+       ({                                                              \
+               u64 __val = IDREG((kvm), SYS_##id);                     \
+               FIELD_GET(id##_##fld##_MASK, __val);                    \
+       })
+
+#define get_idreg_field_signed(kvm, id, fld)                           \
+       ({                                                              \
+               u64 __val = get_idreg_field_unsigned(kvm, id, fld);     \
+               sign_extend64(__val, id##_##fld##_WIDTH - 1);           \
+       })
+
+#define get_idreg_field_enum(kvm, id, fld)                             \
+       get_idreg_field_unsigned(kvm, id, fld)
+
+#define get_idreg_field(kvm, id, fld)                                  \
+       (id##_##fld##_SIGNED ?                                          \
+        get_idreg_field_signed(kvm, id, fld) :                         \
+        get_idreg_field_unsigned(kvm, id, fld))
+
+#define kvm_has_feat(kvm, id, fld, limit)                              \
+       (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, limit))
+
+#define kvm_has_feat_enum(kvm, id, fld, val)                           \
+       (get_idreg_field_unsigned((kvm), id, fld) == __expand_field_sign_unsigned(id, fld, val))
+
+#define kvm_has_feat_range(kvm, id, fld, min, max)                     \
+       (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \
+        get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
+
  #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h

index 145ce73fc16c950ee41a2db31d29186bdf9c0149..3e2a1ac0c9bb81629b4599b95ddd9a708c4d65f5 100644 (file)
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
  /*
   * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
   * static inline can allow the compiler to out-of-line this. KVM always wants
- * the macro version as its always inlined.
+ * the macro version as it's always inlined.
   */
  #define __kvm_swab32(x)        ___constant_swab32(x)
  
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h

index e3e793d0ec30413491839f9e060bd9e27deff227..d5e48d870461baded85222fe3f55067376b4a19d 100644 (file)
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -53,27 +53,6 @@
  
  #include <asm/alternative.h>
  
-/*
- * Convert a kernel VA into a HYP VA.
- * reg: VA to be converted.
- *
- * The actual code generation takes place in kvm_update_va_mask, and
- * the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_update_va_mask uses the
- * specific registers encoded in the instructions).
- */
-.macro kern_hyp_va     reg
-#ifndef __KVM_VHE_HYPERVISOR__
-alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
-       and     \reg, \reg, #1          /* mask with va_mask */
-       ror     \reg, \reg, #1          /* rotate to the first tag bit */
-       add     \reg, \reg, #0          /* insert the low 12 bits of the tag */
-       add     \reg, \reg, #0, lsl 12  /* insert the top 12 bits of the tag */
-       ror     \reg, \reg, #63         /* rotate back */
-alternative_cb_end
-#endif
-.endm
-
  /*
   * Convert a hypervisor VA to a PA
   * reg: hypervisor address to be converted in place
@@ -127,14 +106,29 @@ void kvm_apply_hyp_relocations(void);
  
  #define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset)
  
+/*
+ * Convert a kernel VA into a HYP VA.
+ *
+ * Can be called from hyp or non-hyp context.
+ *
+ * The actual code generation takes place in kvm_update_va_mask(), and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask() uses the
+ * specific registers encoded in the instructions).
+ */
  static __always_inline unsigned long __kern_hyp_va(unsigned long v)
  {
+/*
+ * This #ifndef is an optimisation for when this is called from VHE hyp
+ * context.  When called from a VHE non-hyp context, kvm_update_va_mask() will
+ * replace the instructions with `nop`s.
+ */
  #ifndef __KVM_VHE_HYPERVISOR__
-       asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
-                                   "ror %0, %0, #1\n"
-                                   "add %0, %0, #0\n"
-                                   "add %0, %0, #0, lsl 12\n"
-                                   "ror %0, %0, #63\n",
+       asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"         /* mask with va_mask */
+                                   "ror %0, %0, #1\n"         /* rotate to the first tag bit */
+                                   "add %0, %0, #0\n"         /* insert the low 12 bits of the tag */
+                                   "add %0, %0, #0, lsl 12\n" /* insert the top 12 bits of the tag */
+                                   "ror %0, %0, #63\n",       /* rotate back */
                                     ARM64_ALWAYS_SYSTEM,
                                     kvm_update_va_mask)
                      : "+r" (v));
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h

index 4882905357f43b6b5146f5d4f38d13b100f8dbb8..c77d795556e130d0c2ae05be272043e65d1acddb 100644 (file)
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -60,7 +60,6 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
         return ttbr0 & ~GENMASK_ULL(63, 48);
  }
  
-extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
  
  int kvm_init_nv_sysregs(struct kvm *kvm);
  
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h

index cfdf40f734b12264ea9b4227839ffa191d512e33..19278dfe79782561013d4c2b03e9a866615479b0 100644 (file)
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -197,6 +197,7 @@ enum kvm_pgtable_stage2_flags {
   * @KVM_PGTABLE_PROT_W:                Write permission.
   * @KVM_PGTABLE_PROT_R:                Read permission.
   * @KVM_PGTABLE_PROT_DEVICE:   Device attributes.
+ * @KVM_PGTABLE_PROT_NORMAL_NC:        Normal noncacheable attributes.
   * @KVM_PGTABLE_PROT_SW0:      Software bit 0.
   * @KVM_PGTABLE_PROT_SW1:      Software bit 1.
   * @KVM_PGTABLE_PROT_SW2:      Software bit 2.
@@ -208,6 +209,7 @@ enum kvm_pgtable_prot {
         KVM_PGTABLE_PROT_R                      = BIT(2),
  
         KVM_PGTABLE_PROT_DEVICE                 = BIT(3),
+       KVM_PGTABLE_PROT_NORMAL_NC              = BIT(4),
  
         KVM_PGTABLE_PROT_SW0                    = BIT(55),
         KVM_PGTABLE_PROT_SW1                    = BIT(56),
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h

index d82305ab420f74cb56ce65a373a7380fb6321c37..449ca2ff1df6073181469dd0d796b3cf247eef0e 100644 (file)
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -173,6 +173,7 @@
   * Memory types for Stage-2 translation
   */
  #define MT_S2_NORMAL           0xf
+#define MT_S2_NORMAL_NC                0x5
  #define MT_S2_DEVICE_nGnRE     0x1
  
  /*
@@ -180,6 +181,7 @@
   * Stage-2 enforces Normal-WB and Device-nGnRE
   */
  #define MT_S2_FWB_NORMAL       6
+#define MT_S2_FWB_NORMAL_NC    5
  #define MT_S2_FWB_DEVICE_nGnRE 1
  
  #ifdef CONFIG_ARM64_4K_PAGES
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h

index c3b19b376c86722c668a208b0be6cd122de17851..9e8999592f3af5068b27717f137337e5af500165 100644 (file)
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1181,6 +1181,8 @@
         par;                                                            \
  })
  
+#define SYS_FIELD_VALUE(reg, field, val)       reg##_##field##_##val
+
  #define SYS_FIELD_GET(reg, field, val)         \
                  FIELD_GET(reg##_##field##_MASK, val)
  
@@ -1188,7 +1190,8 @@
                  FIELD_PREP(reg##_##field##_MASK, val)
  
  #define SYS_FIELD_PREP_ENUM(reg, field, val)           \
-                FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+                FIELD_PREP(reg##_##field##_MASK,       \
+                           SYS_FIELD_VALUE(reg, field, val))
  
  #endif
  
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h

index b4ae3210993273e8fd709b8f4d17a081bf39ff3d..4305995c8f82f416e6ce11280ac1dd19fbe25eec 100644 (file)
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -17,9 +17,6 @@
  #ifndef __ASSEMBLY__
  
  #include <generated/vdso-offsets.h>
-#ifdef CONFIG_COMPAT_VDSO
-#include <generated/vdso32-offsets.h>
-#endif
  
  #define VDSO_SYMBOL(base, name)                                                   \
  ({                                                                        \
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h

index 89d2fc872d9f5e63dce2e2a74dfb422c9e255030..964df31da9751c96c984358c66d6f73c8519b2e7 100644 (file)
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -37,9 +37,7 @@
  #include <asm/ptrace.h>
  #include <asm/sve_context.h>
  
-#define __KVM_HAVE_GUEST_DEBUG
  #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
  #define __KVM_HAVE_VCPU_EVENTS
  
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -76,11 +74,11 @@ struct kvm_regs {
  
  /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
  #define KVM_ARM_DEVICE_TYPE_SHIFT      0
-#define KVM_ARM_DEVICE_TYPE_MASK       GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
-                                               KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_TYPE_MASK       __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
+                                                 KVM_ARM_DEVICE_TYPE_SHIFT)
  #define KVM_ARM_DEVICE_ID_SHIFT                16
-#define KVM_ARM_DEVICE_ID_MASK         GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
-                                               KVM_ARM_DEVICE_ID_SHIFT)
+#define KVM_ARM_DEVICE_ID_MASK         __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
+                                                 KVM_ARM_DEVICE_ID_SHIFT)
  
  /* Supported device IDs */
  #define KVM_ARM_DEVICE_VGIC_V2         0
@@ -162,6 +160,11 @@ struct kvm_sync_regs {
         __u64 device_irq_level;
  };
  
+/* Bits for run->s.regs.device_irq_level */
+#define KVM_ARM_DEV_EL1_VTIMER         (1 << 0)
+#define KVM_ARM_DEV_EL1_PTIMER         (1 << 1)
+#define KVM_ARM_DEV_PMU                        (1 << 2)
+
  /*
   * PMU filter structure. Describe a range of events with a particular
   * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile

index e5d03a7039b4bf9cce893b1ea39712eef3e2f4ad..467cb711727309eb991df38ece1af46b858e6178 100644 (file)
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -77,9 +77,9 @@ obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS)       += patch-scs.o
  # We need to prevent the SCS patching code from patching itself. Using
  # -mbranch-protection=none here to avoid the patchable PAC opcodes from being
  # generated triggers an issue with full LTO on Clang, which stops emitting PAC
-# instructions altogether. So instead, omit the unwind tables used by the
-# patching code, so it will not be able to locate its own PAC instructions.
-CFLAGS_patch-scs.o                     += -fno-asynchronous-unwind-tables -fno-unwind-tables
+# instructions altogether. So disable LTO as well for the compilation unit.
+CFLAGS_patch-scs.o                     += -mbranch-protection=none
+CFLAGS_REMOVE_patch-scs.o              += $(CC_FLAGS_LTO)
  
  # Force dependency (vdso*-wrap.S includes vdso.so through incbin)
  $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c

index 967c7c7a4e7db3db7e3d05a7637e8e7d13e0d273..76b8dd37092ad2a9dd6e59a92d1c1fab887589da 100644 (file)
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -374,6 +374,7 @@ static const struct midr_range erratum_1463225[] = {
  static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
  #ifdef CONFIG_ARM64_ERRATUM_2139208
         MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+       MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
  #endif
  #ifdef CONFIG_ARM64_ERRATUM_2119858
         MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -387,6 +388,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
  static const struct midr_range tsb_flush_fail_cpus[] = {
  #ifdef CONFIG_ARM64_ERRATUM_2067961
         MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+       MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
  #endif
  #ifdef CONFIG_ARM64_ERRATUM_2054223
         MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -399,6 +401,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = {
  static struct midr_range trbe_write_out_of_range_cpus[] = {
  #ifdef CONFIG_ARM64_ERRATUM_2253138
         MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+       MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
  #endif
  #ifdef CONFIG_ARM64_ERRATUM_2224489
         MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c

index 8d1a634a403eed6e13a210331a8f25133354ca28..f309fd542c20bd60c8747e9e41188d41573fe985 100644 (file)
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -140,12 +140,42 @@ void dump_cpu_features(void)
         pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps);
  }
  
+#define __ARM64_MAX_POSITIVE(reg, field)                               \
+               ((reg##_##field##_SIGNED ?                              \
+                 BIT(reg##_##field##_WIDTH - 1) :                      \
+                 BIT(reg##_##field##_WIDTH)) - 1)
+
+#define __ARM64_MIN_NEGATIVE(reg, field)  BIT(reg##_##field##_WIDTH - 1)
+
+#define __ARM64_CPUID_FIELDS(reg, field, min_value, max_value)         \
+               .sys_reg = SYS_##reg,                                   \
+               .field_pos = reg##_##field##_SHIFT,                     \
+               .field_width = reg##_##field##_WIDTH,                   \
+               .sign = reg##_##field##_SIGNED,                         \
+               .min_field_value = min_value,                           \
+               .max_field_value = max_value,
+
+/*
+ * ARM64_CPUID_FIELDS() encodes a field with a range from min_value to
+ * an implicit maximum that depends on the sign-ess of the field.
+ *
+ * An unsigned field will be capped at all ones, while a signed field
+ * will be limited to the positive half only.
+ */
  #define ARM64_CPUID_FIELDS(reg, field, min_value)                      \
-               .sys_reg = SYS_##reg,                                                   \
-               .field_pos = reg##_##field##_SHIFT,                                             \
-               .field_width = reg##_##field##_WIDTH,                                           \
-               .sign = reg##_##field##_SIGNED,                                                 \
-               .min_field_value = reg##_##field##_##min_value,
+       __ARM64_CPUID_FIELDS(reg, field,                                \
+                            SYS_FIELD_VALUE(reg, field, min_value),    \
+                            __ARM64_MAX_POSITIVE(reg, field))
+
+/*
+ * ARM64_CPUID_FIELDS_NEG() encodes a field with a range from an
+ * implicit minimal value to max_value. This should be used when
+ * matching a non-implemented property.
+ */
+#define ARM64_CPUID_FIELDS_NEG(reg, field, max_value)                  \
+       __ARM64_CPUID_FIELDS(reg, field,                                \
+                            __ARM64_MIN_NEGATIVE(reg, field),          \
+                            SYS_FIELD_VALUE(reg, field, max_value))
  
  #define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
         {                                               \
@@ -407,6 +437,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
         ARM64_FTR_END,
  };
  
+static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = {
+       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0),
+       ARM64_FTR_END,
+};
+
  static const struct arm64_ftr_bits ftr_ctr[] = {
         ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
         ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1),
@@ -724,6 +759,7 @@ static const struct __ftr_reg_entry {
                                &id_aa64mmfr1_override),
         ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
         ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
+       ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4),
  
         /* Op1 = 1, CRn = 0, CRm = 0 */
         ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@@ -919,7 +955,8 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
                                 pr_warn("%s[%d:%d]: %s to %llx\n",
                                         reg->name,
                                         ftrp->shift + ftrp->width - 1,
-                                       ftrp->shift, str, tmp);
+                                       ftrp->shift, str,
+                                       tmp & (BIT(ftrp->width) - 1));
                 } else if ((ftr_mask & reg->override->val) == ftr_mask) {
                         reg->override->val &= ~ftr_mask;
                         pr_warn("%s[%d:%d]: impossible override, ignored\n",
@@ -1047,6 +1084,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
         init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
         init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
         init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3);
+       init_cpu_ftr_reg(SYS_ID_AA64MMFR4_EL1, info->reg_id_aa64mmfr4);
         init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
         init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
         init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
@@ -1418,6 +1456,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
         read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
         read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
         read_sysreg_case(SYS_ID_AA64MMFR3_EL1);
+       read_sysreg_case(SYS_ID_AA64MMFR4_EL1);
         read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
         read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
         read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
@@ -1451,11 +1490,28 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
  static bool
  feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
  {
-       int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
-                                                   entry->field_width,
-                                                   entry->sign);
+       int val, min, max;
+       u64 tmp;
+
+       val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+                                               entry->field_width,
+                                               entry->sign);
+
+       tmp = entry->min_field_value;
+       tmp <<= entry->field_pos;
  
-       return val >= entry->min_field_value;
+       min = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+                                               entry->field_width,
+                                               entry->sign);
+
+       tmp = entry->max_field_value;
+       tmp <<= entry->field_pos;
+
+       max = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+                                               entry->field_width,
+                                               entry->sign);
+
+       return val >= min && val <= max;
  }
  
  static u64
@@ -1739,6 +1795,28 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
         return !meltdown_safe;
  }
  
+static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope)
+{
+       /*
+        * Although the Apple M2 family appears to support NV1, the
+        * PTW barfs on the nVHE EL2 S1 page table format. Pretend
+        * that it doesn't support NV1 at all.
+        */
+       static const struct midr_range nv1_ni_list[] = {
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+               {}
+       };
+
+       return (__system_matches_cap(ARM64_HAS_NESTED_VIRT) &&
+               !(has_cpuid_feature(entry, scope) ||
+                 is_midr_in_range_list(read_cpuid_id(), nv1_ni_list)));
+}
+
  #if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
  static bool has_lpa2_at_stage1(u64 mmfr0)
  {
@@ -2739,6 +2817,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                 .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                 .matches = has_lpa2,
         },
+       {
+               .desc = "NV1",
+               .capability = ARM64_HAS_HCR_NV1,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .matches = has_nv1,
+               ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
+       },
         {},
  };
  
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c

index 47043c0d95ec3d5d1c4bcfba9766b57a8aca5266..7ca3fbd200f0f3ec1043e41226e326aaaec390f4 100644 (file)
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -447,6 +447,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
         info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
         info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
         info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1);
+       info->reg_id_aa64mmfr4 = read_cpuid(ID_AA64MMFR4_EL1);
         info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
         info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
         info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c

index a5dc6f764195847251dc25c196304cbef44d8850..f27acca550d5539d00d958d441ca8631c8dba8d4 100644 (file)
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1311,6 +1311,22 @@ void __init sme_setup(void)
                 get_sme_default_vl());
  }
  
+void sme_suspend_exit(void)
+{
+       u64 smcr = 0;
+
+       if (!system_supports_sme())
+               return;
+
+       if (system_supports_fa64())
+               smcr |= SMCR_ELx_FA64;
+       if (system_supports_sme2())
+               smcr |= SMCR_ELx_EZT0;
+
+       write_sysreg_s(smcr, SYS_SMCR_EL1);
+       write_sysreg_s(0, SYS_SMPRI_EL1);
+}
+
  #endif /* CONFIG_ARM64_SME */
  
  static void sve_init_regs(void)
@@ -1635,7 +1651,7 @@ void fpsimd_preserve_current_state(void)
  void fpsimd_signal_preserve_current_state(void)
  {
         fpsimd_preserve_current_state();
-       if (test_thread_flag(TIF_SVE))
+       if (current->thread.fp_type == FP_STATE_SVE)
                 sve_to_fpsimd(current);
  }
  
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S

index cab7f91949d8f58e9565dae545e1f0a1056ea14b..5bdafbcff00973657bbc0ec362768b77919f459e 100644 (file)
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -584,25 +584,32 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
         mov_q   x1, INIT_SCTLR_EL1_MMU_OFF
  
         /*
-        * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
-        * making it impossible to start in nVHE mode. Is that
-        * compliant with the architecture? Absolutely not!
+        * Compliant CPUs advertise their VHE-onlyness with
+        * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
+        * RES1 in that case.
+        *
+        * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but
+        * don't advertise it (they predate this relaxation).
          */
+       mrs_s   x0, SYS_ID_AA64MMFR4_EL1
+       ubfx    x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
+       tbnz    x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
+
         mrs     x0, hcr_el2
         and     x0, x0, #HCR_E2H
-       cbz     x0, 1f
-
+       cbz     x0, 2f
+1:
         /* Set a sane SCTLR_EL1, the VHE way */
         pre_disable_mmu_workaround
         msr_s   SYS_SCTLR_EL12, x1
         mov     x2, #BOOT_CPU_FLAG_E2H
-       b       2f
+       b       3f
  
-1:
+2:
         pre_disable_mmu_workaround
         msr     sctlr_el1, x1
         mov     x2, xzr
-2:
+3:
         __init_el2_nvhe_prepare_eret
  
         mov     w0, #BOOT_CPU_MODE_EL2
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c

index dc6cf0e37194e428519d7d58524ad0f624f4bebb..e3bef38fc2e2d36b85a9a4069729e276f9368846 100644 (file)
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1500,7 +1500,8 @@ static const struct user_regset aarch64_regsets[] = {
  #ifdef CONFIG_ARM64_SVE
         [REGSET_SVE] = { /* Scalable Vector Extension */
                 .core_note_type = NT_ARM_SVE,
-               .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE),
+               .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX,
+                                             SVE_PT_REGS_SVE),
                                   SVE_VQ_BYTES),
                 .size = SVE_VQ_BYTES,
                 .align = SVE_VQ_BYTES,
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c

index 0e8beb3349ea2a1aae6340f879f78dd568e04f51..425b1bc17a3f6dc3237e81fabcb35d774f1aa9d1 100644 (file)
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -242,7 +242,7 @@ static int preserve_sve_context(struct sve_context __user *ctx)
                 vl = task_get_sme_vl(current);
                 vq = sve_vq_from_vl(vl);
                 flags |= SVE_SIG_FLAG_SM;
-       } else if (test_thread_flag(TIF_SVE)) {
+       } else if (current->thread.fp_type == FP_STATE_SVE) {
                 vq = sve_vq_from_vl(vl);
         }
  
@@ -878,7 +878,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
         if (system_supports_sve() || system_supports_sme()) {
                 unsigned int vq = 0;
  
-               if (add_all || test_thread_flag(TIF_SVE) ||
+               if (add_all || current->thread.fp_type == FP_STATE_SVE ||
                     thread_sm_enabled(&current->thread)) {
                         int vl = max(sve_max_vl(), sme_max_vl());
  
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c

index eca4d043521183adc7263da95cf656323a4cc73a..eaaff94329cddb8d1fb8d1523395453f3501c9a5 100644 (file)
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -12,6 +12,7 @@
  #include <asm/daifflags.h>
  #include <asm/debug-monitors.h>
  #include <asm/exec.h>
+#include <asm/fpsimd.h>
  #include <asm/mte.h>
  #include <asm/memory.h>
  #include <asm/mmu_context.h>
@@ -80,6 +81,8 @@ void notrace __cpu_suspend_exit(void)
          */
         spectre_v4_enable_mitigation(NULL);
  
+       sme_suspend_exit();
+
         /* Restore additional feature-specific configuration */
         ptrauth_suspend_exit();
  }
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile

index 2266fcdff78a0740fcd72a5c8125d17938d88df4..f5f80fdce0fe7aa2ab3b14ce931999b954312162 100644 (file)
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -127,9 +127,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
  targets += vdso.lds
  CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
  
-include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
-       $(call if_changed,vdsosym)
-
  # Strip rule for vdso.so
  $(obj)/vdso.so: OBJCOPYFLAGS := -S
  $(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE
@@ -166,9 +163,3 @@ quiet_cmd_vdsoas = AS32    $@
  
  quiet_cmd_vdsomunge = MUNGE   $@
        cmd_vdsomunge = $(obj)/$(munge) $< $@
-
-# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO)
-gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
-quiet_cmd_vdsosym = VDSOSYM $@
-# The AArch64 nm should be able to read an AArch32 binary
-      cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig

index 6c3c8ca73e7fda8bb29792218bb11d031e7527ff..937f15b7d8c3034dd97dc1ed7230b8cefdaa34a0 100644 (file)
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -3,7 +3,6 @@
  # KVM configuration
  #
  
-source "virt/lib/Kconfig"
  source "virt/kvm/Kconfig"
  
  menuconfig VIRTUALIZATION
@@ -20,7 +19,6 @@ if VIRTUALIZATION
  
  menuconfig KVM
         bool "Kernel-based Virtual Machine (KVM) support"
-       depends on HAVE_KVM
         select KVM_COMMON
         select KVM_GENERIC_HARDWARE_ENABLING
         select KVM_GENERIC_MMU_NOTIFIER
@@ -34,12 +32,11 @@ menuconfig KVM
         select HAVE_KVM_MSI
         select HAVE_KVM_IRQCHIP
         select HAVE_KVM_IRQ_ROUTING
-       select IRQ_BYPASS_MANAGER
         select HAVE_KVM_IRQ_BYPASS
+       select HAVE_KVM_READONLY_MEM
         select HAVE_KVM_VCPU_RUN_PID_CHANGE
         select SCHED_INFO
         select GUEST_PERF_EVENTS if PERF_EVENTS
-       select XARRAY_MULTI
         help
           Support hosting virtualized guest machines.
  
@@ -68,4 +65,15 @@ config PROTECTED_NVHE_STACKTRACE
  
           If unsure, or not using protected nVHE (pKVM), say N.
  
+config KVM_ARM64_RES_BITS_PARANOIA
+       bool "Build-time check of RES0/RES1 bits"
+       depends on KVM
+       default n
+       help
+         Say Y here to validate that KVM's knowledge of most system
+         registers' RES0/RES1 bits matches when the rest of the kernel
+         defines. Expect the build to fail badly if you enable this.
+
+         Just say N.
+
  endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c

index 9dec8c419bf4028e11350f5bab3cc079cdaf67b7..879982b1cc739eaed31ee503a76c01e83d42a5af 100644 (file)
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -745,7 +745,7 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
                 WARN_ON_ONCE(ret);
  
                 /*
-                * The virtual offset behaviour is "interresting", as it
+                * The virtual offset behaviour is "interesting", as it
                  * always applies when HCR_EL2.E2H==0, but only when
                  * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
                  * track E2H when putting the HV timer in "direct" mode.
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c

index a25265aca4324e490298794f5418aaa85b64480c..3dee5490eea94dd08e4ff88cb79f41d5d60be139 100644 (file)
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -190,6 +190,10 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
         return VM_FAULT_SIGBUS;
  }
  
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+       kvm_sys_regs_create_debugfs(kvm);
+}
  
  /**
   * kvm_arch_destroy_vm - destroy the VM data structure
@@ -206,6 +210,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                 pkvm_destroy_hyp_vm(kvm);
  
         kfree(kvm->arch.mpidr_data);
+       kfree(kvm->arch.sysreg_masks);
         kvm_destroy_vcpus(kvm);
  
         kvm_unshare_hyp(kvm, kvm + 1);
@@ -674,6 +679,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
                         return ret;
         }
  
+       /*
+        * This needs to happen after NV has imposed its own restrictions on
+        * the feature set
+        */
+       kvm_init_sysreg(vcpu);
+
         ret = kvm_timer_enable(vcpu);
         if (ret)
                 return ret;
@@ -2591,7 +2602,8 @@ static __init int kvm_arm_init(void)
         } else if (in_hyp_mode) {
                 kvm_info("VHE mode initialized successfully\n");
         } else {
-               kvm_info("Hyp mode initialized successfully\n");
+               char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n';
+               kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode);
         }
  
         /*
diff --git a/arch/arm64/kvm/check-res-bits.h b/arch/arm64/kvm/check-res-bits.h

new file mode 100644 (file)

index 0000000..2d98e60
--- /dev/null
+++ b/arch/arm64/kvm/check-res-bits.h
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <asm/sysreg-defs.h>
+
+/*
+ * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ *
+ * If any of these BUILD_BUG_ON() fails, that's because some bits that
+ * were reserved have gained some other meaning, and KVM needs to know
+ * about those.
+ *
+ * In such case, do *NOT* blindly change the assertion so that it
+ * passes, but also teach the rest of the code about the actual
+ * change.
+ *
+ * WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ */
+static inline void check_res_bits(void)
+{
+#ifdef CONFIG_KVM_ARM64_RES_BITS_PARANOIA
+
+       BUILD_BUG_ON(OSDTRRX_EL1_RES0           != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(MDCCINT_EL1_RES0           != (GENMASK_ULL(63, 31) | GENMASK_ULL(28, 0)));
+       BUILD_BUG_ON(MDSCR_EL1_RES0             != (GENMASK_ULL(63, 36) | GENMASK_ULL(28, 28) | GENMASK_ULL(25, 24) | GENMASK_ULL(20, 20) | GENMASK_ULL(18, 16) | GENMASK_ULL(11, 7) | GENMASK_ULL(5, 1)));
+       BUILD_BUG_ON(OSDTRTX_EL1_RES0           != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(OSECCR_EL1_RES0            != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(OSLAR_EL1_RES0             != (GENMASK_ULL(63, 1)));
+       BUILD_BUG_ON(ID_PFR0_EL1_RES0           != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_PFR1_EL1_RES0           != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_DFR0_EL1_RES0           != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_AFR0_EL1_RES0           != (GENMASK_ULL(63, 16)));
+       BUILD_BUG_ON(ID_MMFR0_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_MMFR1_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_MMFR2_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_MMFR3_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_ISAR0_EL1_RES0          != (GENMASK_ULL(63, 28)));
+       BUILD_BUG_ON(ID_ISAR1_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_ISAR2_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_ISAR3_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_ISAR4_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_ISAR5_EL1_RES0          != (GENMASK_ULL(63, 32) | GENMASK_ULL(23, 20)));
+       BUILD_BUG_ON(ID_ISAR6_EL1_RES0          != (GENMASK_ULL(63, 28)));
+       BUILD_BUG_ON(ID_MMFR4_EL1_RES0          != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(MVFR0_EL1_RES0             != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(MVFR1_EL1_RES0             != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(MVFR2_EL1_RES0             != (GENMASK_ULL(63, 8)));
+       BUILD_BUG_ON(ID_PFR2_EL1_RES0           != (GENMASK_ULL(63, 12)));
+       BUILD_BUG_ON(ID_DFR1_EL1_RES0           != (GENMASK_ULL(63, 8)));
+       BUILD_BUG_ON(ID_MMFR5_EL1_RES0          != (GENMASK_ULL(63, 8)));
+       BUILD_BUG_ON(ID_AA64PFR1_EL1_RES0       != (GENMASK_ULL(23, 20)));
+       BUILD_BUG_ON(ID_AA64PFR2_EL1_RES0       != (GENMASK_ULL(63, 36) | GENMASK_ULL(31, 12)));
+       BUILD_BUG_ON(ID_AA64ZFR0_EL1_RES0       != (GENMASK_ULL(63, 60) | GENMASK_ULL(51, 48) | GENMASK_ULL(39, 36) | GENMASK_ULL(31, 28) | GENMASK_ULL(15, 8)));
+       BUILD_BUG_ON(ID_AA64SMFR0_EL1_RES0      != (GENMASK_ULL(62, 61) | GENMASK_ULL(51, 49) | GENMASK_ULL(31, 31) | GENMASK_ULL(27, 0)));
+       BUILD_BUG_ON(ID_AA64FPFR0_EL1_RES0      != (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 2)));
+       BUILD_BUG_ON(ID_AA64DFR0_EL1_RES0       != (GENMASK_ULL(27, 24) | GENMASK_ULL(19, 16)));
+       BUILD_BUG_ON(ID_AA64DFR1_EL1_RES0       != (GENMASK_ULL(63, 0)));
+       BUILD_BUG_ON(ID_AA64AFR0_EL1_RES0       != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(ID_AA64AFR1_EL1_RES0       != (GENMASK_ULL(63, 0)));
+       BUILD_BUG_ON(ID_AA64ISAR0_EL1_RES0      != (GENMASK_ULL(3, 0)));
+       BUILD_BUG_ON(ID_AA64ISAR2_EL1_RES0      != (GENMASK_ULL(47, 44)));
+       BUILD_BUG_ON(ID_AA64ISAR3_EL1_RES0      != (GENMASK_ULL(63, 16)));
+       BUILD_BUG_ON(ID_AA64MMFR0_EL1_RES0      != (GENMASK_ULL(55, 48)));
+       BUILD_BUG_ON(ID_AA64MMFR2_EL1_RES0      != (GENMASK_ULL(47, 44)));
+       BUILD_BUG_ON(ID_AA64MMFR3_EL1_RES0      != (GENMASK_ULL(51, 48)));
+       BUILD_BUG_ON(ID_AA64MMFR4_EL1_RES0      != (GENMASK_ULL(63, 40) | GENMASK_ULL(35, 28) | GENMASK_ULL(3, 0)));
+       BUILD_BUG_ON(SCTLR_EL1_RES0             != (GENMASK_ULL(17, 17)));
+       BUILD_BUG_ON(CPACR_ELx_RES0             != (GENMASK_ULL(63, 30) | GENMASK_ULL(27, 26) | GENMASK_ULL(23, 22) | GENMASK_ULL(19, 18) | GENMASK_ULL(15, 0)));
+       BUILD_BUG_ON(SMPRI_EL1_RES0             != (GENMASK_ULL(63, 4)));
+       BUILD_BUG_ON(ZCR_ELx_RES0               != (GENMASK_ULL(63, 9)));
+       BUILD_BUG_ON(SMCR_ELx_RES0              != (GENMASK_ULL(63, 32) | GENMASK_ULL(29, 9)));
+       BUILD_BUG_ON(GCSCR_ELx_RES0             != (GENMASK_ULL(63, 10) | GENMASK_ULL(7, 7) | GENMASK_ULL(4, 1)));
+       BUILD_BUG_ON(GCSPR_ELx_RES0             != (GENMASK_ULL(2, 0)));
+       BUILD_BUG_ON(GCSCRE0_EL1_RES0           != (GENMASK_ULL(63, 11) | GENMASK_ULL(7, 6) | GENMASK_ULL(4, 1)));
+       BUILD_BUG_ON(ALLINT_RES0                != (GENMASK_ULL(63, 14) | GENMASK_ULL(12, 0)));
+       BUILD_BUG_ON(PMSCR_EL1_RES0             != (GENMASK_ULL(63, 8) | GENMASK_ULL(2, 2)));
+       BUILD_BUG_ON(PMSICR_EL1_RES0            != (GENMASK_ULL(55, 32)));
+       BUILD_BUG_ON(PMSIRR_EL1_RES0            != (GENMASK_ULL(63, 32) | GENMASK_ULL(7, 1)));
+       BUILD_BUG_ON(PMSFCR_EL1_RES0            != (GENMASK_ULL(63, 19) | GENMASK_ULL(15, 4)));
+       BUILD_BUG_ON(PMSLATFR_EL1_RES0          != (GENMASK_ULL(63, 16)));
+       BUILD_BUG_ON(PMSIDR_EL1_RES0            != (GENMASK_ULL(63, 25) | GENMASK_ULL(7, 7)));
+       BUILD_BUG_ON(PMBLIMITR_EL1_RES0         != (GENMASK_ULL(11, 6) | GENMASK_ULL(4, 3)));
+       BUILD_BUG_ON(PMBSR_EL1_RES0             != (GENMASK_ULL(63, 32) | GENMASK_ULL(25, 20)));
+       BUILD_BUG_ON(PMBIDR_EL1_RES0            != (GENMASK_ULL(63, 12) | GENMASK_ULL(7, 6)));
+       BUILD_BUG_ON(CONTEXTIDR_ELx_RES0        != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(CCSIDR_EL1_RES0            != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(CLIDR_EL1_RES0             != (GENMASK_ULL(63, 47)));
+       BUILD_BUG_ON(CCSIDR2_EL1_RES0           != (GENMASK_ULL(63, 24)));
+       BUILD_BUG_ON(GMID_EL1_RES0              != (GENMASK_ULL(63, 4)));
+       BUILD_BUG_ON(SMIDR_EL1_RES0             != (GENMASK_ULL(63, 32) | GENMASK_ULL(14, 12)));
+       BUILD_BUG_ON(CSSELR_EL1_RES0            != (GENMASK_ULL(63, 5)));
+       BUILD_BUG_ON(CTR_EL0_RES0               != (GENMASK_ULL(63, 38) | GENMASK_ULL(30, 30) | GENMASK_ULL(13, 4)));
+       BUILD_BUG_ON(CTR_EL0_RES1               != (GENMASK_ULL(31, 31)));
+       BUILD_BUG_ON(DCZID_EL0_RES0             != (GENMASK_ULL(63, 5)));
+       BUILD_BUG_ON(SVCR_RES0                  != (GENMASK_ULL(63, 2)));
+       BUILD_BUG_ON(FPMR_RES0                  != (GENMASK_ULL(63, 38) | GENMASK_ULL(23, 23) | GENMASK_ULL(13, 9)));
+       BUILD_BUG_ON(HFGxTR_EL2_RES0            != (GENMASK_ULL(51, 51)));
+       BUILD_BUG_ON(HFGITR_EL2_RES0            != (GENMASK_ULL(63, 63) | GENMASK_ULL(61, 61)));
+       BUILD_BUG_ON(HDFGRTR_EL2_RES0           != (GENMASK_ULL(49, 49) | GENMASK_ULL(42, 42) | GENMASK_ULL(39, 38) | GENMASK_ULL(21, 20) | GENMASK_ULL(8, 8)));
+       BUILD_BUG_ON(HDFGWTR_EL2_RES0           != (GENMASK_ULL(63, 63) | GENMASK_ULL(59, 58) | GENMASK_ULL(51, 51) | GENMASK_ULL(47, 47) | GENMASK_ULL(43, 43) | GENMASK_ULL(40, 38) | GENMASK_ULL(34, 34) | GENMASK_ULL(30, 30) | GENMASK_ULL(22, 22) | GENMASK_ULL(9, 9) | GENMASK_ULL(6, 6)));
+       BUILD_BUG_ON(HAFGRTR_EL2_RES0           != (GENMASK_ULL(63, 50) | GENMASK_ULL(16, 5)));
+       BUILD_BUG_ON(HCRX_EL2_RES0              != (GENMASK_ULL(63, 25) | GENMASK_ULL(13, 12)));
+       BUILD_BUG_ON(DACR32_EL2_RES0            != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(PMSCR_EL2_RES0             != (GENMASK_ULL(63, 8) | GENMASK_ULL(2, 2)));
+       BUILD_BUG_ON(TCR2_EL1x_RES0             != (GENMASK_ULL(63, 16) | GENMASK_ULL(13, 12) | GENMASK_ULL(9, 6)));
+       BUILD_BUG_ON(TCR2_EL2_RES0              != (GENMASK_ULL(63, 16)));
+       BUILD_BUG_ON(LORSA_EL1_RES0             != (GENMASK_ULL(63, 52) | GENMASK_ULL(15, 1)));
+       BUILD_BUG_ON(LOREA_EL1_RES0             != (GENMASK_ULL(63, 52) | GENMASK_ULL(15, 0)));
+       BUILD_BUG_ON(LORN_EL1_RES0              != (GENMASK_ULL(63, 8)));
+       BUILD_BUG_ON(LORC_EL1_RES0              != (GENMASK_ULL(63, 10) | GENMASK_ULL(1, 1)));
+       BUILD_BUG_ON(LORID_EL1_RES0             != (GENMASK_ULL(63, 24) | GENMASK_ULL(15, 8)));
+       BUILD_BUG_ON(ISR_EL1_RES0               != (GENMASK_ULL(63, 11) | GENMASK_ULL(5, 0)));
+       BUILD_BUG_ON(ICC_NMIAR1_EL1_RES0        != (GENMASK_ULL(63, 24)));
+       BUILD_BUG_ON(TRBLIMITR_EL1_RES0         != (GENMASK_ULL(11, 7)));
+       BUILD_BUG_ON(TRBBASER_EL1_RES0          != (GENMASK_ULL(11, 0)));
+       BUILD_BUG_ON(TRBSR_EL1_RES0             != (GENMASK_ULL(63, 56) | GENMASK_ULL(25, 24) | GENMASK_ULL(19, 19) | GENMASK_ULL(16, 16)));
+       BUILD_BUG_ON(TRBMAR_EL1_RES0            != (GENMASK_ULL(63, 12)));
+       BUILD_BUG_ON(TRBTRG_EL1_RES0            != (GENMASK_ULL(63, 32)));
+       BUILD_BUG_ON(TRBIDR_EL1_RES0            != (GENMASK_ULL(63, 12) | GENMASK_ULL(7, 6)));
+
+#endif
+}
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c

index 8725291cb00a185780ed1d795ff4ef0dcef31d36..ce8886122ed30c042df6b0d3786889b9f36bd1d4 100644 (file)
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -23,7 +23,7 @@
  
  static DEFINE_PER_CPU(u64, mdcr_el2);
  
-/**
+/*
   * save/restore_guest_debug_regs
   *
   * For some debug operations we need to tweak some guest registers. As
@@ -143,6 +143,7 @@ void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu)
  
  /**
   * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
+ * @vcpu:      the vcpu pointer
   */
  
  void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c

index 431fd429932dfaa557ac0977fbbc3baa58eef99d..4697ba41b3a9c7723b862b35d43019f97d0945d2 100644 (file)
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -427,12 +427,14 @@ static const complex_condition_check ccc[] = {
   * [19:14]     bit number in the FGT register (6 bits)
   * [20]                trap polarity (1 bit)
   * [25:21]     FG filter (5 bits)
- * [62:26]     Unused (37 bits)
+ * [35:26]     Main SysReg table index (10 bits)
+ * [62:36]     Unused (27 bits)
   * [63]                RES0 - Must be zero, as lost on insertion in the xarray
   */
  #define TC_CGT_BITS    10
  #define TC_FGT_BITS    4
  #define TC_FGF_BITS    5
+#define TC_SRI_BITS    10
  
  union trap_config {
         u64     val;
@@ -442,7 +444,8 @@ union trap_config {
                 unsigned long   bit:6;           /* Bit number */
                 unsigned long   pol:1;           /* Polarity */
                 unsigned long   fgf:TC_FGF_BITS; /* Fine Grained Filter */
-               unsigned long   unused:37;       /* Unused, should be zero */
+               unsigned long   sri:TC_SRI_BITS; /* SysReg Index */
+               unsigned long   unused:27;       /* Unused, should be zero */
                 unsigned long   mbz:1;           /* Must Be Zero */
         };
  };
@@ -1006,18 +1009,6 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
  
  static DEFINE_XARRAY(sr_forward_xa);
  
-enum fgt_group_id {
-       __NO_FGT_GROUP__,
-       HFGxTR_GROUP,
-       HDFGRTR_GROUP,
-       HDFGWTR_GROUP,
-       HFGITR_GROUP,
-       HAFGRTR_GROUP,
-
-       /* Must be last */
-       __NR_FGT_GROUP_IDS__
-};
-
  enum fg_filter_id {
         __NO_FGF__,
         HCRX_FGTnXS,
@@ -1757,6 +1748,28 @@ static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc,
                 err);
  }
  
+static u32 encoding_next(u32 encoding)
+{
+       u8 op0, op1, crn, crm, op2;
+
+       op0 = sys_reg_Op0(encoding);
+       op1 = sys_reg_Op1(encoding);
+       crn = sys_reg_CRn(encoding);
+       crm = sys_reg_CRm(encoding);
+       op2 = sys_reg_Op2(encoding);
+
+       if (op2 < Op2_mask)
+               return sys_reg(op0, op1, crn, crm, op2 + 1);
+       if (crm < CRm_mask)
+               return sys_reg(op0, op1, crn, crm + 1, 0);
+       if (crn < CRn_mask)
+               return sys_reg(op0, op1, crn + 1, 0, 0);
+       if (op1 < Op1_mask)
+               return sys_reg(op0, op1 + 1, 0, 0, 0);
+
+       return sys_reg(op0 + 1, 0, 0, 0, 0);
+}
+
  int __init populate_nv_trap_config(void)
  {
         int ret = 0;
@@ -1775,23 +1788,18 @@ int __init populate_nv_trap_config(void)
                         ret = -EINVAL;
                 }
  
-               if (cgt->encoding != cgt->end) {
-                       prev = xa_store_range(&sr_forward_xa,
-                                             cgt->encoding, cgt->end,
-                                             xa_mk_value(cgt->tc.val),
-                                             GFP_KERNEL);
-               } else {
-                       prev = xa_store(&sr_forward_xa, cgt->encoding,
+               for (u32 enc = cgt->encoding; enc <= cgt->end; enc = encoding_next(enc)) {
+                       prev = xa_store(&sr_forward_xa, enc,
                                         xa_mk_value(cgt->tc.val), GFP_KERNEL);
                         if (prev && !xa_is_err(prev)) {
                                 ret = -EINVAL;
                                 print_nv_trap_error(cgt, "Duplicate CGT", ret);
                         }
-               }
  
-               if (xa_is_err(prev)) {
-                       ret = xa_err(prev);
-                       print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+                       if (xa_is_err(prev)) {
+                               ret = xa_err(prev);
+                               print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+                       }
                 }
         }
  
@@ -1804,6 +1812,7 @@ int __init populate_nv_trap_config(void)
         for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) {
                 const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i];
                 union trap_config tc;
+               void *prev;
  
                 if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) {
                         ret = -EINVAL;
@@ -1818,8 +1827,13 @@ int __init populate_nv_trap_config(void)
                 }
  
                 tc.val |= fgt->tc.val;
-               xa_store(&sr_forward_xa, fgt->encoding,
-                        xa_mk_value(tc.val), GFP_KERNEL);
+               prev = xa_store(&sr_forward_xa, fgt->encoding,
+                               xa_mk_value(tc.val), GFP_KERNEL);
+
+               if (xa_is_err(prev)) {
+                       ret = xa_err(prev);
+                       print_nv_trap_error(fgt, "Failed FGT insertion", ret);
+               }
         }
  
         kvm_info("nv: %ld fine grained trap handlers\n",
@@ -1845,6 +1859,38 @@ check_mcb:
         return ret;
  }
  
+int __init populate_sysreg_config(const struct sys_reg_desc *sr,
+                                 unsigned int idx)
+{
+       union trap_config tc;
+       u32 encoding;
+       void *ret;
+
+       /*
+        * 0 is a valid value for the index, but not for the storage.
+        * We'll store (idx+1), so check against an offset'd limit.
+        */
+       if (idx >= (BIT(TC_SRI_BITS) - 1)) {
+               kvm_err("sysreg %s (%d) out of range\n", sr->name, idx);
+               return -EINVAL;
+       }
+
+       encoding = sys_reg(sr->Op0, sr->Op1, sr->CRn, sr->CRm, sr->Op2);
+       tc = get_trap_config(encoding);
+
+       if (tc.sri) {
+               kvm_err("sysreg %s (%d) duplicate entry (%d)\n",
+                       sr->name, idx - 1, tc.sri);
+               return -EINVAL;
+       }
+
+       tc.sri = idx + 1;
+       ret = xa_store(&sr_forward_xa, encoding,
+                      xa_mk_value(tc.val), GFP_KERNEL);
+
+       return xa_err(ret);
+}
+
  static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
                                          const struct trap_bits *tb)
  {
@@ -1892,20 +1938,64 @@ static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu,
         return __compute_trap_behaviour(vcpu, tc.cgt, b);
  }
  
-static bool check_fgt_bit(u64 val, const union trap_config tc)
+static u64 kvm_get_sysreg_res0(struct kvm *kvm, enum vcpu_sysreg sr)
  {
-       return ((val >> tc.bit) & 1) == tc.pol;
+       struct kvm_sysreg_masks *masks;
+
+       /* Only handle the VNCR-backed regs for now */
+       if (sr < __VNCR_START__)
+               return 0;
+
+       masks = kvm->arch.sysreg_masks;
+
+       return masks->mask[sr - __VNCR_START__].res0;
  }
  
-#define sanitised_sys_reg(vcpu, reg)                   \
-       ({                                              \
-               u64 __val;                              \
-               __val = __vcpu_sys_reg(vcpu, reg);      \
-               __val &= ~__ ## reg ## _RES0;           \
-               (__val);                                \
-       })
+static bool check_fgt_bit(struct kvm *kvm, bool is_read,
+                         u64 val, const union trap_config tc)
+{
+       enum vcpu_sysreg sr;
+
+       if (tc.pol)
+               return (val & BIT(tc.bit));
+
+       /*
+        * FGTs with negative polarities are an absolute nightmare, as
+        * we need to evaluate the bit in the light of the feature
+        * that defines it. WTF were they thinking?
+        *
+        * So let's check if the bit has been earmarked as RES0, as
+        * this indicates an unimplemented feature.
+        */
+       if (val & BIT(tc.bit))
+               return false;
+
+       switch ((enum fgt_group_id)tc.fgt) {
+       case HFGxTR_GROUP:
+               sr = is_read ? HFGRTR_EL2 : HFGWTR_EL2;
+               break;
+
+       case HDFGRTR_GROUP:
+               sr = is_read ? HDFGRTR_EL2 : HDFGWTR_EL2;
+               break;
+
+       case HAFGRTR_GROUP:
+               sr = HAFGRTR_EL2;
+               break;
+
+       case HFGITR_GROUP:
+               sr = HFGITR_EL2;
+               break;
+
+       default:
+               WARN_ONCE(1, "Unhandled FGT group");
+               return false;
+       }
+
+       return !(kvm_get_sysreg_res0(kvm, sr) & BIT(tc.bit));
+}
  
-bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
+bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
  {
         union trap_config tc;
         enum trap_behaviour b;
@@ -1913,9 +2003,6 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
         u32 sysreg;
         u64 esr, val;
  
-       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
-               return false;
-
         esr = kvm_vcpu_get_esr(vcpu);
         sysreg = esr_sys64_to_sysreg(esr);
         is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
@@ -1926,13 +2013,27 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
          * A value of 0 for the whole entry means that we know nothing
          * for this sysreg, and that it cannot be re-injected into the
          * nested hypervisor. In this situation, let's cut it short.
-        *
-        * Note that ultimately, we could also make use of the xarray
-        * to store the index of the sysreg in the local descriptor
-        * array, avoiding another search... Hint, hint...
          */
         if (!tc.val)
-               return false;
+               goto local;
+
+       /*
+        * If a sysreg can be trapped using a FGT, first check whether we
+        * trap for the purpose of forbidding the feature. In that case,
+        * inject an UNDEF.
+        */
+       if (tc.fgt != __NO_FGT_GROUP__ &&
+           (vcpu->kvm->arch.fgu[tc.fgt] & BIT(tc.bit))) {
+               kvm_inject_undefined(vcpu);
+               return true;
+       }
+
+       /*
+        * If we're not nesting, immediately return to the caller, with the
+        * sysreg index, should we have it.
+        */
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               goto local;
  
         switch ((enum fgt_group_id)tc.fgt) {
         case __NO_FGT_GROUP__:
@@ -1940,25 +2041,24 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
  
         case HFGxTR_GROUP:
                 if (is_read)
-                       val = sanitised_sys_reg(vcpu, HFGRTR_EL2);
+                       val = __vcpu_sys_reg(vcpu, HFGRTR_EL2);
                 else
-                       val = sanitised_sys_reg(vcpu, HFGWTR_EL2);
+                       val = __vcpu_sys_reg(vcpu, HFGWTR_EL2);
                 break;
  
         case HDFGRTR_GROUP:
-       case HDFGWTR_GROUP:
                 if (is_read)
-                       val = sanitised_sys_reg(vcpu, HDFGRTR_EL2);
+                       val = __vcpu_sys_reg(vcpu, HDFGRTR_EL2);
                 else
-                       val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
+                       val = __vcpu_sys_reg(vcpu, HDFGWTR_EL2);
                 break;
  
         case HAFGRTR_GROUP:
-               val = sanitised_sys_reg(vcpu, HAFGRTR_EL2);
+               val = __vcpu_sys_reg(vcpu, HAFGRTR_EL2);
                 break;
  
         case HFGITR_GROUP:
-               val = sanitised_sys_reg(vcpu, HFGITR_EL2);
+               val = __vcpu_sys_reg(vcpu, HFGITR_EL2);
                 switch (tc.fgf) {
                         u64 tmp;
  
@@ -1966,7 +2066,7 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
                         break;
  
                 case HCRX_FGTnXS:
-                       tmp = sanitised_sys_reg(vcpu, HCRX_EL2);
+                       tmp = __vcpu_sys_reg(vcpu, HCRX_EL2);
                         if (tmp & HCRX_EL2_FGTnXS)
                                 tc.fgt = __NO_FGT_GROUP__;
                 }
@@ -1975,10 +2075,11 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
         case __NR_FGT_GROUP_IDS__:
                 /* Something is really wrong, bail out */
                 WARN_ONCE(1, "__NR_FGT_GROUP_IDS__");
-               return false;
+               goto local;
         }
  
-       if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc))
+       if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(vcpu->kvm, is_read,
+                                                       val, tc))
                 goto inject;
  
         b = compute_trap_behaviour(vcpu, tc);
@@ -1987,6 +2088,26 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
             ((b & BEHAVE_FORWARD_WRITE) && !is_read))
                 goto inject;
  
+local:
+       if (!tc.sri) {
+               struct sys_reg_params params;
+
+               params = esr_sys64_to_params(esr);
+
+               /*
+                * Check for the IMPDEF range, as per DDI0487 J.a,
+                * D18.3.2 Reserved encodings for IMPLEMENTATION
+                * DEFINED registers.
+                */
+               if (!(params.Op0 == 3 && (params.CRn & 0b1011) == 0b1011))
+                       print_sys_reg_msg(&params,
+                                         "Unsupported guest access at: %lx\n",
+                                         *vcpu_pc(vcpu));
+               kvm_inject_undefined(vcpu);
+               return true;
+       }
+
+       *sr_index = tc.sri - 1;
         return false;
  
  inject:
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c

index 8c1d0d4853df48abf4d089bbde153bcee8d0e6d0..571cf6eef1e14f68de1df7fc0b8ee54a92bd3767 100644 (file)
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -117,7 +117,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
  }
  
  /*
- * Called just before entering the guest once we are no longer preemptable
+ * Called just before entering the guest once we are no longer preemptible
   * and interrupts are disabled. If we have managed to run anything using
   * FP while we were preemptible (such as off the back of an interrupt),
   * then neither the host nor the guest own the FP hardware (and it was the
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c

index aaf1d49397392b0b9d1b1284b65354db6f883dce..6e22e658795a1c9881cb52752c07bc4bce06b361 100644 (file)
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -711,6 +711,7 @@ static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu,
  
  /**
   * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ * @vcpu: the vCPU pointer
   *
   * This is for all registers.
   */
@@ -729,6 +730,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
  
  /**
   * kvm_arm_copy_reg_indices - get indices of all registers.
+ * @vcpu: the vCPU pointer
+ * @uindices: register list to copy
   *
   * We do core registers right here, then we append system regs.
   */
@@ -902,8 +905,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
  
  /**
   * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
- * @kvm:       pointer to the KVM struct
- * @kvm_guest_debug: the ioctl data buffer
+ * @vcpu: the vCPU pointer
+ * @dbg: the ioctl data buffer
   *
   * This sets up and enables the VM for guest debugging. Userspace
   * passes in a control flag to enable different debug types and
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c

index f98cbe2626a1cb545da7a71ffd31fb11f0c6e8ae..8d9670e6615dc8767e3b1524e04fcd96ae6f63c8 100644 (file)
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -84,7 +84,7 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
  }
  
  /**
- * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * kvm_adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
   * @vcpu:      The VCPU pointer
   *
   * When exceptions occur while instructions are executed in Thumb IF-THEN
@@ -120,7 +120,7 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
  }
  
  /**
- * kvm_skip_instr - skip a trapped instruction and proceed to the next
+ * kvm_skip_instr32 - skip a trapped instruction and proceed to the next
   * @vcpu: The vcpu pointer
   */
  void kvm_skip_instr32(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h

index a038320cdb089074328a29419a5fa1c121a5052f..e3fcf8c4d5b4d4c847e0dd1522380463a6c92e44 100644 (file)
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -79,14 +79,48 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
                 clr |= ~hfg & __ ## reg ## _nMASK;                      \
         } while(0)
  
-#define update_fgt_traps_cs(vcpu, reg, clr, set)                       \
+#define reg_to_fgt_group_id(reg)                                       \
+       ({                                                              \
+               enum fgt_group_id id;                                   \
+               switch(reg) {                                           \
+               case HFGRTR_EL2:                                        \
+               case HFGWTR_EL2:                                        \
+                       id = HFGxTR_GROUP;                              \
+                       break;                                          \
+               case HFGITR_EL2:                                        \
+                       id = HFGITR_GROUP;                              \
+                       break;                                          \
+               case HDFGRTR_EL2:                                       \
+               case HDFGWTR_EL2:                                       \
+                       id = HDFGRTR_GROUP;                             \
+                       break;                                          \
+               case HAFGRTR_EL2:                                       \
+                       id = HAFGRTR_GROUP;                             \
+                       break;                                          \
+               default:                                                \
+                       BUILD_BUG_ON(1);                                \
+               }                                                       \
+                                                                       \
+               id;                                                     \
+       })
+
+#define compute_undef_clr_set(vcpu, kvm, reg, clr, set)                        \
+       do {                                                            \
+               u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)];      \
+               set |= hfg & __ ## reg ## _MASK;                        \
+               clr |= hfg & __ ## reg ## _nMASK;                       \
+       } while(0)
+
+#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set)           \
         do {                                                            \
-               struct kvm_cpu_context *hctxt =                         \
-                       &this_cpu_ptr(&kvm_host_data)->host_ctxt;       \
                 u64 c = 0, s = 0;                                       \
                                                                         \
                 ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg);  \
-               compute_clr_set(vcpu, reg, c, s);                       \
+               if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))            \
+                       compute_clr_set(vcpu, reg, c, s);               \
+                                                                       \
+               compute_undef_clr_set(vcpu, kvm, reg, c, s);            \
+                                                                       \
                 s |= set;                                               \
                 c |= clr;                                               \
                 if (c || s) {                                           \
@@ -97,8 +131,8 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
                 }                                                       \
         } while(0)
  
-#define update_fgt_traps(vcpu, reg)            \
-       update_fgt_traps_cs(vcpu, reg, 0, 0)
+#define update_fgt_traps(hctxt, vcpu, kvm, reg)                \
+       update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0)
  
  /*
   * Validate the fine grain trap masks.
@@ -122,8 +156,7 @@ static inline bool cpu_has_amu(void)
  static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
  {
         struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
-       u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
-       u64 r_val, w_val;
+       struct kvm *kvm = kern_hyp_va(vcpu->kvm);
  
         CHECK_FGT_MASKS(HFGRTR_EL2);
         CHECK_FGT_MASKS(HFGWTR_EL2);
@@ -136,72 +169,45 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
         if (!cpus_have_final_cap(ARM64_HAS_FGT))
                 return;
  
-       ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
-       ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
-
-       if (cpus_have_final_cap(ARM64_SME)) {
-               tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
-
-               r_clr |= tmp;
-               w_clr |= tmp;
-       }
-
-       /*
-        * Trap guest writes to TCR_EL1 to prevent it from enabling HA or HD.
-        */
-       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-               w_set |= HFGxTR_EL2_TCR_EL1_MASK;
-
-       if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
-               compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
-               compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
-       }
-
-       /* The default to trap everything not handled or supported in KVM. */
-       tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 |
-             HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1;
-
-       r_val = __HFGRTR_EL2_nMASK & ~tmp;
-       r_val |= r_set;
-       r_val &= ~r_clr;
-
-       w_val = __HFGWTR_EL2_nMASK & ~tmp;
-       w_val |= w_set;
-       w_val &= ~w_clr;
-
-       write_sysreg_s(r_val, SYS_HFGRTR_EL2);
-       write_sysreg_s(w_val, SYS_HFGWTR_EL2);
-
-       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
-               return;
-
-       update_fgt_traps(vcpu, HFGITR_EL2);
-       update_fgt_traps(vcpu, HDFGRTR_EL2);
-       update_fgt_traps(vcpu, HDFGWTR_EL2);
+       update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2);
+       update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0,
+                           cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ?
+                           HFGxTR_EL2_TCR_EL1_MASK : 0);
+       update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2);
+       update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2);
+       update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2);
  
         if (cpu_has_amu())
-               update_fgt_traps(vcpu, HAFGRTR_EL2);
+               update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2);
  }
  
+#define __deactivate_fgt(htcxt, vcpu, kvm, reg)                                \
+       do {                                                            \
+               if ((vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) ||        \
+                   kvm->arch.fgu[reg_to_fgt_group_id(reg)])            \
+                       write_sysreg_s(ctxt_sys_reg(hctxt, reg),        \
+                                      SYS_ ## reg);                    \
+       } while(0)
+
  static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
  {
         struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+       struct kvm *kvm = kern_hyp_va(vcpu->kvm);
  
         if (!cpus_have_final_cap(ARM64_HAS_FGT))
                 return;
  
-       write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
-       write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
-
-       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
-               return;
-
-       write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
-       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
-       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
+       __deactivate_fgt(hctxt, vcpu, kvm, HFGRTR_EL2);
+       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
+               write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
+       else
+               __deactivate_fgt(hctxt, vcpu, kvm, HFGWTR_EL2);
+       __deactivate_fgt(hctxt, vcpu, kvm, HFGITR_EL2);
+       __deactivate_fgt(hctxt, vcpu, kvm, HDFGRTR_EL2);
+       __deactivate_fgt(hctxt, vcpu, kvm, HDFGWTR_EL2);
  
         if (cpu_has_amu())
-               write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2);
+               __deactivate_fgt(hctxt, vcpu, kvm, HAFGRTR_EL2);
  }
  
  static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -230,7 +236,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
         write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
  
         if (cpus_have_final_cap(ARM64_HAS_HCX)) {
-               u64 hcrx = HCRX_GUEST_FLAGS;
+               u64 hcrx = vcpu->arch.hcrx_el2;
                 if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
                         u64 clr = 0, set = 0;
  
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h

index bb6b571ec627dede466c5fa1d05785a9c9f78764..4be6a7fa007082ac008c83422e9bb69b0f5da324 100644 (file)
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -27,16 +27,34 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
         ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
  }
  
-static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt)
  {
         struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
  
         if (!vcpu)
                 vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
  
+       return vcpu;
+}
+
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+       struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);
+
         return kvm_has_mte(kern_hyp_va(vcpu->kvm));
  }
  
+static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt)
+{
+       struct kvm_vcpu *vcpu;
+
+       if (!cpus_have_final_cap(ARM64_HAS_S1PIE))
+               return false;
+
+       vcpu = ctxt_to_vcpu(ctxt);
+       return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1PIE, IMP);
+}
+
  static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
  {
         ctxt_sys_reg(ctxt, SCTLR_EL1)   = read_sysreg_el1(SYS_SCTLR);
@@ -55,7 +73,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
         ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
         ctxt_sys_reg(ctxt, AMAIR_EL1)   = read_sysreg_el1(SYS_AMAIR);
         ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
-       if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+       if (ctxt_has_s1pie(ctxt)) {
                 ctxt_sys_reg(ctxt, PIR_EL1)     = read_sysreg_el1(SYS_PIR);
                 ctxt_sys_reg(ctxt, PIRE0_EL1)   = read_sysreg_el1(SYS_PIRE0);
         }
@@ -131,7 +149,7 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
         write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR);
         write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR);
         write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL);
-       if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+       if (ctxt_has_s1pie(ctxt)) {
                 write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1),   SYS_PIR);
                 write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
         }
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c

index 4558c02eb352d06be1c5861ea22a956c2c05954f..7746ea507b6f00ea0214310a5f52c840e3435b11 100644 (file)
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -31,8 +31,8 @@ static void __debug_save_spe(u64 *pmscr_el1)
                 return;
  
         /* Yes; save the control register and disable data generation */
-       *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
-       write_sysreg_s(0, SYS_PMSCR_EL1);
+       *pmscr_el1 = read_sysreg_el1(SYS_PMSCR);
+       write_sysreg_el1(0, SYS_PMSCR);
         isb();
  
         /* Now drain all buffered data to memory */
@@ -48,7 +48,7 @@ static void __debug_restore_spe(u64 pmscr_el1)
         isb();
  
         /* Re-enable data generation */
-       write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
+       write_sysreg_el1(pmscr_el1, SYS_PMSCR);
  }
  
  static void __debug_save_trace(u64 *trfcr_el1)
@@ -63,8 +63,8 @@ static void __debug_save_trace(u64 *trfcr_el1)
          * Since access to TRFCR_EL1 is trapped, the guest can't
          * modify the filtering set by the host.
          */
-       *trfcr_el1 = read_sysreg_s(SYS_TRFCR_EL1);
-       write_sysreg_s(0, SYS_TRFCR_EL1);
+       *trfcr_el1 = read_sysreg_el1(SYS_TRFCR);
+       write_sysreg_el1(0, SYS_TRFCR);
         isb();
         /* Drain the trace buffer to memory */
         tsb_csync();
@@ -76,7 +76,7 @@ static void __debug_restore_trace(u64 trfcr_el1)
                 return;
  
         /* Restore trace filter controls */
-       write_sysreg_s(trfcr_el1, SYS_TRFCR_EL1);
+       write_sysreg_el1(trfcr_el1, SYS_TRFCR);
  }
  
  void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S

index 7693a6757cd766b4fc166a6d1adf3f2b1a2ad1a3..135cfb294ee51f96c3578450f90daf8925b3a255 100644 (file)
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -110,7 +110,7 @@ SYM_FUNC_END(__host_enter)
   *                               u64 elr, u64 par);
   */
  SYM_FUNC_START(__hyp_do_panic)
-       /* Prepare and exit to the host's panic funciton. */
+       /* Prepare and exit to the host's panic function. */
         mov     lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
                       PSR_MODE_EL1h)
         msr     spsr_el2, lr
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c

index b01a3d1078a8803f061496044066d749f9881f94..8850b591d775181a9f8545a21f05bb8fdc2b3a21 100644 (file)
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -155,7 +155,7 @@ int hyp_back_vmemmap(phys_addr_t back)
                 start = hyp_memory[i].base;
                 start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE);
                 /*
-                * The begining of the hyp_vmemmap region for the current
+                * The beginning of the hyp_vmemmap region for the current
                  * memblock may already be backed by the page backing the end
                  * the previous region, so avoid mapping it twice.
                  */
@@ -408,7 +408,7 @@ static void *admit_host_page(void *arg)
         return pop_hyp_memcache(host_mc, hyp_phys_to_virt);
  }
  
-/* Refill our local memcache by poping pages from the one provided by the host. */
+/* Refill our local memcache by popping pages from the one provided by the host. */
  int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
                     struct kvm_hyp_memcache *host_mc)
  {
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c

index c651df904fe3eb940e07785aac1ac76079743666..3fae5830f8d2c72f4ed4032cfd99fd285cbcb885 100644 (file)
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -717,15 +717,29 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
  static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
                                 kvm_pte_t *ptep)
  {
-       bool device = prot & KVM_PGTABLE_PROT_DEVICE;
-       kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
-                           KVM_S2_MEMATTR(pgt, NORMAL);
+       kvm_pte_t attr;
         u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
  
+       switch (prot & (KVM_PGTABLE_PROT_DEVICE |
+                       KVM_PGTABLE_PROT_NORMAL_NC)) {
+       case KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC:
+               return -EINVAL;
+       case KVM_PGTABLE_PROT_DEVICE:
+               if (prot & KVM_PGTABLE_PROT_X)
+                       return -EINVAL;
+               attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
+               break;
+       case KVM_PGTABLE_PROT_NORMAL_NC:
+               if (prot & KVM_PGTABLE_PROT_X)
+                       return -EINVAL;
+               attr = KVM_S2_MEMATTR(pgt, NORMAL_NC);
+               break;
+       default:
+               attr = KVM_S2_MEMATTR(pgt, NORMAL);
+       }
+
         if (!(prot & KVM_PGTABLE_PROT_X))
                 attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
-       else if (device)
-               return -EINVAL;
  
         if (prot & KVM_PGTABLE_PROT_R)
                 attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
@@ -1419,7 +1433,6 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
                                  level + 1);
         if (ret) {
                 kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
-               mm_ops->put_page(pgtable);
                 return ERR_PTR(ret);
         }
  
@@ -1502,7 +1515,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
  
         if (!stage2_try_break_pte(ctx, mmu)) {
                 kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
-               mm_ops->put_page(childp);
                 return -EAGAIN;
         }
  
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c

index 8e1e0d5033b689243186e54b99f581a0fd50cd96..a8b9ea496706df291b479b12bf9e7ffed6aa204d 100644 (file)
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -95,7 +95,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
  }
  
  /**
- * __vcpu_put_switch_syregs - Restore host system registers to the physical CPU
+ * __vcpu_put_switch_sysregs - Restore host system registers to the physical CPU
   *
   * @vcpu: The VCPU pointer
   *
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c

index 0bd93a5f21ce382506803d018ba7df7072ac0aa1..a640e839848e601f622c7eeacec7b3600608dbbd 100644 (file)
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -134,7 +134,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
         if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) {
                 fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE;
         } else {
-               /* no need to shuffle FS[4] into DFSR[10] as its 0 */
+               /* no need to shuffle FS[4] into DFSR[10] as it's 0 */
                 fsr = DFSR_FSC_EXTABT_nLPAE;
         }
  
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c

index d14504821b794eb84dc7c44fec72097cee5ceceb..2902f3ffca3cf37d72013838bb77ea85b5420507 100644 (file)
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -305,7 +305,7 @@ static void invalidate_icache_guest_page(void *va, size_t size)
   * does.
   */
  /**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * __unmap_stage2_range -- Clear stage2 page table entries to unmap a range
   * @mmu:   The KVM stage-2 MMU pointer
   * @start: The intermediate physical base address of the range to unmap
   * @size:  The size of the area to unmap
@@ -1381,7 +1381,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         int ret = 0;
         bool write_fault, writable, force_pte = false;
         bool exec_fault, mte_allowed;
-       bool device = false;
+       bool device = false, vfio_allow_any_uc = false;
         unsigned long mmu_seq;
         struct kvm *kvm = vcpu->kvm;
         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -1472,6 +1472,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         gfn = fault_ipa >> PAGE_SHIFT;
         mte_allowed = kvm_vma_mte_allowed(vma);
  
+       vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
+
         /* Don't use the VMA after the unlock -- it may have vanished */
         vma = NULL;
  
@@ -1557,10 +1559,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         if (exec_fault)
                 prot |= KVM_PGTABLE_PROT_X;
  
-       if (device)
-               prot |= KVM_PGTABLE_PROT_DEVICE;
-       else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
+       if (device) {
+               if (vfio_allow_any_uc)
+                       prot |= KVM_PGTABLE_PROT_NORMAL_NC;
+               else
+                       prot |= KVM_PGTABLE_PROT_DEVICE;
+       } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
                 prot |= KVM_PGTABLE_PROT_X;
+       }
  
         /*
          * Under the premise of getting a FSC_PERM fault, we just need to relax
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c

index ba95d044bc98fd397d2b59e48aeb92d56a39c656..ced30c90521a02713a4e0e06c1d7b1430df55ea6 100644 (file)
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -133,6 +133,13 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
                 val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
                 break;
  
+       case SYS_ID_AA64MMFR4_EL1:
+               val = 0;
+               if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
+                       val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
+                                         ID_AA64MMFR4_EL1_E2H0_NI_NV1);
+               break;
+
         case SYS_ID_AA64DFR0_EL1:
                 /* Only limited support for PMU, Debug, BPs and WPs */
                 val &= (NV_FTR(DFR0, PMUVer)    |
@@ -156,15 +163,280 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
  
         return val;
  }
+
+u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr)
+{
+       u64 v = ctxt_sys_reg(&vcpu->arch.ctxt, sr);
+       struct kvm_sysreg_masks *masks;
+
+       masks = vcpu->kvm->arch.sysreg_masks;
+
+       if (masks) {
+               sr -= __VNCR_START__;
+
+               v &= ~masks->mask[sr].res0;
+               v |= masks->mask[sr].res1;
+       }
+
+       return v;
+}
+
+static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1)
+{
+       int i = sr - __VNCR_START__;
+
+       kvm->arch.sysreg_masks->mask[i].res0 = res0;
+       kvm->arch.sysreg_masks->mask[i].res1 = res1;
+}
+
  int kvm_init_nv_sysregs(struct kvm *kvm)
  {
+       u64 res0, res1;
+       int ret = 0;
+
         mutex_lock(&kvm->arch.config_lock);
  
+       if (kvm->arch.sysreg_masks)
+               goto out;
+
+       kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)),
+                                        GFP_KERNEL);
+       if (!kvm->arch.sysreg_masks) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
         for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++)
                 kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i),
                                                        kvm->arch.id_regs[i]);
  
+       /* VTTBR_EL2 */
+       res0 = res1 = 0;
+       if (!kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16))
+               res0 |= GENMASK(63, 56);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, CnP, IMP))
+               res0 |= VTTBR_CNP_BIT;
+       set_sysreg_masks(kvm, VTTBR_EL2, res0, res1);
+
+       /* VTCR_EL2 */
+       res0 = GENMASK(63, 32) | GENMASK(30, 20);
+       res1 = BIT(31);
+       set_sysreg_masks(kvm, VTCR_EL2, res0, res1);
+
+       /* VMPIDR_EL2 */
+       res0 = GENMASK(63, 40) | GENMASK(30, 24);
+       res1 = BIT(31);
+       set_sysreg_masks(kvm, VMPIDR_EL2, res0, res1);
+
+       /* HCR_EL2 */
+       res0 = BIT(48);
+       res1 = HCR_RW;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, TWED, IMP))
+               res0 |= GENMASK(63, 59);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, MTE2))
+               res0 |= (HCR_TID5 | HCR_DCT | HCR_ATA);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, EVT, TTLBxS))
+               res0 |= (HCR_TTLBIS | HCR_TTLBOS);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) &&
+           !kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2))
+               res0 |= HCR_ENSCXT;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, EVT, IMP))
+               res0 |= (HCR_TOCU | HCR_TICAB | HCR_TID4);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
+               res0 |= HCR_AMVOFFEN;
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1))
+               res0 |= HCR_FIEN;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, FWB, IMP))
+               res0 |= HCR_FWB;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, NV2))
+               res0 |= HCR_NV2;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP))
+               res0 |= (HCR_AT | HCR_NV1 | HCR_NV);
+       if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
+             __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+               res0 |= (HCR_API | HCR_APK);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TME, IMP))
+               res0 |= BIT(39);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP))
+               res0 |= (HCR_TEA | HCR_TERR);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
+               res0 |= HCR_TLOR;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, E2H0, IMP))
+               res1 |= HCR_E2H;
+       set_sysreg_masks(kvm, HCR_EL2, res0, res1);
+
+       /* HCRX_EL2 */
+       res0 = HCRX_EL2_RES0;
+       res1 = HCRX_EL2_RES1;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR3_EL1, PACM, TRIVIAL_IMP))
+               res0 |= HCRX_EL2_PACMEn;
+       if (!kvm_has_feat(kvm, ID_AA64PFR2_EL1, FPMR, IMP))
+               res0 |= HCRX_EL2_EnFPM;
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
+               res0 |= HCRX_EL2_GCSEn;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, SYSREG_128, IMP))
+               res0 |= HCRX_EL2_EnIDCP128;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, ADERR, DEV_ASYNC))
+               res0 |= (HCRX_EL2_EnSDERR | HCRX_EL2_EnSNERR);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, DF2, IMP))
+               res0 |= HCRX_EL2_TMEA;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP))
+               res0 |= HCRX_EL2_D128En;
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
+               res0 |= HCRX_EL2_PTTWI;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SCTLRX, IMP))
+               res0 |= HCRX_EL2_SCTLR2En;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
+               res0 |= HCRX_EL2_TCR2En;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+               res0 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, CMOW, IMP))
+               res0 |= HCRX_EL2_CMOW;
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, NMI, IMP))
+               res0 |= (HCRX_EL2_VFNMI | HCRX_EL2_VINMI | HCRX_EL2_TALLINT);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP) ||
+           !(read_sysreg_s(SYS_SMIDR_EL1) & SMIDR_EL1_SMPS))
+               res0 |= HCRX_EL2_SMPME;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))
+               res0 |= (HCRX_EL2_FGTnXS | HCRX_EL2_FnXS);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V))
+               res0 |= HCRX_EL2_EnASR;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64))
+               res0 |= HCRX_EL2_EnALS;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA))
+               res0 |= HCRX_EL2_EnAS0;
+       set_sysreg_masks(kvm, HCRX_EL2, res0, res1);
+
+       /* HFG[RW]TR_EL2 */
+       res0 = res1 = 0;
+       if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
+             __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+               res0 |= (HFGxTR_EL2_APDAKey | HFGxTR_EL2_APDBKey |
+                        HFGxTR_EL2_APGAKey | HFGxTR_EL2_APIAKey |
+                        HFGxTR_EL2_APIBKey);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
+               res0 |= (HFGxTR_EL2_LORC_EL1 | HFGxTR_EL2_LOREA_EL1 |
+                        HFGxTR_EL2_LORID_EL1 | HFGxTR_EL2_LORN_EL1 |
+                        HFGxTR_EL2_LORSA_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) &&
+           !kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2))
+               res0 |= (HFGxTR_EL2_SCXTNUM_EL1 | HFGxTR_EL2_SCXTNUM_EL0);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP))
+               res0 |= HFGxTR_EL2_ICC_IGRPENn_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP))
+               res0 |= (HFGxTR_EL2_ERRIDR_EL1 | HFGxTR_EL2_ERRSELR_EL1 |
+                        HFGxTR_EL2_ERXFR_EL1 | HFGxTR_EL2_ERXCTLR_EL1 |
+                        HFGxTR_EL2_ERXSTATUS_EL1 | HFGxTR_EL2_ERXMISCn_EL1 |
+                        HFGxTR_EL2_ERXPFGF_EL1 | HFGxTR_EL2_ERXPFGCTL_EL1 |
+                        HFGxTR_EL2_ERXPFGCDN_EL1 | HFGxTR_EL2_ERXADDR_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA))
+               res0 |= HFGxTR_EL2_nACCDATA_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
+               res0 |= (HFGxTR_EL2_nGCS_EL0 | HFGxTR_EL2_nGCS_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
+               res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
+               res0 |= HFGxTR_EL2_nRCWMASK_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
+               res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+               res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
+               res0 |= HFGxTR_EL2_nS2POR_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP))
+               res0 |= (HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nAMAIR2_EL1);
+       set_sysreg_masks(kvm, HFGRTR_EL2, res0 | __HFGRTR_EL2_RES0, res1);
+       set_sysreg_masks(kvm, HFGWTR_EL2, res0 | __HFGWTR_EL2_RES0, res1);
+
+       /* HDFG[RW]TR_EL2 */
+       res0 = res1 = 0;
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
+               res0 |= HDFGRTR_EL2_OSDLR_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
+               res0 |= (HDFGRTR_EL2_PMEVCNTRn_EL0 | HDFGRTR_EL2_PMEVTYPERn_EL0 |
+                        HDFGRTR_EL2_PMCCFILTR_EL0 | HDFGRTR_EL2_PMCCNTR_EL0 |
+                        HDFGRTR_EL2_PMCNTEN | HDFGRTR_EL2_PMINTEN |
+                        HDFGRTR_EL2_PMOVS | HDFGRTR_EL2_PMSELR_EL0 |
+                        HDFGRTR_EL2_PMMIR_EL1 | HDFGRTR_EL2_PMUSERENR_EL0 |
+                        HDFGRTR_EL2_PMCEIDn_EL0);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP))
+               res0 |= (HDFGRTR_EL2_PMBLIMITR_EL1 | HDFGRTR_EL2_PMBPTR_EL1 |
+                        HDFGRTR_EL2_PMBSR_EL1 | HDFGRTR_EL2_PMSCR_EL1 |
+                        HDFGRTR_EL2_PMSEVFR_EL1 | HDFGRTR_EL2_PMSFCR_EL1 |
+                        HDFGRTR_EL2_PMSICR_EL1 | HDFGRTR_EL2_PMSIDR_EL1 |
+                        HDFGRTR_EL2_PMSIRR_EL1 | HDFGRTR_EL2_PMSLATFR_EL1 |
+                        HDFGRTR_EL2_PMBIDR_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceVer, IMP))
+               res0 |= (HDFGRTR_EL2_TRC | HDFGRTR_EL2_TRCAUTHSTATUS |
+                        HDFGRTR_EL2_TRCAUXCTLR | HDFGRTR_EL2_TRCCLAIM |
+                        HDFGRTR_EL2_TRCCNTVRn | HDFGRTR_EL2_TRCID |
+                        HDFGRTR_EL2_TRCIMSPECn | HDFGRTR_EL2_TRCOSLSR |
+                        HDFGRTR_EL2_TRCPRGCTLR | HDFGRTR_EL2_TRCSEQSTR |
+                        HDFGRTR_EL2_TRCSSCSRn | HDFGRTR_EL2_TRCSTATR |
+                        HDFGRTR_EL2_TRCVICTLR);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP))
+               res0 |= (HDFGRTR_EL2_TRBBASER_EL1 | HDFGRTR_EL2_TRBIDR_EL1 |
+                        HDFGRTR_EL2_TRBLIMITR_EL1 | HDFGRTR_EL2_TRBMAR_EL1 |
+                        HDFGRTR_EL2_TRBPTR_EL1 | HDFGRTR_EL2_TRBSR_EL1 |
+                        HDFGRTR_EL2_TRBTRG_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP))
+               res0 |= (HDFGRTR_EL2_nBRBIDR | HDFGRTR_EL2_nBRBCTL |
+                        HDFGRTR_EL2_nBRBDATA);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2))
+               res0 |= HDFGRTR_EL2_nPMSNEVFR_EL1;
+       set_sysreg_masks(kvm, HDFGRTR_EL2, res0 | HDFGRTR_EL2_RES0, res1);
+
+       /* Reuse the bits from the read-side and add the write-specific stuff */
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
+               res0 |= (HDFGWTR_EL2_PMCR_EL0 | HDFGWTR_EL2_PMSWINC_EL0);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceVer, IMP))
+               res0 |= HDFGWTR_EL2_TRCOSLAR;
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
+               res0 |= HDFGWTR_EL2_TRFCR_EL1;
+       set_sysreg_masks(kvm, HFGWTR_EL2, res0 | HDFGWTR_EL2_RES0, res1);
+
+       /* HFGITR_EL2 */
+       res0 = HFGITR_EL2_RES0;
+       res1 = HFGITR_EL2_RES1;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, DPB, DPB2))
+               res0 |= HFGITR_EL2_DCCVADP;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2))
+               res0 |= (HFGITR_EL2_ATS1E1RP | HFGITR_EL2_ATS1E1WP);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+               res0 |= (HFGITR_EL2_TLBIRVAALE1OS | HFGITR_EL2_TLBIRVALE1OS |
+                        HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS |
+                        HFGITR_EL2_TLBIVAALE1OS | HFGITR_EL2_TLBIVALE1OS |
+                        HFGITR_EL2_TLBIVAAE1OS | HFGITR_EL2_TLBIASIDE1OS |
+                        HFGITR_EL2_TLBIVAE1OS | HFGITR_EL2_TLBIVMALLE1OS);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+               res0 |= (HFGITR_EL2_TLBIRVAALE1 | HFGITR_EL2_TLBIRVALE1 |
+                        HFGITR_EL2_TLBIRVAAE1 | HFGITR_EL2_TLBIRVAE1 |
+                        HFGITR_EL2_TLBIRVAALE1IS | HFGITR_EL2_TLBIRVALE1IS |
+                        HFGITR_EL2_TLBIRVAAE1IS | HFGITR_EL2_TLBIRVAE1IS |
+                        HFGITR_EL2_TLBIRVAALE1OS | HFGITR_EL2_TLBIRVALE1OS |
+                        HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, IMP))
+               res0 |= (HFGITR_EL2_CFPRCTX | HFGITR_EL2_DVPRCTX |
+                        HFGITR_EL2_CPPRCTX);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP))
+               res0 |= (HFGITR_EL2_nBRBINJ | HFGITR_EL2_nBRBIALL);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
+               res0 |= (HFGITR_EL2_nGCSPUSHM_EL1 | HFGITR_EL2_nGCSSTR_EL1 |
+                        HFGITR_EL2_nGCSEPP);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX))
+               res0 |= HFGITR_EL2_COSPRCTX;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP))
+               res0 |= HFGITR_EL2_ATS1E1A;
+       set_sysreg_masks(kvm, HFGITR_EL2, res0, res1);
+
+       /* HAFGRTR_EL2 - not a lot to see here */
+       res0 = HAFGRTR_EL2_RES0;
+       res1 = HAFGRTR_EL2_RES1;
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
+               res0 |= ~(res0 | res1);
+       set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1);
+out:
         mutex_unlock(&kvm->arch.config_lock);
  
-       return 0;
+       return ret;
  }
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c

index 8350fb8fee0b998ccf27dca4b7bf2e858846ccd3..b7be96a5359737d41576af46eee1f68852632846 100644 (file)
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -101,6 +101,17 @@ void __init kvm_hyp_reserve(void)
                  hyp_mem_base);
  }
  
+static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
+{
+       if (host_kvm->arch.pkvm.handle) {
+               WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
+                                         host_kvm->arch.pkvm.handle));
+       }
+
+       host_kvm->arch.pkvm.handle = 0;
+       free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+}
+
  /*
   * Allocates and donates memory for hypervisor VM structs at EL2.
   *
@@ -181,7 +192,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
         return 0;
  
  destroy_vm:
-       pkvm_destroy_hyp_vm(host_kvm);
+       __pkvm_destroy_hyp_vm(host_kvm);
         return ret;
  free_vm:
         free_pages_exact(hyp_vm, hyp_vm_sz);
@@ -194,23 +205,19 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm)
  {
         int ret = 0;
  
-       mutex_lock(&host_kvm->lock);
+       mutex_lock(&host_kvm->arch.config_lock);
         if (!host_kvm->arch.pkvm.handle)
                 ret = __pkvm_create_hyp_vm(host_kvm);
-       mutex_unlock(&host_kvm->lock);
+       mutex_unlock(&host_kvm->arch.config_lock);
  
         return ret;
  }
  
  void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
  {
-       if (host_kvm->arch.pkvm.handle) {
-               WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
-                                         host_kvm->arch.pkvm.handle));
-       }
-
-       host_kvm->arch.pkvm.handle = 0;
-       free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+       mutex_lock(&host_kvm->arch.config_lock);
+       __pkvm_destroy_hyp_vm(host_kvm);
+       mutex_unlock(&host_kvm->arch.config_lock);
  }
  
  int pkvm_init_host_vm(struct kvm *host_kvm)
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c

index 3d9467ff73bcbff82f8f22f51dd0784a83cecede..a35ce10e0a9f3efe554cbf4b1d11fc2300ed54e4 100644 (file)
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -64,12 +64,11 @@ u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
  {
         u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 |
                    kvm_pmu_event_mask(kvm);
-       u64 pfr0 = IDREG(kvm, SYS_ID_AA64PFR0_EL1);
  
-       if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL2, pfr0))
+       if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL2, IMP))
                 mask |= ARMV8_PMU_INCLUDE_EL2;
  
-       if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr0))
+       if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
                 mask |= ARMV8_PMU_EXCLUDE_NS_EL0 |
                         ARMV8_PMU_EXCLUDE_NS_EL1 |
                         ARMV8_PMU_EXCLUDE_EL3;
@@ -83,8 +82,10 @@ u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
   */
  static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
  {
+       struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
+
         return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
-               kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
+               kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5));
  }
  
  static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
@@ -419,7 +420,7 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
         kvm_pmu_update_state(vcpu);
  }
  
-/**
+/*
   * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
   * to the event.
   * This is why we need a callback to do it once outside of the NMI context.
@@ -490,7 +491,7 @@ static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
         return val;
  }
  
-/**
+/*
   * When the perf event overflows, set the overflow status and inform the vcpu.
   */
  static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
@@ -556,7 +557,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
                 return;
  
         /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
-       if (!kvm_pmu_is_3p5(vcpu))
+       if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
                 val &= ~ARMV8_PMU_PMCR_LP;
  
         /* The reset bits don't indicate any state, and shouldn't be saved. */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c

index 30253bd19917f46640ecffc914d91a7d1102dc81..8e60aa4a8dfb06618a789b9d55182a041b5b1e04 100644 (file)
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -12,6 +12,7 @@
  #include <linux/bitfield.h>
  #include <linux/bsearch.h>
  #include <linux/cacheinfo.h>
+#include <linux/debugfs.h>
  #include <linux/kvm_host.h>
  #include <linux/mm.h>
  #include <linux/printk.h>
@@ -31,6 +32,7 @@
  
  #include <trace/events/kvm.h>
  
+#include "check-res-bits.h"
  #include "sys_regs.h"
  
  #include "trace.h"
@@ -505,10 +507,9 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
                           struct sys_reg_params *p,
                           const struct sys_reg_desc *r)
  {
-       u64 val = IDREG(vcpu->kvm, SYS_ID_AA64MMFR1_EL1);
         u32 sr = reg_to_encoding(r);
  
-       if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) {
+       if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP)) {
                 kvm_inject_undefined(vcpu);
                 return false;
         }
@@ -1685,7 +1686,8 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
         u64 __f_val = FIELD_GET(reg##_##field##_MASK, val);                    \
         (val) &= ~reg##_##field##_MASK;                                        \
         (val) |= FIELD_PREP(reg##_##field##_MASK,                              \
-                       min(__f_val, (u64)reg##_##field##_##limit));           \
+                           min(__f_val,                                       \
+                               (u64)SYS_FIELD_VALUE(reg, field, limit)));     \
         (val);                                                                 \
  })
  
@@ -2174,6 +2176,16 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
         return true;
  }
  
+static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+       u64 val = r->val;
+
+       if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
+               val |= HCR_E2H;
+
+       return __vcpu_sys_reg(vcpu, r->reg) = val;
+}
+
  /*
   * Architected system registers.
   * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -2186,16 +2198,6 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
   * guest...
   */
  static const struct sys_reg_desc sys_reg_descs[] = {
-       { SYS_DESC(SYS_DC_ISW), access_dcsw },
-       { SYS_DESC(SYS_DC_IGSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_CSW), access_dcsw },
-       { SYS_DESC(SYS_DC_CGSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_CISW), access_dcsw },
-       { SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
-
         DBG_BCR_BVR_WCR_WVR_EL1(0),
         DBG_BCR_BVR_WCR_WVR_EL1(1),
         { SYS_DESC(SYS_MDCCINT_EL1), trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
@@ -2349,7 +2351,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
                                         ID_AA64MMFR2_EL1_NV |
                                         ID_AA64MMFR2_EL1_CCIDX)),
         ID_SANITISED(ID_AA64MMFR3_EL1),
-       ID_UNALLOCATED(7,4),
+       ID_SANITISED(ID_AA64MMFR4_EL1),
         ID_UNALLOCATED(7,5),
         ID_UNALLOCATED(7,6),
         ID_UNALLOCATED(7,7),
@@ -2665,7 +2667,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
         EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0),
         EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
         EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
-       EL2_REG_VNCR(HCR_EL2, reset_val, 0),
+       EL2_REG_VNCR(HCR_EL2, reset_hcr, 0),
         EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
         EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
         EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
@@ -2727,6 +2729,18 @@ static const struct sys_reg_desc sys_reg_descs[] = {
         EL2_REG(SP_EL2, NULL, reset_unknown, 0),
  };
  
+static struct sys_reg_desc sys_insn_descs[] = {
+       { SYS_DESC(SYS_DC_ISW), access_dcsw },
+       { SYS_DESC(SYS_DC_IGSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_CSW), access_dcsw },
+       { SYS_DESC(SYS_DC_CGSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_CISW), access_dcsw },
+       { SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
+};
+
  static const struct sys_reg_desc *first_idreg;
  
  static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
@@ -2737,8 +2751,7 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
                 return ignore_write(vcpu, p);
         } else {
                 u64 dfr = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1);
-               u64 pfr = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1);
-               u32 el3 = !!SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr);
+               u32 el3 = kvm_has_feat(vcpu->kvm, ID_AA64PFR0_EL1, EL3, IMP);
  
                 p->regval = ((SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr) << 28) |
                              (SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr) << 24) |
@@ -3159,7 +3172,8 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
  /**
   * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
   * @vcpu: The VCPU pointer
- * @run:  The kvm_run struct
+ * @global: &struct sys_reg_desc
+ * @nr_global: size of the @global array
   */
  static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
                             const struct sys_reg_desc *global,
@@ -3326,7 +3340,9 @@ static int kvm_emulate_cp15_id_reg(struct kvm_vcpu *vcpu,
  /**
   * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
   * @vcpu: The VCPU pointer
- * @run:  The kvm_run struct
+ * @params: &struct sys_reg_params
+ * @global: &struct sys_reg_desc
+ * @nr_global: size of the @global array
   */
  static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
                             struct sys_reg_params *params,
@@ -3384,12 +3400,6 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
         return kvm_handle_cp_32(vcpu, &params, cp14_regs, ARRAY_SIZE(cp14_regs));
  }
  
-static bool is_imp_def_sys_reg(struct sys_reg_params *params)
-{
-       // See ARM DDI 0487E.a, section D12.3.2
-       return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
-}
-
  /**
   * emulate_sys_reg - Emulate a guest access to an AArch64 system register
   * @vcpu: The VCPU pointer
@@ -3398,26 +3408,106 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params)
   * Return: true if the system register access was successful, false otherwise.
   */
  static bool emulate_sys_reg(struct kvm_vcpu *vcpu,
-                          struct sys_reg_params *params)
+                           struct sys_reg_params *params)
  {
         const struct sys_reg_desc *r;
  
         r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
-
         if (likely(r)) {
                 perform_access(vcpu, params, r);
                 return true;
         }
  
-       if (is_imp_def_sys_reg(params)) {
-               kvm_inject_undefined(vcpu);
+       print_sys_reg_msg(params,
+                         "Unsupported guest sys_reg access at: %lx [%08lx]\n",
+                         *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
+       kvm_inject_undefined(vcpu);
+
+       return false;
+}
+
+static void *idregs_debug_start(struct seq_file *s, loff_t *pos)
+{
+       struct kvm *kvm = s->private;
+       u8 *iter;
+
+       mutex_lock(&kvm->arch.config_lock);
+
+       iter = &kvm->arch.idreg_debugfs_iter;
+       if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags) &&
+           *iter == (u8)~0) {
+               *iter = *pos;
+               if (*iter >= KVM_ARM_ID_REG_NUM)
+                       iter = NULL;
         } else {
-               print_sys_reg_msg(params,
-                                 "Unsupported guest sys_reg access at: %lx [%08lx]\n",
-                                 *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
-               kvm_inject_undefined(vcpu);
+               iter = ERR_PTR(-EBUSY);
         }
-       return false;
+
+       mutex_unlock(&kvm->arch.config_lock);
+
+       return iter;
+}
+
+static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct kvm *kvm = s->private;
+
+       (*pos)++;
+
+       if ((kvm->arch.idreg_debugfs_iter + 1) < KVM_ARM_ID_REG_NUM) {
+               kvm->arch.idreg_debugfs_iter++;
+
+               return &kvm->arch.idreg_debugfs_iter;
+       }
+
+       return NULL;
+}
+
+static void idregs_debug_stop(struct seq_file *s, void *v)
+{
+       struct kvm *kvm = s->private;
+
+       if (IS_ERR(v))
+               return;
+
+       mutex_lock(&kvm->arch.config_lock);
+
+       kvm->arch.idreg_debugfs_iter = ~0;
+
+       mutex_unlock(&kvm->arch.config_lock);
+}
+
+static int idregs_debug_show(struct seq_file *s, void *v)
+{
+       struct kvm *kvm = s->private;
+       const struct sys_reg_desc *desc;
+
+       desc = first_idreg + kvm->arch.idreg_debugfs_iter;
+
+       if (!desc->name)
+               return 0;
+
+       seq_printf(s, "%20s:\t%016llx\n",
+                  desc->name, IDREG(kvm, IDX_IDREG(kvm->arch.idreg_debugfs_iter)));
+
+       return 0;
+}
+
+static const struct seq_operations idregs_debug_sops = {
+       .start  = idregs_debug_start,
+       .next   = idregs_debug_next,
+       .stop   = idregs_debug_stop,
+       .show   = idregs_debug_show,
+};
+
+DEFINE_SEQ_ATTRIBUTE(idregs_debug);
+
+void kvm_sys_regs_create_debugfs(struct kvm *kvm)
+{
+       kvm->arch.idreg_debugfs_iter = ~0;
+
+       debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm,
+                           &idregs_debug_fops);
  }
  
  static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
@@ -3467,28 +3557,39 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
  }
  
  /**
- * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * kvm_handle_sys_reg -- handles a system instruction or mrs/msr instruction
+ *                      trap on a guest execution
   * @vcpu: The VCPU pointer
   */
  int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
  {
+       const struct sys_reg_desc *desc = NULL;
         struct sys_reg_params params;
         unsigned long esr = kvm_vcpu_get_esr(vcpu);
         int Rt = kvm_vcpu_sys_get_rt(vcpu);
+       int sr_idx;
  
         trace_kvm_handle_sys_reg(esr);
  
-       if (__check_nv_sr_forward(vcpu))
+       if (triage_sysreg_trap(vcpu, &sr_idx))
                 return 1;
  
         params = esr_sys64_to_params(esr);
         params.regval = vcpu_get_reg(vcpu, Rt);
  
-       if (!emulate_sys_reg(vcpu, &params))
-               return 1;
+       /* System registers have Op0=={2,3}, as per DDI487 J.a C5.1.2 */
+       if (params.Op0 == 2 || params.Op0 == 3)
+               desc = &sys_reg_descs[sr_idx];
+       else
+               desc = &sys_insn_descs[sr_idx];
  
-       if (!params.is_write)
+       perform_access(vcpu, &params, desc);
+
+       /* Read from system register? */
+       if (!params.is_write &&
+           (params.Op0 == 2 || params.Op0 == 3))
                 vcpu_set_reg(vcpu, Rt, params.regval);
+
         return 1;
  }
  
@@ -3930,11 +4031,86 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *
         return 0;
  }
  
+void kvm_init_sysreg(struct kvm_vcpu *vcpu)
+{
+       struct kvm *kvm = vcpu->kvm;
+
+       mutex_lock(&kvm->arch.config_lock);
+
+       /*
+        * In the absence of FGT, we cannot independently trap TLBI
+        * Range instructions. This isn't great, but trapping all
+        * TLBIs would be far worse. Live with it...
+        */
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+               vcpu->arch.hcr_el2 |= HCR_TTLBOS;
+
+       if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+               vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS;
+
+               if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+                       vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+       }
+
+       if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags))
+               goto out;
+
+       kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1           |
+                                      HFGxTR_EL2_nMAIR2_EL1            |
+                                      HFGxTR_EL2_nS2POR_EL1            |
+                                      HFGxTR_EL2_nPOR_EL1              |
+                                      HFGxTR_EL2_nPOR_EL0              |
+                                      HFGxTR_EL2_nACCDATA_EL1          |
+                                      HFGxTR_EL2_nSMPRI_EL1_MASK       |
+                                      HFGxTR_EL2_nTPIDR2_EL0_MASK);
+
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+               kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1OS|
+                                               HFGITR_EL2_TLBIRVALE1OS |
+                                               HFGITR_EL2_TLBIRVAAE1OS |
+                                               HFGITR_EL2_TLBIRVAE1OS  |
+                                               HFGITR_EL2_TLBIVAALE1OS |
+                                               HFGITR_EL2_TLBIVALE1OS  |
+                                               HFGITR_EL2_TLBIVAAE1OS  |
+                                               HFGITR_EL2_TLBIASIDE1OS |
+                                               HFGITR_EL2_TLBIVAE1OS   |
+                                               HFGITR_EL2_TLBIVMALLE1OS);
+
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+               kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1  |
+                                               HFGITR_EL2_TLBIRVALE1   |
+                                               HFGITR_EL2_TLBIRVAAE1   |
+                                               HFGITR_EL2_TLBIRVAE1    |
+                                               HFGITR_EL2_TLBIRVAALE1IS|
+                                               HFGITR_EL2_TLBIRVALE1IS |
+                                               HFGITR_EL2_TLBIRVAAE1IS |
+                                               HFGITR_EL2_TLBIRVAE1IS  |
+                                               HFGITR_EL2_TLBIRVAALE1OS|
+                                               HFGITR_EL2_TLBIRVALE1OS |
+                                               HFGITR_EL2_TLBIRVAAE1OS |
+                                               HFGITR_EL2_TLBIRVAE1OS);
+
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
+               kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
+                                               HFGxTR_EL2_nPIR_EL1);
+
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
+               kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 |
+                                                 HAFGRTR_EL2_RES1);
+
+       set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags);
+out:
+       mutex_unlock(&kvm->arch.config_lock);
+}
+
  int __init kvm_sys_reg_table_init(void)
  {
         struct sys_reg_params params;
         bool valid = true;
         unsigned int i;
+       int ret = 0;
+
+       check_res_bits();
  
         /* Make sure tables are unique and in order. */
         valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
@@ -3943,6 +4119,7 @@ int __init kvm_sys_reg_table_init(void)
         valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
         valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
         valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false);
+       valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false);
  
         if (!valid)
                 return -EINVAL;
@@ -3957,8 +4134,13 @@ int __init kvm_sys_reg_table_init(void)
         if (!first_idreg)
                 return -EINVAL;
  
-       if (kvm_get_mode() == KVM_MODE_NV)
-               return populate_nv_trap_config();
+       ret = populate_nv_trap_config();
  
-       return 0;
+       for (i = 0; !ret && i < ARRAY_SIZE(sys_reg_descs); i++)
+               ret = populate_sysreg_config(sys_reg_descs + i, i);
+
+       for (i = 0; !ret && i < ARRAY_SIZE(sys_insn_descs); i++)
+               ret = populate_sysreg_config(sys_insn_descs + i, i);
+
+       return ret;
  }
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h

index c65c129b35001049ed4b14c02ec6fcfb8e37b1fa..997eea21ba2ab3d7b08ae8f6178d208b74453606 100644 (file)
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -233,6 +233,8 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
  int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
                          const struct sys_reg_desc table[], unsigned int num);
  
+bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
+
  #define AA32(_x)       .aarch32_map = AA32_##_x
  #define Op0(_x)        .Op0 = _x
  #define Op1(_x)        .Op1 = _x
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c

index 85606a531dc3987fc04c18a179c577efd2113ff8..389025ce7749bc32e889f80f2a395552831b0f77 100644 (file)
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -149,7 +149,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
         seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
         seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
         if (v3)
-               seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
+               seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
         seq_printf(s, "enabled:\t%d\n", dist->enabled);
         seq_printf(s, "\n");
  
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c

index e949e1d0fd9f7ba80d3a1e5d6cda02354dfe0403..f20941f83a077cbc2be093b487e4afe5a29f30ae 100644 (file)
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -53,9 +53,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
  
-       INIT_LIST_HEAD(&dist->lpi_list_head);
         INIT_LIST_HEAD(&dist->lpi_translation_cache);
         raw_spin_lock_init(&dist->lpi_list_lock);
+       xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
  }
  
  /* CREATION */
@@ -309,7 +309,7 @@ int vgic_init(struct kvm *kvm)
                 vgic_lpi_translation_cache_init(kvm);
  
         /*
-        * If we have GICv4.1 enabled, unconditionnaly request enable the
+        * If we have GICv4.1 enabled, unconditionally request enable the
          * v4 support so that we get HW-accelerated vSGIs. Otherwise, only
          * enable it if we present a virtual ITS to the guest.
          */
@@ -366,6 +366,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
  
         if (vgic_supports_direct_msis(kvm))
                 vgic_v4_teardown(kvm);
+
+       xa_destroy(&dist->lpi_xa);
  }
  
  static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -445,13 +447,15 @@ int vgic_lazy_init(struct kvm *kvm)
  /* RESOURCE MAPPING */
  
  /**
+ * kvm_vgic_map_resources - map the MMIO regions
+ * @kvm: kvm struct pointer
+ *
   * Map the MMIO regions depending on the VGIC model exposed to the guest
   * called on the first VCPU run.
   * Also map the virtual CPU interface into the VM.
   * v2 calls vgic_init() if not already done.
   * v3 and derivatives return an error if the VGIC is not initialized.
   * vgic_ready() returns true if this function has succeeded.
- * @kvm: kvm struct pointer
   */
  int kvm_vgic_map_resources(struct kvm *kvm)
  {
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c

index e2764d0ffa9f32094c57580ed5d987f99b5d2ade..e85a495ada9c193aa75ac4e2d404070d6390c758 100644 (file)
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -52,7 +52,12 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
         if (!irq)
                 return ERR_PTR(-ENOMEM);
  
-       INIT_LIST_HEAD(&irq->lpi_list);
+       ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
+       if (ret) {
+               kfree(irq);
+               return ERR_PTR(ret);
+       }
+
         INIT_LIST_HEAD(&irq->ap_list);
         raw_spin_lock_init(&irq->irq_lock);
  
@@ -68,30 +73,30 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
          * There could be a race with another vgic_add_lpi(), so we need to
          * check that we don't add a second list entry with the same LPI.
          */
-       list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) {
-               if (oldirq->intid != intid)
-                       continue;
-
+       oldirq = xa_load(&dist->lpi_xa, intid);
+       if (vgic_try_get_irq_kref(oldirq)) {
                 /* Someone was faster with adding this LPI, lets use that. */
                 kfree(irq);
                 irq = oldirq;
  
-               /*
-                * This increases the refcount, the caller is expected to
-                * call vgic_put_irq() on the returned pointer once it's
-                * finished with the IRQ.
-                */
-               vgic_get_irq_kref(irq);
+               goto out_unlock;
+       }
  
+       ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
+       if (ret) {
+               xa_release(&dist->lpi_xa, intid);
+               kfree(irq);
                 goto out_unlock;
         }
  
-       list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
-       dist->lpi_list_count++;
+       atomic_inc(&dist->lpi_count);
  
  out_unlock:
         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  
+       if (ret)
+               return ERR_PTR(ret);
+
         /*
          * We "cache" the configuration table entries in our struct vgic_irq's.
          * However we only have those structs for mapped IRQs, so we read in
@@ -158,7 +163,7 @@ struct vgic_translation_cache_entry {
   * @cte_esz: collection table entry size
   * @dte_esz: device table entry size
   * @ite_esz: interrupt translation table entry size
- * @save tables: save the ITS tables into guest RAM
+ * @save_tables: save the ITS tables into guest RAM
   * @restore_tables: restore the ITS internal structs from tables
   *  stored in guest RAM
   * @commit: initialize the registers which expose the ABI settings,
@@ -311,6 +316,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
         return 0;
  }
  
+#define GIC_LPI_MAX_INTID      ((1 << INTERRUPT_ID_BITS_ITS) - 1)
+
  /*
   * Create a snapshot of the current LPIs targeting @vcpu, so that we can
   * enumerate those LPIs without holding any lock.
@@ -319,6 +326,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
  int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
+       XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
         struct vgic_irq *irq;
         unsigned long flags;
         u32 *intids;
@@ -331,13 +339,15 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
          * command). If coming from another path (such as enabling LPIs),
          * we must be careful not to overrun the array.
          */
-       irq_count = READ_ONCE(dist->lpi_list_count);
+       irq_count = atomic_read(&dist->lpi_count);
         intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
         if (!intids)
                 return -ENOMEM;
  
         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-       list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+       rcu_read_lock();
+
+       xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
                 if (i == irq_count)
                         break;
                 /* We don't need to "get" the IRQ, as we hold the list lock. */
@@ -345,6 +355,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
                         continue;
                 intids[i++] = irq->intid;
         }
+
+       rcu_read_unlock();
         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  
         *intid_ptr = intids;
@@ -468,6 +480,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
                 }
  
                 irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
+               if (!irq)
+                       continue;
+
                 raw_spin_lock_irqsave(&irq->irq_lock, flags);
                 irq->pending_latch = pendmask & (1U << bit_nr);
                 vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
@@ -592,8 +607,8 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
  
         irq = __vgic_its_check_cache(dist, db, devid, eventid);
-       if (irq)
-               vgic_get_irq_kref(irq);
+       if (!vgic_try_get_irq_kref(irq))
+               irq = NULL;
  
         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  
@@ -637,8 +652,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
          * was in the cache, and increment it on the new interrupt.
          */
         if (cte->irq)
-               __vgic_put_lpi_locked(kvm, cte->irq);
+               vgic_put_irq(kvm, cte->irq);
  
+       /*
+        * The irq refcount is guaranteed to be nonzero while holding the
+        * its_lock, as the ITE (and the reference it holds) cannot be freed.
+        */
+       lockdep_assert_held(&its->its_lock);
         vgic_get_irq_kref(irq);
  
         cte->db         = db;
@@ -669,7 +689,7 @@ void vgic_its_invalidate_cache(struct kvm *kvm)
                 if (!cte->irq)
                         break;
  
-               __vgic_put_lpi_locked(kvm, cte->irq);
+               vgic_put_irq(kvm, cte->irq);
                 cte->irq = NULL;
         }
  
@@ -1342,8 +1362,8 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
  }
  
  /**
- * vgic_its_invall - invalidate all LPIs targetting a given vcpu
- * @vcpu: the vcpu for which the RD is targetted by an invalidation
+ * vgic_its_invall - invalidate all LPIs targeting a given vcpu
+ * @vcpu: the vcpu for which the RD is targeted by an invalidation
   *
   * Contrary to the INVALL command, this targets a RD instead of a
   * collection, and we don't need to hold the its_lock, since no ITS is
@@ -1432,6 +1452,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
  
         for (i = 0; i < irq_count; i++) {
                 irq = vgic_get_irq(kvm, NULL, intids[i]);
+               if (!irq)
+                       continue;
  
                 update_affinity(irq, vcpu2);
  
@@ -2139,7 +2161,7 @@ static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
  }
  
  /**
- * entry_fn_t - Callback called on a table entry restore path
+ * typedef entry_fn_t - Callback called on a table entry restore path
   * @its: its handle
   * @id: id of the entry
   * @entry: pointer to the entry
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c

index 9465d3706ab9bcf8fb7fd72a8904d6b87ede574a..4ea3340786b950527fe87821ec26252a99e384bb 100644 (file)
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -380,6 +380,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
         struct vgic_irq *irq;
         gpa_t last_ptr = ~(gpa_t)0;
         bool vlpi_avail = false;
+       unsigned long index;
         int ret = 0;
         u8 val;
  
@@ -396,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
                 vlpi_avail = true;
         }
  
-       list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+       xa_for_each(&dist->lpi_xa, index, irq) {
                 int byte_offset, bit_nr;
                 struct kvm_vcpu *vcpu;
                 gpa_t pendbase, ptr;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c

index db2a95762b1b663e7ccc2c45184f8a8af1fd4ffd..4ec93587c8cd2b07d11e08efe0614e211c44ef51 100644 (file)
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -30,7 +30,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
   *         its->its_lock (mutex)
   *           vgic_cpu->ap_list_lock            must be taken with IRQs disabled
   *             kvm->lpi_list_lock              must be taken with IRQs disabled
- *               vgic_irq->irq_lock            must be taken with IRQs disabled
+ *               vgic_dist->lpi_xa.xa_lock     must be taken with IRQs disabled
+ *                 vgic_irq->irq_lock          must be taken with IRQs disabled
   *
   * As the ap_list_lock might be taken from the timer interrupt handler,
   * we have to disable IRQs before taking this lock and everything lower
@@ -54,32 +55,22 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
   */
  
  /*
- * Iterate over the VM's list of mapped LPIs to find the one with a
- * matching interrupt ID and return a reference to the IRQ structure.
+ * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
+ * structure. The caller is expected to call vgic_put_irq() later once it's
+ * finished with the IRQ.
   */
  static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
         struct vgic_irq *irq = NULL;
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
  
-       list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
-               if (irq->intid != intid)
-                       continue;
+       rcu_read_lock();
  
-               /*
-                * This increases the refcount, the caller is expected to
-                * call vgic_put_irq() later once it's finished with the IRQ.
-                */
-               vgic_get_irq_kref(irq);
-               goto out_unlock;
-       }
-       irq = NULL;
+       irq = xa_load(&dist->lpi_xa, intid);
+       if (!vgic_try_get_irq_kref(irq))
+               irq = NULL;
  
-out_unlock:
-       raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+       rcu_read_unlock();
  
         return irq;
  }
@@ -120,22 +111,6 @@ static void vgic_irq_release(struct kref *ref)
  {
  }
  
-/*
- * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
- */
-void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
-{
-       struct vgic_dist *dist = &kvm->arch.vgic;
-
-       if (!kref_put(&irq->refcount, vgic_irq_release))
-               return;
-
-       list_del(&irq->lpi_list);
-       dist->lpi_list_count--;
-
-       kfree(irq);
-}
-
  void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
@@ -144,9 +119,15 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
         if (irq->intid < VGIC_MIN_LPI)
                 return;
  
-       raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-       __vgic_put_lpi_locked(kvm, irq);
-       raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+       if (!kref_put(&irq->refcount, vgic_irq_release))
+               return;
+
+       xa_lock_irqsave(&dist->lpi_xa, flags);
+       __xa_erase(&dist->lpi_xa, irq->intid);
+       xa_unlock_irqrestore(&dist->lpi_xa, flags);
+
+       atomic_dec(&dist->lpi_count);
+       kfree_rcu(irq, rcu);
  }
  
  void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
@@ -203,7 +184,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
  }
  
  /**
- * kvm_vgic_target_oracle - compute the target vcpu for an irq
+ * vgic_target_oracle - compute the target vcpu for an irq
   *
   * @irq:       The irq to route. Must be already locked.
   *
@@ -404,7 +385,8 @@ retry:
  
         /*
          * Grab a reference to the irq to reflect the fact that it is
-        * now in the ap_list.
+        * now in the ap_list. This is safe as the caller must already hold a
+        * reference on the irq.
          */
         vgic_get_irq_kref(irq);
         list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h

index 8d134569d0a1f6c26f9a4735aa0c89847d9add48..0c2b82de8fa3c723279695c8641de02e7775065d 100644 (file)
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -180,7 +180,6 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
                      gpa_t addr, int len);
  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
                               u32 intid);
-void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
  void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
  bool vgic_get_phys_line_level(struct vgic_irq *irq);
  void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
@@ -220,12 +219,20 @@ void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
  void vgic_v2_save_state(struct kvm_vcpu *vcpu);
  void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
  
-static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
  {
+       if (!irq)
+               return false;
+
         if (irq->intid < VGIC_MIN_LPI)
-               return;
+               return true;
  
-       kref_get(&irq->refcount);
+       return kref_get_unless_zero(&irq->refcount);
+}
+
+static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+{
+       WARN_ON_ONCE(!vgic_try_get_irq_kref(irq));
  }
  
  void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps

index b912b1409fc09aaf08705b8d75b5d221ae0d020e..65090dd34641ce6605601a98e363ebc2f06b4356 100644 (file)
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -35,6 +35,7 @@ HAS_GENERIC_AUTH_IMP_DEF
  HAS_GIC_CPUIF_SYSREGS
  HAS_GIC_PRIO_MASKING
  HAS_GIC_PRIO_RELAXED_SYNC
+HAS_HCR_NV1
  HAS_HCX
  HAS_LDAPR
  HAS_LPA2
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg

index 4c9b679343674ab0ceaa04799654757391dd262c..53daaaef46cbb76afa43899a81d68219b3f4f5ce 100644 (file)
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1366,6 +1366,7 @@ EndEnum
  UnsignedEnum   43:40   SPECRES
         0b0000  NI
         0b0001  IMP
+       0b0010  COSP_RCTX
  EndEnum
  UnsignedEnum   39:36   SB
         0b0000  NI
@@ -1492,7 +1493,12 @@ EndEnum
  EndSysreg
  
  Sysreg ID_AA64ISAR3_EL1        3       0       0       6       3
-Res0   63:12
+Res0   63:16
+UnsignedEnum   15:12   PACM
+       0b0000  NI
+       0b0001  TRIVIAL_IMP
+       0b0010  FULL_IMP
+EndEnum
  UnsignedEnum   11:8    TLBIW
         0b0000  NI
         0b0001  IMP
@@ -1791,6 +1797,43 @@ UnsignedEnum     3:0     TCRX
  EndEnum
  EndSysreg
  
+Sysreg ID_AA64MMFR4_EL1        3       0       0       7       4
+Res0   63:40
+UnsignedEnum   39:36   E3DSE
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Res0   35:28
+SignedEnum     27:24   E2H0
+       0b0000  IMP
+       0b1110  NI_NV1
+       0b1111  NI
+EndEnum
+UnsignedEnum   23:20   NV_frac
+       0b0000  NV_NV2
+       0b0001  NV2_ONLY
+EndEnum
+UnsignedEnum   19:16   FGWTE3
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+UnsignedEnum   15:12   HACDBS
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+UnsignedEnum   11:8    ASID2
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+SignedEnum     7:4     EIESB
+       0b0000  NI
+       0b0001  ToEL3
+       0b0010  ToELx
+       0b1111  ANY
+EndEnum
+Res0   3:0
+EndSysreg
+
  Sysreg SCTLR_EL1       3       0       1       0       0
  Field  63      TIDCP
  Field  62      SPINTMASK
diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h

index 98a3f4b168bd2687f3e4828aa681d29e0c13b97e..ef2e37a10a0feb9af3543481cffddd75c5b3a8ef 100644 (file)
--- a/arch/csky/include/asm/jump_label.h
+++ b/arch/csky/include/asm/jump_label.h
@@ -12,7 +12,7 @@
  static __always_inline bool arch_static_branch(struct static_key *key,
                                                bool branch)
  {
-       asm_volatile_goto(
+       asm goto(
                 "1:     nop32                                   \n"
                 "       .pushsection    __jump_table, \"aw\"    \n"
                 "       .align          2                       \n"
@@ -29,7 +29,7 @@ label:
  static __always_inline bool arch_static_branch_jump(struct static_key *key,
                                                     bool branch)
  {
-       asm_volatile_goto(
+       asm goto(
                 "1:     bsr32           %l[label]               \n"
                 "       .pushsection    __jump_table, \"aw\"    \n"
                 "       .align          2                       \n"
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig

index 10959e6c3583255264aef0ea7de6e6477a003418..eb2139387a54572c841660a422488fd2231a1ff7 100644 (file)
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -12,6 +12,7 @@ config LOONGARCH
         select ARCH_DISABLE_KASAN_INLINE
         select ARCH_ENABLE_MEMORY_HOTPLUG
         select ARCH_ENABLE_MEMORY_HOTREMOVE
+       select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
         select ARCH_HAS_ACPI_TABLE_UPGRADE      if ACPI
         select ARCH_HAS_CPU_FINALIZE_INIT
         select ARCH_HAS_FORTIFY_SOURCE
@@ -99,6 +100,7 @@ config LOONGARCH
         select HAVE_ARCH_KFENCE
         select HAVE_ARCH_KGDB if PERF_EVENTS
         select HAVE_ARCH_MMAP_RND_BITS if MMU
+       select HAVE_ARCH_SECCOMP
         select HAVE_ARCH_SECCOMP_FILTER
         select HAVE_ARCH_TRACEHOOK
         select HAVE_ARCH_TRANSPARENT_HUGEPAGE
@@ -131,7 +133,6 @@ config LOONGARCH
         select HAVE_KPROBES
         select HAVE_KPROBES_ON_FTRACE
         select HAVE_KRETPROBES
-       select HAVE_KVM
         select HAVE_MOD_ARCH_SPECIFIC
         select HAVE_NMI
         select HAVE_PCI
@@ -632,23 +633,6 @@ config RANDOMIZE_BASE_MAX_OFFSET
  
           This is limited by the size of the lower address memory, 256MB.
  
-config SECCOMP
-       bool "Enable seccomp to safely compute untrusted bytecode"
-       depends on PROC_FS
-       default y
-       help
-         This kernel feature is useful for number crunching applications
-         that may need to compute untrusted bytecode during their
-         execution. By using pipes or other transports made available to
-         the process as file descriptors supporting the read/write
-         syscalls, it's possible to isolate those applications in
-         their own address space using seccomp. Once seccomp is
-         enabled via /proc/<pid>/seccomp, it cannot be disabled
-         and the task is only allowed to execute a few safe syscalls
-         defined by each seccomp mode.
-
-         If unsure, say Y. Only embedded should say N here.
-
  endmenu
  
  config ARCH_SELECT_MEMORY_MODEL
@@ -667,10 +651,6 @@ config ARCH_SPARSEMEM_ENABLE
           or have huge holes in the physical address space for other reasons.
           See <file:Documentation/mm/numa.rst> for more.
  
-config ARCH_ENABLE_THP_MIGRATION
-       def_bool y
-       depends on TRANSPARENT_HUGEPAGE
-
  config ARCH_MEMORY_PROBE
         def_bool y
         depends on MEMORY_HOTPLUG
diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts

index b38071a4d0b023c7faf29935d3bb6d5e0c65ca76..8aefb0c126722980a345062cae02a6127c02b52e 100644 (file)
--- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
@@ -60,7 +60,7 @@
  
         #address-cells = <1>;
         #size-cells = <0>;
-       eeprom@57{
+       eeprom@57 {
                 compatible = "atmel,24c16";
                 reg = <0x57>;
                 pagesize = <16>;
diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts

index 132a2d1ea8bce1ac95222875b6ad74d5ebf06b14..ed4d324340411dee9b88e52720329cf839307ad0 100644 (file)
--- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
@@ -78,7 +78,7 @@
  
         #address-cells = <1>;
         #size-cells = <0>;
-       eeprom@57{
+       eeprom@57 {
                 compatible = "atmel,24c16";
                 reg = <0x57>;
                 pagesize = <16>;
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h

index 8de6c4b83a61a8088903abc67ddc50beb35233d5..49e29b29996f0f4473c5d628c936c7528630ad52 100644 (file)
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -32,8 +32,10 @@ static inline bool acpi_has_cpu_in_madt(void)
         return true;
  }
  
+#define MAX_CORE_PIC 256
+
  extern struct list_head acpi_wakeup_device_list;
-extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS];
+extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
  
  extern int __init parse_acpi_topology(void);
  
diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h

index 3cea299a5ef58313a305f7d5a086ae9a32e8aa95..29acfe3de3faae797beca198e58f9e5a5e570bbc 100644 (file)
--- a/arch/loongarch/include/asm/jump_label.h
+++ b/arch/loongarch/include/asm/jump_label.h
@@ -22,7 +22,7 @@
  
  static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
  {
-       asm_volatile_goto(
+       asm goto(
                 "1:     nop                     \n\t"
                 JUMP_TABLE_ENTRY
                 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
@@ -35,7 +35,7 @@ l_yes:
  
  static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
  {
-       asm_volatile_goto(
+       asm goto(
                 "1:     b       %l[l_yes]       \n\t"
                 JUMP_TABLE_ENTRY
                 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h

index 923d0bd382941acc5794d7622f7adfe1b2533422..109785922cf94e455b3945f18ec7b1f0c82b233d 100644 (file)
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -14,8 +14,6 @@
   * Some parts derived from the x86 version of this file.
   */
  
-#define __KVM_HAVE_READONLY_MEM
-
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
  #define KVM_DIRTY_LOG_PAGE_OFFSET      64
  
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c

index b6b097bbf8668a400105dd408c696e4135c824d8..5cf59c617126b7d00f65b3310ef39ea7bfb98e96 100644 (file)
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -29,11 +29,9 @@ int disabled_cpus;
  
  u64 acpi_saved_sp;
  
-#define MAX_CORE_PIC 256
-
  #define PREFIX                 "ACPI: "
  
-struct acpi_madt_core_pic acpi_core_pic[NR_CPUS];
+struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
  
  void __init __iomem * __acpi_map_table(unsigned long phys, unsigned long size)
  {
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c

index edf2bba80130670364e144ad301868a7dfd3bf93..634ef17fd38bf10d8bd9deef8a6693f0f4777c1e 100644 (file)
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -357,6 +357,8 @@ void __init platform_init(void)
         acpi_gbl_use_default_register_widths = false;
         acpi_boot_table_init();
  #endif
+
+       early_init_fdt_scan_reserved_mem();
         unflatten_and_copy_device_tree();
  
  #ifdef CONFIG_NUMA
@@ -390,8 +392,6 @@ static void __init arch_mem_init(char **cmdline_p)
  
         check_kernel_sections_mem();
  
-       early_init_fdt_scan_reserved_mem();
-
         /*
          * In order to reduce the possibility of kernel panic when failed to
          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c

index 2b49d30eb7c0185e043e462859e76a4ae64ecd67..aabee0b280fe5f43a70d8a1091e6268a37d701e9 100644 (file)
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -88,6 +88,73 @@ void show_ipi_list(struct seq_file *p, int prec)
         }
  }
  
+static inline void set_cpu_core_map(int cpu)
+{
+       int i;
+
+       cpumask_set_cpu(cpu, &cpu_core_setup_map);
+
+       for_each_cpu(i, &cpu_core_setup_map) {
+               if (cpu_data[cpu].package == cpu_data[i].package) {
+                       cpumask_set_cpu(i, &cpu_core_map[cpu]);
+                       cpumask_set_cpu(cpu, &cpu_core_map[i]);
+               }
+       }
+}
+
+static inline void set_cpu_sibling_map(int cpu)
+{
+       int i;
+
+       cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
+
+       for_each_cpu(i, &cpu_sibling_setup_map) {
+               if (cpus_are_siblings(cpu, i)) {
+                       cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
+                       cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
+               }
+       }
+}
+
+static inline void clear_cpu_sibling_map(int cpu)
+{
+       int i;
+
+       for_each_cpu(i, &cpu_sibling_setup_map) {
+               if (cpus_are_siblings(cpu, i)) {
+                       cpumask_clear_cpu(i, &cpu_sibling_map[cpu]);
+                       cpumask_clear_cpu(cpu, &cpu_sibling_map[i]);
+               }
+       }
+
+       cpumask_clear_cpu(cpu, &cpu_sibling_setup_map);
+}
+
+/*
+ * Calculate a new cpu_foreign_map mask whenever a
+ * new cpu appears or disappears.
+ */
+void calculate_cpu_foreign_map(void)
+{
+       int i, k, core_present;
+       cpumask_t temp_foreign_map;
+
+       /* Re-calculate the mask */
+       cpumask_clear(&temp_foreign_map);
+       for_each_online_cpu(i) {
+               core_present = 0;
+               for_each_cpu(k, &temp_foreign_map)
+                       if (cpus_are_siblings(i, k))
+                               core_present = 1;
+               if (!core_present)
+                       cpumask_set_cpu(i, &temp_foreign_map);
+       }
+
+       for_each_online_cpu(i)
+               cpumask_andnot(&cpu_foreign_map[i],
+                              &temp_foreign_map, &cpu_sibling_map[i]);
+}
+
  /* Send mailbox buffer via Mail_Send */
  static void csr_mail_send(uint64_t data, int cpu, int mailbox)
  {
@@ -303,6 +370,7 @@ int loongson_cpu_disable(void)
         numa_remove_cpu(cpu);
  #endif
         set_cpu_online(cpu, false);
+       clear_cpu_sibling_map(cpu);
         calculate_cpu_foreign_map();
         local_irq_save(flags);
         irq_migrate_all_off_this_cpu();
@@ -337,6 +405,7 @@ void __noreturn arch_cpu_idle_dead(void)
                 addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
         } while (addr == 0);
  
+       local_irq_disable();
         init_fn = (void *)TO_CACHE(addr);
         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
  
@@ -379,59 +448,6 @@ static int __init ipi_pm_init(void)
  core_initcall(ipi_pm_init);
  #endif
  
-static inline void set_cpu_sibling_map(int cpu)
-{
-       int i;
-
-       cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
-
-       for_each_cpu(i, &cpu_sibling_setup_map) {
-               if (cpus_are_siblings(cpu, i)) {
-                       cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
-                       cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
-               }
-       }
-}
-
-static inline void set_cpu_core_map(int cpu)
-{
-       int i;
-
-       cpumask_set_cpu(cpu, &cpu_core_setup_map);
-
-       for_each_cpu(i, &cpu_core_setup_map) {
-               if (cpu_data[cpu].package == cpu_data[i].package) {
-                       cpumask_set_cpu(i, &cpu_core_map[cpu]);
-                       cpumask_set_cpu(cpu, &cpu_core_map[i]);
-               }
-       }
-}
-
-/*
- * Calculate a new cpu_foreign_map mask whenever a
- * new cpu appears or disappears.
- */
-void calculate_cpu_foreign_map(void)
-{
-       int i, k, core_present;
-       cpumask_t temp_foreign_map;
-
-       /* Re-calculate the mask */
-       cpumask_clear(&temp_foreign_map);
-       for_each_online_cpu(i) {
-               core_present = 0;
-               for_each_cpu(k, &temp_foreign_map)
-                       if (cpus_are_siblings(i, k))
-                               core_present = 1;
-               if (!core_present)
-                       cpumask_set_cpu(i, &temp_foreign_map);
-       }
-
-       for_each_online_cpu(i)
-               cpumask_andnot(&cpu_foreign_map[i],
-                              &temp_foreign_map, &cpu_sibling_map[i]);
-}
-
  /* Preload SMP state for boot cpu */
  void smp_prepare_boot_cpu(void)
  {
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig

index 61f7e33b1f95731c3c1a207337e30a63ff028ad6..c4ef2b4d97974762cf99228affa2ec8da4b82622 100644 (file)
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -20,7 +20,6 @@ if VIRTUALIZATION
  config KVM
         tristate "Kernel-based Virtual Machine (KVM) support"
         depends on AS_HAS_LVZ_EXTENSION
-       depends on HAVE_KVM
         select HAVE_KVM_DIRTY_RING_ACQ_REL
         select HAVE_KVM_VCPU_ASYNC_IOCTL
         select KVM_COMMON
@@ -28,6 +27,7 @@ config KVM
         select KVM_GENERIC_HARDWARE_ENABLING
         select KVM_GENERIC_MMU_NOTIFIER
         select KVM_MMIO
+       select HAVE_KVM_READONLY_MEM
         select KVM_XFER_TO_GUEST_WORK
         help
           Support hosting virtualized guest machines using
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S

index ba976509bfe819ec51fdaa08f2a1ba4a334755cd..3634431db18a4a4992ea9b7d1ed44545214ff3df 100644 (file)
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -213,12 +213,6 @@ SYM_FUNC_START(kvm_enter_guest)
         /* Save host GPRs */
         kvm_save_host_gpr a2
  
-       /* Save host CRMD, PRMD to stack */
-       csrrd   a3, LOONGARCH_CSR_CRMD
-       st.d    a3, a2, PT_CRMD
-       csrrd   a3, LOONGARCH_CSR_PRMD
-       st.d    a3, a2, PT_PRMD
-
         addi.d  a2, a1, KVM_VCPU_ARCH
         st.d    sp, a2, KVM_ARCH_HSP
         st.d    tp, a2, KVM_ARCH_HTP
diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c

index 111328f6087285a01ccf4077672cf4cc85266866..bcc6b6d063d914dbf820b43f2c1308803646b395 100644 (file)
--- a/arch/loongarch/kvm/timer.c
+++ b/arch/loongarch/kvm/timer.c
@@ -23,24 +23,6 @@ static inline u64 tick_to_ns(struct kvm_vcpu *vcpu, u64 tick)
         return div_u64(tick * MNSEC_PER_SEC, vcpu->arch.timer_mhz);
  }
  
-/*
- * Push timer forward on timeout.
- * Handle an hrtimer event by push the hrtimer forward a period.
- */
-static enum hrtimer_restart kvm_count_timeout(struct kvm_vcpu *vcpu)
-{
-       unsigned long cfg, period;
-
-       /* Add periodic tick to current expire time */
-       cfg = kvm_read_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG);
-       if (cfg & CSR_TCFG_PERIOD) {
-               period = tick_to_ns(vcpu, cfg & CSR_TCFG_VAL);
-               hrtimer_add_expires_ns(&vcpu->arch.swtimer, period);
-               return HRTIMER_RESTART;
-       } else
-               return HRTIMER_NORESTART;
-}
-
  /* Low level hrtimer wake routine */
  enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
  {
@@ -50,7 +32,7 @@ enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
         kvm_queue_irq(vcpu, INT_TI);
         rcuwait_wake_up(&vcpu->wait);
  
-       return kvm_count_timeout(vcpu);
+       return HRTIMER_NORESTART;
  }
  
  /*
@@ -93,7 +75,8 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
         /*
          * Freeze the soft-timer and sync the guest stable timer with it.
          */
-       hrtimer_cancel(&vcpu->arch.swtimer);
+       if (kvm_vcpu_is_blocking(vcpu))
+               hrtimer_cancel(&vcpu->arch.swtimer);
  
         /*
          * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
@@ -168,26 +151,20 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu)
          * Here judge one-shot timer fired by checking whether TVAL is larger
          * than TCFG
          */
-       if (ticks < cfg) {
+       if (ticks < cfg)
                 delta = tick_to_ns(vcpu, ticks);
-               expire = ktime_add_ns(ktime_get(), delta);
-               vcpu->arch.expire = expire;
+       else
+               delta = 0;
+
+       expire = ktime_add_ns(ktime_get(), delta);
+       vcpu->arch.expire = expire;
+       if (kvm_vcpu_is_blocking(vcpu)) {
  
                 /*
                  * HRTIMER_MODE_PINNED is suggested since vcpu may run in
                  * the same physical cpu in next time
                  */
                 hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
-       } else if (vcpu->stat.generic.blocking) {
-               /*
-                * Inject timer interrupt so that halt polling can dectect and exit.
-                * VCPU is scheduled out already and sleeps in rcuwait queue and
-                * will not poll pending events again. kvm_queue_irq() is not enough,
-                * hrtimer swtimer should be used here.
-                */
-               expire = ktime_add_ns(ktime_get(), 10);
-               vcpu->arch.expire = expire;
-               hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
         }
  }
  
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c

index 27701991886dda7e3a6f75bd8a7f71a86995735b..3a8779065f73b45425f69b2cf204545dd2e0c908 100644 (file)
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -298,74 +298,92 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
         return ret;
  }
  
-static int _kvm_get_cpucfg(int id, u64 *v)
+static int _kvm_get_cpucfg_mask(int id, u64 *v)
  {
-       int ret = 0;
-
-       if (id < 0 && id >= KVM_MAX_CPUCFG_REGS)
+       if (id < 0 || id >= KVM_MAX_CPUCFG_REGS)
                 return -EINVAL;
  
         switch (id) {
-       case 2:
-               /* Return CPUCFG2 features which have been supported by KVM */
+       case LOONGARCH_CPUCFG0:
+               *v = GENMASK(31, 0);
+               return 0;
+       case LOONGARCH_CPUCFG1:
+               /* CPUCFG1_MSGINT is not supported by KVM */
+               *v = GENMASK(25, 0);
+               return 0;
+       case LOONGARCH_CPUCFG2:
+               /* CPUCFG2 features unconditionally supported by KVM */
                 *v = CPUCFG2_FP     | CPUCFG2_FPSP  | CPUCFG2_FPDP     |
                      CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV |
-                    CPUCFG2_LAM;
+                    CPUCFG2_LSPW | CPUCFG2_LAM;
                 /*
-                * If LSX is supported by CPU, it is also supported by KVM,
-                * as we implement it.
+                * For the ISA extensions listed below, if one is supported
+                * by the host, then it is also supported by KVM.
                  */
                 if (cpu_has_lsx)
                         *v |= CPUCFG2_LSX;
-               /*
-                * if LASX is supported by CPU, it is also supported by KVM,
-                * as we implement it.
-                */
                 if (cpu_has_lasx)
                         *v |= CPUCFG2_LASX;
  
-               break;
+               return 0;
+       case LOONGARCH_CPUCFG3:
+               *v = GENMASK(16, 0);
+               return 0;
+       case LOONGARCH_CPUCFG4:
+       case LOONGARCH_CPUCFG5:
+               *v = GENMASK(31, 0);
+               return 0;
+       case LOONGARCH_CPUCFG16:
+               *v = GENMASK(16, 0);
+               return 0;
+       case LOONGARCH_CPUCFG17 ... LOONGARCH_CPUCFG20:
+               *v = GENMASK(30, 0);
+               return 0;
         default:
-               ret = -EINVAL;
-               break;
+               /*
+                * CPUCFG bits should be zero if reserved by HW or not
+                * supported by KVM.
+                */
+               *v = 0;
+               return 0;
         }
-       return ret;
  }
  
  static int kvm_check_cpucfg(int id, u64 val)
  {
-       u64 mask;
-       int ret = 0;
-
-       if (id < 0 && id >= KVM_MAX_CPUCFG_REGS)
-               return -EINVAL;
+       int ret;
+       u64 mask = 0;
  
-       if (_kvm_get_cpucfg(id, &mask))
+       ret = _kvm_get_cpucfg_mask(id, &mask);
+       if (ret)
                 return ret;
  
+       if (val & ~mask)
+               /* Unsupported features and/or the higher 32 bits should not be set */
+               return -EINVAL;
+
         switch (id) {
-       case 2:
-               /* CPUCFG2 features checking */
-               if (val & ~mask)
-                       /* The unsupported features should not be set */
-                       ret = -EINVAL;
-               else if (!(val & CPUCFG2_LLFTP))
-                       /* The LLFTP must be set, as guest must has a constant timer */
-                       ret = -EINVAL;
-               else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP)))
-                       /* Single and double float point must both be set when enable FP */
-                       ret = -EINVAL;
-               else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP))
-                       /* FP should be set when enable LSX */
-                       ret = -EINVAL;
-               else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX))
-                       /* LSX, FP should be set when enable LASX, and FP has been checked before. */
-                       ret = -EINVAL;
-               break;
+       case LOONGARCH_CPUCFG2:
+               if (!(val & CPUCFG2_LLFTP))
+                       /* Guests must have a constant timer */
+                       return -EINVAL;
+               if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP)))
+                       /* Single and double float point must both be set when FP is enabled */
+                       return -EINVAL;
+               if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP))
+                       /* LSX architecturally implies FP but val does not satisfy that */
+                       return -EINVAL;
+               if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX))
+                       /* LASX architecturally implies LSX and FP but val does not satisfy that */
+                       return -EINVAL;
+               return 0;
         default:
-               break;
+               /*
+                * Values for the other CPUCFG IDs are not being further validated
+                * besides the mask check above.
+                */
+               return 0;
         }
-       return ret;
  }
  
  static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
@@ -566,7 +584,7 @@ static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu,
         uint64_t val;
         uint64_t __user *uaddr = (uint64_t __user *)attr->addr;
  
-       ret = _kvm_get_cpucfg(attr->attr, &val);
+       ret = _kvm_get_cpucfg_mask(attr->attr, &val);
         if (ret)
                 return ret;
  
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c

index cc3e81fe0186f4f0fa8de9cedfc75138583ce23f..c608adc9984581d0419594a8eb87ae18a3e9ec63 100644 (file)
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -44,6 +44,9 @@ void *kasan_mem_to_shadow(const void *addr)
                 unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
                 unsigned long offset = 0;
  
+               if (maddr >= FIXADDR_START)
+                       return (void *)(kasan_early_shadow_page);
+
                 maddr &= XRANGE_SHADOW_MASK;
                 switch (xrange) {
                 case XKPRANGE_CC_SEG:
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile

index c74c9921304f2273fea31278cfafce7b143a75ea..f597cd08a96be0a19084884bd175678a6a83d6ab 100644 (file)
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -2,6 +2,7 @@
  # Objects to go into the VDSO.
  
  KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
  KCOV_INSTRUMENT := n
  
  # Include the generic Makefile to check the built vdso.
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile

index 43e39040d3ac6cd38a4bd4fc3dc04e03d5c71bf5..0abcf994ce5503e3e713f0ee2f1d563f978786a4 100644 (file)
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -15,10 +15,10 @@
  KBUILD_DEFCONFIG := multi_defconfig
  
  ifdef cross_compiling
-       ifeq ($(CROSS_COMPILE),)
+    ifeq ($(CROSS_COMPILE),)
                 CROSS_COMPILE := $(call cc-cross-prefix, \
                         m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
-       endif
+    endif
  endif
  
  #
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig

index 797ae590ebdba505c313b448720c7207b29673f8..cf5bb1c756e696461d44d04b560b255bf01a796b 100644 (file)
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1250,6 +1250,7 @@ config CPU_LOONGSON64
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_HUGEPAGES
         select CPU_SUPPORTS_MSA
+       select CPU_SUPPORTS_VZ
         select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
         select CPU_MIPSR2_IRQ_VI
         select DMA_NONCOHERENT
@@ -1261,7 +1262,6 @@ config CPU_LOONGSON64
         select MIPS_FP_SUPPORT
         select GPIOLIB
         select SWIOTLB
-       select HAVE_KVM
         help
           The Loongson GSx64(GS264/GS464/GS464E/GS464V) series of processor
           cores implements the MIPS64R2 instruction set with many extensions,
@@ -1374,7 +1374,6 @@ config CPU_MIPS32_R2
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_MSA
-       select HAVE_KVM
         help
           Choose this option to build a kernel for release 2 or later of the
           MIPS32 architecture.  Most modern embedded systems with a 32-bit
@@ -1389,7 +1388,7 @@ config CPU_MIPS32_R5
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_MSA
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
         select MIPS_O32_FP64_SUPPORT
         help
           Choose this option to build a kernel for release 5 or later of the
@@ -1405,7 +1404,7 @@ config CPU_MIPS32_R6
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_MSA
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
         select MIPS_O32_FP64_SUPPORT
         help
           Choose this option to build a kernel for release 6 or later of the
@@ -1441,7 +1440,6 @@ config CPU_MIPS64_R2
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_HUGEPAGES
         select CPU_SUPPORTS_MSA
-       select HAVE_KVM
         help
           Choose this option to build a kernel for release 2 or later of the
           MIPS64 architecture.  Many modern embedded systems with a 64-bit
@@ -1459,7 +1457,7 @@ config CPU_MIPS64_R5
         select CPU_SUPPORTS_HUGEPAGES
         select CPU_SUPPORTS_MSA
         select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
         help
           Choose this option to build a kernel for release 5 or later of the
           MIPS64 architecture.  This is a intermediate MIPS architecture
@@ -1477,7 +1475,7 @@ config CPU_MIPS64_R6
         select CPU_SUPPORTS_HUGEPAGES
         select CPU_SUPPORTS_MSA
         select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
         help
           Choose this option to build a kernel for release 6 or later of the
           MIPS64 architecture.  New MIPS processors, starting with the Warrior
@@ -1492,9 +1490,9 @@ config CPU_P5600
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_MSA
         select CPU_SUPPORTS_CPUFREQ
+       select CPU_SUPPORTS_VZ
         select CPU_MIPSR2_IRQ_VI
         select CPU_MIPSR2_IRQ_EI
-       select HAVE_KVM
         select MIPS_O32_FP64_SUPPORT
         help
           Choose this option to build a kernel for MIPS Warrior P5600 CPU.
@@ -1614,7 +1612,7 @@ config CPU_CAVIUM_OCTEON
         select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
         select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
         select MIPS_L1_CACHE_SHIFT_7
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
         help
           The Cavium Octeon processor is a highly integrated chip containing
           many ethernet hardware widgets for networking tasks. The processor
@@ -1969,6 +1967,8 @@ config CPU_SUPPORTS_ADDRWINCFG
  config CPU_SUPPORTS_HUGEPAGES
         bool
         depends on !(32BIT && (PHYS_ADDR_T_64BIT || EVA))
+config CPU_SUPPORTS_VZ
+       bool
  config MIPS_PGD_C0_CONTEXT
         bool
         depends on 64BIT
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h

index 4044eaf989ac7dad0f2094c5d4cfab05ac9fb5c3..0921ddda11a4b353c1c4d754417d3de4d003c12f 100644 (file)
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
         "       .set    pop"
         : "=&r" (sum), "=&r" (tmp)
         : "r" (saddr), "r" (daddr),
-         "0" (htonl(len)), "r" (htonl(proto)), "r" (sum));
+         "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)
+       : "memory");
  
         return csum_fold(sum);
  }
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h

index 081be98c71ef48c698f4aa6ba14945239a666a9d..ff5d388502d4ab56ec28d71ad4126d542bb65977 100644 (file)
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -39,7 +39,7 @@ extern void jump_label_apply_nops(struct module *mod);
  
  static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\t" B_INSN " 2f\n\t"
+       asm goto("1:\t" B_INSN " 2f\n\t"
                 "2:\t.insn\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
                 WORD_INSN " 1b, %l[l_yes], %0\n\t"
@@ -53,7 +53,7 @@ l_yes:
  
  static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t"
+       asm goto("1:\t" J_INSN " %l[l_yes]\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
                 WORD_INSN " 1b, %l[l_yes], %0\n\t"
                 ".popsection\n\t"
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h

index daf3cf244ea972c9a8bf134a09fa081931645425..d14d0e37ad02ddf10b42cfed590c65f97f8de424 100644 (file)
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -60,6 +60,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs,
                                             unsigned long val)
  {
         regs->cp0_epc = val;
+       regs->cp0_cause &= ~CAUSEF_BD;
  }
  
  /* Query offset/name of register from its name/offset */
@@ -154,6 +155,8 @@ static inline long regs_return_value(struct pt_regs *regs)
  }
  
  #define instruction_pointer(regs) ((regs)->cp0_epc)
+extern unsigned long exception_ip(struct pt_regs *regs);
+#define exception_ip(regs) exception_ip(regs)
  #define profile_pc(regs) instruction_pointer(regs)
  
  extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h

index edcf717c432717fce72096b1e5a2905c3f8c63ba..9673dc9cb31575a3553a4cf0e73d3e73ec24494f 100644 (file)
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -20,8 +20,6 @@
   * Some parts derived from the x86 version of this file.
   */
  
-#define __KVM_HAVE_READONLY_MEM
-
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
  
  /*
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c

index d9df543f7e2c4cd17b29522840154f1e323cacb0..59288c13b581b89ccb46214c7be02126a017dab2 100644 (file)
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -31,6 +31,7 @@
  #include <linux/seccomp.h>
  #include <linux/ftrace.h>
  
+#include <asm/branch.h>
  #include <asm/byteorder.h>
  #include <asm/cpu.h>
  #include <asm/cpu-info.h>
@@ -48,6 +49,12 @@
  #define CREATE_TRACE_POINTS
  #include <trace/events/syscalls.h>
  
+unsigned long exception_ip(struct pt_regs *regs)
+{
+       return exception_epc(regs);
+}
+EXPORT_SYMBOL(exception_ip);
+
  /*
   * Called by kernel/ptrace.c when detaching..
   *
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig

index 18e7a17d51158ee45901b4fda71f4996eb6d8a9a..ab57221fa4ddef542dde61284045de59cb9350af 100644 (file)
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -17,7 +17,7 @@ if VIRTUALIZATION
  
  config KVM
         tristate "Kernel-based Virtual Machine (KVM) support"
-       depends on HAVE_KVM
+       depends on CPU_SUPPORTS_VZ
         depends on MIPS_FP_SUPPORT
         select EXPORT_UASM
         select KVM_COMMON
@@ -26,6 +26,7 @@ config KVM
         select KVM_MMIO
         select KVM_GENERIC_MMU_NOTIFIER
         select KVM_GENERIC_HARDWARE_ENABLING
+       select HAVE_KVM_READONLY_MEM
         help
           Support for hosting Guest kernels.
  
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig

index d14ccc948a29b920854b6c750febffac625619fd..5c845e8d59d92f8cd3594fccf1476503d8957149 100644 (file)
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -25,7 +25,6 @@ config PARISC
         select RTC_DRV_GENERIC
         select INIT_ALL_POSSIBLE
         select BUG
-       select BUILDTIME_TABLE_SORT
         select HAVE_KERNEL_UNCOMPRESSED
         select HAVE_PCI
         select HAVE_PERF_EVENTS
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile

index 920db57b6b4cc866018c05dd00ca49142c7f949c..316f84f1d15c8f8c6e65dd3862dc5db1144f95bb 100644 (file)
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -50,12 +50,12 @@ export CROSS32CC
  
  # Set default cross compiler for kernel build
  ifdef cross_compiling
-       ifeq ($(CROSS_COMPILE),)
+    ifeq ($(CROSS_COMPILE),)
                 CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux
                 CROSS_COMPILE := $(call cc-cross-prefix, \
                         $(foreach a,$(CC_ARCHES), \
                         $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-)))
-       endif
+    endif
  endif
  
  ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h

index 74d17d7e759da9dfa89aa1a504b94de4554db16d..5937d5edaba1eac5a0c4e4b055c3e77fcbe3bf62 100644 (file)
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -576,6 +576,7 @@
         .section __ex_table,"aw"                        !       \
         .align 4                                        !       \
         .word (fault_addr - .), (except_addr - .)       !       \
+       or %r0,%r0,%r0                                  !       \
         .previous
  
  
diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h

new file mode 100644 (file)

index 0000000..4ea23e3
--- /dev/null
+++ b/arch/parisc/include/asm/extable.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PARISC_EXTABLE_H
+#define __PARISC_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
+/*
+ * The exception table consists of three addresses:
+ *
+ * - A relative address to the instruction that is allowed to fault.
+ * - A relative address at which the program should continue (fixup routine)
+ * - An asm statement which specifies which CPU register will
+ *   receive -EFAULT when an exception happens if the lowest bit in
+ *   the fixup address is set.
+ *
+ * Note: The register specified in the err_opcode instruction will be
+ * modified at runtime if a fault happens. Register %r0 will be ignored.
+ *
+ * Since relative addresses are used, 32bit values are sufficient even on
+ * 64bit kernel.
+ */
+
+struct pt_regs;
+int fixup_exception(struct pt_regs *regs);
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+struct exception_table_entry {
+       int insn;       /* relative address of insn that is allowed to fault. */
+       int fixup;      /* relative address of fixup routine */
+       int err_opcode; /* sample opcode with register which holds error code */
+};
+
+#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\
+       ".section __ex_table,\"aw\"\n"                     \
+       ".align 4\n"                                       \
+       ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \
+       opcode "\n"                                        \
+       ".previous\n"
+
+/*
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
+ * (with lowest bit set) for which the fault handler in fixup_exception() will
+ * load -EFAULT on fault into the register specified by the err_opcode instruction,
+ * and zeroes the target register in case of a read fault in get_user().
+ */
+#define ASM_EXCEPTIONTABLE_VAR(__err_var)              \
+       int __err_var = 0
+#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\
+       ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register)
+
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+                                      struct exception_table_entry *b,
+                                      struct exception_table_entry tmp,
+                                      int delta)
+{
+       a->fixup = b->fixup + delta;
+       b->fixup = tmp.fixup - delta;
+       a->err_opcode = b->err_opcode;
+       b->err_opcode = tmp.err_opcode;
+}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
+#endif
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h

index 94428798b6aa63e8d4b0878cc7555826cf080e47..317ebc5edc9fe99950f4efe55d989db453f46d0d 100644 (file)
--- a/arch/parisc/include/asm/jump_label.h
+++ b/arch/parisc/include/asm/jump_label.h
@@ -12,7 +12,7 @@
  
  static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                  "nop\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
                  ".align %1\n\t"
@@ -29,7 +29,7 @@ l_yes:
  
  static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                  "b,n %l[l_yes]\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
                  ".align %1\n\t"
diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h

index 0a175ac876980c7c90b747bd8f8f34658499997a..0f42f5c8e3b66a8cbcf6f95a3312cb22f456cca8 100644 (file)
--- a/arch/parisc/include/asm/kprobes.h
+++ b/arch/parisc/include/asm/kprobes.h
@@ -10,9 +10,10 @@
  #ifndef _PARISC_KPROBES_H
  #define _PARISC_KPROBES_H
  
+#include <asm-generic/kprobes.h>
+
  #ifdef CONFIG_KPROBES
  
-#include <asm-generic/kprobes.h>
  #include <linux/types.h>
  #include <linux/ptrace.h>
  #include <linux/notifier.h>
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h

index c822bd0c0e3c6ccb86b4190d15500589c70f353a..51f40eaf7780659263f37b7c10fa7bd4ecf4ced7 100644 (file)
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -8,7 +8,8 @@
                 "copy %%r0,%0\n"                        \
                 "8:\tlpa %%r0(%1),%0\n"                 \
                 "9:\n"                                  \
-               ASM_EXCEPTIONTABLE_ENTRY(8b, 9b)        \
+               ASM_EXCEPTIONTABLE_ENTRY(8b, 9b,        \
+                               "or %%r0,%%r0,%%r0")    \
                 : "=&r" (pa)                            \
                 : "r" (va)                              \
                 : "memory"                              \
@@ -22,7 +23,8 @@
                 "copy %%r0,%0\n"                        \
                 "8:\tlpa %%r0(%%sr3,%1),%0\n"           \
                 "9:\n"                                  \
-               ASM_EXCEPTIONTABLE_ENTRY(8b, 9b)        \
+               ASM_EXCEPTIONTABLE_ENTRY(8b, 9b,        \
+                               "or %%r0,%%r0,%%r0")    \
                 : "=&r" (pa)                            \
                 : "r" (va)                              \
                 : "memory"                              \
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h

index 4165079898d9e7af239a31a1bc77821e6081706a..88d0ae5769dde54e29176e286da359eb6a54e7bf 100644 (file)
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -7,6 +7,7 @@
   */
  #include <asm/page.h>
  #include <asm/cache.h>
+#include <asm/extable.h>
  
  #include <linux/bug.h>
  #include <linux/string.h>
@@ -26,37 +27,6 @@
  #define STD_USER(sr, x, ptr)   __put_user_asm(sr, "std", x, ptr)
  #endif
  
-/*
- * The exception table contains two values: the first is the relative offset to
- * the address of the instruction that is allowed to fault, and the second is
- * the relative offset to the address of the fixup routine. Since relative
- * addresses are used, 32bit values are sufficient even on 64bit kernel.
- */
-
-#define ARCH_HAS_RELATIVE_EXTABLE
-struct exception_table_entry {
-       int insn;       /* relative address of insn that is allowed to fault. */
-       int fixup;      /* relative address of fixup routine */
-};
-
-#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
-       ".section __ex_table,\"aw\"\n"                     \
-       ".align 4\n"                                       \
-       ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
-       ".previous\n"
-
-/*
- * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
- * (with lowest bit set) for which the fault handler in fixup_exception() will
- * load -EFAULT into %r29 for a read or write fault, and zeroes the target
- * register in case of a read fault in get_user().
- */
-#define ASM_EXCEPTIONTABLE_REG 29
-#define ASM_EXCEPTIONTABLE_VAR(__variable)             \
-       register long __variable __asm__ ("r29") = 0
-#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
-       ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
-
  #define __get_user_internal(sr, val, ptr)              \
  ({                                                     \
         ASM_EXCEPTIONTABLE_VAR(__gu_err);               \
@@ -83,7 +53,7 @@ struct exception_table_entry {
                                                         \
         __asm__("1: " ldx " 0(%%sr%2,%3),%0\n"          \
                 "9:\n"                                  \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1")   \
                 : "=r"(__gu_val), "+r"(__gu_err)        \
                 : "i"(sr), "r"(ptr));                   \
                                                         \
@@ -115,8 +85,8 @@ struct exception_table_entry {
                 "1: ldw 0(%%sr%2,%3),%0\n"              \
                 "2: ldw 4(%%sr%2,%3),%R0\n"             \
                 "9:\n"                                  \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1")   \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1")   \
                 : "=&r"(__gu_tmp.l), "+r"(__gu_err)     \
                 : "i"(sr), "r"(ptr));                   \
                                                         \
@@ -174,7 +144,7 @@ struct exception_table_entry {
         __asm__ __volatile__ (                                  \
                 "1: " stx " %1,0(%%sr%2,%3)\n"                  \
                 "9:\n"                                          \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)         \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0")   \
                 : "+r"(__pu_err)                                \
                 : "r"(x), "i"(sr), "r"(ptr))
  
@@ -186,15 +156,14 @@ struct exception_table_entry {
                 "1: stw %1,0(%%sr%2,%3)\n"                      \
                 "2: stw %R1,4(%%sr%2,%3)\n"                     \
                 "9:\n"                                          \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)         \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)         \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0")   \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0")   \
                 : "+r"(__pu_err)                                \
                 : "r"(__val), "i"(sr), "r"(ptr));               \
  } while (0)
  
  #endif /* !defined(CONFIG_64BIT) */
  
-
  /*
   * Complex access routines -- external declarations
   */
@@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src,
  #define INLINE_COPY_TO_USER
  #define INLINE_COPY_FROM_USER
  
-struct pt_regs;
-int fixup_exception(struct pt_regs *regs);
-
  #endif /* __PARISC_UACCESS_H */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c

index 268d90a9325b468603b634b86b48980a31b4fba7..422f3e1e6d9cad718c264c7d7c9bd30872846555 100644 (file)
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init;
  
  struct pdc_cache_info cache_info __ro_after_init;
  #ifndef CONFIG_PA20
-struct pdc_btlb_info btlb_info __ro_after_init;
+struct pdc_btlb_info btlb_info;
  #endif
  
  DEFINE_STATIC_KEY_TRUE(parisc_has_cache);
@@ -264,6 +264,10 @@ parisc_cache_init(void)
         icache_stride = CAFL_STRIDE(cache_info.ic_conf);
  #undef CAFL_STRIDE
  
+       /* stride needs to be non-zero, otherwise cache flushes will not work */
+       WARN_ON(cache_info.dc_size && dcache_stride == 0);
+       WARN_ON(cache_info.ic_size && icache_stride == 0);
+
         if ((boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) ==
                                                 PDC_MODEL_NVA_UNSUPPORTED) {
                 printk(KERN_WARNING "parisc_cache_init: Only equivalent aliasing supported!\n");
@@ -850,7 +854,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
  #endif
                         "   fic,m       %3(%4,%0)\n"
                         "2: sync\n"
-                       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b)
+                       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1")
                         : "+r" (start), "+r" (error)
                         : "r" (end), "r" (dcache_stride), "i" (SR_USER));
         }
@@ -865,7 +869,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
  #endif
                         "   fdc,m       %3(%4,%0)\n"
                         "2: sync\n"
-                       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b)
+                       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1")
                         : "+r" (start), "+r" (error)
                         : "r" (end), "r" (icache_stride), "i" (SR_USER));
         }
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c

index 25f9b9e9d6dfbc70f21787e29a170334cb102dc7..c7ff339732ba5a762eac90e1b3072aef45c58318 100644 (file)
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -742,7 +742,7 @@ parse_tree_node(struct device *parent, int index, struct hardware_path *modpath)
         };
  
         if (device_for_each_child(parent, &recurse_data, descend_children))
-               { /* nothing */ };
+               { /* nothing */ }
  
         return d.dev;
  }
@@ -1004,6 +1004,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data)
  
         pr_info("\n");
  
+       /* Prevent hung task messages when printing on serial console */
+       cond_resched();
+
         pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n",
                 hpa, parisc_hardware_description(&dev->id));
  
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c

index d1defb9ede70c0ae73e46363e850fc28ef91cebd..621a4b386ae4fcc90fa5e2ad9b7ac6b947fd903d 100644 (file)
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -78,7 +78,7 @@ asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent,
  #endif
  }
  
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
  int ftrace_enable_ftrace_graph_caller(void)
  {
         static_key_enable(&ftrace_graph_enable.key);
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c

index e95a977ba5f376eb813d4c7806d205a92f539880..bf73562706b2e8ec337bc8cde4b6fd9e5cd7f43e 100644 (file)
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -172,7 +172,6 @@ static int __init processor_probe(struct parisc_device *dev)
         p->cpu_num = cpu_info.cpu_num;
         p->cpu_loc = cpu_info.cpu_loc;
  
-       set_cpu_possible(cpuid, true);
         store_cpu_topology(cpuid);
  
  #ifdef CONFIG_SMP
@@ -474,13 +473,6 @@ static struct parisc_driver cpu_driver __refdata = {
   */
  void __init processor_init(void)
  {
-       unsigned int cpu;
-
         reset_cpu_topology();
-
-       /* reset possible mask. We will mark those which are possible. */
-       for_each_possible_cpu(cpu)
-               set_cpu_possible(cpu, false);
-
         register_parisc_driver(&cpu_driver);
  }
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c

index ce25acfe4889d0df8048e448a16d76e414ee1262..c520e551a165258609cba5e068037493bd7e57a8 100644 (file)
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -120,8 +120,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg)
  "2:    ldbs    1(%%sr1,%3), %0\n"
  "      depw    %2, 23, 24, %0\n"
  "3:    \n"
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
         : "+r" (val), "+r" (ret), "=&r" (temp1)
         : "r" (saddr), "r" (regs->isr) );
  
@@ -152,8 +152,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
  "      mtctl   %2,11\n"
  "      vshd    %0,%3,%0\n"
  "3:    \n"
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
         : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2)
         : "r" (saddr), "r" (regs->isr) );
  
@@ -189,8 +189,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
  "      mtsar   %%r19\n"
  "      shrpd   %0,%%r20,%%sar,%0\n"
  "3:    \n"
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
         : "=r" (val), "+r" (ret)
         : "0" (val), "r" (saddr), "r" (regs->isr)
         : "r19", "r20" );
@@ -209,9 +209,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
  "      vshd    %0,%R0,%0\n"
  "      vshd    %R0,%4,%R0\n"
  "4:    \n"
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b)
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
         : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
         : "r" (regs->isr) );
      }
@@ -244,8 +244,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg)
  "1:    stb %1, 0(%%sr1, %3)\n"
  "2:    stb %2, 1(%%sr1, %3)\n"
  "3:    \n"
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0")
         : "+r" (ret), "=&r" (temp1)
         : "r" (val), "r" (regs->ior), "r" (regs->isr) );
  
@@ -285,8 +285,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
  "      stw     %%r20,0(%%sr1,%2)\n"
  "      stw     %%r21,4(%%sr1,%2)\n"
  "3:    \n"
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b)
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0")
         : "+r" (ret)
         : "r" (val), "r" (regs->ior), "r" (regs->isr)
         : "r19", "r20", "r21", "r22", "r1" );
@@ -329,10 +329,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
  "3:    std     %%r20,0(%%sr1,%2)\n"
  "4:    std     %%r21,8(%%sr1,%2)\n"
  "5:    \n"
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b)
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0")
         : "+r" (ret)
         : "r" (val), "r" (regs->ior), "r" (regs->isr)
         : "r19", "r20", "r21", "r22", "r1" );
@@ -357,11 +357,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
  "4:    stw     %%r1,4(%%sr1,%2)\n"
  "5:    stw     %R1,8(%%sr1,%2)\n"
  "6:    \n"
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b)
-       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b)
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0")
+       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0")
         : "+r" (ret)
         : "r" (val), "r" (regs->ior), "r" (regs->isr)
         : "r19", "r20", "r21", "r1" );
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c

index 27ae40a443b80c5fa575e8579bca7f08ef6d36ab..f7e0fee5ee55a3e055679e75b06c280679b603ad 100644 (file)
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -228,10 +228,8 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
  #ifdef CONFIG_IRQSTACKS
         extern void * const _call_on_stack;
  #endif /* CONFIG_IRQSTACKS */
-       void *ptr;
  
-       ptr = dereference_kernel_function_descriptor(&handle_interruption);
-       if (pc_is_kernel_fn(pc, ptr)) {
+       if (pc_is_kernel_fn(pc, handle_interruption)) {
                 struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN);
                 dbg("Unwinding through handle_interruption()\n");
                 info->prev_sp = regs->gr[30];
@@ -239,13 +237,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
                 return 1;
         }
  
-       if (pc_is_kernel_fn(pc, ret_from_kernel_thread) ||
-           pc_is_kernel_fn(pc, syscall_exit)) {
+       if (pc == (unsigned long)&ret_from_kernel_thread ||
+           pc == (unsigned long)&syscall_exit) {
                 info->prev_sp = info->prev_ip = 0;
                 return 1;
         }
  
-       if (pc_is_kernel_fn(pc, intr_return)) {
+       if (pc == (unsigned long)&intr_return) {
                 struct pt_regs *regs;
  
                 dbg("Found intr_return()\n");
@@ -257,14 +255,14 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
         }
  
         if (pc_is_kernel_fn(pc, _switch_to) ||
-           pc_is_kernel_fn(pc, _switch_to_ret)) {
+           pc == (unsigned long)&_switch_to_ret) {
                 info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE;
                 info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET);
                 return 1;
         }
  
  #ifdef CONFIG_IRQSTACKS
-       if (pc_is_kernel_fn(pc, _call_on_stack)) {
+       if (pc == (unsigned long)&_call_on_stack) {
                 info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ);
                 info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET);
                 return 1;
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S

index 548051b0b4aff692741847a04b09208d1e68d279..b445e47903cfd0b813035c2056f11a4f818cf6d2 100644 (file)
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -127,7 +127,7 @@ SECTIONS
         }
  #endif
  
-       RO_DATA(8)
+       RO_DATA(PAGE_SIZE)
  
         /* unwind info */
         . = ALIGN(4);
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c

index 2fe5b44986e0924e3981ebc1edb9d074c08e6fda..c39de84e98b05172bdec0f474261ccde4a06cf00 100644 (file)
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs)
                  * Fix up get_user() and put_user().
                  * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
                  * bit in the relative address of the fixup routine to indicate
-                * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with
-                * -EFAULT to report a userspace access error.
+                * that the register encoded in the "or %r0,%r0,register"
+                * opcode should be loaded with -EFAULT to report a userspace
+                * access error.
                  */
                 if (fix->fixup & 1) {
-                       regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT;
+                       int fault_error_reg = fix->err_opcode & 0x1f;
+                       if (!WARN_ON(!fault_error_reg))
+                               regs->gr[fault_error_reg] = -EFAULT;
+                       pr_debug("Unalignment fixup of register %d at %pS\n",
+                               fault_error_reg, (void*)regs->iaoq[0]);
  
                         /* zero target register for get_user() */
                         if (parisc_acctyp(0, regs->iir) == VM_READ) {
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h

index 1ebd2ca97f1201f1e760fcb46fe4c5e043727333..107fc5a484569673af80bd58e2e5980ae4082fab 100644 (file)
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -20,14 +20,6 @@
  #ifndef __ASSEMBLY__
  extern void _mcount(void);
  
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
-{
-       if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY))
-               addr += MCOUNT_INSN_SIZE;
-
-       return addr;
-}
-
  unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
                                     unsigned long sp);
  
@@ -142,8 +134,10 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
  #ifdef CONFIG_FUNCTION_TRACER
  extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
  void ftrace_free_init_tramp(void);
+unsigned long ftrace_call_adjust(unsigned long addr);
  #else
  static inline void ftrace_free_init_tramp(void) { }
+static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; }
  #endif
  #endif /* !__ASSEMBLY__ */
  
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h

index 93ce3ec253877d38da5e3f9c3ac76205354d3496..2f2a86ed2280aac66df0535d7938cf4a673446f7 100644 (file)
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -17,7 +17,7 @@
  
  static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                  "nop # arch_static_branch\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
                  ".long 1b - ., %l[l_yes] - .\n\t"
@@ -32,7 +32,7 @@ l_yes:
  
  static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                  "b %l[l_yes] # arch_static_branch_jump\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
                  ".long 1b - ., %l[l_yes] - .\n\t"
diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h

index 0dbbff59101d6f31d4ac9f59c2fa74a7d4e90cae..c3cd5b131033eb3ef5c8fc1e99f0a8da0b78157a 100644 (file)
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -32,7 +32,7 @@ typedef struct {
   */
  struct papr_sysparm_buf {
         __be16 len;
-       char val[PAPR_SYSPARM_MAX_OUTPUT];
+       u8 val[PAPR_SYSPARM_MAX_OUTPUT];
  };
  
  struct papr_sysparm_buf *papr_sysparm_buf_alloc(void);
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h

index ce2b1b5eebddcf5eb2e84b5e8853f89cf06501a6..a8b7e8682f5bd6c58ff9faa31a152a31e1b5280d 100644 (file)
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -30,6 +30,16 @@ void *pci_traverse_device_nodes(struct device_node *start,
                                 void *data);
  extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
  
+#if defined(CONFIG_IOMMU_API) && (defined(CONFIG_PPC_PSERIES) || \
+                                 defined(CONFIG_PPC_POWERNV))
+extern void ppc_iommu_register_device(struct pci_controller *phb);
+extern void ppc_iommu_unregister_device(struct pci_controller *phb);
+#else
+static inline void ppc_iommu_register_device(struct pci_controller *phb) { }
+static inline void ppc_iommu_unregister_device(struct pci_controller *phb) { }
+#endif
+
+
  /* From rtas_pci.h */
  extern void init_pci_config_tokens (void);
  extern unsigned long get_phb_buid (struct device_node *);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h

index 7fd09f25452d4f697728f7958a9ad5b277f73a50..bb47af9054a9545f54bf5080a14bde235ef29a30 100644 (file)
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -617,6 +617,8 @@
  #endif
  #define SPRN_HID2      0x3F8           /* Hardware Implementation Register 2 */
  #define SPRN_HID2_GEKKO        0x398           /* Gekko HID2 Register */
+#define SPRN_HID2_G2_LE        0x3F3           /* G2_LE HID2 Register */
+#define  HID2_G2_LE_HBE        (1<<18)         /* High BAT Enable (G2_LE) */
  #define SPRN_IABR      0x3F2   /* Instruction Address Breakpoint Register */
  #define SPRN_IABR2     0x3FA           /* 83xx */
  #define SPRN_IBCR      0x135           /* 83xx Insn Breakpoint Control Reg */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h

index 9bb2210c8d4417a4262aab81d68d851e175b77b4..065ffd1b2f8adaef8369846531bf4e6f78159b57 100644 (file)
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -69,7 +69,7 @@ enum rtas_function_index {
         RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE,
         RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2,
         RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW,
-       RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS,
+       RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW,
         RTAS_FNIDX__IBM_SCAN_LOG_DUMP,
         RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR,
         RTAS_FNIDX__IBM_SET_EEH_OPTION,
@@ -164,7 +164,7 @@ typedef struct {
  #define RTAS_FN_IBM_READ_SLOT_RESET_STATE         rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE)
  #define RTAS_FN_IBM_READ_SLOT_RESET_STATE2        rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2)
  #define RTAS_FN_IBM_REMOVE_PE_DMA_WINDOW          rtas_fn_handle(RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW)
-#define RTAS_FN_IBM_RESET_PE_DMA_WINDOWS          rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS)
+#define RTAS_FN_IBM_RESET_PE_DMA_WINDOW           rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW)
  #define RTAS_FN_IBM_SCAN_LOG_DUMP                 rtas_fn_handle(RTAS_FNIDX__IBM_SCAN_LOG_DUMP)
  #define RTAS_FN_IBM_SET_DYNAMIC_INDICATOR         rtas_fn_handle(RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR)
  #define RTAS_FN_IBM_SET_EEH_OPTION                rtas_fn_handle(RTAS_FNIDX__IBM_SET_EEH_OPTION)
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h

index ea26665f82cfc833a93b87f23ae2b196eb5a4180..f43f3a6b0051cf24bd76428987366d1fcdf5de5d 100644 (file)
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -14,6 +14,7 @@ typedef struct func_desc func_desc_t;
  
  extern char __head_end[];
  extern char __srwx_boundary[];
+extern char __exittext_begin[], __exittext_end[];
  
  /* Patch sites */
  extern s32 patch__call_flush_branch_caches1;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h

index bf5dde1a411471fcc95d4503dfb41d3881aad9fe..15c5691dd218440d32142779a2a1e2ce5058d60c 100644 (file)
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -14,7 +14,7 @@
  
  #ifdef __KERNEL__
  
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15
  #define MIN_THREAD_SHIFT       (CONFIG_THREAD_SHIFT + 1)
  #else
  #define MIN_THREAD_SHIFT       CONFIG_THREAD_SHIFT
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h

index f1f9890f50d3ef84dfd62b5d66db68315f0698b6..de10437fd20652ee63a6d214638bded13cdbc6c3 100644 (file)
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -74,7 +74,7 @@ __pu_failed:                                                  \
  /* -mprefixed can generate offsets beyond range, fall back hack */
  #ifdef CONFIG_PPC_KERNEL_PREFIXED
  #define __put_user_asm_goto(x, addr, label, op)                        \
-       asm_volatile_goto(                                      \
+       asm goto(                                       \
                 "1:     " op " %0,0(%1) # put_user\n"           \
                 EX_TABLE(1b, %l2)                               \
                 :                                               \
@@ -83,7 +83,7 @@ __pu_failed:                                                  \
                 : label)
  #else
  #define __put_user_asm_goto(x, addr, label, op)                        \
-       asm_volatile_goto(                                      \
+       asm goto(                                       \
                 "1:     " op "%U1%X1 %0,%1      # put_user\n"   \
                 EX_TABLE(1b, %l2)                               \
                 :                                               \
@@ -97,7 +97,7 @@ __pu_failed:                                                  \
         __put_user_asm_goto(x, ptr, label, "std")
  #else /* __powerpc64__ */
  #define __put_user_asm2_goto(x, addr, label)                   \
-       asm_volatile_goto(                                      \
+       asm goto(                                       \
                 "1:     stw%X1 %0, %1\n"                        \
                 "2:     stw%X1 %L0, %L1\n"                      \
                 EX_TABLE(1b, %l2)                               \
@@ -146,7 +146,7 @@ do {                                                                \
  /* -mprefixed can generate offsets beyond range, fall back hack */
  #ifdef CONFIG_PPC_KERNEL_PREFIXED
  #define __get_user_asm_goto(x, addr, label, op)                        \
-       asm_volatile_goto(                                      \
+       asm_goto_output(                                        \
                 "1:     "op" %0,0(%1)   # get_user\n"           \
                 EX_TABLE(1b, %l2)                               \
                 : "=r" (x)                                      \
@@ -155,7 +155,7 @@ do {                                                                \
                 : label)
  #else
  #define __get_user_asm_goto(x, addr, label, op)                        \
-       asm_volatile_goto(                                      \
+       asm_goto_output(                                        \
                 "1:     "op"%U1%X1 %0, %1       # get_user\n"   \
                 EX_TABLE(1b, %l2)                               \
                 : "=r" (x)                                      \
@@ -169,7 +169,7 @@ do {                                                                \
         __get_user_asm_goto(x, addr, label, "ld")
  #else /* __powerpc64__ */
  #define __get_user_asm2_goto(x, addr, label)                   \
-       asm_volatile_goto(                                      \
+       asm_goto_output(                                        \
                 "1:     lwz%X1 %0, %1\n"                        \
                 "2:     lwz%X1 %L0, %L1\n"                      \
                 EX_TABLE(1b, %l2)                               \
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h

index 9f18fa090f1f1d08179cba6f39f7b832bbd7b95b..1691297a766a9c1a4df9384c4ff02ecd8ce21b92 100644 (file)
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -28,7 +28,6 @@
  #define __KVM_HAVE_PPC_SMT
  #define __KVM_HAVE_IRQCHIP
  #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_GUEST_DEBUG
  
  /* Not always available, but if it is, this is the correct offset.  */
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq {
  #define KVM_XIVE_TIMA_PAGE_OFFSET      0
  #define KVM_XIVE_ESB_PAGE_OFFSET       4
  
+/* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
+
+struct kvm_ppc_pvinfo {
+       /* out */
+       __u32 flags;
+       __u32 hcall[4];
+       __u8  pad[108];
+};
+
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ      8
+
+struct kvm_ppc_one_page_size {
+       __u32 page_shift;       /* Page shift (or 0) */
+       __u32 pte_enc;          /* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+       __u32 page_shift;       /* Base page shift of segment (or 0) */
+       __u32 slb_enc;          /* SLB encoding for BookS */
+       struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL                0x00000001
+#define KVM_PPC_1T_SEGMENTS            0x00000002
+#define KVM_PPC_NO_HASH                        0x00000004
+
+struct kvm_ppc_smmu_info {
+       __u64 flags;
+       __u32 slb_size;
+       __u16 data_keys;        /* # storage keys supported for data */
+       __u16 instr_keys;       /* # storage keys supported for instructions */
+       struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+       __u64 flags;
+       __u32 shift;
+       __u32 pad;
+};
+
  #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h

index 9f9a0f267ea57c2593448bcfbd0af4f4f0582f08..f733467b1534eb9bf3dad20042b06afd85ab8f41 100644 (file)
--- a/arch/powerpc/include/uapi/asm/papr-sysparm.h
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -14,7 +14,7 @@ enum {
  struct papr_sysparm_io_block {
         __u32 parameter;
         __u16 length;
-       char data[PAPR_SYSPARM_MAX_OUTPUT];
+       __u8 data[PAPR_SYSPARM_MAX_OUTPUT];
  };
  
  /**
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S

index f29ce3dd6140f40c026a0d8f67e79858c86dabd9..bfd3f442e5eb9dfcb851f1b1c5b68e690f1702ae 100644 (file)
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -26,6 +26,15 @@ BEGIN_FTR_SECTION
         bl      __init_fpu_registers
  END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
         bl      setup_common_caches
+
+       /*
+        * This assumes that all cores using __setup_cpu_603 with
+        * MMU_FTR_USE_HIGH_BATS are G2_LE compatible
+        */
+BEGIN_MMU_FTR_SECTION
+       bl      setup_g2_le_hid2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
         mtlr    r5
         blr
  _GLOBAL(__setup_cpu_604)
@@ -115,6 +124,16 @@ SYM_FUNC_START_LOCAL(setup_604_hid0)
         blr
  SYM_FUNC_END(setup_604_hid0)
  
+/* Enable high BATs for G2_LE and derivatives like e300cX */
+SYM_FUNC_START_LOCAL(setup_g2_le_hid2)
+       mfspr   r11,SPRN_HID2_G2_LE
+       oris    r11,r11,HID2_G2_LE_HBE@h
+       mtspr   SPRN_HID2_G2_LE,r11
+       sync
+       isync
+       blr
+SYM_FUNC_END(setup_g2_le_hid2)
+
  /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
   * erratas we work around here.
   * Moto MPC710CE.pdf describes them, those are errata
@@ -495,4 +514,3 @@ _GLOBAL(__restore_cpu_setup)
         mtcr    r7
         blr
  _ASM_NOKPROBE_SYMBOL(__restore_cpu_setup)
-
diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h

index ceb06b109f831355a833a0e929ef68d86ccbc321..2ae8e9a7b461c8c35755bd4ac0ab48e29a4daf79 100644 (file)
--- a/arch/powerpc/kernel/cpu_specs_e500mc.h
+++ b/arch/powerpc/kernel/cpu_specs_e500mc.h
@@ -8,7 +8,8 @@
  
  #ifdef CONFIG_PPC64
  #define COMMON_USER_BOOKE      (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
-                                PPC_FEATURE_HAS_FPU | PPC_FEATURE_64)
+                                PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \
+                                PPC_FEATURE_BOOKE)
  #else
  #define COMMON_USER_BOOKE      (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
                                  PPC_FEATURE_BOOKE)
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S

index bd863702d81218d80e61c73e469d448f963eb265..1ad059a9e2fef3da806514bc35158966d626072b 100644 (file)
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
         mr      r10,r1
         ld      r1,PACAKSAVE(r13)
         std     r10,0(r1)
-       std     r11,_NIP(r1)
+       std     r11,_LINK(r1)
+       std     r11,_NIP(r1)    /* Saved LR is also the next instruction */
         std     r12,_MSR(r1)
         std     r0,GPR0(r1)
         std     r10,GPR1(r1)
@@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
         std     r9,GPR13(r1)
         SAVE_NVGPRS(r1)
         std     r11,_XER(r1)
-       std     r11,_LINK(r1)
         std     r11,_CTR(r1)
  
         li      r11,\trapnr
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c

index ebe259bdd46298e0654fb681b0cf8853c8381079..1185efebf032b6e7d2cf08db4c953938948a44b1 100644 (file)
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1287,20 +1287,22 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
         struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
         struct iommu_group *grp = iommu_group_get(dev);
         struct iommu_table_group *table_group;
-       int ret = -EINVAL;
  
         /* At first attach the ownership is already set */
-       if (!domain)
+       if (!domain) {
+               iommu_group_put(grp);
                 return 0;
-
-       if (!grp)
-               return -ENODEV;
+       }
  
         table_group = iommu_group_get_iommudata(grp);
-       ret = table_group->ops->take_ownership(table_group);
+       /*
+        * The domain being set to PLATFORM from earlier
+        * BLOCKED. The table_group ownership has to be released.
+        */
+       table_group->ops->release_ownership(table_group);
         iommu_group_put(grp);
  
-       return ret;
+       return 0;
  }
  
  static const struct iommu_domain_ops spapr_tce_platform_domain_ops = {
@@ -1312,13 +1314,32 @@ static struct iommu_domain spapr_tce_platform_domain = {
         .ops = &spapr_tce_platform_domain_ops,
  };
  
-static struct iommu_domain spapr_tce_blocked_domain = {
-       .type = IOMMU_DOMAIN_BLOCKED,
+static int
+spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain,
+                                    struct device *dev)
+{
+       struct iommu_group *grp = iommu_group_get(dev);
+       struct iommu_table_group *table_group;
+       int ret = -EINVAL;
+
         /*
          * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain
          * also sets the dma_api ops
          */
-       .ops = &spapr_tce_platform_domain_ops,
+       table_group = iommu_group_get_iommudata(grp);
+       ret = table_group->ops->take_ownership(table_group);
+       iommu_group_put(grp);
+
+       return ret;
+}
+
+static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = {
+       .attach_dev = spapr_tce_blocked_iommu_attach_dev,
+};
+
+static struct iommu_domain spapr_tce_blocked_domain = {
+       .type = IOMMU_DOMAIN_BLOCKED,
+       .ops = &spapr_tce_blocked_domain_ops,
  };
  
  static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
@@ -1339,7 +1360,7 @@ static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
         struct pci_controller *hose;
  
         if (!dev_is_pci(dev))
-               return ERR_PTR(-EPERM);
+               return ERR_PTR(-ENODEV);
  
         pdev = to_pci_dev(dev);
         hose = pdev->bus->sysdata;
@@ -1388,6 +1409,21 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = {
         NULL,
  };
  
+void ppc_iommu_register_device(struct pci_controller *phb)
+{
+       iommu_device_sysfs_add(&phb->iommu, phb->parent,
+                               spapr_tce_iommu_groups, "iommu-phb%04x",
+                               phb->global_number);
+       iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops,
+                               phb->parent);
+}
+
+void ppc_iommu_unregister_device(struct pci_controller *phb)
+{
+       iommu_device_unregister(&phb->iommu);
+       iommu_device_sysfs_remove(&phb->iommu);
+}
+
  /*
   * This registers IOMMU devices of PHBs. This needs to happen
   * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
@@ -1398,11 +1434,7 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void)
         struct pci_controller *hose;
  
         list_for_each_entry(hose, &hose_list, list_node) {
-               iommu_device_sysfs_add(&hose->iommu, hose->parent,
-                                      spapr_tce_iommu_groups, "iommu-phb%04x",
-                                      hose->global_number);
-               iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
-                                     hose->parent);
+               ppc_iommu_register_device(hose);
         }
         return 0;
  }
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c

index 938e66829eae65cc52d170f7753ee0685cdaa4e3..d5c48d1b0a31ea533281934e414320bbf77368d2 100644 (file)
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -230,7 +230,7 @@ again:
          * This allows interrupts to be unmasked without hard disabling, and
          * also without new hard interrupts coming in ahead of pending ones.
          */
-       asm_volatile_goto(
+       asm goto(
  "1:                                    \n"
  "              lbz     9,%0(13)        \n"
  "              cmpwi   9,0             \n"
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c

index 7e793b503e29f1ff878e7289c8703e7c4cf20edc..8064d9c3de8620d27d9c87f829676ef048aeed40 100644 (file)
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -375,8 +375,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
         [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = {
                 .name = "ibm,remove-pe-dma-window",
         },
-       [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = {
-               .name = "ibm,reset-pe-dma-windows",
+       [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = {
+               /*
+                * Note: PAPR+ v2.13 7.3.31.4.1 spells this as
+                * "ibm,reset-pe-dma-windows" (plural), but RTAS
+                * implementations use the singular form in practice.
+                */
+               .name = "ibm,reset-pe-dma-window",
         },
         [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = {
                 .name = "ibm,scan-log-dump",
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c

index 82010629cf887ca1753d4f64bfa5916cdc1d7b48..d8d6b4fd9a14cbf8f8f93e499500eed11190be71 100644 (file)
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -27,10 +27,22 @@
  #include <asm/ftrace.h>
  #include <asm/syscall.h>
  #include <asm/inst.h>
+#include <asm/sections.h>
  
  #define        NUM_FTRACE_TRAMPS       2
  static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
  
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+       if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end)
+               return 0;
+
+       if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY))
+               addr += MCOUNT_INSN_SIZE;
+
+       return addr;
+}
+
  static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link)
  {
         ppc_inst_t op;
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c

index 7b85c3b460a3c048ec31cce44e9b21066b96c5a8..12fab1803bcf45cafb3fd230c1f7871e2c539f1d 100644 (file)
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.c
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -37,6 +37,11 @@
  #define        NUM_FTRACE_TRAMPS       8
  static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
  
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+       return addr;
+}
+
  static ppc_inst_t
  ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
  {
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S

index 1c5970df32336655703888b1ffafb8180e79c446..f420df7888a75c5f515a3457708c3188661fa331 100644 (file)
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -281,7 +281,9 @@ SECTIONS
          * to deal with references from __bug_table
          */
         .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+               __exittext_begin = .;
                 EXIT_TEXT
+               __exittext_end = .;
         }
  
         . = ALIGN(PAGE_SIZE);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig

index 074263429faf2e49b516fb1ef1ac3544b902febe..dbfdc126bf1440b463ea509ed9f832617c4e35f3 100644 (file)
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM
         select KVM_COMMON
         select HAVE_KVM_VCPU_ASYNC_IOCTL
         select KVM_VFIO
-       select IRQ_BYPASS_MANAGER
         select HAVE_KVM_IRQ_BYPASS
  
  config KVM_BOOK3S_HANDLER
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index 52427fc2a33fa4ad7032bcc6323bc6364918d98f..0b921704da45eb6b718cac8f031c5d0c45176746 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -391,6 +391,24 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
  /* Dummy value used in computing PCR value below */
  #define PCR_ARCH_31    (PCR_ARCH_300 << 1)
  
+static inline unsigned long map_pcr_to_cap(unsigned long pcr)
+{
+       unsigned long cap = 0;
+
+       switch (pcr) {
+       case PCR_ARCH_300:
+               cap = H_GUEST_CAP_POWER9;
+               break;
+       case PCR_ARCH_31:
+               cap = H_GUEST_CAP_POWER10;
+               break;
+       default:
+               break;
+       }
+
+       return cap;
+}
+
  static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
  {
         unsigned long host_pcr_bit = 0, guest_pcr_bit = 0, cap = 0;
@@ -424,11 +442,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
                         break;
                 case PVR_ARCH_300:
                         guest_pcr_bit = PCR_ARCH_300;
-                       cap = H_GUEST_CAP_POWER9;
                         break;
                 case PVR_ARCH_31:
                         guest_pcr_bit = PCR_ARCH_31;
-                       cap = H_GUEST_CAP_POWER10;
                         break;
                 default:
                         return -EINVAL;
@@ -440,6 +456,12 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
                 return -EINVAL;
  
         if (kvmhv_on_pseries() && kvmhv_is_nestedv2()) {
+               /*
+                * 'arch_compat == 0' would mean the guest should default to
+                * L1's compatibility. In this case, the guest would pick
+                * host's PCR and evaluate the corresponding capabilities.
+                */
+               cap = map_pcr_to_cap(guest_pcr_bit);
                 if (!(cap & nested_capabilities))
                         return -EINVAL;
         }
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c

index 5378eb40b162f2690879f43fbaeb3f0b003536a7..8e6f5355f08b5d925c54606db4a70cbe24d74e61 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_nestedv2.c
+++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c
@@ -138,6 +138,7 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb,
         vector128 v;
         int rc, i;
         u16 iden;
+       u32 arch_compat = 0;
  
         vcpu = gsm->data;
  
@@ -347,8 +348,23 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb,
                         break;
                 }
                 case KVMPPC_GSID_LOGICAL_PVR:
-                       rc = kvmppc_gse_put_u32(gsb, iden,
-                                               vcpu->arch.vcore->arch_compat);
+                       /*
+                        * Though 'arch_compat == 0' would mean the default
+                        * compatibility, arch_compat, being a Guest Wide
+                        * Element, cannot be filled with a value of 0 in GSB
+                        * as this would result into a kernel trap.
+                        * Hence, when `arch_compat == 0`, arch_compat should
+                        * default to L1's PVR.
+                        */
+                       if (!vcpu->arch.vcore->arch_compat) {
+                               if (cpu_has_feature(CPU_FTR_ARCH_31))
+                                       arch_compat = PVR_ARCH_31;
+                               else if (cpu_has_feature(CPU_FTR_ARCH_300))
+                                       arch_compat = PVR_ARCH_300;
+                       } else {
+                               arch_compat = vcpu->arch.vcore->arch_compat;
+                       }
+                       rc = kvmppc_gse_put_u32(gsb, iden, arch_compat);
                         break;
                 }
  
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c

index 23407fbd73c9346e05113db812c6897e2a33e7e7..d32abe7fe6ab79ea81a4583126b036a24e01d4b0 100644 (file)
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -2538,9 +2538,8 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
                 vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry);
  }
  
-int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
  {
         if (kvm->arch.kvm_ops->create_vm_debugfs)
                 kvm->arch.kvm_ops->create_vm_debugfs(kvm);
-       return 0;
  }
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c

index a70828a6d9357d5fb399375f9624a7fb06d4a129..aa9aa11927b2f842718d98e7818fcd499efd0e54 100644 (file)
--- a/arch/powerpc/mm/kasan/init_32.c
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size)
         if (ret)
                 return ret;
  
+       k_start = k_start & PAGE_MASK;
         block = memblock_alloc(k_end - k_start, PAGE_SIZE);
         if (!block)
                 return -ENOMEM;
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c

index e966b2ad8ecd42a97d8eb9fc7d6a70922165b2d9..b3327a358eb434dfe4c3357b534ea3f8102425e1 100644 (file)
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -27,7 +27,7 @@
  
  #include "mpc85xx.h"
  
-void __init mpc8536_ds_pic_init(void)
+static void __init mpc8536_ds_pic_init(void)
  {
         struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
                         0, 256, " OpenPIC  ");
diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c

index 1b59e45a0c64f1bd4dc257b52df072b07a555abf..19122daadb55a64842c4d5e56ad6e08ad762ab28 100644 (file)
--- a/arch/powerpc/platforms/85xx/mvme2500.c
+++ b/arch/powerpc/platforms/85xx/mvme2500.c
@@ -21,7 +21,7 @@
  
  #include "mpc85xx.h"
  
-void __init mvme2500_pic_init(void)
+static void __init mvme2500_pic_init(void)
  {
         struct mpic *mpic = mpic_alloc(NULL, 0,
                   MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c

index 10d6f1fa33275a8b5b4153226c827e6f1d2816fa..491895ac8bcfe2121f99138e352255cd15085ccd 100644 (file)
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -24,7 +24,7 @@
  
  #include "mpc85xx.h"
  
-void __init p1010_rdb_pic_init(void)
+static void __init p1010_rdb_pic_init(void)
  {
         struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
           MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c

index 0dd786a061a6a2195e51a1ecf2d6be65d6d38221..adc3a2ee141509f0ca06ccb1d648efd17292f7be 100644 (file)
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -370,7 +370,7 @@ exit:
   *
   * @pixclock: the wavelength, in picoseconds, of the clock
   */
-void p1022ds_set_pixel_clock(unsigned int pixclock)
+static void p1022ds_set_pixel_clock(unsigned int pixclock)
  {
         struct device_node *guts_np = NULL;
         struct ccsr_guts __iomem *guts;
@@ -418,7 +418,7 @@ void p1022ds_set_pixel_clock(unsigned int pixclock)
  /**
   * p1022ds_valid_monitor_port: set the monitor port for sysfs
   */
-enum fsl_diu_monitor_port
+static enum fsl_diu_monitor_port
  p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
  {
         switch (port) {
@@ -432,7 +432,7 @@ p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
  
  #endif
  
-void __init p1022_ds_pic_init(void)
+static void __init p1022_ds_pic_init(void)
  {
         struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
                 MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c

index 25ab6e9c14703a66fd9b702324f36574166ab114..6198299d95b1b88806f83da5743961034dc35b8f 100644 (file)
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -40,7 +40,7 @@
   *
   * @pixclock: the wavelength, in picoseconds, of the clock
   */
-void p1022rdk_set_pixel_clock(unsigned int pixclock)
+static void p1022rdk_set_pixel_clock(unsigned int pixclock)
  {
         struct device_node *guts_np = NULL;
         struct ccsr_guts __iomem *guts;
@@ -88,7 +88,7 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock)
  /**
   * p1022rdk_valid_monitor_port: set the monitor port for sysfs
   */
-enum fsl_diu_monitor_port
+static enum fsl_diu_monitor_port
  p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port)
  {
         return FSL_DIU_PORT_DVI;
@@ -96,7 +96,7 @@ p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port)
  
  #endif
  
-void __init p1022_rdk_pic_init(void)
+static void __init p1022_rdk_pic_init(void)
  {
         struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
                 MPIC_SINGLE_DEST_CPU,
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c

index baa12eff6d5de460f56832a60bd3116e3748fa32..60e0b8947ce6106873dcc1abb22b51450524c3b2 100644 (file)
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -8,6 +8,8 @@
  #include <linux/of_irq.h>
  #include <linux/io.h>
  
+#include "socrates_fpga_pic.h"
+
  /*
   * The FPGA supports 9 interrupt sources, which can be routed to 3
   * interrupt request lines of the MPIC. The line to be used can be
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c

index 45f257fc1ade055a7fdcc6a9142e0b5404f77f0b..2582427d8d0182fffdb79d70ed584889c6ff0d5f 100644 (file)
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -37,7 +37,7 @@
  #define MPC85xx_L2CTL_L2I              0x40000000 /* L2 flash invalidate */
  #define MPC85xx_L2CTL_L2SIZ_MASK       0x30000000 /* L2 SRAM size (R/O) */
  
-void __init xes_mpc85xx_pic_init(void)
+static void __init xes_mpc85xx_pic_init(void)
  {
         struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
                         0, 256, " OpenPIC  ");
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c

index 496e16c588aaa8edcd0294825862312471928506..e8c4129697b142ba48490481ee38793086e8425a 100644 (file)
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -574,29 +574,6 @@ static void iommu_table_setparms(struct pci_controller *phb,
  
  struct iommu_table_ops iommu_table_lpar_multi_ops;
  
-/*
- * iommu_table_setparms_lpar
- *
- * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
- */
-static void iommu_table_setparms_lpar(struct pci_controller *phb,
-                                     struct device_node *dn,
-                                     struct iommu_table *tbl,
-                                     struct iommu_table_group *table_group,
-                                     const __be32 *dma_window)
-{
-       unsigned long offset, size, liobn;
-
-       of_parse_dma_window(dn, dma_window, &liobn, &offset, &size);
-
-       iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL,
-                                   &iommu_table_lpar_multi_ops);
-
-
-       table_group->tce32_start = offset;
-       table_group->tce32_size = size;
-}
-
  struct iommu_table_ops iommu_table_pseries_ops = {
         .set = tce_build_pSeries,
         .clear = tce_free_pSeries,
@@ -724,26 +701,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = {
   * dynamic 64bit DMA window, walking up the device tree.
   */
  static struct device_node *pci_dma_find(struct device_node *dn,
-                                       const __be32 **dma_window)
+                                       struct dynamic_dma_window_prop *prop)
  {
-       const __be32 *dw = NULL;
+       const __be32 *default_prop = NULL;
+       const __be32 *ddw_prop = NULL;
+       struct device_node *rdn = NULL;
+       bool default_win = false, ddw_win = false;
  
         for ( ; dn && PCI_DN(dn); dn = dn->parent) {
-               dw = of_get_property(dn, "ibm,dma-window", NULL);
-               if (dw) {
-                       if (dma_window)
-                               *dma_window = dw;
-                       return dn;
+               default_prop = of_get_property(dn, "ibm,dma-window", NULL);
+               if (default_prop) {
+                       rdn = dn;
+                       default_win = true;
+               }
+               ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL);
+               if (ddw_prop) {
+                       rdn = dn;
+                       ddw_win = true;
+                       break;
+               }
+               ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL);
+               if (ddw_prop) {
+                       rdn = dn;
+                       ddw_win = true;
+                       break;
                 }
-               dw = of_get_property(dn, DIRECT64_PROPNAME, NULL);
-               if (dw)
-                       return dn;
-               dw = of_get_property(dn, DMA64_PROPNAME, NULL);
-               if (dw)
-                       return dn;
+
+               /* At least found default window, which is the case for normal boot */
+               if (default_win)
+                       break;
         }
  
-       return NULL;
+       /* For PCI devices there will always be a DMA window, either on the device
+        * or parent bus
+        */
+       WARN_ON(!(default_win | ddw_win));
+
+       /* caller doesn't want to get DMA window property */
+       if (!prop)
+               return rdn;
+
+       /* parse DMA window property. During normal system boot, only default
+        * DMA window is passed in OF. But, for kdump, a dedicated adapter might
+        * have both default and DDW in FDT. In this scenario, DDW takes precedence
+        * over default window.
+        */
+       if (ddw_win) {
+               struct dynamic_dma_window_prop *p;
+
+               p = (struct dynamic_dma_window_prop *)ddw_prop;
+               prop->liobn = p->liobn;
+               prop->dma_base = p->dma_base;
+               prop->tce_shift = p->tce_shift;
+               prop->window_shift = p->window_shift;
+       } else if (default_win) {
+               unsigned long offset, size, liobn;
+
+               of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size);
+
+               prop->liobn = cpu_to_be32((u32)liobn);
+               prop->dma_base = cpu_to_be64(offset);
+               prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K);
+               prop->window_shift = cpu_to_be32(order_base_2(size));
+       }
+
+       return rdn;
  }
  
  static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
@@ -751,17 +773,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
         struct iommu_table *tbl;
         struct device_node *dn, *pdn;
         struct pci_dn *ppci;
-       const __be32 *dma_window = NULL;
+       struct dynamic_dma_window_prop prop;
  
         dn = pci_bus_to_OF_node(bus);
  
         pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
                  dn);
  
-       pdn = pci_dma_find(dn, &dma_window);
+       pdn = pci_dma_find(dn, &prop);
  
-       if (dma_window == NULL)
-               pr_debug("  no ibm,dma-window property !\n");
+       /* In PPC architecture, there will always be DMA window on bus or one of the
+        * parent bus. During reboot, there will be ibm,dma-window property to
+        * define DMA window. For kdump, there will at least be default window or DDW
+        * or both.
+        */
  
         ppci = PCI_DN(pdn);
  
@@ -771,13 +796,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
         if (!ppci->table_group) {
                 ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
                 tbl = ppci->table_group->tables[0];
-               if (dma_window) {
-                       iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
-                                                 ppci->table_group, dma_window);
  
-                       if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
-                               panic("Failed to initialize iommu table");
-               }
+               iommu_table_setparms_common(tbl, ppci->phb->bus->number,
+                               be32_to_cpu(prop.liobn),
+                               be64_to_cpu(prop.dma_base),
+                               1ULL << be32_to_cpu(prop.window_shift),
+                               be32_to_cpu(prop.tce_shift), NULL,
+                               &iommu_table_lpar_multi_ops);
+
+               /* Only for normal boot with default window. Doesn't matter even
+                * if we set these with DDW which is 64bit during kdump, since
+                * these will not be used during kdump.
+                */
+               ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+               ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
+
+               if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+                       panic("Failed to initialize iommu table");
+
                 iommu_register_group(ppci->table_group,
                                 pci_domain_nr(bus), 0);
                 pr_debug("  created table: %p\n", ppci->table_group);
@@ -968,6 +1004,12 @@ static void find_existing_ddw_windows_named(const char *name)
                         continue;
                 }
  
+               /* If at the time of system initialization, there are DDWs in OF,
+                * it means this is during kexec. DDW could be direct or dynamic.
+                * We will just mark DDWs as "dynamic" since this is kdump path,
+                * no need to worry about perforance. ddw_list_new_entry() will
+                * set window->direct = false.
+                */
                 window = ddw_list_new_entry(pdn, dma64);
                 if (!window) {
                         of_node_put(pdn);
@@ -1524,8 +1566,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
  {
         struct device_node *pdn, *dn;
         struct iommu_table *tbl;
-       const __be32 *dma_window = NULL;
         struct pci_dn *pci;
+       struct dynamic_dma_window_prop prop;
  
         pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
  
@@ -1538,7 +1580,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
         dn = pci_device_to_OF_node(dev);
         pr_debug("  node is %pOF\n", dn);
  
-       pdn = pci_dma_find(dn, &dma_window);
+       pdn = pci_dma_find(dn, &prop);
         if (!pdn || !PCI_DN(pdn)) {
                 printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
                        "no DMA window found for pci dev=%s dn=%pOF\n",
@@ -1551,8 +1593,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
         if (!pci->table_group) {
                 pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
                 tbl = pci->table_group->tables[0];
-               iommu_table_setparms_lpar(pci->phb, pdn, tbl,
-                               pci->table_group, dma_window);
+
+               iommu_table_setparms_common(tbl, pci->phb->bus->number,
+                               be32_to_cpu(prop.liobn),
+                               be64_to_cpu(prop.dma_base),
+                               1ULL << be32_to_cpu(prop.window_shift),
+                               be32_to_cpu(prop.tce_shift), NULL,
+                               &iommu_table_lpar_multi_ops);
+
+               /* Only for normal boot with default window. Doesn't matter even
+                * if we set these with DDW which is 64bit during kdump, since
+                * these will not be used during kdump.
+                */
+               pci->table_group->tce32_start = be64_to_cpu(prop.dma_base);
+               pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift);
  
                 iommu_init_table(tbl, pci->phb->node, 0, 0);
                 iommu_register_group(pci->table_group,
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c

index 4561667832ed403e2a4ce1847bda97d844a12485..4e9916bb03d71fb687f49c989b7f65cce08bd066 100644 (file)
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -662,8 +662,12 @@ u64 pseries_paravirt_steal_clock(int cpu)
  {
         struct lppaca *lppaca = &lppaca_of(cpu);
  
-       return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
-               be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+       /*
+        * VPA steal time counters are reported at TB frequency. Hence do a
+        * conversion to ns before returning
+        */
+       return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+                       be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)));
  }
  #endif
  
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c

index 4ba8245681192120860ad1278a1b7ec7110a4bfc..4448386268d99155657fe6179ad8fd0132676f13 100644 (file)
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -35,6 +35,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
  
         pseries_msi_allocate_domains(phb);
  
+       ppc_iommu_register_device(phb);
+
         /* Create EEH devices for the PHB */
         eeh_phb_pe_create(phb);
  
@@ -76,6 +78,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
                 }
         }
  
+       ppc_iommu_unregister_device(phb);
+
         pseries_msi_free_domains(phb);
  
         /* Keep a reference so phb isn't freed yet */
diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c

index 5020044400dcb3caa60d5efebe366249dc7ff65e..4de57ba52236513f5c2f92e42ae4d7059d6e4f2f 100644 (file)
--- a/arch/powerpc/sysdev/udbg_memcons.c
+++ b/arch/powerpc/sysdev/udbg_memcons.c
@@ -41,7 +41,7 @@ struct memcons memcons = {
         .input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE],
  };
  
-void memcons_putc(char c)
+static void memcons_putc(char c)
  {
         char *new_output_pos;
  
@@ -54,7 +54,7 @@ void memcons_putc(char c)
         memcons.output_pos = new_output_pos;
  }
  
-int memcons_getc_poll(void)
+static int memcons_getc_poll(void)
  {
         char c;
         char *new_input_pos;
@@ -77,7 +77,7 @@ int memcons_getc_poll(void)
         return -1;
  }
  
-int memcons_getc(void)
+static int memcons_getc(void)
  {
         int c;
  
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig

index bffbd869a0682842883591788da784648acf1626..e3142ce531a097b8cf0e39251ba88ae143d6594c 100644 (file)
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -315,7 +315,6 @@ config AS_HAS_OPTION_ARCH
         # https://reviews.llvm.org/D123515
         def_bool y
         depends on $(as-instr, .option arch$(comma) +m)
-       depends on !$(as-instr, .option arch$(comma) -i)
  
  source "arch/riscv/Kconfig.socs"
  source "arch/riscv/Kconfig.errata"
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts

index 07387f9c135ca7e8ddf7d45de10ccdb933a2e4d4..72b87b08ab444ef1dc1ed200a6e8b3cbb9bfc73f 100644 (file)
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -123,6 +123,7 @@
                 interrupt-parent = <&gpio>;
                 interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
                 interrupt-controller;
+               #interrupt-cells = <2>;
  
                 onkey {
                         compatible = "dlg,da9063-onkey";
diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi

index c216aaecac53f2d7d1ec47b4f250ea5ae08e11cb..8bcf36d07f3f7c38a164a5864974bc60ad11e8b1 100644 (file)
--- a/arch/riscv/boot/dts/starfive/jh7100.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi
@@ -96,14 +96,14 @@
                         thermal-sensors = <&sfctemp>;
  
                         trips {
-                               cpu_alert0 {
+                               cpu-alert0 {
                                         /* milliCelsius */
                                         temperature = <75000>;
                                         hysteresis = <2000>;
                                         type = "passive";
                                 };
  
-                               cpu_crit {
+                               cpu-crit {
                                         /* milliCelsius */
                                         temperature = <90000>;
                                         hysteresis = <2000>;
@@ -113,28 +113,28 @@
                 };
         };
  
-       osc_sys: osc_sys {
+       osc_sys: osc-sys {
                 compatible = "fixed-clock";
                 #clock-cells = <0>;
                 /* This value must be overridden by the board */
                 clock-frequency = <0>;
         };
  
-       osc_aud: osc_aud {
+       osc_aud: osc-aud {
                 compatible = "fixed-clock";
                 #clock-cells = <0>;
                 /* This value must be overridden by the board */
                 clock-frequency = <0>;
         };
  
-       gmac_rmii_ref: gmac_rmii_ref {
+       gmac_rmii_ref: gmac-rmii-ref {
                 compatible = "fixed-clock";
                 #clock-cells = <0>;
                 /* Should be overridden by the board when needed */
                 clock-frequency = <0>;
         };
  
-       gmac_gr_mii_rxclk: gmac_gr_mii_rxclk {
+       gmac_gr_mii_rxclk: gmac-gr-mii-rxclk {
                 compatible = "fixed-clock";
                 #clock-cells = <0>;
                 /* Should be overridden by the board when needed */
diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi

index 45213cdf50dc75a9fa6610710a4d0cbe58b44c51..74ed3b9264d8f15ee10400b4bf5fcf855b7cecd0 100644 (file)
--- a/arch/riscv/boot/dts/starfive/jh7110.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi
@@ -237,14 +237,14 @@
                         };
  
                         trips {
-                               cpu_alert0: cpu_alert0 {
+                               cpu_alert0: cpu-alert0 {
                                         /* milliCelsius */
                                         temperature = <85000>;
                                         hysteresis = <2000>;
                                         type = "passive";
                                 };
  
-                               cpu_crit {
+                               cpu-crit {
                                         /* milliCelsius */
                                         temperature = <100000>;
                                         hysteresis = <2000>;
diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h

index c20236a0725b9e27a31d28b580db1bd1ad2c945a..85b2c443823e8ab8ce55aa5b32953eabbc5e1108 100644 (file)
--- a/arch/riscv/include/asm/arch_hweight.h
+++ b/arch/riscv/include/asm/arch_hweight.h
@@ -20,7 +20,7 @@
  static __always_inline unsigned int __arch_hweight32(unsigned int w)
  {
  #ifdef CONFIG_RISCV_ISA_ZBB
-       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
                                       RISCV_ISA_EXT_ZBB, 1)
                           : : : : legacy);
  
@@ -51,7 +51,7 @@ static inline unsigned int __arch_hweight8(unsigned int w)
  static __always_inline unsigned long __arch_hweight64(__u64 w)
  {
  # ifdef CONFIG_RISCV_ISA_ZBB
-       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
                                       RISCV_ISA_EXT_ZBB, 1)
                           : : : : legacy);
  
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h

index 9ffc355370248aed22dbe690ba1cde8e682a3588..329d8244a9b3fd516104808db5a959acfb469b22 100644 (file)
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -39,7 +39,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
  {
         int num;
  
-       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
                                       RISCV_ISA_EXT_ZBB, 1)
                           : : : : legacy);
  
@@ -95,7 +95,7 @@ static __always_inline unsigned long variable__fls(unsigned long word)
  {
         int num;
  
-       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
                                       RISCV_ISA_EXT_ZBB, 1)
                           : : : : legacy);
  
@@ -154,7 +154,7 @@ static __always_inline int variable_ffs(int x)
         if (!x)
                 return 0;
  
-       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
                                       RISCV_ISA_EXT_ZBB, 1)
                           : : : : legacy);
  
@@ -209,7 +209,7 @@ static __always_inline int variable_fls(unsigned int x)
         if (!x)
                 return 0;
  
-       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
                                       RISCV_ISA_EXT_ZBB, 1)
                           : : : : legacy);
  
diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h

index a5b60b54b101c3ba1e550b3e16e7096a6bfc8357..88e6f1499e889951b2871fec052330b3c92f2eb7 100644 (file)
--- a/arch/riscv/include/asm/checksum.h
+++ b/arch/riscv/include/asm/checksum.h
@@ -53,7 +53,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
             IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
                 unsigned long fold_temp;
  
-               asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+               asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                               RISCV_ISA_EXT_ZBB, 1)
                     :
                     :
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h

index 5a626ed2c47a8915b3848df2e7f4a7ea0601bd71..0bd11862b7607b9ffebf8460ea6cc00cc1e4ff62 100644 (file)
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -80,7 +80,7 @@ riscv_has_extension_likely(const unsigned long ext)
                            "ext must be < RISCV_ISA_EXT_MAX");
  
         if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
-               asm_volatile_goto(
+               asm goto(
                 ALTERNATIVE("j  %l[l_no]", "nop", 0, %[ext], 1)
                 :
                 : [ext] "i" (ext)
@@ -103,7 +103,7 @@ riscv_has_extension_unlikely(const unsigned long ext)
                            "ext must be < RISCV_ISA_EXT_MAX");
  
         if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
-               asm_volatile_goto(
+               asm goto(
                 ALTERNATIVE("nop", "j   %l[l_yes]", 0, %[ext], 1)
                 :
                 : [ext] "i" (ext)
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h

index 510014051f5dbb1aa61098e4974e7e7ac02145ee..2468c55933cd0d5d55d71d83a52226172bd5121c 100644 (file)
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -424,6 +424,7 @@
  # define CSR_STATUS    CSR_MSTATUS
  # define CSR_IE                CSR_MIE
  # define CSR_TVEC      CSR_MTVEC
+# define CSR_ENVCFG    CSR_MENVCFG
  # define CSR_SCRATCH   CSR_MSCRATCH
  # define CSR_EPC       CSR_MEPC
  # define CSR_CAUSE     CSR_MCAUSE
@@ -448,6 +449,7 @@
  # define CSR_STATUS    CSR_SSTATUS
  # define CSR_IE                CSR_SIE
  # define CSR_TVEC      CSR_STVEC
+# define CSR_ENVCFG    CSR_SENVCFG
  # define CSR_SCRATCH   CSR_SSCRATCH
  # define CSR_EPC       CSR_SEPC
  # define CSR_CAUSE     CSR_SCAUSE
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h

index 3291721229523456247532009bc2ed2ddc444540..15055f9df4daa1e4250c8a37c64193bf5c943ee3 100644 (file)
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -25,6 +25,11 @@
  
  #define ARCH_SUPPORTS_FTRACE_OPS 1
  #ifndef __ASSEMBLY__
+
+extern void *return_address(unsigned int level);
+
+#define ftrace_return_address(n) return_address(n)
+
  void MCOUNT_NAME(void);
  static inline unsigned long ftrace_call_adjust(unsigned long addr)
  {
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h

index 4c5b0e929890fadcebb3caace0afe97dfa46d8bf..22deb7a2a6ec4e4daba8322c7c6c28137b49f5f8 100644 (file)
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -11,6 +11,11 @@ static inline void arch_clear_hugepage_flags(struct page *page)
  }
  #define arch_clear_hugepage_flags arch_clear_hugepage_flags
  
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+bool arch_hugetlb_migration_supported(struct hstate *h);
+#define arch_hugetlb_migration_supported arch_hugetlb_migration_supported
+#endif
+
  #ifdef CONFIG_RISCV_ISA_SVNAPOT
  #define __HAVE_ARCH_HUGE_PTE_CLEAR
  void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h

index 5340f818746b71a805319eb6f941fa311c9b36a2..1f2d2599c655d20be6df7516382e20a7e3956301 100644 (file)
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,8 @@
  #define RISCV_ISA_EXT_ZTSO             72
  #define RISCV_ISA_EXT_ZACAS            73
  
+#define RISCV_ISA_EXT_XLINUXENVCFG     127
+
  #define RISCV_ISA_EXT_MAX              128
  #define RISCV_ISA_EXT_INVALID          U32_MAX
  
diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h

index 14a5ea8d8ef0f4a2f4477fb65778e4f8ea449e2a..4a35d787c0191475b3a5d8dc7452e448541dc8e9 100644 (file)
--- a/arch/riscv/include/asm/jump_label.h
+++ b/arch/riscv/include/asm/jump_label.h
@@ -17,7 +17,7 @@
  static __always_inline bool arch_static_branch(struct static_key * const key,
                                                const bool branch)
  {
-       asm_volatile_goto(
+       asm goto(
                 "       .align          2                       \n\t"
                 "       .option push                            \n\t"
                 "       .option norelax                         \n\t"
@@ -39,7 +39,7 @@ label:
  static __always_inline bool arch_static_branch_jump(struct static_key * const key,
                                                     const bool branch)
  {
-       asm_volatile_goto(
+       asm goto(
                 "       .align          2                       \n\t"
                 "       .option push                            \n\t"
                 "       .option norelax                         \n\t"
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h

index d169a4f41a2e728276a97898e1270c7b4763f9ed..c80bb9990d32ef706452d7d4fcc1c049cd7436d9 100644 (file)
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -95,7 +95,13 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
                 __pud_free(mm, pud);
  }
  
-#define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
+#define __pud_free_tlb(tlb, pud, addr)                                 \
+do {                                                                   \
+       if (pgtable_l4_enabled) {                                       \
+               pagetable_pud_dtor(virt_to_ptdesc(pud));                \
+               tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud));     \
+       }                                                               \
+} while (0)
  
  #define p4d_alloc_one p4d_alloc_one
  static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -124,7 +130,11 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
                 __p4d_free(mm, p4d);
  }
  
-#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
+#define __p4d_free_tlb(tlb, p4d, addr)                                 \
+do {                                                                   \
+       if (pgtable_l5_enabled)                                         \
+               tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d));     \
+} while (0)
  #endif /* __PAGETABLE_PMD_FOLDED */
  
  static inline void sync_kernel_mappings(pgd_t *pgd)
@@ -149,7 +159,11 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
  
  #ifndef __PAGETABLE_PMD_FOLDED
  
-#define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
+#define __pmd_free_tlb(tlb, pmd, addr)                         \
+do {                                                           \
+       pagetable_pmd_dtor(virt_to_ptdesc(pmd));                \
+       tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));     \
+} while (0)
  
  #endif /* __PAGETABLE_PMD_FOLDED */
  
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h

index b42017d76924f74386bc712719280af21781bb5d..b99bd66107a69038c835ead6b77725aaeaf882c3 100644 (file)
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -136,7 +136,7 @@ enum napot_cont_order {
   * 10010 - IO   Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable
   */
  #define _PAGE_PMA_THEAD                ((1UL << 62) | (1UL << 61) | (1UL << 60))
-#define _PAGE_NOCACHE_THEAD    ((1UL < 61) | (1UL << 60))
+#define _PAGE_NOCACHE_THEAD    ((1UL << 61) | (1UL << 60))
  #define _PAGE_IO_THEAD         ((1UL << 63) | (1UL << 60))
  #define _PAGE_MTMASK_THEAD     (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59))
  
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h

index 0c94260b5d0c126f6302f39a59507f19eed48dac..6066822e7396fa5078a546356a3a6f6605470712 100644 (file)
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -84,7 +84,7 @@
   * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
   * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
   */
-#define vmemmap                ((struct page *)VMEMMAP_START)
+#define vmemmap                ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
  
  #define PCI_IO_SIZE      SZ_16M
  #define PCI_IO_END       VMEMMAP_START
@@ -439,6 +439,10 @@ static inline pte_t pte_mkhuge(pte_t pte)
         return pte;
  }
  
+#define pte_leaf_size(pte)     (pte_napot(pte) ?                               \
+                                       napot_cont_size(napot_cont_order(pte)) :\
+                                       PAGE_SIZE)
+
  #ifdef CONFIG_NUMA_BALANCING
  /*
   * See the comment in include/asm-generic/pgtable.h
diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h

index f7e8ef2418b99fc98362a1a977f8038f7592fbac..b1495a7e06ce693b4fc698ee4d62549bd0614700 100644 (file)
--- a/arch/riscv/include/asm/stacktrace.h
+++ b/arch/riscv/include/asm/stacktrace.h
@@ -21,4 +21,9 @@ static inline bool on_thread_stack(void)
         return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
  }
  
+
+#ifdef CONFIG_VMAP_STACK
+DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+#endif /* CONFIG_VMAP_STACK */
+
  #endif /* _ASM_RISCV_STACKTRACE_H */
diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h

index 02f87867389a9e660f91b64c7ca818a6b61637dc..491296a335d0ce6cd9c8f242646c3c60c762bc87 100644 (file)
--- a/arch/riscv/include/asm/suspend.h
+++ b/arch/riscv/include/asm/suspend.h
@@ -14,6 +14,7 @@ struct suspend_context {
         struct pt_regs regs;
         /* Saved and restored by high-level functions */
         unsigned long scratch;
+       unsigned long envcfg;
         unsigned long tvec;
         unsigned long ie;
  #ifdef CONFIG_MMU
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h

index 1eb5682b2af6065c9019e398df729f5b97a573c6..50b63b5c15bd8b19dac37176ef98c3489c837e05 100644 (file)
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -16,7 +16,7 @@ static void tlb_flush(struct mmu_gather *tlb);
  static inline void tlb_flush(struct mmu_gather *tlb)
  {
  #ifdef CONFIG_MMU
-       if (tlb->fullmm || tlb->need_flush_all)
+       if (tlb->fullmm || tlb->need_flush_all || tlb->freed_tables)
                 flush_tlb_mm(tlb->mm);
         else
                 flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end,
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h

index 928f096dca21b4e6cbafc009595cd34bb9917109..4112cc8d1d69f9fbde77a524820a5de1e7931acf 100644 (file)
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -75,6 +75,7 @@ static inline void flush_tlb_kernel_range(unsigned long start,
  
  #define flush_tlb_mm(mm) flush_tlb_all()
  #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
+#define local_flush_tlb_kernel_range(start, end) flush_tlb_all()
  #endif /* !CONFIG_SMP || !CONFIG_MMU */
  
  #endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h

index 924d01b56c9a1eb1eacd53a923fc55591cda654f..51f6dfe19745aa486bd73d7de472faa538cf0486 100644 (file)
--- a/arch/riscv/include/asm/vmalloc.h
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -19,65 +19,6 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
         return true;
  }
  
-#ifdef CONFIG_RISCV_ISA_SVNAPOT
-#include <linux/pgtable.h>
+#endif
  
-#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
-static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
-                                                        u64 pfn, unsigned int max_page_shift)
-{
-       unsigned long map_size = PAGE_SIZE;
-       unsigned long size, order;
-
-       if (!has_svnapot())
-               return map_size;
-
-       for_each_napot_order_rev(order) {
-               if (napot_cont_shift(order) > max_page_shift)
-                       continue;
-
-               size = napot_cont_size(order);
-               if (end - addr < size)
-                       continue;
-
-               if (!IS_ALIGNED(addr, size))
-                       continue;
-
-               if (!IS_ALIGNED(PFN_PHYS(pfn), size))
-                       continue;
-
-               map_size = size;
-               break;
-       }
-
-       return map_size;
-}
-
-#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
-static inline int arch_vmap_pte_supported_shift(unsigned long size)
-{
-       int shift = PAGE_SHIFT;
-       unsigned long order;
-
-       if (!has_svnapot())
-               return shift;
-
-       WARN_ON_ONCE(size >= PMD_SIZE);
-
-       for_each_napot_order_rev(order) {
-               if (napot_cont_size(order) > size)
-                       continue;
-
-               if (!IS_ALIGNED(size, napot_cont_size(order)))
-                       continue;
-
-               shift = napot_cont_shift(order);
-               break;
-       }
-
-       return shift;
-}
-
-#endif /* CONFIG_RISCV_ISA_SVNAPOT */
-#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
  #endif /* _ASM_RISCV_VMALLOC_H */
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h

index d6b7a5b958742c443bce93e067434128c1cff7e4..b1c503c2959c34ee4fb7f7c7a4b3a0b6c3dda240 100644 (file)
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -16,7 +16,6 @@
  #include <asm/ptrace.h>
  
  #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
  
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
  
@@ -139,6 +138,35 @@ enum KVM_RISCV_ISA_EXT_ID {
         KVM_RISCV_ISA_EXT_ZIHPM,
         KVM_RISCV_ISA_EXT_SMSTATEEN,
         KVM_RISCV_ISA_EXT_ZICOND,
+       KVM_RISCV_ISA_EXT_ZBC,
+       KVM_RISCV_ISA_EXT_ZBKB,
+       KVM_RISCV_ISA_EXT_ZBKC,
+       KVM_RISCV_ISA_EXT_ZBKX,
+       KVM_RISCV_ISA_EXT_ZKND,
+       KVM_RISCV_ISA_EXT_ZKNE,
+       KVM_RISCV_ISA_EXT_ZKNH,
+       KVM_RISCV_ISA_EXT_ZKR,
+       KVM_RISCV_ISA_EXT_ZKSED,
+       KVM_RISCV_ISA_EXT_ZKSH,
+       KVM_RISCV_ISA_EXT_ZKT,
+       KVM_RISCV_ISA_EXT_ZVBB,
+       KVM_RISCV_ISA_EXT_ZVBC,
+       KVM_RISCV_ISA_EXT_ZVKB,
+       KVM_RISCV_ISA_EXT_ZVKG,
+       KVM_RISCV_ISA_EXT_ZVKNED,
+       KVM_RISCV_ISA_EXT_ZVKNHA,
+       KVM_RISCV_ISA_EXT_ZVKNHB,
+       KVM_RISCV_ISA_EXT_ZVKSED,
+       KVM_RISCV_ISA_EXT_ZVKSH,
+       KVM_RISCV_ISA_EXT_ZVKT,
+       KVM_RISCV_ISA_EXT_ZFH,
+       KVM_RISCV_ISA_EXT_ZFHMIN,
+       KVM_RISCV_ISA_EXT_ZIHINTNTL,
+       KVM_RISCV_ISA_EXT_ZVFH,
+       KVM_RISCV_ISA_EXT_ZVFHMIN,
+       KVM_RISCV_ISA_EXT_ZFA,
+       KVM_RISCV_ISA_EXT_ZTSO,
+       KVM_RISCV_ISA_EXT_ZACAS,
         KVM_RISCV_ISA_EXT_MAX,
  };
  
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile

index f71910718053d841a361fd97e7d62da4f86bebcf..604d6bf7e47672e9b01902f6fa497aeb4e102ee5 100644 (file)
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE
  CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
  CFLAGS_REMOVE_patch.o  = $(CC_FLAGS_FTRACE)
  CFLAGS_REMOVE_sbi.o    = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
  endif
  CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
  CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,)
@@ -46,6 +47,7 @@ obj-y += irq.o
  obj-y  += process.o
  obj-y  += ptrace.o
  obj-y  += reset.o
+obj-y  += return_address.o
  obj-y  += setup.o
  obj-y  += signal.o
  obj-y  += syscall_table.o
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c

index 89920f84d0a34385471e9afbf9c26d287cbbd838..79a5a35fab964d3b54db97b5504f45f68dface11 100644 (file)
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -24,6 +24,7 @@
  #include <asm/hwprobe.h>
  #include <asm/patch.h>
  #include <asm/processor.h>
+#include <asm/sbi.h>
  #include <asm/vector.h>
  
  #include "copy-unaligned.h"
@@ -201,6 +202,16 @@ static const unsigned int riscv_zvbb_exts[] = {
         RISCV_ISA_EXT_ZVKB
  };
  
+/*
+ * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V
+ * privileged ISA, the existence of the CSRs is implied by any extension which
+ * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the
+ * existence of the CSR, and treat it as a subset of those other extensions.
+ */
+static const unsigned int riscv_xlinuxenvcfg_exts[] = {
+       RISCV_ISA_EXT_XLINUXENVCFG
+};
+
  /*
   * The canonical order of ISA extension names in the ISA string is defined in
   * chapter 27 of the unprivileged specification.
@@ -250,8 +261,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
         __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
         __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
         __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
-       __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
-       __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+       __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts),
+       __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts),
         __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
         __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
         __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
@@ -538,6 +549,20 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
                         set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa);
                 }
  
+               /*
+                * "V" in ISA strings is ambiguous in practice: it should mean
+                * just the standard V-1.0 but vendors aren't well behaved.
+                * Many vendors with T-Head CPU cores which implement the 0.7.1
+                * version of the vector specification put "v" into their DTs.
+                * CPU cores with the ratified spec will contain non-zero
+                * marchid.
+                */
+               if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID &&
+                   riscv_cached_marchid(cpu) == 0x0) {
+                       this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v];
+                       clear_bit(RISCV_ISA_EXT_v, isainfo->isa);
+               }
+
                 /*
                  * All "okay" hart should have same isa. Set HWCAP based on
                  * common capabilities of every "okay" hart, in case they don't
@@ -950,7 +975,7 @@ arch_initcall(check_unaligned_access_all_cpus);
  void riscv_user_isa_enable(void)
  {
         if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
-               csr_set(CSR_SENVCFG, ENVCFG_CBZE);
+               csr_set(CSR_ENVCFG, ENVCFG_CBZE);
  }
  
  #ifdef CONFIG_RISCV_ALTERNATIVE
diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c

index 8e114f5930cec6148b98f7f81abd72a798caf8e8..0d6225fd3194e14ed71ac9afc716b2e81168e9a5 100644 (file)
--- a/arch/riscv/kernel/paravirt.c
+++ b/arch/riscv/kernel/paravirt.c
@@ -41,7 +41,7 @@ static int __init parse_no_stealacc(char *arg)
  
  early_param("no-steal-acc", parse_no_stealacc);
  
-DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
+static DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
  
  static bool __init has_pv_steal_clock(void)
  {
@@ -91,8 +91,8 @@ static int pv_time_cpu_down_prepare(unsigned int cpu)
  static u64 pv_time_steal_clock(int cpu)
  {
         struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
-       u32 sequence;
-       u64 steal;
+       __le32 sequence;
+       __le64 steal;
  
         /*
          * Check the sequence field before and after reading the steal
diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c

new file mode 100644 (file)

index 0000000..c8115ec
--- /dev/null
+++ b/arch/riscv/kernel/return_address.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code come from arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2023 SiFive.
+ */
+
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/stacktrace.h>
+
+struct return_address_data {
+       unsigned int level;
+       void *addr;
+};
+
+static bool save_return_addr(void *d, unsigned long pc)
+{
+       struct return_address_data *data = d;
+
+       if (!data->level) {
+               data->addr = (void *)pc;
+               return false;
+       }
+
+       --data->level;
+
+       return true;
+}
+NOKPROBE_SYMBOL(save_return_addr);
+
+noinline void *return_address(unsigned int level)
+{
+       struct return_address_data data;
+
+       data.level = level + 3;
+       data.addr = NULL;
+
+       arch_stack_walk(save_return_addr, &data, current, NULL);
+
+       if (!data.level)
+               return data.addr;
+       else
+               return NULL;
+
+}
+EXPORT_SYMBOL_GPL(return_address);
+NOKPROBE_SYMBOL(return_address);
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c

index 239509367e4233336806c19da964a06537d5a9b5..299795341e8a2207dc922373511e31118bbd0f8b 100644 (file)
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -15,6 +15,8 @@
  void suspend_save_csrs(struct suspend_context *context)
  {
         context->scratch = csr_read(CSR_SCRATCH);
+       if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+               context->envcfg = csr_read(CSR_ENVCFG);
         context->tvec = csr_read(CSR_TVEC);
         context->ie = csr_read(CSR_IE);
  
@@ -36,6 +38,8 @@ void suspend_save_csrs(struct suspend_context *context)
  void suspend_restore_csrs(struct suspend_context *context)
  {
         csr_write(CSR_SCRATCH, context->scratch);
+       if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+               csr_write(CSR_ENVCFG, context->envcfg);
         csr_write(CSR_TVEC, context->tvec);
         csr_write(CSR_IE, context->ie);
  
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig

index d490db94385883eb3e74048c799cea3f6a07e8cf..26d1727f0550d3dbf8cf55ce9e1d3ca1e2d41fc4 100644 (file)
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -24,6 +24,7 @@ config KVM
         select HAVE_KVM_IRQ_ROUTING
         select HAVE_KVM_MSI
         select HAVE_KVM_VCPU_ASYNC_IOCTL
+       select HAVE_KVM_READONLY_MEM
         select KVM_COMMON
         select KVM_GENERIC_DIRTYLOG_READ_PROTECT
         select KVM_GENERIC_HARDWARE_ENABLING
diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c

index 7a6abed41bc170b9545662c3c9387b109ca2db3f..ee7215f4071f52186c4aa2295fbb385cae0f7a0f 100644 (file)
--- a/arch/riscv/kvm/vcpu_insn.c
+++ b/arch/riscv/kvm/vcpu_insn.c
@@ -7,6 +7,8 @@
  #include <linux/bitops.h>
  #include <linux/kvm_host.h>
  
+#include <asm/cpufeature.h>
+
  #define INSN_OPCODE_MASK       0x007c
  #define INSN_OPCODE_SHIFT      2
  #define INSN_OPCODE_SYSTEM     28
@@ -213,9 +215,20 @@ struct csr_func {
                     unsigned long wr_mask);
  };
  
+static int seed_csr_rmw(struct kvm_vcpu *vcpu, unsigned int csr_num,
+                       unsigned long *val, unsigned long new_val,
+                       unsigned long wr_mask)
+{
+       if (!riscv_isa_extension_available(vcpu->arch.isa, ZKR))
+               return KVM_INSN_ILLEGAL_TRAP;
+
+       return KVM_INSN_EXIT_TO_USER_SPACE;
+}
+
  static const struct csr_func csr_funcs[] = {
         KVM_RISCV_VCPU_AIA_CSR_FUNCS
         KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS
+       { .base = CSR_SEED, .count = 1, .func = seed_csr_rmw },
  };
  
  /**
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c

index fc34557f5356e27902a2f83c27eb37f1237c9b95..f4a6124d25c939ecdf5dc631d8c7aa69a2684621 100644 (file)
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -40,17 +40,46 @@ static const unsigned long kvm_isa_ext_arr[] = {
         KVM_ISA_EXT_ARR(SVINVAL),
         KVM_ISA_EXT_ARR(SVNAPOT),
         KVM_ISA_EXT_ARR(SVPBMT),
+       KVM_ISA_EXT_ARR(ZACAS),
         KVM_ISA_EXT_ARR(ZBA),
         KVM_ISA_EXT_ARR(ZBB),
+       KVM_ISA_EXT_ARR(ZBC),
+       KVM_ISA_EXT_ARR(ZBKB),
+       KVM_ISA_EXT_ARR(ZBKC),
+       KVM_ISA_EXT_ARR(ZBKX),
         KVM_ISA_EXT_ARR(ZBS),
+       KVM_ISA_EXT_ARR(ZFA),
+       KVM_ISA_EXT_ARR(ZFH),
+       KVM_ISA_EXT_ARR(ZFHMIN),
         KVM_ISA_EXT_ARR(ZICBOM),
         KVM_ISA_EXT_ARR(ZICBOZ),
         KVM_ISA_EXT_ARR(ZICNTR),
         KVM_ISA_EXT_ARR(ZICOND),
         KVM_ISA_EXT_ARR(ZICSR),
         KVM_ISA_EXT_ARR(ZIFENCEI),
+       KVM_ISA_EXT_ARR(ZIHINTNTL),
         KVM_ISA_EXT_ARR(ZIHINTPAUSE),
         KVM_ISA_EXT_ARR(ZIHPM),
+       KVM_ISA_EXT_ARR(ZKND),
+       KVM_ISA_EXT_ARR(ZKNE),
+       KVM_ISA_EXT_ARR(ZKNH),
+       KVM_ISA_EXT_ARR(ZKR),
+       KVM_ISA_EXT_ARR(ZKSED),
+       KVM_ISA_EXT_ARR(ZKSH),
+       KVM_ISA_EXT_ARR(ZKT),
+       KVM_ISA_EXT_ARR(ZTSO),
+       KVM_ISA_EXT_ARR(ZVBB),
+       KVM_ISA_EXT_ARR(ZVBC),
+       KVM_ISA_EXT_ARR(ZVFH),
+       KVM_ISA_EXT_ARR(ZVFHMIN),
+       KVM_ISA_EXT_ARR(ZVKB),
+       KVM_ISA_EXT_ARR(ZVKG),
+       KVM_ISA_EXT_ARR(ZVKNED),
+       KVM_ISA_EXT_ARR(ZVKNHA),
+       KVM_ISA_EXT_ARR(ZVKNHB),
+       KVM_ISA_EXT_ARR(ZVKSED),
+       KVM_ISA_EXT_ARR(ZVKSH),
+       KVM_ISA_EXT_ARR(ZVKT),
  };
  
  static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
@@ -90,15 +119,44 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
         case KVM_RISCV_ISA_EXT_SSTC:
         case KVM_RISCV_ISA_EXT_SVINVAL:
         case KVM_RISCV_ISA_EXT_SVNAPOT:
+       case KVM_RISCV_ISA_EXT_ZACAS:
         case KVM_RISCV_ISA_EXT_ZBA:
         case KVM_RISCV_ISA_EXT_ZBB:
+       case KVM_RISCV_ISA_EXT_ZBC:
+       case KVM_RISCV_ISA_EXT_ZBKB:
+       case KVM_RISCV_ISA_EXT_ZBKC:
+       case KVM_RISCV_ISA_EXT_ZBKX:
         case KVM_RISCV_ISA_EXT_ZBS:
+       case KVM_RISCV_ISA_EXT_ZFA:
+       case KVM_RISCV_ISA_EXT_ZFH:
+       case KVM_RISCV_ISA_EXT_ZFHMIN:
         case KVM_RISCV_ISA_EXT_ZICNTR:
         case KVM_RISCV_ISA_EXT_ZICOND:
         case KVM_RISCV_ISA_EXT_ZICSR:
         case KVM_RISCV_ISA_EXT_ZIFENCEI:
+       case KVM_RISCV_ISA_EXT_ZIHINTNTL:
         case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
         case KVM_RISCV_ISA_EXT_ZIHPM:
+       case KVM_RISCV_ISA_EXT_ZKND:
+       case KVM_RISCV_ISA_EXT_ZKNE:
+       case KVM_RISCV_ISA_EXT_ZKNH:
+       case KVM_RISCV_ISA_EXT_ZKR:
+       case KVM_RISCV_ISA_EXT_ZKSED:
+       case KVM_RISCV_ISA_EXT_ZKSH:
+       case KVM_RISCV_ISA_EXT_ZKT:
+       case KVM_RISCV_ISA_EXT_ZTSO:
+       case KVM_RISCV_ISA_EXT_ZVBB:
+       case KVM_RISCV_ISA_EXT_ZVBC:
+       case KVM_RISCV_ISA_EXT_ZVFH:
+       case KVM_RISCV_ISA_EXT_ZVFHMIN:
+       case KVM_RISCV_ISA_EXT_ZVKB:
+       case KVM_RISCV_ISA_EXT_ZVKG:
+       case KVM_RISCV_ISA_EXT_ZVKNED:
+       case KVM_RISCV_ISA_EXT_ZVKNHA:
+       case KVM_RISCV_ISA_EXT_ZVKNHB:
+       case KVM_RISCV_ISA_EXT_ZVKSED:
+       case KVM_RISCV_ISA_EXT_ZVKSH:
+       case KVM_RISCV_ISA_EXT_ZVKT:
                 return false;
         /* Extensions which can be disabled using Smstateen */
         case KVM_RISCV_ISA_EXT_SSAIA:
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c

index 01f09fe8c3b020968be3f623097c9a48ab958087..d8cf9ca28c616e9d4073465a71dcc7479be3d35a 100644 (file)
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -26,8 +26,12 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
  {
         gpa_t shmem = vcpu->arch.sta.shmem;
         u64 last_steal = vcpu->arch.sta.last_steal;
-       u32 *sequence_ptr, sequence;
-       u64 *steal_ptr, steal;
+       __le32 __user *sequence_ptr;
+       __le64 __user *steal_ptr;
+       __le32 sequence_le;
+       __le64 steal_le;
+       u32 sequence;
+       u64 steal;
         unsigned long hva;
         gfn_t gfn;
  
@@ -47,22 +51,22 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
                 return;
         }
  
-       sequence_ptr = (u32 *)(hva + offset_in_page(shmem) +
+       sequence_ptr = (__le32 __user *)(hva + offset_in_page(shmem) +
                                offsetof(struct sbi_sta_struct, sequence));
-       steal_ptr = (u64 *)(hva + offset_in_page(shmem) +
+       steal_ptr = (__le64 __user *)(hva + offset_in_page(shmem) +
                             offsetof(struct sbi_sta_struct, steal));
  
-       if (WARN_ON(get_user(sequence, sequence_ptr)))
+       if (WARN_ON(get_user(sequence_le, sequence_ptr)))
                 return;
  
-       sequence = le32_to_cpu(sequence);
+       sequence = le32_to_cpu(sequence_le);
         sequence += 1;
  
         if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr)))
                 return;
  
-       if (!WARN_ON(get_user(steal, steal_ptr))) {
-               steal = le64_to_cpu(steal);
+       if (!WARN_ON(get_user(steal_le, steal_ptr))) {
+               steal = le64_to_cpu(steal_le);
                 vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay);
                 steal += vcpu->arch.sta.last_steal - last_steal;
                 WARN_ON(put_user(cpu_to_le64(steal), steal_ptr));
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c

index af3df5274ccbae0118488080040f45881a3e025a..74af3ab520b6d433836930937dd90ffa2e672339 100644 (file)
--- a/arch/riscv/lib/csum.c
+++ b/arch/riscv/lib/csum.c
@@ -53,7 +53,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
                  * support, so nop when Zbb is available and jump when Zbb is
                  * not available.
                  */
-               asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+               asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                               RISCV_ISA_EXT_ZBB, 1)
                                   :
                                   :
@@ -170,7 +170,7 @@ do_csum_with_alignment(const unsigned char *buff, int len)
                  * support, so nop when Zbb is available and jump when Zbb is
                  * not available.
                  */
-               asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+               asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                               RISCV_ISA_EXT_ZBB, 1)
                                   :
                                   :
@@ -178,7 +178,7 @@ do_csum_with_alignment(const unsigned char *buff, int len)
                                   : no_zbb);
  
  #ifdef CONFIG_32BIT
-               asm_volatile_goto(".option push                 \n\
+               asm_goto_output(".option push                   \n\
                 .option arch,+zbb                               \n\
                         rori    %[fold_temp], %[csum], 16       \n\
                         andi    %[offset], %[offset], 1         \n\
@@ -193,7 +193,7 @@ do_csum_with_alignment(const unsigned char *buff, int len)
  
                 return (unsigned short)csum;
  #else /* !CONFIG_32BIT */
-               asm_volatile_goto(".option push                 \n\
+               asm_goto_output(".option push                   \n\
                 .option arch,+zbb                               \n\
                         rori    %[fold_temp], %[csum], 32       \n\
                         add     %[csum], %[fold_temp], %[csum]  \n\
@@ -257,7 +257,7 @@ do_csum_no_alignment(const unsigned char *buff, int len)
                  * support, so nop when Zbb is available and jump when Zbb is
                  * not available.
                  */
-               asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+               asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                               RISCV_ISA_EXT_ZBB, 1)
                                   :
                                   :
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c

index 431596c0e20e04e1ad5a2422fa989f27cdfdc7c4..5ef2a6891158a6d59de8f36b4f4d98cf3ad6eb2a 100644 (file)
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -125,6 +125,26 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
         return pte;
  }
  
+unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+       unsigned long hp_size = huge_page_size(h);
+
+       switch (hp_size) {
+#ifndef __PAGETABLE_PMD_FOLDED
+       case PUD_SIZE:
+               return P4D_SIZE - PUD_SIZE;
+#endif
+       case PMD_SIZE:
+               return PUD_SIZE - PMD_SIZE;
+       case napot_cont_size(NAPOT_CONT64KB_ORDER):
+               return PMD_SIZE - napot_cont_size(NAPOT_CONT64KB_ORDER);
+       default:
+               break;
+       }
+
+       return 0UL;
+}
+
  static pte_t get_clear_contig(struct mm_struct *mm,
                               unsigned long addr,
                               pte_t *ptep,
@@ -177,13 +197,36 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
         return entry;
  }
  
+static void clear_flush(struct mm_struct *mm,
+                       unsigned long addr,
+                       pte_t *ptep,
+                       unsigned long pgsize,
+                       unsigned long ncontig)
+{
+       struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+       unsigned long i, saddr = addr;
+
+       for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
+               ptep_get_and_clear(mm, addr, ptep);
+
+       flush_tlb_range(&vma, saddr, addr);
+}
+
+/*
+ * When dealing with NAPOT mappings, the privileged specification indicates that
+ * "if an update needs to be made, the OS generally should first mark all of the
+ * PTEs invalid, then issue SFENCE.VMA instruction(s) covering all 4 KiB regions
+ * within the range, [...] then update the PTE(s), as described in Section
+ * 4.2.1.". That's the equivalent of the Break-Before-Make approach used by
+ * arm64.
+ */
  void set_huge_pte_at(struct mm_struct *mm,
                      unsigned long addr,
                      pte_t *ptep,
                      pte_t pte,
                      unsigned long sz)
  {
-       unsigned long hugepage_shift;
+       unsigned long hugepage_shift, pgsize;
         int i, pte_num;
  
         if (sz >= PGDIR_SIZE)
@@ -198,7 +241,22 @@ void set_huge_pte_at(struct mm_struct *mm,
                 hugepage_shift = PAGE_SHIFT;
  
         pte_num = sz >> hugepage_shift;
-       for (i = 0; i < pte_num; i++, ptep++, addr += (1 << hugepage_shift))
+       pgsize = 1 << hugepage_shift;
+
+       if (!pte_present(pte)) {
+               for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
+                       set_ptes(mm, addr, ptep, pte, 1);
+               return;
+       }
+
+       if (!pte_napot(pte)) {
+               set_ptes(mm, addr, ptep, pte, 1);
+               return;
+       }
+
+       clear_flush(mm, addr, ptep, pgsize, pte_num);
+
+       for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
                 set_pte_at(mm, addr, ptep, pte);
  }
  
@@ -306,7 +364,7 @@ void huge_pte_clear(struct mm_struct *mm,
                 pte_clear(mm, addr, ptep);
  }
  
-static __init bool is_napot_size(unsigned long size)
+static bool is_napot_size(unsigned long size)
  {
         unsigned long order;
  
@@ -334,7 +392,7 @@ arch_initcall(napot_hugetlbpages_init);
  
  #else
  
-static __init bool is_napot_size(unsigned long size)
+static bool is_napot_size(unsigned long size)
  {
         return false;
  }
@@ -351,7 +409,7 @@ int pmd_huge(pmd_t pmd)
         return pmd_leaf(pmd);
  }
  
-bool __init arch_hugetlb_valid_size(unsigned long size)
+static bool __hugetlb_valid_size(unsigned long size)
  {
         if (size == HPAGE_SIZE)
                 return true;
@@ -363,6 +421,18 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
                 return false;
  }
  
+bool __init arch_hugetlb_valid_size(unsigned long size)
+{
+       return __hugetlb_valid_size(size);
+}
+
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+bool arch_hugetlb_migration_supported(struct hstate *h)
+{
+       return __hugetlb_valid_size(huge_page_size(h));
+}
+#endif
+
  #ifdef CONFIG_CONTIG_ALLOC
  static __init int gigantic_pages_init(void)
  {
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c

index 32cad6a65ccd23431d63097a0906ca5b8de485f8..fa34cf55037bd37ad0b8d3bb3b67f6f91d243f58 100644 (file)
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1385,6 +1385,10 @@ void __init misc_mem_init(void)
         early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
         arch_numa_init();
         sparse_init();
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+       /* The entire VMEMMAP region has been populated. Flush TLB for this region */
+       local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
+#endif
         zone_sizes_init();
         arch_reserve_crashkernel();
         memblock_dump_all();
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c

index 8d12b26f5ac37b659687981c2046f3d5b590753c..893566e004b73fcf9a8dbc94f766e59cd00f1bb1 100644 (file)
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -66,9 +66,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
                 local_flush_tlb_range_threshold_asid(start, size, stride, asid);
  }
  
+/* Flush a range of kernel pages without broadcasting */
  void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
  {
-       local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID);
+       local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID);
  }
  
  static void __ipi_flush_tlb_all(void *info)
@@ -233,4 +234,5 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
  {
         __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
                           FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+       cpumask_clear(&batch->cpumask);
  }
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig

index fe565f3a3a917d0da83dbd8329a503910fa41948..a6409a312747daf7480481239ab139d2ee81339e 100644 (file)
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -193,7 +193,6 @@ config S390
         select HAVE_KPROBES
         select HAVE_KPROBES_ON_FTRACE
         select HAVE_KRETPROBES
-       select HAVE_KVM
         select HAVE_LIVEPATCH
         select HAVE_MEMBLOCK_PHYS_MAP
         select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/s390/configs/compat.config b/arch/s390/configs/compat.config

new file mode 100644 (file)

index 0000000..6fd0514
--- /dev/null
+++ b/arch/s390/configs/compat.config
@@ -0,0 +1,3 @@
+# Help: Enable compat support
+CONFIG_COMPAT=y
+CONFIG_COMPAT_32BIT_TIME=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig

index cae2dd34fbb49d16ee020e72fb669010dca832f8..c924be0d7ed873b2ab9b82a7ab789598f497b016 100644 (file)
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -118,7 +118,6 @@ CONFIG_UNIX=y
  CONFIG_UNIX_DIAG=m
  CONFIG_XFRM_USER=m
  CONFIG_NET_KEY=m
-CONFIG_SMC=m
  CONFIG_SMC_DIAG=m
  CONFIG_INET=y
  CONFIG_IP_MULTICAST=y
@@ -374,6 +373,7 @@ CONFIG_NET_ACT_POLICE=m
  CONFIG_NET_ACT_GACT=m
  CONFIG_GACT_PROB=y
  CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
  CONFIG_NET_ACT_NAT=m
  CONFIG_NET_ACT_PEDIT=m
  CONFIG_NET_ACT_SIMP=m
@@ -436,9 +436,6 @@ CONFIG_SCSI_DH_ALUA=m
  CONFIG_MD=y
  CONFIG_BLK_DEV_MD=y
  # CONFIG_MD_BITMAP_FILE is not set
-CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
  CONFIG_MD_CLUSTER=m
  CONFIG_BCACHE=m
  CONFIG_BLK_DEV_DM=y
@@ -637,7 +634,6 @@ CONFIG_FUSE_FS=y
  CONFIG_CUSE=m
  CONFIG_VIRTIO_FS=m
  CONFIG_OVERLAY_FS=m
-CONFIG_NETFS_SUPPORT=m
  CONFIG_NETFS_STATS=y
  CONFIG_FSCACHE=y
  CONFIG_CACHEFILES=m
@@ -709,7 +705,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y
  CONFIG_IMA_WRITE_POLICY=y
  CONFIG_IMA_APPRAISE=y
  CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
-CONFIG_INIT_STACK_NONE=y
  CONFIG_BUG_ON_DATA_CORRUPTION=y
  CONFIG_CRYPTO_USER=m
  # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
@@ -739,7 +734,6 @@ CONFIG_CRYPTO_TEA=m
  CONFIG_CRYPTO_TWOFISH=m
  CONFIG_CRYPTO_ADIANTUM=m
  CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
  CONFIG_CRYPTO_HCTR2=m
  CONFIG_CRYPTO_KEYWRAP=m
  CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig

index 42b988873e5443df15b054d78610697fdf769293..c8f0c9fe40d708e9b082df3ac0fd5fb901883584 100644 (file)
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -109,7 +109,6 @@ CONFIG_UNIX=y
  CONFIG_UNIX_DIAG=m
  CONFIG_XFRM_USER=m
  CONFIG_NET_KEY=m
-CONFIG_SMC=m
  CONFIG_SMC_DIAG=m
  CONFIG_INET=y
  CONFIG_IP_MULTICAST=y
@@ -364,6 +363,7 @@ CONFIG_NET_ACT_POLICE=m
  CONFIG_NET_ACT_GACT=m
  CONFIG_GACT_PROB=y
  CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
  CONFIG_NET_ACT_NAT=m
  CONFIG_NET_ACT_PEDIT=m
  CONFIG_NET_ACT_SIMP=m
@@ -426,9 +426,6 @@ CONFIG_SCSI_DH_ALUA=m
  CONFIG_MD=y
  CONFIG_BLK_DEV_MD=y
  # CONFIG_MD_BITMAP_FILE is not set
-CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
  CONFIG_MD_CLUSTER=m
  CONFIG_BCACHE=m
  CONFIG_BLK_DEV_DM=y
@@ -622,7 +619,6 @@ CONFIG_FUSE_FS=y
  CONFIG_CUSE=m
  CONFIG_VIRTIO_FS=m
  CONFIG_OVERLAY_FS=m
-CONFIG_NETFS_SUPPORT=m
  CONFIG_NETFS_STATS=y
  CONFIG_FSCACHE=y
  CONFIG_CACHEFILES=m
@@ -693,7 +689,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y
  CONFIG_IMA_WRITE_POLICY=y
  CONFIG_IMA_APPRAISE=y
  CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
-CONFIG_INIT_STACK_NONE=y
  CONFIG_BUG_ON_DATA_CORRUPTION=y
  CONFIG_CRYPTO_FIPS=y
  CONFIG_CRYPTO_USER=m
@@ -724,11 +719,9 @@ CONFIG_CRYPTO_TEA=m
  CONFIG_CRYPTO_TWOFISH=m
  CONFIG_CRYPTO_ADIANTUM=m
  CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
  CONFIG_CRYPTO_HCTR2=m
  CONFIG_CRYPTO_KEYWRAP=m
  CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
  CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_AEGIS128=m
  CONFIG_CRYPTO_CHACHA20POLY1305=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig

index 30d2a16876650e9c3ea32997f771131e6372e2fc..c51f3ec4eb28ab189b7d27d12ca28b98261178e2 100644 (file)
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -8,6 +8,7 @@ CONFIG_BPF_SYSCALL=y
  # CONFIG_NET_NS is not set
  CONFIG_BLK_DEV_INITRD=y
  CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KEXEC=y
  CONFIG_CRASH_DUMP=y
  CONFIG_MARCH_Z13=y
  CONFIG_NR_CPUS=2
@@ -64,7 +65,6 @@ CONFIG_ZFCP=y
  # CONFIG_MISC_FILESYSTEMS is not set
  # CONFIG_NETWORK_FILESYSTEMS is not set
  CONFIG_LSM="yama,loadpin,safesetid,integrity"
-CONFIG_INIT_STACK_NONE=y
  # CONFIG_ZLIB_DFLTCC is not set
  CONFIG_XZ_DEC_MICROLZMA=y
  CONFIG_PRINTK_TIME=y
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h

index 895f774bbcc55353cc7a3d302b009796a799446f..bf78cf381dfcdac92a170b754328acd16846eb2e 100644 (file)
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -25,7 +25,7 @@
   */
  static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("0:   brcl 0,%l[label]\n"
+       asm goto("0:    brcl 0,%l[label]\n"
                           ".pushsection __jump_table,\"aw\"\n"
                           ".balign      8\n"
                           ".long        0b-.,%l[label]-.\n"
@@ -39,7 +39,7 @@ label:
  
  static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("0:   brcl 15,%l[label]\n"
+       asm goto("0:    brcl 15,%l[label]\n"
                           ".pushsection __jump_table,\"aw\"\n"
                           ".balign      8\n"
                           ".long        0b-.,%l[label]-.\n"
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h

index abe926d43cbe0a06342f8c53f202cdd707ea1693..05eaf6db3ad4cba4269b1ce36563096eab236b1d 100644 (file)
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -12,7 +12,320 @@
  #include <linux/types.h>
  
  #define __KVM_S390
-#define __KVM_HAVE_GUEST_DEBUG
+
+struct kvm_s390_skeys {
+       __u64 start_gfn;
+       __u64 count;
+       __u64 skeydata_addr;
+       __u32 flags;
+       __u32 reserved[9];
+};
+
+#define KVM_S390_CMMA_PEEK (1 << 0)
+
+/**
+ * kvm_s390_cmma_log - Used for CMMA migration.
+ *
+ * Used both for input and output.
+ *
+ * @start_gfn: Guest page number to start from.
+ * @count: Size of the result buffer.
+ * @flags: Control operation mode via KVM_S390_CMMA_* flags
+ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
+ *             pages are still remaining.
+ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
+ *        in the PGSTE.
+ * @values: Pointer to the values buffer.
+ *
+ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
+ */
+struct kvm_s390_cmma_log {
+       __u64 start_gfn;
+       __u32 count;
+       __u32 flags;
+       union {
+               __u64 remaining;
+               __u64 mask;
+       };
+       __u64 values;
+};
+
+#define KVM_S390_RESET_POR       1
+#define KVM_S390_RESET_CLEAR     2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT  8
+#define KVM_S390_RESET_IPL       16
+
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+       /* in */
+       __u64 gaddr;            /* the guest address */
+       __u64 flags;            /* flags */
+       __u32 size;             /* amount of bytes */
+       __u32 op;               /* type of operation */
+       __u64 buf;              /* buffer in userspace */
+       union {
+               struct {
+                       __u8 ar;        /* the access register number */
+                       __u8 key;       /* access key, ignored if flag unset */
+                       __u8 pad1[6];   /* ignored */
+                       __u64 old_addr; /* ignored if cmpxchg flag unset */
+               };
+               __u32 sida_offset; /* offset into the sida */
+               __u8 reserved[32]; /* ignored */
+       };
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ    0
+#define KVM_S390_MEMOP_LOGICAL_WRITE   1
+#define KVM_S390_MEMOP_SIDA_READ       2
+#define KVM_S390_MEMOP_SIDA_WRITE      3
+#define KVM_S390_MEMOP_ABSOLUTE_READ   4
+#define KVM_S390_MEMOP_ABSOLUTE_WRITE  5
+#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG        6
+
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY            (1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION      (1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEY_PROTECTION       (1ULL << 2)
+
+/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
+#define KVM_S390_MEMOP_EXTENSION_CAP_BASE      (1 << 0)
+#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG   (1 << 1)
+
+struct kvm_s390_psw {
+       __u64 mask;
+       __u64 addr;
+};
+
+/* valid values for type in kvm_s390_interrupt */
+#define KVM_S390_SIGP_STOP             0xfffe0000u
+#define KVM_S390_PROGRAM_INT           0xfffe0001u
+#define KVM_S390_SIGP_SET_PREFIX       0xfffe0002u
+#define KVM_S390_RESTART               0xfffe0003u
+#define KVM_S390_INT_PFAULT_INIT       0xfffe0004u
+#define KVM_S390_INT_PFAULT_DONE       0xfffe0005u
+#define KVM_S390_MCHK                  0xfffe1000u
+#define KVM_S390_INT_CLOCK_COMP                0xffff1004u
+#define KVM_S390_INT_CPU_TIMER         0xffff1005u
+#define KVM_S390_INT_VIRTIO            0xffff2603u
+#define KVM_S390_INT_SERVICE           0xffff2401u
+#define KVM_S390_INT_EMERGENCY         0xffff1201u
+#define KVM_S390_INT_EXTERNAL_CALL     0xffff1202u
+/* Anything below 0xfffe0000u is taken by INT_IO */
+#define KVM_S390_INT_IO(ai,cssid,ssid,schid)   \
+       (((schid)) |                           \
+        ((ssid) << 16) |                      \
+        ((cssid) << 18) |                     \
+        ((ai) << 26))
+#define KVM_S390_INT_IO_MIN            0x00000000u
+#define KVM_S390_INT_IO_MAX            0xfffdffffu
+#define KVM_S390_INT_IO_AI_MASK                0x04000000u
+
+
+struct kvm_s390_interrupt {
+       __u32 type;
+       __u32 parm;
+       __u64 parm64;
+};
+
+struct kvm_s390_io_info {
+       __u16 subchannel_id;
+       __u16 subchannel_nr;
+       __u32 io_int_parm;
+       __u32 io_int_word;
+};
+
+struct kvm_s390_ext_info {
+       __u32 ext_params;
+       __u32 pad;
+       __u64 ext_params2;
+};
+
+struct kvm_s390_pgm_info {
+       __u64 trans_exc_code;
+       __u64 mon_code;
+       __u64 per_address;
+       __u32 data_exc_code;
+       __u16 code;
+       __u16 mon_class_nr;
+       __u8 per_code;
+       __u8 per_atmid;
+       __u8 exc_access_id;
+       __u8 per_access_id;
+       __u8 op_access_id;
+#define KVM_S390_PGM_FLAGS_ILC_VALID   0x01
+#define KVM_S390_PGM_FLAGS_ILC_0       0x02
+#define KVM_S390_PGM_FLAGS_ILC_1       0x04
+#define KVM_S390_PGM_FLAGS_ILC_MASK    0x06
+#define KVM_S390_PGM_FLAGS_NO_REWIND   0x08
+       __u8 flags;
+       __u8 pad[2];
+};
+
+struct kvm_s390_prefix_info {
+       __u32 address;
+};
+
+struct kvm_s390_extcall_info {
+       __u16 code;
+};
+
+struct kvm_s390_emerg_info {
+       __u16 code;
+};
+
+#define KVM_S390_STOP_FLAG_STORE_STATUS        0x01
+struct kvm_s390_stop_info {
+       __u32 flags;
+};
+
+struct kvm_s390_mchk_info {
+       __u64 cr14;
+       __u64 mcic;
+       __u64 failing_storage_address;
+       __u32 ext_damage_code;
+       __u32 pad;
+       __u8 fixed_logout[16];
+};
+
+struct kvm_s390_irq {
+       __u64 type;
+       union {
+               struct kvm_s390_io_info io;
+               struct kvm_s390_ext_info ext;
+               struct kvm_s390_pgm_info pgm;
+               struct kvm_s390_emerg_info emerg;
+               struct kvm_s390_extcall_info extcall;
+               struct kvm_s390_prefix_info prefix;
+               struct kvm_s390_stop_info stop;
+               struct kvm_s390_mchk_info mchk;
+               char reserved[64];
+       } u;
+};
+
+struct kvm_s390_irq_state {
+       __u64 buf;
+       __u32 flags;        /* will stay unused for compatibility reasons */
+       __u32 len;
+       __u32 reserved[4];  /* will stay unused for compatibility reasons */
+};
+
+struct kvm_s390_ucas_mapping {
+       __u64 user_addr;
+       __u64 vcpu_addr;
+       __u64 length;
+};
+
+struct kvm_s390_pv_sec_parm {
+       __u64 origin;
+       __u64 length;
+};
+
+struct kvm_s390_pv_unp {
+       __u64 addr;
+       __u64 size;
+       __u64 tweak;
+};
+
+enum pv_cmd_dmp_id {
+       KVM_PV_DUMP_INIT,
+       KVM_PV_DUMP_CONFIG_STOR_STATE,
+       KVM_PV_DUMP_COMPLETE,
+       KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+       __u64 subcmd;
+       __u64 buff_addr;
+       __u64 buff_len;
+       __u64 gaddr;            /* For dump storage state */
+       __u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+       KVM_PV_INFO_VM,
+       KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+       __u64 dump_cpu_buffer_len;
+       __u64 dump_config_mem_buffer_per_1m;
+       __u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+       __u64 inst_calls_list[4];
+       __u64 max_cpus;
+       __u64 max_guests;
+       __u64 max_guest_addr;
+       __u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+       __u32 id;
+       __u32 len_max;
+       __u32 len_written;
+       __u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+       struct kvm_s390_pv_info_header header;
+       union {
+               struct kvm_s390_pv_info_dump dump;
+               struct kvm_s390_pv_info_vm vm;
+       };
+};
+
+enum pv_cmd_id {
+       KVM_PV_ENABLE,
+       KVM_PV_DISABLE,
+       KVM_PV_SET_SEC_PARMS,
+       KVM_PV_UNPACK,
+       KVM_PV_VERIFY,
+       KVM_PV_PREP_RESET,
+       KVM_PV_UNSHARE_ALL,
+       KVM_PV_INFO,
+       KVM_PV_DUMP,
+       KVM_PV_ASYNC_CLEANUP_PREPARE,
+       KVM_PV_ASYNC_CLEANUP_PERFORM,
+};
+
+struct kvm_pv_cmd {
+       __u32 cmd;      /* Command to be executed */
+       __u16 rc;       /* Ultravisor return code */
+       __u16 rrc;      /* Ultravisor return reason code */
+       __u64 data;     /* Data or address */
+       __u32 flags;    /* flags for future extensions. Must be 0 for now */
+       __u32 reserved[3];
+};
+
+struct kvm_s390_zpci_op {
+       /* in */
+       __u32 fh;               /* target device */
+       __u8  op;               /* operation to perform */
+       __u8  pad[3];
+       union {
+               /* for KVM_S390_ZPCIOP_REG_AEN */
+               struct {
+                       __u64 ibv;      /* Guest addr of interrupt bit vector */
+                       __u64 sb;       /* Guest addr of summary bit */
+                       __u32 flags;
+                       __u32 noi;      /* Number of interrupts */
+                       __u8 isc;       /* Guest interrupt subclass */
+                       __u8 sbo;       /* Offset of guest summary bit vector */
+                       __u16 pad;
+               } reg_aen;
+               __u64 reserved[8];
+       } u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN                0
+#define KVM_S390_ZPCIOP_DEREG_AEN      1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST    (1 << 0)
  
  /* Device control API: s390-specific devices */
  #define KVM_DEV_FLIC_GET_ALL_IRQS      1
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig

index 72e9b7dcdf7d977a14a1ea7f9d39a06c36e4b97f..cae908d645501ef7eb4edbe87b8431f6499370a4 100644 (file)
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -19,7 +19,6 @@ if VIRTUALIZATION
  config KVM
         def_tristate y
         prompt "Kernel-based Virtual Machine (KVM) support"
-       depends on HAVE_KVM
         select HAVE_KVM_CPU_RELAX_INTERCEPT
         select HAVE_KVM_VCPU_ASYNC_IOCTL
         select KVM_ASYNC_PF
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c

index 3c65b8258ae67ad4b28589494e663034e69dffdd..2a32438e09cebaa698a8935c4aec03bf1f2cbc58 100644 (file)
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -102,7 +102,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
                     parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
                         return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
  
-               if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, parm.token_addr))
                         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
  
                 vcpu->arch.pfault_token = parm.token_addr;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c

index 5bfcc50c1a682893b9add045508fcbb3d4526338..415c99649e43ebb11e452b63ce78efb4f0644b7c 100644 (file)
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -664,7 +664,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
         case ASCE_TYPE_REGION1: {
                 union region1_table_entry rfte;
  
-               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                         return PGM_ADDRESSING;
                 if (deref_table(vcpu->kvm, ptr, &rfte.val))
                         return -EFAULT;
@@ -682,7 +682,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
         case ASCE_TYPE_REGION2: {
                 union region2_table_entry rste;
  
-               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                         return PGM_ADDRESSING;
                 if (deref_table(vcpu->kvm, ptr, &rste.val))
                         return -EFAULT;
@@ -700,7 +700,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
         case ASCE_TYPE_REGION3: {
                 union region3_table_entry rtte;
  
-               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                         return PGM_ADDRESSING;
                 if (deref_table(vcpu->kvm, ptr, &rtte.val))
                         return -EFAULT;
@@ -728,7 +728,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
         case ASCE_TYPE_SEGMENT: {
                 union segment_table_entry ste;
  
-               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                         return PGM_ADDRESSING;
                 if (deref_table(vcpu->kvm, ptr, &ste.val))
                         return -EFAULT;
@@ -748,7 +748,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
                 ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
         }
         }
-       if (kvm_is_error_gpa(vcpu->kvm, ptr))
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                 return PGM_ADDRESSING;
         if (deref_table(vcpu->kvm, ptr, &pte.val))
                 return -EFAULT;
@@ -770,7 +770,7 @@ absolute_address:
                 *prot = PROT_TYPE_IEP;
                 return PGM_PROTECTION;
         }
-       if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
                 return PGM_ADDRESSING;
         *gpa = raddr.addr;
         return 0;
@@ -957,7 +957,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
                                 return rc;
                 } else {
                         gpa = kvm_s390_real_to_abs(vcpu, ga);
-                       if (kvm_is_error_gpa(vcpu->kvm, gpa)) {
+                       if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
                                 rc = PGM_ADDRESSING;
                                 prot = PROT_NONE;
                         }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c

index 6635a7acef34de0c3042001eb538bc6f5bc2c30c..15d589502f5152abb5eb30d2251ba8c1646e3912 100644 (file)
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2878,7 +2878,7 @@ static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
  
         srcu_idx = srcu_read_lock(&kvm->srcu);
  
-       if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+       if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
                 r = PGM_ADDRESSING;
                 goto out_unlock;
         }
@@ -2940,7 +2940,7 @@ static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *m
  
         srcu_idx = srcu_read_lock(&kvm->srcu);
  
-       if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+       if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
                 r = PGM_ADDRESSING;
                 goto out_unlock;
         }
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c

index 621a17fd1a1bb52fd7875a134a1acac25f004209..1be19cc9d73c19cf35b3df5dd9f7b430cdf8c3b7 100644 (file)
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -149,7 +149,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
          * first page, since address is 8k aligned and memory pieces are always
          * at least 1MB aligned and have at least a size of 1MB.
          */
-       if (kvm_is_error_gpa(vcpu->kvm, address))
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, address))
                 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
  
         kvm_s390_set_prefix(vcpu, address);
@@ -464,7 +464,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
                 return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
         addr = kvm_s390_real_to_abs(vcpu, addr);
  
-       if (kvm_is_error_gpa(vcpu->kvm, addr))
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, addr))
                 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
         /*
          * We don't expect errors on modern systems, and do not care
@@ -676,8 +676,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
         if (vcpu->kvm->arch.crypto.pqap_hook) {
                 pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook;
                 ret = pqap_hook(vcpu);
-               if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
-                       kvm_s390_set_psw_cc(vcpu, 3);
+               if (!ret) {
+                       if (vcpu->run->s.regs.gprs[1] & 0x00ff0000)
+                               kvm_s390_set_psw_cc(vcpu, 3);
+                       else
+                               kvm_s390_set_psw_cc(vcpu, 0);
+               }
                 up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
                 return ret;
         }
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c

index d9696b5300647c87332bb212cab2840071913e37..55c34cb354281e720b03436f2a273f92beaacc51 100644 (file)
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -172,7 +172,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
          * first page, since address is 8k aligned and memory pieces are always
          * at least 1MB aligned and have at least a size of 1MB.
          */
-       if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) {
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, irq.u.prefix.address)) {
                 *reg &= 0xffffffff00000000UL;
                 *reg |= SIGP_STATUS_INVALID_PARAMETER;
                 return SIGP_CC_STATUS_STORED;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c

index fef42e2a80a2ae5bc47eae89f2b4e38293a2586b..3af3bd20ac7b8f075e08b85b34f7e257f4687eeb 100644 (file)
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1235,7 +1235,6 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
         gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
         if (IS_ERR(gmap))
                 return PTR_ERR(gmap);
-       gmap->private = vcpu->kvm;
         vcpu->kvm->stat.gmap_shadow_create++;
         WRITE_ONCE(vsie_page->gmap, gmap);
         return 0;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c

index 6f96b5a71c6383d07eb447cb80df70214bdd1910..8da39deb56ca4952a6f8e436d153ec6f54292932 100644 (file)
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1691,6 +1691,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
                 return ERR_PTR(-ENOMEM);
         new->mm = parent->mm;
         new->parent = gmap_get(parent);
+       new->private = parent->private;
         new->orig_asce = asce;
         new->edat_level = edat_level;
         new->initialized = false;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c

index 676ac74026a82b578f857e2426a501abdec014c7..52a44e353796c001a31e9a8242f39982203fb8be 100644 (file)
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -252,7 +252,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
  /* combine single writes by using store-block insn */
  void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
  {
-       zpci_memcpy_toio(to, from, count);
+       zpci_memcpy_toio(to, from, count * 8);
  }
  
  void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile

index 5f60359361312e4159b45d769c1afe81533ace1b..2a03daa68f2857df85df97b5b632d6154e76496f 100644 (file)
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -60,7 +60,7 @@ libs-y                 += arch/sparc/prom/
  libs-y                 += arch/sparc/lib/
  
  drivers-$(CONFIG_PM) += arch/sparc/power/
-drivers-$(CONFIG_FB) += arch/sparc/video/
+drivers-$(CONFIG_FB_CORE) += arch/sparc/video/
  
  boot := arch/sparc/boot
  
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h

index 94eb529dcb77623caf637387e694d8e5ddc049a8..2718cbea826a7d13aefacd26fee3719b69856746 100644 (file)
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -10,7 +10,7 @@
  
  static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                  "nop\n\t"
                  "nop\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
@@ -26,7 +26,7 @@ l_yes:
  
  static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                  "b %l[l_yes]\n\t"
                  "nop\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
diff --git a/arch/sparc/video/Makefile b/arch/sparc/video/Makefile

index 6baddbd58e4db3fa82c9ba76fd5e0d571a7c4f48..d4d83f1702c61f09e3dceac24c494ecd1632f3e5 100644 (file)
--- a/arch/sparc/video/Makefile
+++ b/arch/sparc/video/Makefile
@@ -1,3 +1,3 @@
  # SPDX-License-Identifier: GPL-2.0-only
  
-obj-$(CONFIG_FB) += fbdev.o
+obj-$(CONFIG_FB_CORE) += fbdev.o
diff --git a/arch/um/Makefile b/arch/um/Makefile

index 82f05f250634807c9f78774bca9213dfd5de2038..34957dcb88b9c31befa3c2b7e08809de73b23a3e 100644 (file)
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -115,7 +115,9 @@ archprepare:
         $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
  
  LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
+ifdef CONFIG_LD_SCRIPT_DYN
+LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie
+endif
  LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
  
  CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h

index 4b6d1b526bc1217e2e89d4670f9c4385e68dacc7..66fe06db872f05bb775f0089a4f134f77563efe4 100644 (file)
--- a/arch/um/include/asm/cpufeature.h
+++ b/arch/um/include/asm/cpufeature.h
@@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit);
   */
  static __always_inline bool _static_cpu_has(u16 bit)
  {
-       asm_volatile_goto("1: jmp 6f\n"
+       asm goto("1: jmp 6f\n"
                  "2:\n"
                  ".skip -(((5f-4f) - (2b-1b)) > 0) * "
                          "((5f-4f) - (2b-1b)),0x90\n"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index 5edec175b9bfc92dfac8832fc3600b843407828b..da140c3aed84cc7517491fae11d3fe146bba549a 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -244,7 +244,6 @@ config X86
         select HAVE_FUNCTION_ERROR_INJECTION
         select HAVE_KRETPROBES
         select HAVE_RETHOOK
-       select HAVE_KVM
         select HAVE_LIVEPATCH                   if X86_64
         select HAVE_MIXED_BREAKPOINTS_REGS
         select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu

index b9224cf2ee4d6fcb234be76e072d37fa1cc7ad53..2a7279d80460a8adf0218a954646d9d8343ddf3e 100644 (file)
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -379,7 +379,7 @@ config X86_CMOV
  config X86_MINIMUM_CPU_FAMILY
         int
         default "64" if X86_64
-       default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
+       default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
         default "5" if X86_32 && X86_CMPXCHG64
         default "4"
  
diff --git a/arch/x86/Makefile b/arch/x86/Makefile

index 1a068de12a564fe452cd5c003feb907fd3de42fd..da8f3caf27815e39592443c7c8c09674fe9e2362 100644 (file)
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y)
          # temporary until string.h is fixed
          KBUILD_CFLAGS += -ffreestanding
  
-       ifeq ($(CONFIG_STACKPROTECTOR),y)
-               ifeq ($(CONFIG_SMP),y)
+    ifeq ($(CONFIG_STACKPROTECTOR),y)
+        ifeq ($(CONFIG_SMP),y)
                         KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
-               else
+        else
                         KBUILD_CFLAGS += -mstack-protector-guard=global
-               endif
-       endif
+        endif
+    endif
  else
          BITS := 64
          UTS_MACHINE := x86_64
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S

index b2771710ed989cc805e6310bd88a1743350a9dc5..a1bbedd989e42ed5f9e433f556613613094ae74d 100644 (file)
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -106,8 +106,7 @@ extra_header_fields:
         .word   0                               # MinorSubsystemVersion
         .long   0                               # Win32VersionValue
  
-       .long   setup_size + ZO__end + pecompat_vsize
-                                               # SizeOfImage
+       .long   setup_size + ZO__end            # SizeOfImage
  
         .long   salign                          # SizeOfHeaders
         .long   0                               # CheckSum
@@ -143,7 +142,7 @@ section_table:
         .ascii  ".setup"
         .byte   0
         .byte   0
-       .long   setup_size - salign             # VirtualSize
+       .long   pecompat_fstart - salign        # VirtualSize
         .long   salign                          # VirtualAddress
         .long   pecompat_fstart - salign        # SizeOfRawData
         .long   salign                          # PointerToRawData
@@ -156,8 +155,8 @@ section_table:
  #ifdef CONFIG_EFI_MIXED
         .asciz  ".compat"
  
-       .long   8                               # VirtualSize
-       .long   setup_size + ZO__end            # VirtualAddress
+       .long   pecompat_fsize                  # VirtualSize
+       .long   pecompat_fstart                 # VirtualAddress
         .long   pecompat_fsize                  # SizeOfRawData
         .long   pecompat_fstart                 # PointerToRawData
  
@@ -172,17 +171,16 @@ section_table:
          * modes this image supports.
          */
         .pushsection ".pecompat", "a", @progbits
-       .balign falign
-       .set    pecompat_vsize, salign
+       .balign salign
         .globl  pecompat_fstart
  pecompat_fstart:
         .byte   0x1                             # Version
         .byte   8                               # Size
         .word   IMAGE_FILE_MACHINE_I386         # PE machine type
         .long   setup_size + ZO_efi32_pe_entry  # Entrypoint
+       .byte   0x0                             # Sentinel
         .popsection
  #else
-       .set    pecompat_vsize, 0
         .set    pecompat_fstart, setup_size
  #endif
         .ascii  ".text"
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld

index 83bb7efad8ae7139ca66f850d7bb21b4859bd3e0..3a2d1360abb016902495f5879632335d883b8c03 100644 (file)
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -24,6 +24,9 @@ SECTIONS
         .text           : { *(.text .text.*) }
         .text32         : { *(.text32) }
  
+       .pecompat       : { *(.pecompat) }
+       PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
+
         . = ALIGN(16);
         .rodata         : { *(.rodata*) }
  
@@ -36,9 +39,6 @@ SECTIONS
         . = ALIGN(16);
         .data           : { *(.data*) }
  
-       .pecompat       : { *(.pecompat) }
-       PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
-
         .signature      : {
                 setup_sig = .;
                 LONG(0x5a5aaa55)
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S

index 8c8d38f0cb1df0ee959e09c9f912ec1ab2afce40..0033790499245e3df5f10496986badbe0150aac2 100644 (file)
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@
  #include <linux/export.h>
  #include <linux/linkage.h>
  #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
  
  .pushsection .noinstr.text, "ax"
  
@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
  EXPORT_SYMBOL_GPL(entry_ibpb);
  
  .popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensure VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+       UNWIND_HINT_UNDEFINED
+       ANNOTATE_NOENDBR
+       .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S

index c73047bf9f4bff9c4631c0eab383cedceda41918..fba427646805d55221664538be2285c3ae188ca1 100644 (file)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
         BUG_IF_WRONG_CR3 no_user_check=1
         popfl
         popl    %eax
+       CLEAR_CPU_BUFFERS
  
         /*
          * Return back to the vDSO, which will pop ecx and edx.
@@ -954,6 +955,7 @@ restore_all_switch_stack:
  
         /* Restore user state */
         RESTORE_REGS pop=4                      # skip orig_eax/error_code
+       CLEAR_CPU_BUFFERS
  .Lirq_return:
         /*
          * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi)
  
         /* Not on SYSENTER stack. */
         call    exc_nmi
+       CLEAR_CPU_BUFFERS
         jmp     .Lnmi_return
  
  .Lnmi_from_sysenter_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S

index c40f89ab1b4c70a18b632a50c1e659e3fd83cfa9..9bb4859776291593249b9998416505aeec505011 100644 (file)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -161,6 +161,7 @@ syscall_return_via_sysret:
  SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
         ANNOTATE_NOENDBR
         swapgs
+       CLEAR_CPU_BUFFERS
         sysretq
  SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
         ANNOTATE_NOENDBR
@@ -573,6 +574,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
  
  .Lswapgs_and_iret:
         swapgs
+       CLEAR_CPU_BUFFERS
         /* Assert that the IRET frame indicates user mode. */
         testb   $3, 8(%rsp)
         jnz     .Lnative_iret
@@ -723,6 +725,8 @@ native_irq_return_ldt:
          */
         popq    %rax                            /* Restore user RAX */
  
+       CLEAR_CPU_BUFFERS
+
         /*
          * RSP now points to an ordinary IRET frame, except that the page
          * is read-only and RSP[31:16] are preloaded with the userspace
@@ -1449,6 +1453,12 @@ nmi_restore:
         std
         movq    $0, 5*8(%rsp)           /* clear "NMI executing" */
  
+       /*
+        * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
+        * NMI in kernel after user state is restored. For an unprivileged user
+        * these conditions are hard to meet.
+        */
+
         /*
          * iretq reads the "iret" frame and exits the NMI stack in a
          * single instruction.  We are returning to kernel mode, so this
@@ -1466,6 +1476,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore)
         UNWIND_HINT_END_OF_STACK
         ENDBR
         mov     $-ENOSYS, %eax
+       CLEAR_CPU_BUFFERS
         sysretl
  SYM_CODE_END(entry_SYSCALL32_ignore)
  
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S

index de94e2e84ecca927d9aa0e1ab99466466c163d44..eabf48c4d4b4c30367792f5d9a0b158a9ecf8a04 100644 (file)
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
         xorl    %r9d, %r9d
         xorl    %r10d, %r10d
         swapgs
+       CLEAR_CPU_BUFFERS
         sysretl
  SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
         ANNOTATE_NOENDBR
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h

index 6ae2d16a7613b714cb58283dafa600db5829ba6f..76c310b19b11d898db11cf498d7c82449bbf7dc2 100644 (file)
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -10,13 +10,14 @@ enum cc_vendor {
         CC_VENDOR_INTEL,
  };
  
-extern enum cc_vendor cc_vendor;
-
  #ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+extern enum cc_vendor cc_vendor;
  void cc_set_mask(u64 mask);
  u64 cc_mkenc(u64 val);
  u64 cc_mkdec(u64 val);
  #else
+#define cc_vendor (CC_VENDOR_NONE)
+
  static inline u64 cc_mkenc(u64 val)
  {
         return val;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h

index a26bebbdff87ed20c45bdb98dcc4a8873f5c30f5..a1273698fc430b41951c241b6b76dfa9b7887692 100644 (file)
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,7 +168,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
   */
  static __always_inline bool _static_cpu_has(u16 bit)
  {
-       asm_volatile_goto(
+       asm goto(
                 ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
                 ".pushsection .altinstr_aux,\"ax\"\n"
                 "6:\n"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h

index fdf723b6f6d0ce9f6742ef3c67adce3c8d57c002..2b62cdd8dd1227f2425e698525b97639a4124f75 100644 (file)
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -95,7 +95,7 @@
  #define X86_FEATURE_SYSENTER32         ( 3*32+15) /* "" sysenter in IA32 userspace */
  #define X86_FEATURE_REP_GOOD           ( 3*32+16) /* REP microcode works well */
  #define X86_FEATURE_AMD_LBR_V2         ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC          ( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF      ( 3*32+18) /* "" Clear CPU buffers using VERW */
  #define X86_FEATURE_ACC_POWER          ( 3*32+19) /* AMD Accumulated Power Mechanism */
  #define X86_FEATURE_NOPL               ( 3*32+20) /* The NOPL (0F 1F) instructions */
  #define X86_FEATURE_ALWAYS             ( 3*32+21) /* "" Always-present feature */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h

index ce8f50192ae3e46da87fe3a24fc736b3b2fc3b21..7e523bb3d2d31a9a8ab9d32ca65a41b5b765c4c4 100644 (file)
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
  
  static __always_inline void arch_exit_to_user_mode(void)
  {
-       mds_user_clear_cpu_buffers();
         amd_clear_divider();
  }
  #define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h

index 66837b8c67f1a9f794f9b65008bace6278f1e3d3..fbc7722b87d1fd40f244d697fde2692a464df69f 100644 (file)
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -15,7 +15,7 @@ typedef struct {
         unsigned int irq_spurious_count;
         unsigned int icr_read_retry_count;
  #endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
         unsigned int kvm_posted_intr_ipis;
         unsigned int kvm_posted_intr_wakeup_ipis;
         unsigned int kvm_posted_intr_nested_ipis;
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h

index 13639e57e1f8af4c24c0c656a9f0801516bf25f4..d9c86733d0dbc497ab24d616e34415010dc43d2f 100644 (file)
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -675,7 +675,7 @@ DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR,            sysvec_irq_work);
  # endif
  #endif
  
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
  DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR,            sysvec_kvm_posted_intr_ipi);
  DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR,     sysvec_kvm_posted_intr_wakeup_ipi);
  DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR,     sysvec_kvm_posted_intr_nested_ipi);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h

index 836c170d308755fe205b802e34b51c7ffd592017..194dfff84cb11e53b4bf65fb1ea26f9dbc216549 100644 (file)
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -29,7 +29,7 @@ struct irq_desc;
  
  extern void fixup_irqs(void);
  
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
  extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
  #endif
  
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h

index 3a19904c2db6935fda03c0a7c9eeaa47e62f823c..d18bfb238f660fcccdfdf444a60720f03163e4d5 100644 (file)
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -84,11 +84,9 @@
  #define HYPERVISOR_CALLBACK_VECTOR     0xf3
  
  /* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
  #define POSTED_INTR_VECTOR             0xf2
  #define POSTED_INTR_WAKEUP_VECTOR      0xf1
  #define POSTED_INTR_NESTED_VECTOR      0xf0
-#endif
  
  #define MANAGED_IRQ_SHUTDOWN_VECTOR    0xef
  
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h

index 071572e23d3a06783e3a1f63e11bb47e99af9daa..cbbef32517f0049a3df51842162032ff1946e901 100644 (file)
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -24,7 +24,7 @@
  
  static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
  {
-       asm_volatile_goto("1:"
+       asm goto("1:"
                 "jmp %l[l_yes] # objtool NOPs this \n\t"
                 JUMP_TABLE_ENTRY
                 : :  "i" (key), "i" (2 | branch) : : l_yes);
@@ -38,7 +38,7 @@ l_yes:
  
  static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
  {
-       asm_volatile_goto("1:"
+       asm goto("1:"
                 ".byte " __stringify(BYTES_NOP5) "\n\t"
                 JUMP_TABLE_ENTRY
                 : :  "i" (key), "i" (branch) : : l_yes);
@@ -52,7 +52,7 @@ l_yes:
  
  static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
  {
-       asm_volatile_goto("1:"
+       asm goto("1:"
                 "jmp %l[l_yes]\n\t"
                 JUMP_TABLE_ENTRY
                 : :  "i" (key), "i" (branch) : : l_yes);
diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h

index 8fa6ac0e2d7665f936756748c0e1b4ab08a2c5a7..d91b37f5b4bb45106ee927fcd98b66f1b82a54c1 100644 (file)
--- a/arch/x86/include/asm/kmsan.h
+++ b/arch/x86/include/asm/kmsan.h
@@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr)
  {
         unsigned long x = (unsigned long)addr;
         unsigned long y = x - __START_KERNEL_map;
+       bool ret;
  
         /* use the carry flag to determine if x was < __START_KERNEL_map */
         if (unlikely(x > y)) {
@@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr)
                         return false;
         }
  
-       return pfn_valid(x >> PAGE_SHIFT);
+       /*
+        * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+        * the critical section. However, this would result in recursion with
+        * KMSAN. Therefore, disable preemption here, and re-enable preemption
+        * below while suppressing reschedules to avoid recursion.
+        *
+        * Note, this sacrifices occasionally breaking scheduling guarantees.
+        * Although, a kernel compiled with KMSAN has already given up on any
+        * performance guarantees due to being heavily instrumented.
+        */
+       preempt_disable();
+       ret = pfn_valid(x >> PAGE_SHIFT);
+       preempt_enable_no_resched();
+
+       return ret;
  }
  
  #endif /* !MODULE */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h

index 378ed944b849fb0448a13bd7f12a7a542ab7e388..3942b74c1b753c9f251807df276f709926bd8860 100644 (file)
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -103,7 +103,6 @@ KVM_X86_OP(write_tsc_multiplier)
  KVM_X86_OP(get_exit_info)
  KVM_X86_OP(check_intercept)
  KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP(request_immediate_exit)
  KVM_X86_OP(sched_in)
  KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
  KVM_X86_OP_OPTIONAL(vcpu_blocking)
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h

index 058bc636356a1133ad151457d8bf0b56528e7f39..f852b13aeefea7a15f811c62a7035d32527b8740 100644 (file)
--- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -12,11 +12,9 @@ BUILD_BUG_ON(1)
   * a NULL definition, for example if "static_call_cond()" will be used
   * at the call sites.
   */
-KVM_X86_PMU_OP(hw_event_available)
-KVM_X86_PMU_OP(pmc_idx_to_pmc)
  KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
  KVM_X86_PMU_OP(msr_idx_to_pmc)
-KVM_X86_PMU_OP(is_valid_rdpmc_ecx)
+KVM_X86_PMU_OP_OPTIONAL(check_rdpmc_early)
  KVM_X86_PMU_OP(is_valid_msr)
  KVM_X86_PMU_OP(get_msr)
  KVM_X86_PMU_OP(set_msr)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index b5b2d0fde5796894534f0cb98f96d3076b6bf27d..9e7b1a00e265986ab102c9a2a985e46c64248889 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,6 +536,7 @@ struct kvm_pmc {
  #define KVM_PMC_MAX_FIXED      3
  #define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
  #define KVM_AMD_PMC_MAX_GENERIC        6
+
  struct kvm_pmu {
         u8 version;
         unsigned nr_arch_gp_counters;
@@ -1145,6 +1146,8 @@ struct kvm_hv {
         unsigned int synic_auto_eoi_used;
  
         struct kvm_hv_syndbg hv_syndbg;
+
+       bool xsaves_xsavec_checked;
  };
  #endif
  
@@ -1466,6 +1469,15 @@ struct kvm_arch {
          */
         bool shadow_root_allocated;
  
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+       /*
+        * If set, the VM has (or had) an external write tracking user, and
+        * thus all write tracking metadata has been allocated, even if KVM
+        * itself isn't using write tracking.
+        */
+       bool external_write_tracking_enabled;
+#endif
+
  #if IS_ENABLED(CONFIG_HYPERV)
         hpa_t   hv_root_tdp;
         spinlock_t hv_root_tdp_lock;
@@ -1663,7 +1675,8 @@ struct kvm_x86_ops {
         void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
  
         int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
-       enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
+       enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
+                                                 bool force_immediate_exit);
         int (*handle_exit)(struct kvm_vcpu *vcpu,
                 enum exit_fastpath_completion exit_fastpath);
         int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1731,8 +1744,6 @@ struct kvm_x86_ops {
                                struct x86_exception *exception);
         void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
  
-       void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
-
         void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
  
         /*
@@ -1879,8 +1890,16 @@ static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
  }
  #endif /* CONFIG_HYPERV */
  
+enum kvm_intr_type {
+       /* Values are arbitrary, but must be non-zero. */
+       KVM_HANDLING_IRQ = 1,
+       KVM_HANDLING_NMI,
+};
+
+/* Enable perf NMI and timer modes to work, and minimise false positives. */
  #define kvm_arch_pmi_in_guest(vcpu) \
-       ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
+       ((vcpu) && (vcpu)->arch.handling_intr_from_guest && \
+        (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI)))
  
  void __init kvm_mmu_x86_module_init(void);
  int kvm_mmu_vendor_module_init(void);
@@ -2045,7 +2064,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
  int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
  int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
  int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
-void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
+unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr);
  unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
  void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
  int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
@@ -2238,7 +2257,6 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
  
  int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
  int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
  
  void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                                      u32 size);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h

index 262e65539f83c86d140552305c8a9d330b313c20..2aa52cab1e463af6f4105e2f887acf185dec9f31 100644 (file)
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -315,6 +315,17 @@
  #endif
  .endm
  
+/*
+ * Macro to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+       ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
  #else /* __ASSEMBLY__ */
  
  #define ANNOTATE_RETPOLINE_SAFE                                        \
@@ -529,13 +540,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
  DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
  DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
  
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
  DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
  
  DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
  
  DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
  
+extern u16 mds_verw_sel;
+
  #include <asm/segment.h>
  
  /**
@@ -561,17 +573,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
         asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
  }
  
-/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
-       if (static_branch_likely(&mds_user_clear))
-               mds_clear_cpu_buffers();
-}
-
  /**
   * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
   *
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h

index 4b081e0d3306b79cca3dc222bb7406cde371d517..363266cbcadaf29e5bdeba4b0bfd5ab0ccb7355f 100644 (file)
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -13,7 +13,7 @@
  #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...)                   \
  ({                                                                     \
         bool c = false;                                                 \
-       asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"             \
+       asm goto (fullop "; j" #cc " %l[cc_label]"              \
                         : : [var] "m" (_var), ## __VA_ARGS__            \
                         : clobbers : cc_label);                         \
         if (0) {                                                        \
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h

index d6cd9344f6c78e5555486e5d9f231fd27de9da6a..48f8dd47cf6882ac9e3920d6e7105c0eff430528 100644 (file)
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -205,7 +205,7 @@ static inline void clwb(volatile void *__p)
  #ifdef CONFIG_X86_USER_SHADOW_STACK
  static inline int write_user_shstk_64(u64 __user *addr, u64 val)
  {
-       asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+       asm goto("1: wrussq %[val], (%[addr])\n"
                           _ASM_EXTABLE(1b, %l[fail])
                           :: [addr] "r" (addr), [val] "r" (val)
                           :: fail);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h

index 87a7b917d30ea9fe0998df0d11f84e9bdcc00702..728c98175b9cb8fd5a229dfdf0ac32f294a06b52 100644 (file)
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -358,10 +358,10 @@ struct sev_es_save_area {
         struct vmcb_seg ldtr;
         struct vmcb_seg idtr;
         struct vmcb_seg tr;
-       u64 vmpl0_ssp;
-       u64 vmpl1_ssp;
-       u64 vmpl2_ssp;
-       u64 vmpl3_ssp;
+       u64 pl0_ssp;
+       u64 pl1_ssp;
+       u64 pl2_ssp;
+       u64 pl3_ssp;
         u64 u_cet;
         u8 reserved_0xc8[2];
         u8 vmpl;
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h

index 5c367c1290c355fb3849800a38c88c3553175903..237dc8cdd12b9482f38f8543b85dbde88fb98d65 100644 (file)
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -133,7 +133,7 @@ extern int __get_user_bad(void);
  
  #ifdef CONFIG_X86_32
  #define __put_user_goto_u64(x, addr, label)                    \
-       asm_volatile_goto("\n"                                  \
+       asm goto("\n"                                   \
                      "1:        movl %%eax,0(%1)\n"             \
                      "2:        movl %%edx,4(%1)\n"             \
                      _ASM_EXTABLE_UA(1b, %l2)                   \
@@ -295,7 +295,7 @@ do {                                                                        \
  } while (0)
  
  #define __get_user_asm(x, addr, itype, ltype, label)                   \
-       asm_volatile_goto("\n"                                          \
+       asm_goto_output("\n"                                            \
                      "1:        mov"itype" %[umem],%[output]\n"         \
                      _ASM_EXTABLE_UA(1b, %l2)                           \
                      : [output] ltype(x)                                \
@@ -375,7 +375,7 @@ do {                                                                        \
         __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold);              \
         __typeof__(*(_ptr)) __old = *_old;                              \
         __typeof__(*(_ptr)) __new = (_new);                             \
-       asm_volatile_goto("\n"                                          \
+       asm_goto_output("\n"                                            \
                      "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
                      _ASM_EXTABLE_UA(1b, %l[label])                     \
                      : CC_OUT(z) (success),                             \
@@ -394,7 +394,7 @@ do {                                                                        \
         __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold);              \
         __typeof__(*(_ptr)) __old = *_old;                              \
         __typeof__(*(_ptr)) __new = (_new);                             \
-       asm_volatile_goto("\n"                                          \
+       asm_goto_output("\n"                                            \
                      "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n"             \
                      _ASM_EXTABLE_UA(1b, %l[label])                     \
                      : CC_OUT(z) (success),                             \
@@ -477,7 +477,7 @@ struct __large_struct { unsigned long buf[100]; };
   * aliasing issues.
   */
  #define __put_user_goto(x, addr, itype, ltype, label)                  \
-       asm_volatile_goto("\n"                                          \
+       asm goto("\n"                                                   \
                 "1:     mov"itype" %0,%1\n"                             \
                 _ASM_EXTABLE_UA(1b, %l2)                                \
                 : : ltype(x), "m" (__m(addr))                           \
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h

index c6a7eed039145be3964db90a6cac559e45d87040..266daf5b5b842d0b9921d5950285c648077712fc 100644 (file)
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -25,6 +25,7 @@
  #define VMX_FEATURE_EPT_EXECUTE_ONLY   ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */
  #define VMX_FEATURE_EPT_AD             ( 0*32+ 18) /* EPT Accessed/Dirty bits */
  #define VMX_FEATURE_EPT_1GB            ( 0*32+ 19) /* 1GB EPT pages */
+#define VMX_FEATURE_EPT_5LEVEL         ( 0*32+ 20) /* 5-level EPT paging */
  
  /* Aggregated APIC features 24-27 */
  #define VMX_FEATURE_FLEXPRIORITY       ( 0*32+ 24) /* TPR shadow + virt APIC */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h

index ab60a71a8dcb98e62bccf3c045066df8f42f30f4..472f0263dbc6129c30636f149b94d5828ac300c6 100644 (file)
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,6 +4,7 @@
  
  #include <linux/seqlock.h>
  #include <uapi/asm/vsyscall.h>
+#include <asm/page_types.h>
  
  #ifdef CONFIG_X86_VSYSCALL_EMULATION
  extern void map_vsyscall(void);
@@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code,
  }
  #endif
  
+/*
+ * The (legacy) vsyscall page is the long page in the kernel portion
+ * of the address space that has user-accessible permissions.
+ */
+static inline bool is_vsyscall_vaddr(unsigned long vaddr)
+{
+       return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
+}
+
  #endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h

index a448d0964fc06ebd0c15cd0b550e3c2cefbf57bf..ad29984d5e398da425c0516f14b5cf538a023696 100644 (file)
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -7,6 +7,8 @@
   *
   */
  
+#include <linux/const.h>
+#include <linux/bits.h>
  #include <linux/types.h>
  #include <linux/ioctl.h>
  #include <linux/stddef.h>
@@ -40,7 +42,6 @@
  #define __KVM_HAVE_IRQ_LINE
  #define __KVM_HAVE_MSI
  #define __KVM_HAVE_USER_NMI
-#define __KVM_HAVE_GUEST_DEBUG
  #define __KVM_HAVE_MSIX
  #define __KVM_HAVE_MCE
  #define __KVM_HAVE_PIT_STATE2
@@ -49,7 +50,6 @@
  #define __KVM_HAVE_DEBUGREGS
  #define __KVM_HAVE_XSAVE
  #define __KVM_HAVE_XCRS
-#define __KVM_HAVE_READONLY_MEM
  
  /* Architectural interrupt line count. */
  #define KVM_NR_INTERRUPTS 256
@@ -526,9 +526,278 @@ struct kvm_pmu_event_filter {
  #define KVM_PMU_EVENT_ALLOW 0
  #define KVM_PMU_EVENT_DENY 1
  
-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0)
+#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0)
  #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
  
+/* for KVM_CAP_MCE */
+struct kvm_x86_mce {
+       __u64 status;
+       __u64 addr;
+       __u64 misc;
+       __u64 mcg_status;
+       __u8 bank;
+       __u8 pad1[7];
+       __u64 pad2[3];
+};
+
+/* for KVM_CAP_XEN_HVM */
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR       (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL     (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO         (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE        (1 << 7)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA     (1 << 8)
+
+struct kvm_xen_hvm_config {
+       __u32 flags;
+       __u32 msr;
+       __u64 blob_addr_32;
+       __u64 blob_addr_64;
+       __u8 blob_size_32;
+       __u8 blob_size_64;
+       __u8 pad2[30];
+};
+
+struct kvm_xen_hvm_attr {
+       __u16 type;
+       __u16 pad[3];
+       union {
+               __u8 long_mode;
+               __u8 vector;
+               __u8 runstate_update_flag;
+               union {
+                       __u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
+                       __u64 hva;
+               } shared_info;
+               struct {
+                       __u32 send_port;
+                       __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+                       __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN                (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE          (1 << 1)
+#define KVM_XEN_EVTCHN_RESET           (1 << 2)
+                       /*
+                        * Events sent by the guest are either looped back to
+                        * the guest itself (potentially on a different port#)
+                        * or signalled via an eventfd.
+                        */
+                       union {
+                               struct {
+                                       __u32 port;
+                                       __u32 vcpu;
+                                       __u32 priority;
+                               } port;
+                               struct {
+                                       __u32 port; /* Zero for eventfd */
+                                       __s32 fd;
+                               } eventfd;
+                               __u32 padding[4];
+                       } deliver;
+               } evtchn;
+               __u32 xen_version;
+               __u64 pad[8];
+       } u;
+};
+
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE            0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR                0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN               0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION          0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA      0x6
+
+struct kvm_xen_vcpu_attr {
+       __u16 type;
+       __u16 pad[3];
+       union {
+               __u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
+               __u64 hva;
+               __u64 pad[8];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
+               __u32 vcpu_id;
+               struct {
+                       __u32 port;
+                       __u32 priority;
+                       __u64 expires_ns;
+               } timer;
+               __u8 vector;
+       } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR   0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT        0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID         0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER           0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR   0x8
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA   0x9
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+       /* Guest initialization commands */
+       KVM_SEV_INIT = 0,
+       KVM_SEV_ES_INIT,
+       /* Guest launch commands */
+       KVM_SEV_LAUNCH_START,
+       KVM_SEV_LAUNCH_UPDATE_DATA,
+       KVM_SEV_LAUNCH_UPDATE_VMSA,
+       KVM_SEV_LAUNCH_SECRET,
+       KVM_SEV_LAUNCH_MEASURE,
+       KVM_SEV_LAUNCH_FINISH,
+       /* Guest migration commands (outgoing) */
+       KVM_SEV_SEND_START,
+       KVM_SEV_SEND_UPDATE_DATA,
+       KVM_SEV_SEND_UPDATE_VMSA,
+       KVM_SEV_SEND_FINISH,
+       /* Guest migration commands (incoming) */
+       KVM_SEV_RECEIVE_START,
+       KVM_SEV_RECEIVE_UPDATE_DATA,
+       KVM_SEV_RECEIVE_UPDATE_VMSA,
+       KVM_SEV_RECEIVE_FINISH,
+       /* Guest status and debug commands */
+       KVM_SEV_GUEST_STATUS,
+       KVM_SEV_DBG_DECRYPT,
+       KVM_SEV_DBG_ENCRYPT,
+       /* Guest certificates commands */
+       KVM_SEV_CERT_EXPORT,
+       /* Attestation report */
+       KVM_SEV_GET_ATTESTATION_REPORT,
+       /* Guest Migration Extension */
+       KVM_SEV_SEND_CANCEL,
+
+       KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+       __u32 id;
+       __u64 data;
+       __u32 error;
+       __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 dh_uaddr;
+       __u32 dh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+       __u64 uaddr;
+       __u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+       __u64 uaddr;
+       __u32 len;
+};
+
+struct kvm_sev_guest_status {
+       __u32 handle;
+       __u32 policy;
+       __u32 state;
+};
+
+struct kvm_sev_dbg {
+       __u64 src_uaddr;
+       __u64 dst_uaddr;
+       __u32 len;
+};
+
+struct kvm_sev_attestation_report {
+       __u8 mnonce[16];
+       __u64 uaddr;
+       __u32 len;
+};
+
+struct kvm_sev_send_start {
+       __u32 policy;
+       __u64 pdh_cert_uaddr;
+       __u32 pdh_cert_len;
+       __u64 plat_certs_uaddr;
+       __u32 plat_certs_len;
+       __u64 amd_certs_uaddr;
+       __u32 amd_certs_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_send_update_data {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+struct kvm_sev_receive_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 pdh_uaddr;
+       __u32 pdh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_receive_update_data {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
+
+struct kvm_hyperv_eventfd {
+       __u32 conn_id;
+       __s32 fd;
+       __u32 flags;
+       __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK                0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN    (1 << 0)
+
  /*
   * Masked event layout.
   * Bits   Description
@@ -549,10 +818,10 @@ struct kvm_pmu_event_filter {
         ((__u64)(!!(exclude)) << 55))
  
  #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
-       (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK                (GENMASK_ULL(63, 56))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH       (GENMASK_ULL(15, 8))
-#define KVM_PMU_MASKED_ENTRY_EXCLUDE           (BIT_ULL(55))
+       (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK                (__GENMASK_ULL(63, 56))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH       (__GENMASK_ULL(15, 8))
+#define KVM_PMU_MASKED_ENTRY_EXCLUDE           (_BITULL(55))
  #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT  (56)
  
  /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
@@ -560,7 +829,7 @@ struct kvm_pmu_event_filter {
  #define   KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
  
  /* x86-specific KVM_EXIT_HYPERCALL flags. */
-#define KVM_EXIT_HYPERCALL_LONG_MODE   BIT(0)
+#define KVM_EXIT_HYPERCALL_LONG_MODE   _BITULL(0)
  
  #define KVM_X86_DEFAULT_VM     0
  #define KVM_X86_SW_PROTECTED_VM        1
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h

index 6e64b27b2c1ee0b7ac49bcb7c20c60caf7f3314c..6bc3456a8ebf1d1a7c83498cbbef2b5bae106b41 100644 (file)
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -92,7 +92,7 @@ struct kvm_clock_pairing {
  #define KVM_ASYNC_PF_DELIVERY_AS_INT           (1 << 3)
  
  /* MSR_KVM_ASYNC_PF_INT */
-#define KVM_ASYNC_PF_VEC_MASK                  GENMASK(7, 0)
+#define KVM_ASYNC_PF_VEC_MASK                  __GENMASK(7, 0)
  
  /* MSR_KVM_MIGRATION_CONTROL */
  #define KVM_MIGRATION_READY            (1 << 0)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c

index bb0ab8466b919809a861d7a2f979e132ad863289..48d049cd74e7123a178564ba6fc8ef1dc0212e2d 100644 (file)
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
  /* Control unconditional IBPB in switch_mm() */
  DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
  
-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
  /* Control MDS CPU buffer clear before idling (halt, mwait) */
  DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
  EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void)
                 if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
                         mds_mitigation = MDS_MITIGATION_VMWERV;
  
-               static_branch_enable(&mds_user_clear);
+               setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
  
                 if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
                     (mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void)
          * For guests that can't determine whether the correct microcode is
          * present on host, enable the mitigation for UCODE_NEEDED as well.
          */
-       static_branch_enable(&mds_user_clear);
+       setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
  
         if (taa_nosmt || cpu_mitigations_auto_nosmt())
                 cpu_smt_disable(false);
@@ -424,7 +421,7 @@ static void __init mmio_select_mitigation(void)
          */
         if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
                                               boot_cpu_has(X86_FEATURE_RTM)))
-               static_branch_enable(&mds_user_clear);
+               setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
         else
                 static_branch_enable(&mmio_stale_data_clear);
  
@@ -484,12 +481,12 @@ static void __init md_clear_update_mitigation(void)
         if (cpu_mitigations_off())
                 return;
  
-       if (!static_key_enabled(&mds_user_clear))
+       if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
                 goto out;
  
         /*
-        * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
-        * mitigation, if necessary.
+        * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+        * Stale Data mitigation, if necessary.
          */
         if (mds_mitigation == MDS_MITIGATION_OFF &&
             boot_cpu_has_bug(X86_BUG_MDS)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

index 0b97bcde70c6102a4b82b561c3256ec53b614770..fbc4e60d027cbff23b91e0d8cf2720cabb64803c 100644 (file)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
                 get_cpu_vendor(c);
                 get_cpu_cap(c);
                 setup_force_cpu_cap(X86_FEATURE_CPUID);
+               get_cpu_address_sizes(c);
                 cpu_parse_early_param();
  
                 if (this_cpu->c_early_init)
@@ -1601,10 +1602,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
                         this_cpu->c_bsp_init(c);
         } else {
                 setup_clear_cpu_cap(X86_FEATURE_CPUID);
+               get_cpu_address_sizes(c);
         }
  
-       get_cpu_address_sizes(c);
-
         setup_force_cpu_cap(X86_FEATURE_ALWAYS);
  
         cpu_set_bug_bits(c);
diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c

index 03851240c3e36d4ed5e9ad250eee76410830d6e9..1640ae76548fc71247970398da2bbd35b4c0a5f6 100644 (file)
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -72,6 +72,8 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
                 c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_AD);
         if (ept & VMX_EPT_1GB_PAGE_BIT)
                 c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_1GB);
+       if (ept & VMX_EPT_PAGE_WALK_5_BIT)
+               c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_5LEVEL);
  
         /* Synthetic APIC features that are aggregates of multiple features. */
         if ((c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) &&
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c

index a927a8fc962448035f041c8b17f45ffb6bb9e079..40dec9b56f87db8348c1a242330f243c22c5199d 100644 (file)
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -184,6 +184,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
         return false;
  }
  
+#define MSR_IA32_TME_ACTIVATE          0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x)         (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x)                (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x)         ((x >> 4) & 0xf)        /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128        0
+
+#define TME_ACTIVATE_KEYID_BITS(x)     ((x >> 32) & 0xf)       /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x)    ((x >> 48) & 0xffff)    /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128        1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED                  0
+#define MKTME_DISABLED                 1
+#define MKTME_UNINITIALIZED            2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme_early(struct cpuinfo_x86 *c)
+{
+       u64 tme_activate, tme_policy, tme_crypto_algs;
+       int keyid_bits = 0, nr_keyids = 0;
+       static u64 tme_activate_cpu0 = 0;
+
+       rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+       if (mktme_status != MKTME_UNINITIALIZED) {
+               if (tme_activate != tme_activate_cpu0) {
+                       /* Broken BIOS? */
+                       pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+                       pr_err_once("x86/tme: MKTME is not usable\n");
+                       mktme_status = MKTME_DISABLED;
+
+                       /* Proceed. We may need to exclude bits from x86_phys_bits. */
+               }
+       } else {
+               tme_activate_cpu0 = tme_activate;
+       }
+
+       if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+               pr_info_once("x86/tme: not enabled by BIOS\n");
+               mktme_status = MKTME_DISABLED;
+               return;
+       }
+
+       if (mktme_status != MKTME_UNINITIALIZED)
+               goto detect_keyid_bits;
+
+       pr_info("x86/tme: enabled by BIOS\n");
+
+       tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+       if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+               pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+       tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+       if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+               pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+                               tme_crypto_algs);
+               mktme_status = MKTME_DISABLED;
+       }
+detect_keyid_bits:
+       keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+       nr_keyids = (1UL << keyid_bits) - 1;
+       if (nr_keyids) {
+               pr_info_once("x86/mktme: enabled by BIOS\n");
+               pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+       } else {
+               pr_info_once("x86/mktme: disabled by BIOS\n");
+       }
+
+       if (mktme_status == MKTME_UNINITIALIZED) {
+               /* MKTME is usable */
+               mktme_status = MKTME_ENABLED;
+       }
+
+       /*
+        * KeyID bits effectively lower the number of physical address
+        * bits.  Update cpuinfo_x86::x86_phys_bits accordingly.
+        */
+       c->x86_phys_bits -= keyid_bits;
+}
+
  static void early_init_intel(struct cpuinfo_x86 *c)
  {
         u64 misc_enable;
@@ -322,6 +406,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
          */
         if (detect_extended_topology_early(c) < 0)
                 detect_ht_early(c);
+
+       /*
+        * Adjust the number of physical bits early because it affects the
+        * valid bits of the MTRR mask registers.
+        */
+       if (cpu_has(c, X86_FEATURE_TME))
+               detect_tme_early(c);
  }
  
  static void bsp_init_intel(struct cpuinfo_x86 *c)
@@ -482,90 +573,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
  #endif
  }
  
-#define MSR_IA32_TME_ACTIVATE          0x982
-
-/* Helpers to access TME_ACTIVATE MSR */
-#define TME_ACTIVATE_LOCKED(x)         (x & 0x1)
-#define TME_ACTIVATE_ENABLED(x)                (x & 0x2)
-
-#define TME_ACTIVATE_POLICY(x)         ((x >> 4) & 0xf)        /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128        0
-
-#define TME_ACTIVATE_KEYID_BITS(x)     ((x >> 32) & 0xf)       /* Bits 35:32 */
-
-#define TME_ACTIVATE_CRYPTO_ALGS(x)    ((x >> 48) & 0xffff)    /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128        1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED                  0
-#define MKTME_DISABLED                 1
-#define MKTME_UNINITIALIZED            2
-static int mktme_status = MKTME_UNINITIALIZED;
-
-static void detect_tme(struct cpuinfo_x86 *c)
-{
-       u64 tme_activate, tme_policy, tme_crypto_algs;
-       int keyid_bits = 0, nr_keyids = 0;
-       static u64 tme_activate_cpu0 = 0;
-
-       rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
-
-       if (mktme_status != MKTME_UNINITIALIZED) {
-               if (tme_activate != tme_activate_cpu0) {
-                       /* Broken BIOS? */
-                       pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
-                       pr_err_once("x86/tme: MKTME is not usable\n");
-                       mktme_status = MKTME_DISABLED;
-
-                       /* Proceed. We may need to exclude bits from x86_phys_bits. */
-               }
-       } else {
-               tme_activate_cpu0 = tme_activate;
-       }
-
-       if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
-               pr_info_once("x86/tme: not enabled by BIOS\n");
-               mktme_status = MKTME_DISABLED;
-               return;
-       }
-
-       if (mktme_status != MKTME_UNINITIALIZED)
-               goto detect_keyid_bits;
-
-       pr_info("x86/tme: enabled by BIOS\n");
-
-       tme_policy = TME_ACTIVATE_POLICY(tme_activate);
-       if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
-               pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
-       tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
-       if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
-               pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
-                               tme_crypto_algs);
-               mktme_status = MKTME_DISABLED;
-       }
-detect_keyid_bits:
-       keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
-       nr_keyids = (1UL << keyid_bits) - 1;
-       if (nr_keyids) {
-               pr_info_once("x86/mktme: enabled by BIOS\n");
-               pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
-       } else {
-               pr_info_once("x86/mktme: disabled by BIOS\n");
-       }
-
-       if (mktme_status == MKTME_UNINITIALIZED) {
-               /* MKTME is usable */
-               mktme_status = MKTME_ENABLED;
-       }
-
-       /*
-        * KeyID bits effectively lower the number of physical address
-        * bits.  Update cpuinfo_x86::x86_phys_bits accordingly.
-        */
-       c->x86_phys_bits -= keyid_bits;
-}
-
  static void init_cpuid_fault(struct cpuinfo_x86 *c)
  {
         u64 msr;
@@ -702,9 +709,6 @@ static void init_intel(struct cpuinfo_x86 *c)
  
         init_ia32_feat_ctl(c);
  
-       if (cpu_has(c, X86_FEATURE_TME))
-               detect_tme(c);
-
         init_intel_misc_features(c);
  
         split_lock_init();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c

index fb8cf953380dab44a5426f78733a25452ade3b87..b66f540de054a72403dbe3b4a837d6b1e280610d 100644 (file)
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void)
                 e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
  
                 /*
-                * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need
-                * to be reserved.
+                * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
+                * kexec and do not need to be reserved.
                  */
-               if (data->type != SETUP_EFI && data->type != SETUP_IMA)
+               if (data->type != SETUP_EFI &&
+                   data->type != SETUP_IMA &&
+                   data->type != SETUP_RNG_SEED)
                         e820__range_update_kexec(pa_data,
                                                  sizeof(*data) + data->len,
                                                  E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c

index 558076dbde5bfca582139f8de63bd9ffa1050d6f..247f2225aa9f36f0a0fef0a22ed921b4748a7de5 100644 (file)
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
   * Attempt to restore the FPU registers directly from user memory.
   * Pagefaults are handled and any errors returned are fatal.
   */
-static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
-                                    bool fx_only, unsigned int size)
+static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
  {
         struct fpu *fpu = &current->thread.fpu;
         int ret;
  
+       /* Restore enabled features only. */
+       xrestore &= fpu->fpstate->user_xfeatures;
  retry:
         fpregs_lock();
         /* Ensure that XFD is up to date */
@@ -309,7 +310,7 @@ retry:
                 if (ret != X86_TRAP_PF)
                         return false;
  
-               if (!fault_in_readable(buf, size))
+               if (!fault_in_readable(buf, fpu->fpstate->user_size))
                         goto retry;
                 return false;
         }
@@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
         struct user_i387_ia32_struct env;
         bool success, fx_only = false;
         union fpregs_state *fpregs;
-       unsigned int state_size;
         u64 user_xfeatures = 0;
  
         if (use_xsave()) {
@@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
                         return false;
  
                 fx_only = !fx_sw_user.magic1;
-               state_size = fx_sw_user.xstate_size;
                 user_xfeatures = fx_sw_user.xfeatures;
         } else {
                 user_xfeatures = XFEATURE_MASK_FPSSE;
-               state_size = fpu->fpstate->user_size;
         }
  
         if (likely(!ia32_fxstate)) {
                 /* Restore the FPU registers directly from user memory. */
-               return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
-                                               state_size);
+               return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
         }
  
         /*
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c

index 660b601f1d6c33e9ad62ec2d12d860e92d4ea420..d2bc67cbaf9206a1decfc966cb90c15ae217f3e3 100644 (file)
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -153,7 +153,7 @@ static const __initconst struct idt_data apic_idts[] = {
  #ifdef CONFIG_X86_LOCAL_APIC
         INTG(LOCAL_TIMER_VECTOR,                asm_sysvec_apic_timer_interrupt),
         INTG(X86_PLATFORM_IPI_VECTOR,           asm_sysvec_x86_platform_ipi),
-# ifdef CONFIG_HAVE_KVM
+# if IS_ENABLED(CONFIG_KVM)
         INTG(POSTED_INTR_VECTOR,                asm_sysvec_kvm_posted_intr_ipi),
         INTG(POSTED_INTR_WAKEUP_VECTOR,         asm_sysvec_kvm_posted_intr_wakeup_ipi),
         INTG(POSTED_INTR_NESTED_VECTOR,         asm_sysvec_kvm_posted_intr_nested_ipi),
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c

index 11761c12454533c10159a6456c5a57aac3004777..35fde0107901d61f58dd08af4988f7ce7b2530b3 100644 (file)
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -164,7 +164,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
  #if defined(CONFIG_X86_IO_APIC)
         seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
  #endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
         seq_printf(p, "%*s: ", prec, "PIN");
         for_each_online_cpu(j)
                 seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
@@ -290,7 +290,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
  }
  #endif
  
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
  static void dummy_handler(void) {}
  static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;
  
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c

index dfe9945b9becee7f6d0ca89d00fd3c1eb4e496c5..428ee74002e1eac63d0e269510f536ba2644c3f7 100644 (file)
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -434,7 +434,8 @@ static void __init sev_map_percpu_data(void)
  {
         int cpu;
  
-       if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+       if (cc_vendor != CC_VENDOR_AMD ||
+           !cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
                 return;
  
         for_each_possible_cpu(cpu) {
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c

index 17e955ab69feda933cca3708822f6f9f598e31bf..3082cf24b69e34a3a0ca09a50a72ee1aaec8ebc8 100644 (file)
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -563,9 +563,6 @@ nmi_restart:
         }
         if (this_cpu_dec_return(nmi_state))
                 goto nmi_restart;
-
-       if (user_mode(regs))
-               mds_user_clear_cpu_buffers();
  }
  
  #if IS_ENABLED(CONFIG_KVM_INTEL)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig

index 87e3da7b0439790dac6b35aa4f95e8e7573284d7..8c3032a96caf167b30c1fc76b91039c809bc44e4 100644 (file)
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -7,7 +7,6 @@ source "virt/kvm/Kconfig"
  
  menuconfig VIRTUALIZATION
         bool "Virtualization"
-       depends on HAVE_KVM || X86
         default y
         help
           Say Y here to get to see options for using your Linux host to run other
@@ -20,7 +19,6 @@ if VIRTUALIZATION
  
  config KVM
         tristate "Kernel-based Virtual Machine (KVM) support"
-       depends on HAVE_KVM
         depends on HIGH_RES_TIMERS
         depends on X86_LOCAL_APIC
         select KVM_COMMON
@@ -29,9 +27,9 @@ config KVM
         select HAVE_KVM_PFNCACHE
         select HAVE_KVM_DIRTY_RING_TSO
         select HAVE_KVM_DIRTY_RING_ACQ_REL
-       select IRQ_BYPASS_MANAGER
         select HAVE_KVM_IRQ_BYPASS
         select HAVE_KVM_IRQ_ROUTING
+       select HAVE_KVM_READONLY_MEM
         select KVM_ASYNC_PF
         select USER_RETURN_NOTIFIER
         select KVM_MMIO
@@ -80,9 +78,10 @@ config KVM_SW_PROTECTED_VM
         depends on KVM && X86_64
         select KVM_GENERIC_PRIVATE_MEM
         help
-         Enable support for KVM software-protected VMs.  Currently "protected"
-         means the VM can be backed with memory provided by
-         KVM_CREATE_GUEST_MEMFD.
+         Enable support for KVM software-protected VMs.  Currently, software-
+         protected VMs are purely a development and testing vehicle for
+         KVM_CREATE_GUEST_MEMFD.  Attempting to run a "real" VM workload as a
+         software-protected VM will fail miserably.
  
           If unsure, say "N".
  
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c

index 95ea1a1f7403ea8cd3da1051ac1632d046d8081b..999227fc7c6659158051525bcf819ae2f9edc5f0 100644 (file)
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -189,9 +189,8 @@ static const struct file_operations mmu_rmaps_stat_fops = {
         .release        = kvm_mmu_rmaps_stat_release,
  };
  
-int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
  {
         debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm,
                             &mmu_rmaps_stat_fops);
-       return 0;
  }
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c

index e223043ef5b26f23be5b2f0606641f66c5cd18aa..5d4c86133453d88dbb4f1b5f34d04bde7ec45a55 100644 (file)
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1820,22 +1820,22 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
         return X86EMUL_CONTINUE;
  }
  
-static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
+static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len)
  {
         struct segmented_address addr;
  
-       rsp_increment(ctxt, -bytes);
+       rsp_increment(ctxt, -len);
         addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
         addr.seg = VCPU_SREG_SS;
  
-       return segmented_write(ctxt, addr, data, bytes);
+       return segmented_write(ctxt, addr, data, len);
  }
  
  static int em_push(struct x86_emulate_ctxt *ctxt)
  {
         /* Disable writeback. */
         ctxt->dst.type = OP_NONE;
-       return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
+       return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes);
  }
  
  static int emulate_pop(struct x86_emulate_ctxt *ctxt,
@@ -1863,7 +1863,8 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
                         void *dest, int len)
  {
         int rc;
-       unsigned long val, change_mask;
+       unsigned long val = 0;
+       unsigned long change_mask;
         int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
         int cpl = ctxt->ops->cpl(ctxt);
  
@@ -1920,7 +1921,7 @@ static int em_enter(struct x86_emulate_ctxt *ctxt)
                 return X86EMUL_UNHANDLEABLE;
  
         rbp = reg_read(ctxt, VCPU_REGS_RBP);
-       rc = push(ctxt, &rbp, stack_size(ctxt));
+       rc = emulate_push(ctxt, &rbp, stack_size(ctxt));
         if (rc != X86EMUL_CONTINUE)
                 return rc;
         assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
@@ -1954,7 +1955,7 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
  static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
  {
         int seg = ctxt->src2.val;
-       unsigned long selector;
+       unsigned long selector = 0;
         int rc;
  
         rc = emulate_pop(ctxt, &selector, 2);
@@ -2000,7 +2001,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
  {
         int rc = X86EMUL_CONTINUE;
         int reg = VCPU_REGS_RDI;
-       u32 val;
+       u32 val = 0;
  
         while (reg >= VCPU_REGS_RAX) {
                 if (reg == VCPU_REGS_RSP) {
@@ -2229,7 +2230,7 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
  static int em_ret(struct x86_emulate_ctxt *ctxt)
  {
         int rc;
-       unsigned long eip;
+       unsigned long eip = 0;
  
         rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
@@ -2241,7 +2242,8 @@ static int em_ret(struct x86_emulate_ctxt *ctxt)
  static int em_ret_far(struct x86_emulate_ctxt *ctxt)
  {
         int rc;
-       unsigned long eip, cs;
+       unsigned long eip = 0;
+       unsigned long cs = 0;
         int cpl = ctxt->ops->cpl(ctxt);
         struct desc_struct new_desc;
  
@@ -3011,7 +3013,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
                 ret = em_push(ctxt);
         }
  
-       ops->get_dr(ctxt, 7, &dr7);
+       dr7 = ops->get_dr(ctxt, 7);
         ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
  
         return ret;
@@ -3184,7 +3186,7 @@ fail:
  static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
  {
         int rc;
-       unsigned long eip;
+       unsigned long eip = 0;
  
         rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
@@ -3866,15 +3868,6 @@ static int check_cr_access(struct x86_emulate_ctxt *ctxt)
         return X86EMUL_CONTINUE;
  }
  
-static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
-{
-       unsigned long dr7;
-
-       ctxt->ops->get_dr(ctxt, 7, &dr7);
-
-       return dr7 & DR7_GD;
-}
-
  static int check_dr_read(struct x86_emulate_ctxt *ctxt)
  {
         int dr = ctxt->modrm_reg;
@@ -3887,10 +3880,10 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
         if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
                 return emulate_ud(ctxt);
  
-       if (check_dr7_gd(ctxt)) {
+       if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) {
                 ulong dr6;
  
-               ctxt->ops->get_dr(ctxt, 6, &dr6);
+               dr6 = ctxt->ops->get_dr(ctxt, 6);
                 dr6 &= ~DR_TRAP_BITS;
                 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
                 ctxt->ops->set_dr(ctxt, 6, dr6);
@@ -3962,7 +3955,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
          * protected mode.
          */
         if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
-           ctxt->ops->check_pmc(ctxt, rcx))
+           ctxt->ops->check_rdpmc_early(ctxt, rcx))
                 return emulate_gp(ctxt, 0);
  
         return X86EMUL_CONTINUE;
@@ -4505,11 +4498,11 @@ static const struct instr_dual instr_dual_0f_38_f1 = {
  };
  
  static const struct gprefix three_byte_0f_38_f0 = {
-       ID(0, &instr_dual_0f_38_f0), N, N, N
+       ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N
  };
  
  static const struct gprefix three_byte_0f_38_f1 = {
-       ID(0, &instr_dual_0f_38_f1), N, N, N
+       ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N
  };
  
  /*
@@ -5449,7 +5442,7 @@ twobyte_insn:
                 ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
                 break;
         case 0x21: /* mov from dr to reg */
-               ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
+               ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg);
                 break;
         case 0x40 ... 0x4f:     /* cmov */
                 if (test_cc(ctxt->b, ctxt->eflags))
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c

index 4943f6b2bbee491651bdacf288e4cdbda2e49dec..8a47f8541eab7098c991837c2b4e03c4822c445a 100644 (file)
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1322,6 +1322,56 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
         return false;
  }
  
+#define KVM_HV_WIN2016_GUEST_ID 0x1040a00003839
+#define KVM_HV_WIN2016_GUEST_ID_MASK (~GENMASK_ULL(23, 16)) /* mask out the service version */
+
+/*
+ * Hyper-V enabled Windows Server 2016 SMP VMs fail to boot in !XSAVES && XSAVEC
+ * configuration.
+ * Such configuration can result from, for example, AMD Erratum 1386 workaround.
+ *
+ * Print a notice so users aren't left wondering what's suddenly gone wrong.
+ */
+static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
+{
+       struct kvm *kvm = vcpu->kvm;
+       struct kvm_hv *hv = to_kvm_hv(kvm);
+
+       /* Check again under the hv_lock.  */
+       if (hv->xsaves_xsavec_checked)
+               return;
+
+       if ((hv->hv_guest_os_id & KVM_HV_WIN2016_GUEST_ID_MASK) !=
+           KVM_HV_WIN2016_GUEST_ID)
+               return;
+
+       hv->xsaves_xsavec_checked = true;
+
+       /* UP configurations aren't affected */
+       if (atomic_read(&kvm->online_vcpus) < 2)
+               return;
+
+       if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) ||
+           !guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC))
+               return;
+
+       pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. "
+                             "If it fails to boot try disabling XSAVEC in the VM config.\n");
+}
+
+void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
+{
+       struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
+
+       if (!vcpu->arch.hyperv_enabled ||
+           hv->xsaves_xsavec_checked)
+               return;
+
+       mutex_lock(&hv->hv_lock);
+       __kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+       mutex_unlock(&hv->hv_lock);
+}
+
  static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
                              bool host)
  {
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h

index 1dc0b6604526a1c629b3709d54c674f1ae1ce080..923e64903da9afeeff80f76062c45bd5ab076717 100644 (file)
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -182,6 +182,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
                            struct pvclock_vcpu_time_info *hv_clock);
  void kvm_hv_request_tsc_page_update(struct kvm *kvm);
  
+void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu);
+
  void kvm_hv_init_vm(struct kvm *kvm);
  void kvm_hv_destroy_vm(struct kvm *kvm);
  int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
@@ -267,6 +269,7 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
  static inline void kvm_hv_setup_tsc_page(struct kvm *kvm,
                                          struct pvclock_vcpu_time_info *hv_clock) {}
  static inline void kvm_hv_request_tsc_page_update(struct kvm *kvm) {}
+static inline void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu) {}
  static inline void kvm_hv_init_vm(struct kvm *kvm) {}
  static inline void kvm_hv_destroy_vm(struct kvm *kvm) {}
  static inline int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h

index e6d149825169dda3ace396ca979923c4a2d108e8..5382646162a38710f4c85bc7b0f28f1dc944ee06 100644 (file)
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -203,12 +203,12 @@ struct x86_emulate_ops {
         ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
         int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
         int (*cpl)(struct x86_emulate_ctxt *ctxt);
-       void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
+       ulong (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr);
         int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
         int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
         int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
         int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
-       int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
+       int (*check_rdpmc_early)(struct x86_emulate_ctxt *ctxt, u32 pmc);
         int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
         void (*halt)(struct x86_emulate_ctxt *ctxt);
         void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c

index 3242f3da2457671bafde8d5ad7823c9a3d3a07be..11b9a9bdc07a14587e9b904f27b1267d2002c2ed 100644 (file)
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -41,6 +41,7 @@
  #include "ioapic.h"
  #include "trace.h"
  #include "x86.h"
+#include "xen.h"
  #include "cpuid.h"
  #include "hyperv.h"
  #include "smm.h"
@@ -124,6 +125,9 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
         return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  }
  
+__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
+EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
+
  __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
  __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
  
@@ -499,8 +503,10 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
         }
  
         /* Check if there are APF page ready requests pending */
-       if (enabled)
+       if (enabled) {
                 kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
+               kvm_xen_sw_enable_lapic(apic->vcpu);
+       }
  }
  
  static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
@@ -2466,8 +2472,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
  {
         struct kvm_lapic *apic = vcpu->arch.apic;
  
-       if (!vcpu->arch.apic)
+       if (!vcpu->arch.apic) {
+               static_branch_dec(&kvm_has_noapic_vcpu);
                 return;
+       }
  
         hrtimer_cancel(&apic->lapic_timer.timer);
  
@@ -2809,6 +2817,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
  
         ASSERT(vcpu != NULL);
  
+       if (!irqchip_in_kernel(vcpu->kvm)) {
+               static_branch_inc(&kvm_has_noapic_vcpu);
+               return 0;
+       }
+
         apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
         if (!apic)
                 goto nomem;
@@ -2844,6 +2857,21 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
         static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
         kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
  
+       /*
+        * Defer evaluating inhibits until the vCPU is first run, as this vCPU
+        * will not get notified of any changes until this vCPU is visible to
+        * other vCPUs (marked online and added to the set of vCPUs).
+        *
+        * Opportunistically mark APICv active as VMX in particularly is highly
+        * unlikely to have inhibits.  Ignore the current per-VM APICv state so
+        * that vCPU creation is guaranteed to run with a deterministic value,
+        * the request will ensure the vCPU gets the correct state before VM-Entry.
+        */
+       if (enable_apicv) {
+               apic->apicv_active = true;
+               kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+       }
+
         return 0;
  nomem_free_apic:
         kfree(apic);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c

index 2d6cdeab1f8a3e78306148d44a4665a1d51d8b1e..e5e2af69e24d87ceb6de3cee2f3dd2c855877aef 100644 (file)
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3575,10 +3575,14 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
         if (WARN_ON_ONCE(!sp))
                 return;
  
-       if (is_tdp_mmu_page(sp))
+       if (is_tdp_mmu_page(sp)) {
+               lockdep_assert_held_read(&kvm->mmu_lock);
                 kvm_tdp_mmu_put_root(kvm, sp);
-       else if (!--sp->root_count && sp->role.invalid)
-               kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+       } else {
+               lockdep_assert_held_write(&kvm->mmu_lock);
+               if (!--sp->root_count && sp->role.invalid)
+                       kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+       }
  
         *root_hpa = INVALID_PAGE;
  }
@@ -3587,6 +3591,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
  void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                         ulong roots_to_free)
  {
+       bool is_tdp_mmu = tdp_mmu_enabled && mmu->root_role.direct;
         int i;
         LIST_HEAD(invalid_list);
         bool free_active_root;
@@ -3609,7 +3614,10 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                         return;
         }
  
-       write_lock(&kvm->mmu_lock);
+       if (is_tdp_mmu)
+               read_lock(&kvm->mmu_lock);
+       else
+               write_lock(&kvm->mmu_lock);
  
         for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
                 if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
@@ -3635,8 +3643,13 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                 mmu->root.pgd = 0;
         }
  
-       kvm_mmu_commit_zap_page(kvm, &invalid_list);
-       write_unlock(&kvm->mmu_lock);
+       if (is_tdp_mmu) {
+               read_unlock(&kvm->mmu_lock);
+               WARN_ON_ONCE(!list_empty(&invalid_list));
+       } else {
+               kvm_mmu_commit_zap_page(kvm, &invalid_list);
+               write_unlock(&kvm->mmu_lock);
+       }
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
  
@@ -3693,15 +3706,15 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
         unsigned i;
         int r;
  
+       if (tdp_mmu_enabled)
+               return kvm_tdp_mmu_alloc_root(vcpu);
+
         write_lock(&vcpu->kvm->mmu_lock);
         r = make_mmu_pages_available(vcpu);
         if (r < 0)
                 goto out_unlock;
  
-       if (tdp_mmu_enabled) {
-               root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
-               mmu->root.hpa = root;
-       } else if (shadow_root_level >= PT64_ROOT_4LEVEL) {
+       if (shadow_root_level >= PT64_ROOT_4LEVEL) {
                 root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level);
                 mmu->root.hpa = root;
         } else if (shadow_root_level == PT32E_ROOT_LEVEL) {
@@ -6997,9 +7010,7 @@ int kvm_mmu_vendor_module_init(void)
  
         kvm_mmu_reset_all_pte_masks();
  
-       pte_list_desc_cache = kmem_cache_create("pte_list_desc",
-                                           sizeof(struct pte_list_desc),
-                                           0, SLAB_ACCOUNT, NULL);
+       pte_list_desc_cache = KMEM_CACHE(pte_list_desc, SLAB_ACCOUNT);
         if (!pte_list_desc_cache)
                 goto out;
  
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c

index c87da11f3a049b75e2dc6cae714b37e868666252..f6448284c18e3e1a2820530a4919f151a10c997b 100644 (file)
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -20,10 +20,23 @@
  #include "mmu_internal.h"
  #include "page_track.h"
  
+static bool kvm_external_write_tracking_enabled(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+       /*
+        * Read external_write_tracking_enabled before related pointers.  Pairs
+        * with the smp_store_release in kvm_page_track_write_tracking_enable().
+        */
+       return smp_load_acquire(&kvm->arch.external_write_tracking_enabled);
+#else
+       return false;
+#endif
+}
+
  bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
  {
-       return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
-              !tdp_enabled || kvm_shadow_root_allocated(kvm);
+       return kvm_external_write_tracking_enabled(kvm) ||
+              kvm_shadow_root_allocated(kvm) || !tdp_enabled;
  }
  
  void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
@@ -153,6 +166,50 @@ int kvm_page_track_init(struct kvm *kvm)
         return init_srcu_struct(&head->track_srcu);
  }
  
+static int kvm_enable_external_write_tracking(struct kvm *kvm)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *slot;
+       int r = 0, i, bkt;
+
+       mutex_lock(&kvm->slots_arch_lock);
+
+       /*
+        * Check for *any* write tracking user (not just external users) under
+        * lock.  This avoids unnecessary work, e.g. if KVM itself is using
+        * write tracking, or if two external users raced when registering.
+        */
+       if (kvm_page_track_write_tracking_enabled(kvm))
+               goto out_success;
+
+       for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
+               slots = __kvm_memslots(kvm, i);
+               kvm_for_each_memslot(slot, bkt, slots) {
+                       /*
+                        * Intentionally do NOT free allocations on failure to
+                        * avoid having to track which allocations were made
+                        * now versus when the memslot was created.  The
+                        * metadata is guaranteed to be freed when the slot is
+                        * freed, and will be kept/used if userspace retries
+                        * the failed ioctl() instead of killing the VM.
+                        */
+                       r = kvm_page_track_write_tracking_alloc(slot);
+                       if (r)
+                               goto out_unlock;
+               }
+       }
+
+out_success:
+       /*
+        * Ensure that external_write_tracking_enabled becomes true strictly
+        * after all the related pointers are set.
+        */
+       smp_store_release(&kvm->arch.external_write_tracking_enabled, true);
+out_unlock:
+       mutex_unlock(&kvm->slots_arch_lock);
+       return r;
+}
+
  /*
   * register the notifier so that event interception for the tracked guest
   * pages can be received.
@@ -161,10 +218,17 @@ int kvm_page_track_register_notifier(struct kvm *kvm,
                                      struct kvm_page_track_notifier_node *n)
  {
         struct kvm_page_track_notifier_head *head;
+       int r;
  
         if (!kvm || kvm->mm != current->mm)
                 return -ESRCH;
  
+       if (!kvm_external_write_tracking_enabled(kvm)) {
+               r = kvm_enable_external_write_tracking(kvm);
+               if (r)
+                       return r;
+       }
+
         kvm_get_kvm(kvm);
  
         head = &kvm->arch.track_notifier_head;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c

index 6ae19b4ee5b1cb17d4ddda85197379cde425b03e..d078157e62aa4025e6a3a6411e0d6b118245f3b8 100644 (file)
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -149,11 +149,11 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
   * If shared is set, this function is operating under the MMU lock in read
   * mode.
   */
-#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)\
-       for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid);        \
-            ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root;      \
-            _root = tdp_mmu_next_root(_kvm, _root, _only_valid))       \
-               if (kvm_mmu_page_as_id(_root) != _as_id) {              \
+#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)   \
+       for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid);                \
+            ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root;              \
+            _root = tdp_mmu_next_root(_kvm, _root, _only_valid))               \
+               if (_as_id >= 0 && kvm_mmu_page_as_id(_root) != _as_id) {       \
                 } else
  
  #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)    \
@@ -171,12 +171,19 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
   * Holding mmu_lock for write obviates the need for RCU protection as the list
   * is guaranteed to be stable.
   */
-#define for_each_tdp_mmu_root(_kvm, _root, _as_id)                     \
-       list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)     \
-               if (kvm_lockdep_assert_mmu_lock_held(_kvm, false) &&    \
-                   kvm_mmu_page_as_id(_root) != _as_id) {              \
+#define __for_each_tdp_mmu_root(_kvm, _root, _as_id, _only_valid)              \
+       list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)             \
+               if (kvm_lockdep_assert_mmu_lock_held(_kvm, false) &&            \
+                   ((_as_id >= 0 && kvm_mmu_page_as_id(_root) != _as_id) ||    \
+                    ((_only_valid) && (_root)->role.invalid))) {               \
                 } else
  
+#define for_each_tdp_mmu_root(_kvm, _root, _as_id)                     \
+       __for_each_tdp_mmu_root(_kvm, _root, _as_id, false)
+
+#define for_each_valid_tdp_mmu_root(_kvm, _root, _as_id)               \
+       __for_each_tdp_mmu_root(_kvm, _root, _as_id, true)
+
  static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
  {
         struct kvm_mmu_page *sp;
@@ -216,22 +223,41 @@ static void tdp_mmu_init_child_sp(struct kvm_mmu_page *child_sp,
         tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role);
  }
  
-hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
+int kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu)
  {
-       union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
+       struct kvm_mmu *mmu = vcpu->arch.mmu;
+       union kvm_mmu_page_role role = mmu->root_role;
+       int as_id = kvm_mmu_role_as_id(role);
         struct kvm *kvm = vcpu->kvm;
         struct kvm_mmu_page *root;
  
-       lockdep_assert_held_write(&kvm->mmu_lock);
+       /*
+        * Check for an existing root before acquiring the pages lock to avoid
+        * unnecessary serialization if multiple vCPUs are loading a new root.
+        * E.g. when bringing up secondary vCPUs, KVM will already have created
+        * a valid root on behalf of the primary vCPU.
+        */
+       read_lock(&kvm->mmu_lock);
+
+       for_each_valid_tdp_mmu_root_yield_safe(kvm, root, as_id) {
+               if (root->role.word == role.word)
+                       goto out_read_unlock;
+       }
+
+       spin_lock(&kvm->arch.tdp_mmu_pages_lock);
  
         /*
-        * Check for an existing root before allocating a new one.  Note, the
-        * role check prevents consuming an invalid root.
+        * Recheck for an existing root after acquiring the pages lock, another
+        * vCPU may have raced ahead and created a new usable root.  Manually
+        * walk the list of roots as the standard macros assume that the pages
+        * lock is *not* held.  WARN if grabbing a reference to a usable root
+        * fails, as the last reference to a root can only be put *after* the
+        * root has been invalidated, which requires holding mmu_lock for write.
          */
-       for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
+       list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
                 if (root->role.word == role.word &&
-                   kvm_tdp_mmu_get_root(root))
-                       goto out;
+                   !WARN_ON_ONCE(!kvm_tdp_mmu_get_root(root)))
+                       goto out_spin_unlock;
         }
  
         root = tdp_mmu_alloc_sp(vcpu);
@@ -245,13 +271,20 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
          * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots().
          */
         refcount_set(&root->tdp_mmu_root_count, 2);
-
-       spin_lock(&kvm->arch.tdp_mmu_pages_lock);
         list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots);
-       spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
  
-out:
-       return __pa(root->spt);
+out_spin_unlock:
+       spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+out_read_unlock:
+       read_unlock(&kvm->mmu_lock);
+       /*
+        * Note, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS will prevent entering the guest
+        * and actually consuming the root if it's invalidated after dropping
+        * mmu_lock, and the root can't be freed as this vCPU holds a reference.
+        */
+       mmu->root.hpa = __pa(root->spt);
+       mmu->root.pgd = 0;
+       return 0;
  }
  
  static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
@@ -734,15 +767,26 @@ static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
         rcu_read_lock();
  
         /*
-        * To avoid RCU stalls due to recursively removing huge swaths of SPs,
-        * split the zap into two passes.  On the first pass, zap at the 1gb
-        * level, and then zap top-level SPs on the second pass.  "1gb" is not
-        * arbitrary, as KVM must be able to zap a 1gb shadow page without
-        * inducing a stall to allow in-place replacement with a 1gb hugepage.
+        * Zap roots in multiple passes of decreasing granularity, i.e. zap at
+        * 4KiB=>2MiB=>1GiB=>root, in order to better honor need_resched() (all
+        * preempt models) or mmu_lock contention (full or real-time models).
+        * Zapping at finer granularity marginally increases the total time of
+        * the zap, but in most cases the zap itself isn't latency sensitive.
          *
-        * Because zapping a SP recurses on its children, stepping down to
-        * PG_LEVEL_4K in the iterator itself is unnecessary.
+        * If KVM is configured to prove the MMU, skip the 4KiB and 2MiB zaps
+        * in order to mimic the page fault path, which can replace a 1GiB page
+        * table with an equivalent 1GiB hugepage, i.e. can get saddled with
+        * zapping a 1GiB region that's fully populated with 4KiB SPTEs.  This
+        * allows verifying that KVM can safely zap 1GiB regions, e.g. without
+        * inducing RCU stalls, without relying on a relatively rare event
+        * (zapping roots is orders of magnitude more common).  Note, because
+        * zapping a SP recurses on its children, stepping down to PG_LEVEL_4K
+        * in the iterator itself is unnecessary.
          */
+       if (!IS_ENABLED(CONFIG_KVM_PROVE_MMU)) {
+               __tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_4K);
+               __tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_2M);
+       }
         __tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_1G);
         __tdp_mmu_zap_root(kvm, root, shared, root->role.level);
  
@@ -800,7 +844,13 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
                         continue;
  
                 tdp_mmu_iter_set_spte(kvm, &iter, 0);
-               flush = true;
+
+               /*
+                * Zappings SPTEs in invalid roots doesn't require a TLB flush,
+                * see kvm_tdp_mmu_zap_invalidated_roots() for details.
+                */
+               if (!root->role.invalid)
+                       flush = true;
         }
  
         rcu_read_unlock();
@@ -813,16 +863,16 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
  }
  
  /*
- * Zap leaf SPTEs for the range of gfns, [start, end), for all roots. Returns
- * true if a TLB flush is needed before releasing the MMU lock, i.e. if one or
- * more SPTEs were zapped since the MMU lock was last acquired.
+ * Zap leaf SPTEs for the range of gfns, [start, end), for all *VALID** roots.
+ * Returns true if a TLB flush is needed before releasing the MMU lock, i.e. if
+ * one or more SPTEs were zapped since the MMU lock was last acquired.
   */
  bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
  {
         struct kvm_mmu_page *root;
  
         lockdep_assert_held_write(&kvm->mmu_lock);
-       for_each_tdp_mmu_root_yield_safe(kvm, root)
+       for_each_valid_tdp_mmu_root_yield_safe(kvm, root, -1)
                 flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
  
         return flush;
@@ -896,7 +946,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
   * the VM is being destroyed).
   *
   * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference.
- * See kvm_tdp_mmu_get_vcpu_root_hpa().
+ * See kvm_tdp_mmu_alloc_root().
   */
  void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
  {
@@ -1622,7 +1672,7 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
  {
         struct kvm_mmu_page *root;
  
-       for_each_tdp_mmu_root(kvm, root, slot->as_id)
+       for_each_valid_tdp_mmu_root(kvm, root, slot->as_id)
                 clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
  }
  
@@ -1740,7 +1790,7 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
         bool spte_set = false;
  
         lockdep_assert_held_write(&kvm->mmu_lock);
-       for_each_tdp_mmu_root(kvm, root, slot->as_id)
+       for_each_valid_tdp_mmu_root(kvm, root, slot->as_id)
                 spte_set |= write_protect_gfn(kvm, root, gfn, min_level);
  
         return spte_set;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h

index 20d97aa46c490fff98f9d3a6cbc116935d71a726..6e1ea04ca885e5691760326f445163bbf8447cb2 100644 (file)
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -10,7 +10,7 @@
  void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
  void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
  
-hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
+int kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu);
  
  __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
  {
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c

index 87cc6c8809ad88898894bd0ea6199ab70e2a91ac..c397b28e3d1b680788249daa32f36c12c80bd1a1 100644 (file)
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -29,6 +29,9 @@
  struct x86_pmu_capability __read_mostly kvm_pmu_cap;
  EXPORT_SYMBOL_GPL(kvm_pmu_cap);
  
+struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
+EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
+
  /* Precise Distribution of Instructions Retired (PDIR) */
  static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
@@ -67,7 +70,7 @@ static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
   *        all perf counters (both gp and fixed). The mapping relationship
   *        between pmc and perf counters is as the following:
   *        * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
- *                 [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
+ *                 [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX + 2] <=> fixed
   *        * AMD:   [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
   *          and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
   */
@@ -411,7 +414,7 @@ static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
  static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
                                    int idx)
  {
-       int fixed_idx = idx - INTEL_PMC_IDX_FIXED;
+       int fixed_idx = idx - KVM_FIXED_PMC_BASE_IDX;
  
         if (filter->action == KVM_PMU_EVENT_DENY &&
             test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
@@ -441,11 +444,10 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
  static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
  {
         return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
-              static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
                check_pmu_event_filter(pmc);
  }
  
-static void reprogram_counter(struct kvm_pmc *pmc)
+static int reprogram_counter(struct kvm_pmc *pmc)
  {
         struct kvm_pmu *pmu = pmc_to_pmu(pmc);
         u64 eventsel = pmc->eventsel;
@@ -456,7 +458,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
         emulate_overflow = pmc_pause_counter(pmc);
  
         if (!pmc_event_is_allowed(pmc))
-               goto reprogram_complete;
+               return 0;
  
         if (emulate_overflow)
                 __kvm_perf_overflow(pmc, false);
@@ -466,7 +468,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
  
         if (pmc_is_fixed(pmc)) {
                 fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
-                                                 pmc->idx - INTEL_PMC_IDX_FIXED);
+                                                 pmc->idx - KVM_FIXED_PMC_BASE_IDX);
                 if (fixed_ctr_ctrl & 0x1)
                         eventsel |= ARCH_PERFMON_EVENTSEL_OS;
                 if (fixed_ctr_ctrl & 0x2)
@@ -477,43 +479,45 @@ static void reprogram_counter(struct kvm_pmc *pmc)
         }
  
         if (pmc->current_config == new_config && pmc_resume_counter(pmc))
-               goto reprogram_complete;
+               return 0;
  
         pmc_release_perf_event(pmc);
  
         pmc->current_config = new_config;
  
-       /*
-        * If reprogramming fails, e.g. due to contention, leave the counter's
-        * regprogram bit set, i.e. opportunistically try again on the next PMU
-        * refresh.  Don't make a new request as doing so can stall the guest
-        * if reprogramming repeatedly fails.
-        */
-       if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
-                                 (eventsel & pmu->raw_event_mask),
-                                 !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
-                                 !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-                                 eventsel & ARCH_PERFMON_EVENTSEL_INT))
-               return;
-
-reprogram_complete:
-       clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+       return pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+                                    (eventsel & pmu->raw_event_mask),
+                                    !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+                                    !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+                                    eventsel & ARCH_PERFMON_EVENTSEL_INT);
  }
  
  void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
  {
+       DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+       struct kvm_pmc *pmc;
         int bit;
  
-       for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
-               struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
+       bitmap_copy(bitmap, pmu->reprogram_pmi, X86_PMC_IDX_MAX);
  
-               if (unlikely(!pmc)) {
-                       clear_bit(bit, pmu->reprogram_pmi);
-                       continue;
-               }
+       /*
+        * The reprogramming bitmap can be written asynchronously by something
+        * other than the task that holds vcpu->mutex, take care to clear only
+        * the bits that will actually processed.
+        */
+       BUILD_BUG_ON(sizeof(bitmap) != sizeof(atomic64_t));
+       atomic64_andnot(*(s64 *)bitmap, &pmu->__reprogram_pmi);
  
-               reprogram_counter(pmc);
+       kvm_for_each_pmc(pmu, pmc, bit, bitmap) {
+               /*
+                * If reprogramming fails, e.g. due to contention, re-set the
+                * regprogram bit set, i.e. opportunistically try again on the
+                * next PMU refresh.  Don't make a new request as doing so can
+                * stall the guest if reprogramming repeatedly fails.
+                */
+               if (reprogram_counter(pmc))
+                       set_bit(pmc->idx, pmu->reprogram_pmi);
         }
  
         /*
@@ -525,10 +529,20 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
                 kvm_pmu_cleanup(vcpu);
  }
  
-/* check if idx is a valid index to access PMU */
-bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
  {
-       return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
+       /*
+        * On Intel, VMX interception has priority over RDPMC exceptions that
+        * aren't already handled by the emulator, i.e. there are no additional
+        * check needed for Intel PMUs.
+        *
+        * On AMD, _all_ exceptions on RDPMC have priority over SVM intercepts,
+        * i.e. an invalid PMC results in a #GP, not #VMEXIT.
+        */
+       if (!kvm_pmu_ops.check_rdpmc_early)
+               return 0;
+
+       return static_call(kvm_x86_pmu_check_rdpmc_early)(vcpu, idx);
  }
  
  bool is_vmware_backdoor_pmc(u32 pmc_idx)
@@ -567,10 +581,9 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
  
  int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
  {
-       bool fast_mode = idx & (1u << 31);
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
         struct kvm_pmc *pmc;
-       u64 mask = fast_mode ? ~0u : ~0ull;
+       u64 mask = ~0ull;
  
         if (!pmu->version)
                 return 1;
@@ -716,11 +729,7 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
  
         bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
  
-       for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-               pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-               if (!pmc)
-                       continue;
-
+       kvm_for_each_pmc(pmu, pmc, i, pmu->all_valid_pmc_idx) {
                 pmc_stop_counter(pmc);
                 pmc->counter = 0;
                 pmc->emulated_counter = 0;
@@ -741,6 +750,8 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
   */
  void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
  {
+       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
         if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
                 return;
  
@@ -750,8 +761,22 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
          */
         kvm_pmu_reset(vcpu);
  
-       bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
-       static_call(kvm_x86_pmu_refresh)(vcpu);
+       pmu->version = 0;
+       pmu->nr_arch_gp_counters = 0;
+       pmu->nr_arch_fixed_counters = 0;
+       pmu->counter_bitmask[KVM_PMC_GP] = 0;
+       pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+       pmu->reserved_bits = 0xffffffff00200000ull;
+       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+       pmu->global_ctrl_mask = ~0ull;
+       pmu->global_status_mask = ~0ull;
+       pmu->fixed_ctr_ctrl_mask = ~0ull;
+       pmu->pebs_enable_mask = ~0ull;
+       pmu->pebs_data_cfg_mask = ~0ull;
+       bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+
+       if (vcpu->kvm->arch.enable_pmu)
+               static_call(kvm_x86_pmu_refresh)(vcpu);
  }
  
  void kvm_pmu_init(struct kvm_vcpu *vcpu)
@@ -776,10 +801,8 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
         bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
                       pmu->pmc_in_use, X86_PMC_IDX_MAX);
  
-       for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
-               pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-
-               if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
+       kvm_for_each_pmc(pmu, pmc, i, bitmask) {
+               if (pmc->perf_event && !pmc_speculative_in_use(pmc))
                         pmc_stop_counter(pmc);
         }
  
@@ -799,13 +822,6 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
         kvm_pmu_request_counter_reprogram(pmc);
  }
  
-static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
-       unsigned int perf_hw_id)
-{
-       return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
-               AMD64_RAW_EVENT_MASK_NB);
-}
-
  static inline bool cpl_is_matched(struct kvm_pmc *pmc)
  {
         bool select_os, select_user;
@@ -817,29 +833,56 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
                 select_user = config & ARCH_PERFMON_EVENTSEL_USR;
         } else {
                 config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
-                                         pmc->idx - INTEL_PMC_IDX_FIXED);
+                                         pmc->idx - KVM_FIXED_PMC_BASE_IDX);
                 select_os = config & 0x1;
                 select_user = config & 0x2;
         }
  
+       /*
+        * Skip the CPL lookup, which isn't free on Intel, if the result will
+        * be the same regardless of the CPL.
+        */
+       if (select_os == select_user)
+               return select_os;
+
         return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
  }
  
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
  {
+       DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
         struct kvm_pmc *pmc;
         int i;
  
-       for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-               pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+       BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
  
-               if (!pmc || !pmc_event_is_allowed(pmc))
+       if (!kvm_pmu_has_perf_global_ctrl(pmu))
+               bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+       else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
+                            (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
+               return;
+
+       kvm_for_each_pmc(pmu, pmc, i, bitmap) {
+               /*
+                * Ignore checks for edge detect (all events currently emulated
+                * but KVM are always rising edges), pin control (unsupported
+                * by modern CPUs), and counter mask and its invert flag (KVM
+                * doesn't emulate multiple events in a single clock cycle).
+                *
+                * Note, the uppermost nibble of AMD's mask overlaps Intel's
+                * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
+                * bits (bits 35:34).  Checking the "in HLE/RTM transaction"
+                * flags is correct as the vCPU can't be in a transaction if
+                * KVM is emulating an instruction.  Checking the reserved bits
+                * might be wrong if they are defined in the future, but so
+                * could ignoring them, so do the simple thing for now.
+                */
+               if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
+                   !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
                         continue;
  
-               /* Ignore checks for edge detect, pin control, invert and CMASK bits */
-               if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
-                       kvm_pmu_incr_counter(pmc);
+               kvm_pmu_incr_counter(pmc);
         }
  }
  EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h

index 7caeb3d8d4fd1739bba12b0d133185fda8a041df..4d52b0b539bacf70821febdcb7754996eb7e389b 100644 (file)
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -4,6 +4,8 @@
  
  #include <linux/nospec.h>
  
+#include <asm/kvm_host.h>
+
  #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
  #define pmu_to_vcpu(pmu)  (container_of((pmu), struct kvm_vcpu, arch.pmu))
  #define pmc_to_pmu(pmc)   (&(pmc)->vcpu->arch.pmu)
@@ -18,13 +20,18 @@
  #define VMWARE_BACKDOOR_PMC_REAL_TIME          0x10001
  #define VMWARE_BACKDOOR_PMC_APPARENT_TIME      0x10002
  
+#define KVM_FIXED_PMC_BASE_IDX INTEL_PMC_IDX_FIXED
+
+struct kvm_pmu_emulated_event_selectors {
+       u64 INSTRUCTIONS_RETIRED;
+       u64 BRANCH_INSTRUCTIONS_RETIRED;
+};
+
  struct kvm_pmu_ops {
-       bool (*hw_event_available)(struct kvm_pmc *pmc);
-       struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
         struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
                 unsigned int idx, u64 *mask);
         struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
-       bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
+       int (*check_rdpmc_early)(struct kvm_vcpu *vcpu, unsigned int idx);
         bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
         int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
         int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -55,6 +62,38 @@ static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
         return pmu->version > 1;
  }
  
+/*
+ * KVM tracks all counters in 64-bit bitmaps, with general purpose counters
+ * mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0
+ * is tracked internally via index 32.  On Intel, (AMD doesn't support fixed
+ * counters), this mirrors how fixed counters are mapped to PERF_GLOBAL_CTRL
+ * and similar MSRs, i.e. tracking fixed counters at base index 32 reduces the
+ * amounter of boilerplate needed to iterate over PMCs *and* simplifies common
+ * enabling/disable/reset operations.
+ *
+ * WARNING!  This helper is only for lookups that are initiated by KVM, it is
+ * NOT safe for guest lookups, e.g. will do the wrong thing if passed a raw
+ * ECX value from RDPMC (fixed counters are accessed by setting bit 30 in ECX
+ * for RDPMC, not by adding 32 to the fixed counter index).
+ */
+static inline struct kvm_pmc *kvm_pmc_idx_to_pmc(struct kvm_pmu *pmu, int idx)
+{
+       if (idx < pmu->nr_arch_gp_counters)
+               return &pmu->gp_counters[idx];
+
+       idx -= KVM_FIXED_PMC_BASE_IDX;
+       if (idx >= 0 && idx < pmu->nr_arch_fixed_counters)
+               return &pmu->fixed_counters[idx];
+
+       return NULL;
+}
+
+#define kvm_for_each_pmc(pmu, pmc, i, bitmap)                  \
+       for_each_set_bit(i, bitmap, X86_PMC_IDX_MAX)            \
+               if (!(pmc = kvm_pmc_idx_to_pmc(pmu, i)))        \
+                       continue;                               \
+               else                                            \
+
  static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
  {
         struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -131,12 +170,13 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
  
         if (pmc_is_fixed(pmc))
                 return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
-                                       pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
+                                       pmc->idx - KVM_FIXED_PMC_BASE_IDX) & 0x3;
  
         return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
  }
  
  extern struct x86_pmu_capability kvm_pmu_cap;
+extern struct kvm_pmu_emulated_event_selectors kvm_pmu_eventsel;
  
  static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
  {
@@ -178,6 +218,11 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
                                           pmu_ops->MAX_NR_GP_COUNTERS);
         kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
                                              KVM_PMC_MAX_FIXED);
+
+       kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
+               perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
+       kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
+               perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
  }
  
  static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
@@ -216,7 +261,7 @@ static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
  void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
  void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
  int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
+int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx);
  bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
  int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
  int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -225,7 +270,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
  void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
  void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
  int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id);
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);
  
  bool is_vmware_backdoor_pmc(u32 pmc_idx);
  
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c

index dc3d95fdca7d337ef4305123b8f439d2103c8b30..d06d43d8d2aa462e4f789b5ae31c4d6d36031cb3 100644 (file)
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -184,7 +184,6 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
                                     struct kvm_smram_state_32 *smram)
  {
         struct desc_ptr dt;
-       unsigned long val;
         int i;
  
         smram->cr0     = kvm_read_cr0(vcpu);
@@ -195,10 +194,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
         for (i = 0; i < 8; i++)
                 smram->gprs[i] = kvm_register_read_raw(vcpu, i);
  
-       kvm_get_dr(vcpu, 6, &val);
-       smram->dr6     = (u32)val;
-       kvm_get_dr(vcpu, 7, &val);
-       smram->dr7     = (u32)val;
+       smram->dr6     = (u32)vcpu->arch.dr6;
+       smram->dr7     = (u32)vcpu->arch.dr7;
  
         enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
         enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
@@ -231,7 +228,6 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
                                     struct kvm_smram_state_64 *smram)
  {
         struct desc_ptr dt;
-       unsigned long val;
         int i;
  
         for (i = 0; i < 16; i++)
@@ -240,11 +236,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
         smram->rip    = kvm_rip_read(vcpu);
         smram->rflags = kvm_get_rflags(vcpu);
  
-
-       kvm_get_dr(vcpu, 6, &val);
-       smram->dr6 = val;
-       kvm_get_dr(vcpu, 7, &val);
-       smram->dr7 = val;
+       smram->dr6 = vcpu->arch.dr6;
+       smram->dr7 = vcpu->arch.dr7;
  
         smram->cr0 = kvm_read_cr0(vcpu);
         smram->cr3 = kvm_read_cr3(vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c

index b6a7ad4d69145096d55e610ef8d789b87c2a5fb0..dfcc38bd97d34f4c618bc88f202cc4dd627f00ea 100644 (file)
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -25,7 +25,7 @@ enum pmu_type {
         PMU_TYPE_EVNTSEL,
  };
  
-static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
+static struct kvm_pmc *amd_pmu_get_pmc(struct kvm_pmu *pmu, int pmc_idx)
  {
         unsigned int num_counters = pmu->nr_arch_gp_counters;
  
@@ -70,28 +70,24 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
                 return NULL;
         }
  
-       return amd_pmc_idx_to_pmc(pmu, idx);
+       return amd_pmu_get_pmc(pmu, idx);
  }
  
-static bool amd_hw_event_available(struct kvm_pmc *pmc)
-{
-       return true;
-}
-
-static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static int amd_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
  {
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
  
-       idx &= ~(3u << 30);
+       if (idx >= pmu->nr_arch_gp_counters)
+               return -EINVAL;
  
-       return idx < pmu->nr_arch_gp_counters;
+       return 0;
  }
  
  /* idx is the ECX register of RDPMC instruction */
  static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
         unsigned int idx, u64 *mask)
  {
-       return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
+       return amd_pmu_get_pmc(vcpu_to_pmu(vcpu), idx);
  }
  
  static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
@@ -233,11 +229,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
  }
  
  struct kvm_pmu_ops amd_pmu_ops __initdata = {
-       .hw_event_available = amd_hw_event_available,
-       .pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
         .rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
         .msr_idx_to_pmc = amd_msr_idx_to_pmc,
-       .is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
+       .check_rdpmc_early = amd_check_rdpmc_early,
         .is_valid_msr = amd_is_valid_msr,
         .get_msr = amd_pmu_get_msr,
         .set_msr = amd_pmu_set_msr,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c

index e90b429c84f158bdd8d4348172d56eac1e80763b..b9096bb79c0096a9138f049dfe2695a950a409a6 100644 (file)
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2735,7 +2735,6 @@ static int dr_interception(struct kvm_vcpu *vcpu)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
         int reg, dr;
-       unsigned long val;
         int err = 0;
  
         /*
@@ -2763,11 +2762,9 @@ static int dr_interception(struct kvm_vcpu *vcpu)
         dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
         if (dr >= 16) { /* mov to DRn  */
                 dr -= 16;
-               val = kvm_register_read(vcpu, reg);
-               err = kvm_set_dr(vcpu, dr, val);
+               err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
         } else {
-               kvm_get_dr(vcpu, dr, &val);
-               kvm_register_write(vcpu, reg, val);
+               kvm_register_write(vcpu, reg, kvm_get_dr(vcpu, dr));
         }
  
         return kvm_complete_insn_gp(vcpu, err);
@@ -4092,6 +4089,9 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
  
  static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
  {
+       if (is_guest_mode(vcpu))
+               return EXIT_FASTPATH_NONE;
+
         if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
             to_svm(vcpu)->vmcb->control.exit_info_1)
                 return handle_fastpath_set_msr_irqoff(vcpu);
@@ -4115,12 +4115,13 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
         guest_state_exit_irqoff();
  }
  
-static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
+static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
+                                         bool force_immediate_exit)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
         bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
  
-       trace_kvm_entry(vcpu);
+       trace_kvm_entry(vcpu, force_immediate_exit);
  
         svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
         svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4139,9 +4140,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
                  * is enough to force an immediate vmexit.
                  */
                 disable_nmi_singlestep(svm);
-               smp_send_reschedule(vcpu->cpu);
+               force_immediate_exit = true;
         }
  
+       if (force_immediate_exit)
+               smp_send_reschedule(vcpu->cpu);
+
         pre_svm_run(vcpu);
  
         sync_lapic_to_cr8(vcpu);
@@ -4237,9 +4241,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
  
         svm_complete_interrupts(vcpu);
  
-       if (is_guest_mode(vcpu))
-               return EXIT_FASTPATH_NONE;
-
         return svm_exit_handlers_fastpath(vcpu);
  }
  
@@ -4997,8 +4998,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
         .check_intercept = svm_check_intercept,
         .handle_exit_irqoff = svm_handle_exit_irqoff,
  
-       .request_immediate_exit = __kvm_request_immediate_exit,
-
         .sched_in = svm_sched_in,
  
         .nested_ops = &svm_nested_ops,
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h

index 36c8af87a707ac0556fb1e50157e70c6305df798..4e725854c63a10c8645fa3b875a7a718020e96fe 100644 (file)
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -8,7 +8,7 @@
  
  #define svm_asm(insn, clobber...)                              \
  do {                                                           \
-       asm_volatile_goto("1: " __stringify(insn) "\n\t"        \
+       asm goto("1: " __stringify(insn) "\n\t" \
                           _ASM_EXTABLE(1b, %l[fault])           \
                           ::: clobber : fault);                 \
         return;                                                 \
@@ -18,7 +18,7 @@ fault:                                                                \
  
  #define svm_asm1(insn, op1, clobber...)                                \
  do {                                                           \
-       asm_volatile_goto("1: "  __stringify(insn) " %0\n\t"    \
+       asm goto("1: "  __stringify(insn) " %0\n\t"     \
                           _ASM_EXTABLE(1b, %l[fault])           \
                           :: op1 : clobber : fault);            \
         return;                                                 \
@@ -28,7 +28,7 @@ fault:                                                                \
  
  #define svm_asm2(insn, op1, op2, clobber...)                           \
  do {                                                                   \
-       asm_volatile_goto("1: "  __stringify(insn) " %1, %0\n\t"        \
+       asm goto("1: "  __stringify(insn) " %1, %0\n\t" \
                           _ASM_EXTABLE(1b, %l[fault])                   \
                           :: op1, op2 : clobber : fault);               \
         return;                                                         \
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h

index 83843379813ee3ef8cca33d1986ef61ea6e1ff9b..88659de4d2a7141a6eff8adb28054c8cb0a6c3c2 100644 (file)
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -15,20 +15,23 @@
   * Tracepoint for guest mode entry.
   */
  TRACE_EVENT(kvm_entry,
-       TP_PROTO(struct kvm_vcpu *vcpu),
-       TP_ARGS(vcpu),
+       TP_PROTO(struct kvm_vcpu *vcpu, bool force_immediate_exit),
+       TP_ARGS(vcpu, force_immediate_exit),
  
         TP_STRUCT__entry(
                 __field(        unsigned int,   vcpu_id         )
                 __field(        unsigned long,  rip             )
+               __field(        bool,           immediate_exit  )
         ),
  
         TP_fast_assign(
                 __entry->vcpu_id        = vcpu->vcpu_id;
                 __entry->rip            = kvm_rip_read(vcpu);
+               __entry->immediate_exit = force_immediate_exit;
         ),
  
-       TP_printk("vcpu %u, rip 0x%lx", __entry->vcpu_id, __entry->rip)
+       TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
+                 __entry->immediate_exit ? "[immediate exit]" : "")
  );
  
  /*
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c

index 6329a306856b28972ca32af5f708bb9408c60896..d05ddf7514915c479bffbd6e40514b2b91db93ed 100644 (file)
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3606,7 +3606,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                 return 1;
         }
  
-       kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+       kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
  
         if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
                 return nested_vmx_failInvalid(vcpu);
@@ -4433,7 +4433,7 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
  
         if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
-               kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
+               vmcs12->guest_dr7 = vcpu->arch.dr7;
  
         if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
                 vmcs12->guest_ia32_efer = vcpu->arch.efer;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c

index a6216c8747291f4c8aeed534117fad1f3808acb8..12ade343a17ed5c7aaa2efc1ebf3b3b40046d907 100644 (file)
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -20,58 +20,24 @@
  #include "nested.h"
  #include "pmu.h"
  
-#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
-
-enum intel_pmu_architectural_events {
-       /*
-        * The order of the architectural events matters as support for each
-        * event is enumerated via CPUID using the index of the event.
-        */
-       INTEL_ARCH_CPU_CYCLES,
-       INTEL_ARCH_INSTRUCTIONS_RETIRED,
-       INTEL_ARCH_REFERENCE_CYCLES,
-       INTEL_ARCH_LLC_REFERENCES,
-       INTEL_ARCH_LLC_MISSES,
-       INTEL_ARCH_BRANCHES_RETIRED,
-       INTEL_ARCH_BRANCHES_MISPREDICTED,
-
-       NR_REAL_INTEL_ARCH_EVENTS,
-
-       /*
-        * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
-        * TSC reference cycles.  The architectural reference cycles event may
-        * or may not actually use the TSC as the reference, e.g. might use the
-        * core crystal clock or the bus clock (yeah, "architectural").
-        */
-       PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
-       NR_INTEL_ARCH_EVENTS,
-};
+/*
+ * Perf's "BASE" is wildly misleading, architectural PMUs use bits 31:16 of ECX
+ * to encode the "type" of counter to read, i.e. this is not a "base".  And to
+ * further confuse things, non-architectural PMUs use bit 31 as a flag for
+ * "fast" reads, whereas the "type" is an explicit value.
+ */
+#define INTEL_RDPMC_GP         0
+#define INTEL_RDPMC_FIXED      INTEL_PMC_FIXED_RDPMC_BASE
  
-static struct {
-       u8 eventsel;
-       u8 unit_mask;
-} const intel_arch_events[] = {
-       [INTEL_ARCH_CPU_CYCLES]                 = { 0x3c, 0x00 },
-       [INTEL_ARCH_INSTRUCTIONS_RETIRED]       = { 0xc0, 0x00 },
-       [INTEL_ARCH_REFERENCE_CYCLES]           = { 0x3c, 0x01 },
-       [INTEL_ARCH_LLC_REFERENCES]             = { 0x2e, 0x4f },
-       [INTEL_ARCH_LLC_MISSES]                 = { 0x2e, 0x41 },
-       [INTEL_ARCH_BRANCHES_RETIRED]           = { 0xc4, 0x00 },
-       [INTEL_ARCH_BRANCHES_MISPREDICTED]      = { 0xc5, 0x00 },
-       [PSEUDO_ARCH_REFERENCE_CYCLES]          = { 0x00, 0x03 },
-};
+#define INTEL_RDPMC_TYPE_MASK  GENMASK(31, 16)
+#define INTEL_RDPMC_INDEX_MASK GENMASK(15, 0)
  
-/* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {
-       [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
-       [1] = INTEL_ARCH_CPU_CYCLES,
-       [2] = PSEUDO_ARCH_REFERENCE_CYCLES,
-};
+#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
  
  static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
  {
         struct kvm_pmc *pmc;
-       u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
+       u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
         int i;
  
         pmu->fixed_ctr_ctrl = data;
@@ -84,77 +50,61 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
  
                 pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
  
-               __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
+               __set_bit(KVM_FIXED_PMC_BASE_IDX + i, pmu->pmc_in_use);
                 kvm_pmu_request_counter_reprogram(pmc);
         }
  }
  
-static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
-{
-       if (pmc_idx < INTEL_PMC_IDX_FIXED) {
-               return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
-                                 MSR_P6_EVNTSEL0);
-       } else {
-               u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;
-
-               return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
-       }
-}
-
-static bool intel_hw_event_available(struct kvm_pmc *pmc)
-{
-       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-       u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
-       u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-       int i;
-
-       BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
-
-       /*
-        * Disallow events reported as unavailable in guest CPUID.  Note, this
-        * doesn't apply to pseudo-architectural events.
-        */
-       for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
-               if (intel_arch_events[i].eventsel != event_select ||
-                   intel_arch_events[i].unit_mask != unit_mask)
-                       continue;
-
-               return pmu->available_event_types & BIT(i);
-       }
-
-       return true;
-}
-
-static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
-{
-       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-       bool fixed = idx & (1u << 30);
-
-       idx &= ~(3u << 30);
-
-       return fixed ? idx < pmu->nr_arch_fixed_counters
-                    : idx < pmu->nr_arch_gp_counters;
-}
-
  static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
                                             unsigned int idx, u64 *mask)
  {
+       unsigned int type = idx & INTEL_RDPMC_TYPE_MASK;
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-       bool fixed = idx & (1u << 30);
         struct kvm_pmc *counters;
         unsigned int num_counters;
+       u64 bitmask;
  
-       idx &= ~(3u << 30);
-       if (fixed) {
+       /*
+        * The encoding of ECX for RDPMC is different for architectural versus
+        * non-architecturals PMUs (PMUs with version '0').  For architectural
+        * PMUs, bits 31:16 specify the PMC type and bits 15:0 specify the PMC
+        * index.  For non-architectural PMUs, bit 31 is a "fast" flag, and
+        * bits 30:0 specify the PMC index.
+        *
+        * Yell and reject attempts to read PMCs for a non-architectural PMU,
+        * as KVM doesn't support such PMUs.
+        */
+       if (WARN_ON_ONCE(!pmu->version))
+               return NULL;
+
+       /*
+        * General Purpose (GP) PMCs are supported on all PMUs, and fixed PMCs
+        * are supported on all architectural PMUs, i.e. on all virtual PMUs
+        * supported by KVM.  Note, KVM only emulates fixed PMCs for PMU v2+,
+        * but the type itself is still valid, i.e. let RDPMC fail due to
+        * accessing a non-existent counter.  Reject attempts to read all other
+        * types, which are unknown/unsupported.
+        */
+       switch (type) {
+       case INTEL_RDPMC_FIXED:
                 counters = pmu->fixed_counters;
                 num_counters = pmu->nr_arch_fixed_counters;
-       } else {
+               bitmask = pmu->counter_bitmask[KVM_PMC_FIXED];
+               break;
+       case INTEL_RDPMC_GP:
                 counters = pmu->gp_counters;
                 num_counters = pmu->nr_arch_gp_counters;
+               bitmask = pmu->counter_bitmask[KVM_PMC_GP];
+               break;
+       default:
+               return NULL;
         }
+
+       idx &= INTEL_RDPMC_INDEX_MASK;
         if (idx >= num_counters)
                 return NULL;
-       *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
+
+       *mask &= bitmask;
         return &counters[array_index_nospec(idx, num_counters)];
  }
  
@@ -464,20 +414,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         return 0;
  }
  
-static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
+/*
+ * Map fixed counter events to architectural general purpose event encodings.
+ * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
+ * and so KVM instead programs the architectural event to effectively request
+ * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
+ * instead program the encoding into a general purpose counter, e.g. if a
+ * different perf_event is already utilizing the requested counter, but the end
+ * result is the same (ignoring the fact that using a general purpose counter
+ * will likely exacerbate counter contention).
+ *
+ * Forcibly inlined to allow asserting on @index at build time, and there should
+ * never be more than one user.
+ */
+static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
  {
-       int i;
-
-       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
+       const enum perf_hw_id fixed_pmc_perf_ids[] = {
+               [0] = PERF_COUNT_HW_INSTRUCTIONS,
+               [1] = PERF_COUNT_HW_CPU_CYCLES,
+               [2] = PERF_COUNT_HW_REF_CPU_CYCLES,
+       };
+       u64 eventsel;
  
-       for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
-               int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
-               struct kvm_pmc *pmc = &pmu->fixed_counters[index];
-               u32 event = fixed_pmc_events[index];
+       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_PMC_MAX_FIXED);
+       BUILD_BUG_ON(index >= KVM_PMC_MAX_FIXED);
  
-               pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
-                                intel_arch_events[event].eventsel;
-       }
+       /*
+        * Yell if perf reports support for a fixed counter but perf doesn't
+        * have a known encoding for the associated general purpose event.
+        */
+       eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
+       WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
+       return eventsel;
  }
  
  static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
@@ -491,19 +459,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
         u64 counter_mask;
         int i;
  
-       pmu->nr_arch_gp_counters = 0;
-       pmu->nr_arch_fixed_counters = 0;
-       pmu->counter_bitmask[KVM_PMC_GP] = 0;
-       pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
-       pmu->version = 0;
-       pmu->reserved_bits = 0xffffffff00200000ull;
-       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
-       pmu->global_ctrl_mask = ~0ull;
-       pmu->global_status_mask = ~0ull;
-       pmu->fixed_ctr_ctrl_mask = ~0ull;
-       pmu->pebs_enable_mask = ~0ull;
-       pmu->pebs_data_cfg_mask = ~0ull;
-
         memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
  
         /*
@@ -515,8 +470,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                 return;
  
         entry = kvm_find_cpuid_entry(vcpu, 0xa);
-       if (!entry || !vcpu->kvm->arch.enable_pmu)
+       if (!entry)
                 return;
+
         eax.full = entry->eax;
         edx.full = entry->edx;
  
@@ -543,13 +499,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                                                   kvm_pmu_cap.bit_width_fixed);
                 pmu->counter_bitmask[KVM_PMC_FIXED] =
                         ((u64)1 << edx.split.bit_width_fixed) - 1;
-               setup_fixed_pmc_eventsel(pmu);
         }
  
         for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
                 pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
         counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
-               (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
+               (((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
         pmu->global_ctrl_mask = counter_mask;
  
         /*
@@ -593,7 +548,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                         pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
                         for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                                 pmu->fixed_ctr_ctrl_mask &=
-                                       ~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
+                                       ~(1ULL << (KVM_FIXED_PMC_BASE_IDX + i * 4));
                         }
                         pmu->pebs_data_cfg_mask = ~0xff00000full;
                 } else {
@@ -619,8 +574,9 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
         for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
                 pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                 pmu->fixed_counters[i].vcpu = vcpu;
-               pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+               pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX;
                 pmu->fixed_counters[i].current_config = 0;
+               pmu->fixed_counters[i].eventsel = intel_get_fixed_pmc_eventsel(i);
         }
  
         lbr_desc->records.nr = 0;
@@ -748,11 +704,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
         struct kvm_pmc *pmc = NULL;
         int bit, hw_idx;
  
-       for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
-                        X86_PMC_IDX_MAX) {
-               pmc = intel_pmc_idx_to_pmc(pmu, bit);
-
-               if (!pmc || !pmc_speculative_in_use(pmc) ||
+       kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
+               if (!pmc_speculative_in_use(pmc) ||
                     !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
                         continue;
  
@@ -767,11 +720,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
  }
  
  struct kvm_pmu_ops intel_pmu_ops __initdata = {
-       .hw_event_available = intel_hw_event_available,
-       .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
         .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
         .msr_idx_to_pmc = intel_msr_idx_to_pmc,
-       .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
         .is_valid_msr = intel_is_valid_msr,
         .get_msr = intel_pmu_get_msr,
         .set_msr = intel_pmu_set_msr,
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h

index edc3f16cc1896f29e4eef46da685d22b4c31c668..6a9bfdfbb6e59b2e613385cd2ad46cc651a0eb28 100644 (file)
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,7 +2,10 @@
  #ifndef __KVM_X86_VMX_RUN_FLAGS_H
  #define __KVM_X86_VMX_RUN_FLAGS_H
  
-#define VMX_RUN_VMRESUME       (1 << 0)
-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+#define VMX_RUN_VMRESUME_SHIFT         0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT   1
+
+#define VMX_RUN_VMRESUME               BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL         BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
  
  #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S

index 906ecd001511355d0939e4e90a3994a7bd9809e3..2bfbf758d06110f49c71a22c1f54da9d9499669a 100644 (file)
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
         mov (%_ASM_SP), %_ASM_AX
  
         /* Check if vmlaunch or vmresume is needed */
-       test $VMX_RUN_VMRESUME, %ebx
+       bt   $VMX_RUN_VMRESUME_SHIFT, %ebx
  
         /* Load guest registers.  Don't clobber flags. */
         mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -161,8 +161,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
         /* Load guest RAX.  This kills the @regs pointer! */
         mov VCPU_RAX(%_ASM_AX), %_ASM_AX
  
-       /* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */
-       jz .Lvmlaunch
+       /* Clobbers EFLAGS.ZF */
+       CLEAR_CPU_BUFFERS
+
+       /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+       jnc .Lvmlaunch
  
         /*
          * After a successful VMRESUME/VMLAUNCH, control flow "magically"
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c

index e262bc2ba4e569983a94c3db5c13e1f0dabd9951..24d377d0a0c844df1bc387091ace98e13637dee8 100644 (file)
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -49,6 +49,8 @@
  #include <asm/spec-ctrl.h>
  #include <asm/vmx.h>
  
+#include <trace/events/ipi.h>
+
  #include "capabilities.h"
  #include "cpuid.h"
  #include "hyperv.h"
@@ -159,7 +161,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
  
  /*
   * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic and PT MSRs are handled specially.
+ * In addition to these x2apic, PT and LBR MSRs are handled specially.
   */
  static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
         MSR_IA32_SPEC_CTRL,
@@ -388,7 +390,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
  
  static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
  {
-       vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+       /*
+        * Disable VERW's behavior of clearing CPU buffers for the guest if the
+        * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
+        * the mitigation. Disabling the clearing behavior provides a
+        * performance boost for guests that aren't aware that manually clearing
+        * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
+        * and VM-Exit.
+        */
+       vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+                               (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
                                 !boot_cpu_has_bug(X86_BUG_MDS) &&
                                 !boot_cpu_has_bug(X86_BUG_TAA);
  
@@ -658,25 +669,14 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
         return flexpriority_enabled && lapic_in_kernel(vcpu);
  }
  
-static int possible_passthrough_msr_slot(u32 msr)
+static int vmx_get_passthrough_msr_slot(u32 msr)
  {
-       u32 i;
-
-       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
-               if (vmx_possible_passthrough_msrs[i] == msr)
-                       return i;
-
-       return -ENOENT;
-}
-
-static bool is_valid_passthrough_msr(u32 msr)
-{
-       bool r;
+       int i;
  
         switch (msr) {
         case 0x800 ... 0x8ff:
                 /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-               return true;
+               return -ENOENT;
         case MSR_IA32_RTIT_STATUS:
         case MSR_IA32_RTIT_OUTPUT_BASE:
         case MSR_IA32_RTIT_OUTPUT_MASK:
@@ -691,14 +691,16 @@ static bool is_valid_passthrough_msr(u32 msr)
         case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
         case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
                 /* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-               return true;
+               return -ENOENT;
         }
  
-       r = possible_passthrough_msr_slot(msr) != -ENOENT;
-
-       WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+               if (vmx_possible_passthrough_msrs[i] == msr)
+                       return i;
+       }
  
-       return r;
+       WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+       return -ENOENT;
  }
  
  struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
@@ -738,7 +740,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
   */
  static int kvm_cpu_vmxoff(void)
  {
-       asm_volatile_goto("1: vmxoff\n\t"
+       asm goto("1: vmxoff\n\t"
                           _ASM_EXTABLE(1b, %l[fault])
                           ::: "cc", "memory" : fault);
  
@@ -1281,8 +1283,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
         u16 fs_sel, gs_sel;
         int i;
  
-       vmx->req_immediate_exit = false;
-
         /*
          * Note that guest MSRs to be saved/restored can also be changed
          * when guest state is loaded. This happens when guest transitions
@@ -2784,7 +2784,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)
  
         cr4_set_bits(X86_CR4_VMXE);
  
-       asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
+       asm goto("1: vmxon %[vmxon_pointer]\n\t"
                           _ASM_EXTABLE(1b, %l[fault])
                           : : [vmxon_pointer] "m"(vmxon_pointer)
                           : : fault);
@@ -3954,6 +3954,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+       int idx;
  
         if (!cpu_has_vmx_msr_bitmap())
                 return;
@@ -3963,16 +3964,13 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
         /*
          * Mark the desired intercept state in shadow bitmap, this is needed
          * for resync when the MSR filters change.
-       */
-       if (is_valid_passthrough_msr(msr)) {
-               int idx = possible_passthrough_msr_slot(msr);
-
-               if (idx != -ENOENT) {
-                       if (type & MSR_TYPE_R)
-                               clear_bit(idx, vmx->shadow_msr_intercept.read);
-                       if (type & MSR_TYPE_W)
-                               clear_bit(idx, vmx->shadow_msr_intercept.write);
-               }
+        */
+       idx = vmx_get_passthrough_msr_slot(msr);
+       if (idx >= 0) {
+               if (type & MSR_TYPE_R)
+                       clear_bit(idx, vmx->shadow_msr_intercept.read);
+               if (type & MSR_TYPE_W)
+                       clear_bit(idx, vmx->shadow_msr_intercept.write);
         }
  
         if ((type & MSR_TYPE_R) &&
@@ -3998,6 +3996,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+       int idx;
  
         if (!cpu_has_vmx_msr_bitmap())
                 return;
@@ -4007,16 +4006,13 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
         /*
          * Mark the desired intercept state in shadow bitmap, this is needed
          * for resync when the MSR filter changes.
-       */
-       if (is_valid_passthrough_msr(msr)) {
-               int idx = possible_passthrough_msr_slot(msr);
-
-               if (idx != -ENOENT) {
-                       if (type & MSR_TYPE_R)
-                               set_bit(idx, vmx->shadow_msr_intercept.read);
-                       if (type & MSR_TYPE_W)
-                               set_bit(idx, vmx->shadow_msr_intercept.write);
-               }
+        */
+       idx = vmx_get_passthrough_msr_slot(msr);
+       if (idx >= 0) {
+               if (type & MSR_TYPE_R)
+                       set_bit(idx, vmx->shadow_msr_intercept.read);
+               if (type & MSR_TYPE_W)
+                       set_bit(idx, vmx->shadow_msr_intercept.write);
         }
  
         if (type & MSR_TYPE_R)
@@ -4127,6 +4123,9 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         u32 i;
  
+       if (!cpu_has_vmx_msr_bitmap())
+               return;
+
         /*
          * Redo intercept permissions for MSRs that KVM is passing through to
          * the guest.  Disabling interception will check the new MSR filter and
@@ -5566,10 +5565,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
  
         reg = DEBUG_REG_ACCESS_REG(exit_qualification);
         if (exit_qualification & TYPE_MOV_FROM_DR) {
-               unsigned long val;
-
-               kvm_get_dr(vcpu, dr, &val);
-               kvm_register_write(vcpu, reg, val);
+               kvm_register_write(vcpu, reg, kvm_get_dr(vcpu, dr));
                 err = 0;
         } else {
                 err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
@@ -5991,22 +5987,46 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
         return 1;
  }
  
-static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
+static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
+                                                  bool force_immediate_exit)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
  
-       if (!vmx->req_immediate_exit &&
-           !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
-               kvm_lapic_expired_hv_timer(vcpu);
+       /*
+        * In the *extremely* unlikely scenario that this is a spurious VM-Exit
+        * due to the timer expiring while it was "soft" disabled, just eat the
+        * exit and re-enter the guest.
+        */
+       if (unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
                 return EXIT_FASTPATH_REENTER_GUEST;
-       }
  
-       return EXIT_FASTPATH_NONE;
+       /*
+        * If the timer expired because KVM used it to force an immediate exit,
+        * then mission accomplished.
+        */
+       if (force_immediate_exit)
+               return EXIT_FASTPATH_EXIT_HANDLED;
+
+       /*
+        * If L2 is active, go down the slow path as emulating the guest timer
+        * expiration likely requires synthesizing a nested VM-Exit.
+        */
+       if (is_guest_mode(vcpu))
+               return EXIT_FASTPATH_NONE;
+
+       kvm_lapic_expired_hv_timer(vcpu);
+       return EXIT_FASTPATH_REENTER_GUEST;
  }
  
  static int handle_preemption_timer(struct kvm_vcpu *vcpu)
  {
-       handle_fastpath_preemption_timer(vcpu);
+       /*
+        * This non-fastpath handler is reached if and only if the preemption
+        * timer was being used to emulate a guest timer while L2 is active.
+        * All other scenarios are supposed to be handled in the fastpath.
+        */
+       WARN_ON_ONCE(!is_guest_mode(vcpu));
+       kvm_lapic_expired_hv_timer(vcpu);
         return 1;
  }
  
@@ -6509,7 +6529,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
                 vcpu->run->internal.data[0] = vectoring_info;
                 vcpu->run->internal.data[1] = exit_reason.full;
-               vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+               vcpu->run->internal.data[2] = vmx_get_exit_qual(vcpu);
                 if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
                         vcpu->run->internal.data[ndata++] =
                                 vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -7146,13 +7166,13 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
                                         msrs[i].host, false);
  }
  
-static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         u64 tscl;
         u32 delta_tsc;
  
-       if (vmx->req_immediate_exit) {
+       if (force_immediate_exit) {
                 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
                 vmx->loaded_vmcs->hv_timer_soft_disabled = false;
         } else if (vmx->hv_deadline_tsc != -1) {
@@ -7205,13 +7225,22 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
         barrier_nospec();
  }
  
-static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
+                                            bool force_immediate_exit)
  {
+       /*
+        * If L2 is active, some VMX preemption timer exits can be handled in
+        * the fastpath even, all other exits must use the slow path.
+        */
+       if (is_guest_mode(vcpu) &&
+           to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_PREEMPTION_TIMER)
+               return EXIT_FASTPATH_NONE;
+
         switch (to_vmx(vcpu)->exit_reason.basic) {
         case EXIT_REASON_MSR_WRITE:
                 return handle_fastpath_set_msr_irqoff(vcpu);
         case EXIT_REASON_PREEMPTION_TIMER:
-               return handle_fastpath_preemption_timer(vcpu);
+               return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
         default:
                 return EXIT_FASTPATH_NONE;
         }
@@ -7224,11 +7253,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
  
         guest_state_enter_irqoff();
  
-       /* L1D Flush includes CPU buffer clear to mitigate MDS */
+       /*
+        * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+        * mitigation for MDS is done late in VMentry and is still
+        * executed in spite of L1D Flush. This is because an extra VERW
+        * should not matter much after the big hammer L1D Flush.
+        */
         if (static_branch_unlikely(&vmx_l1d_should_flush))
                 vmx_l1d_flush(vcpu);
-       else if (static_branch_unlikely(&mds_user_clear))
-               mds_clear_cpu_buffers();
         else if (static_branch_unlikely(&mmio_stale_data_clear) &&
                  kvm_arch_has_assigned_device(vcpu->kvm))
                 mds_clear_cpu_buffers();
@@ -7268,7 +7300,7 @@ out:
         guest_state_exit_irqoff();
  }
  
-static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         unsigned long cr3, cr4;
@@ -7295,7 +7327,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                 return EXIT_FASTPATH_NONE;
         }
  
-       trace_kvm_entry(vcpu);
+       trace_kvm_entry(vcpu, force_immediate_exit);
  
         if (vmx->ple_window_dirty) {
                 vmx->ple_window_dirty = false;
@@ -7354,7 +7386,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                 vmx_passthrough_lbr_msrs(vcpu);
  
         if (enable_preemption_timer)
-               vmx_update_hv_timer(vcpu);
+               vmx_update_hv_timer(vcpu, force_immediate_exit);
+       else if (force_immediate_exit)
+               smp_send_reschedule(vcpu->cpu);
  
         kvm_wait_lapic_expire(vcpu);
  
@@ -7418,10 +7452,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
         vmx_recover_nmi_blocking(vmx);
         vmx_complete_interrupts(vmx);
  
-       if (is_guest_mode(vcpu))
-               return EXIT_FASTPATH_NONE;
-
-       return vmx_exit_handlers_fastpath(vcpu);
+       return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit);
  }
  
  static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
@@ -7901,11 +7932,6 @@ static __init void vmx_set_cpu_caps(void)
                 kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
  }
  
-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
-       to_vmx(vcpu)->req_immediate_exit = true;
-}
-
  static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
                                   struct x86_instruction_info *info)
  {
@@ -8358,8 +8384,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
         .check_intercept = vmx_check_intercept,
         .handle_exit_irqoff = vmx_handle_exit_irqoff,
  
-       .request_immediate_exit = vmx_request_immediate_exit,
-
         .sched_in = vmx_sched_in,
  
         .cpu_dirty_log_size = PML_ENTITY_NUM,
@@ -8619,7 +8643,6 @@ static __init int hardware_setup(void)
         if (!enable_preemption_timer) {
                 vmx_x86_ops.set_hv_timer = NULL;
                 vmx_x86_ops.cancel_hv_timer = NULL;
-               vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
         }
  
         kvm_caps.supported_mce_cap |= MCG_LMCE_P;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h

index e3b0985bb74a1f4d57be41cbb0d283abbc476625..65786dbe7d60bdf753db779312bb70754ccc6f1e 100644 (file)
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -332,8 +332,6 @@ struct vcpu_vmx {
         unsigned int ple_window;
         bool ple_window_dirty;
  
-       bool req_immediate_exit;
-
         /* Support for PML */
  #define PML_ENTITY_NUM         512
         struct page *pml_pg;
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h

index f41ce3c24123a93e08a996d4daa8e033077caf6d..8060e5fc6dbd83e145f6c08fca370e4a42d9861b 100644 (file)
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -94,7 +94,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
  
  #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
  
-       asm_volatile_goto("1: vmread %[field], %[output]\n\t"
+       asm_goto_output("1: vmread %[field], %[output]\n\t"
                           "jna %l[do_fail]\n\t"
  
                           _ASM_EXTABLE(1b, %l[do_exception])
@@ -188,7 +188,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
  
  #define vmx_asm1(insn, op1, error_args...)                             \
  do {                                                                   \
-       asm_volatile_goto("1: " __stringify(insn) " %0\n\t"             \
+       asm goto("1: " __stringify(insn) " %0\n\t"                      \
                           ".byte 0x2e\n\t" /* branch not taken hint */  \
                           "jna %l[error]\n\t"                           \
                           _ASM_EXTABLE(1b, %l[fault])                   \
@@ -205,7 +205,7 @@ fault:                                                                      \
  
  #define vmx_asm2(insn, op1, op2, error_args...)                                \
  do {                                                                   \
-       asm_volatile_goto("1: "  __stringify(insn) " %1, %0\n\t"        \
+       asm goto("1: "  __stringify(insn) " %1, %0\n\t"                 \
                           ".byte 0x2e\n\t" /* branch not taken hint */  \
                           "jna %l[error]\n\t"                           \
                           _ASM_EXTABLE(1b, %l[fault])                   \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 363b1c08020578b090b53d74482d9c8912629ec9..fb2bc9f5fe96757a772684f7094c6e235c4f86d5 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1399,22 +1399,19 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
  }
  EXPORT_SYMBOL_GPL(kvm_set_dr);
  
-void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr)
  {
         size_t size = ARRAY_SIZE(vcpu->arch.db);
  
         switch (dr) {
         case 0 ... 3:
-               *val = vcpu->arch.db[array_index_nospec(dr, size)];
-               break;
+               return vcpu->arch.db[array_index_nospec(dr, size)];
         case 4:
         case 6:
-               *val = vcpu->arch.dr6;
-               break;
+               return vcpu->arch.dr6;
         case 5:
         default: /* 7 */
-               *val = vcpu->arch.dr7;
-               break;
+               return vcpu->arch.dr7;
         }
  }
  EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -1704,22 +1701,17 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
         struct kvm_msr_entry msr;
         int r;
  
+       /* Unconditionally clear the output for simplicity */
+       msr.data = 0;
         msr.index = index;
         r = kvm_get_msr_feature(&msr);
  
-       if (r == KVM_MSR_RET_INVALID) {
-               /* Unconditionally clear the output for simplicity */
-               *data = 0;
-               if (kvm_msr_ignored_check(index, 0, false))
-                       r = 0;
-       }
-
-       if (r)
-               return r;
+       if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false))
+               r = 0;
  
         *data = msr.data;
  
-       return 0;
+       return r;
  }
  
  static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1782,6 +1774,10 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
                 kvm_mmu_reset_context(vcpu);
  
+       if (!static_cpu_has(X86_FEATURE_XSAVES) &&
+           (efer & EFER_SVME))
+               kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+
         return 0;
  }
  
@@ -2507,7 +2503,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
  }
  
  #ifdef CONFIG_X86_64
-static inline int gtod_is_based_on_tsc(int mode)
+static inline bool gtod_is_based_on_tsc(int mode)
  {
         return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
  }
@@ -2858,7 +2854,11 @@ static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
         return v * clock->mult;
  }
  
-static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
+/*
+ * As with get_kvmclock_base_ns(), this counts from boot time, at the
+ * frequency of CLOCK_MONOTONIC_RAW (hence adding gtos->offs_boot).
+ */
+static int do_kvmclock_base(s64 *t, u64 *tsc_timestamp)
  {
         struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
         unsigned long seq;
@@ -2877,6 +2877,29 @@ static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
         return mode;
  }
  
+/*
+ * This calculates CLOCK_MONOTONIC at the time of the TSC snapshot, with
+ * no boot time offset.
+ */
+static int do_monotonic(s64 *t, u64 *tsc_timestamp)
+{
+       struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+       unsigned long seq;
+       int mode;
+       u64 ns;
+
+       do {
+               seq = read_seqcount_begin(&gtod->seq);
+               ns = gtod->clock.base_cycles;
+               ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
+               ns >>= gtod->clock.shift;
+               ns += ktime_to_ns(gtod->clock.offset);
+       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+       *t = ns;
+
+       return mode;
+}
+
  static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
  {
         struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
@@ -2898,18 +2921,42 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
         return mode;
  }
  
-/* returns true if host is using TSC based clocksource */
+/*
+ * Calculates the kvmclock_base_ns (CLOCK_MONOTONIC_RAW + boot time) and
+ * reports the TSC value from which it do so. Returns true if host is
+ * using TSC based clocksource.
+ */
  static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
  {
         /* checked again under seqlock below */
         if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
                 return false;
  
-       return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
-                                                     tsc_timestamp));
+       return gtod_is_based_on_tsc(do_kvmclock_base(kernel_ns,
+                                                    tsc_timestamp));
  }
  
-/* returns true if host is using TSC based clocksource */
+/*
+ * Calculates CLOCK_MONOTONIC and reports the TSC value from which it did
+ * so. Returns true if host is using TSC based clocksource.
+ */
+bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
+{
+       /* checked again under seqlock below */
+       if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
+               return false;
+
+       return gtod_is_based_on_tsc(do_monotonic(kernel_ns,
+                                                tsc_timestamp));
+}
+
+/*
+ * Calculates CLOCK_REALTIME and reports the TSC value from which it did
+ * so. Returns true if host is using TSC based clocksource.
+ *
+ * DO NOT USE this for anything related to migration. You want CLOCK_TAI
+ * for that.
+ */
  static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
                                            u64 *tsc_timestamp)
  {
@@ -3156,7 +3203,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
  
         guest_hv_clock->version = ++vcpu->hv_clock.version;
  
-       mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
+       kvm_gpc_mark_dirty_in_slot(gpc);
         read_unlock_irqrestore(&gpc->lock, flags);
  
         trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
@@ -4581,7 +4628,7 @@ static bool kvm_is_vm_type_supported(unsigned long type)
  {
         return type == KVM_X86_DEFAULT_VM ||
                (type == KVM_X86_SW_PROTECTED_VM &&
-               IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_enabled);
+               IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled);
  }
  
  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -4678,7 +4725,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                     KVM_XEN_HVM_CONFIG_SHARED_INFO |
                     KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
                     KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
-                   KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+                   KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE |
+                   KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA;
                 if (sched_info_on())
                         r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
                              KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
@@ -5062,8 +5110,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
         int idx;
  
         if (vcpu->preempted) {
-               if (!vcpu->arch.guest_state_protected)
-                       vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+               vcpu->arch.preempted_in_kernel = kvm_arch_vcpu_in_kernel(vcpu);
  
                 /*
                  * Take the srcu lock as memslots will be accessed to check the gfn
@@ -5454,7 +5501,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
         if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
                 vcpu->arch.nmi_pending = 0;
                 atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
-               kvm_make_request(KVM_REQ_NMI, vcpu);
+               if (events->nmi.pending)
+                       kvm_make_request(KVM_REQ_NMI, vcpu);
         }
         static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
  
@@ -5509,18 +5557,23 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
  static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
                                              struct kvm_debugregs *dbgregs)
  {
-       unsigned long val;
+       unsigned int i;
  
         memset(dbgregs, 0, sizeof(*dbgregs));
-       memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-       kvm_get_dr(vcpu, 6, &val);
-       dbgregs->dr6 = val;
+
+       BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
+       for (i = 0; i < ARRAY_SIZE(vcpu->arch.db); i++)
+               dbgregs->db[i] = vcpu->arch.db[i];
+
+       dbgregs->dr6 = vcpu->arch.dr6;
         dbgregs->dr7 = vcpu->arch.dr7;
  }
  
  static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
                                             struct kvm_debugregs *dbgregs)
  {
+       unsigned int i;
+
         if (dbgregs->flags)
                 return -EINVAL;
  
@@ -5529,7 +5582,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
         if (!kvm_dr7_valid(dbgregs->dr7))
                 return -EINVAL;
  
-       memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+       for (i = 0; i < ARRAY_SIZE(vcpu->arch.db); i++)
+               vcpu->arch.db[i] = dbgregs->db[i];
+
         kvm_update_dr0123(vcpu);
         vcpu->arch.dr6 = dbgregs->dr6;
         vcpu->arch.dr7 = dbgregs->dr7;
@@ -7016,6 +7071,9 @@ set_identity_unlock:
                 r = -EEXIST;
                 if (kvm->arch.vpit)
                         goto create_pit_unlock;
+               r = -ENOENT;
+               if (!pic_in_kernel(kvm))
+                       goto create_pit_unlock;
                 r = -ENOMEM;
                 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
                 if (kvm->arch.vpit)
@@ -8164,10 +8222,9 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
         kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
  }
  
-static void emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
-                           unsigned long *dest)
+static unsigned long emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr)
  {
-       kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+       return kvm_get_dr(emul_to_vcpu(ctxt), dr);
  }
  
  static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
@@ -8389,12 +8446,9 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
         return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
  }
  
-static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
-                             u32 pmc)
+static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc)
  {
-       if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
-               return 0;
-       return -EINVAL;
+       return kvm_pmu_check_rdpmc_early(emul_to_vcpu(ctxt), pmc);
  }
  
  static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -8526,7 +8580,7 @@ static const struct x86_emulate_ops emulate_ops = {
         .set_msr_with_filter = emulator_set_msr_with_filter,
         .get_msr_with_filter = emulator_get_msr_with_filter,
         .get_msr             = emulator_get_msr,
-       .check_pmc           = emulator_check_pmc,
+       .check_rdpmc_early   = emulator_check_rdpmc_early,
         .read_pmc            = emulator_read_pmc,
         .halt                = emulator_halt,
         .wbinvd              = emulator_wbinvd,
@@ -8787,31 +8841,24 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
  
         kvm_release_pfn_clean(pfn);
  
-       /* The instructions are well-emulated on direct mmu. */
-       if (vcpu->arch.mmu->root_role.direct) {
-               unsigned int indirect_shadow_pages;
-
-               write_lock(&vcpu->kvm->mmu_lock);
-               indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
-               write_unlock(&vcpu->kvm->mmu_lock);
-
-               if (indirect_shadow_pages)
-                       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
-
-               return true;
-       }
-
         /*
-        * if emulation was due to access to shadowed page table
-        * and it failed try to unshadow page and re-enter the
-        * guest to let CPU execute the instruction.
+        * If emulation may have been triggered by a write to a shadowed page
+        * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the
+        * guest to let the CPU re-execute the instruction in the hope that the
+        * CPU can cleanly execute the instruction that KVM failed to emulate.
          */
-       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+       if (vcpu->kvm->arch.indirect_shadow_pages)
+               kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
  
         /*
-        * If the access faults on its page table, it can not
-        * be fixed by unprotecting shadow page and it should
-        * be reported to userspace.
+        * If the failed instruction faulted on an access to page tables that
+        * are used to translate any part of the instruction, KVM can't resolve
+        * the issue by unprotecting the gfn, as zapping the shadow page will
+        * result in the instruction taking a !PRESENT page fault and thus put
+        * the vCPU into an infinite loop of page faults.  E.g. KVM will create
+        * a SPTE and write-protect the gfn to resolve the !PRESENT fault, and
+        * then zap the SPTE to unprotect the gfn, and then do it all over
+        * again.  Report the error to userspace.
          */
         return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP);
  }
@@ -8906,7 +8953,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
         if (unlikely(!r))
                 return 0;
  
-       kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
+       kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
  
         /*
          * rflags is the old, "raw" value of the flags.  The new value has
@@ -9219,9 +9266,9 @@ writeback:
                  */
                 if (!ctxt->have_exception ||
                     exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
-                       kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
+                       kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
                         if (ctxt->is_branch)
-                               kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+                               kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
                         kvm_rip_write(vcpu, ctxt->eip);
                         if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
                                 r = kvm_vcpu_do_singlestep(vcpu);
@@ -9632,11 +9679,13 @@ static void kvm_x86_check_cpu_compat(void *ret)
         *(int *)ret = kvm_x86_check_processor_compatibility();
  }
  
-static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
+int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
  {
         u64 host_pat;
         int r, cpu;
  
+       guard(mutex)(&vendor_module_lock);
+
         if (kvm_x86_ops.hardware_enable) {
                 pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name);
                 return -EEXIST;
@@ -9766,17 +9815,6 @@ out_free_x86_emulator_cache:
         kmem_cache_destroy(x86_emulator_cache);
         return r;
  }
-
-int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
-{
-       int r;
-
-       mutex_lock(&vendor_module_lock);
-       r = __kvm_x86_vendor_init(ops);
-       mutex_unlock(&vendor_module_lock);
-
-       return r;
-}
  EXPORT_SYMBOL_GPL(kvm_x86_vendor_init);
  
  void kvm_x86_vendor_exit(void)
@@ -10673,12 +10711,6 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
         static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
  }
  
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
-       smp_send_reschedule(vcpu->cpu);
-}
-EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
-
  /*
   * Called within kvm->srcu read side.
   * Returns 1 to let vcpu_run() continue the guest execution loop without
@@ -10928,10 +10960,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                 goto cancel_injection;
         }
  
-       if (req_immediate_exit) {
+       if (req_immediate_exit)
                 kvm_make_request(KVM_REQ_EVENT, vcpu);
-               static_call(kvm_x86_request_immediate_exit)(vcpu);
-       }
  
         fpregs_assert_state_consistent();
         if (test_thread_flag(TIF_NEED_FPU_LOAD))
@@ -10962,7 +10992,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                 WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
                              (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
  
-               exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
+               exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu, req_immediate_exit);
                 if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
                         break;
  
@@ -12049,7 +12079,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
         vcpu->arch.regs_avail = ~0;
         vcpu->arch.regs_dirty = ~0;
  
-       kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm, vcpu, KVM_HOST_USES_PFN);
+       kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm);
  
         if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -12060,27 +12090,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
         if (r < 0)
                 return r;
  
-       if (irqchip_in_kernel(vcpu->kvm)) {
-               r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
-               if (r < 0)
-                       goto fail_mmu_destroy;
-
-               /*
-                * Defer evaluating inhibits until the vCPU is first run, as
-                * this vCPU will not get notified of any changes until this
-                * vCPU is visible to other vCPUs (marked online and added to
-                * the set of vCPUs).  Opportunistically mark APICv active as
-                * VMX in particularly is highly unlikely to have inhibits.
-                * Ignore the current per-VM APICv state so that vCPU creation
-                * is guaranteed to run with a deterministic value, the request
-                * will ensure the vCPU gets the correct state before VM-Entry.
-                */
-               if (enable_apicv) {
-                       vcpu->arch.apic->apicv_active = true;
-                       kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
-               }
-       } else
-               static_branch_inc(&kvm_has_noapic_vcpu);
+       r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+       if (r < 0)
+               goto fail_mmu_destroy;
  
         r = -ENOMEM;
  
@@ -12201,8 +12213,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
         srcu_read_unlock(&vcpu->kvm->srcu, idx);
         free_page((unsigned long)vcpu->arch.pio_data);
         kvfree(vcpu->arch.cpuid_entries);
-       if (!lapic_in_kernel(vcpu))
-               static_branch_dec(&kvm_has_noapic_vcpu);
  }
  
  void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -12479,9 +12489,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
         return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
  }
  
-__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
-EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
-
  void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
  {
         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -13084,11 +13091,13 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
  
  bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
  {
-       if (kvm_vcpu_apicv_active(vcpu) &&
-           static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
-               return true;
+       return kvm_vcpu_apicv_active(vcpu) &&
+              static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu);
+}
  
-       return false;
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.preempted_in_kernel;
  }
  
  bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
@@ -13111,9 +13120,6 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
         if (vcpu->arch.guest_state_protected)
                 return true;
  
-       if (vcpu != kvm_get_running_vcpu())
-               return vcpu->arch.preempted_in_kernel;
-
         return static_call(kvm_x86_get_cpl)(vcpu) == 0;
  }
  
@@ -13908,9 +13914,6 @@ module_init(kvm_x86_init);
  
  static void __exit kvm_x86_exit(void)
  {
-       /*
-        * If module_init() is implemented, module_exit() must also be
-        * implemented to allow module unload.
-        */
+       WARN_ON_ONCE(static_branch_unlikely(&kvm_has_noapic_vcpu));
  }
  module_exit(kvm_x86_exit);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h

index 2f7e191666580085c85785ada86789fb9d1842b1..a8b71803777baa13060b8051cc2b9aa3ce7287fd 100644 (file)
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -294,6 +294,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
  
  u64 get_kvmclock_ns(struct kvm *kvm);
  uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm);
+bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp);
  
  int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
         gva_t addr, void *val, unsigned int bytes,
@@ -431,12 +432,6 @@ static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
         return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED;
  }
  
-enum kvm_intr_type {
-       /* Values are arbitrary, but must be non-zero. */
-       KVM_HANDLING_IRQ = 1,
-       KVM_HANDLING_NMI,
-};
-
  static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
                                                  enum kvm_intr_type intr)
  {
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c

index 4b4e738c6f1b79e474d18519a51e72f3d53286cc..f65b35a05d91687b3a159af967a7408c8d73c27c 100644 (file)
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -10,7 +10,7 @@
  #include "x86.h"
  #include "xen.h"
  #include "hyperv.h"
-#include "lapic.h"
+#include "irq.h"
  
  #include <linux/eventfd.h>
  #include <linux/kvm_host.h>
@@ -24,6 +24,7 @@
  #include <xen/interface/sched.h>
  
  #include <asm/xen/cpuid.h>
+#include <asm/pvclock.h>
  
  #include "cpuid.h"
  #include "trace.h"
@@ -34,41 +35,32 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r);
  
  DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
  
-static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
+static int kvm_xen_shared_info_init(struct kvm *kvm)
  {
         struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
         struct pvclock_wall_clock *wc;
-       gpa_t gpa = gfn_to_gpa(gfn);
         u32 *wc_sec_hi;
         u32 wc_version;
         u64 wall_nsec;
         int ret = 0;
         int idx = srcu_read_lock(&kvm->srcu);
  
-       if (gfn == KVM_XEN_INVALID_GFN) {
-               kvm_gpc_deactivate(gpc);
-               goto out;
-       }
+       read_lock_irq(&gpc->lock);
+       while (!kvm_gpc_check(gpc, PAGE_SIZE)) {
+               read_unlock_irq(&gpc->lock);
  
-       do {
-               ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE);
+               ret = kvm_gpc_refresh(gpc, PAGE_SIZE);
                 if (ret)
                         goto out;
  
-               /*
-                * This code mirrors kvm_write_wall_clock() except that it writes
-                * directly through the pfn cache and doesn't mark the page dirty.
-                */
-               wall_nsec = kvm_get_wall_clock_epoch(kvm);
-
-               /* It could be invalid again already, so we need to check */
                 read_lock_irq(&gpc->lock);
+       }
  
-               if (gpc->valid)
-                       break;
-
-               read_unlock_irq(&gpc->lock);
-       } while (1);
+       /*
+        * This code mirrors kvm_write_wall_clock() except that it writes
+        * directly through the pfn cache and doesn't mark the page dirty.
+        */
+       wall_nsec = kvm_get_wall_clock_epoch(kvm);
  
         /* Paranoia checks on the 32-bit struct layout */
         BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
@@ -158,8 +150,93 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
         return HRTIMER_NORESTART;
  }
  
-static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ns)
+static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs,
+                               bool linux_wa)
  {
+       int64_t kernel_now, delta;
+       uint64_t guest_now;
+
+       /*
+        * The guest provides the requested timeout in absolute nanoseconds
+        * of the KVM clock — as *it* sees it, based on the scaled TSC and
+        * the pvclock information provided by KVM.
+        *
+        * The kernel doesn't support hrtimers based on CLOCK_MONOTONIC_RAW
+        * so use CLOCK_MONOTONIC. In the timescales covered by timers, the
+        * difference won't matter much as there is no cumulative effect.
+        *
+        * Calculate the time for some arbitrary point in time around "now"
+        * in terms of both kvmclock and CLOCK_MONOTONIC. Calculate the
+        * delta between the kvmclock "now" value and the guest's requested
+        * timeout, apply the "Linux workaround" described below, and add
+        * the resulting delta to the CLOCK_MONOTONIC "now" value, to get
+        * the absolute CLOCK_MONOTONIC time at which the timer should
+        * fire.
+        */
+       if (vcpu->arch.hv_clock.version && vcpu->kvm->arch.use_master_clock &&
+           static_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               uint64_t host_tsc, guest_tsc;
+
+               if (!IS_ENABLED(CONFIG_64BIT) ||
+                   !kvm_get_monotonic_and_clockread(&kernel_now, &host_tsc)) {
+                       /*
+                        * Don't fall back to get_kvmclock_ns() because it's
+                        * broken; it has a systemic error in its results
+                        * because it scales directly from host TSC to
+                        * nanoseconds, and doesn't scale first to guest TSC
+                        * and *then* to nanoseconds as the guest does.
+                        *
+                        * There is a small error introduced here because time
+                        * continues to elapse between the ktime_get() and the
+                        * subsequent rdtsc(). But not the systemic drift due
+                        * to get_kvmclock_ns().
+                        */
+                       kernel_now = ktime_get(); /* This is CLOCK_MONOTONIC */
+                       host_tsc = rdtsc();
+               }
+
+               /* Calculate the guest kvmclock as the guest would do it. */
+               guest_tsc = kvm_read_l1_tsc(vcpu, host_tsc);
+               guest_now = __pvclock_read_cycles(&vcpu->arch.hv_clock,
+                                                 guest_tsc);
+       } else {
+               /*
+                * Without CONSTANT_TSC, get_kvmclock_ns() is the only option.
+                *
+                * Also if the guest PV clock hasn't been set up yet, as is
+                * likely to be the case during migration when the vCPU has
+                * not been run yet. It would be possible to calculate the
+                * scaling factors properly in that case but there's not much
+                * point in doing so. The get_kvmclock_ns() drift accumulates
+                * over time, so it's OK to use it at startup. Besides, on
+                * migration there's going to be a little bit of skew in the
+                * precise moment at which timers fire anyway. Often they'll
+                * be in the "past" by the time the VM is running again after
+                * migration.
+                */
+               guest_now = get_kvmclock_ns(vcpu->kvm);
+               kernel_now = ktime_get();
+       }
+
+       delta = guest_abs - guest_now;
+
+       /*
+        * Xen has a 'Linux workaround' in do_set_timer_op() which checks for
+        * negative absolute timeout values (caused by integer overflow), and
+        * for values about 13 days in the future (2^50ns) which would be
+        * caused by jiffies overflow. For those cases, Xen sets the timeout
+        * 100ms in the future (not *too* soon, since if a guest really did
+        * set a long timeout on purpose we don't want to keep churning CPU
+        * time by waking it up).  Emulate Xen's workaround when starting the
+        * timer in response to __HYPERVISOR_set_timer_op.
+        */
+       if (linux_wa &&
+           unlikely((int64_t)guest_abs < 0 ||
+                    (delta > 0 && (uint32_t) (delta >> 50) != 0))) {
+               delta = 100 * NSEC_PER_MSEC;
+               guest_abs = guest_now + delta;
+       }
+
         /*
          * Avoid races with the old timer firing. Checking timer_expires
          * to avoid calling hrtimer_cancel() will only have false positives
@@ -171,14 +248,12 @@ static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_
         atomic_set(&vcpu->arch.xen.timer_pending, 0);
         vcpu->arch.xen.timer_expires = guest_abs;
  
-       if (delta_ns <= 0) {
+       if (delta <= 0)
                 xen_timer_callback(&vcpu->arch.xen.timer);
-       } else {
-               ktime_t ktime_now = ktime_get();
+       else
                 hrtimer_start(&vcpu->arch.xen.timer,
-                             ktime_add_ns(ktime_now, delta_ns),
+                             ktime_add_ns(kernel_now, delta),
                               HRTIMER_MODE_ABS_HARD);
-       }
  }
  
  static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu)
@@ -452,14 +527,13 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
                 smp_wmb();
         }
  
-       if (user_len2)
+       if (user_len2) {
+               kvm_gpc_mark_dirty_in_slot(gpc2);
                 read_unlock(&gpc2->lock);
+       }
  
+       kvm_gpc_mark_dirty_in_slot(gpc1);
         read_unlock_irqrestore(&gpc1->lock, flags);
-
-       mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
-       if (user_len2)
-               mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT);
  }
  
  void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
@@ -493,10 +567,9 @@ void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
                 kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable);
  }
  
-static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
+void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
  {
         struct kvm_lapic_irq irq = { };
-       int r;
  
         irq.dest_id = v->vcpu_id;
         irq.vector = v->arch.xen.upcall_vector;
@@ -505,8 +578,7 @@ static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
         irq.delivery_mode = APIC_DM_FIXED;
         irq.level = 1;
  
-       /* The fast version will always work for physical unicast */
-       WARN_ON_ONCE(!kvm_irq_delivery_to_apic_fast(v->kvm, NULL, &irq, &r, NULL));
+       kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL);
  }
  
  /*
@@ -565,13 +637,13 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
                              : "0" (evtchn_pending_sel32));
                 WRITE_ONCE(vi->evtchn_upcall_pending, 1);
         }
+
+       kvm_gpc_mark_dirty_in_slot(gpc);
         read_unlock_irqrestore(&gpc->lock, flags);
  
         /* For the per-vCPU lapic vector, deliver it as MSI. */
         if (v->arch.xen.upcall_vector)
                 kvm_xen_inject_vcpu_vector(v);
-
-       mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
  }
  
  int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
@@ -635,17 +707,59 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                 } else {
                         mutex_lock(&kvm->arch.xen.xen_lock);
                         kvm->arch.xen.long_mode = !!data->u.long_mode;
+
+                       /*
+                        * Re-initialize shared_info to put the wallclock in the
+                        * correct place. Whilst it's not necessary to do this
+                        * unless the mode is actually changed, it does no harm
+                        * to make the call anyway.
+                        */
+                       r = kvm->arch.xen.shinfo_cache.active ?
+                               kvm_xen_shared_info_init(kvm) : 0;
                         mutex_unlock(&kvm->arch.xen.xen_lock);
-                       r = 0;
                 }
                 break;
  
         case KVM_XEN_ATTR_TYPE_SHARED_INFO:
+       case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: {
+               int idx;
+
                 mutex_lock(&kvm->arch.xen.xen_lock);
-               r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
+
+               idx = srcu_read_lock(&kvm->srcu);
+
+               if (data->type == KVM_XEN_ATTR_TYPE_SHARED_INFO) {
+                       gfn_t gfn = data->u.shared_info.gfn;
+
+                       if (gfn == KVM_XEN_INVALID_GFN) {
+                               kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);
+                               r = 0;
+                       } else {
+                               r = kvm_gpc_activate(&kvm->arch.xen.shinfo_cache,
+                                                    gfn_to_gpa(gfn), PAGE_SIZE);
+                       }
+               } else {
+                       void __user * hva = u64_to_user_ptr(data->u.shared_info.hva);
+
+                       if (!PAGE_ALIGNED(hva) || !access_ok(hva, PAGE_SIZE)) {
+                               r = -EINVAL;
+                       } else if (!hva) {
+                               kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);
+                               r = 0;
+                       } else {
+                               r = kvm_gpc_activate_hva(&kvm->arch.xen.shinfo_cache,
+                                                        (unsigned long)hva, PAGE_SIZE);
+                       }
+               }
+
+               srcu_read_unlock(&kvm->srcu, idx);
+
+               if (!r && kvm->arch.xen.shinfo_cache.active)
+                       r = kvm_xen_shared_info_init(kvm);
+
                 mutex_unlock(&kvm->arch.xen.xen_lock);
                 break;
-
+       }
         case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
                 if (data->u.vector && data->u.vector < 0x10)
                         r = -EINVAL;
@@ -699,13 +813,21 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                 break;
  
         case KVM_XEN_ATTR_TYPE_SHARED_INFO:
-               if (kvm->arch.xen.shinfo_cache.active)
+               if (kvm_gpc_is_gpa_active(&kvm->arch.xen.shinfo_cache))
                         data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
                 else
                         data->u.shared_info.gfn = KVM_XEN_INVALID_GFN;
                 r = 0;
                 break;
  
+       case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA:
+               if (kvm_gpc_is_hva_active(&kvm->arch.xen.shinfo_cache))
+                       data->u.shared_info.hva = kvm->arch.xen.shinfo_cache.uhva;
+               else
+                       data->u.shared_info.hva = 0;
+               r = 0;
+               break;
+
         case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
                 data->u.vector = kvm->arch.xen.upcall_vector;
                 r = 0;
@@ -742,20 +864,33 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
  
         switch (data->type) {
         case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
+       case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA:
                 /* No compat necessary here. */
                 BUILD_BUG_ON(sizeof(struct vcpu_info) !=
                              sizeof(struct compat_vcpu_info));
                 BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
                              offsetof(struct compat_vcpu_info, time));
  
-               if (data->u.gpa == KVM_XEN_INVALID_GPA) {
-                       kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
-                       r = 0;
-                       break;
+               if (data->type == KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO) {
+                       if (data->u.gpa == KVM_XEN_INVALID_GPA) {
+                               kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
+                               r = 0;
+                               break;
+                       }
+
+                       r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
+                                            data->u.gpa, sizeof(struct vcpu_info));
+               } else {
+                       if (data->u.hva == 0) {
+                               kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
+                               r = 0;
+                               break;
+                       }
+
+                       r = kvm_gpc_activate_hva(&vcpu->arch.xen.vcpu_info_cache,
+                                                data->u.hva, sizeof(struct vcpu_info));
                 }
  
-               r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
-                                    data->u.gpa, sizeof(struct vcpu_info));
                 if (!r)
                         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
  
@@ -944,9 +1079,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
  
                 /* Start the timer if the new value has a valid vector+expiry. */
                 if (data->u.timer.port && data->u.timer.expires_ns)
-                       kvm_xen_start_timer(vcpu, data->u.timer.expires_ns,
-                                           data->u.timer.expires_ns -
-                                           get_kvmclock_ns(vcpu->kvm));
+                       kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, false);
  
                 r = 0;
                 break;
@@ -977,13 +1110,21 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
  
         switch (data->type) {
         case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
-               if (vcpu->arch.xen.vcpu_info_cache.active)
+               if (kvm_gpc_is_gpa_active(&vcpu->arch.xen.vcpu_info_cache))
                         data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
                 else
                         data->u.gpa = KVM_XEN_INVALID_GPA;
                 r = 0;
                 break;
  
+       case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA:
+               if (kvm_gpc_is_hva_active(&vcpu->arch.xen.vcpu_info_cache))
+                       data->u.hva = vcpu->arch.xen.vcpu_info_cache.uhva;
+               else
+                       data->u.hva = 0;
+               r = 0;
+               break;
+
         case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
                 if (vcpu->arch.xen.vcpu_time_info_cache.active)
                         data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
@@ -1093,9 +1234,24 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
         u32 page_num = data & ~PAGE_MASK;
         u64 page_addr = data & PAGE_MASK;
         bool lm = is_long_mode(vcpu);
+       int r = 0;
+
+       mutex_lock(&kvm->arch.xen.xen_lock);
+       if (kvm->arch.xen.long_mode != lm) {
+               kvm->arch.xen.long_mode = lm;
+
+               /*
+                * Re-initialize shared_info to put the wallclock in the
+                * correct place.
+                */
+               if (kvm->arch.xen.shinfo_cache.active &&
+                   kvm_xen_shared_info_init(kvm))
+                       r = 1;
+       }
+       mutex_unlock(&kvm->arch.xen.xen_lock);
  
-       /* Latch long_mode for shared_info pages etc. */
-       vcpu->kvm->arch.xen.long_mode = lm;
+       if (r)
+               return r;
  
         /*
          * If Xen hypercall intercept is enabled, fill the hypercall
@@ -1396,7 +1552,6 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
  {
         struct vcpu_set_singleshot_timer oneshot;
         struct x86_exception e;
-       s64 delta;
  
         if (!kvm_xen_timer_enabled(vcpu))
                 return false;
@@ -1430,9 +1585,7 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
                         return true;
                 }
  
-               /* A delta <= 0 results in an immediate callback, which is what we want */
-               delta = oneshot.timeout_abs_ns - get_kvmclock_ns(vcpu->kvm);
-               kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, delta);
+               kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, false);
                 *r = 0;
                 return true;
  
@@ -1455,29 +1608,10 @@ static bool kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout,
         if (!kvm_xen_timer_enabled(vcpu))
                 return false;
  
-       if (timeout) {
-               uint64_t guest_now = get_kvmclock_ns(vcpu->kvm);
-               int64_t delta = timeout - guest_now;
-
-               /* Xen has a 'Linux workaround' in do_set_timer_op() which
-                * checks for negative absolute timeout values (caused by
-                * integer overflow), and for values about 13 days in the
-                * future (2^50ns) which would be caused by jiffies
-                * overflow. For those cases, it sets the timeout 100ms in
-                * the future (not *too* soon, since if a guest really did
-                * set a long timeout on purpose we don't want to keep
-                * churning CPU time by waking it up).
-                */
-               if (unlikely((int64_t)timeout < 0 ||
-                            (delta > 0 && (uint32_t) (delta >> 50) != 0))) {
-                       delta = 100 * NSEC_PER_MSEC;
-                       timeout = guest_now + delta;
-               }
-
-               kvm_xen_start_timer(vcpu, timeout, delta);
-       } else {
+       if (timeout)
+               kvm_xen_start_timer(vcpu, timeout, true);
+       else
                 kvm_xen_stop_timer(vcpu);
-       }
  
         *r = 0;
         return true;
@@ -1621,9 +1755,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
                 WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx);
         }
  
-       if (!vcpu->arch.xen.vcpu_info_cache.active)
-               return -EINVAL;
-
         if (xe->port >= max_evtchn_port(kvm))
                 return -EINVAL;
  
@@ -1731,8 +1862,6 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
                 mm_borrowed = true;
         }
  
-       mutex_lock(&kvm->arch.xen.xen_lock);
-
         /*
          * It is theoretically possible for the page to be unmapped
          * and the MMU notifier to invalidate the shared_info before
@@ -1760,8 +1889,6 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
                 srcu_read_unlock(&kvm->srcu, idx);
         } while(!rc);
  
-       mutex_unlock(&kvm->arch.xen.xen_lock);
-
         if (mm_borrowed)
                 kthread_unuse_mm(kvm->mm);
  
@@ -2109,14 +2236,10 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
  
         timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
  
-       kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL,
-                    KVM_HOST_USES_PFN);
-       kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL,
-                    KVM_HOST_USES_PFN);
-       kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL,
-                    KVM_HOST_USES_PFN);
-       kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL,
-                    KVM_HOST_USES_PFN);
+       kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm);
+       kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm);
+       kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm);
+       kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm);
  }
  
  void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -2159,7 +2282,7 @@ void kvm_xen_init_vm(struct kvm *kvm)
  {
         mutex_init(&kvm->arch.xen.xen_lock);
         idr_init(&kvm->arch.xen.evtchn_ports);
-       kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN);
+       kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm);
  }
  
  void kvm_xen_destroy_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h

index f8f1fe22d090696cb32b44f719d75e124fb4c8bf..f5841d9000aebd5b9584db188d89135fb6f3e11e 100644 (file)
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -18,6 +18,7 @@ extern struct static_key_false_deferred kvm_xen_enabled;
  
  int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
  void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu);
+void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *vcpu);
  int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
  int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
  int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
@@ -36,6 +37,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm,
                          const struct kvm_irq_routing_entry *ue);
  void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
  
+static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
+{
+       /*
+        * The local APIC is being enabled. If the per-vCPU upcall vector is
+        * set and the vCPU's evtchn_upcall_pending flag is set, inject the
+        * interrupt.
+        */
+       if (static_branch_unlikely(&kvm_xen_enabled.key) &&
+           vcpu->arch.xen.vcpu_info_cache.active &&
+           vcpu->arch.xen.upcall_vector && __kvm_xen_has_interrupt(vcpu))
+               kvm_xen_inject_vcpu_vector(vcpu);
+}
+
  static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
  {
         return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -101,6 +115,10 @@ static inline void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
  {
  }
  
+static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
+{
+}
+
  static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
  {
         return false;
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S

index 20ef350a60fbb59a4b183bc3e54b1d517e6bba9b..10d5ed8b5990f4d2f64436b71905a9d817df11a1 100644 (file)
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -163,23 +163,23 @@ SYM_CODE_END(__get_user_8_handle_exception)
  #endif
  
  /* get_user */
-       _ASM_EXTABLE(1b, __get_user_handle_exception)
-       _ASM_EXTABLE(2b, __get_user_handle_exception)
-       _ASM_EXTABLE(3b, __get_user_handle_exception)
+       _ASM_EXTABLE_UA(1b, __get_user_handle_exception)
+       _ASM_EXTABLE_UA(2b, __get_user_handle_exception)
+       _ASM_EXTABLE_UA(3b, __get_user_handle_exception)
  #ifdef CONFIG_X86_64
-       _ASM_EXTABLE(4b, __get_user_handle_exception)
+       _ASM_EXTABLE_UA(4b, __get_user_handle_exception)
  #else
-       _ASM_EXTABLE(4b, __get_user_8_handle_exception)
-       _ASM_EXTABLE(5b, __get_user_8_handle_exception)
+       _ASM_EXTABLE_UA(4b, __get_user_8_handle_exception)
+       _ASM_EXTABLE_UA(5b, __get_user_8_handle_exception)
  #endif
  
  /* __get_user */
-       _ASM_EXTABLE(6b, __get_user_handle_exception)
-       _ASM_EXTABLE(7b, __get_user_handle_exception)
-       _ASM_EXTABLE(8b, __get_user_handle_exception)
+       _ASM_EXTABLE_UA(6b, __get_user_handle_exception)
+       _ASM_EXTABLE_UA(7b, __get_user_handle_exception)
+       _ASM_EXTABLE_UA(8b, __get_user_handle_exception)
  #ifdef CONFIG_X86_64
-       _ASM_EXTABLE(9b, __get_user_handle_exception)
+       _ASM_EXTABLE_UA(9b, __get_user_handle_exception)
  #else
-       _ASM_EXTABLE(9b, __get_user_8_handle_exception)
-       _ASM_EXTABLE(10b, __get_user_8_handle_exception)
+       _ASM_EXTABLE_UA(9b, __get_user_8_handle_exception)
+       _ASM_EXTABLE_UA(10b, __get_user_8_handle_exception)
  #endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S

index 2877f59341775aa38a68d152f72e0d55606c1cac..975c9c18263d2afd926c12a8bfffe0c2d72d43cd 100644 (file)
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -133,15 +133,15 @@ SYM_CODE_START_LOCAL(__put_user_handle_exception)
         RET
  SYM_CODE_END(__put_user_handle_exception)
  
-       _ASM_EXTABLE(1b, __put_user_handle_exception)
-       _ASM_EXTABLE(2b, __put_user_handle_exception)
-       _ASM_EXTABLE(3b, __put_user_handle_exception)
-       _ASM_EXTABLE(4b, __put_user_handle_exception)
-       _ASM_EXTABLE(5b, __put_user_handle_exception)
-       _ASM_EXTABLE(6b, __put_user_handle_exception)
-       _ASM_EXTABLE(7b, __put_user_handle_exception)
-       _ASM_EXTABLE(9b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(1b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(2b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(3b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(4b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(5b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(6b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(7b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(9b, __put_user_handle_exception)
  #ifdef CONFIG_X86_32
-       _ASM_EXTABLE(8b, __put_user_handle_exception)
-       _ASM_EXTABLE(10b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(8b, __put_user_handle_exception)
+       _ASM_EXTABLE_UA(10b, __put_user_handle_exception)
  #endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c

index 679b09cfe241c72e7f85bd7bbd406d59a259bf2a..d6375b3c633bc45474bbb2d6460512863ff14a51 100644 (file)
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
         show_opcodes(regs, loglvl);
  }
  
-/*
- * The (legacy) vsyscall page is the long page in the kernel portion
- * of the address space that has user-accessible permissions.
- */
-static bool is_vsyscall_vaddr(unsigned long vaddr)
-{
-       return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
-}
-
  static void
  __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
                        unsigned long address, u32 pkey, int si_code)
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c

index 968d7005f4a72454ccf8678967f040fe06f36ad6..f50cc210a981886e7d3a265b4d43ca16f47f6825 100644 (file)
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
         for (; addr < end; addr = next) {
                 pud_t *pud = pud_page + pud_index(addr);
                 pmd_t *pmd;
+               bool use_gbpage;
  
                 next = (addr & PUD_MASK) + PUD_SIZE;
                 if (next > end)
                         next = end;
  
-               if (info->direct_gbpages) {
-                       pud_t pudval;
+               /* if this is already a gbpage, this portion is already mapped */
+               if (pud_large(*pud))
+                       continue;
+
+               /* Is using a gbpage allowed? */
+               use_gbpage = info->direct_gbpages;
  
-                       if (pud_present(*pud))
-                               continue;
+               /* Don't use gbpage if it maps more than the requested region. */
+               /* at the begining: */
+               use_gbpage &= ((addr & ~PUD_MASK) == 0);
+               /* ... or at the end: */
+               use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+               /* Never overwrite existing mappings */
+               use_gbpage &= !pud_present(*pud);
+
+               if (use_gbpage) {
+                       pud_t pudval;
  
-                       addr &= PUD_MASK;
                         pudval = __pud((addr - info->offset) | info->page_flag);
                         set_pud(pud, pudval);
                         continue;
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c

index 6993f026adec9d12a68cdbf3af3314336882f36f..42115ac079cfe617b76199a167c61e5b3c7de10f 100644 (file)
--- a/arch/x86/mm/maccess.c
+++ b/arch/x86/mm/maccess.c
@@ -3,6 +3,8 @@
  #include <linux/uaccess.h>
  #include <linux/kernel.h>
  
+#include <asm/vsyscall.h>
+
  #ifdef CONFIG_X86_64
  bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
  {
@@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
         if (vaddr < TASK_SIZE_MAX + PAGE_SIZE)
                 return false;
  
+       /*
+        * Reading from the vsyscall page may cause an unhandled fault in
+        * certain cases.  Though it is at an address above TASK_SIZE_MAX, it is
+        * usually considered as a user space address.
+        */
+       if (is_vsyscall_vaddr(vaddr))
+               return false;
+
         /*
          * Allow everything during early boot before 'x86_virt_bits'
          * is initialized.  Needed for instruction decoding in early
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c

index adc497b93f03746aca087a71233b806a5790bf96..65e9a6e391c046d1c18c32ffa0049082461a82dd 100644 (file)
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -934,7 +934,7 @@ static int __init cmp_memblk(const void *a, const void *b)
         const struct numa_memblk *ma = *(const struct numa_memblk **)a;
         const struct numa_memblk *mb = *(const struct numa_memblk **)b;
  
-       return ma->start - mb->start;
+       return (ma->start > mb->start) - (ma->start < mb->start);
  }
  
  static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
@@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
   * @start: address to begin fill
   * @end: address to end fill
   *
- * Find and extend numa_meminfo memblks to cover the @start-@end
- * physical address range, such that the first memblk includes
- * @start, the last memblk includes @end, and any gaps in between
- * are filled.
+ * Find and extend numa_meminfo memblks to cover the physical
+ * address range @start-@end
   *
   * RETURNS:
   * 0             : Success
- * NUMA_NO_MEMBLK : No memblk exists in @start-@end range
+ * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
   */
  
  int __init numa_fill_memblks(u64 start, u64 end)
@@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end)
  
         /*
          * Create a list of pointers to numa_meminfo memblks that
-        * overlap start, end. Exclude (start == bi->end) since
-        * end addresses in both a CFMWS range and a memblk range
-        * are exclusive.
-        *
-        * This list of pointers is used to make in-place changes
-        * that fill out the numa_meminfo memblks.
+        * overlap start, end. The list is used to make in-place
+        * changes that fill out the numa_meminfo memblks.
          */
         for (int i = 0; i < mi->nr_blks; i++) {
                 struct numa_memblk *bi = &mi->blk[i];
  
-               if (start < bi->end && end >= bi->start) {
+               if (memblock_addrs_overlap(start, end - start, bi->start,
+                                          bi->end - bi->start)) {
                         blk[count] = &mi->blk[i];
                         count++;
                 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c

index 4b0d6fff88de5a544e2ef91a8c3f7c5fa1339aa5..1fb9a1644d944b825a7eb5735b68f48b4c8df9ce 100644 (file)
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu)
         char *resched_name, *callfunc_name, *debug_name;
  
         resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
+       if (!resched_name)
+               goto fail_mem;
         per_cpu(xen_resched_irq, cpu).name = resched_name;
         rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
                                     cpu,
@@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu)
         per_cpu(xen_resched_irq, cpu).irq = rc;
  
         callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
+       if (!callfunc_name)
+               goto fail_mem;
         per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
                                     cpu,
@@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu)
  
         if (!xen_fifo_events) {
                 debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+               if (!debug_name)
+                       goto fail_mem;
+
                 per_cpu(xen_debug_irq, cpu).name = debug_name;
                 rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
                                              xen_debug_interrupt,
@@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu)
         }
  
         callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+       if (!callfunc_name)
+               goto fail_mem;
+
         per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
                                     cpu,
@@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu)
  
         return 0;
  
+ fail_mem:
+       rc = -ENOMEM;
   fail:
         xen_smp_intr_free(cpu);
         return rc;
diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h

index c812bf85021c02db59d107fda752dba2872e3b60..46c8596259d2d921fc0395a9eaf8dd444032029c 100644 (file)
--- a/arch/xtensa/include/asm/jump_label.h
+++ b/arch/xtensa/include/asm/jump_label.h
@@ -13,7 +13,7 @@
  static __always_inline bool arch_static_branch(struct static_key *key,
                                                bool branch)
  {
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                           "_nop\n\t"
                           ".pushsection __jump_table,  \"aw\"\n\t"
                           ".word 1b, %l[l_yes], %c0\n\t"
@@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key,
          * make it reachable and wrap both into a no-transform block
          * to avoid any assembler interference with this.
          */
-       asm_volatile_goto("1:\n\t"
+       asm goto("1:\n\t"
                           ".begin no-transform\n\t"
                           "_j %l[l_yes]\n\t"
                           "2:\n\t"
diff --git a/block/blk-core.c b/block/blk-core.c

index 11342af420d0c41d1c98a729471dcd6bfb46da05..de771093b52687ae2431af36bf75b73ccaa1bbf0 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -49,6 +49,7 @@
  #include "blk-pm.h"
  #include "blk-cgroup.h"
  #include "blk-throttle.h"
+#include "blk-ioprio.h"
  
  struct dentry *blk_debugfs_root;
  
@@ -833,6 +834,14 @@ end_io:
  }
  EXPORT_SYMBOL(submit_bio_noacct);
  
+static void bio_set_ioprio(struct bio *bio)
+{
+       /* Nobody set ioprio so far? Initialize it based on task's nice value */
+       if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE)
+               bio->bi_ioprio = get_current_ioprio();
+       blkcg_set_ioprio(bio);
+}
+
  /**
   * submit_bio - submit a bio to the block device layer for I/O
   * @bio: The &struct bio which describes the I/O
@@ -855,6 +864,7 @@ void submit_bio(struct bio *bio)
                 count_vm_events(PGPGOUT, bio_sectors(bio));
         }
  
+       bio_set_ioprio(bio);
         submit_bio_noacct(bio);
  }
  EXPORT_SYMBOL(submit_bio);
diff --git a/block/blk-iocost.c b/block/blk-iocost.c

index c8beec6d7df0863bb4811c12f5ff576e7a5121c7..04d44f0bcbc85d4898df728c8ceefb3f1b5bea39 100644 (file)
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1353,6 +1353,13 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
  
         lockdep_assert_held(&iocg->waitq.lock);
  
+       /*
+        * If the delay is set by another CPU, we may be in the past. No need to
+        * change anything if so. This avoids decay calculation underflow.
+        */
+       if (time_before64(now->now, iocg->delay_at))
+               return false;
+
         /* calculate the current delay in effect - 1/2 every second */
         tdelta = now->now - iocg->delay_at;
         if (iocg->delay)
diff --git a/block/blk-mq.c b/block/blk-mq.c

index aa87fcfda1ecfc875c86a0258fe16e707ce3f167..2dc01551e27c7d1e50266e554fe4bb6d378a1482 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -40,7 +40,6 @@
  #include "blk-stat.h"
  #include "blk-mq-sched.h"
  #include "blk-rq-qos.h"
-#include "blk-ioprio.h"
  
  static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
  static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
@@ -2944,14 +2943,6 @@ static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
         return true;
  }
  
-static void bio_set_ioprio(struct bio *bio)
-{
-       /* Nobody set ioprio so far? Initialize it based on task's nice value */
-       if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE)
-               bio->bi_ioprio = get_current_ioprio();
-       blkcg_set_ioprio(bio);
-}
-
  /**
   * blk_mq_submit_bio - Create and send a request to block device.
   * @bio: Bio pointer.
@@ -2976,7 +2967,6 @@ void blk_mq_submit_bio(struct bio *bio)
         blk_status_t ret;
  
         bio = blk_queue_bounce(bio, q);
-       bio_set_ioprio(bio);
  
         if (plug) {
                 rq = rq_list_peek(&plug->cached_rq);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c

index 5ba3cd574eacbddc1b92bbaec3d79d81fb66ae7a..0c0e270a82650d9a0c6977931cd8a833b467f520 100644 (file)
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -163,9 +163,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
   */
  static bool wb_recent_wait(struct rq_wb *rwb)
  {
-       struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
+       struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
  
-       return time_before(jiffies, wb->dirty_sleep + HZ);
+       return time_before(jiffies, bdi->last_bdp_sleep + HZ);
  }
  
  static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
diff --git a/block/opal_proto.h b/block/opal_proto.h

index dec7ce3a3edb7027b971232269846390e3baa834..d247a457bf6e3fd03c0e0f496988c6a5e8999ff1 100644 (file)
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -71,6 +71,7 @@ enum opal_response_token {
  #define SHORT_ATOM_BYTE  0xBF
  #define MEDIUM_ATOM_BYTE 0xDF
  #define LONG_ATOM_BYTE   0xE3
+#define EMPTY_ATOM_BYTE  0xFF
  
  #define OPAL_INVAL_PARAM 12
  #define OPAL_MANUFACTURED_INACTIVE 0x08
diff --git a/block/sed-opal.c b/block/sed-opal.c

index 3d9e9cd250bd541f3166932bde9e43b35c13f13a..fa4dba5d85319e49a3bb411a7c78a6deed65d3e9 100644 (file)
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -1056,16 +1056,20 @@ static int response_parse(const u8 *buf, size_t length,
                         token_length = response_parse_medium(iter, pos);
                 else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */
                         token_length = response_parse_long(iter, pos);
+               else if (pos[0] == EMPTY_ATOM_BYTE) /* empty atom */
+                       token_length = 1;
                 else /* TOKEN */
                         token_length = response_parse_token(iter, pos);
  
                 if (token_length < 0)
                         return token_length;
  
+               if (pos[0] != EMPTY_ATOM_BYTE)
+                       num_entries++;
+
                 pos += token_length;
                 total -= token_length;
                 iter++;
-               num_entries++;
         }
  
         resp->num = num_entries;
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c

index 82c44d4899b9676d4d43c2f2af7fd9f95758b894..e24c829d7a0154f0ff016152e6913bff105cd93f 100644 (file)
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -91,13 +91,13 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
                 if (!(msg->msg_flags & MSG_MORE)) {
                         err = hash_alloc_result(sk, ctx);
                         if (err)
-                               goto unlock_free;
+                               goto unlock_free_result;
                         ahash_request_set_crypt(&ctx->req, NULL,
                                                 ctx->result, 0);
                         err = crypto_wait_req(crypto_ahash_final(&ctx->req),
                                               &ctx->wait);
                         if (err)
-                               goto unlock_free;
+                               goto unlock_free_result;
                 }
                 goto done_more;
         }
@@ -170,6 +170,7 @@ unlock:
  
  unlock_free:
         af_alg_free_sg(&ctx->sgl);
+unlock_free_result:
         hash_free_result(sk, ctx);
         ctx->more = false;
         goto unlock;
diff --git a/crypto/cbc.c b/crypto/cbc.c

index eedddef9ce40cc40fa7a3c2cd3bcca7607be491b..e81918ca68b782c881bf6f868b441281e249e7f4 100644 (file)
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -148,6 +148,9 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
         if (!is_power_of_2(inst->alg.co.base.cra_blocksize))
                 goto out_free_inst;
  
+       if (inst->alg.co.statesize)
+               goto out_free_inst;
+
         inst->alg.encrypt = crypto_cbc_encrypt;
         inst->alg.decrypt = crypto_cbc_decrypt;
  
diff --git a/crypto/lskcipher.c b/crypto/lskcipher.c

index 0b6dd8aa21f2edace686fb5531705698e7acc18d..0f1bd7dcde245988bb7d01dc9d0e32655669bdf8 100644 (file)
--- a/crypto/lskcipher.c
+++ b/crypto/lskcipher.c
@@ -212,13 +212,12 @@ static int crypto_lskcipher_crypt_sg(struct skcipher_request *req,
  
         ivsize = crypto_lskcipher_ivsize(tfm);
         ivs = PTR_ALIGN(ivs, crypto_skcipher_alignmask(skcipher) + 1);
+       memcpy(ivs, req->iv, ivsize);
  
         flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  
         if (req->base.flags & CRYPTO_SKCIPHER_REQ_CONT)
                 flags |= CRYPTO_LSKCIPHER_FLAG_CONT;
-       else
-               memcpy(ivs, req->iv, ivsize);
  
         if (!(req->base.flags & CRYPTO_SKCIPHER_REQ_NOTFINAL))
                 flags |= CRYPTO_LSKCIPHER_FLAG_FINAL;
@@ -234,8 +233,7 @@ static int crypto_lskcipher_crypt_sg(struct skcipher_request *req,
                 flags |= CRYPTO_LSKCIPHER_FLAG_CONT;
         }
  
-       if (flags & CRYPTO_LSKCIPHER_FLAG_FINAL)
-               memcpy(req->iv, ivs, ivsize);
+       memcpy(req->iv, ivs, ivsize);
  
         return err;
  }
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c

index 9418c73ee8ef8ba025ef896ffe218b61b8058f75..4b06402269869335c324770fc572a57bea7316f7 100644 (file)
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -480,9 +480,8 @@ static int ivpu_pci_init(struct ivpu_device *vdev)
         /* Clear any pending errors */
         pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f);
  
-       /* VPU 37XX does not require 10m D3hot delay */
-       if (ivpu_hw_gen(vdev) == IVPU_HW_37XX)
-               pdev->d3hot_delay = 0;
+       /* NPU does not require 10m D3hot delay */
+       pdev->d3hot_delay = 0;
  
         ret = pcim_enable_device(pdev);
         if (ret) {
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c

index 6576232f3e678ee7c2532b07c830b74733c06960..5fa8bd4603d5be6f1fba8c43ba058e6a9b4f3676 100644 (file)
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -222,7 +222,6 @@ ivpu_fw_init_wa(struct ivpu_device *vdev)
         const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data;
  
         if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) ||
-           (ivpu_hw_gen(vdev) > IVPU_HW_37XX) ||
             (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE))
                 vdev->wa.disable_d0i3_msg = true;
  
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c

index f15a93d83057822f2414aedb22359063fb99c6a1..89af1006df5587ba560415a7d669251648f3c3e8 100644 (file)
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -510,22 +510,12 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
         return ret;
  }
  
-static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev)
-{
-       ivpu_boot_dpu_active_drive(vdev, false);
-       ivpu_boot_pwr_island_isolation_drive(vdev, true);
-       ivpu_boot_pwr_island_trickle_drive(vdev, false);
-       ivpu_boot_pwr_island_drive(vdev, false);
-
-       return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0);
-}
-
  static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
  {
         u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
  
         val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val);
-       val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val);
+       val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val);
         val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val);
  
         REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val);
@@ -616,12 +606,37 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
         return 0;
  }
  
+static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev)
+{
+       int ret;
+       u32 val;
+
+       if (IVPU_WA(punit_disabled))
+               return 0;
+
+       ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+       if (ret) {
+               ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
+               return ret;
+       }
+
+       val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET);
+       val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val);
+       REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val);
+
+       ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+       if (ret)
+               ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+
+       return ret;
+}
+
  static int ivpu_hw_37xx_reset(struct ivpu_device *vdev)
  {
         int ret = 0;
  
-       if (ivpu_boot_pwr_domain_disable(vdev)) {
-               ivpu_err(vdev, "Failed to disable power domain\n");
+       if (ivpu_hw_37xx_ip_reset(vdev)) {
+               ivpu_err(vdev, "Failed to reset NPU\n");
                 ret = -EIO;
         }
  
@@ -661,6 +676,11 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
  {
         int ret;
  
+       /* PLL requests may fail when powering down, so issue WP 0 here */
+       ret = ivpu_pll_disable(vdev);
+       if (ret)
+               ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret);
+
         ret = ivpu_hw_37xx_d0i3_disable(vdev);
         if (ret)
                 ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c

index 704288084f37379eb6c7a1a5beb8f58ec3cea4a2..a1523d0b1ef3660709ae087003a703fb4f8237bd 100644 (file)
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -24,7 +24,7 @@
  #define SKU_HW_ID_SHIFT              16u
  #define SKU_HW_ID_MASK               0xffff0000u
  
-#define PLL_CONFIG_DEFAULT           0x1
+#define PLL_CONFIG_DEFAULT           0x0
  #define PLL_CDYN_DEFAULT             0x80
  #define PLL_EPP_DEFAULT              0x80
  #define PLL_REF_CLK_FREQ            (50 * 1000000)
@@ -530,7 +530,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
         u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES);
  
         val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val);
-       val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val);
+       val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val);
         val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val);
  
         REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val);
@@ -704,7 +704,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
  {
         struct ivpu_hw_info *hw = vdev->hw;
         u32 tile_disable;
-       u32 tile_enable;
         u32 fuse;
  
         fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE);
@@ -725,10 +724,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
         else
                 ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM);
  
-       tile_enable = (~tile_disable) & TILE_MAX_MASK;
-
-       hw->sku = REG_SET_FLD_NUM(SKU, HW_ID, LNL_HW_ID, hw->sku);
-       hw->sku = REG_SET_FLD_NUM(SKU, TILE, tile_enable, hw->sku);
         hw->tile_fuse = tile_disable;
         hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
  
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c

index 0440bee3ecafd567da6cbf47584f3daa688b618d..e70cfb8593390e489e9f9868fb6c2420733ae241 100644 (file)
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -294,7 +294,7 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
                 return -ENOENT;
  
         if (job->file_priv->has_mmu_faults)
-               job_status = VPU_JSM_STATUS_ABORTED;
+               job_status = DRM_IVPU_JOB_STATUS_ABORTED;
  
         job->bos[CMD_BUF_IDX]->job_status = job_status;
         dma_fence_signal(job->done_fence);
@@ -315,7 +315,7 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
         unsigned long id;
  
         xa_for_each(&vdev->submitted_jobs_xa, id, job)
-               ivpu_job_signal_and_destroy(vdev, id, VPU_JSM_STATUS_ABORTED);
+               ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
  }
  
  static int ivpu_job_submit(struct ivpu_job *job)
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c

index 9a3122ffce03c1dc11311ab36f31f775f4fdf6fe..91bd640655ab363b51df17a25cb9589293adc804 100644 (file)
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -72,10 +72,10 @@
  
  #define IVPU_MMU_Q_COUNT_LOG2          4 /* 16 entries */
  #define IVPU_MMU_Q_COUNT               ((u32)1 << IVPU_MMU_Q_COUNT_LOG2)
-#define IVPU_MMU_Q_WRAP_BIT            (IVPU_MMU_Q_COUNT << 1)
-#define IVPU_MMU_Q_WRAP_MASK           (IVPU_MMU_Q_WRAP_BIT - 1)
-#define IVPU_MMU_Q_IDX_MASK            (IVPU_MMU_Q_COUNT - 1)
+#define IVPU_MMU_Q_WRAP_MASK            GENMASK(IVPU_MMU_Q_COUNT_LOG2, 0)
+#define IVPU_MMU_Q_IDX_MASK             (IVPU_MMU_Q_COUNT - 1)
  #define IVPU_MMU_Q_IDX(val)            ((val) & IVPU_MMU_Q_IDX_MASK)
+#define IVPU_MMU_Q_WRP(val)             ((val) & IVPU_MMU_Q_COUNT)
  
  #define IVPU_MMU_CMDQ_CMD_SIZE         16
  #define IVPU_MMU_CMDQ_SIZE             (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE)
@@ -475,20 +475,32 @@ static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev)
         return 0;
  }
  
+static bool ivpu_mmu_queue_is_full(struct ivpu_mmu_queue *q)
+{
+       return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) &&
+               (IVPU_MMU_Q_WRP(q->prod) != IVPU_MMU_Q_WRP(q->cons)));
+}
+
+static bool ivpu_mmu_queue_is_empty(struct ivpu_mmu_queue *q)
+{
+       return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) &&
+               (IVPU_MMU_Q_WRP(q->prod) == IVPU_MMU_Q_WRP(q->cons)));
+}
+
  static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1)
  {
-       struct ivpu_mmu_queue *q = &vdev->mmu->cmdq;
-       u64 *queue_buffer = q->base;
-       int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
+       struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;
+       u64 *queue_buffer = cmdq->base;
+       int idx = IVPU_MMU_Q_IDX(cmdq->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
  
-       if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) {
+       if (ivpu_mmu_queue_is_full(cmdq)) {
                 ivpu_err(vdev, "Failed to write MMU CMD %s\n", name);
                 return -EBUSY;
         }
  
         queue_buffer[idx] = data0;
         queue_buffer[idx + 1] = data1;
-       q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
+       cmdq->prod = (cmdq->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
  
         ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1);
  
@@ -560,7 +572,6 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
         mmu->cmdq.cons = 0;
  
         memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE);
-       clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE);
         mmu->evtq.prod = 0;
         mmu->evtq.cons = 0;
  
@@ -874,14 +885,10 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
         u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE);
  
         evtq->prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
-       if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT))
+       if (ivpu_mmu_queue_is_empty(evtq))
                 return NULL;
  
-       clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE);
-
         evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK;
-       REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons);
-
         return evt;
  }
  
@@ -902,6 +909,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
                 }
  
                 ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+               REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
         }
  }
  
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c

index f501f27ebafdf6687b5a46ca7e2387faa931af3e..5f73854234ba93da22b00113376c296df1ebd35a 100644 (file)
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -58,11 +58,14 @@ static int ivpu_suspend(struct ivpu_device *vdev)
  {
         int ret;
  
+       /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+       pci_save_state(to_pci_dev(vdev->drm.dev));
+
         ret = ivpu_shutdown(vdev);
-       if (ret) {
+       if (ret)
                 ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-               return ret;
-       }
+
+       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
  
         return ret;
  }
@@ -71,6 +74,9 @@ static int ivpu_resume(struct ivpu_device *vdev)
  {
         int ret;
  
+       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+       pci_restore_state(to_pci_dev(vdev->drm.dev));
+
  retry:
         ret = ivpu_hw_power_up(vdev);
         if (ret) {
@@ -120,15 +126,20 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
  
         ivpu_fw_log_dump(vdev);
  
-retry:
-       ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
-       if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
-               cond_resched();
-               goto retry;
-       }
+       atomic_inc(&vdev->pm->reset_counter);
+       atomic_set(&vdev->pm->reset_pending, 1);
+       down_write(&vdev->pm->reset_lock);
+
+       ivpu_suspend(vdev);
+       ivpu_pm_prepare_cold_boot(vdev);
+       ivpu_jobs_abort_all(vdev);
+
+       ret = ivpu_resume(vdev);
+       if (ret)
+               ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
  
-       if (ret && ret != -EAGAIN)
-               ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
+       up_write(&vdev->pm->reset_lock);
+       atomic_set(&vdev->pm->reset_pending, 0);
  
         kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
         pm_runtime_mark_last_busy(vdev->drm.dev);
@@ -200,9 +211,6 @@ int ivpu_pm_suspend_cb(struct device *dev)
         ivpu_suspend(vdev);
         ivpu_pm_prepare_warm_boot(vdev);
  
-       pci_save_state(to_pci_dev(dev));
-       pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
-
         ivpu_dbg(vdev, PM, "Suspend done.\n");
  
         return 0;
@@ -216,9 +224,6 @@ int ivpu_pm_resume_cb(struct device *dev)
  
         ivpu_dbg(vdev, PM, "Resume..\n");
  
-       pci_set_power_state(to_pci_dev(dev), PCI_D0);
-       pci_restore_state(to_pci_dev(dev));
-
         ret = ivpu_resume(vdev);
         if (ret)
                 ivpu_err(vdev, "Failed to resume: %d\n", ret);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c

index 7b7c605166e0c1c7d2a4c9e1f1bce1f05799d4f6..ab2a82cb1b0b48ab21682bdb87c052707f19d282 100644 (file)
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -26,7 +26,6 @@
  #include <linux/interrupt.h>
  #include <linux/timer.h>
  #include <linux/cper.h>
-#include <linux/cxl-event.h>
  #include <linux/platform_device.h>
  #include <linux/mutex.h>
  #include <linux/ratelimit.h>
@@ -674,78 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
         schedule_work(&entry->work);
  }
  
-/*
- * Only a single callback can be registered for CXL CPER events.
- */
-static DECLARE_RWSEM(cxl_cper_rw_sem);
-static cxl_cper_callback cper_callback;
-
-/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
-
-/*
- * General Media Event Record
- * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
- */
-#define CPER_SEC_CXL_GEN_MEDIA_GUID                                    \
-       GUID_INIT(0xfbcd0a77, 0xc260, 0x417f,                           \
-                 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6)
-
-/*
- * DRAM Event Record
- * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
- */
-#define CPER_SEC_CXL_DRAM_GUID                                         \
-       GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,                           \
-                 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24)
-
-/*
- * Memory Module Event Record
- * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
- */
-#define CPER_SEC_CXL_MEM_MODULE_GUID                                   \
-       GUID_INIT(0xfe927475, 0xdd59, 0x4339,                           \
-                 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74)
-
-static void cxl_cper_post_event(enum cxl_event_type event_type,
-                               struct cxl_cper_event_rec *rec)
-{
-       if (rec->hdr.length <= sizeof(rec->hdr) ||
-           rec->hdr.length > sizeof(*rec)) {
-               pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
-                      rec->hdr.length);
-               return;
-       }
-
-       if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
-               pr_err(FW_WARN "CXL CPER invalid event\n");
-               return;
-       }
-
-       guard(rwsem_read)(&cxl_cper_rw_sem);
-       if (cper_callback)
-               cper_callback(event_type, rec);
-}
-
-int cxl_cper_register_callback(cxl_cper_callback callback)
-{
-       guard(rwsem_write)(&cxl_cper_rw_sem);
-       if (cper_callback)
-               return -EINVAL;
-       cper_callback = callback;
-       return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);
-
-int cxl_cper_unregister_callback(cxl_cper_callback callback)
-{
-       guard(rwsem_write)(&cxl_cper_rw_sem);
-       if (callback != cper_callback)
-               return -EINVAL;
-       cper_callback = NULL;
-       return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);
-
  static bool ghes_do_proc(struct ghes *ghes,
                          const struct acpi_hest_generic_status *estatus)
  {
@@ -780,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes,
                 }
                 else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
                         queued = ghes_handle_arm_hw_error(gdata, sev, sync);
-               } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
-                       struct cxl_cper_event_rec *rec =
-                               acpi_hest_get_payload(gdata);
-
-                       cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
-               } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
-                       struct cxl_cper_event_rec *rec =
-                               acpi_hest_get_payload(gdata);
-
-                       cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
-               } else if (guid_equal(sec_type,
-                                     &CPER_SEC_CXL_MEM_MODULE_GUID)) {
-                       struct cxl_cper_event_rec *rec =
-                               acpi_hest_get_payload(gdata);
-
-                       cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
                 } else {
                         void *err = acpi_hest_get_payload(gdata);
  
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c

index dbdee2924594a921f27fead574fcf1855c4e471b..02255795b800d1a42ceb7694216d2b6c92594b6b 100644 (file)
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -525,10 +525,12 @@ static void acpi_ec_clear(struct acpi_ec *ec)
  
  static void acpi_ec_enable_event(struct acpi_ec *ec)
  {
-       spin_lock(&ec->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ec->lock, flags);
         if (acpi_ec_started(ec))
                 __acpi_ec_enable_event(ec);
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
  
         /* Drain additional events if hardware requires that */
         if (EC_FLAGS_CLEAR_ON_RESUME)
@@ -544,9 +546,11 @@ static void __acpi_ec_flush_work(void)
  
  static void acpi_ec_disable_event(struct acpi_ec *ec)
  {
-       spin_lock(&ec->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ec->lock, flags);
         __acpi_ec_disable_event(ec);
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
  
         /*
          * When ec_freeze_events is true, we need to flush events in
@@ -567,9 +571,10 @@ void acpi_ec_flush_work(void)
  
  static bool acpi_ec_guard_event(struct acpi_ec *ec)
  {
+       unsigned long flags;
         bool guarded;
  
-       spin_lock(&ec->lock);
+       spin_lock_irqsave(&ec->lock, flags);
         /*
          * If firmware SCI_EVT clearing timing is "event", we actually
          * don't know when the SCI_EVT will be cleared by firmware after
@@ -585,29 +590,31 @@ static bool acpi_ec_guard_event(struct acpi_ec *ec)
         guarded = ec_event_clearing == ACPI_EC_EVT_TIMING_EVENT &&
                 ec->event_state != EC_EVENT_READY &&
                 (!ec->curr || ec->curr->command != ACPI_EC_COMMAND_QUERY);
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
         return guarded;
  }
  
  static int ec_transaction_polled(struct acpi_ec *ec)
  {
+       unsigned long flags;
         int ret = 0;
  
-       spin_lock(&ec->lock);
+       spin_lock_irqsave(&ec->lock, flags);
         if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_POLL))
                 ret = 1;
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
         return ret;
  }
  
  static int ec_transaction_completed(struct acpi_ec *ec)
  {
+       unsigned long flags;
         int ret = 0;
  
-       spin_lock(&ec->lock);
+       spin_lock_irqsave(&ec->lock, flags);
         if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE))
                 ret = 1;
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
         return ret;
  }
  
@@ -749,6 +756,7 @@ static int ec_guard(struct acpi_ec *ec)
  
  static int ec_poll(struct acpi_ec *ec)
  {
+       unsigned long flags;
         int repeat = 5; /* number of command restarts */
  
         while (repeat--) {
@@ -757,14 +765,14 @@ static int ec_poll(struct acpi_ec *ec)
                 do {
                         if (!ec_guard(ec))
                                 return 0;
-                       spin_lock(&ec->lock);
+                       spin_lock_irqsave(&ec->lock, flags);
                         advance_transaction(ec, false);
-                       spin_unlock(&ec->lock);
+                       spin_unlock_irqrestore(&ec->lock, flags);
                 } while (time_before(jiffies, delay));
                 pr_debug("controller reset, restart transaction\n");
-               spin_lock(&ec->lock);
+               spin_lock_irqsave(&ec->lock, flags);
                 start_transaction(ec);
-               spin_unlock(&ec->lock);
+               spin_unlock_irqrestore(&ec->lock, flags);
         }
         return -ETIME;
  }
@@ -772,10 +780,11 @@ static int ec_poll(struct acpi_ec *ec)
  static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
                                         struct transaction *t)
  {
+       unsigned long tmp;
         int ret = 0;
  
         /* start transaction */
-       spin_lock(&ec->lock);
+       spin_lock_irqsave(&ec->lock, tmp);
         /* Enable GPE for command processing (IBF=0/OBF=1) */
         if (!acpi_ec_submit_flushable_request(ec)) {
                 ret = -EINVAL;
@@ -786,11 +795,11 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
         ec->curr = t;
         ec_dbg_req("Command(%s) started", acpi_ec_cmd_string(t->command));
         start_transaction(ec);
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, tmp);
  
         ret = ec_poll(ec);
  
-       spin_lock(&ec->lock);
+       spin_lock_irqsave(&ec->lock, tmp);
         if (t->irq_count == ec_storm_threshold)
                 acpi_ec_unmask_events(ec);
         ec_dbg_req("Command(%s) stopped", acpi_ec_cmd_string(t->command));
@@ -799,7 +808,7 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
         acpi_ec_complete_request(ec);
         ec_dbg_ref(ec, "Decrease command");
  unlock:
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, tmp);
         return ret;
  }
  
@@ -927,7 +936,9 @@ EXPORT_SYMBOL(ec_get_handle);
  
  static void acpi_ec_start(struct acpi_ec *ec, bool resuming)
  {
-       spin_lock(&ec->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ec->lock, flags);
         if (!test_and_set_bit(EC_FLAGS_STARTED, &ec->flags)) {
                 ec_dbg_drv("Starting EC");
                 /* Enable GPE for event processing (SCI_EVT=1) */
@@ -937,28 +948,31 @@ static void acpi_ec_start(struct acpi_ec *ec, bool resuming)
                 }
                 ec_log_drv("EC started");
         }
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
  }
  
  static bool acpi_ec_stopped(struct acpi_ec *ec)
  {
+       unsigned long flags;
         bool flushed;
  
-       spin_lock(&ec->lock);
+       spin_lock_irqsave(&ec->lock, flags);
         flushed = acpi_ec_flushed(ec);
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
         return flushed;
  }
  
  static void acpi_ec_stop(struct acpi_ec *ec, bool suspending)
  {
-       spin_lock(&ec->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ec->lock, flags);
         if (acpi_ec_started(ec)) {
                 ec_dbg_drv("Stopping EC");
                 set_bit(EC_FLAGS_STOPPED, &ec->flags);
-               spin_unlock(&ec->lock);
+               spin_unlock_irqrestore(&ec->lock, flags);
                 wait_event(ec->wait, acpi_ec_stopped(ec));
-               spin_lock(&ec->lock);
+               spin_lock_irqsave(&ec->lock, flags);
                 /* Disable GPE for event processing (SCI_EVT=1) */
                 if (!suspending) {
                         acpi_ec_complete_request(ec);
@@ -969,25 +983,29 @@ static void acpi_ec_stop(struct acpi_ec *ec, bool suspending)
                 clear_bit(EC_FLAGS_STOPPED, &ec->flags);
                 ec_log_drv("EC stopped");
         }
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
  }
  
  static void acpi_ec_enter_noirq(struct acpi_ec *ec)
  {
-       spin_lock(&ec->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ec->lock, flags);
         ec->busy_polling = true;
         ec->polling_guard = 0;
         ec_log_drv("interrupt blocked");
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
  }
  
  static void acpi_ec_leave_noirq(struct acpi_ec *ec)
  {
-       spin_lock(&ec->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ec->lock, flags);
         ec->busy_polling = ec_busy_polling;
         ec->polling_guard = ec_polling_guard;
         ec_log_drv("interrupt unblocked");
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
  }
  
  void acpi_ec_block_transactions(void)
@@ -1119,9 +1137,9 @@ static void acpi_ec_event_processor(struct work_struct *work)
  
         ec_dbg_evt("Query(0x%02x) stopped", handler->query_bit);
  
-       spin_lock(&ec->lock);
+       spin_lock_irq(&ec->lock);
         ec->queries_in_progress--;
-       spin_unlock(&ec->lock);
+       spin_unlock_irq(&ec->lock);
  
         acpi_ec_put_query_handler(handler);
         kfree(q);
@@ -1184,12 +1202,12 @@ static int acpi_ec_submit_query(struct acpi_ec *ec)
          */
         ec_dbg_evt("Query(0x%02x) scheduled", value);
  
-       spin_lock(&ec->lock);
+       spin_lock_irq(&ec->lock);
  
         ec->queries_in_progress++;
         queue_work(ec_query_wq, &q->work);
  
-       spin_unlock(&ec->lock);
+       spin_unlock_irq(&ec->lock);
  
         return 0;
  
@@ -1205,14 +1223,14 @@ static void acpi_ec_event_handler(struct work_struct *work)
  
         ec_dbg_evt("Event started");
  
-       spin_lock(&ec->lock);
+       spin_lock_irq(&ec->lock);
  
         while (ec->events_to_process) {
-               spin_unlock(&ec->lock);
+               spin_unlock_irq(&ec->lock);
  
                 acpi_ec_submit_query(ec);
  
-               spin_lock(&ec->lock);
+               spin_lock_irq(&ec->lock);
  
                 ec->events_to_process--;
         }
@@ -1229,11 +1247,11 @@ static void acpi_ec_event_handler(struct work_struct *work)
  
                 ec_dbg_evt("Event stopped");
  
-               spin_unlock(&ec->lock);
+               spin_unlock_irq(&ec->lock);
  
                 guard_timeout = !!ec_guard(ec);
  
-               spin_lock(&ec->lock);
+               spin_lock_irq(&ec->lock);
  
                 /* Take care of SCI_EVT unless someone else is doing that. */
                 if (guard_timeout && !ec->curr)
@@ -1246,7 +1264,7 @@ static void acpi_ec_event_handler(struct work_struct *work)
  
         ec->events_in_progress--;
  
-       spin_unlock(&ec->lock);
+       spin_unlock_irq(&ec->lock);
  }
  
  static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt)
@@ -1271,11 +1289,13 @@ static void clear_gpe_and_advance_transaction(struct acpi_ec *ec, bool interrupt
  
  static void acpi_ec_handle_interrupt(struct acpi_ec *ec)
  {
-       spin_lock(&ec->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ec->lock, flags);
  
         clear_gpe_and_advance_transaction(ec, true);
  
-       spin_unlock(&ec->lock);
+       spin_unlock_irqrestore(&ec->lock, flags);
  }
  
  static u32 acpi_ec_gpe_handler(acpi_handle gpe_device,
@@ -2085,7 +2105,7 @@ bool acpi_ec_dispatch_gpe(void)
          * Dispatch the EC GPE in-band, but do not report wakeup in any case
          * to allow the caller to process events properly after that.
          */
-       spin_lock(&first_ec->lock);
+       spin_lock_irq(&first_ec->lock);
  
         if (acpi_ec_gpe_status_set(first_ec)) {
                 pm_pr_dbg("ACPI EC GPE status set\n");
@@ -2094,7 +2114,7 @@ bool acpi_ec_dispatch_gpe(void)
                 work_in_progress = acpi_ec_work_in_progress(first_ec);
         }
  
-       spin_unlock(&first_ec->lock);
+       spin_unlock_irq(&first_ec->lock);
  
         if (!work_in_progress)
                 return false;
@@ -2107,11 +2127,11 @@ bool acpi_ec_dispatch_gpe(void)
  
                 pm_pr_dbg("ACPI EC work flushed\n");
  
-               spin_lock(&first_ec->lock);
+               spin_lock_irq(&first_ec->lock);
  
                 work_in_progress = acpi_ec_work_in_progress(first_ec);
  
-               spin_unlock(&first_ec->lock);
+               spin_unlock_irq(&first_ec->lock);
         } while (work_in_progress && !pm_wakeup_pending());
  
         return false;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c

index 8dd23b19e99731ce1ce5f5d6062366aece0dd415..eca24f41556df04ac61747e05aace9622fbcc580 100644 (file)
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -478,6 +478,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread,
  {
         WARN_ON(!list_empty(&thread->waiting_thread_node));
         binder_enqueue_work_ilocked(work, &thread->todo);
+
+       /* (e)poll-based threads require an explicit wakeup signal when
+        * queuing their own work; they rely on these events to consume
+        * messages without I/O block. Without it, threads risk waiting
+        * indefinitely without handling the work.
+        */
+       if (thread->looper & BINDER_LOOPER_STATE_POLL &&
+           thread->pid == current->pid && !thread->process_todo)
+               wake_up_interruptible_sync(&thread->wait);
+
         thread->process_todo = true;
  }
  
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c

index d2460fa985b7e3b53cf71bc154d613c715ccf2fc..682ff550ccfb98381515b4821594176f5561f869 100644 (file)
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -606,13 +606,13 @@ static const struct pci_device_id ahci_pci_tbl[] = {
         { PCI_VDEVICE(PROMISE, 0x3781), board_ahci },   /* FastTrak TX8660 ahci-mode */
  
         /* ASMedia */
-       { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci },   /* ASM1060 */
-       { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci },   /* ASM1060 */
+       { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */
+       { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */
         { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */
         { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */
-       { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci },   /* ASM1061R */
-       { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci },   /* ASM1062R */
-       { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci },   /* ASM1062+JMB575 */
+       { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */
+       { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */
+       { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */
         { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci },   /* ASM1062A */
         { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci },   /* ASM1064 */
         { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci },   /* ASM1164 */
@@ -671,9 +671,17 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets");
  static void ahci_pci_save_initial_config(struct pci_dev *pdev,
                                          struct ahci_host_priv *hpriv)
  {
-       if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) {
-               dev_info(&pdev->dev, "ASM1166 has only six ports\n");
-               hpriv->saved_port_map = 0x3f;
+       if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA) {
+               switch (pdev->device) {
+               case 0x1166:
+                       dev_info(&pdev->dev, "ASM1166 has only six ports\n");
+                       hpriv->saved_port_map = 0x3f;
+                       break;
+               case 0x1064:
+                       dev_info(&pdev->dev, "ASM1064 has only four ports\n");
+                       hpriv->saved_port_map = 0xf;
+                       break;
+               }
         }
  
         if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) {
diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c

index 64f7f7d6ba84e07c2f2db2fbbdfb3d315f821ec2..11a2c199a7c24628e858f2fc8e88e69a60c8b94b 100644 (file)
--- a/drivers/ata/ahci_ceva.c
+++ b/drivers/ata/ahci_ceva.c
@@ -88,7 +88,6 @@ struct ceva_ahci_priv {
         u32 axicc;
         bool is_cci_enabled;
         int flags;
-       struct reset_control *rst;
  };
  
  static unsigned int ceva_ahci_read_id(struct ata_device *dev,
@@ -189,6 +188,60 @@ static const struct scsi_host_template ahci_platform_sht = {
         AHCI_SHT(DRV_NAME),
  };
  
+static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv)
+{
+       int rc, i;
+
+       rc = ahci_platform_enable_regulators(hpriv);
+       if (rc)
+               return rc;
+
+       rc = ahci_platform_enable_clks(hpriv);
+       if (rc)
+               goto disable_regulator;
+
+       /* Assert the controller reset */
+       rc = ahci_platform_assert_rsts(hpriv);
+       if (rc)
+               goto disable_clks;
+
+       for (i = 0; i < hpriv->nports; i++) {
+               rc = phy_init(hpriv->phys[i]);
+               if (rc)
+                       goto disable_rsts;
+       }
+
+       /* De-assert the controller reset */
+       ahci_platform_deassert_rsts(hpriv);
+
+       for (i = 0; i < hpriv->nports; i++) {
+               rc = phy_power_on(hpriv->phys[i]);
+               if (rc) {
+                       phy_exit(hpriv->phys[i]);
+                       goto disable_phys;
+               }
+       }
+
+       return 0;
+
+disable_rsts:
+       ahci_platform_deassert_rsts(hpriv);
+
+disable_phys:
+       while (--i >= 0) {
+               phy_power_off(hpriv->phys[i]);
+               phy_exit(hpriv->phys[i]);
+       }
+
+disable_clks:
+       ahci_platform_disable_clks(hpriv);
+
+disable_regulator:
+       ahci_platform_disable_regulators(hpriv);
+
+       return rc;
+}
+
  static int ceva_ahci_probe(struct platform_device *pdev)
  {
         struct device_node *np = pdev->dev.of_node;
@@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev)
                 return -ENOMEM;
  
         cevapriv->ahci_pdev = pdev;
-
-       cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev,
-                                                                 NULL);
-       if (IS_ERR(cevapriv->rst))
-               dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst),
-                             "failed to get reset\n");
-
         hpriv = ahci_platform_get_resources(pdev, 0);
         if (IS_ERR(hpriv))
                 return PTR_ERR(hpriv);
  
-       if (!cevapriv->rst) {
-               rc = ahci_platform_enable_resources(hpriv);
-               if (rc)
-                       return rc;
-       } else {
-               int i;
+       hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev,
+                                                               NULL);
+       if (IS_ERR(hpriv->rsts))
+               return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts),
+                                    "failed to get reset\n");
  
-               rc = ahci_platform_enable_clks(hpriv);
-               if (rc)
-                       return rc;
-               /* Assert the controller reset */
-               reset_control_assert(cevapriv->rst);
-
-               for (i = 0; i < hpriv->nports; i++) {
-                       rc = phy_init(hpriv->phys[i]);
-                       if (rc)
-                               return rc;
-               }
-
-               /* De-assert the controller reset */
-               reset_control_deassert(cevapriv->rst);
-
-               for (i = 0; i < hpriv->nports; i++) {
-                       rc = phy_power_on(hpriv->phys[i]);
-                       if (rc) {
-                               phy_exit(hpriv->phys[i]);
-                               return rc;
-                       }
-               }
-       }
+       rc = ceva_ahci_platform_enable_resources(hpriv);
+       if (rc)
+               return rc;
  
         if (of_property_read_bool(np, "ceva,broken-gen2"))
                 cevapriv->flags = CEVA_FLAG_BROKEN_GEN2;
@@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev)
         if (of_property_read_u8_array(np, "ceva,p0-cominit-params",
                                         (u8 *)&cevapriv->pp2c[0], 4) < 0) {
                 dev_warn(dev, "ceva,p0-cominit-params property not defined\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto disable_resources;
         }
  
         if (of_property_read_u8_array(np, "ceva,p1-cominit-params",
                                         (u8 *)&cevapriv->pp2c[1], 4) < 0) {
                 dev_warn(dev, "ceva,p1-cominit-params property not defined\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto disable_resources;
         }
  
         /* Read OOB timing value for COMWAKE from device-tree*/
         if (of_property_read_u8_array(np, "ceva,p0-comwake-params",
                                         (u8 *)&cevapriv->pp3c[0], 4) < 0) {
                 dev_warn(dev, "ceva,p0-comwake-params property not defined\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto disable_resources;
         }
  
         if (of_property_read_u8_array(np, "ceva,p1-comwake-params",
                                         (u8 *)&cevapriv->pp3c[1], 4) < 0) {
                 dev_warn(dev, "ceva,p1-comwake-params property not defined\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto disable_resources;
         }
  
         /* Read phy BURST timing value from device-tree */
         if (of_property_read_u8_array(np, "ceva,p0-burst-params",
                                         (u8 *)&cevapriv->pp4c[0], 4) < 0) {
                 dev_warn(dev, "ceva,p0-burst-params property not defined\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto disable_resources;
         }
  
         if (of_property_read_u8_array(np, "ceva,p1-burst-params",
                                         (u8 *)&cevapriv->pp4c[1], 4) < 0) {
                 dev_warn(dev, "ceva,p1-burst-params property not defined\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto disable_resources;
         }
  
         /* Read phy RETRY interval timing value from device-tree */
         if (of_property_read_u16_array(np, "ceva,p0-retry-params",
                                         (u16 *)&cevapriv->pp5c[0], 2) < 0) {
                 dev_warn(dev, "ceva,p0-retry-params property not defined\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto disable_resources;
         }
  
         if (of_property_read_u16_array(np, "ceva,p1-retry-params",
                                         (u16 *)&cevapriv->pp5c[1], 2) < 0) {
                 dev_warn(dev, "ceva,p1-retry-params property not defined\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               goto disable_resources;
         }
  
         /*
@@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev)
         struct ahci_host_priv *hpriv = host->private_data;
         int rc;
  
-       rc = ahci_platform_enable_resources(hpriv);
+       rc = ceva_ahci_platform_enable_resources(hpriv);
         if (rc)
                 return rc;
  
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c

index 09ed67772fae492323361ab7e94f8a8d4345d2e8..be3412cdb22e78a1d663337698f07b07c66727e4 100644 (file)
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2001,6 +2001,33 @@ bool ata_dev_power_init_tf(struct ata_device *dev, struct ata_taskfile *tf,
         return true;
  }
  
+static bool ata_dev_power_is_active(struct ata_device *dev)
+{
+       struct ata_taskfile tf;
+       unsigned int err_mask;
+
+       ata_tf_init(dev, &tf);
+       tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
+       tf.protocol = ATA_PROT_NODATA;
+       tf.command = ATA_CMD_CHK_POWER;
+
+       err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+       if (err_mask) {
+               ata_dev_err(dev, "Check power mode failed (err_mask=0x%x)\n",
+                           err_mask);
+               /*
+                * Assume we are in standby mode so that we always force a
+                * spinup in ata_dev_power_set_active().
+                */
+               return false;
+       }
+
+       ata_dev_dbg(dev, "Power mode: 0x%02x\n", tf.nsect);
+
+       /* Active or idle */
+       return tf.nsect == 0xff;
+}
+
  /**
   *     ata_dev_power_set_standby - Set a device power mode to standby
   *     @dev: target device
@@ -2017,6 +2044,11 @@ void ata_dev_power_set_standby(struct ata_device *dev)
         struct ata_taskfile tf;
         unsigned int err_mask;
  
+       /* If the device is already sleeping or in standby, do nothing. */
+       if ((dev->flags & ATA_DFLAG_SLEEPING) ||
+           !ata_dev_power_is_active(dev))
+               return;
+
         /*
          * Some odd clown BIOSes issue spindown on power off (ACPI S4 or S5)
          * causing some drives to spin up and down again. For these, do nothing
@@ -2042,33 +2074,6 @@ void ata_dev_power_set_standby(struct ata_device *dev)
                             err_mask);
  }
  
-static bool ata_dev_power_is_active(struct ata_device *dev)
-{
-       struct ata_taskfile tf;
-       unsigned int err_mask;
-
-       ata_tf_init(dev, &tf);
-       tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
-       tf.protocol = ATA_PROT_NODATA;
-       tf.command = ATA_CMD_CHK_POWER;
-
-       err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
-       if (err_mask) {
-               ata_dev_err(dev, "Check power mode failed (err_mask=0x%x)\n",
-                           err_mask);
-               /*
-                * Assume we are in standby mode so that we always force a
-                * spinup in ata_dev_power_set_active().
-                */
-               return false;
-       }
-
-       ata_dev_dbg(dev, "Power mode: 0x%02x\n", tf.nsect);
-
-       /* Active or idle */
-       return tf.nsect == 0xff;
-}
-
  /**
   *     ata_dev_power_set_active -  Set a device power mode to active
   *     @dev: target device
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c

index e327a0229dc173442b2789a402a8ea0adb931cdd..e7f713cd70d3fd7a413c2568b8dca8f8cc8ba2c4 100644 (file)
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -2930,6 +2930,8 @@ open_card_ubr0(struct idt77252_dev *card)
         vc->scq = alloc_scq(card, vc->class);
         if (!vc->scq) {
                 printk("%s: can't get SCQ.\n", card->name);
+               kfree(card->vcs[0]);
+               card->vcs[0] = NULL;
                 return -ENOMEM;
         }
  
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c

index 018ac202de345e9a97bc7198385c1d95d460eb28..024b78a0cfc11bbba2f0bf3c32f21d55aa101d3d 100644 (file)
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -431,9 +431,6 @@ init_cpu_capacity_callback(struct notifier_block *nb,
         struct cpufreq_policy *policy = data;
         int cpu;
  
-       if (!raw_capacity)
-               return 0;
-
         if (val != CPUFREQ_CREATE_POLICY)
                 return 0;
  
@@ -450,9 +447,11 @@ init_cpu_capacity_callback(struct notifier_block *nb,
         }
  
         if (cpumask_empty(cpus_to_visit)) {
-               topology_normalize_cpu_scale();
-               schedule_work(&update_topology_flags_work);
-               free_raw_capacity();
+               if (raw_capacity) {
+                       topology_normalize_cpu_scale();
+                       schedule_work(&update_topology_flags_work);
+                       free_raw_capacity();
+               }
                 pr_debug("cpu_capacity: parsing done\n");
                 schedule_work(&parsing_done_work);
         }
@@ -472,7 +471,7 @@ static int __init register_cpufreq_notifier(void)
          * On ACPI-based systems skip registering cpufreq notifier as cpufreq
          * information is not needed for cpu capacity initialization.
          */
-       if (!acpi_disabled || !raw_capacity)
+       if (!acpi_disabled)
                 return -EINVAL;
  
         if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
diff --git a/drivers/base/core.c b/drivers/base/core.c

index 14d46af40f9a15e185230eecf3bbac6ec94728ef..9828da9b933cb7511756d15ec8be2ebbd14f9e44 100644 (file)
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -125,7 +125,7 @@ static void __fwnode_link_del(struct fwnode_link *link)
   */
  static void __fwnode_link_cycle(struct fwnode_link *link)
  {
-       pr_debug("%pfwf: Relaxing link with %pfwf\n",
+       pr_debug("%pfwf: cycle: depends on %pfwf\n",
                  link->consumer, link->supplier);
         link->flags |= FWLINK_FLAG_CYCLE;
  }
@@ -284,10 +284,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target)
         return false;
  }
  
+#define DL_MARKER_FLAGS                (DL_FLAG_INFERRED | \
+                                DL_FLAG_CYCLE | \
+                                DL_FLAG_MANAGED)
  static inline bool device_link_flag_is_sync_state_only(u32 flags)
  {
-       return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) ==
-               (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED);
+       return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY;
  }
  
  /**
@@ -1943,6 +1945,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
  
         /* Termination condition. */
         if (sup_dev == con) {
+               pr_debug("----- cycle: start -----\n");
                 ret = true;
                 goto out;
         }
@@ -1974,8 +1977,11 @@ static bool __fw_devlink_relax_cycles(struct device *con,
         else
                 par_dev = fwnode_get_next_parent_dev(sup_handle);
  
-       if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode))
+       if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) {
+               pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle,
+                        par_dev->fwnode);
                 ret = true;
+       }
  
         if (!sup_dev)
                 goto out;
@@ -1991,6 +1997,8 @@ static bool __fw_devlink_relax_cycles(struct device *con,
  
                 if (__fw_devlink_relax_cycles(con,
                                               dev_link->supplier->fwnode)) {
+                       pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle,
+                                dev_link->supplier->fwnode);
                         fw_devlink_relax_link(dev_link);
                         dev_link->flags |= DL_FLAG_CYCLE;
                         ret = true;
@@ -2058,13 +2066,19 @@ static int fw_devlink_create_devlink(struct device *con,
  
         /*
          * SYNC_STATE_ONLY device links don't block probing and supports cycles.
-        * So cycle detection isn't necessary and shouldn't be done.
+        * So, one might expect that cycle detection isn't necessary for them.
+        * However, if the device link was marked as SYNC_STATE_ONLY because
+        * it's part of a cycle, then we still need to do cycle detection. This
+        * is because the consumer and supplier might be part of multiple cycles
+        * and we need to detect all those cycles.
          */
-       if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) {
+       if (!device_link_flag_is_sync_state_only(flags) ||
+           flags & DL_FLAG_CYCLE) {
                 device_links_write_lock();
                 if (__fw_devlink_relax_cycles(con, sup_handle)) {
                         __fwnode_link_cycle(link);
                         flags = fw_devlink_get_flags(link->flags);
+                       pr_debug("----- cycle: end -----\n");
                         dev_info(con, "Fixed dependency cycle(s) with %pfwf\n",
                                  sup_handle);
                 }
diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c

index 026bdcb45127f530093cb4041f734d222e2fb005..0d957c5f1bcc987a585abaad9ed53623c33b4189 100644 (file)
--- a/drivers/base/regmap/regmap-kunit.c
+++ b/drivers/base/regmap/regmap-kunit.c
@@ -9,6 +9,23 @@
  
  #define BLOCK_TEST_SIZE 12
  
+static void get_changed_bytes(void *orig, void *new, size_t size)
+{
+       char *o = orig;
+       char *n = new;
+       int i;
+
+       get_random_bytes(new, size);
+
+       /*
+        * This could be nicer and more efficient but we shouldn't
+        * super care.
+        */
+       for (i = 0; i < size; i++)
+               while (n[i] == o[i])
+                       get_random_bytes(&n[i], 1);
+}
+
  static const struct regmap_config test_regmap_config = {
         .max_register = BLOCK_TEST_SIZE,
         .reg_stride = 1,
@@ -1202,7 +1219,8 @@ static void raw_noinc_write(struct kunit *test)
         struct regmap *map;
         struct regmap_config config;
         struct regmap_ram_data *data;
-       unsigned int val, val_test, val_last;
+       unsigned int val;
+       u16 val_test, val_last;
         u16 val_array[BLOCK_TEST_SIZE];
  
         config = raw_regmap_config;
@@ -1251,7 +1269,7 @@ static void raw_sync(struct kunit *test)
         struct regmap *map;
         struct regmap_config config;
         struct regmap_ram_data *data;
-       u16 val[2];
+       u16 val[3];
         u16 *hw_buf;
         unsigned int rval;
         int i;
@@ -1265,17 +1283,13 @@ static void raw_sync(struct kunit *test)
  
         hw_buf = (u16 *)data->vals;
  
-       get_random_bytes(&val, sizeof(val));
+       get_changed_bytes(&hw_buf[2], &val[0], sizeof(val));
  
         /* Do a regular write and a raw write in cache only mode */
         regcache_cache_only(map, true);
-       KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val, sizeof(val)));
-       if (config.val_format_endian == REGMAP_ENDIAN_BIG)
-               KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 6,
-                                                     be16_to_cpu(val[0])));
-       else
-               KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 6,
-                                                     le16_to_cpu(val[0])));
+       KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val,
+                                                 sizeof(u16) * 2));
+       KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 4, val[2]));
  
         /* We should read back the new values, and defaults for the rest */
         for (i = 0; i < config.max_register + 1; i++) {
@@ -1284,24 +1298,34 @@ static void raw_sync(struct kunit *test)
                 switch (i) {
                 case 2:
                 case 3:
-               case 6:
                         if (config.val_format_endian == REGMAP_ENDIAN_BIG) {
                                 KUNIT_EXPECT_EQ(test, rval,
-                                               be16_to_cpu(val[i % 2]));
+                                               be16_to_cpu(val[i - 2]));
                         } else {
                                 KUNIT_EXPECT_EQ(test, rval,
-                                               le16_to_cpu(val[i % 2]));
+                                               le16_to_cpu(val[i - 2]));
                         }
                         break;
+               case 4:
+                       KUNIT_EXPECT_EQ(test, rval, val[i - 2]);
+                       break;
                 default:
                         KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval);
                         break;
                 }
         }
+
+       /*
+        * The value written via _write() was translated by the core,
+        * translate the original copy for comparison purposes.
+        */
+       if (config.val_format_endian == REGMAP_ENDIAN_BIG)
+               val[2] = cpu_to_be16(val[2]);
+       else
+               val[2] = cpu_to_le16(val[2]);
         
         /* The values should not appear in the "hardware" */
-       KUNIT_EXPECT_MEMNEQ(test, &hw_buf[2], val, sizeof(val));
-       KUNIT_EXPECT_MEMNEQ(test, &hw_buf[6], val, sizeof(u16));
+       KUNIT_EXPECT_MEMNEQ(test, &hw_buf[2], &val[0], sizeof(val));
  
         for (i = 0; i < config.max_register + 1; i++)
                 data->written[i] = false;
@@ -1312,8 +1336,7 @@ static void raw_sync(struct kunit *test)
         KUNIT_EXPECT_EQ(test, 0, regcache_sync(map));
  
         /* The values should now appear in the "hardware" */
-       KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], val, sizeof(val));
-       KUNIT_EXPECT_MEMEQ(test, &hw_buf[6], val, sizeof(u16));
+       KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], &val[0], sizeof(val));
  
         regmap_exit(map);
  }
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c

index 5bf98fd6a651a506ff294545d6241f608af34568..2bf14a0e2815f6292a02b3fa1e394489af780074 100644 (file)
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1593,14 +1593,15 @@ static int virtblk_freeze(struct virtio_device *vdev)
  {
         struct virtio_blk *vblk = vdev->priv;
  
+       /* Ensure no requests in virtqueues before deleting vqs. */
+       blk_mq_freeze_queue(vblk->disk->queue);
+
         /* Ensure we don't receive any more interrupts */
         virtio_reset_device(vdev);
  
         /* Make sure no work handler is accessing the device. */
         flush_work(&vblk->config_work);
  
-       blk_mq_quiesce_queue(vblk->disk->queue);
-
         vdev->config->del_vqs(vdev);
         kfree(vblk->vqs);
  
@@ -1618,7 +1619,7 @@ static int virtblk_restore(struct virtio_device *vdev)
  
         virtio_device_ready(vdev);
  
-       blk_mq_unquiesce_queue(vblk->disk->queue);
+       blk_mq_unfreeze_queue(vblk->disk->queue);
         return 0;
  }
  #endif
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c

index fdb0fae88d1c584e94bdc3b206999203779cd755..b40b32fa7f1c38c5d12931ee7b06e5b8ab144d77 100644 (file)
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev)
         bt_dev_dbg(hdev, "QCA Patch config");
  
         skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd),
-                               cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+                               cmd, 0, HCI_INIT_TIMEOUT);
         if (IS_ERR(skb)) {
                 err = PTR_ERR(skb);
                 bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err);
diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c

index a617578356953c30a4a882f7928d16d464a4a04d..9a7243d5db71ff35697cf26cf7a744910f2741fd 100644 (file)
--- a/drivers/bluetooth/hci_bcm4377.c
+++ b/drivers/bluetooth/hci_bcm4377.c
@@ -1417,7 +1417,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377)
  
         bda = (struct hci_rp_read_bd_addr *)skb->data;
         if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr))
-               set_bit(HCI_QUIRK_INVALID_BDADDR, &bcm4377->hdev->quirks);
+               set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks);
  
         kfree_skb(skb);
         return 0;
@@ -2368,7 +2368,6 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id)
         hdev->set_bdaddr = bcm4377_hci_set_bdaddr;
         hdev->setup = bcm4377_hci_setup;
  
-       set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
         if (bcm4377->hw->broken_mws_transport_config)
                 set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks);
         if (bcm4377->hw->broken_ext_scan)
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c

index 94b8c406f0c0edf0245064bd994ea6b84637b7b1..edd2a81b4d5ed7f5f9f36058ffe9131877ddde56 100644 (file)
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -7,6 +7,7 @@
   *
   *  Copyright (C) 2007 Texas Instruments, Inc.
   *  Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
+ *  Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
   *
   *  Acknowledgements:
   *  This file is based on hci_ll.c, which was...
@@ -1806,13 +1807,12 @@ static int qca_power_on(struct hci_dev *hdev)
  
  static void hci_coredump_qca(struct hci_dev *hdev)
  {
+       int err;
         static const u8 param[] = { 0x26 };
-       struct sk_buff *skb;
  
-       skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT);
-       if (IS_ERR(skb))
-               bt_dev_err(hdev, "%s: trigger crash failed (%ld)", __func__, PTR_ERR(skb));
-       kfree_skb(skb);
+       err = __hci_cmd_send(hdev, 0xfc0c, 1, param);
+       if (err < 0)
+               bt_dev_err(hdev, "%s: trigger crash failed (%d)", __func__, err);
  }
  
  static int qca_get_data_path_id(struct hci_dev *hdev, __u8 *data_path_id)
@@ -1904,7 +1904,17 @@ retry:
         case QCA_WCN6750:
         case QCA_WCN6855:
         case QCA_WCN7850:
-               set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+
+               /* Set BDA quirk bit for reading BDA value from fwnode property
+                * only if that property exist in DT.
+                */
+               if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
+                       set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+                       bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
+               } else {
+                       bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
+               }
+
                 hci_set_aosp_capable(hdev);
  
                 ret = qca_read_soc_version(hdev, &ver, soc_type);
diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c

index 6b5da73c85417644b5885e534c39917e4e5496a3..837bf9d51c6ec93888cec97ecde0eb2a792339e2 100644 (file)
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -120,7 +120,7 @@ static int imx_weim_gpr_setup(struct platform_device *pdev)
                 i++;
         }
  
-       if (i == 0 || i % 4)
+       if (i == 0)
                 goto err;
  
         for (i = 0; i < ARRAY_SIZE(gprvals); i++) {
diff --git a/drivers/cache/ax45mp_cache.c b/drivers/cache/ax45mp_cache.c

index 57186c58dc849c15db2f9c25ad8c816398f29986..1d7dd3d2c101cd4412876d62162fb733c800c02c 100644 (file)
--- a/drivers/cache/ax45mp_cache.c
+++ b/drivers/cache/ax45mp_cache.c
@@ -129,8 +129,12 @@ static void ax45mp_dma_cache_wback(phys_addr_t paddr, size_t size)
         unsigned long line_size;
         unsigned long flags;
  
+       if (unlikely(start == end))
+               return;
+
         line_size = ax45mp_priv.ax45mp_cache_line_size;
         start = start & (~(line_size - 1));
+       end = ((end + line_size - 1) & (~(line_size - 1)));
         local_irq_save(flags);
         ax45mp_cpu_dcache_wb_range(start, end);
         local_irq_restore(flags);
diff --git a/drivers/clk/samsung/clk-gs101.c b/drivers/clk/samsung/clk-gs101.c

index 0964bb11657f100916b85b7f00074a9bdb365c62..782993951fff8f7cc209329fc84af7f825fee143 100644 (file)
--- a/drivers/clk/samsung/clk-gs101.c
+++ b/drivers/clk/samsung/clk-gs101.c
@@ -2475,7 +2475,7 @@ static const struct samsung_cmu_info misc_cmu_info __initconst = {
         .nr_clk_ids             = CLKS_NR_MISC,
         .clk_regs               = misc_clk_regs,
         .nr_clk_regs            = ARRAY_SIZE(misc_clk_regs),
-       .clk_name               = "dout_cmu_misc_bus",
+       .clk_name               = "bus",
  };
  
  /* ---- platform_driver ----------------------------------------------------- */
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c

index 3d5e6d705fc6ee3a0224a0b1fa57c32076fe306f..44b19e69617632bf4951d8da1e514f9c0c689d4b 100644 (file)
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -108,9 +108,8 @@ static inline void send_msg(struct cn_msg *msg)
                 filter_data[1] = 0;
         }
  
-       if (cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT,
-                            cn_filter, (void *)filter_data) == -ESRCH)
-               atomic_set(&proc_event_num_listeners, 0);
+       cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT,
+                            cn_filter, (void *)filter_data);
  
         local_unlock(&local_event.lock);
  }
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c

index ca94e60e705a1df435b1dd75a13c0a50dc3f8c27..79619227ea511b5247ca7941400ae821b1030f73 100644 (file)
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2987,6 +2987,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
         if (min_pstate < cpu->min_perf_ratio)
                 min_pstate = cpu->min_perf_ratio;
  
+       if (min_pstate > cpu->max_perf_ratio)
+               min_pstate = cpu->max_perf_ratio;
+
         max_pstate = min(cap_pstate, cpu->max_perf_ratio);
         if (max_pstate < min_pstate)
                 max_pstate = min_pstate;
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c

index a148ff1f0872c419fc2198f64174d26e45342289..a4f6884416a0486181426c8a22d885f4f0534ea0 100644 (file)
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -4545,6 +4545,7 @@ struct caam_hash_alg {
         struct list_head entry;
         struct device *dev;
         int alg_type;
+       bool is_hmac;
         struct ahash_alg ahash_alg;
  };
  
@@ -4571,7 +4572,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
  
         ctx->dev = caam_hash->dev;
  
-       if (alg->setkey) {
+       if (caam_hash->is_hmac) {
                 ctx->adata.key_dma = dma_map_single_attrs(ctx->dev, ctx->key,
                                                           ARRAY_SIZE(ctx->key),
                                                           DMA_TO_DEVICE,
@@ -4611,7 +4612,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
          * For keyed hash algorithms shared descriptors
          * will be created later in setkey() callback
          */
-       return alg->setkey ? 0 : ahash_set_sh_desc(ahash);
+       return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash);
  }
  
  static void caam_hash_cra_exit(struct crypto_tfm *tfm)
@@ -4646,12 +4647,14 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev,
                          template->hmac_name);
                 snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
                          template->hmac_driver_name);
+               t_alg->is_hmac = true;
         } else {
                 snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
                          template->name);
                 snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
                          template->driver_name);
                 t_alg->ahash_alg.setkey = NULL;
+               t_alg->is_hmac = false;
         }
         alg->cra_module = THIS_MODULE;
         alg->cra_init = caam_hash_cra_init;
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c

index 290c8500c247f9cbf20fb055e3715400a5f30646..fdd724228c2fa8accc7c7ebc1244c5ee92423247 100644 (file)
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1753,6 +1753,7 @@ static struct caam_hash_template driver_hash[] = {
  struct caam_hash_alg {
         struct list_head entry;
         int alg_type;
+       bool is_hmac;
         struct ahash_engine_alg ahash_alg;
  };
  
@@ -1804,7 +1805,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
         } else {
                 if (priv->era >= 6) {
                         ctx->dir = DMA_BIDIRECTIONAL;
-                       ctx->key_dir = alg->setkey ? DMA_TO_DEVICE : DMA_NONE;
+                       ctx->key_dir = caam_hash->is_hmac ? DMA_TO_DEVICE : DMA_NONE;
                 } else {
                         ctx->dir = DMA_TO_DEVICE;
                         ctx->key_dir = DMA_NONE;
@@ -1862,7 +1863,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
          * For keyed hash algorithms shared descriptors
          * will be created later in setkey() callback
          */
-       return alg->setkey ? 0 : ahash_set_sh_desc(ahash);
+       return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash);
  }
  
  static void caam_hash_cra_exit(struct crypto_tfm *tfm)
@@ -1915,12 +1916,14 @@ caam_hash_alloc(struct caam_hash_template *template,
                          template->hmac_name);
                 snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
                          template->hmac_driver_name);
+               t_alg->is_hmac = true;
         } else {
                 snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
                          template->name);
                 snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
                          template->driver_name);
                 halg->setkey = NULL;
+               t_alg->is_hmac = false;
         }
         alg->cra_module = THIS_MODULE;
         alg->cra_init = caam_hash_cra_init;
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c

index e4d3f45242f63258ea0efc9f0a0a7ca9b411333c..b04bc1d3d627d447c2cfc10b9078b040800c8406 100644 (file)
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -534,10 +534,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init);
  
  static int __sev_platform_shutdown_locked(int *error)
  {
-       struct sev_device *sev = psp_master->sev_data;
+       struct psp_device *psp = psp_master;
+       struct sev_device *sev;
         int ret;
  
-       if (!sev || sev->state == SEV_STATE_UNINIT)
+       if (!psp || !psp->sev_data)
+               return 0;
+
+       sev = psp->sev_data;
+
+       if (sev->state == SEV_STATE_UNINIT)
                 return 0;
  
         ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c

index 479062aa5e6b61c2706ff8b4f4fe912f52ded3dc..94a0ebb03d8c96804b455f73a8d8b3155baab866 100644 (file)
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -463,6 +463,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
                 hw_data->fw_name = ADF_402XX_FW;
                 hw_data->fw_mmp_name = ADF_402XX_MMP;
                 hw_data->uof_get_name = uof_get_name_402xx;
+               hw_data->get_ena_thd_mask = get_ena_thd_mask;
                 break;
         case ADF_401XX_PCI_DEVICE_ID:
                 hw_data->fw_name = ADF_4XXX_FW;
diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c

index 2621ff8a93764d4ad905bcfe7e52331f45bb2c71..de53eddf6796b6c6ac6eafdeaee9a7ee03c979d3 100644 (file)
--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
@@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request *
  }
  
  static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx,
-               struct virtio_crypto_ctrl_header *header, void *para,
+               struct virtio_crypto_ctrl_header *header,
+               struct virtio_crypto_akcipher_session_para *para,
                 const uint8_t *key, unsigned int keylen)
  {
         struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3];
@@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher
  
         ctrl = &vc_ctrl_req->ctrl;
         memcpy(&ctrl->header, header, sizeof(ctrl->header));
-       memcpy(&ctrl->u, para, sizeof(ctrl->u));
+       memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para));
         input = &vc_ctrl_req->input;
         input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR);
  
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c

index dcf2b39e1048822ca90324667d85f68225c05fa4..1a3e6aafbdcc33dd2aae8731be8a5ad52cc0891e 100644 (file)
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -316,31 +316,27 @@ static const struct cxl_root_ops acpi_root_ops = {
         .qos_class = cxl_acpi_qos_class,
  };
  
-static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
-                          const unsigned long end)
+static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
+                            struct cxl_cfmws_context *ctx)
  {
         int target_map[CXL_DECODER_MAX_INTERLEAVE];
-       struct cxl_cfmws_context *ctx = arg;
         struct cxl_port *root_port = ctx->root_port;
         struct resource *cxl_res = ctx->cxl_res;
         struct cxl_cxims_context cxims_ctx;
         struct cxl_root_decoder *cxlrd;
         struct device *dev = ctx->dev;
-       struct acpi_cedt_cfmws *cfmws;
         cxl_calc_hb_fn cxl_calc_hb;
         struct cxl_decoder *cxld;
         unsigned int ways, i, ig;
         struct resource *res;
         int rc;
  
-       cfmws = (struct acpi_cedt_cfmws *) header;
-
         rc = cxl_acpi_cfmws_verify(dev, cfmws);
         if (rc) {
                 dev_err(dev, "CFMWS range %#llx-%#llx not registered\n",
                         cfmws->base_hpa,
                         cfmws->base_hpa + cfmws->window_size - 1);
-               return 0;
+               return rc;
         }
  
         rc = eiw_to_ways(cfmws->interleave_ways, &ways);
@@ -376,7 +372,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
  
         cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
         if (IS_ERR(cxlrd))
-               return 0;
+               return PTR_ERR(cxlrd);
  
         cxld = &cxlrd->cxlsd.cxld;
         cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
@@ -420,16 +416,7 @@ err_xormap:
                 put_device(&cxld->dev);
         else
                 rc = cxl_decoder_autoremove(dev, cxld);
-       if (rc) {
-               dev_err(dev, "Failed to add decode range: %pr", res);
-               return rc;
-       }
-       dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
-               dev_name(&cxld->dev),
-               phys_to_target_node(cxld->hpa_range.start),
-               cxld->hpa_range.start, cxld->hpa_range.end);
-
-       return 0;
+       return rc;
  
  err_insert:
         kfree(res->name);
@@ -438,6 +425,29 @@ err_name:
         return -ENOMEM;
  }
  
+static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
+                          const unsigned long end)
+{
+       struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header;
+       struct cxl_cfmws_context *ctx = arg;
+       struct device *dev = ctx->dev;
+       int rc;
+
+       rc = __cxl_parse_cfmws(cfmws, ctx);
+       if (rc)
+               dev_err(dev,
+                       "Failed to add decode range: [%#llx - %#llx] (%d)\n",
+                       cfmws->base_hpa,
+                       cfmws->base_hpa + cfmws->window_size - 1, rc);
+       else
+               dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n",
+                       phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa,
+                       cfmws->base_hpa + cfmws->window_size - 1);
+
+       /* never fail cxl_acpi load for a single window failure */
+       return 0;
+}
+
  __mock struct acpi_device *to_cxl_host_bridge(struct device *host,
                                               struct device *dev)
  {
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c

index 6fe11546889fabb48e997fda83e1f184a64179c6..08fd0baea7a0eb0f1c1442e9f454e3c32736d19c 100644 (file)
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -210,19 +210,12 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
         return 0;
  }
  
-static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
-                          struct list_head *list)
+static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
+                             struct cxl_dpa_perf *dpa_perf)
  {
-       struct cxl_dpa_perf *dpa_perf;
-
-       dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL);
-       if (!dpa_perf)
-               return;
-
         dpa_perf->dpa_range = dent->dpa_range;
         dpa_perf->coord = dent->coord;
         dpa_perf->qos_class = dent->qos_class;
-       list_add_tail(&dpa_perf->list, list);
         dev_dbg(dev,
                 "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
                 dent->dpa_range.start, dpa_perf->qos_class,
@@ -230,20 +223,6 @@ static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
                 dent->coord.read_latency, dent->coord.write_latency);
  }
  
-static void free_perf_ents(void *data)
-{
-       struct cxl_memdev_state *mds = data;
-       struct cxl_dpa_perf *dpa_perf, *n;
-       LIST_HEAD(discard);
-
-       list_splice_tail_init(&mds->ram_perf_list, &discard);
-       list_splice_tail_init(&mds->pmem_perf_list, &discard);
-       list_for_each_entry_safe(dpa_perf, n, &discard, list) {
-               list_del(&dpa_perf->list);
-               kfree(dpa_perf);
-       }
-}
-
  static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
                                      struct xarray *dsmas_xa)
  {
@@ -263,16 +242,14 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
         xa_for_each(dsmas_xa, index, dent) {
                 if (resource_size(&cxlds->ram_res) &&
                     range_contains(&ram_range, &dent->dpa_range))
-                       add_perf_entry(dev, dent, &mds->ram_perf_list);
+                       update_perf_entry(dev, dent, &mds->ram_perf);
                 else if (resource_size(&cxlds->pmem_res) &&
                          range_contains(&pmem_range, &dent->dpa_range))
-                       add_perf_entry(dev, dent, &mds->pmem_perf_list);
+                       update_perf_entry(dev, dent, &mds->pmem_perf);
                 else
                         dev_dbg(dev, "no partition for dsmas dpa: %#llx\n",
                                 dent->dpa_range.start);
         }
-
-       devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds);
  }
  
  static int match_cxlrd_qos_class(struct device *dev, void *data)
@@ -293,24 +270,24 @@ static int match_cxlrd_qos_class(struct device *dev, void *data)
         return 0;
  }
  
-static void cxl_qos_match(struct cxl_port *root_port,
-                         struct list_head *work_list,
-                         struct list_head *discard_list)
+static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf)
  {
-       struct cxl_dpa_perf *dpa_perf, *n;
+       *dpa_perf = (struct cxl_dpa_perf) {
+               .qos_class = CXL_QOS_CLASS_INVALID,
+       };
+}
  
-       list_for_each_entry_safe(dpa_perf, n, work_list, list) {
-               int rc;
+static bool cxl_qos_match(struct cxl_port *root_port,
+                         struct cxl_dpa_perf *dpa_perf)
+{
+       if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
+               return false;
  
-               if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
-                       return;
+       if (!device_for_each_child(&root_port->dev, &dpa_perf->qos_class,
+                                  match_cxlrd_qos_class))
+               return false;
  
-               rc = device_for_each_child(&root_port->dev,
-                                          (void *)&dpa_perf->qos_class,
-                                          match_cxlrd_qos_class);
-               if (!rc)
-                       list_move_tail(&dpa_perf->list, discard_list);
-       }
+       return true;
  }
  
  static int match_cxlrd_hb(struct device *dev, void *data)
@@ -334,23 +311,10 @@ static int match_cxlrd_hb(struct device *dev, void *data)
         return 0;
  }
  
-static void discard_dpa_perf(struct list_head *list)
-{
-       struct cxl_dpa_perf *dpa_perf, *n;
-
-       list_for_each_entry_safe(dpa_perf, n, list, list) {
-               list_del(&dpa_perf->list);
-               kfree(dpa_perf);
-       }
-}
-DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T))
-
  static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
  {
         struct cxl_dev_state *cxlds = cxlmd->cxlds;
         struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-       LIST_HEAD(__discard);
-       struct list_head *discard __free(dpa_perf) = &__discard;
         struct cxl_port *root_port;
         int rc;
  
@@ -363,16 +327,17 @@ static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
         root_port = &cxl_root->port;
  
         /* Check that the QTG IDs are all sane between end device and root decoders */
-       cxl_qos_match(root_port, &mds->ram_perf_list, discard);
-       cxl_qos_match(root_port, &mds->pmem_perf_list, discard);
+       if (!cxl_qos_match(root_port, &mds->ram_perf))
+               reset_dpa_perf(&mds->ram_perf);
+       if (!cxl_qos_match(root_port, &mds->pmem_perf))
+               reset_dpa_perf(&mds->pmem_perf);
  
         /* Check to make sure that the device's host bridge is under a root decoder */
         rc = device_for_each_child(&root_port->dev,
-                                  (void *)cxlmd->endpoint->host_bridge,
-                                  match_cxlrd_hb);
+                                  cxlmd->endpoint->host_bridge, match_cxlrd_hb);
         if (!rc) {
-               list_splice_tail_init(&mds->ram_perf_list, discard);
-               list_splice_tail_init(&mds->pmem_perf_list, discard);
+               reset_dpa_perf(&mds->ram_perf);
+               reset_dpa_perf(&mds->pmem_perf);
         }
  
         return rc;
@@ -417,6 +382,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port)
  
         cxl_memdev_set_qos_class(cxlds, dsmas_xa);
         cxl_qos_class_verify(cxlmd);
+       cxl_memdev_update_perf(cxlmd);
  }
  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL);
  
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c

index 27166a41170579a9441a2f9bf3e2a915ed85d893..9adda4795eb786b8658b573dd1e79befbad52255 100644 (file)
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1391,8 +1391,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
         mds->cxlds.reg_map.host = dev;
         mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
         mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
-       INIT_LIST_HEAD(&mds->ram_perf_list);
-       INIT_LIST_HEAD(&mds->pmem_perf_list);
+       mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
+       mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
  
         return mds;
  }
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c

index dae8802ecdb01ee748e3891120bc0011e9e8894e..d4e259f3a7e914b9e3f17330cbc57f691d1976c2 100644 (file)
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -447,13 +447,41 @@ static struct attribute *cxl_memdev_attributes[] = {
         NULL,
  };
  
+static ssize_t pmem_qos_class_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+
+       return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class);
+}
+
+static struct device_attribute dev_attr_pmem_qos_class =
+       __ATTR(qos_class, 0444, pmem_qos_class_show, NULL);
+
  static struct attribute *cxl_memdev_pmem_attributes[] = {
         &dev_attr_pmem_size.attr,
+       &dev_attr_pmem_qos_class.attr,
         NULL,
  };
  
+static ssize_t ram_qos_class_show(struct device *dev,
+                                 struct device_attribute *attr, char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+
+       return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class);
+}
+
+static struct device_attribute dev_attr_ram_qos_class =
+       __ATTR(qos_class, 0444, ram_qos_class_show, NULL);
+
  static struct attribute *cxl_memdev_ram_attributes[] = {
         &dev_attr_ram_size.attr,
+       &dev_attr_ram_qos_class.attr,
         NULL,
  };
  
@@ -477,14 +505,42 @@ static struct attribute_group cxl_memdev_attribute_group = {
         .is_visible = cxl_memdev_visible,
  };
  
+static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+       if (a == &dev_attr_ram_qos_class.attr)
+               if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID)
+                       return 0;
+
+       return a->mode;
+}
+
  static struct attribute_group cxl_memdev_ram_attribute_group = {
         .name = "ram",
         .attrs = cxl_memdev_ram_attributes,
+       .is_visible = cxl_ram_visible,
  };
  
+static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+       if (a == &dev_attr_pmem_qos_class.attr)
+               if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID)
+                       return 0;
+
+       return a->mode;
+}
+
  static struct attribute_group cxl_memdev_pmem_attribute_group = {
         .name = "pmem",
         .attrs = cxl_memdev_pmem_attributes,
+       .is_visible = cxl_pmem_visible,
  };
  
  static umode_t cxl_memdev_security_visible(struct kobject *kobj,
@@ -519,6 +575,13 @@ static const struct attribute_group *cxl_memdev_attribute_groups[] = {
         NULL,
  };
  
+void cxl_memdev_update_perf(struct cxl_memdev *cxlmd)
+{
+       sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_ram_attribute_group);
+       sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_pmem_attribute_group);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_memdev_update_perf, CXL);
+
  static const struct device_type cxl_memdev_type = {
         .name = "cxl_memdev",
         .release = cxl_memdev_release,
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c

index 6c9c8d92f8f71401af70fec26be60e0339c18c64..e9e6c81ce034a8ffaba105132d5b9ecc59d51880 100644 (file)
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -477,9 +477,9 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
                 allowed++;
         }
  
-       if (!allowed) {
-               cxl_set_mem_enable(cxlds, 0);
-               info->mem_enabled = 0;
+       if (!allowed && info->mem_enabled) {
+               dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
+               return -ENXIO;
         }
  
         /*
@@ -932,11 +932,21 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
  void cxl_cor_error_detected(struct pci_dev *pdev)
  {
         struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+       struct device *dev = &cxlds->cxlmd->dev;
+
+       scoped_guard(device, dev) {
+               if (!dev->driver) {
+                       dev_warn(&pdev->dev,
+                                "%s: memdev disabled, abort error handling\n",
+                                dev_name(dev));
+                       return;
+               }
  
-       if (cxlds->rcd)
-               cxl_handle_rdport_errors(cxlds);
+               if (cxlds->rcd)
+                       cxl_handle_rdport_errors(cxlds);
  
-       cxl_handle_endpoint_cor_ras(cxlds);
+               cxl_handle_endpoint_cor_ras(cxlds);
+       }
  }
  EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
  
@@ -948,16 +958,25 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
         struct device *dev = &cxlmd->dev;
         bool ue;
  
-       if (cxlds->rcd)
-               cxl_handle_rdport_errors(cxlds);
+       scoped_guard(device, dev) {
+               if (!dev->driver) {
+                       dev_warn(&pdev->dev,
+                                "%s: memdev disabled, abort error handling\n",
+                                dev_name(dev));
+                       return PCI_ERS_RESULT_DISCONNECT;
+               }
+
+               if (cxlds->rcd)
+                       cxl_handle_rdport_errors(cxlds);
+               /*
+                * A frozen channel indicates an impending reset which is fatal to
+                * CXL.mem operation, and will likely crash the system. On the off
+                * chance the situation is recoverable dump the status of the RAS
+                * capability registers and bounce the active state of the memdev.
+                */
+               ue = cxl_handle_endpoint_ras(cxlds);
+       }
  
-       /*
-        * A frozen channel indicates an impending reset which is fatal to
-        * CXL.mem operation, and will likely crash the system. On the off
-        * chance the situation is recoverable dump the status of the RAS
-        * capability registers and bounce the active state of the memdev.
-        */
-       ue = cxl_handle_endpoint_ras(cxlds);
  
         switch (state) {
         case pci_channel_io_normal:
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c

index ce0e2d82bb2b4cfdc61761d5e32a8c91cc121d82..4c7fd2d5cccb2965eb528cbc26bb261ef01dcdce 100644 (file)
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -730,12 +730,17 @@ static int match_auto_decoder(struct device *dev, void *data)
         return 0;
  }
  
-static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
-                                                  struct cxl_region *cxlr)
+static struct cxl_decoder *
+cxl_region_find_decoder(struct cxl_port *port,
+                       struct cxl_endpoint_decoder *cxled,
+                       struct cxl_region *cxlr)
  {
         struct device *dev;
         int id = 0;
  
+       if (port == cxled_to_port(cxled))
+               return &cxled->cxld;
+
         if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
                 dev = device_find_child(&port->dev, &cxlr->params,
                                         match_auto_decoder);
@@ -753,8 +758,31 @@ static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
         return to_cxl_decoder(dev);
  }
  
-static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
-                                              struct cxl_region *cxlr)
+static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
+                         struct cxl_decoder *cxld)
+{
+       struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
+       struct cxl_decoder *cxld_iter = rr->decoder;
+
+       /*
+        * Allow the out of order assembly of auto-discovered regions.
+        * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
+        * in HPA order. Confirm that the decoder with the lesser HPA
+        * starting address has the lesser id.
+        */
+       dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
+               dev_name(&cxld->dev), cxld->id,
+               dev_name(&cxld_iter->dev), cxld_iter->id);
+
+       if (cxld_iter->id > cxld->id)
+               return true;
+
+       return false;
+}
+
+static struct cxl_region_ref *
+alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
+                struct cxl_endpoint_decoder *cxled)
  {
         struct cxl_region_params *p = &cxlr->params;
         struct cxl_region_ref *cxl_rr, *iter;
@@ -764,16 +792,21 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
         xa_for_each(&port->regions, index, iter) {
                 struct cxl_region_params *ip = &iter->region->params;
  
-               if (!ip->res)
+               if (!ip->res || ip->res->start < p->res->start)
                         continue;
  
-               if (ip->res->start > p->res->start) {
-                       dev_dbg(&cxlr->dev,
-                               "%s: HPA order violation %s:%pr vs %pr\n",
-                               dev_name(&port->dev),
-                               dev_name(&iter->region->dev), ip->res, p->res);
-                       return ERR_PTR(-EBUSY);
+               if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+                       struct cxl_decoder *cxld;
+
+                       cxld = cxl_region_find_decoder(port, cxled, cxlr);
+                       if (auto_order_ok(port, iter->region, cxld))
+                               continue;
                 }
+               dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
+                       dev_name(&port->dev),
+                       dev_name(&iter->region->dev), ip->res, p->res);
+
+               return ERR_PTR(-EBUSY);
         }
  
         cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
@@ -853,10 +886,7 @@ static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
  {
         struct cxl_decoder *cxld;
  
-       if (port == cxled_to_port(cxled))
-               cxld = &cxled->cxld;
-       else
-               cxld = cxl_region_find_decoder(port, cxlr);
+       cxld = cxl_region_find_decoder(port, cxled, cxlr);
         if (!cxld) {
                 dev_dbg(&cxlr->dev, "%s: no decoder available\n",
                         dev_name(&port->dev));
@@ -953,7 +983,7 @@ static int cxl_port_attach_region(struct cxl_port *port,
                         nr_targets_inc = true;
                 }
         } else {
-               cxl_rr = alloc_region_ref(port, cxlr);
+               cxl_rr = alloc_region_ref(port, cxlr, cxled);
                 if (IS_ERR(cxl_rr)) {
                         dev_dbg(&cxlr->dev,
                                 "%s: failed to allocate region reference\n",
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h

index 89445435303aac4d043c964a0ada866548889917..bdf117a33744be2db0468e869226ac8d45ef7a16 100644 (file)
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -338,7 +338,7 @@ TRACE_EVENT(cxl_general_media,
  
         TP_fast_assign(
                 CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
-               memcpy(&__entry->hdr_uuid, &CXL_EVENT_GEN_MEDIA_UUID, sizeof(uuid_t));
+               __entry->hdr_uuid = CXL_EVENT_GEN_MEDIA_UUID;
  
                 /* General Media */
                 __entry->dpa = le64_to_cpu(rec->phys_addr);
@@ -425,7 +425,7 @@ TRACE_EVENT(cxl_dram,
  
         TP_fast_assign(
                 CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
-               memcpy(&__entry->hdr_uuid, &CXL_EVENT_DRAM_UUID, sizeof(uuid_t));
+               __entry->hdr_uuid = CXL_EVENT_DRAM_UUID;
  
                 /* DRAM */
                 __entry->dpa = le64_to_cpu(rec->phys_addr);
@@ -573,7 +573,7 @@ TRACE_EVENT(cxl_memory_module,
  
         TP_fast_assign(
                 CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
-               memcpy(&__entry->hdr_uuid, &CXL_EVENT_MEM_MODULE_UUID, sizeof(uuid_t));
+               __entry->hdr_uuid = CXL_EVENT_MEM_MODULE_UUID;
  
                 /* Memory Module Event */
                 __entry->event_type = rec->event_type;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h

index b6017c0c57b4d5e69dfe45011b7a8b3f5bf0b913..003feebab79b5f8e7563ba2e32665b4377871a55 100644 (file)
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -880,6 +880,8 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
  int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                                       struct access_coordinate *coord);
  
+void cxl_memdev_update_perf(struct cxl_memdev *cxlmd);
+
  /*
   * Unit test builds overrides this to __weak, find the 'strong' version
   * of these symbols in tools/testing/cxl/.
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h

index 5303d6942b880af65dcf8e77b02d26626c2bb94d..20fb3b35e89e0473ee8ad42dcd17407086fb8cdb 100644 (file)
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -395,13 +395,11 @@ enum cxl_devtype {
  
  /**
   * struct cxl_dpa_perf - DPA performance property entry
- * @list - list entry
   * @dpa_range - range for DPA address
   * @coord - QoS performance data (i.e. latency, bandwidth)
   * @qos_class - QoS Class cookies
   */
  struct cxl_dpa_perf {
-       struct list_head list;
         struct range dpa_range;
         struct access_coordinate coord;
         int qos_class;
@@ -471,8 +469,8 @@ struct cxl_dev_state {
   * @security: security driver state info
   * @fw: firmware upload / activation state
   * @mbox_send: @dev specific transport for transmitting mailbox commands
- * @ram_perf_list: performance data entries matched to RAM
- * @pmem_perf_list: performance data entries matched to PMEM
+ * @ram_perf: performance data entry matched to RAM partition
+ * @pmem_perf: performance data entry matched to PMEM partition
   *
   * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
   * details on capacity parameters.
@@ -494,8 +492,8 @@ struct cxl_memdev_state {
         u64 next_volatile_bytes;
         u64 next_persistent_bytes;
  
-       struct list_head ram_perf_list;
-       struct list_head pmem_perf_list;
+       struct cxl_dpa_perf ram_perf;
+       struct cxl_dpa_perf pmem_perf;
  
         struct cxl_event_state event;
         struct cxl_poison_state poison;
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c

index c5c9d8e0d88d69fcc9f031e1bd46ba7c44de4fd4..0c79d9ce877ccaef9895a9885801d4fff69c5093 100644 (file)
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -215,52 +215,6 @@ static ssize_t trigger_poison_list_store(struct device *dev,
  }
  static DEVICE_ATTR_WO(trigger_poison_list);
  
-static ssize_t ram_qos_class_show(struct device *dev,
-                                 struct device_attribute *attr, char *buf)
-{
-       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-       struct cxl_dev_state *cxlds = cxlmd->cxlds;
-       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-       struct cxl_dpa_perf *dpa_perf;
-
-       if (!dev->driver)
-               return -ENOENT;
-
-       if (list_empty(&mds->ram_perf_list))
-               return -ENOENT;
-
-       dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf,
-                                   list);
-
-       return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
-}
-
-static struct device_attribute dev_attr_ram_qos_class =
-       __ATTR(qos_class, 0444, ram_qos_class_show, NULL);
-
-static ssize_t pmem_qos_class_show(struct device *dev,
-                                  struct device_attribute *attr, char *buf)
-{
-       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-       struct cxl_dev_state *cxlds = cxlmd->cxlds;
-       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-       struct cxl_dpa_perf *dpa_perf;
-
-       if (!dev->driver)
-               return -ENOENT;
-
-       if (list_empty(&mds->pmem_perf_list))
-               return -ENOENT;
-
-       dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf,
-                                   list);
-
-       return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
-}
-
-static struct device_attribute dev_attr_pmem_qos_class =
-       __ATTR(qos_class, 0444, pmem_qos_class_show, NULL);
-
  static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
  {
         struct device *dev = kobj_to_dev(kobj);
@@ -272,21 +226,11 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
                               mds->poison.enabled_cmds))
                         return 0;
  
-       if (a == &dev_attr_pmem_qos_class.attr)
-               if (list_empty(&mds->pmem_perf_list))
-                       return 0;
-
-       if (a == &dev_attr_ram_qos_class.attr)
-               if (list_empty(&mds->ram_perf_list))
-                       return 0;
-
         return a->mode;
  }
  
  static struct attribute *cxl_mem_attrs[] = {
         &dev_attr_trigger_poison_list.attr,
-       &dev_attr_ram_qos_class.attr,
-       &dev_attr_pmem_qos_class.attr,
         NULL
  };
  
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c

index 233e7c42c161d8e0b64424776d121f5d08176010..2ff361e756d66147d8d20969c376730ae2bcc90e 100644 (file)
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -974,61 +974,6 @@ static struct pci_driver cxl_pci_driver = {
         },
  };
  
-#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
-static void cxl_cper_event_call(enum cxl_event_type ev_type,
-                               struct cxl_cper_event_rec *rec)
-{
-       struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
-       struct pci_dev *pdev __free(pci_dev_put) = NULL;
-       enum cxl_event_log_type log_type;
-       struct cxl_dev_state *cxlds;
-       unsigned int devfn;
-       u32 hdr_flags;
-
-       devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
-       pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
-                                          device_id->bus_num, devfn);
-       if (!pdev)
-               return;
-
-       guard(pci_dev)(pdev);
-       if (pdev->driver != &cxl_pci_driver)
-               return;
-
-       cxlds = pci_get_drvdata(pdev);
-       if (!cxlds)
-               return;
-
-       /* Fabricate a log type */
-       hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
-       log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
-
-       cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type,
-                              &uuid_null, &rec->event);
-}
-
-static int __init cxl_pci_driver_init(void)
-{
-       int rc;
-
-       rc = cxl_cper_register_callback(cxl_cper_event_call);
-       if (rc)
-               return rc;
-
-       rc = pci_register_driver(&cxl_pci_driver);
-       if (rc)
-               cxl_cper_unregister_callback(cxl_cper_event_call);
-
-       return rc;
-}
-
-static void __exit cxl_pci_driver_exit(void)
-{
-       pci_unregister_driver(&cxl_pci_driver);
-       cxl_cper_unregister_callback(cxl_cper_event_call);
-}
-
-module_init(cxl_pci_driver_init);
-module_exit(cxl_pci_driver_exit);
+module_pci_driver(cxl_pci_driver);
  MODULE_LICENSE("GPL v2");
  MODULE_IMPORT_NS(CXL);
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c

index ee899f8e67215f6036734795cb5b90ab77a293a3..4a63567e93bae3dd2d5affabeedfd713aaa51460 100644 (file)
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -168,10 +168,7 @@ static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf)
         if (vmf->pgoff > buffer->pagecount)
                 return VM_FAULT_SIGBUS;
  
-       vmf->page = buffer->pages[vmf->pgoff];
-       get_page(vmf->page);
-
-       return 0;
+       return vmf_insert_pfn(vma, vmf->address, page_to_pfn(buffer->pages[vmf->pgoff]));
  }
  
  static const struct vm_operations_struct dma_heap_vm_ops = {
@@ -185,6 +182,8 @@ static int cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
         if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
                 return -EINVAL;
  
+       vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+
         vma->vm_ops = &dma_heap_vm_ops;
         vma->vm_private_data = buffer;
  
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c

index fb89ecbf0cc5be8ca566eaac6e499f2e2336b625..40052d1bd0b5c161180eec477bedb0f871e91a55 100644 (file)
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -222,8 +222,14 @@ struct atdma_sg {
   * @vd: pointer to the virtual dma descriptor.
   * @atchan: pointer to the atmel dma channel.
   * @total_len: total transaction byte count
- * @sg_len: number of sg entries.
+ * @sglen: number of sg entries.
   * @sg: array of sgs.
+ * @boundary: number of transfers to perform before the automatic address increment operation
+ * @dst_hole: value to add to the destination address when the boundary has been reached
+ * @src_hole: value to add to the source address when the boundary has been reached
+ * @memset_buffer: buffer used for the memset operation
+ * @memset_paddr: physical address of the buffer used for the memset operation
+ * @memset_vaddr: virtual address of the buffer used for the memset operation
   */
  struct at_desc {
         struct                          virt_dma_desc vd;
@@ -245,7 +251,10 @@ struct at_desc {
  /*--  Channels  --------------------------------------------------------*/
  
  /**
- * atc_status - information bits stored in channel status flag
+ * enum atc_status - information bits stored in channel status flag
+ *
+ * @ATC_IS_PAUSED: If channel is pauses
+ * @ATC_IS_CYCLIC: If channel is cyclic
   *
   * Manipulated with atomic operations.
   */
@@ -282,7 +291,6 @@ struct at_dma_chan {
         u32                     save_cfg;
         u32                     save_dscr;
         struct dma_slave_config dma_sconfig;
-       bool                    cyclic;
         struct at_desc          *desc;
  };
  
@@ -328,12 +336,12 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
  /**
   * struct at_dma - internal representation of an Atmel HDMA Controller
   * @dma_device: dmaengine dma_device object members
- * @atdma_devtype: identifier of DMA controller compatibility
- * @ch_regs: memory mapped register base
+ * @regs: memory mapped register base
   * @clk: dma controller clock
   * @save_imr: interrupt mask register that is saved on suspend/resume cycle
   * @all_chan_mask: all channels availlable in a mask
   * @lli_pool: hw lli table
+ * @memset_pool: hw memset pool
   * @chan: channels table to store at_dma_chan structures
   */
  struct at_dma {
@@ -626,6 +634,9 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
  
  /**
   * atc_get_llis_residue - Get residue for a hardware linked list transfer
+ * @atchan: pointer to an atmel hdmac channel.
+ * @desc: pointer to the descriptor for which the residue is calculated.
+ * @residue: residue to be set to dma_tx_state.
   *
   * Calculate the residue by removing the length of the Linked List Item (LLI)
   * already transferred from the total length. To get the current LLI we can use
@@ -661,10 +672,8 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
   * two DSCR values are different, we read again the CTRLA then the DSCR till two
   * consecutive read values from DSCR are equal or till the maximum trials is
   * reach. This algorithm is very unlikely not to find a stable value for DSCR.
- * @atchan: pointer to an atmel hdmac channel.
- * @desc: pointer to the descriptor for which the residue is calculated.
- * @residue: residue to be set to dma_tx_state.
- * Returns 0 on success, -errno otherwise.
+ *
+ * Returns: %0 on success, -errno otherwise.
   */
  static int atc_get_llis_residue(struct at_dma_chan *atchan,
                                 struct at_desc *desc, u32 *residue)
@@ -731,7 +740,8 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan,
   * @chan: DMA channel
   * @cookie: transaction identifier to check status of
   * @residue: residue to be updated.
- * Return 0 on success, -errono otherwise.
+ *
+ * Return: %0 on success, -errno otherwise.
   */
  static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie,
                            u32 *residue)
@@ -1710,7 +1720,7 @@ static void atc_issue_pending(struct dma_chan *chan)
   * atc_alloc_chan_resources - allocate resources for DMA channel
   * @chan: allocate descriptor resources for this channel
   *
- * return - the number of allocated descriptors
+ * Return: the number of allocated descriptors
   */
  static int atc_alloc_chan_resources(struct dma_chan *chan)
  {
diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c

index b38786f0ad7995d9b0d22aa18fdd6d2407320c26..b75fdaffad9a4ea6cd8d15e8f43bea550848b46c 100644 (file)
--- a/drivers/dma/dw-edma/dw-edma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-edma-v0-core.c
@@ -346,6 +346,20 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
         dw_edma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr);
  }
  
+static void dw_edma_v0_sync_ll_data(struct dw_edma_chunk *chunk)
+{
+       /*
+        * In case of remote eDMA engine setup, the DW PCIe RP/EP internal
+        * configuration registers and application memory are normally accessed
+        * over different buses. Ensure LL-data reaches the memory before the
+        * doorbell register is toggled by issuing the dummy-read from the remote
+        * LL memory in a hope that the MRd TLP will return only after the
+        * last MWr TLP is completed
+        */
+       if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL))
+               readl(chunk->ll_region.vaddr.io);
+}
+
  static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
  {
         struct dw_edma_chan *chan = chunk->chan;
@@ -412,6 +426,9 @@ static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
                 SET_CH_32(dw, chan->dir, chan->id, llp.msb,
                           upper_32_bits(chunk->ll_region.paddr));
         }
+
+       dw_edma_v0_sync_ll_data(chunk);
+
         /* Doorbell */
         SET_RW_32(dw, chan->dir, doorbell,
                   FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id));
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c

index 00b735a0202ab2e8e030910db2747c02be8bf75e..10e8f0715114fb5f08f135b4f2d592ce6c53f10c 100644 (file)
--- a/drivers/dma/dw-edma/dw-hdma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c
@@ -65,18 +65,12 @@ static void dw_hdma_v0_core_off(struct dw_edma *dw)
  
  static u16 dw_hdma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir)
  {
-       u32 num_ch = 0;
-       int id;
-
-       for (id = 0; id < HDMA_V0_MAX_NR_CH; id++) {
-               if (GET_CH_32(dw, id, dir, ch_en) & BIT(0))
-                       num_ch++;
-       }
-
-       if (num_ch > HDMA_V0_MAX_NR_CH)
-               num_ch = HDMA_V0_MAX_NR_CH;
-
-       return (u16)num_ch;
+       /*
+        * The HDMA IP have no way to know the number of hardware channels
+        * available, we set it to maximum channels and let the platform
+        * set the right number of channels.
+        */
+       return HDMA_V0_MAX_NR_CH;
  }
  
  static enum dma_status dw_hdma_v0_core_ch_status(struct dw_edma_chan *chan)
@@ -228,6 +222,20 @@ static void dw_hdma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
         dw_hdma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr);
  }
  
+static void dw_hdma_v0_sync_ll_data(struct dw_edma_chunk *chunk)
+{
+       /*
+        * In case of remote HDMA engine setup, the DW PCIe RP/EP internal
+        * configuration registers and application memory are normally accessed
+        * over different buses. Ensure LL-data reaches the memory before the
+        * doorbell register is toggled by issuing the dummy-read from the remote
+        * LL memory in a hope that the MRd TLP will return only after the
+        * last MWr TLP is completed
+        */
+       if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL))
+               readl(chunk->ll_region.vaddr.io);
+}
+
  static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
  {
         struct dw_edma_chan *chan = chunk->chan;
@@ -242,7 +250,9 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
                 /* Interrupt enable&unmask - done, abort */
                 tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) |
                       HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK |
-                     HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_STOP_INT_EN;
+                     HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN;
+               if (!(dw->chip->flags & DW_EDMA_CHIP_LOCAL))
+                       tmp |= HDMA_V0_REMOTE_STOP_INT_EN | HDMA_V0_REMOTE_ABORT_INT_EN;
                 SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp);
                 /* Channel control */
                 SET_CH_32(dw, chan->dir, chan->id, control1, HDMA_V0_LINKLIST_EN);
@@ -256,6 +266,9 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
         /* Set consumer cycle */
         SET_CH_32(dw, chan->dir, chan->id, cycle_sync,
                   HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT);
+
+       dw_hdma_v0_sync_ll_data(chunk);
+
         /* Doorbell */
         SET_CH_32(dw, chan->dir, chan->id, doorbell, HDMA_V0_DOORBELL_START);
  }
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-regs.h b/drivers/dma/dw-edma/dw-hdma-v0-regs.h

index a974abdf8aaf5ecd83eadd56f191a313ec37e9ff..eab5fd7177e545cab3f2217bd1a8add0d8dbb435 100644 (file)
--- a/drivers/dma/dw-edma/dw-hdma-v0-regs.h
+++ b/drivers/dma/dw-edma/dw-hdma-v0-regs.h
@@ -15,7 +15,7 @@
  #define HDMA_V0_LOCAL_ABORT_INT_EN             BIT(6)
  #define HDMA_V0_REMOTE_ABORT_INT_EN            BIT(5)
  #define HDMA_V0_LOCAL_STOP_INT_EN              BIT(4)
-#define HDMA_V0_REMOTEL_STOP_INT_EN            BIT(3)
+#define HDMA_V0_REMOTE_STOP_INT_EN             BIT(3)
  #define HDMA_V0_ABORT_INT_MASK                 BIT(2)
  #define HDMA_V0_STOP_INT_MASK                  BIT(0)
  #define HDMA_V0_LINKLIST_EN                    BIT(0)
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c

index 7958ac33e36ce3fab462d33161fea8dabe0ee215..5a8061a307cdafeb3a4db5ddd104ae6d7ec8d190 100644 (file)
--- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
@@ -38,15 +38,17 @@ static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan)
         if (!dpaa2_chan->fd_pool)
                 goto err;
  
-       dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev,
-                                             sizeof(struct dpaa2_fl_entry),
-                                             sizeof(struct dpaa2_fl_entry), 0);
+       dpaa2_chan->fl_pool =
+               dma_pool_create("fl_pool", dev,
+                                sizeof(struct dpaa2_fl_entry) * 3,
+                                sizeof(struct dpaa2_fl_entry), 0);
+
         if (!dpaa2_chan->fl_pool)
                 goto err_fd;
  
         dpaa2_chan->sdd_pool =
                 dma_pool_create("sdd_pool", dev,
-                               sizeof(struct dpaa2_qdma_sd_d),
+                               sizeof(struct dpaa2_qdma_sd_d) * 2,
                                 sizeof(struct dpaa2_qdma_sd_d), 0);
         if (!dpaa2_chan->sdd_pool)
                 goto err_fl;
diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c

index b53f46245c377f05520c8275c95bf10c59be34d7..793f1a7ad5e343bbfe403c9e0ad28e891bd0d556 100644 (file)
--- a/drivers/dma/fsl-edma-common.c
+++ b/drivers/dma/fsl-edma-common.c
@@ -503,7 +503,7 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan,
         if (fsl_chan->is_multi_fifo) {
                 /* set mloff to support multiple fifo */
                 burst = cfg->direction == DMA_DEV_TO_MEM ?
-                               cfg->src_addr_width : cfg->dst_addr_width;
+                               cfg->src_maxburst : cfg->dst_maxburst;
                 nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-(burst * 4));
                 /* enable DMLOE/SMLOE */
                 if (cfg->direction == DMA_MEM_TO_DEV) {
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h

index bb5221158a7702379322392a46a1ebfb4de0f476..f5e216b157c75ff2215d7c74cd1d9febad47031c 100644 (file)
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -30,8 +30,9 @@
  #define EDMA_TCD_ATTR_SSIZE(x)         (((x) & GENMASK(2, 0)) << 8)
  #define EDMA_TCD_ATTR_SMOD(x)          (((x) & GENMASK(4, 0)) << 11)
  
-#define EDMA_TCD_CITER_CITER(x)                ((x) & GENMASK(14, 0))
-#define EDMA_TCD_BITER_BITER(x)                ((x) & GENMASK(14, 0))
+#define EDMA_TCD_ITER_MASK             GENMASK(14, 0)
+#define EDMA_TCD_CITER_CITER(x)                ((x) & EDMA_TCD_ITER_MASK)
+#define EDMA_TCD_BITER_BITER(x)                ((x) & EDMA_TCD_ITER_MASK)
  
  #define EDMA_TCD_CSR_START             BIT(0)
  #define EDMA_TCD_CSR_INT_MAJOR         BIT(1)
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c

index 45cc419b1b4acbe87c12c3daaccafce73f8de1ba..d36e28b9c767ae7ebb44bc9e87de7bbc0363f926 100644 (file)
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -10,6 +10,7 @@
   */
  
  #include <dt-bindings/dma/fsl-edma.h>
+#include <linux/bitfield.h>
  #include <linux/module.h>
  #include <linux/interrupt.h>
  #include <linux/clk.h>
@@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev)
                                         DMAENGINE_ALIGN_32_BYTES;
  
         /* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
-       dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+       dma_set_max_seg_size(fsl_edma->dma_dev.dev,
+                            FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK));
  
         fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
  
diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c

index a1d0aa63142a981bb59fcde5663e53ee7355947c..5005e138fc239bf23a8a888c90e5ad720f697d3d 100644 (file)
--- a/drivers/dma/fsl-qdma.c
+++ b/drivers/dma/fsl-qdma.c
@@ -109,6 +109,7 @@
  #define FSL_QDMA_CMD_WTHROTL_OFFSET    20
  #define FSL_QDMA_CMD_DSEN_OFFSET       19
  #define FSL_QDMA_CMD_LWC_OFFSET                16
+#define FSL_QDMA_CMD_PF                        BIT(17)
  
  /* Field definition for Descriptor status */
  #define QDMA_CCDF_STATUS_RTE           BIT(5)
@@ -160,6 +161,10 @@ struct fsl_qdma_format {
                         u8 __reserved1[2];
                         u8 cfg8b_w1;
                 } __packed;
+               struct {
+                       __le32 __reserved2;
+                       __le32 cmd;
+               } __packed;
                 __le64 data;
         };
  } __packed;
@@ -354,7 +359,6 @@ static void fsl_qdma_free_chan_resources(struct dma_chan *chan)
  static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
                                       dma_addr_t dst, dma_addr_t src, u32 len)
  {
-       u32 cmd;
         struct fsl_qdma_format *sdf, *ddf;
         struct fsl_qdma_format *ccdf, *csgf_desc, *csgf_src, *csgf_dest;
  
@@ -383,14 +387,11 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
         /* This entry is the last entry. */
         qdma_csgf_set_f(csgf_dest, len);
         /* Descriptor Buffer */
-       cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
-                         FSL_QDMA_CMD_RWTTYPE_OFFSET);
-       sdf->data = QDMA_SDDF_CMD(cmd);
-
-       cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
-                         FSL_QDMA_CMD_RWTTYPE_OFFSET);
-       cmd |= cpu_to_le32(FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET);
-       ddf->data = QDMA_SDDF_CMD(cmd);
+       sdf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) |
+                              FSL_QDMA_CMD_PF);
+
+       ddf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) |
+                              (FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET));
  }
  
  /*
@@ -514,11 +515,11 @@ static struct fsl_qdma_queue
                         queue_temp = queue_head + i + (j * queue_num);
  
                         queue_temp->cq =
-                       dma_alloc_coherent(&pdev->dev,
-                                          sizeof(struct fsl_qdma_format) *
-                                          queue_size[i],
-                                          &queue_temp->bus_addr,
-                                          GFP_KERNEL);
+                       dmam_alloc_coherent(&pdev->dev,
+                                           sizeof(struct fsl_qdma_format) *
+                                           queue_size[i],
+                                           &queue_temp->bus_addr,
+                                           GFP_KERNEL);
                         if (!queue_temp->cq)
                                 return NULL;
                         queue_temp->block_base = fsl_qdma->block_base +
@@ -563,15 +564,14 @@ static struct fsl_qdma_queue
         /*
          * Buffer for queue command
          */
-       status_head->cq = dma_alloc_coherent(&pdev->dev,
-                                            sizeof(struct fsl_qdma_format) *
-                                            status_size,
-                                            &status_head->bus_addr,
-                                            GFP_KERNEL);
-       if (!status_head->cq) {
-               devm_kfree(&pdev->dev, status_head);
+       status_head->cq = dmam_alloc_coherent(&pdev->dev,
+                                             sizeof(struct fsl_qdma_format) *
+                                             status_size,
+                                             &status_head->bus_addr,
+                                             GFP_KERNEL);
+       if (!status_head->cq)
                 return NULL;
-       }
+
         status_head->n_cq = status_size;
         status_head->virt_head = status_head->cq;
         status_head->virt_tail = status_head->cq;
@@ -625,7 +625,7 @@ static int fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma)
  
  static int
  fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma,
-                                void *block,
+                                __iomem void *block,
                                  int id)
  {
         bool duplicate;
@@ -1197,10 +1197,6 @@ static int fsl_qdma_probe(struct platform_device *pdev)
         if (!fsl_qdma->queue)
                 return -ENOMEM;
  
-       ret = fsl_qdma_irq_init(pdev, fsl_qdma);
-       if (ret)
-               return ret;
-
         fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0");
         if (fsl_qdma->irq_base < 0)
                 return fsl_qdma->irq_base;
@@ -1239,16 +1235,19 @@ static int fsl_qdma_probe(struct platform_device *pdev)
  
         platform_set_drvdata(pdev, fsl_qdma);
  
-       ret = dma_async_device_register(&fsl_qdma->dma_dev);
+       ret = fsl_qdma_reg_init(fsl_qdma);
         if (ret) {
-               dev_err(&pdev->dev,
-                       "Can't register NXP Layerscape qDMA engine.\n");
+               dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
                 return ret;
         }
  
-       ret = fsl_qdma_reg_init(fsl_qdma);
+       ret = fsl_qdma_irq_init(pdev, fsl_qdma);
+       if (ret)
+               return ret;
+
+       ret = dma_async_device_register(&fsl_qdma->dma_dev);
         if (ret) {
-               dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
+               dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n");
                 return ret;
         }
  
@@ -1268,8 +1267,6 @@ static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev)
  
  static void fsl_qdma_remove(struct platform_device *pdev)
  {
-       int i;
-       struct fsl_qdma_queue *status;
         struct device_node *np = pdev->dev.of_node;
         struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev);
  
@@ -1277,12 +1274,6 @@ static void fsl_qdma_remove(struct platform_device *pdev)
         fsl_qdma_cleanup_vchan(&fsl_qdma->dma_dev);
         of_dma_controller_free(np);
         dma_async_device_unregister(&fsl_qdma->dma_dev);
-
-       for (i = 0; i < fsl_qdma->block_number; i++) {
-               status = fsl_qdma->status[i];
-               dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) *
-                               status->n_cq, status->cq, status->bus_addr);
-       }
  }
  
  static const struct of_device_id fsl_qdma_dt_ids[] = {
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c

index 77f8885cf4075acfd3ff535b7e09519a8df41c70..e5a94a93a3cc4e6da66aca64cc2174b20d80a7bb 100644 (file)
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -345,7 +345,7 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
         spin_lock(&evl->lock);
         status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
         t = status.tail;
-       h = evl->head;
+       h = status.head;
         size = evl->size;
  
         while (h != t) {
diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c

index 9cfbd9b14c4c43306326e857b8b3d982c612314f..f3f25ee676f30eb283989586d458a5c8b8c01f9f 100644 (file)
--- a/drivers/dma/idxd/debugfs.c
+++ b/drivers/dma/idxd/debugfs.c
@@ -68,9 +68,9 @@ static int debugfs_evl_show(struct seq_file *s, void *d)
  
         spin_lock(&evl->lock);
  
-       h = evl->head;
         evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
         t = evl_status.tail;
+       h = evl_status.head;
         evl_size = evl->size;
  
         seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n",
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h

index 47de3f93ff1e9a72eb718b07c05213d19ec1d23b..d0f5db6cf1eda103db09c31449cf3a58d58b7971 100644 (file)
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -300,7 +300,6 @@ struct idxd_evl {
         unsigned int log_size;
         /* The number of entries in the event log. */
         u16 size;
-       u16 head;
         unsigned long *bmap;
         bool batch_fail[IDXD_MAX_BATCH_IDENT];
  };
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c

index 14df1f1347a8dd83b82263438acf3fe613513564..4954adc6bb609e508c510daf630f1077191fd2c7 100644 (file)
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -343,7 +343,9 @@ static void idxd_cleanup_internals(struct idxd_device *idxd)
  static int idxd_init_evl(struct idxd_device *idxd)
  {
         struct device *dev = &idxd->pdev->dev;
+       unsigned int evl_cache_size;
         struct idxd_evl *evl;
+       const char *idxd_name;
  
         if (idxd->hw.gen_cap.evl_support == 0)
                 return 0;
@@ -355,9 +357,16 @@ static int idxd_init_evl(struct idxd_device *idxd)
         spin_lock_init(&evl->lock);
         evl->size = IDXD_EVL_SIZE_MIN;
  
-       idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)),
-                                           sizeof(struct idxd_evl_fault) + evl_ent_size(idxd),
-                                           0, 0, NULL);
+       idxd_name = dev_name(idxd_confdev(idxd));
+       evl_cache_size = sizeof(struct idxd_evl_fault) + evl_ent_size(idxd);
+       /*
+        * Since completion record in evl_cache will be copied to user
+        * when handling completion record page fault, need to create
+        * the cache suitable for user copy.
+        */
+       idxd->evl_cache = kmem_cache_create_usercopy(idxd_name, evl_cache_size,
+                                                    0, 0, 0, evl_cache_size,
+                                                    NULL);
         if (!idxd->evl_cache) {
                 kfree(evl);
                 return -ENOMEM;
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c

index c8a0aa874b1153f845278e03e9e5153cc487c0fb..348aa21389a9fceb4cd522579c8f8a9963e72ef3 100644 (file)
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -367,9 +367,9 @@ static void process_evl_entries(struct idxd_device *idxd)
         /* Clear interrupt pending bit */
         iowrite32(evl_status.bits_upper32,
                   idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32));
-       h = evl->head;
         evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
         t = evl_status.tail;
+       h = evl_status.head;
         size = idxd->evl->size;
  
         while (h != t) {
@@ -378,7 +378,6 @@ static void process_evl_entries(struct idxd_device *idxd)
                 h = (h + 1) % size;
         }
  
-       evl->head = h;
         evl_status.head = h;
         iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
         spin_unlock(&evl->lock);
diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c

index 1aa65e5de0f3ad9bc0fa0907ebda8e8c0fe6d0ab..f792407348077dd9fe481cfdec6577a701493487 100644 (file)
--- a/drivers/dma/ptdma/ptdma-dmaengine.c
+++ b/drivers/dma/ptdma/ptdma-dmaengine.c
@@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt)
         chan->vc.desc_free = pt_do_cleanup;
         vchan_init(&chan->vc, dma_dev);
  
-       dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64));
-
         ret = dma_async_device_register(dma_dev);
         if (ret)
                 goto err_reg;
diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c

index f1f920861fa9d8937a34bc29906778720a8dbfd8..5f8d2e93ff3fb516ea6374e007110b057377f11e 100644 (file)
--- a/drivers/dma/ti/edma.c
+++ b/drivers/dma/ti/edma.c
@@ -2404,6 +2404,11 @@ static int edma_probe(struct platform_device *pdev)
         if (irq > 0) {
                 irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint",
                                           dev_name(dev));
+               if (!irq_name) {
+                       ret = -ENOMEM;
+                       goto err_disable_pm;
+               }
+
                 ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name,
                                        ecc);
                 if (ret) {
@@ -2420,6 +2425,11 @@ static int edma_probe(struct platform_device *pdev)
         if (irq > 0) {
                 irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint",
                                           dev_name(dev));
+               if (!irq_name) {
+                       ret = -ENOMEM;
+                       goto err_disable_pm;
+               }
+
                 ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name,
                                        ecc);
                 if (ret) {
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c

index 2841a539c264891cde8b8166d51d5bbf885a5d85..6400d06588a24d1aa54ceefc3e79ce7661e43f3a 100644 (file)
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -3968,6 +3968,7 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc,
  {
         struct udma_chan *uc = to_udma_chan(&vc->chan);
         struct udma_desc *d;
+       u8 status;
  
         if (!vd)
                 return;
@@ -3977,12 +3978,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc,
         if (d->metadata_size)
                 udma_fetch_epib(uc, d);
  
-       /* Provide residue information for the client */
         if (result) {
                 void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx);
  
                 if (cppi5_desc_get_type(desc_vaddr) ==
                     CPPI5_INFO0_DESC_TYPE_VAL_HOST) {
+                       /* Provide residue information for the client */
                         result->residue = d->residue -
                                           cppi5_hdesc_get_pktlen(desc_vaddr);
                         if (result->residue)
@@ -3991,7 +3992,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc,
                                 result->result = DMA_TRANS_NOERROR;
                 } else {
                         result->residue = 0;
-                       result->result = DMA_TRANS_NOERROR;
+                       /* Propagate TR Response errors to the client */
+                       status = d->hwdesc[0].tr_resp_base->status;
+                       if (status)
+                               result->result = DMA_TRANS_ABORTED;
+                       else
+                               result->result = DMA_TRANS_NOERROR;
                 }
         }
  }
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c

index 5152bd1b0daf599869195e81805fbb2709dbe6b4..241db366b2c74ae749f49612d86176b2f8f479c1 100644 (file)
--- a/drivers/dpll/dpll_core.c
+++ b/drivers/dpll/dpll_core.c
@@ -42,6 +42,11 @@ struct dpll_pin_registration {
         void *priv;
  };
  
+struct dpll_pin *netdev_dpll_pin(const struct net_device *dev)
+{
+       return rcu_dereference_rtnl(dev->dpll_pin);
+}
+
  struct dpll_device *dpll_device_get_by_id(int id)
  {
         if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED))
@@ -564,7 +569,7 @@ void dpll_pin_put(struct dpll_pin *pin)
                 xa_destroy(&pin->parent_refs);
                 xa_erase(&dpll_pin_xa, pin->id);
                 dpll_pin_prop_free(&pin->prop);
-               kfree(pin);
+               kfree_rcu(pin, rcu);
         }
         mutex_unlock(&dpll_lock);
  }
diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h

index 717f715015c742238d5585fddc5cd267fbb0db9f..2b6d8ef1cdf36cff24328e497c49d667659dd0e6 100644 (file)
--- a/drivers/dpll/dpll_core.h
+++ b/drivers/dpll/dpll_core.h
@@ -47,6 +47,7 @@ struct dpll_device {
   * @prop:              pin properties copied from the registerer
   * @rclk_dev_name:     holds name of device when pin can recover clock from it
   * @refcount:          refcount
+ * @rcu:               rcu_head for kfree_rcu()
   **/
  struct dpll_pin {
         u32 id;
@@ -57,6 +58,7 @@ struct dpll_pin {
         struct xarray parent_refs;
         struct dpll_pin_properties prop;
         refcount_t refcount;
+       struct rcu_head rcu;
  };
  
  /**
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c

index 314bb377546519ef25987b2e6f77827f590fe5fe..4ca9ad16cd957aaefaf50a74bbeb27ab3f3d1ec7 100644 (file)
--- a/drivers/dpll/dpll_netlink.c
+++ b/drivers/dpll/dpll_netlink.c
@@ -1199,6 +1199,7 @@ int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
         unsigned long i;
         int ret = 0;
  
+       mutex_lock(&dpll_lock);
         xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED,
                                  ctx->idx) {
                 if (!dpll_pin_available(pin))
@@ -1218,6 +1219,8 @@ int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                 }
                 genlmsg_end(skb, hdr);
         }
+       mutex_unlock(&dpll_lock);
+
         if (ret == -EMSGSIZE) {
                 ctx->idx = i;
                 return skb->len;
@@ -1373,6 +1376,7 @@ int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
         unsigned long i;
         int ret = 0;
  
+       mutex_lock(&dpll_lock);
         xa_for_each_marked_start(&dpll_device_xa, i, dpll, DPLL_REGISTERED,
                                  ctx->idx) {
                 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
@@ -1389,6 +1393,8 @@ int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
                 }
                 genlmsg_end(skb, hdr);
         }
+       mutex_unlock(&dpll_lock);
+
         if (ret == -EMSGSIZE) {
                 ctx->idx = i;
                 return skb->len;
@@ -1439,20 +1445,6 @@ dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
         mutex_unlock(&dpll_lock);
  }
  
-int dpll_lock_dumpit(struct netlink_callback *cb)
-{
-       mutex_lock(&dpll_lock);
-
-       return 0;
-}
-
-int dpll_unlock_dumpit(struct netlink_callback *cb)
-{
-       mutex_unlock(&dpll_lock);
-
-       return 0;
-}
-
  int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
                       struct genl_info *info)
  {
diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c

index eaee5be7aa642a9359c0b438e7157eec830b6519..1e95f5397cfce65270fbc88d8916a24386258047 100644 (file)
--- a/drivers/dpll/dpll_nl.c
+++ b/drivers/dpll/dpll_nl.c
@@ -95,9 +95,7 @@ static const struct genl_split_ops dpll_nl_ops[] = {
         },
         {
                 .cmd    = DPLL_CMD_DEVICE_GET,
-               .start  = dpll_lock_dumpit,
                 .dumpit = dpll_nl_device_get_dumpit,
-               .done   = dpll_unlock_dumpit,
                 .flags  = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
         },
         {
@@ -129,9 +127,7 @@ static const struct genl_split_ops dpll_nl_ops[] = {
         },
         {
                 .cmd            = DPLL_CMD_PIN_GET,
-               .start          = dpll_lock_dumpit,
                 .dumpit         = dpll_nl_pin_get_dumpit,
-               .done           = dpll_unlock_dumpit,
                 .policy         = dpll_pin_get_dump_nl_policy,
                 .maxattr        = DPLL_A_PIN_ID,
                 .flags          = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
diff --git a/drivers/dpll/dpll_nl.h b/drivers/dpll/dpll_nl.h

index 92d4c9c4f788dc1b36c7b076a980afd2903aba68..f491262bee4f0c16e97624353bef6a4938b761aa 100644 (file)
--- a/drivers/dpll/dpll_nl.h
+++ b/drivers/dpll/dpll_nl.h
@@ -30,8 +30,6 @@ dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
  void
  dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
                    struct genl_info *info);
-int dpll_lock_dumpit(struct netlink_callback *cb);
-int dpll_unlock_dumpit(struct netlink_callback *cb);
  
  int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info);
  int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info);
diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c

index 6ac5ff20a2fe22f1c3c5a7010ca0af87caf4c446..401a77e3b5fa8ed9e9b834c4a55cde98d2b2a8db 100644 (file)
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work)
          */
         card->bm_generation = generation;
  
-       if (root_device == NULL) {
+       if (card->gap_count == 0) {
+               /*
+                * If self IDs have inconsistent gap counts, do a
+                * bus reset ASAP. The config rom read might never
+                * complete, so don't wait for it. However, still
+                * send a PHY configuration packet prior to the
+                * bus reset. The PHY configuration packet might
+                * fail, but 1394-2008 8.4.5.2 explicitly permits
+                * it in this case, so it should be safe to try.
+                */
+               new_root_id = local_id;
+               /*
+                * We must always send a bus reset if the gap count
+                * is inconsistent, so bypass the 5-reset limit.
+                */
+               card->bm_retries = 0;
+       } else if (root_device == NULL) {
                 /*
                  * Either link_on is false, or we failed to read the
                  * config rom.  In either case, pick another root.
@@ -484,7 +500,19 @@ static void bm_work(struct work_struct *work)
                 fw_notice(card, "phy config: new root=%x, gap_count=%d\n",
                           new_root_id, gap_count);
                 fw_send_phy_config(card, new_root_id, generation, gap_count);
-               reset_bus(card, true);
+               /*
+                * Where possible, use a short bus reset to minimize
+                * disruption to isochronous transfers. But in the event
+                * of a gap count inconsistency, use a long bus reset.
+                *
+                * As noted in 1394a 8.4.6.2, nodes on a mixed 1394/1394a bus
+                * may set different gap counts after a bus reset. On a mixed
+                * 1394/1394a bus, a short bus reset can get doubled. Some
+                * nodes may treat the double reset as one bus reset and others
+                * may treat it as two, causing a gap count inconsistency
+                * again. Using a long bus reset prevents this.
+                */
+               reset_bus(card, card->gap_count != 0);
                 /* Will allocate broadcast channel after the reset. */
                 goto out;
         }
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c

index 0547253d16fe5dc5f606a34035473de7f271acf6..7d3346b3a2bf320910c72783ab857415d331ed14 100644 (file)
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -118,10 +118,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size)
   * @buf:       where to put the string
   * @size:      size of @buf, in bytes
   *
- * The string is taken from a minimal ASCII text descriptor leaf after
- * the immediate entry with @key.  The string is zero-terminated.
- * An overlong string is silently truncated such that it and the
- * zero byte fit into @size.
+ * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the
+ * @key. The string is zero-terminated. An overlong string is silently truncated such that it
+ * and the zero byte fit into @size.
   *
   * Returns strlen(buf) or a negative error code.
   */
@@ -368,8 +367,17 @@ static ssize_t show_text_leaf(struct device *dev,
         for (i = 0; i < ARRAY_SIZE(directories) && !!directories[i]; ++i) {
                 int result = fw_csr_string(directories[i], attr->key, buf, bufsize);
                 // Detected.
-               if (result >= 0)
+               if (result >= 0) {
                         ret = result;
+               } else if (i == 0 && attr->key == CSR_VENDOR) {
+                       // Sony DVMC-DA1 has configuration ROM such that the descriptor leaf entry
+                       // in the root directory follows to the directory entry for vendor ID
+                       // instead of the immediate value for vendor ID.
+                       result = fw_csr_string(directories[i], CSR_DIRECTORY | attr->key, buf,
+                                              bufsize);
+                       if (result >= 0)
+                               ret = result;
+               }
         }
  
         if (ret >= 0) {
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c

index 83f5bb57fa4c466334a90c2195c06ce7443d1b6a..83092d93f36a63087ffbd8b6460d38a824e9cbb1 100644 (file)
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void)
                 efi_memory_desc_t *md;
  
                 for_each_efi_memory_desc(md) {
-                       int md_size = md->num_pages << EFI_PAGE_SHIFT;
+                       u64 md_size = md->num_pages << EFI_PAGE_SHIFT;
                         struct resource *res;
  
                         if (!(md->attribute & EFI_MEMORY_SP))
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c

index 3e8d4b51a8140c16720eef8f08d311b024b1a830..97bafb5f7038924fb99eea6f5679b18b2d459e5a 100644 (file)
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
                 return -ENOMEM;
         }
  
-       cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+       cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL);
         if (!cap_info->phys) {
                 kfree(cap_info->pages);
                 kfree(cap_info);
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c

index 35c37f667781c7071c714aef274e68dbddca026b..9b3884ff81e699f2308a3cf618e774ad9a67e6a3 100644 (file)
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -523,6 +523,17 @@ static void cper_print_tstamp(const char *pfx,
         }
  }
  
+struct ignore_section {
+       guid_t guid;
+       const char *name;
+};
+
+static const struct ignore_section ignore_sections[] = {
+       { .guid = CPER_SEC_CXL_GEN_MEDIA_GUID, .name = "CXL General Media Event" },
+       { .guid = CPER_SEC_CXL_DRAM_GUID, .name = "CXL DRAM Event" },
+       { .guid = CPER_SEC_CXL_MEM_MODULE_GUID, .name = "CXL Memory Module Event" },
+};
+
  static void
  cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
                            int sec_no)
@@ -543,6 +554,14 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
                 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
  
         snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
+
+       for (int i = 0; i < ARRAY_SIZE(ignore_sections); i++) {
+               if (guid_equal(sec_type, &ignore_sections[i].guid)) {
+                       printk("%ssection_type: %s\n", newpfx, ignore_sections[i].name);
+                       return;
+               }
+       }
+
         if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
                 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
  
diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c

index d4987d013080174bda0e462f029c03192897bebb..a00e07b853f221721e1bcd2f801cadcc5bcb67cf 100644 (file)
--- a/drivers/firmware/efi/efi-init.c
+++ b/drivers/firmware/efi/efi-init.c
@@ -143,15 +143,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md)
         case EFI_BOOT_SERVICES_DATA:
         case EFI_CONVENTIONAL_MEMORY:
         case EFI_PERSISTENT_MEMORY:
-               /*
-                * Special purpose memory is 'soft reserved', which means it
-                * is set aside initially, but can be hotplugged back in or
-                * be assigned to the dax driver after boot.
-                */
-               if (efi_soft_reserve_enabled() &&
-                   (md->attribute & EFI_MEMORY_SP))
-                       return false;
-
                 /*
                  * According to the spec, these regions are no longer reserved
                  * after calling ExitBootServices(). However, we can only use
@@ -196,6 +187,16 @@ static __init void reserve_regions(void)
                 size = npages << PAGE_SHIFT;
  
                 if (is_memory(md)) {
+                       /*
+                        * Special purpose memory is 'soft reserved', which
+                        * means it is set aside initially. Don't add a memblock
+                        * for it now so that it can be hotplugged back in or
+                        * be assigned to the dax driver after boot.
+                        */
+                       if (efi_soft_reserve_enabled() &&
+                           (md->attribute & EFI_MEMORY_SP))
+                               continue;
+
                         early_init_dt_add_memory_arch(paddr, size);
  
                         if (!is_usable_memory(md))
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile

index 06964a3c130f6addeed20eca1ed26153a2260854..73f4810f6db38ecc933f9a6ac2bed5ae57709148 100644 (file)
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM)          += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
                                    -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
                                    -DEFI_HAVE_STRCMP -fno-builtin -fpic \
                                    $(call cc-option,-mno-single-pic-base)
-cflags-$(CONFIG_RISCV)         += -fpic -DNO_ALTERNATIVE
+cflags-$(CONFIG_RISCV)         += -fpic -DNO_ALTERNATIVE -mno-relax
  cflags-$(CONFIG_LOONGARCH)     += -fpie
  
  cflags-$(CONFIG_EFI_PARAMS_FROM_FDT)   += -I$(srctree)/scripts/dtc/libfdt
@@ -143,7 +143,7 @@ STUBCOPY_RELOC-$(CONFIG_ARM64)      := R_AARCH64_ABS
  # exist.
  STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \
                                    --prefix-symbols=__efistub_
-STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20
+STUBCOPY_RELOC-$(CONFIG_RISCV) := -E R_RISCV_HI20\|R_RISCV_$(BITS)\|R_RISCV_RELAX
  
  # For LoongArch, keep all the symbols in .init section and make sure that no
  # absolute symbols references exist.
diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c

index 6b83c492c3b8260d52e16bb73a1d5abaa4cb943d..31928bd87e0fff5a0666234ef8328cf5d4f564df 100644 (file)
--- a/drivers/firmware/efi/libstub/alignedmem.c
+++ b/drivers/firmware/efi/libstub/alignedmem.c
@@ -14,6 +14,7 @@
   * @max:       the address that the last allocated memory page shall not
   *             exceed
   * @align:     minimum alignment of the base of the allocation
+ * @memory_type: the type of memory to allocate
   *
   * Allocate pages as EFI_LOADER_DATA. The allocated pages are aligned according
   * to @align, which should be >= EFI_ALLOC_ALIGN. The last allocated page will
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h

index 212687c30d79c4b0b307af0b8d3c7b52502e6a95..c04b82ea40f2169b6764ff69a14ff3acc5a8795d 100644 (file)
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -956,7 +956,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
  
  efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
                               unsigned long *addr, unsigned long random_seed,
-                             int memory_type, unsigned long alloc_limit);
+                             int memory_type, unsigned long alloc_min,
+                             unsigned long alloc_max);
  
  efi_status_t efi_random_get_seed(void);
  
diff --git a/drivers/firmware/efi/libstub/kaslr.c b/drivers/firmware/efi/libstub/kaslr.c

index 62d63f7a2645bf82525d79b5d8825e9bea023404..1a9808012abd36ee7f58ad0baf818cbae6df1b0b 100644 (file)
--- a/drivers/firmware/efi/libstub/kaslr.c
+++ b/drivers/firmware/efi/libstub/kaslr.c
@@ -119,7 +119,7 @@ efi_status_t efi_kaslr_relocate_kernel(unsigned long *image_addr,
                  */
                 status = efi_random_alloc(*reserve_size, min_kimg_align,
                                           reserve_addr, phys_seed,
-                                         EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
+                                         EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT);
                 if (status != EFI_SUCCESS)
                         efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
         } else {
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c

index 674a064b8f7adc68edf2412bb8e012250077c717..4e96a855fdf47b5b064b63b729d7dc989cd2b949 100644 (file)
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -17,7 +17,7 @@
  static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
                                          unsigned long size,
                                          unsigned long align_shift,
-                                        u64 alloc_limit)
+                                        u64 alloc_min, u64 alloc_max)
  {
         unsigned long align = 1UL << align_shift;
         u64 first_slot, last_slot, region_end;
@@ -30,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
                 return 0;
  
         region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
-                        alloc_limit);
+                        alloc_max);
         if (region_end < size)
                 return 0;
  
-       first_slot = round_up(md->phys_addr, align);
+       first_slot = round_up(max(md->phys_addr, alloc_min), align);
         last_slot = round_down(region_end - size + 1, align);
  
         if (first_slot > last_slot)
@@ -56,7 +56,8 @@ efi_status_t efi_random_alloc(unsigned long size,
                               unsigned long *addr,
                               unsigned long random_seed,
                               int memory_type,
-                             unsigned long alloc_limit)
+                             unsigned long alloc_min,
+                             unsigned long alloc_max)
  {
         unsigned long total_slots = 0, target_slot;
         unsigned long total_mirrored_slots = 0;
@@ -78,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size,
                 efi_memory_desc_t *md = (void *)map->map + map_offset;
                 unsigned long slots;
  
-               slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit);
+               slots = get_entry_num_slots(md, size, ilog2(align), alloc_min,
+                                           alloc_max);
                 MD_NUM_SLOTS(md) = slots;
                 total_slots += slots;
                 if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c

index 0d510c9a06a45925922595f1e44c7ee3b2a170a6..99429bc4b0c7eb0c639b84934fe614f8f8cb5721 100644 (file)
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -223,8 +223,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
         }
  }
  
-void efi_adjust_memory_range_protection(unsigned long start,
-                                       unsigned long size)
+efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+                                               unsigned long size)
  {
         efi_status_t status;
         efi_gcd_memory_space_desc_t desc;
@@ -236,13 +236,17 @@ void efi_adjust_memory_range_protection(unsigned long start,
         rounded_end = roundup(start + size, EFI_PAGE_SIZE);
  
         if (memattr != NULL) {
-               efi_call_proto(memattr, clear_memory_attributes, rounded_start,
-                              rounded_end - rounded_start, EFI_MEMORY_XP);
-               return;
+               status = efi_call_proto(memattr, clear_memory_attributes,
+                                       rounded_start,
+                                       rounded_end - rounded_start,
+                                       EFI_MEMORY_XP);
+               if (status != EFI_SUCCESS)
+                       efi_warn("Failed to clear EFI_MEMORY_XP attribute\n");
+               return status;
         }
  
         if (efi_dxe_table == NULL)
-               return;
+               return EFI_SUCCESS;
  
         /*
          * Don't modify memory region attributes, they are
@@ -255,7 +259,7 @@ void efi_adjust_memory_range_protection(unsigned long start,
                 status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
  
                 if (status != EFI_SUCCESS)
-                       return;
+                       break;
  
                 next = desc.base_address + desc.length;
  
@@ -280,8 +284,10 @@ void efi_adjust_memory_range_protection(unsigned long start,
                                  unprotect_start,
                                  unprotect_start + unprotect_size,
                                  status);
+                       break;
                 }
         }
+       return EFI_SUCCESS;
  }
  
  static void setup_unaccepted_memory(void)
@@ -793,6 +799,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
  
         status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr,
                                   seed[0], EFI_LOADER_CODE,
+                                 LOAD_PHYSICAL_ADDR,
                                   EFI_X86_KERNEL_ALLOC_LIMIT);
         if (status != EFI_SUCCESS)
                 return status;
@@ -805,9 +812,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
  
         *kernel_entry = addr + entry;
  
-       efi_adjust_memory_range_protection(addr, kernel_total_size);
-
-       return EFI_SUCCESS;
+       return efi_adjust_memory_range_protection(addr, kernel_total_size);
  }
  
  static void __noreturn enter_kernel(unsigned long kernel_addr,
diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h

index 37c5a36b9d8cf9b2cad93f228502fd336d142908..1c20e99a6494423787ef1dd091739ed9cbc89a24 100644 (file)
--- a/drivers/firmware/efi/libstub/x86-stub.h
+++ b/drivers/firmware/efi/libstub/x86-stub.h
@@ -5,8 +5,8 @@
  extern void trampoline_32bit_src(void *, bool);
  extern const u16 trampoline_ljmp_imm_offset;
  
-void efi_adjust_memory_range_protection(unsigned long start,
-                                       unsigned long size);
+efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+                                               unsigned long size);
  
  #ifdef CONFIG_X86_64
  efi_status_t efi_setup_5level_paging(void);
diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c

index bdb17eac0cb401befbcc8b13820f9a3b416b6f19..1ceace956758682f592f6fe3f280b7260f7ca562 100644 (file)
--- a/drivers/firmware/efi/libstub/zboot.c
+++ b/drivers/firmware/efi/libstub/zboot.c
@@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
                 }
  
                 status = efi_random_alloc(alloc_size, min_kimg_align, &image_base,
-                                         seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
+                                         seed, EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT);
                 if (status != EFI_SUCCESS) {
                         efi_err("Failed to allocate memory\n");
                         goto free_cmdline;
diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c

index 09525fb5c240e6686ff5588c55998d5815e20ff7..01f0f90ea4183119b0a4eedf82a3fe81f1b2f480 100644 (file)
--- a/drivers/firmware/efi/riscv-runtime.c
+++ b/drivers/firmware/efi/riscv-runtime.c
@@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void)
                 efi_memory_desc_t *md;
  
                 for_each_efi_memory_desc(md) {
-                       int md_size = md->num_pages << EFI_PAGE_SHIFT;
+                       u64 md_size = md->num_pages << EFI_PAGE_SHIFT;
                         struct resource *res;
  
                         if (!(md->attribute & EFI_MEMORY_SP))
diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c

index 81f5f62e34fce04fb6db2db11294f8281c58f5b7..682e417be5a3e49d3a4bc8c63bac963670f09af1 100644 (file)
--- a/drivers/firmware/microchip/mpfs-auto-update.c
+++ b/drivers/firmware/microchip/mpfs-auto-update.c
@@ -167,7 +167,7 @@ static int mpfs_auto_update_verify_image(struct fw_upload *fw_uploader)
         u32 *response_msg;
         int ret;
  
-       response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(response_msg),
+       response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(*response_msg),
                                     GFP_KERNEL);
         if (!response_msg)
                 return -ENOMEM;
diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c

index e00c333105170f5a2a702593feab340ddc4a7d8e..753e7be039e4d9cd830190d75d8b62ca1219ec96 100644 (file)
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi)
         if (IS_ERR(chip->gpiod_oe))
                 return PTR_ERR(chip->gpiod_oe);
  
-       gpiod_set_value_cansleep(chip->gpiod_oe, 1);
-
         spi_set_drvdata(spi, chip);
  
         chip->gpio_chip.label = spi->modalias;
@@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi)
                 goto exit_destroy;
         }
  
+       gpiod_set_value_cansleep(chip->gpiod_oe, 1);
+
         ret = gpiochip_add_data(&chip->gpio_chip, chip);
         if (!ret)
                 return 0;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c

index 44c8f5743a2416087b523e973967e993e8a192a1..75be4a3ca7f8443f55a68aff5185044bbbdaa367 100644 (file)
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -968,11 +968,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
  
         ret = gpiochip_irqchip_init_valid_mask(gc);
         if (ret)
-               goto err_remove_acpi_chip;
+               goto err_free_hogs;
  
         ret = gpiochip_irqchip_init_hw(gc);
         if (ret)
-               goto err_remove_acpi_chip;
+               goto err_remove_irqchip_mask;
  
         ret = gpiochip_add_irqchip(gc, lock_key, request_key);
         if (ret)
@@ -997,23 +997,23 @@ err_remove_irqchip:
         gpiochip_irqchip_remove(gc);
  err_remove_irqchip_mask:
         gpiochip_irqchip_free_valid_mask(gc);
-err_remove_acpi_chip:
+err_free_hogs:
+       gpiochip_free_hogs(gc);
         acpi_gpiochip_remove(gc);
+       gpiochip_remove_pin_ranges(gc);
  err_remove_of_chip:
-       gpiochip_free_hogs(gc);
         of_gpiochip_remove(gc);
  err_free_gpiochip_mask:
-       gpiochip_remove_pin_ranges(gc);
         gpiochip_free_valid_mask(gc);
+err_remove_from_list:
+       spin_lock_irqsave(&gpio_lock, flags);
+       list_del(&gdev->list);
+       spin_unlock_irqrestore(&gpio_lock, flags);
         if (gdev->dev.release) {
                 /* release() has been registered by gpiochip_setup_dev() */
                 gpio_device_put(gdev);
                 goto err_print_message;
         }
-err_remove_from_list:
-       spin_lock_irqsave(&gpio_lock, flags);
-       list_del(&gdev->list);
-       spin_unlock_irqrestore(&gpio_lock, flags);
  err_free_label:
         kfree_const(gdev->label);
  err_free_descs:
@@ -2042,6 +2042,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_free);
  int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset,
                             unsigned long config)
  {
+#ifdef CONFIG_PINCTRL
+       if (list_empty(&gc->gpiodev->pin_ranges))
+               return -ENOTSUPP;
+#endif
+
         return pinctrl_gpio_set_config(gc, offset, config);
  }
  EXPORT_SYMBOL_GPL(gpiochip_generic_config);
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig

index 2520db0b776e1bccf213fd541baf6275dbb192eb..c7edba18a6f09c4d3c75af737d94737a0e6f2890 100644 (file)
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -199,7 +199,7 @@ config DRM_TTM
  config DRM_TTM_KUNIT_TEST
          tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS
          default n
-        depends on DRM && KUNIT && MMU
+        depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
          select DRM_TTM
          select DRM_EXPORT_FOR_TESTS if m
          select DRM_KUNIT_TEST_HELPERS
@@ -207,7 +207,8 @@ config DRM_TTM_KUNIT_TEST
          help
            Enables unit tests for TTM, a GPU memory manager subsystem used
            to manage memory buffers. This option is mostly useful for kernel
-          developers.
+          developers. It depends on (UML || COMPILE_TEST) since no other driver
+          which uses TTM can be loaded while running the tests.
  
            If in doubt, say "N".
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 3d8a48f46b015613dc44517ebd20d5250df5a3b1..79827a6dcd7f5cbbf30d61f6701ecee8ae6614fa 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -200,6 +200,7 @@ extern uint amdgpu_dc_debug_mask;
  extern uint amdgpu_dc_visual_confirm;
  extern uint amdgpu_dm_abm_level;
  extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
  extern struct amdgpu_mgpu_info mgpu_info;
  extern int amdgpu_ras_enable;
  extern uint amdgpu_ras_mask;
@@ -1078,6 +1079,8 @@ struct amdgpu_device {
         bool                            in_s3;
         bool                            in_s4;
         bool                            in_s0ix;
+       /* indicate amdgpu suspension status */
+       bool                            suspend_complete;
  
         enum pp_mp1_state               mp1_state;
         struct amdgpu_doorbell_index doorbell_index;
@@ -1547,9 +1550,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
  #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
  bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
  bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+void amdgpu_choose_low_power_state(struct amdgpu_device *adev);
  #else
  static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
  static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
+static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { }
  #endif
  
  #if defined(CONFIG_DRM_AMD_DC)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c

index 2deebece810e78a7ce039772a839684f570bceca..7099ff9cf8c50d7b7ea96149bcef235368fae165 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1519,4 +1519,22 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
  #endif /* CONFIG_AMD_PMC */
  }
  
+/**
+ * amdgpu_choose_low_power_state
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * Choose the target low power state for the GPU
+ */
+void amdgpu_choose_low_power_state(struct amdgpu_device *adev)
+{
+       if (adev->in_runpm)
+               return;
+
+       if (amdgpu_acpi_is_s0ix_active(adev))
+               adev->in_s0ix = true;
+       else if (amdgpu_acpi_is_s3_active(adev))
+               adev->in_s3 = true;
+}
+
  #endif /* CONFIG_SUSPEND */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 77e2636602887034c188ec695591d20e5b087b60..41db030ddc4ee9c98ba952b4b91d6292f7c457d6 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
  static const struct drm_client_funcs kfd_client_funcs = {
         .unregister     = drm_client_release,
  };
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
+{
+       int ret;
+
+       if (!adev->kfd.init_complete)
+               return 0;
+
+       ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+                             &kfd_client_funcs);
+       if (ret) {
+               dev_err(adev->dev, "Failed to init DRM client: %d\n",
+                       ret);
+               return ret;
+       }
+
+       drm_client_register(&adev->kfd.client);
+
+       return 0;
+}
+
  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
  {
         int i;
         int last_valid_bit;
-       int ret;
  
         amdgpu_amdkfd_gpuvm_init_mem_limits();
  
@@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
                         .enable_mes = adev->enable_mes,
                 };
  
-               ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
-               if (ret) {
-                       dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
-                       return;
-               }
-
                 /* this is going to have a few of the MSBs set that we need to
                  * clear
                  */
@@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
  
                 adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
                                                         &gpu_resources);
-               if (adev->kfd.init_complete)
-                       drm_client_register(&adev->kfd.client);
-               else
-                       drm_client_release(&adev->kfd.client);
  
                 amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index f262b9d89541a8a971a394b5f0da0f6a1368ba65..27c61c535e297931892902f1abb9e56ca6feea5c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
  struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
                                 struct mm_struct *mm,
                                 struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
  #if defined(CONFIG_DEBUG_FS)
  int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
  #endif
@@ -301,7 +303,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
                                           struct kgd_mem *mem, void *drm_priv);
  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
                 struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
  int amdgpu_amdkfd_gpuvm_sync_memory(
                 struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c

index 899e31e3a5e81d2be343a668e295a564efee10af..3a3f3ce09f00dbe77f61455f24fed7bd0db0dec5 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
  
-               if (!(ring && drm_sched_wqueue_ready(&ring->sched)))
+               if (!amdgpu_ring_sched_ready(ring))
                         continue;
  
                 /* stop secheduler and drain ring. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index f183d7faeeece16cfc7c211f5a6a0232dce37c36..231fd927dcfbee0db07e3a5d28eed2b24ff82b9c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2085,21 +2085,35 @@ out:
         return ret;
  }
  
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
  {
         struct kfd_mem_attachment *entry;
         struct amdgpu_vm *vm;
+       int ret;
  
         vm = drm_priv_to_vm(drm_priv);
  
         mutex_lock(&mem->lock);
  
+       ret = amdgpu_bo_reserve(mem->bo, true);
+       if (ret)
+               goto out;
+
         list_for_each_entry(entry, &mem->attachments, list) {
-               if (entry->bo_va->base.vm == vm)
-                       kfd_mem_dmaunmap_attachment(mem, entry);
+               if (entry->bo_va->base.vm != vm)
+                       continue;
+               if (entry->bo_va->base.bo->tbo.ttm &&
+                   !entry->bo_va->base.bo->tbo.ttm->sg)
+                       continue;
+
+               kfd_mem_dmaunmap_attachment(mem, entry);
         }
  
+       amdgpu_bo_unreserve(mem->bo);
+out:
         mutex_unlock(&mem->lock);
+
+       return ret;
  }
  
  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

index e485dd3357c63fd225b3fb7e3847675749f018da..1afbb2e932c6b58a9e26cbabe61370151373a4af 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1678,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
                 struct amdgpu_ring *ring = adev->rings[i];
  
-               if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+               if (!amdgpu_ring_sched_ready(ring))
                         continue;
                 drm_sched_wqueue_stop(&ring->sched);
         }
@@ -1694,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
                 struct amdgpu_ring *ring = adev->rings[i];
  
-               if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+               if (!amdgpu_ring_sched_ready(ring))
                         continue;
                 drm_sched_wqueue_start(&ring->sched);
         }
@@ -1916,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
  
         ring = adev->rings[val];
  
-       if (!ring || !ring->funcs->preempt_ib ||
-           !drm_sched_wqueue_ready(&ring->sched))
+       if (!amdgpu_ring_sched_ready(ring) ||
+           !ring->funcs->preempt_ib)
                 return -EINVAL;
  
         /* the last preemption failed */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index b158d27d0a71cbbafb55f0d58657c1ec178fa6c2..94bdb5fa6ebc6ac7715a64191b5050a9670bf673 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4121,23 +4121,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                                 }
                         }
                 } else {
-                       switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
-                       case IP_VERSION(13, 0, 0):
-                       case IP_VERSION(13, 0, 7):
-                       case IP_VERSION(13, 0, 10):
-                               r = psp_gpu_reset(adev);
-                               break;
-                       default:
-                               tmp = amdgpu_reset_method;
-                               /* It should do a default reset when loading or reloading the driver,
-                                * regardless of the module parameter reset_method.
-                                */
-                               amdgpu_reset_method = AMD_RESET_METHOD_NONE;
-                               r = amdgpu_asic_reset(adev);
-                               amdgpu_reset_method = tmp;
-                               break;
-                       }
-
+                       tmp = amdgpu_reset_method;
+                       /* It should do a default reset when loading or reloading the driver,
+                        * regardless of the module parameter reset_method.
+                        */
+                       amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+                       r = amdgpu_asic_reset(adev);
+                       amdgpu_reset_method = tmp;
                         if (r) {
                                 dev_err(adev->dev, "asic reset on init failed\n");
                                 goto failed;
@@ -4524,13 +4514,15 @@ int amdgpu_device_prepare(struct drm_device *dev)
         struct amdgpu_device *adev = drm_to_adev(dev);
         int i, r;
  
+       amdgpu_choose_low_power_state(adev);
+
         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
                 return 0;
  
         /* Evict the majority of BOs before starting suspend sequence */
         r = amdgpu_device_evict_resources(adev);
         if (r)
-               return r;
+               goto unprepare;
  
         for (i = 0; i < adev->num_ip_blocks; i++) {
                 if (!adev->ip_blocks[i].status.valid)
@@ -4539,10 +4531,15 @@ int amdgpu_device_prepare(struct drm_device *dev)
                         continue;
                 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
                 if (r)
-                       return r;
+                       goto unprepare;
         }
  
         return 0;
+
+unprepare:
+       adev->in_s0ix = adev->in_s3 = false;
+
+       return r;
  }
  
  /**
@@ -4579,7 +4576,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
  
         cancel_delayed_work_sync(&adev->delayed_init_work);
-       flush_delayed_work(&adev->gfx.gfx_off_delay_work);
  
         amdgpu_ras_suspend(adev);
  
@@ -5031,7 +5027,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                 struct amdgpu_ring *ring = adev->rings[i];
  
-               if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+               if (!amdgpu_ring_sched_ready(ring))
                         continue;
  
                 spin_lock(&ring->sched.job_list_lock);
@@ -5170,7 +5166,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                 struct amdgpu_ring *ring = adev->rings[i];
  
-               if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+               if (!amdgpu_ring_sched_ready(ring))
                         continue;
  
                 /* Clear job fence from fence drv to avoid force_completion
@@ -5637,7 +5633,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                         struct amdgpu_ring *ring = tmp_adev->rings[i];
  
-                       if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+                       if (!amdgpu_ring_sched_ready(ring))
                                 continue;
  
                         drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5706,7 +5702,7 @@ skip_hw_reset:
                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                         struct amdgpu_ring *ring = tmp_adev->rings[i];
  
-                       if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+                       if (!amdgpu_ring_sched_ready(ring))
                                 continue;
  
                         drm_sched_start(&ring->sched, true);
@@ -6061,7 +6057,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                         struct amdgpu_ring *ring = adev->rings[i];
  
-                       if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+                       if (!amdgpu_ring_sched_ready(ring))
                                 continue;
  
                         drm_sched_stop(&ring->sched, NULL);
@@ -6189,7 +6185,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                 struct amdgpu_ring *ring = adev->rings[i];
  
-               if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+               if (!amdgpu_ring_sched_ready(ring))
                         continue;
  
                 drm_sched_start(&ring->sched, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index cc69005f5b46e7b9f06d65db13287a617cc384e2..586f4d03039dfb5177a27fce81fbdbead88e0235 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -211,6 +211,7 @@ int amdgpu_seamless = -1; /* auto */
  uint amdgpu_debug_mask;
  int amdgpu_agp = -1; /* auto */
  int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
  
  static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
  
@@ -859,6 +860,18 @@ int amdgpu_backlight = -1;
  MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))");
  module_param_named(backlight, amdgpu_backlight, bint, 0444);
  
+/**
+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1 (where it is enabled unless a PSR-SU display is detected).
+ */
+MODULE_PARM_DESC(damageclips,
+                "Damage clips support (0 = disable, 1 = enable, -1 auto (default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
+
  /**
   * DOC: tmz (int)
   * Trusted Memory Zone (TMZ) is a method to protect data being written
@@ -2255,6 +2268,10 @@ retry_init:
         if (ret)
                 goto err_pci;
  
+       ret = amdgpu_amdkfd_drm_client_create(adev);
+       if (ret)
+               goto err_pci;
+
         /*
          * 1. don't init fbdev on hw without DCE
          * 2. don't init fbdev if there are no connectors
@@ -2472,6 +2489,7 @@ static int amdgpu_pmops_suspend(struct device *dev)
         struct drm_device *drm_dev = dev_get_drvdata(dev);
         struct amdgpu_device *adev = drm_to_adev(drm_dev);
  
+       adev->suspend_complete = false;
         if (amdgpu_acpi_is_s0ix_active(adev))
                 adev->in_s0ix = true;
         else if (amdgpu_acpi_is_s3_active(adev))
@@ -2486,6 +2504,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
         struct drm_device *drm_dev = dev_get_drvdata(dev);
         struct amdgpu_device *adev = drm_to_adev(drm_dev);
  
+       adev->suspend_complete = true;
         if (amdgpu_acpi_should_gpu_reset(adev))
                 return amdgpu_asic_reset(adev);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

index b9674c57c4365fb5ebdf9644fc4ac0a31b955da8..6ddc8e3360e220644618b26059d735e6bbda10e4 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -723,8 +723,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
  
                 if (adev->gfx.gfx_off_req_count == 0 &&
                     !adev->gfx.gfx_off_state) {
-                       schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+                       /* If going to s2idle, no need to wait */
+                       if (adev->in_s0ix) {
+                               if (!amdgpu_dpm_set_powergating_by_smu(adev,
+                                               AMD_IP_BLOCK_TYPE_GFX, true))
+                                       adev->gfx.gfx_off_state = true;
+                       } else {
+                               schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
                                               delay);
+                       }
                 }
         } else {
                 if (adev->gfx.gfx_off_req_count == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c

index 468a67b302d4c140c9d7cf09bc92566404180e75..ca5c86e5f7cd671a651d61357ab52d3c53a1e7f3 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -362,7 +362,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
                 }
         }
  
-       if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len))
+       if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
                 ret = -EFAULT;
  
  err_free_shared_buf:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

index 45424ebf9681430fefc21bdc33d6aa2c6e5f6c91..5505d646f43aa8f963d8d8732846b00fc612a3a7 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
                               ring->name);
  
         ring->sched.ready = !r;
+
         return r;
  }
  
@@ -717,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
         if (ring->is_sw_ring)
                 amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
  }
+
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
+{
+       if (!ring)
+               return false;
+
+       if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
+               return false;
+
+       return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index bbb53720a0181d93cf9fdfd6f7721ee006699004..fe1a61eb6e4c0809c1bccd41bc89f32bcd8304f2 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
  int amdgpu_ib_pool_init(struct amdgpu_device *adev);
  void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
  int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c

index 6f7c031dd197a22e388ddcfaed56ec75e37cafe5..f24e34dc33d1defcd70cab67f1423dffd31e8f08 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
                 tmp = RREG32(mmIH_RB_CNTL);
                 tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
                 WREG32(mmIH_RB_CNTL, tmp);
+
+               /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+                * can be detected.
+                */
+               tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+               WREG32(mmIH_RB_CNTL, tmp);
         }
         return (wptr & ih->ptr_mask);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c

index b8c47e0cf37ad53bcb3f1afe161e6356b91789e3..c19681492efa748bf7b5d92864dbdc61c0351520 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
         tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
         WREG32(mmIH_RB_CNTL, tmp);
  
+       /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+        * can be detected.
+        */
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+       WREG32(mmIH_RB_CNTL, tmp);
  
  out:
         return (wptr & ih->ptr_mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

index ecb622b7f9709c1a0f9e5307b3dad1da7f0f8f05..dcdecb18b2306b84ca1b18852837409776707c69 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4027,8 +4027,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
                 err = 0;
                 adev->gfx.mec2_fw = NULL;
         }
-       amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
-       amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
  
         gfx_v10_0_check_fw_write_wait(adev);
  out:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index d9cf9fd03d30010df0827033c4ceb6bbd21d3afa..4f3bfdc75b37d66cbc5d78a5525a8a905eb1e733 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -107,23 +107,6 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
         SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
  };
  
-static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = {
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a),
-       SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f)
-};
-
  #define DEFAULT_SH_MEM_CONFIG \
         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -304,11 +287,6 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
                                                 golden_settings_gc_11_0_1,
                                                 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
                 break;
-       case IP_VERSION(11, 5, 0):
-               soc15_program_register_sequence(adev,
-                                               golden_settings_gc_11_5_0,
-                                               (const u32)ARRAY_SIZE(golden_settings_gc_11_5_0));
-               break;
         default:
                 break;
         }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index 69c500910746018281471ad6d27350aaf2461702..3bc6943365a4ff36a32827ae2d477aac6883631d 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3034,6 +3034,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
  
         gfx_v9_0_cp_gfx_enable(adev, true);
  
+       /* Now only limit the quirk on the APU gfx9 series and already
+        * confirmed that the APU gfx10/gfx11 needn't such update.
+        */
+       if (adev->flags & AMD_IS_APU &&
+                       adev->in_s3 && !adev->suspend_complete) {
+               DRM_INFO(" Will skip the CSB packet resubmit\n");
+               return 0;
+       }
         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
         if (r) {
                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c

index 42e103d7077d52d5bbe556f70f2b03bb0d5ae8db..59d9215e555629577b43afcba38e945f5ce90bcd 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -915,8 +915,8 @@ static int gmc_v6_0_hw_init(void *handle)
  
         if (amdgpu_emu_mode == 1)
                 return amdgpu_gmc_vram_checking(adev);
-       else
-               return r;
+
+       return 0;
  }
  
  static int gmc_v6_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

index efc16e580f1e27e384b7c80323c72d0e59fba473..45a2f8e031a2c9920f3a68ae690731357f33da0c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1099,8 +1099,8 @@ static int gmc_v7_0_hw_init(void *handle)
  
         if (amdgpu_emu_mode == 1)
                 return amdgpu_gmc_vram_checking(adev);
-       else
-               return r;
+
+       return 0;
  }
  
  static int gmc_v7_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

index ff4ae73d27ecd26aaf399bdfe158e22c1de3009f..4422b27a3cc2fc069a6ecb3e6d8b9630e9c173cc 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1219,8 +1219,8 @@ static int gmc_v8_0_hw_init(void *handle)
  
         if (amdgpu_emu_mode == 1)
                 return amdgpu_gmc_vram_checking(adev);
-       else
-               return r;
+
+       return 0;
  }
  
  static int gmc_v8_0_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 17b7a25121b00e48637fffff324531890301819a..e67a62db9e12629b40c92f322922cc763ce53ce7 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1947,14 +1947,6 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
  
  static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
  {
-       static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
-       u32 vram_info;
-
-       /* Only for dGPU, vendor informaton is reliable */
-       if (!amdgpu_sriov_vf(adev) && !(adev->flags & AMD_IS_APU)) {
-               vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
-               adev->gmc.vram_vendor = vram_info & 0xF;
-       }
         adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
         adev->gmc.vram_width = 128 * 64;
  }
@@ -2341,8 +2333,8 @@ static int gmc_v9_0_hw_init(void *handle)
  
         if (amdgpu_emu_mode == 1)
                 return amdgpu_gmc_vram_checking(adev);
-       else
-               return r;
+
+       return 0;
  }
  
  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c

index aecad530b10a61289f9e2413612bbf58a33cec22..2c02ae69883d2bb86bec8e1d1fb521f8481d7ebb 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
         tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
         WREG32(mmIH_RB_CNTL, tmp);
  
+       /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+        * can be detected.
+        */
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+       WREG32(mmIH_RB_CNTL, tmp);
  
  out:
         return (wptr & ih->ptr_mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c

index d9ed7332d805d3fca1bd0343ebc804e69dc44595..ad4ad39f128f7d7f788a866d36cc7c8175743b5d 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -418,6 +418,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
         tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
         tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
         WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+       /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+        * can be detected.
+        */
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
  out:
         return (wptr & ih->ptr_mask);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c

index 8fb05eae340ad298653afaca4edccfce86741c84..b8da0fc29378c496ba0392e10105d1c58d53bf5a 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -418,6 +418,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
         tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
         tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
         WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+       /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+        * can be detected.
+        */
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
  out:
         return (wptr & ih->ptr_mask);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c

index bc38b90f8cf88e8fee393e8e52214ac72f0aa8a6..88ea58d5c4abf5b0f20abff28f9833f402e4b016 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -674,14 +674,6 @@ static int jpeg_v4_0_set_powergating_state(void *handle,
         return ret;
  }
  
-static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev,
-                                       struct amdgpu_irq_src *source,
-                                       unsigned type,
-                                       enum amdgpu_interrupt_state state)
-{
-       return 0;
-}
-
  static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
                                         struct amdgpu_irq_src *source,
                                         unsigned int type,
@@ -765,7 +757,6 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
  }
  
  static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
-       .set = jpeg_v4_0_set_interrupt_state,
         .process = jpeg_v4_0_process_interrupt,
  };
  
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c

index 6ede85b28cc8c0bbfd6a7e94c6a3d1a677e958bf..78b74daf4eebfc30f04ee4aaf6d0ff92891ff30f 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -181,7 +181,6 @@ static int jpeg_v4_0_5_hw_fini(void *handle)
                         RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
                         jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
         }
-       amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0);
  
         return 0;
  }
@@ -516,14 +515,6 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle,
         return ret;
  }
  
-static int jpeg_v4_0_5_set_interrupt_state(struct amdgpu_device *adev,
-                                       struct amdgpu_irq_src *source,
-                                       unsigned type,
-                                       enum amdgpu_interrupt_state state)
-{
-       return 0;
-}
-
  static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev,
                                       struct amdgpu_irq_src *source,
                                       struct amdgpu_iv_entry *entry)
@@ -603,7 +594,6 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev)
  }
  
  static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = {
-       .set = jpeg_v4_0_5_set_interrupt_state,
         .process = jpeg_v4_0_5_process_interrupt,
  };
  
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c

index e64b33115848d204a4d81eb9530df5bf95fdf796..de93614726c9a48ccd398c6ac5570a8844fb7618 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
         tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
         tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
         WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+       /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+        * can be detected.
+        */
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
  out:
         return (wptr & ih->ptr_mask);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c

index e90f33780803458c32843f2599c07e4f598ca659..b4723d68eab0f939ba057b67cf7712ddb512c8c8 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -431,6 +431,12 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
         u32 inst_mask;
         int i;
  
+       if (amdgpu_sriov_vf(adev))
+               adev->rmmio_remap.reg_offset =
+                       SOC15_REG_OFFSET(
+                               NBIO, 0,
+                               regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
+                       << 2;
         WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE,
                 0xff & ~(adev->gfx.xcc_mask));
  
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c

index 9a24f17a57502edaa744451bd312dfcd8b3d678c..cada9f300a7f510a3f025c3ed17c87aedcbbaeb5 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
                 tmp = RREG32(IH_RB_CNTL);
                 tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
                 WREG32(IH_RB_CNTL, tmp);
+
+               /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+                * can be detected.
+                */
+               tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+               WREG32(IH_RB_CNTL, tmp);
         }
         return (wptr & ih->ptr_mask);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c

index 15033efec2bac0148e5d9381027a6ee3e70334b7..1c614451deadd10d5dfb29a591fbeb394505ac91 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -574,11 +574,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
                 return AMD_RESET_METHOD_MODE1;
  }
  
+static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
+{
+       u32 sol_reg;
+
+       sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+
+       /* Will reset for the following suspend abort cases.
+        * 1) Only reset limit on APU side, dGPU hasn't checked yet.
+        * 2) S3 suspend abort and TOS already launched.
+        */
+       if (adev->flags & AMD_IS_APU && adev->in_s3 &&
+                       !adev->suspend_complete &&
+                       sol_reg)
+               return true;
+
+       return false;
+}
+
  static int soc15_asic_reset(struct amdgpu_device *adev)
  {
         /* original raven doesn't have full asic reset */
-       if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
-           (adev->apu_flags & AMD_APU_IS_RAVEN2))
+       /* On the latest Raven, the GPU reset can be performed
+        * successfully. So now, temporarily enable it for the
+        * S3 suspend abort case.
+        */
+       if (((adev->apu_flags & AMD_APU_IS_RAVEN) ||
+           (adev->apu_flags & AMD_APU_IS_RAVEN2)) &&
+               !soc15_need_reset_on_resume(adev))
                 return 0;
  
         switch (soc15_asic_reset_method(adev)) {
@@ -1302,6 +1325,10 @@ static int soc15_common_resume(void *handle)
  {
         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
  
+       if (soc15_need_reset_on_resume(adev)) {
+               dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n");
+               soc15_asic_reset(adev);
+       }
         return soc15_common_hw_init(adev);
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c

index 48c6efcdeac974ba109224510442b0488e1875d0..4d7188912edfee820dca2ac854b55314dc2f1b27 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -50,13 +50,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs;
  /* SOC21 */
  static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
         {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
-       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
         {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
  };
  
  static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
         {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
-       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
  };
  
  static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c

index 917707bba7f3624e37b0525d3ec72bf563c1307a..450b6e8315091448c24e2d90dcd4edccc9d4423c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
         tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
         WREG32(mmIH_RB_CNTL, tmp);
  
+       /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+        * can be detected.
+        */
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+       WREG32(mmIH_RB_CNTL, tmp);
+
  out:
         return (wptr & ih->ptr_mask);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c

index 169ed400ee7b7413263ab48a2de1e75aa3ed00f7..8ab01ae919d2e36c8ff1c2226227c173223247be 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -2017,22 +2017,6 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta
         return ret;
  }
  
-/**
- * vcn_v4_0_set_interrupt_state - set VCN block interrupt state
- *
- * @adev: amdgpu_device pointer
- * @source: interrupt sources
- * @type: interrupt types
- * @state: interrupt states
- *
- * Set VCN block interrupt state
- */
-static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
-      unsigned type, enum amdgpu_interrupt_state state)
-{
-       return 0;
-}
-
  /**
   * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state
   *
@@ -2097,7 +2081,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
  }
  
  static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
-       .set = vcn_v4_0_set_interrupt_state,
         .process = vcn_v4_0_process_interrupt,
  };
  
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c

index 2eda30e78f61d928984cf57b94337abc7b9cfc0a..49e4c3c09acab8eab12770325f4cf48c8c491b7c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -269,8 +269,6 @@ static int vcn_v4_0_5_hw_fini(void *handle)
                                 vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
                         }
                 }
-
-               amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0);
         }
  
         return 0;
@@ -1668,22 +1666,6 @@ static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_s
         return ret;
  }
  
-/**
- * vcn_v4_0_5_set_interrupt_state - set VCN block interrupt state
- *
- * @adev: amdgpu_device pointer
- * @source: interrupt sources
- * @type: interrupt types
- * @state: interrupt states
- *
- * Set VCN block interrupt state
- */
-static int vcn_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
-               unsigned type, enum amdgpu_interrupt_state state)
-{
-       return 0;
-}
-
  /**
   * vcn_v4_0_5_process_interrupt - process VCN block interrupt
   *
@@ -1726,7 +1708,6 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp
  }
  
  static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = {
-       .set = vcn_v4_0_5_set_interrupt_state,
         .process = vcn_v4_0_5_process_interrupt,
  };
  
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c

index d364c6dd152c33b7fc1fbc614668b2dd4ffe223a..bf68e18e3824b8e492c2451b655bfcf5068910f6 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
         tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
         WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
  
+       /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+        * can be detected.
+        */
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
  out:
         return (wptr & ih->ptr_mask);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c

index ddfc6941f9d559c916fe2cdb66b4e27394f1d618..db66e6cccaf2aa4e596a8f377eed8030c55159b7 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -421,6 +421,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
         tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
         WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
  
+       /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+        * can be detected.
+        */
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+       WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
  out:
         return (wptr & ih->ptr_mask);
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h

index df75863393fcb887613fb4dc054977fb46a49b1e..d1caaf0e6a7c4eaed98fc8f390781719bf28b846 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -674,7 +674,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
         0x86ea6a6a, 0x8f6e837a,
         0xb96ee0c2, 0xbf800002,
         0xb97a0002, 0xbf8a0000,
-       0xbe801f6c, 0xbf810000,
+       0xbe801f6c, 0xbf9b0000,
  };
  
  static const uint32_t cwsr_trap_nv1x_hex[] = {
@@ -1091,7 +1091,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
         0xb9eef807, 0x876dff6d,
         0x0000ffff, 0x87fe7e7e,
         0x87ea6a6a, 0xb9faf802,
-       0xbe80226c, 0xbf810000,
+       0xbe80226c, 0xbf9b0000,
         0xbf9f0000, 0xbf9f0000,
         0xbf9f0000, 0xbf9f0000,
         0xbf9f0000, 0x00000000,
@@ -1574,7 +1574,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
         0x86ea6a6a, 0x8f6e837a,
         0xb96ee0c2, 0xbf800002,
         0xb97a0002, 0xbf8a0000,
-       0xbe801f6c, 0xbf810000,
+       0xbe801f6c, 0xbf9b0000,
  };
  
  static const uint32_t cwsr_trap_aldebaran_hex[] = {
@@ -2065,7 +2065,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
         0x86ea6a6a, 0x8f6e837a,
         0xb96ee0c2, 0xbf800002,
         0xb97a0002, 0xbf8a0000,
-       0xbe801f6c, 0xbf810000,
+       0xbe801f6c, 0xbf9b0000,
  };
  
  static const uint32_t cwsr_trap_gfx10_hex[] = {
@@ -2500,7 +2500,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
         0x876dff6d, 0x0000ffff,
         0x87fe7e7e, 0x87ea6a6a,
         0xb9faf802, 0xbe80226c,
-       0xbf810000, 0xbf9f0000,
+       0xbf9b0000, 0xbf9f0000,
         0xbf9f0000, 0xbf9f0000,
         0xbf9f0000, 0xbf9f0000,
  };
@@ -2944,7 +2944,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
         0xb8eef802, 0xbf0d866e,
         0xbfa20002, 0xb97af802,
         0xbe80486c, 0xb97af802,
-       0xbe804a6c, 0xbfb00000,
+       0xbe804a6c, 0xbfb10000,
         0xbf9f0000, 0xbf9f0000,
         0xbf9f0000, 0xbf9f0000,
         0xbf9f0000, 0x00000000,
@@ -3436,5 +3436,5 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
         0x86ea6a6a, 0x8f6e837a,
         0xb96ee0c2, 0xbf800002,
         0xb97a0002, 0xbf8a0000,
-       0xbe801f6c, 0xbf810000,
+       0xbe801f6c, 0xbf9b0000,
  };
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm

index e0140df0b0ec8086433048adb31a06ca6aca740d..71b3dc0c73634aef86846be3669723590ca55db9 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -1104,7 +1104,7 @@ L_RETURN_WITHOUT_PRIV:
         s_rfe_b64       s_restore_pc_lo                                         //Return to the main shader program and resume execution
  
  L_END_PGM:
-       s_endpgm
+       s_endpgm_saved
  end
  
  function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm

index e506411ad28ab99f474eca96ff37254fb43078de..bb26338204f4ba84b5ae41a781e1becdf9ad72bb 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -921,7 +921,7 @@ L_RESTORE:
  /*                     the END                                           */
  /**************************************************************************/
  L_END_PGM:
-    s_endpgm
+    s_endpgm_saved
  
  end
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index ce4c52ec34d80eabb7f7664051ccebcd2f0ec64e..80e90fdef291d5b8cdcf7d08c6e319150fcf631b 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
                         kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
  
                 /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
-               amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+               err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+               if (err)
+                       goto sync_memory_failed;
         }
  
         mutex_unlock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index f856901055d34e605cd4ec51fbdfc3be18e2abeb..bdc01ca9609a7e57fac05ee60d6866a5950e2b07 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -574,7 +574,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
         pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
                  prange->last);
  
-       addr = prange->start << PAGE_SHIFT;
+       addr = migrate->start;
  
         src = (uint64_t *)(scratch + npages);
         dst = scratch;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c

index d722cbd317834a8a893a0ed5a847feb3a51d6961..826bc4f6c8a7043853d0b8e21bad73660c6a8a8c 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -55,8 +55,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
         m = get_mqd(mqd);
  
         if (has_wa_flag) {
-               uint32_t wa_mask = minfo->update_flag == UPDATE_FLAG_DBG_WA_ENABLE ?
-                                               0xffff : 0xffffffff;
+               uint32_t wa_mask =
+                       (minfo->update_flag & UPDATE_FLAG_DBG_WA_ENABLE) ? 0xffff : 0xffffffff;
  
                 m->compute_static_thread_mgmt_se0 = wa_mask;
                 m->compute_static_thread_mgmt_se1 = wa_mask;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

index 42d881809dc70e230133674e4b12f6f68567837a..697b6d530d12ef30ed06a22d3cf5c15fa740b62a 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -303,6 +303,15 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
                 update_cu_mask(mm, mqd, minfo, 0);
         set_priority(m, q);
  
+       if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
+               if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
+                       m->compute_resource_limits |=
+                               COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+               else
+                       m->compute_resource_limits &=
+                               ~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+       }
+
         q->is_active = QUEUE_IS_ACTIVE(*q);
  }
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 17fbedbf3651388edfcd0109a22d0fe9dfcd331f..80320b8603fc6692cc5f10426d24f33b5ce0acfa 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -532,6 +532,7 @@ struct queue_properties {
  enum mqd_update_flag {
         UPDATE_FLAG_DBG_WA_ENABLE = 1,
         UPDATE_FLAG_DBG_WA_DISABLE = 2,
+       UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */
  };
  
  struct mqd_update_info {
@@ -1488,10 +1489,15 @@ void kfd_dec_compute_active(struct kfd_node *dev);
  
  /* Cgroup Support */
  /* Check with device cgroup if @kfd device is accessible */
-static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd)
+static inline int kfd_devcgroup_check_permission(struct kfd_node *node)
  {
  #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
-       struct drm_device *ddev = adev_to_drm(kfd->adev);
+       struct drm_device *ddev;
+
+       if (node->xcp)
+               ddev = node->xcp->ddev;
+       else
+               ddev = adev_to_drm(node->adev);
  
         return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
                                           ddev->render->index,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index 43eff221eae58ca008e2e2e92aec09eb749157d7..4858112f9a53b7e491186e0efa0e70dbb92ee47a 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
  int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
                         void *gws)
  {
+       struct mqd_update_info minfo = {0};
         struct kfd_node *dev = NULL;
         struct process_queue_node *pqn;
         struct kfd_process_device *pdd;
@@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
         }
  
         pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+       minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
  
         return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
-                                                       pqn->q, NULL);
+                                                       pqn->q, &minfo);
  }
  
  void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c

index e5f7c92eebcbbfa6a1fda115ca2b599cab48e4e8..6ed2ec381aaa320ed1514038a1b6b10c44843019 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1638,12 +1638,10 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
                 else
                         mode = UNKNOWN_MEMORY_PARTITION_MODE;
  
-               if (pcache->cache_level == 2)
-                       pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc;
-               else if (mode)
-                       pcache->cache_size = pcache_info[cache_type].cache_size / mode;
-               else
-                       pcache->cache_size = pcache_info[cache_type].cache_size;
+               pcache->cache_size = pcache_info[cache_type].cache_size;
+               /* Partition mode only affects L3 cache size */
+               if (mode && pcache->cache_level == 3)
+                       pcache->cache_size /= mode;
  
                 if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
                         pcache->cache_type |= HSA_CACHE_TYPE_DATA;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

index 6cda5b536362655bce2b3bab59f4cbd6c7452c8a..5853cf022917680cbc52796c533f34d687537130 100644 (file)
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1843,21 +1843,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
                         DRM_ERROR("amdgpu: fail to register dmub aux callback");
                         goto error;
                 }
-               if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) {
-                       DRM_ERROR("amdgpu: fail to register dmub hpd callback");
-                       goto error;
-               }
-               if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) {
-                       DRM_ERROR("amdgpu: fail to register dmub hpd callback");
-                       goto error;
-               }
-       }
-
-       /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
-        * It is expected that DMUB will resend any pending notifications at this point, for
-        * example HPD from DPIA.
-        */
-       if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+               /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+                * It is expected that DMUB will resend any pending notifications at this point. Note
+                * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to
+                * align legacy interface initialization sequence. Connection status will be proactivly
+                * detected once in the amdgpu_dm_initialize_drm_device.
+                */
                 dc_enable_dmub_outbox(adev->dm.dc);
  
                 /* DPIA trace goes to dmesg logs only if outbox is enabled */
@@ -1956,7 +1947,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
                                       &adev->dm.dmub_bo_gpu_addr,
                                       &adev->dm.dmub_bo_cpu_addr);
  
-       if (adev->dm.hpd_rx_offload_wq) {
+       if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) {
                 for (i = 0; i < adev->dm.dc->caps.max_links; i++) {
                         if (adev->dm.hpd_rx_offload_wq[i].wq) {
                                 destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq);
@@ -2287,6 +2278,7 @@ static int dm_sw_fini(void *handle)
  
         if (adev->dm.dmub_srv) {
                 dmub_srv_destroy(adev->dm.dmub_srv);
+               kfree(adev->dm.dmub_srv);
                 adev->dm.dmub_srv = NULL;
         }
  
@@ -3536,6 +3528,14 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
         int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
         int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
  
+       if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+               if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true))
+                       DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+
+               if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true))
+                       DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+       }
+
         list_for_each_entry(connector,
                         &dev->mode_config.connector_list, head) {
  
@@ -3564,10 +3564,6 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
                                         handle_hpd_rx_irq,
                                         (void *) aconnector);
                 }
-
-               if (adev->dm.hpd_rx_offload_wq)
-                       adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
-                               aconnector;
         }
  }
  
@@ -4561,6 +4557,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
                         goto fail;
                 }
  
+               if (dm->hpd_rx_offload_wq)
+                       dm->hpd_rx_offload_wq[aconnector->base.index].aconnector =
+                               aconnector;
+
                 if (!dc_link_detect_connection_type(link, &new_connection_type))
                         DRM_ERROR("KMS: Failed to detect connector\n");
  
@@ -5219,6 +5219,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
                                 struct drm_plane_state *new_plane_state,
                                 struct drm_crtc_state *crtc_state,
                                 struct dc_flip_addrs *flip_addrs,
+                               bool is_psr_su,
                                 bool *dirty_regions_changed)
  {
         struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
@@ -5243,6 +5244,10 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
         num_clips = drm_plane_get_damage_clips_count(new_plane_state);
         clips = drm_plane_get_damage_clips(new_plane_state);
  
+       if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 &&
+                                                  is_psr_su)))
+               goto ffu;
+
         if (!dm_crtc_state->mpo_requested) {
                 if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
                         goto ffu;
@@ -6194,7 +6199,9 @@ create_stream_for_sink(struct drm_connector *connector,
                 if (recalculate_timing) {
                         freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
                         drm_mode_copy(&saved_mode, &mode);
+                       saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio;
                         drm_mode_copy(&mode, freesync_mode);
+                       mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio;
                 } else {
                         decide_crtc_timing_for_drm_display_mode(
                                         &mode, preferred_mode, scale);
@@ -6527,10 +6534,15 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector)
  static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
  {
         struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
-       struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
         struct dc_link *dc_link = aconnector->dc_link;
         struct dc_sink *dc_em_sink = aconnector->dc_em_sink;
         struct edid *edid;
+       struct i2c_adapter *ddc;
+
+       if (dc_link->aux_mode)
+               ddc = &aconnector->dm_dp_aux.aux.ddc;
+       else
+               ddc = &aconnector->i2c->base;
  
         /*
          * Note: drm_get_edid gets edid in the following order:
@@ -6538,7 +6550,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
          * 2) firmware EDID if set via edid_firmware module parameter
          * 3) regular DDC read.
          */
-       edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc);
+       edid = drm_get_edid(connector, ddc);
         if (!edid) {
                 DRM_ERROR("No EDID found on connector: %s.\n", connector->name);
                 return;
@@ -6579,12 +6591,18 @@ static int get_modes(struct drm_connector *connector)
  static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
  {
         struct drm_connector *connector = &aconnector->base;
-       struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(&aconnector->base);
+       struct dc_link *dc_link = aconnector->dc_link;
         struct dc_sink_init_data init_params = {
                         .link = aconnector->dc_link,
                         .sink_signal = SIGNAL_TYPE_VIRTUAL
         };
         struct edid *edid;
+       struct i2c_adapter *ddc;
+
+       if (dc_link->aux_mode)
+               ddc = &aconnector->dm_dp_aux.aux.ddc;
+       else
+               ddc = &aconnector->i2c->base;
  
         /*
          * Note: drm_get_edid gets edid in the following order:
@@ -6592,7 +6610,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
          * 2) firmware EDID if set via edid_firmware module parameter
          * 3) regular DDC read.
          */
-       edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc);
+       edid = drm_get_edid(connector, ddc);
         if (!edid) {
                 DRM_ERROR("No EDID found on connector: %s.\n", connector->name);
                 return;
@@ -8298,6 +8316,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
                         fill_dc_dirty_rects(plane, old_plane_state,
                                             new_plane_state, new_crtc_state,
                                             &bundle->flip_addrs[planes_count],
+                                           acrtc_state->stream->link->psr_settings.psr_version ==
+                                           DC_PSR_VERSION_SU_1,
                                             &dirty_rects_changed);
  
                         /*
@@ -9187,6 +9207,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
                  * To fix this, DC should permit updating only stream properties.
                  */
                 dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC);
+               if (!dummy_updates) {
+                       DRM_ERROR("Failed to allocate memory for dummy_updates.\n");
+                       continue;
+               }
                 for (j = 0; j < status->plane_count; j++)
                         dummy_updates[j].surface = status->plane_states[0];
  
@@ -10727,11 +10751,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
                         goto fail;
                 }
  
-               ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
-               if (ret) {
-                       DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
-                       ret = -EINVAL;
-                       goto fail;
+               if (dc_resource_is_dsc_encoding_supported(dc)) {
+                       ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+                       if (ret) {
+                               DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
+                               ret = -EINVAL;
+                               goto fail;
+                       }
                 }
  
                 ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c

index 85b7f58a7f35a478f551ec097b1613b504ced535..c27063305a1341c677c95e91dd49eb4fca1ea94a 100644 (file)
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -67,6 +67,8 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
         /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
         case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
         case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+       case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A):
+       case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1):
                 DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
                 edid_caps->panel_patch.remove_sink_ext_caps = true;
                 break;
@@ -120,6 +122,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
  
         edid_caps->edid_hdmi = connector->display_info.is_hdmi;
  
+       apply_edid_quirks(edid_buf, edid_caps);
+
         sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads);
         if (sad_count <= 0)
                 return result;
@@ -146,8 +150,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
         else
                 edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION;
  
-       apply_edid_quirks(edid_buf, edid_caps);
-
         kfree(sads);
         kfree(sadb);
  
diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c

index f2dfa96f9ef5d9e4805fdbf592cac078efa391a5..39530b2ea4957cc0a6718f322158e101f15431d0 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
@@ -94,7 +94,7 @@ static void calculate_bandwidth(
         const uint32_t s_high = 7;
         const uint32_t dmif_chunk_buff_margin = 1;
  
-       uint32_t max_chunks_fbc_mode;
+       uint32_t max_chunks_fbc_mode = 0;
         int32_t num_cursor_lines;
  
         int32_t i, j, k;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c

index 960c4b4f6ddf3670156abd99cc0a02aeb176c7dc..05f392501c0ae3572250061b31defef7cde51fb5 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1850,19 +1850,21 @@ static enum bp_result get_firmware_info_v3_2(
                 /* Vega12 */
                 smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
                                                         DATA_TABLES(smu_info));
-               DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
                 if (!smu_info_v3_2)
                         return BP_RESULT_BADBIOSTABLE;
  
+               DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
+
                 info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
         } else if (revision.minor == 3) {
                 /* Vega20 */
                 smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
                                                         DATA_TABLES(smu_info));
-               DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
                 if (!smu_info_v3_3)
                         return BP_RESULT_BADBIOSTABLE;
  
+               DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
+
                 info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
         }
  
@@ -2422,10 +2424,11 @@ static enum bp_result get_integrated_info_v11(
         info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11,
                                         DATA_TABLES(integratedsysteminfo));
  
-       DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
         if (info_v11 == NULL)
                 return BP_RESULT_BADBIOSTABLE;
  
+       DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
+
         info->gpu_cap_info =
         le32_to_cpu(info_v11->gpucapinfo);
         /*
@@ -2637,11 +2640,12 @@ static enum bp_result get_integrated_info_v2_1(
  
         info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1,
                                         DATA_TABLES(integratedsysteminfo));
-       DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
  
         if (info_v2_1 == NULL)
                 return BP_RESULT_BADBIOSTABLE;
  
+       DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
+
         info->gpu_cap_info =
         le32_to_cpu(info_v2_1->gpucapinfo);
         /*
@@ -2799,11 +2803,11 @@ static enum bp_result get_integrated_info_v2_2(
         info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2,
                                         DATA_TABLES(integratedsysteminfo));
  
-       DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
-
         if (info_v2_2 == NULL)
                 return BP_RESULT_BADBIOSTABLE;
  
+       DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
+
         info->gpu_cap_info =
         le32_to_cpu(info_v2_2->gpucapinfo);
         /*
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c

index a5489fe6875f453149d622d59e9b6417b4db616c..aa9fd1dc550a5e8b2142cfb10db96f4779bdc788 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
@@ -546,6 +546,8 @@ static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_ta
         int i;
  
         for (i = 0; i < VG_NUM_SOC_VOLTAGE_LEVELS; i++) {
+               if (i >= VG_NUM_DCFCLK_DPM_LEVELS)
+                       break;
                 if (clock_table->SocVoltage[i] == voltage)
                         return clock_table->DcfClocks[i];
         }
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c

index 9c660d1facc7699d7a1b3f90292ae31d985fd259..e648902592358ff08ca3a536d9f0abe56bfe5e34 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -437,32 +437,32 @@ static struct wm_table ddr5_wm_table = {
                         .wm_inst = WM_A,
                         .wm_type = WM_TYPE_PSTATE_CHG,
                         .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 14.0,
-                       .sr_enter_plus_exit_time_us = 16.0,
+                       .sr_exit_time_us = 28.0,
+                       .sr_enter_plus_exit_time_us = 30.0,
                         .valid = true,
                 },
                 {
                         .wm_inst = WM_B,
                         .wm_type = WM_TYPE_PSTATE_CHG,
                         .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 14.0,
-                       .sr_enter_plus_exit_time_us = 16.0,
+                       .sr_exit_time_us = 28.0,
+                       .sr_enter_plus_exit_time_us = 30.0,
                         .valid = true,
                 },
                 {
                         .wm_inst = WM_C,
                         .wm_type = WM_TYPE_PSTATE_CHG,
                         .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 14.0,
-                       .sr_enter_plus_exit_time_us = 16.0,
+                       .sr_exit_time_us = 28.0,
+                       .sr_enter_plus_exit_time_us = 30.0,
                         .valid = true,
                 },
                 {
                         .wm_inst = WM_D,
                         .wm_type = WM_TYPE_PSTATE_CHG,
                         .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 14.0,
-                       .sr_enter_plus_exit_time_us = 16.0,
+                       .sr_exit_time_us = 28.0,
+                       .sr_enter_plus_exit_time_us = 30.0,
                         .valid = true,
                 },
         }
@@ -474,32 +474,32 @@ static struct wm_table lpddr5_wm_table = {
                         .wm_inst = WM_A,
                         .wm_type = WM_TYPE_PSTATE_CHG,
                         .pstate_latency_us = 11.65333,
-                       .sr_exit_time_us = 14.0,
-                       .sr_enter_plus_exit_time_us = 16.0,
+                       .sr_exit_time_us = 28.0,
+                       .sr_enter_plus_exit_time_us = 30.0,
                         .valid = true,
                 },
                 {
                         .wm_inst = WM_B,
                         .wm_type = WM_TYPE_PSTATE_CHG,
                         .pstate_latency_us = 11.65333,
-                       .sr_exit_time_us = 14.0,
-                       .sr_enter_plus_exit_time_us = 16.0,
+                       .sr_exit_time_us = 28.0,
+                       .sr_enter_plus_exit_time_us = 30.0,
                         .valid = true,
                 },
                 {
                         .wm_inst = WM_C,
                         .wm_type = WM_TYPE_PSTATE_CHG,
                         .pstate_latency_us = 11.65333,
-                       .sr_exit_time_us = 14.0,
-                       .sr_enter_plus_exit_time_us = 16.0,
+                       .sr_exit_time_us = 28.0,
+                       .sr_enter_plus_exit_time_us = 30.0,
                         .valid = true,
                 },
                 {
                         .wm_inst = WM_D,
                         .wm_type = WM_TYPE_PSTATE_CHG,
                         .pstate_latency_us = 11.65333,
-                       .sr_exit_time_us = 14.0,
-                       .sr_enter_plus_exit_time_us = 16.0,
+                       .sr_exit_time_us = 28.0,
+                       .sr_enter_plus_exit_time_us = 30.0,
                         .valid = true,
                 },
         }
@@ -655,10 +655,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
         struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
         uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
         uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0;
+       uint32_t num_memps, num_fclk, num_dcfclk;
         int i;
  
         /* Determine min/max p-state values. */
-       for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
+       num_memps = (clock_table->NumMemPstatesEnabled > NUM_MEM_PSTATE_LEVELS) ? NUM_MEM_PSTATE_LEVELS :
+               clock_table->NumMemPstatesEnabled;
+       for (i = 0; i < num_memps; i++) {
                 uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
  
                 if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) {
@@ -670,7 +673,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
         min_dram_speed_mts = max_dram_speed_mts;
         min_pstate = max_pstate;
  
-       for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) {
+       for (i = 0; i < num_memps; i++) {
                 uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
  
                 if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) {
@@ -699,9 +702,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
         /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
         ASSERT(clock_table->NumDcfClkLevelsEnabled > 0);
  
-       max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled);
+       num_fclk = (clock_table->NumFclkLevelsEnabled > NUM_FCLK_DPM_LEVELS) ? NUM_FCLK_DPM_LEVELS :
+               clock_table->NumFclkLevelsEnabled;
+       max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk);
  
-       for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+       num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
+               clock_table->NumDcfClkLevelsEnabled;
+       for (i = 0; i < num_dcfclk; i++) {
                 int j;
  
                 /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c

index aa7c02ba948e9ce63aa84eb7518f9c73c80d107a..2c424e435962d4ddd73648aeb3b531ad1bd7aa92 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3817,7 +3817,9 @@ static void commit_planes_for_stream(struct dc *dc,
                  * programming has completed (we turn on phantom OTG in order
                  * to complete the plane disable for phantom pipes).
                  */
-               dc->hwss.apply_ctx_to_hw(dc, context);
+
+               if (dc->hwss.disable_phantom_streams)
+                       dc->hwss.disable_phantom_streams(dc, context);
         }
  
         if (update_type != UPDATE_TYPE_FAST)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c

index 88c6436b28b69ca7f4791bdc47404cd5f73a5f83..180ac47868c22a68c1af47096db95ecf6b11994c 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -291,11 +291,14 @@ void dc_state_destruct(struct dc_state *state)
                 dc_stream_release(state->phantom_streams[i]);
                 state->phantom_streams[i] = NULL;
         }
+       state->phantom_stream_count = 0;
  
         for (i = 0; i < state->phantom_plane_count; i++) {
                 dc_plane_state_release(state->phantom_planes[i]);
                 state->phantom_planes[i] = NULL;
         }
+       state->phantom_plane_count = 0;
+
         state->stream_mask = 0;
         memset(&state->res_ctx, 0, sizeof(state->res_ctx));
         memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg));
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c

index 2b79a0e5638e1b757ea3d3527add517db139552e..363d522603a21744c02e3e3497a2907862b02fd1 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -125,7 +125,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
                 unsigned int count,
                 union dmub_rb_cmd *cmd_list)
  {
-       struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+       struct dc_context *dc_ctx;
         struct dmub_srv *dmub;
         enum dmub_status status;
         int i;
@@ -133,6 +133,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
         if (!dc_dmub_srv || !dc_dmub_srv->dmub)
                 return false;
  
+       dc_ctx = dc_dmub_srv->ctx;
         dmub = dc_dmub_srv->dmub;
  
         for (i = 0 ; i < count; i++) {
@@ -1161,7 +1162,7 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con
  
  bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
  {
-       struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+       struct dc_context *dc_ctx;
         enum dmub_status status;
  
         if (!dc_dmub_srv || !dc_dmub_srv->dmub)
@@ -1170,6 +1171,8 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
         if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
                 return true;
  
+       dc_ctx = dc_dmub_srv->ctx;
+
         if (wait) {
                 if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
                         do {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c

index e8570060d007ba5bab0db3b3395aca2b9c487573..5bca67407c5b16b682ed669ef2b6382be7965b1b 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
@@ -290,4 +290,5 @@ void dce_panel_cntl_construct(
         dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs;
         dce_panel_cntl->base.ctx = init_data->ctx;
         dce_panel_cntl->base.inst = init_data->inst;
+       dce_panel_cntl->base.pwrseq_inst = 0;
  }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c

index e43f77c11c00825aad64ada6ddfb4b0bdce23aff..5f97a868ada34734d99a6a35a329d9c3cd3c5ac2 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
@@ -56,16 +56,13 @@ static void dpp3_enable_cm_block(
  
  static enum dc_lut_mode dpp30_get_gamcor_current(struct dpp *dpp_base)
  {
-       enum dc_lut_mode mode;
+       enum dc_lut_mode mode = LUT_BYPASS;
         uint32_t state_mode;
         uint32_t lut_mode;
         struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base);
  
         REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, &state_mode);
  
-       if (state_mode == 0)
-               mode = LUT_BYPASS;
-
         if (state_mode == 2) {//Programmable RAM LUT
                 REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, &lut_mode);
                 if (lut_mode == 0)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c

index ad0df1a72a90ab4ff13b267f1c69392e68703884..9e96a3ace2077cb53bff30f5984a5391a017d239 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
@@ -215,4 +215,5 @@ void dcn301_panel_cntl_construct(
         dcn301_panel_cntl->base.funcs = &dcn301_link_panel_cntl_funcs;
         dcn301_panel_cntl->base.ctx = init_data->ctx;
         dcn301_panel_cntl->base.inst = init_data->inst;
+       dcn301_panel_cntl->base.pwrseq_inst = 0;
  }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c

index 03248422d6ffde2d6923fb33185bf8dd12607787..281be20b1a1071576a4ca9037ee105333268801e 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -154,8 +154,24 @@ void dcn31_panel_cntl_construct(
         struct dcn31_panel_cntl *dcn31_panel_cntl,
         const struct panel_cntl_init_data *init_data)
  {
+       uint8_t pwrseq_inst = 0xF;
+
         dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs;
         dcn31_panel_cntl->base.ctx = init_data->ctx;
         dcn31_panel_cntl->base.inst = init_data->inst;
-       dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst;
+
+       switch (init_data->eng_id) {
+       case ENGINE_ID_DIGA:
+               pwrseq_inst = 0;
+               break;
+       case ENGINE_ID_DIGB:
+               pwrseq_inst = 1;
+               break;
+       default:
+               DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id);
+               ASSERT(false);
+               break;
+       }
+
+       dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst;
  }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c

index 501388014855c5a1f830b6a830d9f6eed9bf3224..d761b0df28784afd5d81dfef193dfc11657ddff2 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
@@ -203,12 +203,12 @@ void dcn32_link_encoder_construct(
         enc10->base.hpd_source = init_data->hpd_source;
         enc10->base.connector = init_data->connector;
  
-       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
-               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
  
         enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
  
         enc10->base.features = *enc_features;
+       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
  
         enc10->base.transmitter = init_data->transmitter;
  
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c

index da94e5309fbaf0f8e06a4a1aad4ce431a8d9f2cc..81e349d5835bbed499f03ef6eb33e5210c83d64b 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
@@ -184,8 +184,6 @@ void dcn35_link_encoder_construct(
         enc10->base.hpd_source = init_data->hpd_source;
         enc10->base.connector = init_data->connector;
  
-       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
-               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
  
         enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
  
@@ -240,6 +238,8 @@ void dcn35_link_encoder_construct(
         }
  
         enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
  
         if (bp_funcs->get_connector_speed_cap_info)
                 result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile

index 6042a5a6a44f8c32187b2bea702892572f08ec57..59ade76ffb18d56f26a6b329b850462150214c04 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -72,11 +72,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
  CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
  CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
  CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag)
  CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag)
  CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag)
  CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
  CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag)
  CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c

index 9f37f717a1f86f88c5fa41bc30f477406d70f3b8..a0a65e0991041d90904c516c7279c5b8aa76967c 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -1112,7 +1112,7 @@ struct pipe_slice_table {
                 struct pipe_ctx *pri_pipe;
                 struct dc_plane_state *plane;
                 int slice_count;
-       } mpc_combines[MAX_SURFACES];
+       } mpc_combines[MAX_PLANES];
         int mpc_combine_count;
  };
  
@@ -1288,7 +1288,7 @@ static bool update_pipes_with_split_flags(struct dc *dc, struct dc_state *contex
         return updated;
  }
  
-static bool should_allow_odm_power_optimization(struct dc *dc,
+static bool should_apply_odm_power_optimization(struct dc *dc,
                 struct dc_state *context, struct vba_vars_st *v, int *split,
                 bool *merge)
  {
@@ -1392,9 +1392,12 @@ static void try_odm_power_optimization_and_revalidate(
  {
         int i;
         unsigned int new_vlevel;
+       unsigned int cur_policy[MAX_PIPES];
  
-       for (i = 0; i < pipe_cnt; i++)
+       for (i = 0; i < pipe_cnt; i++) {
+               cur_policy[i] = pipes[i].pipe.dest.odm_combine_policy;
                 pipes[i].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+       }
  
         new_vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
  
@@ -1403,6 +1406,9 @@ static void try_odm_power_optimization_and_revalidate(
                 memset(merge, 0, MAX_PIPES * sizeof(bool));
                 *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, new_vlevel, split, merge);
                 context->bw_ctx.dml.vba.VoltageLevel = *vlevel;
+       } else {
+               for (i = 0; i < pipe_cnt; i++)
+                       pipes[i].pipe.dest.odm_combine_policy = cur_policy[i];
         }
  }
  
@@ -1580,7 +1586,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
                 }
         }
  
-       if (should_allow_odm_power_optimization(dc, context, vba, split, merge))
+       if (should_apply_odm_power_optimization(dc, context, vba, split, merge))
                 try_odm_power_optimization_and_revalidate(
                                 dc, context, pipes, split, merge, vlevel, *pipe_cnt);
  
@@ -2209,7 +2215,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
                 int i;
  
                 pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
-               dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
+               if (!dc->config.enable_windowed_mpo_odm)
+                       dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
  
                 /* repopulate_pipes = 1 means the pipes were either split or merged. In this case
                  * we have to re-calculate the DET allocation and run through DML once more to
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c

index 475c4ec43c013f481a71ad5668a8aef82ac7ba0a..7ea2bd5374d51b138d13179ab7444d0d8d2ef3a7 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -164,8 +164,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
                 },
         },
         .num_states = 5,
-       .sr_exit_time_us = 14.0,
-       .sr_enter_plus_exit_time_us = 16.0,
+       .sr_exit_time_us = 28.0,
+       .sr_enter_plus_exit_time_us = 30.0,
         .sr_exit_z8_time_us = 210.0,
         .sr_enter_plus_exit_z8_time_us = 320.0,
         .fclk_change_latency_us = 24.0,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c

index 64d01a9cd68c859db9bcffbc478ef09090b07fbf..1ba6933d2b3617aa6d275647d17320dd0755ae69 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -341,9 +341,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
                 break;
         }
  
-       if (dml2->config.bbox_overrides.clks_table.num_states)
-                       p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
-
         /* Override from passed values, if available */
         for (i = 0; i < p->in_states->num_states; i++) {
                 if (dml2->config.bbox_overrides.sr_exit_latency_us) {
@@ -400,7 +397,7 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
         }
         /* Copy clocks tables entries, if available */
         if (dml2->config.bbox_overrides.clks_table.num_states) {
-
+               p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
                 for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) {
                         p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz;
                 }
@@ -439,6 +436,14 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
         }
  
         dml2_policy_build_synthetic_soc_states(s, p);
+       if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 ||
+               dml2->v20.dml_core_ctx.project == dml_project_dcn351) {
+               // Override last out_state with data from last in_state
+               // This will ensure that out_state contains max fclk
+               memcpy(&p->out_states->state_array[p->out_states->num_states - 1],
+                               &p->in_states->state_array[p->in_states->num_states - 1],
+                               sizeof(struct soc_state_bounding_box_st));
+       }
  }
  
  void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out)
@@ -793,35 +798,28 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p
         }
  }
  
-/*TODO no support for mpc combine, need rework - should calculate scaling params based on plane+stream*/
-static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, const struct dc_state *context)
+static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context)
  {
         int i;
-       struct scaler_data data = { 0 };
+       struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe;
+
+       memset(temp_pipe, 0, sizeof(struct pipe_ctx));
  
         for (i = 0; i < MAX_PIPES; i++) {
                 const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
  
                 if (pipe->plane_state == in && !pipe->prev_odm_pipe) {
-                       const struct pipe_ctx *next_pipe = pipe->next_odm_pipe;
-
-                       data = context->res_ctx.pipe_ctx[i].plane_res.scl_data;
-                       while (next_pipe) {
-                               data.h_active += next_pipe->plane_res.scl_data.h_active;
-                               data.recout.width += next_pipe->plane_res.scl_data.recout.width;
-                               if (in->rotation == ROTATION_ANGLE_0 || in->rotation == ROTATION_ANGLE_180) {
-                                       data.viewport.width += next_pipe->plane_res.scl_data.viewport.width;
-                               } else {
-                                       data.viewport.height += next_pipe->plane_res.scl_data.viewport.height;
-                               }
-                               next_pipe = next_pipe->next_odm_pipe;
-                       }
+                       temp_pipe->stream = pipe->stream;
+                       temp_pipe->plane_state = pipe->plane_state;
+                       temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps;
+
+                       resource_build_scaling_params(temp_pipe);
                         break;
                 }
         }
  
         ASSERT(i < MAX_PIPES);
-       return data;
+       return temp_pipe->plane_res.scl_data;
  }
  
  static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in)
@@ -866,7 +864,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned
         out->ScalerEnabled[location] = false;
  }
  
-static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, const struct dc_state *context)
+static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context)
  {
         const struct scaler_data scaler_data = get_scaler_data_for_plane(in, context);
  
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c

index 26307e599614c6e1212c53184ba02849ae6e1dbb..2a58a7687bdb5779db6c639d3cbf2277aaf231ae 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -76,6 +76,11 @@ static void map_hw_resources(struct dml2_context *dml2,
                         in_out_display_cfg->hw.DLGRefClkFreqMHz = 50;
                 }
                 for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) {
+                       if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) {
+                               dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n",
+                                         __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+                               break;
+                       }
                         dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i];
                         dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true;
                         dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i];
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c

index 5660f15da291e9de58637c115e315b07f1cee7a3..01493c49bd7a084b1748bb786c56106858709dcc 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1183,9 +1183,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
                 dto_params.timing = &pipe_ctx->stream->timing;
                 dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
                 if (dccg) {
-                       dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
                         dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
                         dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
+                       dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
                 }
         } else if (dccg && dccg->funcs->disable_symclk_se) {
                 dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
@@ -1476,7 +1476,7 @@ static enum dc_status dce110_enable_stream_timing(
         return DC_OK;
  }
  
-static enum dc_status apply_single_controller_ctx_to_hw(
+enum dc_status dce110_apply_single_controller_ctx_to_hw(
                 struct pipe_ctx *pipe_ctx,
                 struct dc_state *context,
                 struct dc *dc)
@@ -2302,7 +2302,7 @@ enum dc_status dce110_apply_ctx_to_hw(
                 if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
                         continue;
  
-               status = apply_single_controller_ctx_to_hw(
+               status = dce110_apply_single_controller_ctx_to_hw(
                                 pipe_ctx,
                                 context,
                                 dc);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h

index 08028a1779ae819282ab2394de57c4b8f266a9f3..ed3cc3648e8e23f8d076b92e10a23791253f9662 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
@@ -39,6 +39,10 @@ enum dc_status dce110_apply_ctx_to_hw(
                 struct dc *dc,
                 struct dc_state *context);
  
+enum dc_status dce110_apply_single_controller_ctx_to_hw(
+               struct pipe_ctx *pipe_ctx,
+               struct dc_state *context,
+               struct dc *dc);
  
  void dce110_enable_stream(struct pipe_ctx *pipe_ctx);
  
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c

index e931342fcf4cf1d4f4b0cf41628cd9f855fa6dac..931ac8ed7069d7bdcd3ca2f0c35f5e5f04552827 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -2561,7 +2561,7 @@ void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
                 tg->funcs->setup_vertical_interrupt2(tg, start_line);
  }
  
-static void dcn20_reset_back_end_for_pipe(
+void dcn20_reset_back_end_for_pipe(
                 struct dc *dc,
                 struct pipe_ctx *pipe_ctx,
                 struct dc_state *context)
@@ -2790,18 +2790,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
         }
  
         if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
-               dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
-               dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
-
-               phyd32clk = get_phyd32clk_src(link);
-               dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
-
                 dto_params.otg_inst = tg->inst;
                 dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10;
                 dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx);
                 dto_params.timing = &pipe_ctx->stream->timing;
                 dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
                 dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+               dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
+               dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
+
+               phyd32clk = get_phyd32clk_src(link);
+               dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
         } else {
                 if (dccg->funcs->enable_symclk_se)
                         dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h

index b94c85340abff7c02f3ec59025b04c8417d77bd6..d950b3e54ec2c7d35fb1c70a53094f0543c17b97 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
@@ -84,6 +84,10 @@ enum dc_status dcn20_enable_stream_timing(
  void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
  void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
  void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_reset_back_end_for_pipe(
+               struct dc *dc,
+               struct pipe_ctx *pipe_ctx,
+               struct dc_state *context);
  void dcn20_init_blank(
                 struct dc *dc,
                 struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c

index 8e88dcaf88f5b2b709a95abf9e0673390e27daa5..7252f5f781f0d7869e147846bc1eb44f09e63593 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -206,28 +206,32 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
  void dcn21_set_pipe(struct pipe_ctx *pipe_ctx)
  {
         struct abm *abm = pipe_ctx->stream_res.abm;
-       uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
+       struct timing_generator *tg = pipe_ctx->stream_res.tg;
         struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
         struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
+       uint32_t otg_inst;
+
+       if (!abm || !tg || !panel_cntl)
+               return;
+
+       otg_inst = tg->inst;
  
         if (dmcu) {
                 dce110_set_pipe(pipe_ctx);
                 return;
         }
  
-       if (abm && panel_cntl) {
-               if (abm->funcs && abm->funcs->set_pipe_ex) {
-                       abm->funcs->set_pipe_ex(abm,
+       if (abm->funcs && abm->funcs->set_pipe_ex) {
+               abm->funcs->set_pipe_ex(abm,
                                         otg_inst,
                                         SET_ABM_PIPE_NORMAL,
                                         panel_cntl->inst,
                                         panel_cntl->pwrseq_inst);
-               } else {
-                               dmub_abm_set_pipe(abm, otg_inst,
-                                               SET_ABM_PIPE_NORMAL,
-                                               panel_cntl->inst,
-                                               panel_cntl->pwrseq_inst);
-               }
+       } else {
+               dmub_abm_set_pipe(abm, otg_inst,
+                                 SET_ABM_PIPE_NORMAL,
+                                 panel_cntl->inst,
+                                 panel_cntl->pwrseq_inst);
         }
  }
  
@@ -237,34 +241,35 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
  {
         struct dc_context *dc = pipe_ctx->stream->ctx;
         struct abm *abm = pipe_ctx->stream_res.abm;
+       struct timing_generator *tg = pipe_ctx->stream_res.tg;
         struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
+       uint32_t otg_inst;
+
+       if (!abm || !tg || !panel_cntl)
+               return false;
+
+       otg_inst = tg->inst;
  
         if (dc->dc->res_pool->dmcu) {
                 dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp);
                 return true;
         }
  
-       if (abm != NULL) {
-               uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
-
-               if (abm && panel_cntl) {
-                       if (abm->funcs && abm->funcs->set_pipe_ex) {
-                               abm->funcs->set_pipe_ex(abm,
-                                               otg_inst,
-                                               SET_ABM_PIPE_NORMAL,
-                                               panel_cntl->inst,
-                                               panel_cntl->pwrseq_inst);
-                       } else {
-                                       dmub_abm_set_pipe(abm,
-                                                       otg_inst,
-                                                       SET_ABM_PIPE_NORMAL,
-                                                       panel_cntl->inst,
-                                                       panel_cntl->pwrseq_inst);
-                       }
-               }
+       if (abm->funcs && abm->funcs->set_pipe_ex) {
+               abm->funcs->set_pipe_ex(abm,
+                                       otg_inst,
+                                       SET_ABM_PIPE_NORMAL,
+                                       panel_cntl->inst,
+                                       panel_cntl->pwrseq_inst);
+       } else {
+               dmub_abm_set_pipe(abm,
+                                 otg_inst,
+                                 SET_ABM_PIPE_NORMAL,
+                                 panel_cntl->inst,
+                                 panel_cntl->pwrseq_inst);
         }
  
-       if (abm && abm->funcs && abm->funcs->set_backlight_level_pwm)
+       if (abm->funcs && abm->funcs->set_backlight_level_pwm)
                 abm->funcs->set_backlight_level_pwm(abm, backlight_pwm_u16_16,
                         frame_ramp, 0, panel_cntl->inst);
         else
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c

index 6c9299c7683df19b3c444b865d297182d91ae7b3..aa36d7a56ca8c3b6f3cd47e67455ba67549bf73b 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -1474,9 +1474,44 @@ void dcn32_update_dsc_pg(struct dc *dc,
         }
  }
  
+void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context)
+{
+       struct dce_hwseq *hws = dc->hwseq;
+       int i;
+
+       for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) {
+               struct pipe_ctx *pipe_ctx_old =
+                       &dc->current_state->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+               if (!pipe_ctx_old->stream)
+                       continue;
+
+               if (dc_state_get_pipe_subvp_type(dc->current_state, pipe_ctx_old) != SUBVP_PHANTOM)
+                       continue;
+
+               if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe)
+                       continue;
+
+               if (!pipe_ctx->stream || pipe_need_reprogram(pipe_ctx_old, pipe_ctx) ||
+                               (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)) {
+                       struct clock_source *old_clk = pipe_ctx_old->clock_source;
+
+                       if (hws->funcs.reset_back_end_for_pipe)
+                               hws->funcs.reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
+                       if (hws->funcs.enable_stream_gating)
+                               hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
+                       if (old_clk)
+                               old_clk->funcs->cs_power_down(old_clk);
+               }
+       }
+}
+
  void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
  {
         unsigned int i;
+       enum dc_status status = DC_OK;
+       struct dce_hwseq *hws = dc->hwseq;
  
         for (i = 0; i < dc->res_pool->pipe_count; i++) {
                 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -1497,16 +1532,39 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
                 }
         }
         for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
-
-               if (new_pipe->stream && dc_state_get_pipe_subvp_type(context, new_pipe) == SUBVP_PHANTOM) {
-                       // If old context or new context has phantom pipes, apply
-                       // the phantom timings now. We can't change the phantom
-                       // pipe configuration safely without driver acquiring
-                       // the DMCUB lock first.
-                       dc->hwss.apply_ctx_to_hw(dc, context);
-                       break;
+               struct pipe_ctx *pipe_ctx_old =
+                                       &dc->current_state->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+               if (pipe_ctx->stream == NULL)
+                       continue;
+
+               if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
+                       continue;
+
+               if (pipe_ctx->stream == pipe_ctx_old->stream &&
+                       pipe_ctx->stream->link->link_state_valid) {
+                       continue;
                 }
+
+               if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
+                       continue;
+
+               if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
+                       continue;
+
+               if (hws->funcs.apply_single_controller_ctx_to_hw)
+                       status = hws->funcs.apply_single_controller_ctx_to_hw(
+                                       pipe_ctx,
+                                       context,
+                                       dc);
+
+               ASSERT(status == DC_OK);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+               if (hws->funcs.resync_fifo_dccg_dio)
+                       hws->funcs.resync_fifo_dccg_dio(hws, dc, context);
+#endif
         }
  }
  
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h

index cecf7f0f567190b257cf81e5f756b5a916eba09c..069e20bc87c0a75af028168253219fc9343b1af3 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
@@ -111,6 +111,8 @@ void dcn32_update_dsc_pg(struct dc *dc,
  
  void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context);
  
+void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context);
+
  void dcn32_init_blank(
                 struct dc *dc,
                 struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c

index 427cfc8c24a4b7ed4cee1f0b6955cbe371797219..e8ac94a005b83a78533646aae0a36ca132eb8a75 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
@@ -109,6 +109,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
         .get_dcc_en_bits = dcn10_get_dcc_en_bits,
         .commit_subvp_config = dcn32_commit_subvp_config,
         .enable_phantom_streams = dcn32_enable_phantom_streams,
+       .disable_phantom_streams = dcn32_disable_phantom_streams,
         .subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock,
         .update_visual_confirm_color = dcn10_update_visual_confirm_color,
         .subvp_pipe_control_lock_fast = dcn32_subvp_pipe_control_lock_fast,
@@ -159,6 +160,8 @@ static const struct hwseq_private_funcs dcn32_private_funcs = {
         .set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
         .resync_fifo_dccg_dio = dcn32_resync_fifo_dccg_dio,
         .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+       .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw,
+       .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe,
  };
  
  void dcn32_hw_sequencer_init_functions(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h

index a54399383318145b8bc72fc85e646bf546588609..64ca7c66509b79bc2cfe50806cc37e8953468239 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -379,6 +379,7 @@ struct hw_sequencer_funcs {
                         struct dc_cursor_attributes *cursor_attr);
         void (*commit_subvp_config)(struct dc *dc, struct dc_state *context);
         void (*enable_phantom_streams)(struct dc *dc, struct dc_state *context);
+       void (*disable_phantom_streams)(struct dc *dc, struct dc_state *context);
         void (*subvp_pipe_control_lock)(struct dc *dc,
                         struct dc_state *context,
                         bool lock,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h

index 6137cf09aa54d25750246e86583c5938e557501b..b3c62a82cb1cf10fddad52dcf85b7e02de87ee35 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
@@ -165,8 +165,15 @@ struct hwseq_private_funcs {
         void (*set_pixels_per_cycle)(struct pipe_ctx *pipe_ctx);
         void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc,
                         struct dc_state *context);
+       enum dc_status (*apply_single_controller_ctx_to_hw)(
+                       struct pipe_ctx *pipe_ctx,
+                       struct dc_state *context,
+                       struct dc *dc);
         bool (*is_dp_dig_pixel_rate_div_policy)(struct pipe_ctx *pipe_ctx);
  #endif
+       void (*reset_back_end_for_pipe)(struct dc *dc,
+                       struct pipe_ctx *pipe_ctx,
+                       struct dc_state *context);
  };
  
  struct dce_hwseq {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h

index f74ae0d41d3c49cf215d615f336339b773cbbcbc..3a6bf77a68732166d320dbea642929c3201d3e01 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -469,6 +469,8 @@ struct resource_context {
         unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS];
         int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS];
         bool is_mpc_3dlut_acquired[MAX_PIPES];
+       /* solely used for build scalar data in dml2 */
+       struct pipe_ctx temp_pipe;
  };
  
  struct dce_bw_output {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h

index 5dcbaa2db964aee7de17c2e9306606cac1817b08..e97d964a1791cefb2eb47c91780a41e3682baed0 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
@@ -57,7 +57,7 @@ struct panel_cntl_funcs {
  struct panel_cntl_init_data {
         struct dc_context *ctx;
         uint32_t inst;
-       uint32_t pwrseq_inst;
+       uint32_t eng_id;
  };
  
  struct panel_cntl {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h

index c958ef37b78a667b1bb9bfb26827ae3e45053715..77a60aa9f27bbfdfa8a652306e2366dc0eca4345 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -427,22 +427,18 @@ struct pipe_ctx *resource_get_primary_dpp_pipe(const struct pipe_ctx *dpp_pipe);
  int resource_get_mpc_slice_index(const struct pipe_ctx *dpp_pipe);
  
  /*
- * Get number of MPC "cuts" of the plane associated with the pipe. MPC slice
- * count is equal to MPC splits + 1. For example if a plane is cut 3 times, it
- * will have 4 pieces of slice.
- * return - 0 if pipe is not used for a plane with MPCC combine. otherwise
- * the number of MPC "cuts" for the plane.
+ * Get the number of MPC slices associated with the pipe.
+ * The function returns 0 if the pipe is not associated with an MPC combine
+ * pipe topology.
   */
-int resource_get_mpc_slice_count(const struct pipe_ctx *opp_head);
+int resource_get_mpc_slice_count(const struct pipe_ctx *pipe);
  
  /*
- * Get number of ODM "cuts" of the timing associated with the pipe. ODM slice
- * count is equal to ODM splits + 1. For example if a timing is cut 3 times, it
- * will have 4 pieces of slice.
- * return - 0 if pipe is not used for ODM combine. otherwise
- * the number of ODM "cuts" for the timing.
+ * Get the number of ODM slices associated with the pipe.
+ * The function returns 0 if the pipe is not associated with an ODM combine
+ * pipe topology.
   */
-int resource_get_odm_slice_count(const struct pipe_ctx *otg_master);
+int resource_get_odm_slice_count(const struct pipe_ctx *pipe);
  
  /* Get the ODM slice index counting from 0 from left most slice */
  int resource_get_odm_slice_index(const struct pipe_ctx *opp_head);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c

index 37d3027c32dcb1007dbb90e209f7f459be81617e..cf22b8f28ba6c65394a536465143d1c2f81bd2b6 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -370,30 +370,6 @@ static enum transmitter translate_encoder_to_transmitter(
         }
  }
  
-static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link)
-{
-       uint8_t pwrseq_inst = 0xF;
-       struct dc_context *dc_ctx = link->dc->ctx;
-
-       DC_LOGGER_INIT(dc_ctx->logger);
-
-       switch (link->eng_id) {
-       case ENGINE_ID_DIGA:
-               pwrseq_inst = 0;
-               break;
-       case ENGINE_ID_DIGB:
-               pwrseq_inst = 1;
-               break;
-       default:
-               DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id);
-               ASSERT(false);
-               break;
-       }
-
-       return pwrseq_inst;
-}
-
-
  static void link_destruct(struct dc_link *link)
  {
         int i;
@@ -657,7 +633,7 @@ static bool construct_phy(struct dc_link *link,
                         link->link_id.id == CONNECTOR_ID_LVDS)) {
                 panel_cntl_init_data.ctx = dc_ctx;
                 panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count;
-               panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link);
+               panel_cntl_init_data.eng_id = link->eng_id;
                 link->panel_cntl =
                         link->dc->res_pool->funcs->panel_cntl_create(
                                                                 &panel_cntl_init_data);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c

index 8fe66c3678508d9aee6779fa25cd6128e1f30832..5b0bc7f6a188ccd6b304a369be0bdfe43b91f76a 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -361,7 +361,7 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un
         struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
         int num_dpias = 0;
  
-       for (uint8_t i = 0; i < num_streams; ++i) {
+       for (unsigned int i = 0; i < num_streams; ++i) {
                 if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
                         /* new dpia sst stream, check whether it exceeds max dpia */
                         if (num_dpias >= MAX_DPIA_NUM)
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c

index dd0d2b206462c927c5f68b355498e71250c154b9..5491b707cec881b9854ab96834503c1e88053380 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -196,7 +196,7 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in
         struct dc_link *link_dpia_primary, *link_dpia_secondary;
         int total_bw = 0;
  
-       for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) {
+       for (uint8_t i = 0; i < (MAX_PIPES * 2) - 1; ++i) {
  
                 if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
                         continue;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c

index 5a0b0451895690d184ec00c56873f0d1acad6864..16a62e01871224495cd771c4042f04d3be85e04d 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status(
  {
         enum link_training_result status = LINK_TRAINING_SUCCESS;
         union lane_status lane_status;
+       union lane_align_status_updated dpcd_lane_status_updated;
         uint8_t dpcd_buf[6] = {0};
         uint32_t lane;
  
@@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status(
                  * check lanes status
                  */
                 lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane);
+               dpcd_lane_status_updated.raw = dpcd_buf[4];
  
                 if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
                         !lane_status.bits.CR_DONE_0 ||
-                       !lane_status.bits.SYMBOL_LOCKED_0) {
+                       !lane_status.bits.SYMBOL_LOCKED_0 ||
+                       !dp_is_interlane_aligned(dpcd_lane_status_updated)) {
                         /* if one of the channel equalization, clock
                          * recovery or symbol lock is dropped
                          * consider it as (link has been
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c

index e8dda44b23cb29aa3ec2686b6656bd044c194606..5d36bab0029ca54a03aaef4fc83ff99e59550e5a 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
@@ -619,7 +619,7 @@ static enum link_training_result dpia_training_eq_non_transparent(
         uint32_t retries_eq = 0;
         enum dc_status status;
         enum dc_dp_training_pattern tr_pattern;
-       uint32_t wait_time_microsec;
+       uint32_t wait_time_microsec = 0;
         enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
         union lane_align_status_updated dpcd_lane_status_updated = {0};
         union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0};
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c

index 511ff6b5b9856776ea834393e4a7bfcaa90ca49f..7538b548c5725177b12e2d169acc681c31174797 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -999,7 +999,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id
         vpg = dcn301_vpg_create(ctx, vpg_inst);
         afmt = dcn301_afmt_create(ctx, afmt_inst);
  
-       if (!enc1 || !vpg || !afmt) {
+       if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) {
                 kfree(enc1);
                 kfree(vpg);
                 kfree(afmt);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c

index c4d71e7f18af47ba47dbc89e1a9098a0a4eade04..6f10052caeef02c3448307c4c81aef805e68e95b 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -1829,7 +1829,21 @@ int dcn32_populate_dml_pipes_from_context(
                 dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
                 DC_FP_END();
                 pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
-               pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+               if (dc->config.enable_windowed_mpo_odm &&
+                               dc->debug.enable_single_display_2to1_odm_policy) {
+                       switch (resource_get_odm_slice_count(pipe)) {
+                       case 2:
+                               pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+                               break;
+                       case 4:
+                               pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1;
+                               break;
+                       default:
+                               pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+                       }
+               } else {
+                       pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+               }
                 pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
                 pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
                 pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c

index 761ec989187568730fdd8cd51cd1802fa657be9c..5fdcda8f86026d94697a069ed53a83752a0ebdee 100644 (file)
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -780,8 +780,8 @@ static const struct dc_debug_options debug_defaults_drv = {
         .disable_z10 = false,
         .ignore_pg = true,
         .psp_disabled_wa = true,
-       .ips2_eval_delay_us = 200,
-       .ips2_entry_delay_us = 400,
+       .ips2_eval_delay_us = 2000,
+       .ips2_entry_delay_us = 800,
         .static_screen_wait_frames = 2,
  };
  
@@ -2130,6 +2130,7 @@ static bool dcn35_resource_construct(
         dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
         dc->dml2_options.use_native_pstate_optimization = true;
         dc->dml2_options.use_native_soc_bb_construction = true;
+       dc->dml2_options.minimize_dispclk_using_odm = false;
         if (dc->config.EnableMinDispClkODM)
                 dc->dml2_options.minimize_dispclk_using_odm = true;
         dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm;
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c

index df4f20293c16a368748cd4138c0912906f80acc7..eb4da3666e05d6d145a927258d7ea247425dad93 100644 (file)
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev)
         return 0;
  }
  
+static int si_set_temperature_range(struct amdgpu_device *adev)
+{
+       int ret;
+
+       ret = si_thermal_enable_alert(adev, false);
+       if (ret)
+               return ret;
+       ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+       if (ret)
+               return ret;
+       ret = si_thermal_enable_alert(adev, true);
+       if (ret)
+               return ret;
+
+       return ret;
+}
+
  static void si_dpm_disable(struct amdgpu_device *adev)
  {
         struct rv7xx_power_info *pi = rv770_get_pi(adev);
@@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev,
  
  static int si_dpm_late_init(void *handle)
  {
+       int ret;
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+       if (!adev->pm.dpm_enabled)
+               return 0;
+
+       ret = si_set_temperature_range(adev);
+       if (ret)
+               return ret;
+#if 0 //TODO ?
+       si_dpm_powergate_uvd(adev, true);
+#endif
         return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

index 7ffad3eb0a01500fedf1be4348b17561bc744a38..0ad947df777ab2665a8f0de986a5d39737dd9ded 100644 (file)
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -734,7 +734,7 @@ static int smu_early_init(void *handle)
         smu->adev = adev;
         smu->pm_enabled = !!amdgpu_dpm;
         smu->is_apu = false;
-       smu->smu_baco.state = SMU_BACO_STATE_NONE;
+       smu->smu_baco.state = SMU_BACO_STATE_EXIT;
         smu->smu_baco.platform_support = false;
         smu->user_dpm_profile.fan_mode = -1;
  
@@ -1954,31 +1954,10 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
         return 0;
  }
  
-static int smu_reset_mp1_state(struct smu_context *smu)
-{
-       struct amdgpu_device *adev = smu->adev;
-       int ret = 0;
-
-       if ((!adev->in_runpm) && (!adev->in_suspend) &&
-               (!amdgpu_in_reset(adev)))
-               switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
-               case IP_VERSION(13, 0, 0):
-               case IP_VERSION(13, 0, 7):
-               case IP_VERSION(13, 0, 10):
-                       ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
-                       break;
-               default:
-                       break;
-               }
-
-       return ret;
-}
-
  static int smu_hw_fini(void *handle)
  {
         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
         struct smu_context *smu = adev->powerplay.pp_handle;
-       int ret;
  
         if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
                 return 0;
@@ -1996,15 +1975,7 @@ static int smu_hw_fini(void *handle)
  
         adev->pm.dpm_enabled = false;
  
-       ret = smu_smc_hw_cleanup(smu);
-       if (ret)
-               return ret;
-
-       ret = smu_reset_mp1_state(smu);
-       if (ret)
-               return ret;
-
-       return 0;
+       return smu_smc_hw_cleanup(smu);
  }
  
  static void smu_late_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h

index 2aa4fea873147516c23fb2fc568a94d907ee1c8a..66e84defd0b6ec2521c230262c34215a14251dfb 100644 (file)
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -424,7 +424,6 @@ enum smu_reset_mode {
  enum smu_baco_state {
         SMU_BACO_STATE_ENTER = 0,
         SMU_BACO_STATE_EXIT,
-       SMU_BACO_STATE_NONE,
  };
  
  struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c

index 4cd43bbec910e351eb27a79b4c39308d6462d196..bcad42534da46d780423d636953c40993e7001ac 100644 (file)
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1303,13 +1303,12 @@ static int arcturus_get_power_limit(struct smu_context *smu,
         if (default_power_limit)
                 *default_power_limit = power_limit;
  
-       if (smu->od_enabled) {
+       if (smu->od_enabled)
                 od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
-               od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
-       } else {
+       else
                 od_percent_upper = 0;
-               od_percent_lower = 100;
-       }
+
+       od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
  
         dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
                                                         od_percent_upper, od_percent_lower, power_limit);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c

index 8d1d29ffb0f1c54a781c2508447454f9bb7aa5ee..ed189a3878ebe7199833e495f45417461897a93a 100644 (file)
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2357,13 +2357,12 @@ static int navi10_get_power_limit(struct smu_context *smu,
                 *default_power_limit = power_limit;
  
         if (smu->od_enabled &&
-                   navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) {
+                   navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT))
                 od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
-               od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
-       } else {
+       else
                 od_percent_upper = 0;
-               od_percent_lower = 100;
-       }
+
+       od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
  
         dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
                                         od_percent_upper, od_percent_lower, power_limit);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c

index 21fc033528fa9d1a57ea2699a2780501e2902b3c..e2ad2b972ab0b3550d7aceb66e632eb372a0ffc5 100644 (file)
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -640,13 +640,12 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu,
         if (default_power_limit)
                 *default_power_limit = power_limit;
  
-       if (smu->od_enabled) {
+       if (smu->od_enabled)
                 od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]);
-               od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]);
-       } else {
+       else
                 od_percent_upper = 0;
-               od_percent_lower = 100;
-       }
+
+       od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]);
  
         dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
                                         od_percent_upper, od_percent_lower, power_limit);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c

index 4fdf34fffa9a57f53ad55418e957e36282ac98b9..9b80f18ea6c359f279f050ee9f645b92dd43d057 100644 (file)
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2369,13 +2369,12 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu,
         if (default_power_limit)
                 *default_power_limit = power_limit;
  
-       if (smu->od_enabled) {
+       if (smu->od_enabled)
                 od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]);
-               od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]);
-       } else {
+       else
                 od_percent_upper = 0;
-               od_percent_lower = 100;
-       }
+
+       od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]);
  
         dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
                                         od_percent_upper, od_percent_lower, power_limit);
@@ -2748,13 +2747,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
  
         switch (mp1_state) {
         case PP_MP1_STATE_UNLOAD:
-               ret = smu_cmn_send_smc_msg_with_param(smu,
-                                                                                         SMU_MSG_PrepareMp1ForUnload,
-                                                                                         0x55, NULL);
-
-               if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
-                       ret = smu_v13_0_disable_pmfw_state(smu);
-
+               ret = smu_cmn_set_mp1_state(smu, mp1_state);
                 break;
         default:
                 /* Ignore others */
@@ -2950,7 +2943,7 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu)
  {
         struct amdgpu_device *adev = smu->adev;
  
-       switch (adev->ip_versions[MP1_HWIP][0]) {
+       switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
         case IP_VERSION(13, 0, 0):
                 return smu->smc_fw_version >= 0x004e6300;
         case IP_VERSION(13, 0, 10):
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c

index 7c3e162e2d818fa4083c62373990b2c7e9a69e26..3dc7b60cb0754d0f62fd3cead74f1553071b8597 100644 (file)
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2333,13 +2333,12 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu,
         if (default_power_limit)
                 *default_power_limit = power_limit;
  
-       if (smu->od_enabled) {
+       if (smu->od_enabled)
                 od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]);
-               od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]);
-       } else {
+       else
                 od_percent_upper = 0;
-               od_percent_lower = 100;
-       }
+
+       od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]);
  
         dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
                                         od_percent_upper, od_percent_lower, power_limit);
@@ -2505,13 +2504,7 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
  
         switch (mp1_state) {
         case PP_MP1_STATE_UNLOAD:
-               ret = smu_cmn_send_smc_msg_with_param(smu,
-                                                                                         SMU_MSG_PrepareMp1ForUnload,
-                                                                                         0x55, NULL);
-
-               if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
-                       ret = smu_v13_0_disable_pmfw_state(smu);
-
+               ret = smu_cmn_set_mp1_state(smu, mp1_state);
                 break;
         default:
                 /* Ignore others */
diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c

index bb55f697a1819264e1320f6118d28dd776236f4f..6886db2d9e00c4544ee3d81e29e779f806c8a9b7 100644 (file)
--- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c
+++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c
@@ -25,20 +25,18 @@ static void drm_aux_hpd_bridge_release(struct device *dev)
         ida_free(&drm_aux_hpd_bridge_ida, adev->id);
  
         of_node_put(adev->dev.platform_data);
+       of_node_put(adev->dev.of_node);
  
         kfree(adev);
  }
  
-static void drm_aux_hpd_bridge_unregister_adev(void *_adev)
+static void drm_aux_hpd_bridge_free_adev(void *_adev)
  {
-       struct auxiliary_device *adev = _adev;
-
-       auxiliary_device_delete(adev);
-       auxiliary_device_uninit(adev);
+       auxiliary_device_uninit(_adev);
  }
  
  /**
- * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge
+ * devm_drm_dp_hpd_bridge_alloc - allocate a HPD DisplayPort bridge
   * @parent: device instance providing this bridge
   * @np: device node pointer corresponding to this bridge instance
   *
@@ -46,11 +44,9 @@ static void drm_aux_hpd_bridge_unregister_adev(void *_adev)
   * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is
   * able to send the HPD events.
   *
- * Return: device instance that will handle created bridge or an error code
- * encoded into the pointer.
+ * Return: bridge auxiliary device pointer or an error pointer
   */
-struct device *drm_dp_hpd_bridge_register(struct device *parent,
-                                         struct device_node *np)
+struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np)
  {
         struct auxiliary_device *adev;
         int ret;
@@ -74,18 +70,62 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent,
  
         ret = auxiliary_device_init(adev);
         if (ret) {
+               of_node_put(adev->dev.platform_data);
+               of_node_put(adev->dev.of_node);
                 ida_free(&drm_aux_hpd_bridge_ida, adev->id);
                 kfree(adev);
                 return ERR_PTR(ret);
         }
  
-       ret = auxiliary_device_add(adev);
-       if (ret) {
-               auxiliary_device_uninit(adev);
+       ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_free_adev, adev);
+       if (ret)
                 return ERR_PTR(ret);
-       }
  
-       ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev);
+       return adev;
+}
+EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_alloc);
+
+static void drm_aux_hpd_bridge_del_adev(void *_adev)
+{
+       auxiliary_device_delete(_adev);
+}
+
+/**
+ * devm_drm_dp_hpd_bridge_add - register a HDP DisplayPort bridge
+ * @dev: struct device to tie registration lifetime to
+ * @adev: bridge auxiliary device to be registered
+ *
+ * Returns: zero on success or a negative errno
+ */
+int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev)
+{
+       int ret;
+
+       ret = auxiliary_device_add(adev);
+       if (ret)
+               return ret;
+
+       return devm_add_action_or_reset(dev, drm_aux_hpd_bridge_del_adev, adev);
+}
+EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_add);
+
+/**
+ * drm_dp_hpd_bridge_register - allocate and register a HDP DisplayPort bridge
+ * @parent: device instance providing this bridge
+ * @np: device node pointer corresponding to this bridge instance
+ *
+ * Return: device instance that will handle created bridge or an error pointer
+ */
+struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np)
+{
+       struct auxiliary_device *adev;
+       int ret;
+
+       adev = devm_drm_dp_hpd_bridge_alloc(parent, np);
+       if (IS_ERR(adev))
+               return ERR_CAST(adev);
+
+       ret = devm_drm_dp_hpd_bridge_add(parent, adev);
         if (ret)
                 return ERR_PTR(ret);
  
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c

index f57e6d74fb0e039a710b9bd8161a8e8e25d5888b..5ebdd6f8f36e6bc8d67e99a54bac3856d45ac9eb 100644 (file)
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
                  u64 start, u64 end,
                  unsigned int order)
  {
+       u64 req_size = mm->chunk_size << order;
         struct drm_buddy_block *block;
         struct drm_buddy_block *buddy;
         LIST_HEAD(dfs);
@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
                 if (drm_buddy_block_is_allocated(block))
                         continue;
  
+               if (block_start < start || block_end > end) {
+                       u64 adjusted_start = max(block_start, start);
+                       u64 adjusted_end = min(block_end, end);
+
+                       if (round_down(adjusted_end + 1, req_size) <=
+                           round_up(adjusted_start, req_size))
+                               continue;
+               }
+
                 if (contains(start, end, block_start, block_end) &&
                     order == drm_buddy_block_order(block)) {
                         /*
@@ -538,7 +548,13 @@ static int __alloc_range(struct drm_buddy *mm,
                 list_add(&block->left->tmp_link, dfs);
         } while (1);
  
+       if (total_allocated < size) {
+               err = -ENOSPC;
+               goto err_free;
+       }
+
         list_splice_tail(&allocated, blocks);
+
         return 0;
  
  err_undo:
@@ -755,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
                 return -EINVAL;
  
         /* Actual range allocation */
-       if (start + size == end)
+       if (start + size == end) {
+               if (!IS_ALIGNED(start | end, min_block_size))
+                       return -EINVAL;
+
                 return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+       }
  
         original_size = size;
         original_min_size = min_block_size;
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c

index cb90e70d85e862a495f2e8691813161a93b7a030..65f9f66933bba2785fc3b64f7040e676c2afd352 100644 (file)
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -904,6 +904,7 @@ out:
         connector_set = NULL;
         fb = NULL;
         mode = NULL;
+       num_connectors = 0;
  
         DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
  
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c

index 834a5e28abbe5959cc6da2933904c4feb4c7b00d..7352bde299d54767fecb34232cb5941a01d6ea88 100644 (file)
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -820,7 +820,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
         if (max_segment == 0)
                 max_segment = UINT_MAX;
         err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0,
-                                               nr_pages << PAGE_SHIFT,
+                                               (unsigned long)nr_pages << PAGE_SHIFT,
                                                 max_segment, GFP_KERNEL);
         if (err) {
                 kfree(sg);
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c

index 84101baeecc6e67d562e15e7d5ded57df39ffdc1..a6c19de462928ed70da033a60b10f08061bd1dc8 100644 (file)
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1040,7 +1040,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
         uint64_t *points;
         uint32_t signaled_count, i;
  
-       if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+       if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+                    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE))
                 lockdep_assert_none_held_once();
  
         points = kmalloc_array(count, sizeof(*points), GFP_KERNEL);
@@ -1109,7 +1110,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
          * fallthough and try a 0 timeout wait!
          */
  
-       if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+       if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+                    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) {
                 for (i = 0; i < count; ++i)
                         drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
         }
@@ -1416,10 +1418,21 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
  
         /* This happens inside the syncobj lock */
         fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+       if (!fence)
+               return;
+
         ret = dma_fence_chain_find_seqno(&fence, entry->point);
-       if (ret != 0 || !fence) {
+       if (ret != 0) {
+               /* The given seqno has not been submitted yet. */
                 dma_fence_put(fence);
                 return;
+       } else if (!fence) {
+               /* If dma_fence_chain_find_seqno returns 0 but sets the fence
+                * to NULL, it implies that the given seqno is signaled and a
+                * later seqno has already been submitted. Assign a stub fence
+                * so that the eventfd still gets signaled below.
+                */
+               fence = dma_fence_get_stub();
         }
  
         list_del_init(&entry->node);
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig

index b5d6e3352071f5c3765f074200180bd6dfc7685c..3089029abba481828522070dc0063eaa79251bf9 100644 (file)
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -140,7 +140,7 @@ config DRM_I915_GVT_KVMGT
  
           Note that this driver only supports newer device from Broadwell on.
           For further information and setup guide, you can visit:
-         http://01.org/igvt-g.
+         https://github.com/intel/gvt-linux/wiki.
  
           If in doubt, say "N".
  
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c

index f5ef95da55346ff14cc6b102c27e78e8960cec65..ae647d03af25cd48a3151b362c5ed1e752bdba60 100644 (file)
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2355,6 +2355,9 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp,
         limits->min_rate = intel_dp_common_rate(intel_dp, 0);
         limits->max_rate = intel_dp_max_link_rate(intel_dp);
  
+       /* FIXME 128b/132b SST support missing */
+       limits->max_rate = min(limits->max_rate, 810000);
+
         limits->min_lane_count = 1;
         limits->max_lane_count = intel_dp_max_lane_count(intel_dp);
  
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c

index acc6b6804105102389dc26c3fefce80444d0adad..2915d7afe5ccc2facdaeaee164e7b9c60796f361 100644 (file)
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1209,7 +1209,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo,
         struct intel_sdvo_tv_format format;
         u32 format_map;
  
-       format_map = 1 << conn_state->tv.mode;
+       format_map = 1 << conn_state->tv.legacy_mode;
         memset(&format, 0, sizeof(format));
         memcpy(&format, &format_map, min(sizeof(format), sizeof(format_map)));
  
@@ -2298,7 +2298,7 @@ static int intel_sdvo_get_tv_modes(struct drm_connector *connector)
          * Read the list of supported input resolutions for the selected TV
          * format.
          */
-       format_map = 1 << conn_state->tv.mode;
+       format_map = 1 << conn_state->tv.legacy_mode;
         memcpy(&tv_res, &format_map,
                min(sizeof(format_map), sizeof(struct intel_sdvo_sdtv_resolution_request)));
  
@@ -2363,7 +2363,7 @@ intel_sdvo_connector_atomic_get_property(struct drm_connector *connector,
                 int i;
  
                 for (i = 0; i < intel_sdvo_connector->format_supported_num; i++)
-                       if (state->tv.mode == intel_sdvo_connector->tv_format_supported[i]) {
+                       if (state->tv.legacy_mode == intel_sdvo_connector->tv_format_supported[i]) {
                                 *val = i;
  
                                 return 0;
@@ -2419,7 +2419,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector,
         struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state);
  
         if (property == intel_sdvo_connector->tv_format) {
-               state->tv.mode = intel_sdvo_connector->tv_format_supported[val];
+               state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[val];
  
                 if (state->crtc) {
                         struct drm_crtc_state *crtc_state =
@@ -3076,7 +3076,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo,
                 drm_property_add_enum(intel_sdvo_connector->tv_format, i,
                                       tv_format_names[intel_sdvo_connector->tv_format_supported[i]]);
  
-       intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0];
+       intel_sdvo_connector->base.base.state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[0];
         drm_object_attach_property(&intel_sdvo_connector->base.base.base,
                                    intel_sdvo_connector->tv_format, 0);
         return true;
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c

index d4386cb3569e0991bc3c0c78a4415d77a7bc1998..992a725de751a2d1925c23da8763e5ea7dce4714 100644 (file)
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -949,7 +949,7 @@ intel_disable_tv(struct intel_atomic_state *state,
  
  static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state)
  {
-       int format = conn_state->tv.mode;
+       int format = conn_state->tv.legacy_mode;
  
         return &tv_modes[format];
  }
@@ -1704,7 +1704,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
                         break;
         }
  
-       connector->state->tv.mode = i;
+       connector->state->tv.legacy_mode = i;
  }
  
  static int
@@ -1859,7 +1859,7 @@ static int intel_tv_atomic_check(struct drm_connector *connector,
         old_state = drm_atomic_get_old_connector_state(state, connector);
         new_crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc);
  
-       if (old_state->tv.mode != new_state->tv.mode ||
+       if (old_state->tv.legacy_mode != new_state->tv.legacy_mode ||
             old_state->tv.margins.left != new_state->tv.margins.left ||
             old_state->tv.margins.right != new_state->tv.margins.right ||
             old_state->tv.margins.top != new_state->tv.margins.top ||
@@ -1896,7 +1896,7 @@ static void intel_tv_add_properties(struct drm_connector *connector)
         conn_state->tv.margins.right = 46;
         conn_state->tv.margins.bottom = 37;
  
-       conn_state->tv.mode = 0;
+       conn_state->tv.legacy_mode = 0;
  
         /* Create TV properties then attach current values */
         for (i = 0; i < ARRAY_SIZE(tv_modes); i++) {
@@ -1910,7 +1910,7 @@ static void intel_tv_add_properties(struct drm_connector *connector)
  
         drm_object_attach_property(&connector->base,
                                    i915->drm.mode_config.legacy_tv_mode_property,
-                                  conn_state->tv.mode);
+                                  conn_state->tv.legacy_mode);
         drm_object_attach_property(&connector->base,
                                    i915->drm.mode_config.tv_left_margin_property,
                                    conn_state->tv.margins.left);
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h

index 64f440fdc22b2c832a77ca7ca73cf83ecc5ba625..8b21dc8e26d525f514f74f2a647ea703b2cd2d9a 100644 (file)
--- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h
@@ -51,8 +51,8 @@
  #define DSCC_PICTURE_PARAMETER_SET_0           _MMIO(0x6BA00)
  #define _DSCA_PPS_0                            0x6B200
  #define _DSCC_PPS_0                            0x6BA00
-#define DSCA_PPS(pps)                          _MMIO(_DSCA_PPS_0 + (pps) * 4)
-#define DSCC_PPS(pps)                          _MMIO(_DSCC_PPS_0 + (pps) * 4)
+#define DSCA_PPS(pps)                          _MMIO(_DSCA_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4)
+#define DSCC_PPS(pps)                          _MMIO(_DSCC_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4)
  #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PB   0x78270
  #define _ICL_DSC1_PICTURE_PARAMETER_SET_0_PB   0x78370
  #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PC   0x78470
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c

index 90f6c1ece57d4478a30375df1725624b47449298..efcb00472be24779590fcce94753ab83a787f2c4 100644 (file)
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2849,8 +2849,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset,
         for (i = start; i < end; i += 4) {
                 p = intel_gvt_find_mmio_info(gvt, i);
                 if (p) {
-                       WARN(1, "dup mmio definition offset %x\n",
-                               info->offset);
+                       WARN(1, "dup mmio definition offset %x\n", i);
  
                         /* We return -EEXIST here to make GVT-g load fail.
                          * So duplicated MMIO can be found as soon as
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c

index e98b6d69a91ab70c224d67ad337926c1b69936b3..9b6d87c8b5831c14aec9bbc425a374f0721f0273 100644 (file)
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -41,7 +41,7 @@
   * To virtualize GPU resources GVT-g driver depends on hypervisor technology
   * e.g KVM/VFIO/mdev, Xen, etc. to provide resource access trapping capability
   * and be virtualized within GVT-g device module. More architectural design
- * doc is available on https://01.org/group/2230/documentation-list.
+ * doc is available on https://github.com/intel/gvt-linux/wiki.
   */
  
  static LIST_HEAD(intel_gvt_devices);
diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c

index 3f73b211fa8e3e3bc4812180883c9685f8377f19..3407450435e2057dd3973441ba6e31485e69ee6d 100644 (file)
--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c
+++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
@@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv)
         if (priv->encoders[MESON_ENC_CVBS]) {
                 meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS];
                 drm_bridge_remove(&meson_encoder_cvbs->bridge);
-               drm_bridge_remove(meson_encoder_cvbs->next_bridge);
         }
  }
diff --git a/drivers/gpu/drm/meson/meson_encoder_dsi.c b/drivers/gpu/drm/meson/meson_encoder_dsi.c

index 3f93c70488cad1829bbe488d8bf8f7b3833859f1..311b91630fbe536cf724223a1fa71e565ba2c778 100644 (file)
--- a/drivers/gpu/drm/meson/meson_encoder_dsi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c
@@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct meson_drm *priv)
         if (priv->encoders[MESON_ENC_DSI]) {
                 meson_encoder_dsi = priv->encoders[MESON_ENC_DSI];
                 drm_bridge_remove(&meson_encoder_dsi->bridge);
-               drm_bridge_remove(meson_encoder_dsi->next_bridge);
         }
  }
diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c

index 25ea765586908f14d08715f45ca9def85a6a07f3..c4686568c9ca5d81b4066315681263e0fbd848a2 100644 (file)
--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
@@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv)
         if (priv->encoders[MESON_ENC_HDMI]) {
                 meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI];
                 drm_bridge_remove(&meson_encoder_hdmi->bridge);
-               drm_bridge_remove(meson_encoder_hdmi->next_bridge);
         }
  }
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c

index c0bc924cd3025dc21939e2e75548f273a90fd620..c9c55e2ea584927ce7b3f8ffc50e7ed807f6671a 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1287,7 +1287,7 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
         gpu->ubwc_config.highest_bank_bit = 15;
  
         if (adreno_is_a610(gpu)) {
-               gpu->ubwc_config.highest_bank_bit = 14;
+               gpu->ubwc_config.highest_bank_bit = 13;
                 gpu->ubwc_config.min_acc_len = 1;
                 gpu->ubwc_config.ubwc_mode = 1;
         }
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c

index 83380bc92a00a964479a0cbbb8dbc7a9dcd675ca..6a4b489d44e5173831d73956f1fb7a4e10809052 100644 (file)
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -144,10 +144,6 @@ enum dpu_enc_rc_states {
   *                     to track crtc in the disable() hook which is called
   *                     _after_ encoder_mask is cleared.
   * @connector:         If a mode is set, cached pointer to the active connector
- * @crtc_kickoff_cb:           Callback into CRTC that will flush & start
- *                             all CTL paths
- * @crtc_kickoff_cb_data:      Opaque user data given to crtc_kickoff_cb
- * @debugfs_root:              Debug file system root file node
   * @enc_lock:                  Lock around physical encoder
   *                             create/destroy/enable/disable
   * @frame_busy_mask:           Bitmask tracking which phys_enc we are still
@@ -2072,7 +2068,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
         }
  
         /* reset the merge 3D HW block */
-       if (phys_enc->hw_pp->merge_3d) {
+       if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) {
                 phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d,
                                 BLEND_3D_NONE);
                 if (phys_enc->hw_ctl->ops.update_pending_flush_merge_3d)
@@ -2103,7 +2099,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc)
         if (phys_enc->hw_wb)
                 intf_cfg.wb = phys_enc->hw_wb->idx;
  
-       if (phys_enc->hw_pp->merge_3d)
+       if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d)
                 intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx;
  
         if (ctl->ops.reset_intf_cfg)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c

index b58a9c2ae326cab6c4799a88fe86acbab1c236f8..724537ab776dfde95c6406cf0aef1b795874b171 100644 (file)
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -29,7 +29,6 @@ static inline bool reserved_by_other(uint32_t *res_map, int idx,
  /**
   * struct dpu_rm_requirements - Reservation requirements parameter bundle
   * @topology:  selected topology for the display
- * @hw_res:       Hardware resources required as reported by the encoders
   */
  struct dpu_rm_requirements {
         struct msm_display_topology topology;
@@ -204,6 +203,8 @@ static bool _dpu_rm_needs_split_display(const struct msm_display_topology *top)
   * _dpu_rm_get_lm_peer - get the id of a mixer which is a peer of the primary
   * @rm: dpu resource manager handle
   * @primary_idx: index of primary mixer in rm->mixer_blks[]
+ *
+ * Returns: lm peer mixed id on success or %-EINVAL on error
   */
  static int _dpu_rm_get_lm_peer(struct dpu_rm *rm, int primary_idx)
  {
diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c

index 77a8d9366ed7b01d46a01cf602e74eafb15d4937..fb588fde298a2de231ea5fdd8f639da156d47030 100644 (file)
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -135,11 +135,6 @@ static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl)
         tbd = dp_link_get_test_bits_depth(ctrl->link,
                         ctrl->panel->dp_mode.bpp);
  
-       if (tbd == DP_TEST_BIT_DEPTH_UNKNOWN) {
-               pr_debug("BIT_DEPTH not set. Configure default\n");
-               tbd = DP_TEST_BIT_DEPTH_8;
-       }
-
         config |= tbd << DP_CONFIGURATION_CTRL_BPC_SHIFT;
  
         /* Num of Lanes */
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c

index d37d599aec273b41b7ec54eb04b55ee86770d1a5..4c72124ffb5d495bdd24eefaf086f4d9401663ce 100644 (file)
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -329,10 +329,26 @@ static const struct component_ops dp_display_comp_ops = {
         .unbind = dp_display_unbind,
  };
  
+static void dp_display_send_hpd_event(struct msm_dp *dp_display)
+{
+       struct dp_display_private *dp;
+       struct drm_connector *connector;
+
+       dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+       connector = dp->dp_display.connector;
+       drm_helper_hpd_irq_event(connector->dev);
+}
+
  static int dp_display_send_hpd_notification(struct dp_display_private *dp,
                                             bool hpd)
  {
-       struct drm_bridge *bridge = dp->dp_display.bridge;
+       if ((hpd && dp->dp_display.link_ready) ||
+                       (!hpd && !dp->dp_display.link_ready)) {
+               drm_dbg_dp(dp->drm_dev, "HPD already %s\n",
+                               (hpd ? "on" : "off"));
+               return 0;
+       }
  
         /* reset video pattern flag on disconnect */
         if (!hpd) {
@@ -348,7 +364,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp,
  
         drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n",
                         dp->dp_display.connector_type, hpd);
-       drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready);
+       dp_display_send_hpd_event(&dp->dp_display);
  
         return 0;
  }
diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c

index 98427d45e9a7e3ac99a47871bbd1e0e893b2bc24..49dfac1fd1ef2158626f4a417b22e810414b76f9 100644 (file)
--- a/drivers/gpu/drm/msm/dp/dp_link.c
+++ b/drivers/gpu/drm/msm/dp/dp_link.c
@@ -7,6 +7,7 @@
  
  #include <drm/drm_print.h>
  
+#include "dp_reg.h"
  #include "dp_link.h"
  #include "dp_panel.h"
  
@@ -1082,7 +1083,7 @@ int dp_link_process_request(struct dp_link *dp_link)
  
  int dp_link_get_colorimetry_config(struct dp_link *dp_link)
  {
-       u32 cc;
+       u32 cc = DP_MISC0_COLORIMERY_CFG_LEGACY_RGB;
         struct dp_link_private *link;
  
         if (!dp_link) {
@@ -1096,10 +1097,11 @@ int dp_link_get_colorimetry_config(struct dp_link *dp_link)
          * Unless a video pattern CTS test is ongoing, use RGB_VESA
          * Only RGB_VESA and RGB_CEA supported for now
          */
-       if (dp_link_is_video_pattern_requested(link))
-               cc = link->dp_link.test_video.test_dyn_range;
-       else
-               cc = DP_TEST_DYNAMIC_RANGE_VESA;
+       if (dp_link_is_video_pattern_requested(link)) {
+               if (link->dp_link.test_video.test_dyn_range &
+                                       DP_TEST_DYNAMIC_RANGE_CEA)
+                       cc = DP_MISC0_COLORIMERY_CFG_CEA_RGB;
+       }
  
         return cc;
  }
@@ -1179,6 +1181,9 @@ void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link)
  u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp)
  {
         u32 tbd;
+       struct dp_link_private *link;
+
+       link = container_of(dp_link, struct dp_link_private, dp_link);
  
         /*
          * Few simplistic rules and assumptions made here:
@@ -1196,12 +1201,13 @@ u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp)
                 tbd = DP_TEST_BIT_DEPTH_10;
                 break;
         default:
-               tbd = DP_TEST_BIT_DEPTH_UNKNOWN;
+               drm_dbg_dp(link->drm_dev, "bpp=%d not supported, use bpc=8\n",
+                          bpp);
+               tbd = DP_TEST_BIT_DEPTH_8;
                 break;
         }
  
-       if (tbd != DP_TEST_BIT_DEPTH_UNKNOWN)
-               tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT);
+       tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT);
  
         return tbd;
  }
diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h

index ea85a691e72b5ce505822e4fce21f0cbcf0c4319..78785ed4b40c490d83396825d62a65af2fd6c9df 100644 (file)
--- a/drivers/gpu/drm/msm/dp/dp_reg.h
+++ b/drivers/gpu/drm/msm/dp/dp_reg.h
@@ -143,6 +143,9 @@
  #define DP_MISC0_COLORIMETRY_CFG_SHIFT         (0x00000001)
  #define DP_MISC0_TEST_BITS_DEPTH_SHIFT         (0x00000005)
  
+#define DP_MISC0_COLORIMERY_CFG_LEGACY_RGB     (0)
+#define DP_MISC0_COLORIMERY_CFG_CEA_RGB                (0x04)
+
  #define REG_DP_VALID_BOUNDARY                  (0x00000030)
  #define REG_DP_VALID_BOUNDARY_2                        (0x00000034)
  
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c

index 5f68e31a3e4e1cbeed95bfde138711c0fc9c9759..0915f3b68752e34702ae8864249d049e3b277ee2 100644 (file)
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -26,7 +26,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
  {
         void *vaddr;
  
-       vaddr = msm_gem_get_vaddr(obj);
+       vaddr = msm_gem_get_vaddr_locked(obj);
         if (IS_ERR(vaddr))
                 return PTR_ERR(vaddr);
         iosys_map_set_vaddr(map, vaddr);
@@ -36,7 +36,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
  
  void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
  {
-       msm_gem_put_vaddr(obj);
+       msm_gem_put_vaddr_locked(obj);
  }
  
  struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c

index 095390774f22b547668227ed492a6e9783b055f9..655002b21b0d5dc345283a7699d14b0e88b3e472 100644 (file)
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -751,12 +751,14 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
         struct msm_ringbuffer *ring = submit->ring;
         unsigned long flags;
  
-       pm_runtime_get_sync(&gpu->pdev->dev);
+       WARN_ON(!mutex_is_locked(&gpu->lock));
  
-       mutex_lock(&gpu->lock);
+       pm_runtime_get_sync(&gpu->pdev->dev);
  
         msm_gpu_hw_init(gpu);
  
+       submit->seqno = submit->hw_fence->seqno;
+
         update_sw_cntrs(gpu);
  
         /*
@@ -781,11 +783,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
         gpu->funcs->submit(gpu, submit);
         gpu->cur_ctx_seqno = submit->queue->ctx->seqno;
  
-       hangcheck_timer_reset(gpu);
-
-       mutex_unlock(&gpu->lock);
-
         pm_runtime_put(&gpu->pdev->dev);
+       hangcheck_timer_reset(gpu);
  }
  
  /*
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c

index 5cc8d358cc9759307a444cd62bce83c62b3dcdb7..d5512037c38bcd7ca807aaf281dceaebdd688a4e 100644 (file)
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -21,6 +21,8 @@ struct msm_iommu_pagetable {
         struct msm_mmu base;
         struct msm_mmu *parent;
         struct io_pgtable_ops *pgtbl_ops;
+       const struct iommu_flush_ops *tlb;
+       struct device *iommu_dev;
         unsigned long pgsize_bitmap;    /* Bitmap of page sizes in use */
         phys_addr_t ttbr;
         u32 asid;
@@ -201,11 +203,33 @@ static const struct msm_mmu_funcs pagetable_funcs = {
  
  static void msm_iommu_tlb_flush_all(void *cookie)
  {
+       struct msm_iommu_pagetable *pagetable = cookie;
+       struct adreno_smmu_priv *adreno_smmu;
+
+       if (!pm_runtime_get_if_in_use(pagetable->iommu_dev))
+               return;
+
+       adreno_smmu = dev_get_drvdata(pagetable->parent->dev);
+
+       pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie);
+
+       pm_runtime_put_autosuspend(pagetable->iommu_dev);
  }
  
  static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size,
                 size_t granule, void *cookie)
  {
+       struct msm_iommu_pagetable *pagetable = cookie;
+       struct adreno_smmu_priv *adreno_smmu;
+
+       if (!pm_runtime_get_if_in_use(pagetable->iommu_dev))
+               return;
+
+       adreno_smmu = dev_get_drvdata(pagetable->parent->dev);
+
+       pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie);
+
+       pm_runtime_put_autosuspend(pagetable->iommu_dev);
  }
  
  static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
@@ -213,7 +237,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
  {
  }
  
-static const struct iommu_flush_ops null_tlb_ops = {
+static const struct iommu_flush_ops tlb_ops = {
         .tlb_flush_all = msm_iommu_tlb_flush_all,
         .tlb_flush_walk = msm_iommu_tlb_flush_walk,
         .tlb_add_page = msm_iommu_tlb_add_page,
@@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
  
         /* The incoming cfg will have the TTBR1 quirk enabled */
         ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1;
-       ttbr0_cfg.tlb = &null_tlb_ops;
+       ttbr0_cfg.tlb = &tlb_ops;
  
         pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,
-               &ttbr0_cfg, iommu->domain);
+               &ttbr0_cfg, pagetable);
  
         if (!pagetable->pgtbl_ops) {
                 kfree(pagetable);
@@ -279,6 +303,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
  
         /* Needed later for TLB flush */
         pagetable->parent = parent;
+       pagetable->tlb = ttbr1_cfg->tlb;
+       pagetable->iommu_dev = ttbr1_cfg->iommu_dev;
         pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap;
         pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr;
  
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c

index 455b2e3a0cdd4811c67fda8efccd9dd3dcf77a16..35423d10aafa90b98bb6f92c3da405940c8938ea 100644 (file)
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -562,6 +562,7 @@ static const struct msm_mdss_data sdm670_data = {
         .ubwc_enc_version = UBWC_2_0,
         .ubwc_dec_version = UBWC_2_0,
         .highest_bank_bit = 1,
+       .reg_bus_bw = 76800,
  };
  
  static const struct msm_mdss_data sdm845_data = {
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c

index 4bc13f7d005ab7c643f78206d8d41d72cd779045..9d6655f96f0cebcc0c03e5b9bef6900c299f2f0d 100644 (file)
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -21,8 +21,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
  
         msm_fence_init(submit->hw_fence, fctx);
  
-       submit->seqno = submit->hw_fence->seqno;
-
         mutex_lock(&priv->lru.lock);
  
         for (i = 0; i < submit->nr_bos; i++) {
@@ -35,8 +33,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
  
         mutex_unlock(&priv->lru.lock);
  
+       /* TODO move submit path over to using a per-ring lock.. */
+       mutex_lock(&gpu->lock);
+
         msm_gpu_submit(gpu, submit);
  
+       mutex_unlock(&gpu->lock);
+
         return dma_fence_get(submit->hw_fence);
  }
  
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig

index 1e6aaf95ff7c79483f7d8bba1ddce897bb7affcf..ceef470c9fbfcfb08be6abd69627b7e7bc66366d 100644 (file)
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -100,3 +100,11 @@ config DRM_NOUVEAU_SVM
         help
           Say Y here if you want to enable experimental support for
           Shared Virtual Memory (SVM).
+
+config DRM_NOUVEAU_GSP_DEFAULT
+       bool "Use GSP firmware for Turing/Ampere (needs firmware installed)"
+       depends on DRM_NOUVEAU
+       default n
+       help
+         Say Y here if you want to use the GSP codepaths by default on
+         Turing and Ampere GPUs.
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h

index d1437c08645f90d9c745ee77405d3fa1d8d51f9d..6f5d376d8fcc1ecb6d9faa80b4b06ba4cd1b21e4 100644 (file)
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -9,7 +9,7 @@
  #define GSP_PAGE_SIZE  BIT(GSP_PAGE_SHIFT)
  
  struct nvkm_gsp_mem {
-       u32 size;
+       size_t size;
         void *data;
         dma_addr_t addr;
  };
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c

index a04156ca8390ba6fea6a21e07e9eb5bba3ec7605..80f74ee0fc78677f8f890e8cc5daf8f363817b34 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
         struct nouveau_abi16_ntfy *ntfy, *temp;
  
         /* Cancel all jobs from the entity's queue. */
-       drm_sched_entity_fini(&chan->sched.entity);
+       if (chan->sched)
+               drm_sched_entity_fini(&chan->sched->entity);
  
         if (chan->chan)
                 nouveau_channel_idle(chan->chan);
  
-       nouveau_sched_fini(&chan->sched);
+       if (chan->sched)
+               nouveau_sched_destroy(&chan->sched);
  
         /* cleanup notifier state */
         list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
@@ -197,6 +199,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
         struct nouveau_cli *cli = nouveau_cli(file_priv);
         struct nouveau_drm *drm = nouveau_drm(dev);
         struct nvif_device *device = &drm->client.device;
+       struct nvkm_device *nvkm_device = nvxx_device(&drm->client.device);
         struct nvkm_gr *gr = nvxx_gr(device);
         struct drm_nouveau_getparam *getparam = data;
         struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -261,6 +264,14 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
                 getparam->value = nouveau_exec_push_max_from_ib_max(ib_max);
                 break;
         }
+       case NOUVEAU_GETPARAM_VRAM_BAR_SIZE:
+               getparam->value = nvkm_device->func->resource_size(nvkm_device, 1);
+               break;
+       case NOUVEAU_GETPARAM_VRAM_USED: {
+               struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM);
+               getparam->value = (u64)ttm_resource_manager_usage(vram_mgr);
+               break;
+       }
         default:
                 NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param);
                 return -EINVAL;
@@ -337,10 +348,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
         if (ret)
                 goto done;
  
-       ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
-                                chan->chan->dma.ib_max);
-       if (ret)
-               goto done;
+       /* If we're not using the VM_BIND uAPI, we don't need a scheduler.
+        *
+        * The client lock is already acquired by nouveau_abi16_get().
+        */
+       if (nouveau_cli_uvmm(cli)) {
+               ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq,
+                                          chan->chan->dma.ib_max);
+               if (ret)
+                       goto done;
+       }
  
         init->channel = chan->chan->chid;
  
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h

index 1f5e243c0c759ef759dbba7d4f89279c90bce5d4..11c8c4a80079bbb2b658816dd42d05f68a5eaab6 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
         struct nouveau_bo *ntfy;
         struct nouveau_vma *ntfy_vma;
         struct nvkm_mm  heap;
-       struct nouveau_sched sched;
+       struct nouveau_sched *sched;
  };
  
  struct nouveau_abi16 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c

index 6f6c31a9937b2fe751c6cffe429cc21a6b47a385..a947e1d5f309ae525e8087d13899f1efd1e8e73b 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli)
         WARN_ON(!list_empty(&cli->worker));
  
         usif_client_fini(cli);
-       nouveau_sched_fini(&cli->sched);
+       if (cli->sched)
+               nouveau_sched_destroy(&cli->sched);
         if (uvmm)
                 nouveau_uvmm_fini(uvmm);
         nouveau_vmm_fini(&cli->svm);
@@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
         cli->mem = &mems[ret];
  
         /* Don't pass in the (shared) sched_wq in order to let
-        * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
+        * nouveau_sched_create() create a dedicated one for VM_BIND jobs.
          *
          * This is required to ensure that for VM_BIND jobs free_job() work and
          * run_job() work can always run concurrently and hence, free_job() work
@@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
          * locks which indirectly or directly are held for allocations
          * elsewhere.
          */
-       ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
+       ret = nouveau_sched_create(&cli->sched, drm, NULL, 1);
         if (ret)
                 goto done;
  
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h

index 8a6d94c8b1631fd7ab8bbc193f35b064057a0185..e239c6bf4afa4f75d4ca30c63583af82f2ab9621 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -98,7 +98,7 @@ struct nouveau_cli {
                 bool disabled;
         } uvmm;
  
-       struct nouveau_sched sched;
+       struct nouveau_sched *sched;
  
         const struct nvif_mclass *mem;
  
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c

index bc5d71b79ab203ff7e874c612f3ea1e7c36323de..e65c0ef23bc73d59f3066ff02ae9360253b93e6d 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
         if (ret)
                 goto out;
  
-       args.sched = &chan16->sched;
+       args.sched = chan16->sched;
         args.file_priv = file_priv;
         args.chan = chan;
  
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c

index ca762ea5541361bb023e8b0288470502797f1a15..93f08f9479d89bfda87fbeef246c9dd702f047a1 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
  void
  nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
  {
+       cancel_work_sync(&fctx->uevent_work);
         nouveau_fence_context_kill(fctx, 0);
         nvif_event_dtor(&fctx->event);
         fctx->dead = 1;
@@ -145,12 +146,13 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
         return drop;
  }
  
-static int
-nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
+static void
+nouveau_fence_uevent_work(struct work_struct *work)
  {
-       struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
+       struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
+                                                      uevent_work);
         unsigned long flags;
-       int ret = NVIF_EVENT_KEEP;
+       int drop = 0;
  
         spin_lock_irqsave(&fctx->lock, flags);
         if (!list_empty(&fctx->pending)) {
@@ -160,11 +162,20 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc
                 fence = list_entry(fctx->pending.next, typeof(*fence), head);
                 chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
                 if (nouveau_fence_update(chan, fctx))
-                       ret = NVIF_EVENT_DROP;
+                       drop = 1;
         }
+       if (drop)
+               nvif_event_block(&fctx->event);
+
         spin_unlock_irqrestore(&fctx->lock, flags);
+}
  
-       return ret;
+static int
+nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
+{
+       struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
+       schedule_work(&fctx->uevent_work);
+       return NVIF_EVENT_KEEP;
  }
  
  void
@@ -178,6 +189,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
         } args;
         int ret;
  
+       INIT_WORK(&fctx->uevent_work, nouveau_fence_uevent_work);
         INIT_LIST_HEAD(&fctx->flip);
         INIT_LIST_HEAD(&fctx->pending);
         spin_lock_init(&fctx->lock);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h

index 64d33ae7f35610fe763cd34f717335ab7b24ea4e..8bc065acfe35870f62bd0f2e37df47a35eb8ae38 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -44,6 +44,7 @@ struct nouveau_fence_chan {
         u32 context;
         char name[32];
  
+       struct work_struct uevent_work;
         struct nvif_event event;
         int notify_ref, dead, killed;
  };
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c

index dd98f6910f9cab7b19117186339a138277e77b78..32fa2e273965bf140a4cb2e05262b638c312cd6e 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = {
         .free_job = nouveau_sched_free_job,
  };
  
-int
+static int
  nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
                    struct workqueue_struct *wq, u32 credit_limit)
  {
@@ -453,7 +453,30 @@ fail_wq:
         return ret;
  }
  
-void
+int
+nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+                    struct workqueue_struct *wq, u32 credit_limit)
+{
+       struct nouveau_sched *sched;
+       int ret;
+
+       sched = kzalloc(sizeof(*sched), GFP_KERNEL);
+       if (!sched)
+               return -ENOMEM;
+
+       ret = nouveau_sched_init(sched, drm, wq, credit_limit);
+       if (ret) {
+               kfree(sched);
+               return ret;
+       }
+
+       *psched = sched;
+
+       return 0;
+}
+
+
+static void
  nouveau_sched_fini(struct nouveau_sched *sched)
  {
         struct drm_gpu_scheduler *drm_sched = &sched->base;
@@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched)
         if (sched->wq)
                 destroy_workqueue(sched->wq);
  }
+
+void
+nouveau_sched_destroy(struct nouveau_sched **psched)
+{
+       struct nouveau_sched *sched = *psched;
+
+       nouveau_sched_fini(sched);
+       kfree(sched);
+
+       *psched = NULL;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h

index a6528f5981e6a6e8182a44e0ec3c0336302e6154..e1f01a23e6f6e84cf2700bde86e4fb5e3e013df1 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -111,8 +111,8 @@ struct nouveau_sched {
         } job;
  };
  
-int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
-                      struct workqueue_struct *wq, u32 credit_limit);
-void nouveau_sched_fini(struct nouveau_sched *sched);
+int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+                        struct workqueue_struct *wq, u32 credit_limit);
+void nouveau_sched_destroy(struct nouveau_sched **psched);
  
  #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c

index cc03e0c22ff3fec65cf6a40ae34db2af20bb349e..5e4565c5011a976d1c8057e9366d9e1da03de97a 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -1011,7 +1011,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
         if (ret)
                 return ret;
  
-       buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL);
+       buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL);
         if (!buffer->fault)
                 return -ENOMEM;
  
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c

index 4f223c972c6a8cb3bab7873dfa2a1c38756648b2..0a0a11dc9ec03eeba855f47ca57c1ad1c5669f54 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
         if (ret)
                 return ret;
  
-       args.sched = &cli->sched;
+       args.sched = cli->sched;
         args.file_priv = file_priv;
  
         ret = nouveau_uvmm_vm_bind(&args);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c

index 4135690326f44789535e8cb375ccfe1ee5fa68c3..3a30bea30e366f47ecda0bbabac5441aed285565 100644 (file)
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
@@ -168,12 +168,11 @@ r535_bar_new_(const struct nvkm_bar_func *hw, struct nvkm_device *device,
         rm->flush = r535_bar_flush;
  
         ret = gf100_bar_new_(rm, device, type, inst, &bar);
-       *pbar = bar;
         if (ret) {
-               if (!bar)
-                       kfree(rm);
+               kfree(rm);
                 return ret;
         }
+       *pbar = bar;
  
         bar->flushBAR2PhysMode = ioremap(device->func->resource_addr(device, 3), PAGE_SIZE);
         if (!bar->flushBAR2PhysMode)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c

index 19188683c8fca90a7656b53ab15a8ee58d8575e0..8c2bf1c16f2a9568a8d434838d0c7691d9d70ff7 100644 (file)
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
@@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name)
         return (void *)fw;
  }
  
+static void
+shadow_fw_release(void *fw)
+{
+       release_firmware(fw);
+}
+
  static const struct nvbios_source
  shadow_fw = {
         .name = "firmware",
         .init = shadow_fw_init,
-       .fini = (void(*)(void *))release_firmware,
+       .fini = shadow_fw_release,
         .read = shadow_fw_read,
         .rw = false,
  };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c

index 9ee58e2a0eb2ad99c198ea7a58e6e1cf02a667d0..a73a5b58979045b07468c1443940f87e1b151f67 100644 (file)
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -997,6 +997,32 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp)
         return 0;
  }
  
+static void
+nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
+{
+       if (mem->data) {
+               /*
+                * Poison the buffer to catch any unexpected access from
+                * GSP-RM if the buffer was prematurely freed.
+                */
+               memset(mem->data, 0xFF, mem->size);
+
+               dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
+               memset(mem, 0, sizeof(*mem));
+       }
+}
+
+static int
+nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem)
+{
+       mem->size = size;
+       mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
+       if (WARN_ON(!mem->data))
+               return -ENOMEM;
+
+       return 0;
+}
+
  static int
  r535_gsp_postinit(struct nvkm_gsp *gsp)
  {
@@ -1024,6 +1050,11 @@ r535_gsp_postinit(struct nvkm_gsp *gsp)
  
         nvkm_inth_allow(&gsp->subdev.inth);
         nvkm_wr32(device, 0x110004, 0x00000040);
+
+       /* Release the DMA buffers that were needed only for boot and init */
+       nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw);
+       nvkm_gsp_mem_dtor(gsp, &gsp->libos);
+
         return ret;
  }
  
@@ -1078,7 +1109,6 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
         if (IS_ERR(rpc))
                 return PTR_ERR(rpc);
  
-       rpc->size = sizeof(*rpc);
         rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
  
         str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
@@ -1094,6 +1124,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
                 strings += name_len;
                 str_offset += name_len;
         }
+       rpc->size = str_offset;
  
         return nvkm_gsp_rpc_wr(gsp, rpc, false);
  }
@@ -1532,27 +1563,6 @@ r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc)
         return 0;
  }
  
-static void
-nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
-{
-       if (mem->data) {
-               dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
-               mem->data = NULL;
-       }
-}
-
-static int
-nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, u32 size, struct nvkm_gsp_mem *mem)
-{
-       mem->size = size;
-       mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
-       if (WARN_ON(!mem->data))
-               return -ENOMEM;
-
-       return 0;
-}
-
-
  static int
  r535_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1)
  {
@@ -1938,20 +1948,20 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3)
   * See kgspCreateRadix3_IMPL
   */
  static int
-nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size,
+nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size,
                    struct nvkm_gsp_radix3 *rx3)
  {
         u64 addr;
  
         for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) {
                 u64 *ptes;
-               int idx;
+               size_t bufsize;
+               int ret, idx;
  
-               rx3->mem[i].size = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
-               rx3->mem[i].data = dma_alloc_coherent(device->dev, rx3->mem[i].size,
-                                                     &rx3->mem[i].addr, GFP_KERNEL);
-               if (WARN_ON(!rx3->mem[i].data))
-                       return -ENOMEM;
+               bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
+               ret = nvkm_gsp_mem_ctor(gsp, bufsize, &rx3->mem[i]);
+               if (ret)
+                       return ret;
  
                 ptes = rx3->mem[i].data;
                 if (i == 2) {
@@ -1991,7 +2001,7 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend)
                 if (ret)
                         return ret;
  
-               ret = nvkm_gsp_radix3_sg(gsp->subdev.device, &gsp->sr.sgt, len, &gsp->sr.radix3);
+               ret = nvkm_gsp_radix3_sg(gsp, &gsp->sr.sgt, len, &gsp->sr.radix3);
                 if (ret)
                         return ret;
  
@@ -2150,6 +2160,13 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
         mutex_destroy(&gsp->cmdq.mutex);
  
         r535_gsp_dtor_fws(gsp);
+
+       nvkm_gsp_mem_dtor(gsp, &gsp->rmargs);
+       nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta);
+       nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem);
+       nvkm_gsp_mem_dtor(gsp, &gsp->loginit);
+       nvkm_gsp_mem_dtor(gsp, &gsp->logintr);
+       nvkm_gsp_mem_dtor(gsp, &gsp->logrm);
  }
  
  int
@@ -2194,7 +2211,7 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp)
         memcpy(gsp->sig.data, data, size);
  
         /* Build radix3 page table for ELF image. */
-       ret = nvkm_gsp_radix3_sg(device, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
+       ret = nvkm_gsp_radix3_sg(gsp, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
         if (ret)
                 return ret;
  
@@ -2295,8 +2312,12 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif)
  {
         struct nvkm_subdev *subdev = &gsp->subdev;
         int ret;
+       bool enable_gsp = fwif->enable;
  
-       if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable))
+#if IS_ENABLED(CONFIG_DRM_NOUVEAU_GSP_DEFAULT)
+       enable_gsp = true;
+#endif
+       if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", enable_gsp))
                 return -EINVAL;
  
         if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) ||
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c

index 85b3b4871a1d63bf5a8cb2315a25dfd5ef2b8b70..fdd768bbd487c24b545da7aba2d0d45f63784293 100644 (file)
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -1985,8 +1985,10 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
                 clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags);
         }
  
-       if (!clock)
+       if (!clock) {
+               vop2_unlock(vop2);
                 return;
+       }
  
         if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
             !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c

index 85f082396d42da982589fe69183c71743c69a89b..d442b893275b971a53adc42b3a06973eebd8bdbb 100644 (file)
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1178,21 +1178,24 @@ static void drm_sched_run_job_work(struct work_struct *w)
         struct drm_sched_entity *entity;
         struct dma_fence *fence;
         struct drm_sched_fence *s_fence;
-       struct drm_sched_job *sched_job = NULL;
+       struct drm_sched_job *sched_job;
         int r;
  
         if (READ_ONCE(sched->pause_submit))
                 return;
  
         /* Find entity with a ready job */
-       while (!sched_job && (entity = drm_sched_select_entity(sched))) {
-               sched_job = drm_sched_entity_pop_job(entity);
-               if (!sched_job)
-                       complete_all(&entity->entity_idle);
-       }
+       entity = drm_sched_select_entity(sched);
         if (!entity)
                 return; /* No more work */
  
+       sched_job = drm_sched_entity_pop_job(entity);
+       if (!sched_job) {
+               complete_all(&entity->entity_idle);
+               drm_sched_run_job_queue(sched);
+               return;
+       }
+
         s_fence = sched_job->s_fence;
  
         atomic_add(sched_job->credits, &sched->credit_count);
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c

index ff36171c8fb700bae9967961220ea7cbb262d193..03d1c76aec2d3f7aca6a52acbb1a42455b37faa8 100644 (file)
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -960,7 +960,8 @@ int host1x_client_iommu_attach(struct host1x_client *client)
          * not the shared IOMMU domain, don't try to attach it to a different
          * domain. This allows using the IOMMU-backed DMA API.
          */
-       if (domain && domain != tegra->domain)
+       if (domain && domain->type != IOMMU_DOMAIN_IDENTITY &&
+           domain != tegra->domain)
                 return 0;
  
         if (tegra->domain) {
@@ -1242,9 +1243,26 @@ static int host1x_drm_probe(struct host1x_device *dev)
  
         drm_mode_config_reset(drm);
  
-       err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
-       if (err < 0)
-               goto hub;
+       /*
+        * Only take over from a potential firmware framebuffer if any CRTCs
+        * have been registered. This must not be a fatal error because there
+        * are other accelerators that are exposed via this driver.
+        *
+        * Another case where this happens is on Tegra234 where the display
+        * hardware is no longer part of the host1x complex, so this driver
+        * will not expose any modesetting features.
+        */
+       if (drm->mode_config.num_crtc > 0) {
+               err = drm_aperture_remove_framebuffers(&tegra_drm_driver);
+               if (err < 0)
+                       goto hub;
+       } else {
+               /*
+                * Indicate to userspace that this doesn't expose any display
+                * capabilities.
+                */
+               drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+       }
  
         err = drm_dev_register(drm, 0);
         if (err < 0)
diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c

index ea2af6bd9abebcf381cc6a1a245e8cc1f044a656..be2d9d7764be6bdf4086011c4a86914c244854a2 100644 (file)
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,16 +8,308 @@
  
  #include <linux/prime_numbers.h>
  #include <linux/sched/signal.h>
+#include <linux/sizes.h>
  
  #include <drm/drm_buddy.h>
  
  #include "../lib/drm_random.h"
  
+static unsigned int random_seed;
+
  static inline u64 get_size(int order, u64 chunk_size)
  {
         return (1 << order) * chunk_size;
  }
  
+static void drm_test_buddy_alloc_range_bias(struct kunit *test)
+{
+       u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
+       DRM_RND_STATE(prng, random_seed);
+       unsigned int i, count, *order;
+       struct drm_buddy mm;
+       LIST_HEAD(allocated);
+
+       bias_size = SZ_1M;
+       ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size);
+       ps = max(SZ_4K, ps);
+       mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
+
+       kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
+
+       KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+                              "buddy_init failed\n");
+
+       count = mm_size / bias_size;
+       order = drm_random_order(count, &prng);
+       KUNIT_EXPECT_TRUE(test, order);
+
+       /*
+        * Idea is to split the address space into uniform bias ranges, and then
+        * in some random order allocate within each bias, using various
+        * patterns within. This should detect if allocations leak out from a
+        * given bias, for example.
+        */
+
+       for (i = 0; i < count; i++) {
+               LIST_HEAD(tmp);
+               u32 size;
+
+               bias_start = order[i] * bias_size;
+               bias_end = bias_start + bias_size;
+               bias_rem = bias_size;
+
+               /* internal round_up too big */
+               KUNIT_ASSERT_TRUE_MSG(test,
+                                     drm_buddy_alloc_blocks(&mm, bias_start,
+                                                            bias_end, bias_size + ps, bias_size,
+                                                            &allocated,
+                                                            DRM_BUDDY_RANGE_ALLOCATION),
+                                     "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+                                     bias_start, bias_end, bias_size, bias_size);
+
+               /* size too big */
+               KUNIT_ASSERT_TRUE_MSG(test,
+                                     drm_buddy_alloc_blocks(&mm, bias_start,
+                                                            bias_end, bias_size + ps, ps,
+                                                            &allocated,
+                                                            DRM_BUDDY_RANGE_ALLOCATION),
+                                     "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+                                     bias_start, bias_end, bias_size + ps, ps);
+
+               /* bias range too small for size */
+               KUNIT_ASSERT_TRUE_MSG(test,
+                                     drm_buddy_alloc_blocks(&mm, bias_start + ps,
+                                                            bias_end, bias_size, ps,
+                                                            &allocated,
+                                                            DRM_BUDDY_RANGE_ALLOCATION),
+                                     "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+                                     bias_start + ps, bias_end, bias_size, ps);
+
+               /* bias misaligned */
+               KUNIT_ASSERT_TRUE_MSG(test,
+                                     drm_buddy_alloc_blocks(&mm, bias_start + ps,
+                                                            bias_end - ps,
+                                                            bias_size >> 1, bias_size >> 1,
+                                                            &allocated,
+                                                            DRM_BUDDY_RANGE_ALLOCATION),
+                                     "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+                                     bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);
+
+               /* single big page */
+               KUNIT_ASSERT_FALSE_MSG(test,
+                                      drm_buddy_alloc_blocks(&mm, bias_start,
+                                                             bias_end, bias_size, bias_size,
+                                                             &tmp,
+                                                             DRM_BUDDY_RANGE_ALLOCATION),
+                                      "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n",
+                                      bias_start, bias_end, bias_size, bias_size);
+               drm_buddy_free_list(&mm, &tmp);
+
+               /* single page with internal round_up */
+               KUNIT_ASSERT_FALSE_MSG(test,
+                                      drm_buddy_alloc_blocks(&mm, bias_start,
+                                                             bias_end, ps, bias_size,
+                                                             &tmp,
+                                                             DRM_BUDDY_RANGE_ALLOCATION),
+                                      "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+                                      bias_start, bias_end, ps, bias_size);
+               drm_buddy_free_list(&mm, &tmp);
+
+               /* random size within */
+               size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+               if (size)
+                       KUNIT_ASSERT_FALSE_MSG(test,
+                                              drm_buddy_alloc_blocks(&mm, bias_start,
+                                                                     bias_end, size, ps,
+                                                                     &tmp,
+                                                                     DRM_BUDDY_RANGE_ALLOCATION),
+                                              "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+                                              bias_start, bias_end, size, ps);
+
+               bias_rem -= size;
+               /* too big for current avail */
+               KUNIT_ASSERT_TRUE_MSG(test,
+                                     drm_buddy_alloc_blocks(&mm, bias_start,
+                                                            bias_end, bias_rem + ps, ps,
+                                                            &allocated,
+                                                            DRM_BUDDY_RANGE_ALLOCATION),
+                                     "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
+                                     bias_start, bias_end, bias_rem + ps, ps);
+
+               if (bias_rem) {
+                       /* random fill of the remainder */
+                       size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+                       size = max(size, ps);
+
+                       KUNIT_ASSERT_FALSE_MSG(test,
+                                              drm_buddy_alloc_blocks(&mm, bias_start,
+                                                                     bias_end, size, ps,
+                                                                     &allocated,
+                                                                     DRM_BUDDY_RANGE_ALLOCATION),
+                                              "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+                                              bias_start, bias_end, size, ps);
+                       /*
+                        * Intentionally allow some space to be left
+                        * unallocated, and ideally not always on the bias
+                        * boundaries.
+                        */
+                       drm_buddy_free_list(&mm, &tmp);
+               } else {
+                       list_splice_tail(&tmp, &allocated);
+               }
+       }
+
+       kfree(order);
+       drm_buddy_free_list(&mm, &allocated);
+       drm_buddy_fini(&mm);
+
+       /*
+        * Something more free-form. Idea is to pick a random starting bias
+        * range within the address space and then start filling it up. Also
+        * randomly grow the bias range in both directions as we go along. This
+        * should give us bias start/end which is not always uniform like above,
+        * and in some cases will require the allocator to jump over already
+        * allocated nodes in the middle of the address space.
+        */
+
+       KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+                              "buddy_init failed\n");
+
+       bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
+       bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
+       bias_end = max(bias_end, bias_start + ps);
+       bias_rem = bias_end - bias_start;
+
+       do {
+               u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
+
+               KUNIT_ASSERT_FALSE_MSG(test,
+                                      drm_buddy_alloc_blocks(&mm, bias_start,
+                                                             bias_end, size, ps,
+                                                             &allocated,
+                                                             DRM_BUDDY_RANGE_ALLOCATION),
+                                      "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+                                      bias_start, bias_end, size);
+               bias_rem -= size;
+
+               /*
+                * Try to randomly grow the bias range in both directions, or
+                * only one, or perhaps don't grow at all.
+                */
+               do {
+                       u32 old_bias_start = bias_start;
+                       u32 old_bias_end = bias_end;
+
+                       if (bias_start)
+                               bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps);
+                       if (bias_end != mm_size)
+                               bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps);
+
+                       bias_rem += old_bias_start - bias_start;
+                       bias_rem += bias_end - old_bias_end;
+               } while (!bias_rem && (bias_start || bias_end != mm_size));
+       } while (bias_rem);
+
+       KUNIT_ASSERT_EQ(test, bias_start, 0);
+       KUNIT_ASSERT_EQ(test, bias_end, mm_size);
+       KUNIT_ASSERT_TRUE_MSG(test,
+                             drm_buddy_alloc_blocks(&mm, bias_start, bias_end,
+                                                    ps, ps,
+                                                    &allocated,
+                                                    DRM_BUDDY_RANGE_ALLOCATION),
+                             "buddy_alloc passed with bias(%x-%x), size=%u\n",
+                             bias_start, bias_end, ps);
+
+       drm_buddy_free_list(&mm, &allocated);
+       drm_buddy_fini(&mm);
+}
+
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+       const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
+       unsigned long i, n_pages, total;
+       struct drm_buddy_block *block;
+       struct drm_buddy mm;
+       LIST_HEAD(left);
+       LIST_HEAD(middle);
+       LIST_HEAD(right);
+       LIST_HEAD(allocated);
+
+       KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+
+       /*
+        * Idea is to fragment the address space by alternating block
+        * allocations between three different lists; one for left, middle and
+        * right. We can then free a list to simulate fragmentation. In
+        * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+        * including the try_harder path.
+        */
+
+       i = 0;
+       n_pages = mm_size / ps;
+       do {
+               struct list_head *list;
+               int slot = i % 3;
+
+               if (slot == 0)
+                       list = &left;
+               else if (slot == 1)
+                       list = &middle;
+               else
+                       list = &right;
+               KUNIT_ASSERT_FALSE_MSG(test,
+                                      drm_buddy_alloc_blocks(&mm, 0, mm_size,
+                                                             ps, ps, list, 0),
+                                      "buddy_alloc hit an error size=%u\n",
+                                      ps);
+       } while (++i < n_pages);
+
+       KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+                                                          3 * ps, ps, &allocated,
+                                                          DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+                              "buddy_alloc didn't error size=%u\n", 3 * ps);
+
+       drm_buddy_free_list(&mm, &middle);
+       KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+                                                          3 * ps, ps, &allocated,
+                                                          DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+                              "buddy_alloc didn't error size=%u\n", 3 * ps);
+       KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+                                                          2 * ps, ps, &allocated,
+                                                          DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+                              "buddy_alloc didn't error size=%u\n", 2 * ps);
+
+       drm_buddy_free_list(&mm, &right);
+       KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+                                                          3 * ps, ps, &allocated,
+                                                          DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+                              "buddy_alloc didn't error size=%u\n", 3 * ps);
+       /*
+        * At this point we should have enough contiguous space for 2 blocks,
+        * however they are never buddies (since we freed middle and right) so
+        * will require the try_harder logic to find them.
+        */
+       KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+                                                           2 * ps, ps, &allocated,
+                                                           DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+                              "buddy_alloc hit an error size=%u\n", 2 * ps);
+
+       drm_buddy_free_list(&mm, &left);
+       KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+                                                           3 * ps, ps, &allocated,
+                                                           DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+                              "buddy_alloc hit an error size=%u\n", 3 * ps);
+
+       total = 0;
+       list_for_each_entry(block, &allocated, link)
+               total += drm_buddy_block_size(&mm, block);
+
+       KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+       drm_buddy_free_list(&mm, &allocated);
+       drm_buddy_fini(&mm);
+}
+
  static void drm_test_buddy_alloc_pathological(struct kunit *test)
  {
         u64 mm_size, size, start = 0;
@@ -275,16 +567,30 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
         drm_buddy_fini(&mm);
  }
  
+static int drm_buddy_suite_init(struct kunit_suite *suite)
+{
+       while (!random_seed)
+               random_seed = get_random_u32();
+
+       kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n",
+                  random_seed);
+
+       return 0;
+}
+
  static struct kunit_case drm_buddy_tests[] = {
         KUNIT_CASE(drm_test_buddy_alloc_limit),
         KUNIT_CASE(drm_test_buddy_alloc_optimistic),
         KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
         KUNIT_CASE(drm_test_buddy_alloc_pathological),
+       KUNIT_CASE(drm_test_buddy_alloc_contiguous),
+       KUNIT_CASE(drm_test_buddy_alloc_range_bias),
         {}
  };
  
  static struct kunit_suite drm_buddy_test_suite = {
         .name = "drm_buddy",
+       .suite_init = drm_buddy_suite_init,
         .test_cases = drm_buddy_tests,
  };
  
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c

index b62f420a9f969d61e09e1cde82f2e49b9415d8aa..112438d965ffbefd4fa2cce5f246cc03a63759f9 100644 (file)
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -387,7 +387,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
                                 enum ttm_caching caching,
                                 pgoff_t start_page, pgoff_t end_page)
  {
-       struct page **pages = tt->pages;
+       struct page **pages = &tt->pages[start_page];
         unsigned int order;
         pgoff_t i, nr;
  
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c

index f8e9abe647b927b211abb4bbc0751ea318d80369..9539aa28937fa4cf71fbcd8e252749607617d966 100644 (file)
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -94,6 +94,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev)
                         goto err_free;
         }
  
+       dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX);
         ret = virtio_gpu_init(vdev, dev);
         if (ret)
                 goto err_free;
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h

index 3062e0e0d467ee0737f0fbf63d3826c9f4013778..79ba98a169f907cc18dcd63c07e9570c623c1608 100644 (file)
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -50,8 +50,8 @@
  
  #define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN              (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
  #define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ            GUC_HXG_REQUEST_MSG_0_DATA0
-#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY                (0xffff << 16)
-#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN                (0xffff << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY                (0xffffu << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN                (0xffffu << 0)
  #define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32                GUC_HXG_REQUEST_MSG_n_DATAn
  #define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64                GUC_HXG_REQUEST_MSG_n_DATAn
  
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h

index 811add10c30dc21a357841dccd10e6583468978c..c165e26c097669b72e6cfa7f97a7ee5bdada90ff 100644 (file)
--- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -242,8 +242,8 @@ struct slpc_shared_data {
                 (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \
                         HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS)
  #define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ             GUC_HXG_REQUEST_MSG_0_DATA0
-#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID                (0xff << 8)
-#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC      (0xff << 0)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID                (0xffu << 8)
+#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC      (0xffu << 0)
  #define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N    GUC_HXG_REQUEST_MSG_n_DATAn
  
  #endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h

index 3b83f907ece46165c5bc11f93a3077e6dafd2edf..0b1146d0c997a216c589bb21d86d91f4d0f6841c 100644 (file)
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -82,11 +82,11 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
  #define GUC_CTB_HDR_LEN                                1u
  #define GUC_CTB_MSG_MIN_LEN                    GUC_CTB_HDR_LEN
  #define GUC_CTB_MSG_MAX_LEN                    256u
-#define GUC_CTB_MSG_0_FENCE                    (0xffff << 16)
-#define GUC_CTB_MSG_0_FORMAT                   (0xf << 12)
+#define GUC_CTB_MSG_0_FENCE                    (0xffffu << 16)
+#define GUC_CTB_MSG_0_FORMAT                   (0xfu << 12)
  #define   GUC_CTB_FORMAT_HXG                   0u
-#define GUC_CTB_MSG_0_RESERVED                 (0xf << 8)
-#define GUC_CTB_MSG_0_NUM_DWORDS               (0xff << 0)
+#define GUC_CTB_MSG_0_RESERVED                 (0xfu << 8)
+#define GUC_CTB_MSG_0_NUM_DWORDS               (0xffu << 0)
  
  /**
   * DOC: CTB HXG Message
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h

index 47094b9b044cbbcdd68f51b3cacd6f15e4d97b3c..0400bc0fccdc9b5d5605dafd5f5480ff3983319c 100644 (file)
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -31,9 +31,9 @@
   */
  
  #define GUC_KLV_LEN_MIN                                1u
-#define GUC_KLV_0_KEY                          (0xffff << 16)
-#define GUC_KLV_0_LEN                          (0xffff << 0)
-#define GUC_KLV_n_VALUE                                (0xffffffff << 0)
+#define GUC_KLV_0_KEY                          (0xffffu << 16)
+#define GUC_KLV_0_LEN                          (0xffffu << 0)
+#define GUC_KLV_n_VALUE                                (0xffffffffu << 0)
  
  /**
   * DOC: GuC Self Config KLVs
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h

index 3d199016cf881cea10668a010fce5e8b4ea234c1..29e414c82d56cb5a318686e18d3d774c7b9c3d0c 100644 (file)
--- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -40,18 +40,18 @@
   */
  
  #define GUC_HXG_MSG_MIN_LEN                    1u
-#define GUC_HXG_MSG_0_ORIGIN                   (0x1 << 31)
+#define GUC_HXG_MSG_0_ORIGIN                   (0x1u << 31)
  #define   GUC_HXG_ORIGIN_HOST                  0u
  #define   GUC_HXG_ORIGIN_GUC                   1u
-#define GUC_HXG_MSG_0_TYPE                     (0x7 << 28)
+#define GUC_HXG_MSG_0_TYPE                     (0x7u << 28)
  #define   GUC_HXG_TYPE_REQUEST                 0u
  #define   GUC_HXG_TYPE_EVENT                   1u
  #define   GUC_HXG_TYPE_NO_RESPONSE_BUSY                3u
  #define   GUC_HXG_TYPE_NO_RESPONSE_RETRY       5u
  #define   GUC_HXG_TYPE_RESPONSE_FAILURE                6u
  #define   GUC_HXG_TYPE_RESPONSE_SUCCESS                7u
-#define GUC_HXG_MSG_0_AUX                      (0xfffffff << 0)
-#define GUC_HXG_MSG_n_PAYLOAD                  (0xffffffff << 0)
+#define GUC_HXG_MSG_0_AUX                      (0xfffffffu << 0)
+#define GUC_HXG_MSG_n_PAYLOAD                  (0xffffffffu << 0)
  
  /**
   * DOC: HXG Request
@@ -85,8 +85,8 @@
   */
  
  #define GUC_HXG_REQUEST_MSG_MIN_LEN            GUC_HXG_MSG_MIN_LEN
-#define GUC_HXG_REQUEST_MSG_0_DATA0            (0xfff << 16)
-#define GUC_HXG_REQUEST_MSG_0_ACTION           (0xffff << 0)
+#define GUC_HXG_REQUEST_MSG_0_DATA0            (0xfffu << 16)
+#define GUC_HXG_REQUEST_MSG_0_ACTION           (0xffffu << 0)
  #define GUC_HXG_REQUEST_MSG_n_DATAn            GUC_HXG_MSG_n_PAYLOAD
  
  /**
@@ -117,8 +117,8 @@
   */
  
  #define GUC_HXG_EVENT_MSG_MIN_LEN              GUC_HXG_MSG_MIN_LEN
-#define GUC_HXG_EVENT_MSG_0_DATA0              (0xfff << 16)
-#define GUC_HXG_EVENT_MSG_0_ACTION             (0xffff << 0)
+#define GUC_HXG_EVENT_MSG_0_DATA0              (0xfffu << 16)
+#define GUC_HXG_EVENT_MSG_0_ACTION             (0xffffu << 0)
  #define GUC_HXG_EVENT_MSG_n_DATAn              GUC_HXG_MSG_n_PAYLOAD
  
  /**
@@ -188,8 +188,8 @@
   */
  
  #define GUC_HXG_FAILURE_MSG_LEN                        GUC_HXG_MSG_MIN_LEN
-#define GUC_HXG_FAILURE_MSG_0_HINT             (0xfff << 16)
-#define GUC_HXG_FAILURE_MSG_0_ERROR            (0xffff << 0)
+#define GUC_HXG_FAILURE_MSG_0_HINT             (0xfffu << 16)
+#define GUC_HXG_FAILURE_MSG_0_ERROR            (0xffffu << 0)
  
  /**
   * DOC: HXG Response
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h

index 68d9f6116bdfc3522ee5d6d94ef2bb763ec81090..777c20ceabab12f04f3f2062df3524ef1c2c0923 100644 (file)
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
@@ -10,7 +10,7 @@
  
  #include "xe_bo.h"
  
-#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT)
+#define i915_gem_object_is_shmem(obj) (0) /* We don't use shmem */
  
  static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n)
  {
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c

index ef56bd517b28c2604b1ed8c5f494a839217d25da..421b819fd4ba9a182d1dcbb7b364ad9a144477cb 100644 (file)
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -21,4 +21,5 @@ kunit_test_suite(xe_mocs_test_suite);
  
  MODULE_AUTHOR("Intel Corporation");
  MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe_mocs kunit test");
  MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c

index 0b0e262e2166d69da1063915fa4c6eeedfd38bd6..4d3b80ec906d0a6f44793df496ef776a90d84596 100644 (file)
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -28,6 +28,14 @@
  #include "xe_ttm_stolen_mgr.h"
  #include "xe_vm.h"
  
+const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
+       [XE_PL_SYSTEM] = "system",
+       [XE_PL_TT] = "gtt",
+       [XE_PL_VRAM0] = "vram0",
+       [XE_PL_VRAM1] = "vram1",
+       [XE_PL_STOLEN] = "stolen"
+};
+
  static const struct ttm_place sys_placement_flags = {
         .fpfn = 0,
         .lpfn = 0,
@@ -713,8 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
                 migrate = xe->tiles[0].migrate;
  
         xe_assert(xe, migrate);
-
-       trace_xe_bo_move(bo);
+       trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
         xe_device_mem_access_get(xe);
  
         if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h

index 9b1279aca1272cd69eab6d1121ed651b83210166..8be42ac6cd07023c520988cfff2cf3599de4859f 100644 (file)
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -243,6 +243,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo);
  int xe_bo_restore_pinned(struct xe_bo *bo);
  
  extern struct ttm_device_funcs xe_ttm_funcs;
+extern const char *const xe_mem_type_to_name[];
  
  int xe_gem_create_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file);
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c

index 1f0b4b9ce84f585ea599ccaf7f4641c3d139121f..5176c27e4b6a4c59739f5e456f79ca7d8a77ce94 100644 (file)
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -83,9 +83,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
         return 0;
  }
  
-static void device_kill_persistent_exec_queues(struct xe_device *xe,
-                                              struct xe_file *xef);
-
  static void xe_file_close(struct drm_device *dev, struct drm_file *file)
  {
         struct xe_device *xe = to_xe_device(dev);
@@ -102,8 +99,6 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
         mutex_unlock(&xef->exec_queue.lock);
         xa_destroy(&xef->exec_queue.xa);
         mutex_destroy(&xef->exec_queue.lock);
-       device_kill_persistent_exec_queues(xe, xef);
-
         mutex_lock(&xef->vm.lock);
         xa_for_each(&xef->vm.xa, idx, vm)
                 xe_vm_close_and_put(vm);
@@ -255,9 +250,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
                         xa_erase(&xe->usm.asid_to_vm, asid);
         }
  
-       drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock);
-       INIT_LIST_HEAD(&xe->persistent_engines.list);
-
         spin_lock_init(&xe->pinned.lock);
         INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
         INIT_LIST_HEAD(&xe->pinned.external_vram);
@@ -570,37 +562,6 @@ void xe_device_shutdown(struct xe_device *xe)
  {
  }
  
-void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q)
-{
-       mutex_lock(&xe->persistent_engines.lock);
-       list_add_tail(&q->persistent.link, &xe->persistent_engines.list);
-       mutex_unlock(&xe->persistent_engines.lock);
-}
-
-void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
-                                            struct xe_exec_queue *q)
-{
-       mutex_lock(&xe->persistent_engines.lock);
-       if (!list_empty(&q->persistent.link))
-               list_del(&q->persistent.link);
-       mutex_unlock(&xe->persistent_engines.lock);
-}
-
-static void device_kill_persistent_exec_queues(struct xe_device *xe,
-                                              struct xe_file *xef)
-{
-       struct xe_exec_queue *q, *next;
-
-       mutex_lock(&xe->persistent_engines.lock);
-       list_for_each_entry_safe(q, next, &xe->persistent_engines.list,
-                                persistent.link)
-               if (q->persistent.xef == xef) {
-                       xe_exec_queue_kill(q);
-                       list_del_init(&q->persistent.link);
-               }
-       mutex_unlock(&xe->persistent_engines.lock);
-}
-
  void xe_device_wmb(struct xe_device *xe)
  {
         struct xe_gt *gt = xe_root_mmio_gt(xe);
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h

index 3da83b2332063882afcaffb3f204410fa848de9d..08d8b72c77319a74bc34562c92ec0aab0195be42 100644 (file)
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -42,10 +42,6 @@ int xe_device_probe(struct xe_device *xe);
  void xe_device_remove(struct xe_device *xe);
  void xe_device_shutdown(struct xe_device *xe);
  
-void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q);
-void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
-                                            struct xe_exec_queue *q);
-
  void xe_device_wmb(struct xe_device *xe);
  
  static inline struct xe_file *to_xe_file(const struct drm_file *file)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h

index 5dc9127a20293e1ebb56c3684e2fdb7e6f425b43..e8491979a6f21810cf4c480af08e9b2b6abfd4ee 100644 (file)
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -341,14 +341,6 @@ struct xe_device {
                 struct mutex lock;
         } usm;
  
-       /** @persistent_engines: engines that are closed but still running */
-       struct {
-               /** @lock: protects persistent engines */
-               struct mutex lock;
-               /** @list: list of persistent engines */
-               struct list_head list;
-       } persistent_engines;
-
         /** @pinned: pinned BO state */
         struct {
                 /** @lock: protected pinned BO list state */
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c

index 74391d9b11ae0e4cc77ecf9d0f5f47264e7adec6..e4db069f0db3f1fd27ed80eb84fc4544ea0831df 100644 (file)
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/xe_display.c
@@ -134,8 +134,6 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
  
  int xe_display_init_nommio(struct xe_device *xe)
  {
-       int err;
-
         if (!xe->info.enable_display)
                 return 0;
  
@@ -145,10 +143,6 @@ int xe_display_init_nommio(struct xe_device *xe)
         /* This must be called before any calls to HAS_PCH_* */
         intel_detect_pch(xe);
  
-       err = intel_power_domains_init(xe);
-       if (err)
-               return err;
-
         return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe);
  }
  
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c

index 82d1305e831f298f013338e4f7ee9e6e2ea67168..6040e4d22b2809c10385fadfbd6f4d8b6fdd0b28 100644 (file)
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -131,14 +131,6 @@ static void bo_meminfo(struct xe_bo *bo,
  
  static void show_meminfo(struct drm_printer *p, struct drm_file *file)
  {
-       static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
-               [XE_PL_SYSTEM] = "system",
-               [XE_PL_TT] = "gtt",
-               [XE_PL_VRAM0] = "vram0",
-               [XE_PL_VRAM1] = "vram1",
-               [4 ... 6] = NULL,
-               [XE_PL_STOLEN] = "stolen"
-       };
         struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {};
         struct xe_file *xef = file->driver_priv;
         struct ttm_device *bdev = &xef->xe->ttm;
@@ -171,7 +163,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
         spin_unlock(&client->bos_lock);
  
         for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
-               if (!mem_type_to_name[mem_type])
+               if (!xe_mem_type_to_name[mem_type])
                         continue;
  
                 man = ttm_manager_type(bdev, mem_type);
@@ -182,7 +174,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
                                                DRM_GEM_OBJECT_RESIDENT |
                                                (mem_type != XE_PL_SYSTEM ? 0 :
                                                DRM_GEM_OBJECT_PURGEABLE),
-                                              mem_type_to_name[mem_type]);
+                                              xe_mem_type_to_name[mem_type]);
                 }
         }
  }
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c

index b853feed9ccc15eefab7f0ccdf070096521e6015..17f26952e6656b8a077eb51161acbfd96638db2c 100644 (file)
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -111,7 +111,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
         u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
         struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
         struct drm_exec *exec = &vm_exec.exec;
-       u32 i, num_syncs = 0;
+       u32 i, num_syncs = 0, num_ufence = 0;
         struct xe_sched_job *job;
         struct dma_fence *rebind_fence;
         struct xe_vm *vm;
@@ -157,6 +157,14 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                                            SYNC_PARSE_FLAG_LR_MODE : 0));
                 if (err)
                         goto err_syncs;
+
+               if (xe_sync_is_ufence(&syncs[i]))
+                       num_ufence++;
+       }
+
+       if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
+               err = -EINVAL;
+               goto err_syncs;
         }
  
         if (xe_exec_queue_is_parallel(q)) {
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c

index bcfc4127c7c59f0fffc8e40df70a5b1c8222495f..49223026c89fd5e3626be84a9687774d29b6bcb2 100644 (file)
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -60,7 +60,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
         q->fence_irq = &gt->fence_irq[hwe->class];
         q->ring_ops = gt->ring_ops[hwe->class];
         q->ops = gt->exec_queue_ops;
-       INIT_LIST_HEAD(&q->persistent.link);
         INIT_LIST_HEAD(&q->compute.link);
         INIT_LIST_HEAD(&q->multi_gt_link);
  
@@ -310,102 +309,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
         return q->ops->set_timeslice(q, value);
  }
  
-static int exec_queue_set_preemption_timeout(struct xe_device *xe,
-                                            struct xe_exec_queue *q, u64 value,
-                                            bool create)
-{
-       u32 min = 0, max = 0;
-
-       xe_exec_queue_get_prop_minmax(q->hwe->eclass,
-                                     XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max);
-
-       if (xe_exec_queue_enforce_schedule_limit() &&
-           !xe_hw_engine_timeout_in_range(value, min, max))
-               return -EINVAL;
-
-       return q->ops->set_preempt_timeout(q, value);
-}
-
-static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
-                                     u64 value, bool create)
-{
-       if (XE_IOCTL_DBG(xe, !create))
-               return -EINVAL;
-
-       if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm)))
-               return -EINVAL;
-
-       if (value)
-               q->flags |= EXEC_QUEUE_FLAG_PERSISTENT;
-       else
-               q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT;
-
-       return 0;
-}
-
-static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q,
-                                     u64 value, bool create)
-{
-       u32 min = 0, max = 0;
-
-       if (XE_IOCTL_DBG(xe, !create))
-               return -EINVAL;
-
-       xe_exec_queue_get_prop_minmax(q->hwe->eclass,
-                                     XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max);
-
-       if (xe_exec_queue_enforce_schedule_limit() &&
-           !xe_hw_engine_timeout_in_range(value, min, max))
-               return -EINVAL;
-
-       return q->ops->set_job_timeout(q, value);
-}
-
-static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
-                                     u64 value, bool create)
-{
-       if (XE_IOCTL_DBG(xe, !create))
-               return -EINVAL;
-
-       if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
-               return -EINVAL;
-
-       q->usm.acc_trigger = value;
-
-       return 0;
-}
-
-static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q,
-                                    u64 value, bool create)
-{
-       if (XE_IOCTL_DBG(xe, !create))
-               return -EINVAL;
-
-       if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
-               return -EINVAL;
-
-       q->usm.acc_notify = value;
-
-       return 0;
-}
-
-static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q,
-                                         u64 value, bool create)
-{
-       if (XE_IOCTL_DBG(xe, !create))
-               return -EINVAL;
-
-       if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
-               return -EINVAL;
-
-       if (value > DRM_XE_ACC_GRANULARITY_64M)
-               return -EINVAL;
-
-       q->usm.acc_granularity = value;
-
-       return 0;
-}
-
  typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
                                              struct xe_exec_queue *q,
                                              u64 value, bool create);
@@ -413,12 +316,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
  static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
         [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
         [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
-       [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
-       [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
-       [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
-       [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
-       [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
-       [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
  };
  
  static int exec_queue_user_ext_set_property(struct xe_device *xe,
@@ -437,10 +334,15 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
  
         if (XE_IOCTL_DBG(xe, ext.property >=
                          ARRAY_SIZE(exec_queue_set_property_funcs)) ||
-           XE_IOCTL_DBG(xe, ext.pad))
+           XE_IOCTL_DBG(xe, ext.pad) ||
+           XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
+                        ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE))
                 return -EINVAL;
  
         idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
+       if (!exec_queue_set_property_funcs[idx])
+               return -EINVAL;
+
         return exec_queue_set_property_funcs[idx](xe, q, ext.value,  create);
  }
  
@@ -704,9 +606,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
                 }
  
                 q = xe_exec_queue_create(xe, vm, logical_mask,
-                                        args->width, hwe,
-                                        xe_vm_in_lr_mode(vm) ? 0 :
-                                        EXEC_QUEUE_FLAG_PERSISTENT);
+                                        args->width, hwe, 0);
                 up_read(&vm->lock);
                 xe_vm_put(vm);
                 if (IS_ERR(q))
@@ -728,8 +628,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
                         goto kill_exec_queue;
         }
  
-       q->persistent.xef = xef;
-
         mutex_lock(&xef->exec_queue.lock);
         err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
         mutex_unlock(&xef->exec_queue.lock);
@@ -872,10 +770,7 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
         if (XE_IOCTL_DBG(xe, !q))
                 return -ENOENT;
  
-       if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT))
-               xe_exec_queue_kill(q);
-       else
-               xe_device_add_persistent_exec_queues(xe, q);
+       xe_exec_queue_kill(q);
  
         trace_xe_exec_queue_close(q);
         xe_exec_queue_put(q);
@@ -926,20 +821,24 @@ void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
   * @q: The exec queue
   * @vm: The VM the engine does a bind or exec for
   *
- * Get last fence, does not take a ref
+ * Get last fence, takes a ref
   *
   * Returns: last fence if not signaled, dma fence stub if signaled
   */
  struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
                                                struct xe_vm *vm)
  {
+       struct dma_fence *fence;
+
         xe_exec_queue_last_fence_lockdep_assert(q, vm);
  
         if (q->last_fence &&
             test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
                 xe_exec_queue_last_fence_put(q, vm);
  
-       return q->last_fence ? q->last_fence : dma_fence_get_stub();
+       fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
+       dma_fence_get(fence);
+       return fence;
  }
  
  /**
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h

index 8d4b7feb8c306b8a406a46f74c5cad2a430bdef3..36f4901d8d7ee917215d745da900ea49b7616a78 100644 (file)
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -105,16 +105,6 @@ struct xe_exec_queue {
                 struct xe_guc_exec_queue *guc;
         };
  
-       /**
-        * @persistent: persistent exec queue state
-        */
-       struct {
-               /** @xef: file which this exec queue belongs to */
-               struct xe_file *xef;
-               /** @link: link in list of persistent exec queues */
-               struct list_head link;
-       } persistent;
-
         union {
                 /**
                  * @parallel: parallel submission state
@@ -160,16 +150,6 @@ struct xe_exec_queue {
                 spinlock_t lock;
         } compute;
  
-       /** @usm: unified shared memory state */
-       struct {
-               /** @acc_trigger: access counter trigger */
-               u32 acc_trigger;
-               /** @acc_notify: access counter notify */
-               u32 acc_notify;
-               /** @acc_granularity: access counter granularity */
-               u32 acc_granularity;
-       } usm;
-
         /** @ops: submission backend exec queue operations */
         const struct xe_exec_queue_ops *ops;
  
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c

index 96b5224eb4787d4c7abd2b65b56d0559724bd2c8..acb4d9f38fd738dd5a0e66607cb1bbdbe91311c2 100644 (file)
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
  static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
  {
         struct xe_execlist_port *port = exl->port;
-       enum xe_exec_queue_priority priority = exl->active_priority;
+       enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
  
         XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
         XE_WARN_ON(priority < 0);
@@ -378,8 +378,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w)
                 list_del(&exl->active_link);
         spin_unlock_irqrestore(&exl->port->lock, flags);
  
-       if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
-               xe_device_remove_persistent_exec_queues(xe, q);
         drm_sched_entity_fini(&exl->entity);
         drm_sched_fini(&exl->sched);
         kfree(exl);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c

index 3af2adec129561850bfb378c04ca2d7caacdf325..35474ddbaf97ecc974a6b55643e578dbcfe135f9 100644 (file)
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -437,7 +437,10 @@ static int all_fw_domain_init(struct xe_gt *gt)
                  * USM has its only SA pool to non-block behind user operations
                  */
                 if (gt_to_xe(gt)->info.has_usm) {
-                       gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16);
+                       struct xe_device *xe = gt_to_xe(gt);
+
+                       gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
+                                                               IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
                         if (IS_ERR(gt->usm.bb_pool)) {
                                 err = PTR_ERR(gt->usm.bb_pool);
                                 goto err_force_wake;
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c

index 9358f733688969391e68f22a2658b08c993d296a..9fcae65b64699eadb80a82b06386588a8af07f86 100644 (file)
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -145,10 +145,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
         }
  
         if (xe_gt_is_media_type(gt)) {
-               sprintf(gtidle->name, "gt%d-mc\n", gt->info.id);
+               sprintf(gtidle->name, "gt%d-mc", gt->info.id);
                 gtidle->idle_residency = xe_guc_pc_mc6_residency;
         } else {
-               sprintf(gtidle->name, "gt%d-rc\n", gt->info.id);
+               sprintf(gtidle->name, "gt%d-rc", gt->info.id);
                 gtidle->idle_residency = xe_guc_pc_rc6_residency;
         }
  
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c

index 77925b35cf8dcb0ee1d62ba7c579767796c8d807..8546cd3cc50d1f8c4146b2f69c4758bac05aa240 100644 (file)
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -480,7 +480,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
   * to synchronize with external clients (e.g., firmware), so a semaphore
   * register will also need to be taken.
   */
-static void mcr_lock(struct xe_gt *gt)
+static void mcr_lock(struct xe_gt *gt) __acquires(&gt->mcr_lock)
  {
         struct xe_device *xe = gt_to_xe(gt);
         int ret = 0;
@@ -500,7 +500,7 @@ static void mcr_lock(struct xe_gt *gt)
         drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
  }
  
-static void mcr_unlock(struct xe_gt *gt)
+static void mcr_unlock(struct xe_gt *gt) __releases(&gt->mcr_lock)
  {
         /* Release hardware semaphore - this is done by writing 1 to the register */
         if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c

index 59a70d2e0a7a33386fdcfca9cc158919aab1e32c..73f08f1924df2ea8d4aaabb87eceaa13eff81d78 100644 (file)
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -165,7 +165,8 @@ retry_userptr:
                 goto unlock_vm;
         }
  
-       if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) {
+       if (!xe_vma_is_userptr(vma) ||
+           !xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
                 downgrade_write(&vm->lock);
                 write_locked = false;
         }
@@ -181,11 +182,13 @@ retry_userptr:
         /* TODO: Validate fault */
  
         if (xe_vma_is_userptr(vma) && write_locked) {
+               struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+
                 spin_lock(&vm->userptr.invalidated_lock);
-               list_del_init(&vma->userptr.invalidate_link);
+               list_del_init(&uvma->userptr.invalidate_link);
                 spin_unlock(&vm->userptr.invalidated_lock);
  
-               ret = xe_vma_userptr_pin_pages(vma);
+               ret = xe_vma_userptr_pin_pages(uvma);
                 if (ret)
                         goto unlock_vm;
  
@@ -220,7 +223,7 @@ retry_userptr:
         dma_fence_put(fence);
  
         if (xe_vma_is_userptr(vma))
-               ret = xe_vma_userptr_check_repin(vma);
+               ret = xe_vma_userptr_check_repin(to_userptr_vma(vma));
         vma->usm.tile_invalidated &= ~BIT(tile->id);
  
  unlock_dma_resv:
@@ -332,7 +335,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
                 return -EPROTO;
  
         asid = FIELD_GET(PFD_ASID, msg[1]);
-       pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];
+       pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);
  
         spin_lock_irqsave(&pf_queue->lock, flags);
         full = pf_queue_full(pf_queue);
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c

index 7eef23a00d77ee679b011d8e4a0dc2b3ed1bb360..f4c485289dbe4d606e9022c5b58eec8e8123fdca 100644 (file)
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -247,6 +247,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
  
         xe_gt_assert(gt, vma);
  
+       /* Execlists not supported */
+       if (gt_to_xe(gt)->info.force_execlist) {
+               if (fence)
+                       __invalidation_fence_signal(fence);
+
+               return 0;
+       }
+
         action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
         action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
         if (!xe->info.has_range_tlb_invalidation) {
@@ -317,6 +325,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
         struct drm_printer p = drm_err_printer(__func__);
         int ret;
  
+       /* Execlists not supported */
+       if (gt_to_xe(gt)->info.force_execlist)
+               return 0;
+
         /*
          * XXX: See above, this algorithm only works if seqno are always in
          * order
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c

index f71085228cb33992940622dca2992f4e1ae9fa62..d91702592520af54eea5f8ca4bd56b67719531be 100644 (file)
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -963,7 +963,9 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc)
         struct xe_device *xe = pc_to_xe(pc);
  
         if (xe->info.skip_guc_pc) {
+               xe_device_mem_access_get(xe);
                 xe_gt_idle_disable_c6(pc_to_gt(pc));
+               xe_device_mem_access_put(xe);
                 return;
         }
  
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c

index 54ffcfcdd41f9ce3c590f5814fcbe3d3535946ac..f22ae717b0b2d3d8ff938d83f9ea954b4d5746e4 100644 (file)
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1028,8 +1028,6 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
  
         if (xe_exec_queue_is_lr(q))
                 cancel_work_sync(&ge->lr_tdr);
-       if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
-               xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
         release_guc_id(guc, q);
         xe_sched_entity_fini(&ge->entity);
         xe_sched_fini(&ge->sched);
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c

index a6094c81f2ad0fa8a3f1cf1001ceb897a044d3cb..a5de3e7b0bd6ab134557fdfb52a406d4bf199016 100644 (file)
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -217,13 +217,13 @@ struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx,
         if (!fence)
                 return ERR_PTR(-ENOMEM);
  
-       dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
-                      ctx->dma_fence_ctx, ctx->next_seqno++);
-
         fence->ctx = ctx;
         fence->seqno_map = seqno_map;
         INIT_LIST_HEAD(&fence->irq_link);
  
+       dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock,
+                      ctx->dma_fence_ctx, ctx->next_seqno++);
+
         trace_xe_hw_fence_create(fence);
  
         return fence;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c

index b7fa3831b68451cb74ae557ca3e7a66d5d4fa6fd..b38319d2801e008f14fa4b5089cd1b9dc204f547 100644 (file)
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -21,10 +21,10 @@
  #include "xe_map.h"
  #include "xe_vm.h"
  
-#define CTX_VALID                              (1 << 0)
-#define CTX_PRIVILEGE                          (1 << 8)
-#define CTX_ADDRESSING_MODE_SHIFT              3
-#define LEGACY_64B_CONTEXT                     3
+#define LRC_VALID                              (1 << 0)
+#define LRC_PRIVILEGE                          (1 << 8)
+#define LRC_ADDRESSING_MODE_SHIFT              3
+#define LRC_LEGACY_64B_CONTEXT                 3
  
  #define ENGINE_CLASS_SHIFT                     61
  #define ENGINE_INSTANCE_SHIFT                  48
@@ -682,8 +682,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
  
  #define PVC_CTX_ASID           (0x2e + 1)
  #define PVC_CTX_ACC_CTR_THOLD  (0x2a + 1)
-#define ACC_GRANULARITY_S       20
-#define ACC_NOTIFY_S            16
  
  int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
                 struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
@@ -754,23 +752,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
         xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
                              RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
         if (xe->info.has_asid && vm)
-               xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
-                                    (q->usm.acc_granularity <<
-                                     ACC_GRANULARITY_S) | vm->usm.asid);
-       if (xe->info.has_usm && vm)
-               xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
-                                    (q->usm.acc_notify << ACC_NOTIFY_S) |
-                                    q->usm.acc_trigger);
-
-       lrc->desc = CTX_VALID;
-       lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT;
+               xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
+
+       lrc->desc = LRC_VALID;
+       lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
         /* TODO: Priority */
  
         /* While this appears to have something about privileged batches or
          * some such, it really just means PPGTT mode.
          */
         if (vm)
-               lrc->desc |= CTX_PRIVILEGE;
+               lrc->desc |= LRC_PRIVILEGE;
  
         if (GRAPHICS_VERx100(xe) < 1250) {
                 lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c

index 5c6c5462425217c9301560a626d67ec7386bd418..70480c30560215ff7fece9a824fd01c92008562d 100644 (file)
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -170,11 +170,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
         if (!IS_DGFX(xe)) {
                 /* Write out batch too */
                 m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
-               if (xe->info.has_usm) {
-                       batch = tile->primary_gt->usm.bb_pool->bo;
-                       m->usm_batch_base_ofs = m->batch_base_ofs;
-               }
-
                 for (i = 0; i < batch->size;
                      i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
                      XE_PAGE_SIZE) {
@@ -185,6 +180,24 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
                                   entry);
                         level++;
                 }
+               if (xe->info.has_usm) {
+                       xe_tile_assert(tile, batch->size == SZ_1M);
+
+                       batch = tile->primary_gt->usm.bb_pool->bo;
+                       m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M;
+                       xe_tile_assert(tile, batch->size == SZ_512K);
+
+                       for (i = 0; i < batch->size;
+                            i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
+                            XE_PAGE_SIZE) {
+                               entry = vm->pt_ops->pte_encode_bo(batch, i,
+                                                                 pat_index, 0);
+
+                               xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
+                                         entry);
+                               level++;
+                       }
+               }
         } else {
                 u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
  
@@ -1204,8 +1217,11 @@ static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
         }
         if (q) {
                 fence = xe_exec_queue_last_fence_get(q, vm);
-               if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+               if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+                       dma_fence_put(fence);
                         return false;
+               }
+               dma_fence_put(fence);
         }
  
         return true;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c

index 5f6b53ea5528b2c904ce0c4ee30e39c4a16139b7..02f7808f28cabd5533e634b41d1780769bdcbb10 100644 (file)
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -105,7 +105,7 @@ static void xe_resize_vram_bar(struct xe_device *xe)
  
         pci_bus_for_each_resource(root, root_res, i) {
                 if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
-                   root_res->start > 0x100000000ull)
+                   (u64)root_res->start > 0x100000000ul)
                         break;
         }
  
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c

index de1030a47588371b0cc71f5b69bee8f0257e2625..6653c045f3c927f21e9d73dacb591ad363e01c47 100644 (file)
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -20,8 +20,8 @@
  
  struct xe_pt_dir {
         struct xe_pt pt;
-       /** @dir: Directory structure for the xe_pt_walk functionality */
-       struct xe_ptw_dir dir;
+       /** @children: Array of page-table child nodes */
+       struct xe_ptw *children[XE_PDES];
  };
  
  #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
@@ -44,7 +44,7 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt)
  
  static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
  {
-       return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
+       return container_of(pt_dir->children[index], struct xe_pt, base);
  }
  
  static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
@@ -65,6 +65,14 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
                 XE_PTE_NULL;
  }
  
+static void xe_pt_free(struct xe_pt *pt)
+{
+       if (pt->level)
+               kfree(as_xe_pt_dir(pt));
+       else
+               kfree(pt);
+}
+
  /**
   * xe_pt_create() - Create a page-table.
   * @vm: The vm to create for.
@@ -85,15 +93,19 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
  {
         struct xe_pt *pt;
         struct xe_bo *bo;
-       size_t size;
         int err;
  
-       size = !level ?  sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) +
-               XE_PDES * sizeof(struct xe_ptw *);
-       pt = kzalloc(size, GFP_KERNEL);
+       if (level) {
+               struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+
+               pt = (dir) ? &dir->pt : NULL;
+       } else {
+               pt = kzalloc(sizeof(*pt), GFP_KERNEL);
+       }
         if (!pt)
                 return ERR_PTR(-ENOMEM);
  
+       pt->level = level;
         bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
                                   ttm_bo_type_kernel,
                                   XE_BO_CREATE_VRAM_IF_DGFX(tile) |
@@ -106,8 +118,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
                 goto err_kfree;
         }
         pt->bo = bo;
-       pt->level = level;
-       pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL;
+       pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL;
  
         if (vm->xef)
                 xe_drm_client_add_bo(vm->xef->client, pt->bo);
@@ -116,7 +127,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
         return pt;
  
  err_kfree:
-       kfree(pt);
+       xe_pt_free(pt);
         return ERR_PTR(err);
  }
  
@@ -193,7 +204,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred)
                                               deferred);
                 }
         }
-       kfree(pt);
+       xe_pt_free(pt);
  }
  
  /**
@@ -358,7 +369,7 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent,
                 struct iosys_map *map = &parent->bo->vmap;
  
                 if (unlikely(xe_child))
-                       parent->base.dir->entries[offset] = &xe_child->base;
+                       parent->base.children[offset] = &xe_child->base;
  
                 xe_pt_write(xe_walk->vm->xe, map, offset, pte);
                 parent->num_live++;
@@ -488,10 +499,12 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
                  * this device *requires* 64K PTE size for VRAM, fail.
                  */
                 if (level == 0 && !xe_parent->is_compact) {
-                       if (xe_pt_is_pte_ps64K(addr, next, xe_walk))
+                       if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
+                               xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
                                 pte |= XE_PTE_PS64;
-                       else if (XE_WARN_ON(xe_walk->needs_64K))
+                       } else if (XE_WARN_ON(xe_walk->needs_64K)) {
                                 return -EINVAL;
+                       }
                 }
  
                 ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte);
@@ -534,13 +547,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
                 *child = &xe_child->base;
  
                 /*
-                * Prefer the compact pagetable layout for L0 if possible.
+                * Prefer the compact pagetable layout for L0 if possible. Only
+                * possible if VMA covers entire 2MB region as compact 64k and
+                * 4k pages cannot be mixed within a 2MB region.
                  * TODO: Suballocate the pt bo to avoid wasting a lot of
                  * memory.
                  */
                 if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 &&
                     covers && xe_pt_scan_64K(addr, next, xe_walk)) {
                         walk->shifts = xe_compact_pt_shifts;
+                       xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT;
                         flags |= XE_PDE_64K;
                         xe_child->is_compact = true;
                 }
@@ -618,8 +634,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
  
         if (!xe_vma_is_null(vma)) {
                 if (xe_vma_is_userptr(vma))
-                       xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma),
-                                       &curs);
+                       xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
+                                       xe_vma_size(vma), &curs);
                 else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
                         xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
                                      xe_vma_size(vma), &curs);
@@ -853,7 +869,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
                                 xe_pt_destroy(xe_pt_entry(pt_dir, j_),
                                               xe_vma_vm(vma)->flags, deferred);
  
-                       pt_dir->dir.entries[j_] = &newpte->base;
+                       pt_dir->children[j_] = &newpte->base;
                 }
                 kfree(entries[i].pt_entries);
         }
@@ -906,17 +922,17 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe,
  
  #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
  
-static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
  {
-       u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2;
+       u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2;
         static u32 count;
  
         if (count++ % divisor == divisor - 1) {
-               struct xe_vm *vm = xe_vma_vm(vma);
+               struct xe_vm *vm = xe_vma_vm(&uvma->vma);
  
-               vma->userptr.divisor = divisor << 1;
+               uvma->userptr.divisor = divisor << 1;
                 spin_lock(&vm->userptr.invalidated_lock);
-               list_move_tail(&vma->userptr.invalidate_link,
+               list_move_tail(&uvma->userptr.invalidate_link,
                                &vm->userptr.invalidated);
                 spin_unlock(&vm->userptr.invalidated_lock);
                 return true;
@@ -927,7 +943,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
  
  #else
  
-static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma)
+static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
  {
         return false;
  }
@@ -1000,9 +1016,9 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
  {
         struct xe_pt_migrate_pt_update *userptr_update =
                 container_of(pt_update, typeof(*userptr_update), base);
-       struct xe_vma *vma = pt_update->vma;
-       unsigned long notifier_seq = vma->userptr.notifier_seq;
-       struct xe_vm *vm = xe_vma_vm(vma);
+       struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma);
+       unsigned long notifier_seq = uvma->userptr.notifier_seq;
+       struct xe_vm *vm = xe_vma_vm(&uvma->vma);
         int err = xe_pt_vm_dependencies(pt_update->job,
                                         &vm->rftree[pt_update->tile_id],
                                         pt_update->start,
@@ -1023,7 +1039,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
          */
         do {
                 down_read(&vm->userptr.notifier_lock);
-               if (!mmu_interval_read_retry(&vma->userptr.notifier,
+               if (!mmu_interval_read_retry(&uvma->userptr.notifier,
                                              notifier_seq))
                         break;
  
@@ -1032,11 +1048,11 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
                 if (userptr_update->bind)
                         return -EAGAIN;
  
-               notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
+               notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier);
         } while (true);
  
         /* Inject errors to test_whether they are handled correctly */
-       if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) {
+       if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) {
                 up_read(&vm->userptr.notifier_lock);
                 return -EAGAIN;
         }
@@ -1297,7 +1313,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
                 vma->tile_present |= BIT(tile->id);
  
                 if (bind_pt_update.locked) {
-                       vma->userptr.initial_bind = true;
+                       to_userptr_vma(vma)->userptr.initial_bind = true;
                         up_read(&vm->userptr.notifier_lock);
                         xe_bo_put_commit(&deferred);
                 }
@@ -1507,7 +1523,7 @@ xe_pt_commit_unbind(struct xe_vma *vma,
                                         xe_pt_destroy(xe_pt_entry(pt_dir, i),
                                                       xe_vma_vm(vma)->flags, deferred);
  
-                               pt_dir->dir.entries[i] = NULL;
+                               pt_dir->children[i] = NULL;
                         }
                 }
         }
@@ -1642,7 +1658,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
  
                 if (!vma->tile_present) {
                         spin_lock(&vm->userptr.invalidated_lock);
-                       list_del_init(&vma->userptr.invalidate_link);
+                       list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
                         spin_unlock(&vm->userptr.invalidated_lock);
                 }
                 up_read(&vm->userptr.notifier_lock);
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c

index 8f6c8d063f39f0293a6c4d966c009dfc24ca9045..b8b3d2aea4923d0ac087f6a2c972652aba8efc6f 100644 (file)
--- a/drivers/gpu/drm/xe/xe_pt_walk.c
+++ b/drivers/gpu/drm/xe/xe_pt_walk.c
@@ -74,7 +74,7 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level,
                      u64 addr, u64 end, struct xe_pt_walk *walk)
  {
         pgoff_t offset = xe_pt_offset(addr, level, walk);
-       struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL;
+       struct xe_ptw **entries = parent->children ? parent->children : NULL;
         const struct xe_pt_walk_ops *ops = walk->ops;
         enum page_walk_action action;
         struct xe_ptw *child;
diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h

index ec3d1e9efa6d514ae21bb4b4a1b35a0bc2baf59c..5ecc4d2f0f6536b7ec79033f80f556ce1f00edc5 100644 (file)
--- a/drivers/gpu/drm/xe/xe_pt_walk.h
+++ b/drivers/gpu/drm/xe/xe_pt_walk.h
@@ -8,28 +8,15 @@
  #include <linux/pagewalk.h>
  #include <linux/types.h>
  
-struct xe_ptw_dir;
-
  /**
   * struct xe_ptw - base class for driver pagetable subclassing.
- * @dir: Pointer to an array of children if any.
+ * @children: Pointer to an array of children if any.
   *
   * Drivers could subclass this, and if it's a page-directory, typically
- * embed the xe_ptw_dir::entries array in the same allocation.
+ * embed an array of xe_ptw pointers.
   */
  struct xe_ptw {
-       struct xe_ptw_dir *dir;
-};
-
-/**
- * struct xe_ptw_dir - page directory structure
- * @entries: Array holding page directory children.
- *
- * It is the responsibility of the user to ensure @entries is
- * correctly sized.
- */
-struct xe_ptw_dir {
-       struct xe_ptw *entries[0];
+       struct xe_ptw **children;
  };
  
  /**
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c

index 9b35673b286c80c1c2332d6ece9add9f754f3ca8..7e924faeeea0b0f8ebc2f7fe89ade231412a3892 100644 (file)
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -459,21 +459,21 @@ static size_t calc_topo_query_size(struct xe_device *xe)
                  sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
  }
  
-static void __user *copy_mask(void __user *ptr,
-                             struct drm_xe_query_topology_mask *topo,
-                             void *mask, size_t mask_size)
+static int copy_mask(void __user **ptr,
+                    struct drm_xe_query_topology_mask *topo,
+                    void *mask, size_t mask_size)
  {
         topo->num_bytes = mask_size;
  
-       if (copy_to_user(ptr, topo, sizeof(*topo)))
-               return ERR_PTR(-EFAULT);
-       ptr += sizeof(topo);
+       if (copy_to_user(*ptr, topo, sizeof(*topo)))
+               return -EFAULT;
+       *ptr += sizeof(topo);
  
-       if (copy_to_user(ptr, mask, mask_size))
-               return ERR_PTR(-EFAULT);
-       ptr += mask_size;
+       if (copy_to_user(*ptr, mask, mask_size))
+               return -EFAULT;
+       *ptr += mask_size;
  
-       return ptr;
+       return 0;
  }
  
  static int query_gt_topology(struct xe_device *xe,
@@ -493,28 +493,28 @@ static int query_gt_topology(struct xe_device *xe,
         }
  
         for_each_gt(gt, xe, id) {
+               int err;
+
                 topo.gt_id = id;
  
                 topo.type = DRM_XE_TOPO_DSS_GEOMETRY;
-               query_ptr = copy_mask(query_ptr, &topo,
-                                     gt->fuse_topo.g_dss_mask,
-                                     sizeof(gt->fuse_topo.g_dss_mask));
-               if (IS_ERR(query_ptr))
-                       return PTR_ERR(query_ptr);
+               err = copy_mask(&query_ptr, &topo, gt->fuse_topo.g_dss_mask,
+                               sizeof(gt->fuse_topo.g_dss_mask));
+               if (err)
+                       return err;
  
                 topo.type = DRM_XE_TOPO_DSS_COMPUTE;
-               query_ptr = copy_mask(query_ptr, &topo,
-                                     gt->fuse_topo.c_dss_mask,
-                                     sizeof(gt->fuse_topo.c_dss_mask));
-               if (IS_ERR(query_ptr))
-                       return PTR_ERR(query_ptr);
+               err = copy_mask(&query_ptr, &topo, gt->fuse_topo.c_dss_mask,
+                               sizeof(gt->fuse_topo.c_dss_mask));
+               if (err)
+                       return err;
  
                 topo.type = DRM_XE_TOPO_EU_PER_DSS;
-               query_ptr = copy_mask(query_ptr, &topo,
-                                     gt->fuse_topo.eu_mask_per_dss,
-                                     sizeof(gt->fuse_topo.eu_mask_per_dss));
-               if (IS_ERR(query_ptr))
-                       return PTR_ERR(query_ptr);
+               err = copy_mask(&query_ptr, &topo,
+                               gt->fuse_topo.eu_mask_per_dss,
+                               sizeof(gt->fuse_topo.eu_mask_per_dss));
+               if (err)
+                       return err;
         }
  
         return 0;
diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c

index d35d9ec58e86f95c8244fc02e6a9709b63ccf93a..372378e89e989239833879e23d2e62a2fd573b54 100644 (file)
--- a/drivers/gpu/drm/xe/xe_range_fence.c
+++ b/drivers/gpu/drm/xe/xe_range_fence.c
@@ -151,6 +151,11 @@ xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last)
         return xe_range_fence_tree_iter_next(rfence, start, last);
  }
  
+static void xe_range_fence_free(struct xe_range_fence *rfence)
+{
+       kfree(rfence);
+}
+
  const struct xe_range_fence_ops xe_range_fence_kfree_ops = {
-       .free = (void (*)(struct xe_range_fence *rfence)) kfree,
+       .free = xe_range_fence_free,
  };
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c

index 01106a1156ad82ab30378b29abf3f18d55b64fe3..4e2ccad0e52fabaf43ea26ddc1dd86f2294662a1 100644 (file)
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -274,7 +274,6 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
         struct dma_fence *fence;
  
         fence = xe_exec_queue_last_fence_get(job->q, vm);
-       dma_fence_get(fence);
  
         return drm_sched_job_add_dependency(&job->drm, fence);
  }
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c

index e4c220cf9115e9d52fc7b1e9440e0e44ba247c46..02c9577fe418516bcb891174b9599b6c0b2903bf 100644 (file)
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -19,7 +19,7 @@
  #include "xe_macros.h"
  #include "xe_sched_job_types.h"
  
-struct user_fence {
+struct xe_user_fence {
         struct xe_device *xe;
         struct kref refcount;
         struct dma_fence_cb cb;
@@ -27,31 +27,32 @@ struct user_fence {
         struct mm_struct *mm;
         u64 __user *addr;
         u64 value;
+       int signalled;
  };
  
  static void user_fence_destroy(struct kref *kref)
  {
-       struct user_fence *ufence = container_of(kref, struct user_fence,
+       struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence,
                                                  refcount);
  
         mmdrop(ufence->mm);
         kfree(ufence);
  }
  
-static void user_fence_get(struct user_fence *ufence)
+static void user_fence_get(struct xe_user_fence *ufence)
  {
         kref_get(&ufence->refcount);
  }
  
-static void user_fence_put(struct user_fence *ufence)
+static void user_fence_put(struct xe_user_fence *ufence)
  {
         kref_put(&ufence->refcount, user_fence_destroy);
  }
  
-static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
-                                           u64 value)
+static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
+                                              u64 value)
  {
-       struct user_fence *ufence;
+       struct xe_user_fence *ufence;
  
         ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
         if (!ufence)
@@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
  
  static void user_fence_worker(struct work_struct *w)
  {
-       struct user_fence *ufence = container_of(w, struct user_fence, worker);
+       struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker);
  
         if (mmget_not_zero(ufence->mm)) {
                 kthread_use_mm(ufence->mm);
@@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w)
         }
  
         wake_up_all(&ufence->xe->ufence_wq);
+       WRITE_ONCE(ufence->signalled, 1);
         user_fence_put(ufence);
  }
  
-static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
+static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence)
  {
         INIT_WORK(&ufence->worker, user_fence_worker);
         queue_work(ufence->xe->ordered_wq, &ufence->worker);
@@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
  
  static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
  {
-       struct user_fence *ufence = container_of(cb, struct user_fence, cb);
+       struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb);
  
         kick_ufence(ufence, fence);
  }
@@ -307,7 +309,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
         /* Easy case... */
         if (!num_in_fence) {
                 fence = xe_exec_queue_last_fence_get(q, vm);
-               dma_fence_get(fence);
                 return fence;
         }
  
@@ -322,7 +323,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
                 }
         }
         fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
-       dma_fence_get(fences[current_fence - 1]);
         cf = dma_fence_array_create(num_in_fence, fences,
                                     vm->composite_fence_ctx,
                                     vm->composite_fence_seqno++,
@@ -342,3 +342,39 @@ err_out:
  
         return ERR_PTR(-ENOMEM);
  }
+
+/**
+ * xe_sync_ufence_get() - Get user fence from sync
+ * @sync: input sync
+ *
+ * Get a user fence reference from sync.
+ *
+ * Return: xe_user_fence pointer with reference
+ */
+struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync)
+{
+       user_fence_get(sync->ufence);
+
+       return sync->ufence;
+}
+
+/**
+ * xe_sync_ufence_put() - Put user fence reference
+ * @ufence: user fence reference
+ *
+ */
+void xe_sync_ufence_put(struct xe_user_fence *ufence)
+{
+       user_fence_put(ufence);
+}
+
+/**
+ * xe_sync_ufence_get_status() - Get user fence status
+ * @ufence: user fence
+ *
+ * Return: 1 if signalled, 0 not signalled, <0 on error
+ */
+int xe_sync_ufence_get_status(struct xe_user_fence *ufence)
+{
+       return READ_ONCE(ufence->signalled);
+}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h

index d284afbe917c19203473b30d0abc38ca88ffbfa2..0fd0d51208e627c9be72eef661c160458db6f5a4 100644 (file)
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -33,4 +33,13 @@ struct dma_fence *
  xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
                      struct xe_exec_queue *q, struct xe_vm *vm);
  
+static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync)
+{
+       return !!sync->ufence;
+}
+
+struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync);
+void xe_sync_ufence_put(struct xe_user_fence *ufence);
+int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
+
  #endif
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h

index 852db5e7884fcde668f6f85b6e4049fa5290f8a9..30ac3f51993b944e3dd86ccb059c75441f87f5e1 100644 (file)
--- a/drivers/gpu/drm/xe/xe_sync_types.h
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -18,7 +18,7 @@ struct xe_sync_entry {
         struct drm_syncobj *syncobj;
         struct dma_fence *fence;
         struct dma_fence_chain *chain_fence;
-       struct user_fence *ufence;
+       struct xe_user_fence *ufence;
         u64 addr;
         u64 timeline_value;
         u32 type;
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h

index 95163c303f3e11694bdc1bafd18eb6386740eb01..4ddc55527f9ab3e632635c5f920d4f4420df1255 100644 (file)
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -12,6 +12,7 @@
  #include <linux/tracepoint.h>
  #include <linux/types.h>
  
+#include "xe_bo.h"
  #include "xe_bo_types.h"
  #include "xe_exec_queue_types.h"
  #include "xe_gpu_scheduler_types.h"
@@ -26,16 +27,16 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
                     TP_ARGS(fence),
  
                     TP_STRUCT__entry(
-                            __field(u64, fence)
+                            __field(struct xe_gt_tlb_invalidation_fence *, fence)
                              __field(int, seqno)
                              ),
  
                     TP_fast_assign(
-                          __entry->fence = (u64)fence;
+                          __entry->fence = fence;
                            __entry->seqno = fence->seqno;
                            ),
  
-                   TP_printk("fence=0x%016llx, seqno=%d",
+                   TP_printk("fence=%p, seqno=%d",
                               __entry->fence, __entry->seqno)
  );
  
@@ -82,16 +83,16 @@ DECLARE_EVENT_CLASS(xe_bo,
                     TP_STRUCT__entry(
                              __field(size_t, size)
                              __field(u32, flags)
-                            __field(u64, vm)
+                            __field(struct xe_vm *, vm)
                              ),
  
                     TP_fast_assign(
                            __entry->size = bo->size;
                            __entry->flags = bo->flags;
-                          __entry->vm = (unsigned long)bo->vm;
+                          __entry->vm = bo->vm;
                            ),
  
-                   TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx",
+                   TP_printk("size=%zu, flags=0x%02x, vm=%p",
                               __entry->size, __entry->flags, __entry->vm)
  );
  
@@ -100,9 +101,31 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
              TP_ARGS(bo)
  );
  
-DEFINE_EVENT(xe_bo, xe_bo_move,
-            TP_PROTO(struct xe_bo *bo),
-            TP_ARGS(bo)
+TRACE_EVENT(xe_bo_move,
+           TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement,
+                    bool move_lacks_source),
+           TP_ARGS(bo, new_placement, old_placement, move_lacks_source),
+           TP_STRUCT__entry(
+                    __field(struct xe_bo *, bo)
+                    __field(size_t, size)
+                    __field(u32, new_placement)
+                    __field(u32, old_placement)
+                    __array(char, device_id, 12)
+                    __field(bool, move_lacks_source)
+                       ),
+
+           TP_fast_assign(
+                  __entry->bo      = bo;
+                  __entry->size = bo->size;
+                  __entry->new_placement = new_placement;
+                  __entry->old_placement = old_placement;
+                  strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12);
+                  __entry->move_lacks_source = move_lacks_source;
+                  ),
+           TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s",
+                     __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size,
+                     xe_mem_type_to_name[__entry->old_placement],
+                     xe_mem_type_to_name[__entry->new_placement], __entry->device_id)
  );
  
  DECLARE_EVENT_CLASS(xe_exec_queue,
@@ -327,16 +350,16 @@ DECLARE_EVENT_CLASS(xe_hw_fence,
                     TP_STRUCT__entry(
                              __field(u64, ctx)
                              __field(u32, seqno)
-                            __field(u64, fence)
+                            __field(struct xe_hw_fence *, fence)
                              ),
  
                     TP_fast_assign(
                            __entry->ctx = fence->dma.context;
                            __entry->seqno = fence->dma.seqno;
-                          __entry->fence = (unsigned long)fence;
+                          __entry->fence = fence;
                            ),
  
-                   TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u",
+                   TP_printk("ctx=0x%016llx, fence=%p, seqno=%u",
                               __entry->ctx, __entry->fence, __entry->seqno)
  );
  
@@ -365,7 +388,7 @@ DECLARE_EVENT_CLASS(xe_vma,
                     TP_ARGS(vma),
  
                     TP_STRUCT__entry(
-                            __field(u64, vma)
+                            __field(struct xe_vma *, vma)
                              __field(u32, asid)
                              __field(u64, start)
                              __field(u64, end)
@@ -373,14 +396,14 @@ DECLARE_EVENT_CLASS(xe_vma,
                              ),
  
                     TP_fast_assign(
-                          __entry->vma = (unsigned long)vma;
+                          __entry->vma = vma;
                            __entry->asid = xe_vma_vm(vma)->usm.asid;
                            __entry->start = xe_vma_start(vma);
                            __entry->end = xe_vma_end(vma) - 1;
                            __entry->ptr = xe_vma_userptr(vma);
                            ),
  
-                   TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,",
+                   TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,",
                               __entry->vma, __entry->asid, __entry->start,
                               __entry->end, __entry->ptr)
  )
@@ -465,16 +488,16 @@ DECLARE_EVENT_CLASS(xe_vm,
                     TP_ARGS(vm),
  
                     TP_STRUCT__entry(
-                            __field(u64, vm)
+                            __field(struct xe_vm *, vm)
                              __field(u32, asid)
                              ),
  
                     TP_fast_assign(
-                          __entry->vm = (unsigned long)vm;
+                          __entry->vm = vm;
                            __entry->asid = vm->usm.asid;
                            ),
  
-                   TP_printk("vm=0x%016llx, asid=0x%05x",  __entry->vm,
+                   TP_printk("vm=%p, asid=0x%05x",  __entry->vm,
                               __entry->asid)
  );
  
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c

index 53833ab81424ceeca8edd95e15a2c6a34e681f6c..3b21afe5b4883fa64aeb92c6d2174b014be96c59 100644 (file)
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -37,8 +37,6 @@
  #include "generated/xe_wa_oob.h"
  #include "xe_wa.h"
  
-#define TEST_VM_ASYNC_OPS_ERROR
-
  static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
  {
         return vm->gpuvm.r_obj;
@@ -46,7 +44,7 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
  
  /**
   * xe_vma_userptr_check_repin() - Advisory check for repin needed
- * @vma: The userptr vma
+ * @uvma: The userptr vma
   *
   * Check if the userptr vma has been invalidated since last successful
   * repin. The check is advisory only and can the function can be called
@@ -56,15 +54,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
   *
   * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
   */
-int xe_vma_userptr_check_repin(struct xe_vma *vma)
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
  {
-       return mmu_interval_check_retry(&vma->userptr.notifier,
-                                       vma->userptr.notifier_seq) ?
+       return mmu_interval_check_retry(&uvma->userptr.notifier,
+                                       uvma->userptr.notifier_seq) ?
                 -EAGAIN : 0;
  }
  
-int xe_vma_userptr_pin_pages(struct xe_vma *vma)
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
  {
+       struct xe_userptr *userptr = &uvma->userptr;
+       struct xe_vma *vma = &uvma->vma;
         struct xe_vm *vm = xe_vma_vm(vma);
         struct xe_device *xe = vm->xe;
         const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
@@ -80,30 +80,30 @@ retry:
         if (vma->gpuva.flags & XE_VMA_DESTROYED)
                 return 0;
  
-       notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier);
-       if (notifier_seq == vma->userptr.notifier_seq)
+       notifier_seq = mmu_interval_read_begin(&userptr->notifier);
+       if (notifier_seq == userptr->notifier_seq)
                 return 0;
  
         pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
         if (!pages)
                 return -ENOMEM;
  
-       if (vma->userptr.sg) {
+       if (userptr->sg) {
                 dma_unmap_sgtable(xe->drm.dev,
-                                 vma->userptr.sg,
+                                 userptr->sg,
                                   read_only ? DMA_TO_DEVICE :
                                   DMA_BIDIRECTIONAL, 0);
-               sg_free_table(vma->userptr.sg);
-               vma->userptr.sg = NULL;
+               sg_free_table(userptr->sg);
+               userptr->sg = NULL;
         }
  
         pinned = ret = 0;
         if (in_kthread) {
-               if (!mmget_not_zero(vma->userptr.notifier.mm)) {
+               if (!mmget_not_zero(userptr->notifier.mm)) {
                         ret = -EFAULT;
                         goto mm_closed;
                 }
-               kthread_use_mm(vma->userptr.notifier.mm);
+               kthread_use_mm(userptr->notifier.mm);
         }
  
         while (pinned < num_pages) {
@@ -112,43 +112,40 @@ retry:
                                           num_pages - pinned,
                                           read_only ? 0 : FOLL_WRITE,
                                           &pages[pinned]);
-               if (ret < 0) {
-                       if (in_kthread)
-                               ret = 0;
+               if (ret < 0)
                         break;
-               }
  
                 pinned += ret;
                 ret = 0;
         }
  
         if (in_kthread) {
-               kthread_unuse_mm(vma->userptr.notifier.mm);
-               mmput(vma->userptr.notifier.mm);
+               kthread_unuse_mm(userptr->notifier.mm);
+               mmput(userptr->notifier.mm);
         }
  mm_closed:
         if (ret)
                 goto out;
  
-       ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages,
+       ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
                                                 pinned, 0,
                                                 (u64)pinned << PAGE_SHIFT,
                                                 xe_sg_segment_size(xe->drm.dev),
                                                 GFP_KERNEL);
         if (ret) {
-               vma->userptr.sg = NULL;
+               userptr->sg = NULL;
                 goto out;
         }
-       vma->userptr.sg = &vma->userptr.sgt;
+       userptr->sg = &userptr->sgt;
  
-       ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg,
+       ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
                               read_only ? DMA_TO_DEVICE :
                               DMA_BIDIRECTIONAL,
                               DMA_ATTR_SKIP_CPU_SYNC |
                               DMA_ATTR_NO_KERNEL_MAPPING);
         if (ret) {
-               sg_free_table(vma->userptr.sg);
-               vma->userptr.sg = NULL;
+               sg_free_table(userptr->sg);
+               userptr->sg = NULL;
                 goto out;
         }
  
@@ -167,8 +164,8 @@ out:
         kvfree(pages);
  
         if (!(ret < 0)) {
-               vma->userptr.notifier_seq = notifier_seq;
-               if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
+               userptr->notifier_seq = notifier_seq;
+               if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
                         goto retry;
         }
  
@@ -635,7 +632,9 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
                                    const struct mmu_notifier_range *range,
                                    unsigned long cur_seq)
  {
-       struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier);
+       struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
+       struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
+       struct xe_vma *vma = &uvma->vma;
         struct xe_vm *vm = xe_vma_vm(vma);
         struct dma_resv_iter cursor;
         struct dma_fence *fence;
@@ -651,7 +650,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
         mmu_interval_set_seq(mni, cur_seq);
  
         /* No need to stop gpu access if the userptr is not yet bound. */
-       if (!vma->userptr.initial_bind) {
+       if (!userptr->initial_bind) {
                 up_write(&vm->userptr.notifier_lock);
                 return true;
         }
@@ -663,7 +662,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
         if (!xe_vm_in_fault_mode(vm) &&
             !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
                 spin_lock(&vm->userptr.invalidated_lock);
-               list_move_tail(&vma->userptr.invalidate_link,
+               list_move_tail(&userptr->invalidate_link,
                                &vm->userptr.invalidated);
                 spin_unlock(&vm->userptr.invalidated_lock);
         }
@@ -703,7 +702,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
  
  int xe_vm_userptr_pin(struct xe_vm *vm)
  {
-       struct xe_vma *vma, *next;
+       struct xe_userptr_vma *uvma, *next;
         int err = 0;
         LIST_HEAD(tmp_evict);
  
@@ -711,22 +710,23 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
  
         /* Collect invalidated userptrs */
         spin_lock(&vm->userptr.invalidated_lock);
-       list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
+       list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
                                  userptr.invalidate_link) {
-               list_del_init(&vma->userptr.invalidate_link);
-               list_move_tail(&vma->combined_links.userptr,
+               list_del_init(&uvma->userptr.invalidate_link);
+               list_move_tail(&uvma->userptr.repin_link,
                                &vm->userptr.repin_list);
         }
         spin_unlock(&vm->userptr.invalidated_lock);
  
         /* Pin and move to temporary list */
-       list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
-                                combined_links.userptr) {
-               err = xe_vma_userptr_pin_pages(vma);
+       list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+                                userptr.repin_link) {
+               err = xe_vma_userptr_pin_pages(uvma);
                 if (err < 0)
                         return err;
  
-               list_move_tail(&vma->combined_links.userptr, &vm->rebind_list);
+               list_del_init(&uvma->userptr.repin_link);
+               list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list);
         }
  
         return 0;
@@ -782,6 +782,14 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
         return fence;
  }
  
+static void xe_vma_free(struct xe_vma *vma)
+{
+       if (xe_vma_is_userptr(vma))
+               kfree(to_userptr_vma(vma));
+       else
+               kfree(vma);
+}
+
  #define VMA_CREATE_FLAG_READ_ONLY      BIT(0)
  #define VMA_CREATE_FLAG_IS_NULL                BIT(1)
  
@@ -800,14 +808,26 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
         xe_assert(vm->xe, start < end);
         xe_assert(vm->xe, end < vm->size);
  
-       if (!bo && !is_null)    /* userptr */
+       /*
+        * Allocate and ensure that the xe_vma_is_userptr() return
+        * matches what was allocated.
+        */
+       if (!bo && !is_null) {
+               struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
+
+               if (!uvma)
+                       return ERR_PTR(-ENOMEM);
+
+               vma = &uvma->vma;
+       } else {
                 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
-       else
-               vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr),
-                             GFP_KERNEL);
-       if (!vma) {
-               vma = ERR_PTR(-ENOMEM);
-               return vma;
+               if (!vma)
+                       return ERR_PTR(-ENOMEM);
+
+               if (is_null)
+                       vma->gpuva.flags |= DRM_GPUVA_SPARSE;
+               if (bo)
+                       vma->gpuva.gem.obj = &bo->ttm.base;
         }
  
         INIT_LIST_HEAD(&vma->combined_links.rebind);
@@ -818,8 +838,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
         vma->gpuva.va.range = end - start + 1;
         if (read_only)
                 vma->gpuva.flags |= XE_VMA_READ_ONLY;
-       if (is_null)
-               vma->gpuva.flags |= DRM_GPUVA_SPARSE;
  
         for_each_tile(tile, vm->xe, id)
                 vma->tile_mask |= 0x1 << id;
@@ -836,35 +854,35 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
  
                 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
                 if (IS_ERR(vm_bo)) {
-                       kfree(vma);
+                       xe_vma_free(vma);
                         return ERR_CAST(vm_bo);
                 }
  
                 drm_gpuvm_bo_extobj_add(vm_bo);
                 drm_gem_object_get(&bo->ttm.base);
-               vma->gpuva.gem.obj = &bo->ttm.base;
                 vma->gpuva.gem.offset = bo_offset_or_userptr;
                 drm_gpuva_link(&vma->gpuva, vm_bo);
                 drm_gpuvm_bo_put(vm_bo);
         } else /* userptr or null */ {
                 if (!is_null) {
+                       struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
                         u64 size = end - start + 1;
                         int err;
  
-                       INIT_LIST_HEAD(&vma->userptr.invalidate_link);
+                       INIT_LIST_HEAD(&userptr->invalidate_link);
+                       INIT_LIST_HEAD(&userptr->repin_link);
                         vma->gpuva.gem.offset = bo_offset_or_userptr;
  
-                       err = mmu_interval_notifier_insert(&vma->userptr.notifier,
+                       err = mmu_interval_notifier_insert(&userptr->notifier,
                                                            current->mm,
                                                            xe_vma_userptr(vma), size,
                                                            &vma_userptr_notifier_ops);
                         if (err) {
-                               kfree(vma);
-                               vma = ERR_PTR(err);
-                               return vma;
+                               xe_vma_free(vma);
+                               return ERR_PTR(err);
                         }
  
-                       vma->userptr.notifier_seq = LONG_MAX;
+                       userptr->notifier_seq = LONG_MAX;
                 }
  
                 xe_vm_get(vm);
@@ -879,14 +897,21 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
         struct xe_device *xe = vm->xe;
         bool read_only = xe_vma_read_only(vma);
  
+       if (vma->ufence) {
+               xe_sync_ufence_put(vma->ufence);
+               vma->ufence = NULL;
+       }
+
         if (xe_vma_is_userptr(vma)) {
-               if (vma->userptr.sg) {
+               struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+
+               if (userptr->sg) {
                         dma_unmap_sgtable(xe->drm.dev,
-                                         vma->userptr.sg,
+                                         userptr->sg,
                                           read_only ? DMA_TO_DEVICE :
                                           DMA_BIDIRECTIONAL, 0);
-                       sg_free_table(vma->userptr.sg);
-                       vma->userptr.sg = NULL;
+                       sg_free_table(userptr->sg);
+                       userptr->sg = NULL;
                 }
  
                 /*
@@ -894,7 +919,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
                  * the notifer until we're sure the GPU is not accessing
                  * them anymore
                  */
-               mmu_interval_notifier_remove(&vma->userptr.notifier);
+               mmu_interval_notifier_remove(&userptr->notifier);
                 xe_vm_put(vm);
         } else if (xe_vma_is_null(vma)) {
                 xe_vm_put(vm);
@@ -902,7 +927,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
                 xe_bo_put(xe_vma_bo(vma));
         }
  
-       kfree(vma);
+       xe_vma_free(vma);
  }
  
  static void vma_destroy_work_func(struct work_struct *w)
@@ -933,7 +958,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
                 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
  
                 spin_lock(&vm->userptr.invalidated_lock);
-               list_del(&vma->userptr.invalidate_link);
+               list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
                 spin_unlock(&vm->userptr.invalidated_lock);
         } else if (!xe_vma_is_null(vma)) {
                 xe_bo_assert_held(xe_vma_bo(vma));
@@ -975,9 +1000,16 @@ int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
         int err;
  
         XE_WARN_ON(!vm);
-       err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
-       if (!err && bo && !bo->vm)
-               err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+       if (num_shared)
+               err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
+       else
+               err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+       if (!err && bo && !bo->vm) {
+               if (num_shared)
+                       err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+               else
+                       err = drm_exec_lock_obj(exec, &bo->ttm.base);
+       }
  
         return err;
  }
@@ -1581,6 +1613,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
  
         trace_xe_vma_unbind(vma);
  
+       if (vma->ufence) {
+               struct xe_user_fence * const f = vma->ufence;
+
+               if (!xe_sync_ufence_get_status(f))
+                       return ERR_PTR(-EBUSY);
+
+               vma->ufence = NULL;
+               xe_sync_ufence_put(f);
+       }
+
         if (number_tiles > 1) {
                 fences = kmalloc_array(number_tiles, sizeof(*fences),
                                        GFP_KERNEL);
@@ -1714,6 +1756,21 @@ err_fences:
         return ERR_PTR(err);
  }
  
+static struct xe_user_fence *
+find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+       unsigned int i;
+
+       for (i = 0; i < num_syncs; i++) {
+               struct xe_sync_entry *e = &syncs[i];
+
+               if (xe_sync_is_ufence(e))
+                       return xe_sync_ufence_get(e);
+       }
+
+       return NULL;
+}
+
  static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
                         u32 num_syncs, bool immediate, bool first_op,
@@ -1721,9 +1778,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
  {
         struct dma_fence *fence;
         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+       struct xe_user_fence *ufence;
  
         xe_vm_assert_held(vm);
  
+       ufence = find_ufence_get(syncs, num_syncs);
+       if (vma->ufence && ufence)
+               xe_sync_ufence_put(vma->ufence);
+
+       vma->ufence = ufence ?: vma->ufence;
+
         if (immediate) {
                 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
                                        last_op);
@@ -1959,6 +2023,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
                                         xe_exec_queue_last_fence_get(wait_exec_queue, vm);
  
                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
+                               dma_fence_put(fence);
                         }
                 }
  
@@ -2039,7 +2104,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
         struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
         struct drm_gpuva_ops *ops;
         struct drm_gpuva_op *__op;
-       struct xe_vma_op *op;
         struct drm_gpuvm_bo *vm_bo;
         int err;
  
@@ -2086,23 +2150,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
         if (IS_ERR(ops))
                 return ops;
  
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-       if (operation & FORCE_ASYNC_OP_ERROR) {
-               op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
-                                             base.entry);
-               if (op)
-                       op->inject_error = true;
-       }
-#endif
-
         drm_gpuva_for_each_op(__op, ops) {
                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
  
                 if (__op->op == DRM_GPUVA_OP_MAP) {
-                       op->map.immediate =
-                               flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
-                       op->map.read_only =
-                               flags & DRM_XE_VM_BIND_FLAG_READONLY;
                         op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
                         op->map.pat_index = pat_index;
                 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
@@ -2150,7 +2201,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
                 drm_exec_fini(&exec);
  
         if (xe_vma_is_userptr(vma)) {
-               err = xe_vma_userptr_pin_pages(vma);
+               err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
                 if (err) {
                         prep_vma_destroy(vm, vma, false);
                         xe_vma_destroy_unlocked(vma);
@@ -2172,13 +2223,17 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma)
  {
         if (vma->gpuva.flags & XE_VMA_PTE_1G)
                 return SZ_1G;
-       else if (vma->gpuva.flags & XE_VMA_PTE_2M)
+       else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
                 return SZ_2M;
+       else if (vma->gpuva.flags & XE_VMA_PTE_64K)
+               return SZ_64K;
+       else if (vma->gpuva.flags & XE_VMA_PTE_4K)
+               return SZ_4K;
  
-       return SZ_4K;
+       return SZ_1G;   /* Uninitialized, used max size */
  }
  
-static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
+static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
  {
         switch (size) {
         case SZ_1G:
@@ -2187,9 +2242,13 @@ static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
         case SZ_2M:
                 vma->gpuva.flags |= XE_VMA_PTE_2M;
                 break;
+       case SZ_64K:
+               vma->gpuva.flags |= XE_VMA_PTE_64K;
+               break;
+       case SZ_4K:
+               vma->gpuva.flags |= XE_VMA_PTE_4K;
+               break;
         }
-
-       return SZ_4K;
  }
  
  static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
@@ -2287,8 +2346,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
                 switch (op->base.op) {
                 case DRM_GPUVA_OP_MAP:
                 {
-                       flags |= op->map.read_only ?
-                               VMA_CREATE_FLAG_READ_ONLY : 0;
                         flags |= op->map.is_null ?
                                 VMA_CREATE_FLAG_IS_NULL : 0;
  
@@ -2419,7 +2476,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
         case DRM_GPUVA_OP_MAP:
                 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
                                  op->syncs, op->num_syncs,
-                                op->map.immediate || !xe_vm_in_fault_mode(vm),
+                                !xe_vm_in_fault_mode(vm),
                                  op->flags & XE_VMA_OP_FIRST,
                                  op->flags & XE_VMA_OP_LAST);
                 break;
@@ -2505,13 +2562,25 @@ retry_userptr:
         }
         drm_exec_fini(&exec);
  
-       if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+       if (err == -EAGAIN) {
                 lockdep_assert_held_write(&vm->lock);
-               err = xe_vma_userptr_pin_pages(vma);
-               if (!err)
-                       goto retry_userptr;
  
-               trace_xe_vma_fail(vma);
+               if (op->base.op == DRM_GPUVA_OP_REMAP) {
+                       if (!op->remap.unmap_done)
+                               vma = gpuva_to_vma(op->base.remap.unmap->va);
+                       else if (op->remap.prev)
+                               vma = op->remap.prev;
+                       else
+                               vma = op->remap.next;
+               }
+
+               if (xe_vma_is_userptr(vma)) {
+                       err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+                       if (!err)
+                               goto retry_userptr;
+
+                       trace_xe_vma_fail(vma);
+               }
         }
  
         return err;
@@ -2523,13 +2592,6 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
  
         lockdep_assert_held_write(&vm->lock);
  
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-       if (op->inject_error) {
-               op->inject_error = false;
-               return -ENOMEM;
-       }
-#endif
-
         switch (op->base.op) {
         case DRM_GPUVA_OP_MAP:
                 ret = __xe_vma_op_execute(vm, op->map.vma, op);
@@ -2644,7 +2706,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
  {
         int i;
  
-       for (i = num_ops_list - 1; i; ++i) {
+       for (i = num_ops_list - 1; i >= 0; --i) {
                 struct drm_gpuva_ops *__ops = ops[i];
                 struct drm_gpuva_op *__op;
  
@@ -2689,21 +2751,11 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
         return 0;
  }
  
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-#define SUPPORTED_FLAGS        \
-       (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
-        DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
-#else
-#define SUPPORTED_FLAGS        \
-       (DRM_XE_VM_BIND_FLAG_READONLY | \
-        DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
-        0xffff)
-#endif
+#define SUPPORTED_FLAGS        (DRM_XE_VM_BIND_FLAG_NULL | \
+        DRM_XE_VM_BIND_FLAG_DUMPABLE)
  #define XE_64K_PAGE_MASK 0xffffull
  #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
  
-#define MAX_BINDS      512     /* FIXME: Picking random upper limit */
-
  static int vm_bind_ioctl_check_args(struct xe_device *xe,
                                     struct drm_xe_vm_bind *args,
                                     struct drm_xe_vm_bind_op **bind_ops)
@@ -2715,16 +2767,16 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
                 return -EINVAL;
  
-       if (XE_IOCTL_DBG(xe, args->extensions) ||
-           XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
+       if (XE_IOCTL_DBG(xe, args->extensions))
                 return -EINVAL;
  
         if (args->num_binds > 1) {
                 u64 __user *bind_user =
                         u64_to_user_ptr(args->vector_of_binds);
  
-               *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
-                                   args->num_binds, GFP_KERNEL);
+               *bind_ops = kvmalloc_array(args->num_binds,
+                                          sizeof(struct drm_xe_vm_bind_op),
+                                          GFP_KERNEL | __GFP_ACCOUNT);
                 if (!*bind_ops)
                         return -ENOMEM;
  
@@ -2814,7 +2866,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
  
  free_bind_ops:
         if (args->num_binds > 1)
-               kfree(*bind_ops);
+               kvfree(*bind_ops);
         return err;
  }
  
@@ -2851,7 +2903,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
         struct drm_gpuva_ops **ops = NULL;
         struct xe_vm *vm;
         struct xe_exec_queue *q = NULL;
-       u32 num_syncs;
+       u32 num_syncs, num_ufence = 0;
         struct xe_sync_entry *syncs = NULL;
         struct drm_xe_vm_bind_op *bind_ops;
         LIST_HEAD(ops_list);
@@ -2902,13 +2954,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
         }
  
         if (args->num_binds) {
-               bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
+               bos = kvcalloc(args->num_binds, sizeof(*bos),
+                              GFP_KERNEL | __GFP_ACCOUNT);
                 if (!bos) {
                         err = -ENOMEM;
                         goto release_vm_lock;
                 }
  
-               ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
+               ops = kvcalloc(args->num_binds, sizeof(*ops),
+                              GFP_KERNEL | __GFP_ACCOUNT);
                 if (!ops) {
                         err = -ENOMEM;
                         goto release_vm_lock;
@@ -2988,6 +3042,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                                            SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
                 if (err)
                         goto free_syncs;
+
+               if (xe_sync_is_ufence(&syncs[num_syncs]))
+                       num_ufence++;
+       }
+
+       if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
+               err = -EINVAL;
+               goto free_syncs;
         }
  
         if (!args->num_binds) {
@@ -3041,10 +3103,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
         for (i = 0; bos && i < args->num_binds; ++i)
                 xe_bo_put(bos[i]);
  
-       kfree(bos);
-       kfree(ops);
+       kvfree(bos);
+       kvfree(ops);
         if (args->num_binds > 1)
-               kfree(bind_ops);
+               kvfree(bind_ops);
  
         return err;
  
@@ -3068,10 +3130,10 @@ put_exec_queue:
         if (q)
                 xe_exec_queue_put(q);
  free_objs:
-       kfree(bos);
-       kfree(ops);
+       kvfree(bos);
+       kvfree(ops);
         if (args->num_binds > 1)
-               kfree(bind_ops);
+               kvfree(bind_ops);
         return err;
  }
  
@@ -3130,8 +3192,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
                 if (xe_vma_is_userptr(vma)) {
                         WARN_ON_ONCE(!mmu_interval_check_retry
-                                    (&vma->userptr.notifier,
-                                     vma->userptr.notifier_seq));
+                                    (&to_userptr_vma(vma)->userptr.notifier,
+                                     to_userptr_vma(vma)->userptr.notifier_seq));
                         WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
                                                              DMA_RESV_USAGE_BOOKKEEP));
  
@@ -3192,11 +3254,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
                 if (is_null) {
                         addr = 0;
                 } else if (is_userptr) {
+                       struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
                         struct xe_res_cursor cur;
  
-                       if (vma->userptr.sg) {
-                               xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE,
-                                               &cur);
+                       if (sg) {
+                               xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
                                 addr = xe_res_dma(&cur);
                         } else {
                                 addr = 0;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h

index cf2f96e8c1ab92245b69dd8853c90d5e128262fd..9654a0612fc258d0ba7395ba7c7fd87899caf904 100644 (file)
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -160,6 +160,18 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
         return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
  }
  
+/**
+ * to_userptr_vma() - Return a pointer to an embedding userptr vma
+ * @vma: Pointer to the embedded struct xe_vma
+ *
+ * Return: Pointer to the embedding userptr vma
+ */
+static inline struct xe_userptr_vma *to_userptr_vma(struct xe_vma *vma)
+{
+       xe_assert(xe_vma_vm(vma)->xe, xe_vma_is_userptr(vma));
+       return container_of(vma, struct xe_userptr_vma, vma);
+}
+
  u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile);
  
  int xe_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -224,9 +236,9 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
         }
  }
  
-int xe_vma_userptr_pin_pages(struct xe_vma *vma);
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
  
-int xe_vma_userptr_check_repin(struct xe_vma *vma);
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
  
  bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
  
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h

index 63e8a50b88e94980d65a0800817235c518adcd69..7300eea5394ba8c1ece10dba63314bf733ee5157 100644 (file)
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -19,11 +19,9 @@
  
  struct xe_bo;
  struct xe_sync_entry;
+struct xe_user_fence;
  struct xe_vm;
  
-#define TEST_VM_ASYNC_OPS_ERROR
-#define FORCE_ASYNC_OP_ERROR   BIT(31)
-
  #define XE_VMA_READ_ONLY       DRM_GPUVA_USERBITS
  #define XE_VMA_DESTROYED       (DRM_GPUVA_USERBITS << 1)
  #define XE_VMA_ATOMIC_PTE_BIT  (DRM_GPUVA_USERBITS << 2)
@@ -32,11 +30,15 @@ struct xe_vm;
  #define XE_VMA_PTE_4K          (DRM_GPUVA_USERBITS << 5)
  #define XE_VMA_PTE_2M          (DRM_GPUVA_USERBITS << 6)
  #define XE_VMA_PTE_1G          (DRM_GPUVA_USERBITS << 7)
+#define XE_VMA_PTE_64K         (DRM_GPUVA_USERBITS << 8)
+#define XE_VMA_PTE_COMPACT     (DRM_GPUVA_USERBITS << 9)
  
  /** struct xe_userptr - User pointer */
  struct xe_userptr {
         /** @invalidate_link: Link for the vm::userptr.invalidated list */
         struct list_head invalidate_link;
+       /** @userptr: link into VM repin list if userptr. */
+       struct list_head repin_link;
         /**
          * @notifier: MMU notifier for user pointer (invalidation call back)
          */
@@ -68,8 +70,6 @@ struct xe_vma {
          * resv.
          */
         union {
-               /** @userptr: link into VM repin list if userptr. */
-               struct list_head userptr;
                 /** @rebind: link into VM if this VMA needs rebinding. */
                 struct list_head rebind;
                 /** @destroy: link to contested list when VM is being closed. */
@@ -107,9 +107,19 @@ struct xe_vma {
         u16 pat_index;
  
         /**
-        * @userptr: user pointer state, only allocated for VMAs that are
-        * user pointers
+        * @ufence: The user fence that was provided with MAP.
+        * Needs to be signalled before UNMAP can be processed.
          */
+       struct xe_user_fence *ufence;
+};
+
+/**
+ * struct xe_userptr_vma - A userptr vma subclass
+ * @vma: The vma.
+ * @userptr: Additional userptr information.
+ */
+struct xe_userptr_vma {
+       struct xe_vma vma;
         struct xe_userptr userptr;
  };
  
@@ -285,10 +295,6 @@ struct xe_vm {
  struct xe_vma_op_map {
         /** @vma: VMA to map */
         struct xe_vma *vma;
-       /** @immediate: Immediate bind */
-       bool immediate;
-       /** @read_only: Read only */
-       bool read_only;
         /** @is_null: is NULL binding */
         bool is_null;
         /** @pat_index: The pat index to use for this operation. */
@@ -356,11 +362,6 @@ struct xe_vma_op {
         /** @flags: operation flags */
         enum xe_vma_op_flags flags;
  
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-       /** @inject_error: inject error to test async op error handling */
-       bool inject_error;
-#endif
-
         union {
                 /** @map: VMA map operation specific data */
                 struct xe_vma_op_map map;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c

index 42fd504abbcda248e67fd84a64e2f96a2609b4cb..89983d7d73ca1539c19ff4a511c0c179bd07ed91 100644 (file)
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = {
         .num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
         .sid_table = tegra186_sid_table,
         .reserve_vblank_syncpts = false,
+       .skip_reset_assert = true,
  };
  
  static const struct host1x_sid_entry tegra194_sid_table[] = {
@@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev)
         host1x_intr_stop(host);
         host1x_syncpt_save(host);
  
-       err = reset_control_bulk_assert(host->nresets, host->resets);
-       if (err) {
-               dev_err(dev, "failed to assert reset: %d\n", err);
-               goto resume_host1x;
-       }
+       if (!host->info->skip_reset_assert) {
+               err = reset_control_bulk_assert(host->nresets, host->resets);
+               if (err) {
+                       dev_err(dev, "failed to assert reset: %d\n", err);
+                       goto resume_host1x;
+               }
  
-       usleep_range(1000, 2000);
+               usleep_range(1000, 2000);
+       }
  
         clk_disable_unprepare(host->clk);
         reset_control_bulk_release(host->nresets, host->resets);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h

index c8e302de76257008aa3fb172da0c3acd4412c572..925a118db23f5751cbbe50db317e98fd4c543414 100644 (file)
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -116,6 +116,12 @@ struct host1x_info {
          * the display driver disables VBLANK increments.
          */
         bool reserve_vblank_syncpts;
+       /*
+        * On Tegra186, secure world applications may require access to
+        * host1x during suspend/resume. To allow this, we need to leave
+        * host1x not in reset.
+        */
+       bool skip_reset_assert;
  };
  
  struct host1x {
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c

index d9ef45fcaeab1380967fe2fa2357411d2bc913d4..470ae2c29c94f25b66127827b725da24b41e101b 100644 (file)
--- a/drivers/hid/bpf/hid_bpf_dispatch.c
+++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -143,6 +143,9 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s
  }
  EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup);
  
+/* Disables missing prototype warnings */
+__bpf_kfunc_start_defs();
+
  /**
   * hid_bpf_get_data - Get the kernel memory pointer associated with the context @ctx
   *
@@ -152,7 +155,7 @@ EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup);
   *
   * @returns %NULL on error, an %__u8 memory pointer on success
   */
-noinline __u8 *
+__bpf_kfunc __u8 *
  hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr_buf_size)
  {
         struct hid_bpf_ctx_kern *ctx_kern;
@@ -167,6 +170,7 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr
  
         return ctx_kern->data + offset;
  }
+__bpf_kfunc_end_defs();
  
  /*
   * The following set contains all functions we agree BPF programs
@@ -241,6 +245,42 @@ int hid_bpf_reconnect(struct hid_device *hdev)
         return 0;
  }
  
+static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct bpf_prog *prog,
+                                 __u32 flags)
+{
+       int fd, err, prog_type;
+
+       prog_type = hid_bpf_get_prog_attach_type(prog);
+       if (prog_type < 0)
+               return prog_type;
+
+       if (prog_type >= HID_BPF_PROG_TYPE_MAX)
+               return -EINVAL;
+
+       if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) {
+               err = hid_bpf_allocate_event_data(hdev);
+               if (err)
+                       return err;
+       }
+
+       fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, prog, flags);
+       if (fd < 0)
+               return fd;
+
+       if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) {
+               err = hid_bpf_reconnect(hdev);
+               if (err) {
+                       close_fd(fd);
+                       return err;
+               }
+       }
+
+       return fd;
+}
+
+/* Disables missing prototype warnings */
+__bpf_kfunc_start_defs();
+
  /**
   * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device
   *
@@ -253,22 +293,17 @@ int hid_bpf_reconnect(struct hid_device *hdev)
   * is pinned to the BPF file system).
   */
  /* called from syscall */
-noinline int
+__bpf_kfunc int
  hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags)
  {
         struct hid_device *hdev;
+       struct bpf_prog *prog;
         struct device *dev;
-       int fd, err, prog_type = hid_bpf_get_prog_attach_type(prog_fd);
+       int err, fd;
  
         if (!hid_bpf_ops)
                 return -EINVAL;
  
-       if (prog_type < 0)
-               return prog_type;
-
-       if (prog_type >= HID_BPF_PROG_TYPE_MAX)
-               return -EINVAL;
-
         if ((flags & ~HID_BPF_FLAG_MASK))
                 return -EINVAL;
  
@@ -278,25 +313,29 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags)
  
         hdev = to_hid_device(dev);
  
-       if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) {
-               err = hid_bpf_allocate_event_data(hdev);
-               if (err)
-                       return err;
+       /*
+        * take a ref on the prog itself, it will be released
+        * on errors or when it'll be detached
+        */
+       prog = bpf_prog_get(prog_fd);
+       if (IS_ERR(prog)) {
+               err = PTR_ERR(prog);
+               goto out_dev_put;
         }
  
-       fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, flags);
-       if (fd < 0)
-               return fd;
-
-       if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) {
-               err = hid_bpf_reconnect(hdev);
-               if (err) {
-                       close_fd(fd);
-                       return err;
-               }
+       fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags);
+       if (fd < 0) {
+               err = fd;
+               goto out_prog_put;
         }
  
         return fd;
+
+ out_prog_put:
+       bpf_prog_put(prog);
+ out_dev_put:
+       put_device(dev);
+       return err;
  }
  
  /**
@@ -306,7 +345,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags)
   *
   * @returns A pointer to &struct hid_bpf_ctx on success, %NULL on error.
   */
-noinline struct hid_bpf_ctx *
+__bpf_kfunc struct hid_bpf_ctx *
  hid_bpf_allocate_context(unsigned int hid_id)
  {
         struct hid_device *hdev;
@@ -323,8 +362,10 @@ hid_bpf_allocate_context(unsigned int hid_id)
         hdev = to_hid_device(dev);
  
         ctx_kern = kzalloc(sizeof(*ctx_kern), GFP_KERNEL);
-       if (!ctx_kern)
+       if (!ctx_kern) {
+               put_device(dev);
                 return NULL;
+       }
  
         ctx_kern->ctx.hid = hdev;
  
@@ -337,14 +378,19 @@ hid_bpf_allocate_context(unsigned int hid_id)
   * @ctx: the HID-BPF context to release
   *
   */
-noinline void
+__bpf_kfunc void
  hid_bpf_release_context(struct hid_bpf_ctx *ctx)
  {
         struct hid_bpf_ctx_kern *ctx_kern;
+       struct hid_device *hid;
  
         ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx);
+       hid = (struct hid_device *)ctx_kern->ctx.hid; /* ignore const */
  
         kfree(ctx_kern);
+
+       /* get_device() is called by bus_find_device() */
+       put_device(&hid->dev);
  }
  
  /**
@@ -358,7 +404,7 @@ hid_bpf_release_context(struct hid_bpf_ctx *ctx)
   *
   * @returns %0 on success, a negative error code otherwise.
   */
-noinline int
+__bpf_kfunc int
  hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz,
                    enum hid_report_type rtype, enum hid_class_request reqtype)
  {
@@ -426,6 +472,7 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz,
         kfree(dma_data);
         return ret;
  }
+__bpf_kfunc_end_defs();
  
  /* our HID-BPF entrypoints */
  BTF_SET8_START(hid_bpf_fmodret_ids)
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h

index 63dfc8605cd21efbc5f0bdc1844e4e79d73ab3cb..fbe0639d09f2604d6a8e11833eba82480640e289 100644 (file)
--- a/drivers/hid/bpf/hid_bpf_dispatch.h
+++ b/drivers/hid/bpf/hid_bpf_dispatch.h
@@ -12,9 +12,9 @@ struct hid_bpf_ctx_kern {
  
  int hid_bpf_preload_skel(void);
  void hid_bpf_free_links_and_skel(void);
-int hid_bpf_get_prog_attach_type(int prog_fd);
+int hid_bpf_get_prog_attach_type(struct bpf_prog *prog);
  int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd,
-                         __u32 flags);
+                         struct bpf_prog *prog, __u32 flags);
  void __hid_bpf_destroy_device(struct hid_device *hdev);
  int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type,
                      struct hid_bpf_ctx_kern *ctx_kern);
diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c

index eca34b7372f951fc17e156ec2cc3761282ea61e8..aa8e1c79cdf5518301e73e44038f75e6fb1173e0 100644 (file)
--- a/drivers/hid/bpf/hid_bpf_jmp_table.c
+++ b/drivers/hid/bpf/hid_bpf_jmp_table.c
@@ -196,6 +196,7 @@ static void __hid_bpf_do_release_prog(int map_fd, unsigned int idx)
  static void hid_bpf_release_progs(struct work_struct *work)
  {
         int i, j, n, map_fd = -1;
+       bool hdev_destroyed;
  
         if (!jmp_table.map)
                 return;
@@ -220,6 +221,12 @@ static void hid_bpf_release_progs(struct work_struct *work)
                 if (entry->hdev) {
                         hdev = entry->hdev;
                         type = entry->type;
+                       /*
+                        * hdev is still valid, even if we are called after hid_destroy_device():
+                        * when hid_bpf_attach() gets called, it takes a ref on the dev through
+                        * bus_find_device()
+                        */
+                       hdev_destroyed = hdev->bpf.destroyed;
  
                         hid_bpf_populate_hdev(hdev, type);
  
@@ -232,12 +239,19 @@ static void hid_bpf_release_progs(struct work_struct *work)
                                 if (test_bit(next->idx, jmp_table.enabled))
                                         continue;
  
-                               if (next->hdev == hdev && next->type == type)
+                               if (next->hdev == hdev && next->type == type) {
+                                       /*
+                                        * clear the hdev reference and decrement the device ref
+                                        * that was taken during bus_find_device() while calling
+                                        * hid_bpf_attach()
+                                        */
                                         next->hdev = NULL;
+                                       put_device(&hdev->dev);
+                               }
                         }
  
-                       /* if type was rdesc fixup, reconnect device */
-                       if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP)
+                       /* if type was rdesc fixup and the device is not gone, reconnect device */
+                       if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && !hdev_destroyed)
                                 hid_bpf_reconnect(hdev);
                 }
         }
@@ -333,15 +347,10 @@ static int hid_bpf_insert_prog(int prog_fd, struct bpf_prog *prog)
         return err;
  }
  
-int hid_bpf_get_prog_attach_type(int prog_fd)
+int hid_bpf_get_prog_attach_type(struct bpf_prog *prog)
  {
-       struct bpf_prog *prog = NULL;
-       int i;
         int prog_type = HID_BPF_PROG_TYPE_UNDEF;
-
-       prog = bpf_prog_get(prog_fd);
-       if (IS_ERR(prog))
-               return PTR_ERR(prog);
+       int i;
  
         for (i = 0; i < HID_BPF_PROG_TYPE_MAX; i++) {
                 if (hid_bpf_btf_ids[i] == prog->aux->attach_btf_id) {
@@ -350,8 +359,6 @@ int hid_bpf_get_prog_attach_type(int prog_fd)
                 }
         }
  
-       bpf_prog_put(prog);
-
         return prog_type;
  }
  
@@ -388,19 +395,13 @@ static const struct bpf_link_ops hid_bpf_link_lops = {
  /* called from syscall */
  noinline int
  __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type,
-                     int prog_fd, __u32 flags)
+                     int prog_fd, struct bpf_prog *prog, __u32 flags)
  {
         struct bpf_link_primer link_primer;
         struct hid_bpf_link *link;
-       struct bpf_prog *prog = NULL;
         struct hid_bpf_prog_entry *prog_entry;
         int cnt, err = -EINVAL, prog_table_idx = -1;
  
-       /* take a ref on the prog itself */
-       prog = bpf_prog_get(prog_fd);
-       if (IS_ERR(prog))
-               return PTR_ERR(prog);
-
         mutex_lock(&hid_bpf_attach_lock);
  
         link = kzalloc(sizeof(*link), GFP_USER);
@@ -467,7 +468,6 @@ __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type,
   err_unlock:
         mutex_unlock(&hid_bpf_attach_lock);
  
-       bpf_prog_put(prog);
         kfree(link);
  
         return err;
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h

index fb30e228d35f9a91b6bb395845584d9f073be26c..828a5c022c6407add84c44122248e0e1ea9aaa64 100644 (file)
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -298,6 +298,9 @@
  
  #define USB_VENDOR_ID_CIDC             0x1677
  
+#define I2C_VENDOR_ID_CIRQUE           0x0488
+#define I2C_PRODUCT_ID_CIRQUE_1063     0x1063
+
  #define USB_VENDOR_ID_CJTOUCH          0x24b8
  #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020
  #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c

index fd6d8f1d9b8f61992a69ce651dd379d121c2da49..d2f3f234f29dea35b2bfb37ef693ba9d6a9b8bf6 100644 (file)
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -203,6 +203,8 @@ struct hidpp_device {
         struct hidpp_scroll_counter vertical_wheel_counter;
  
         u8 wireless_feature_index;
+
+       bool connected_once;
  };
  
  /* HID++ 1.0 error codes */
@@ -988,8 +990,13 @@ static int hidpp_root_get_protocol_version(struct hidpp_device *hidpp)
         hidpp->protocol_minor = response.rap.params[1];
  
  print_version:
-       hid_info(hidpp->hid_dev, "HID++ %u.%u device connected.\n",
-                hidpp->protocol_major, hidpp->protocol_minor);
+       if (!hidpp->connected_once) {
+               hid_info(hidpp->hid_dev, "HID++ %u.%u device connected.\n",
+                        hidpp->protocol_major, hidpp->protocol_minor);
+               hidpp->connected_once = true;
+       } else
+               hid_dbg(hidpp->hid_dev, "HID++ %u.%u device connected.\n",
+                        hidpp->protocol_major, hidpp->protocol_minor);
         return 0;
  }
  
@@ -4184,7 +4191,7 @@ static void hidpp_connect_event(struct work_struct *work)
         /* Get device version to check if it is connected */
         ret = hidpp_root_get_protocol_version(hidpp);
         if (ret) {
-               hid_info(hidpp->hid_dev, "Disconnected\n");
+               hid_dbg(hidpp->hid_dev, "Disconnected\n");
                 if (hidpp->battery.ps) {
                         hidpp->battery.online = false;
                         hidpp->battery.status = POWER_SUPPLY_STATUS_UNKNOWN;
@@ -4610,6 +4617,8 @@ static const struct hid_device_id hidpp_devices[] = {
           HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC088) },
         { /* Logitech G Pro X Superlight Gaming Mouse over USB */
           HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) },
+       { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */
+         HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) },
  
         { /* G935 Gaming Headset */
           HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87),
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c

index fd5b0637dad683e7b20c929974c958e79936880c..3e91e4d6ba6fa335c7f5988638791d3df8d1773a 100644 (file)
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -2151,6 +2151,10 @@ static const struct hid_device_id mt_devices[] = {
                 HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
                         USB_VENDOR_ID_SYNAPTICS, 0xcd7e) },
  
+       { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
+               HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+                       USB_VENDOR_ID_SYNAPTICS, 0xcddc) },
+
         { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT,
                 HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
                         USB_VENDOR_ID_SYNAPTICS, 0xce08) },
diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c

index 82d0a77359c460c9bad772038e1a129e9983e0c0..58b15750dbb0ac2cb2ad333b616dc39cdea8c779 100644 (file)
--- a/drivers/hid/hid-nvidia-shield.c
+++ b/drivers/hid/hid-nvidia-shield.c
@@ -800,6 +800,8 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts)
  
         led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL,
                                    "thunderstrike%d:blue:led", ts->id);
+       if (!led->name)
+               return -ENOMEM;
         led->max_brightness = 1;
         led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN;
         led->brightness_get = &thunderstrike_led_get_brightness;
@@ -831,6 +833,8 @@ static inline int thunderstrike_psy_create(struct shield_device *shield_dev)
         shield_dev->battery_dev.desc.name =
                 devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL,
                                "thunderstrike_%d", ts->id);
+       if (!shield_dev->battery_dev.desc.name)
+               return -ENOMEM;
  
         shield_dev->battery_dev.psy = power_supply_register(
                 &hdev->dev, &shield_dev->battery_dev.desc, &psy_cfg);
diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c

index b3c4e50e248aa7eda08a356187ecea54cc803834..b08a5ab5852884219654ac449f255d9a3e3f1585 100644 (file)
--- a/drivers/hid/hid-steam.c
+++ b/drivers/hid/hid-steam.c
@@ -1109,10 +1109,9 @@ static int steam_probe(struct hid_device *hdev,
                 return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
  
         steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL);
-       if (!steam) {
-               ret = -ENOMEM;
-               goto steam_alloc_fail;
-       }
+       if (!steam)
+               return -ENOMEM;
+
         steam->hdev = hdev;
         hid_set_drvdata(hdev, steam);
         spin_lock_init(&steam->lock);
@@ -1129,14 +1128,14 @@ static int steam_probe(struct hid_device *hdev,
          */
         ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW);
         if (ret)
-               goto hid_hw_start_fail;
+               goto err_cancel_work;
  
         ret = hid_hw_open(hdev);
         if (ret) {
                 hid_err(hdev,
                         "%s:hid_hw_open\n",
                         __func__);
-               goto hid_hw_open_fail;
+               goto err_hw_stop;
         }
  
         if (steam->quirks & STEAM_QUIRK_WIRELESS) {
@@ -1152,36 +1151,37 @@ static int steam_probe(struct hid_device *hdev,
                         hid_err(hdev,
                                 "%s:steam_register failed with error %d\n",
                                 __func__, ret);
-                       goto input_register_fail;
+                       goto err_hw_close;
                 }
         }
  
         steam->client_hdev = steam_create_client_hid(hdev);
         if (IS_ERR(steam->client_hdev)) {
                 ret = PTR_ERR(steam->client_hdev);
-               goto client_hdev_fail;
+               goto err_stream_unregister;
         }
         steam->client_hdev->driver_data = steam;
  
         ret = hid_add_device(steam->client_hdev);
         if (ret)
-               goto client_hdev_add_fail;
+               goto err_destroy;
  
         return 0;
  
-client_hdev_add_fail:
-       hid_hw_stop(hdev);
-client_hdev_fail:
+err_destroy:
         hid_destroy_device(steam->client_hdev);
-input_register_fail:
-hid_hw_open_fail:
-hid_hw_start_fail:
+err_stream_unregister:
+       if (steam->connected)
+               steam_unregister(steam);
+err_hw_close:
+       hid_hw_close(hdev);
+err_hw_stop:
+       hid_hw_stop(hdev);
+err_cancel_work:
         cancel_work_sync(&steam->work_connect);
         cancel_delayed_work_sync(&steam->mode_switch);
         cancel_work_sync(&steam->rumble_work);
-steam_alloc_fail:
-       hid_err(hdev, "%s: failed with error %d\n",
-                       __func__, ret);
+
         return ret;
  }
  
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c

index 13c8dd8cd35060731165cd2018f96c6e7bfef512..2bc762d31ac70de9724df166422f31ab1c8687f4 100644 (file)
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -357,8 +357,11 @@ static int hidraw_release(struct inode * inode, struct file * file)
         down_write(&minors_rwsem);
  
         spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags);
-       for (int i = list->tail; i < list->head; i++)
-               kfree(list->buffer[i].value);
+       while (list->tail != list->head) {
+               kfree(list->buffer[list->tail].value);
+               list->buffer[list->tail].value = NULL;
+               list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1);
+       }
         list_del(&list->node);
         spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags);
         kfree(list);
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c

index 90f316ae9819af4759720aad86136721f78f5abe..2df1ab3c31cc54da812ee653face224f32e69fc2 100644 (file)
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -49,6 +49,7 @@
  #define I2C_HID_QUIRK_RESET_ON_RESUME          BIT(2)
  #define I2C_HID_QUIRK_BAD_INPUT_SIZE           BIT(3)
  #define I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET    BIT(4)
+#define I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND      BIT(5)
  
  /* Command opcodes */
  #define I2C_HID_OPCODE_RESET                   0x01
@@ -131,6 +132,8 @@ static const struct i2c_hid_quirks {
                  I2C_HID_QUIRK_RESET_ON_RESUME },
         { USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720,
                 I2C_HID_QUIRK_BAD_INPUT_SIZE },
+       { I2C_VENDOR_ID_CIRQUE, I2C_PRODUCT_ID_CIRQUE_1063,
+               I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND },
         /*
          * Sending the wakeup after reset actually break ELAN touchscreen controller
          */
@@ -956,7 +959,8 @@ static int i2c_hid_core_suspend(struct i2c_hid *ihid, bool force_poweroff)
                 return ret;
  
         /* Save some power */
-       i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP);
+       if (!(ihid->quirks & I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND))
+               i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP);
  
         disable_irq(client->irq);
  
diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c

index c4e1fa0273c84c3b2e3b438e04673727b05e6f6e..8be4d576da7733d28b8e4a1a07e86a0d11584ae6 100644 (file)
--- a/drivers/hid/i2c-hid/i2c-hid-of.c
+++ b/drivers/hid/i2c-hid/i2c-hid-of.c
@@ -87,6 +87,7 @@ static int i2c_hid_of_probe(struct i2c_client *client)
         if (!ihid_of)
                 return -ENOMEM;
  
+       ihid_of->client = client;
         ihid_of->ops.power_up = i2c_hid_of_power_up;
         ihid_of->ops.power_down = i2c_hid_of_power_down;
  
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c

index aa6cb033bb06b77f182e6df441a04e1b016aaef5..03d5601ce807b3b1d49ed88bc923774d71ace572 100644 (file)
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -722,6 +722,8 @@ void ishtp_bus_remove_all_clients(struct ishtp_device *ishtp_dev,
         spin_lock_irqsave(&ishtp_dev->cl_list_lock, flags);
         list_for_each_entry(cl, &ishtp_dev->cl_list, link) {
                 cl->state = ISHTP_CL_DISCONNECTED;
+               if (warm_reset && cl->device->reference_count)
+                       continue;
  
                 /*
                  * Wake any pending process. The waiter would check dev->state
diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c

index 82c907f01bd3b66af02efa1d313f3bb2f7cb7209..8a7f2f6a4f86864cd5783ed51852f56cef614d5f 100644 (file)
--- a/drivers/hid/intel-ish-hid/ishtp/client.c
+++ b/drivers/hid/intel-ish-hid/ishtp/client.c
@@ -49,7 +49,9 @@ static void ishtp_read_list_flush(struct ishtp_cl *cl)
         list_for_each_entry_safe(rb, next, &cl->dev->read_list.list, list)
                 if (rb->cl && ishtp_cl_cmp_id(cl, rb->cl)) {
                         list_del(&rb->list);
-                       ishtp_io_rb_free(rb);
+                       spin_lock(&cl->free_list_spinlock);
+                       list_add_tail(&rb->list, &cl->free_rb_list.list);
+                       spin_unlock(&cl->free_list_spinlock);
                 }
         spin_unlock_irqrestore(&cl->dev->read_list_spinlock, flags);
  }
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c

index b613f11ed9498d7045f8649496049dc1b0b91839..2bc45b24075c3fe4b70ef222bbd21a4ee11eeb21 100644 (file)
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2087,7 +2087,7 @@ static int wacom_allocate_inputs(struct wacom *wacom)
         return 0;
  }
  
-static int wacom_register_inputs(struct wacom *wacom)
+static int wacom_setup_inputs(struct wacom *wacom)
  {
         struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev;
         struct wacom_wac *wacom_wac = &(wacom->wacom_wac);
@@ -2106,10 +2106,6 @@ static int wacom_register_inputs(struct wacom *wacom)
                 input_free_device(pen_input_dev);
                 wacom_wac->pen_input = NULL;
                 pen_input_dev = NULL;
-       } else {
-               error = input_register_device(pen_input_dev);
-               if (error)
-                       goto fail;
         }
  
         error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac);
@@ -2118,10 +2114,6 @@ static int wacom_register_inputs(struct wacom *wacom)
                 input_free_device(touch_input_dev);
                 wacom_wac->touch_input = NULL;
                 touch_input_dev = NULL;
-       } else {
-               error = input_register_device(touch_input_dev);
-               if (error)
-                       goto fail;
         }
  
         error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac);
@@ -2130,7 +2122,34 @@ static int wacom_register_inputs(struct wacom *wacom)
                 input_free_device(pad_input_dev);
                 wacom_wac->pad_input = NULL;
                 pad_input_dev = NULL;
-       } else {
+       }
+
+       return 0;
+}
+
+static int wacom_register_inputs(struct wacom *wacom)
+{
+       struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev;
+       struct wacom_wac *wacom_wac = &(wacom->wacom_wac);
+       int error = 0;
+
+       pen_input_dev = wacom_wac->pen_input;
+       touch_input_dev = wacom_wac->touch_input;
+       pad_input_dev = wacom_wac->pad_input;
+
+       if (pen_input_dev) {
+               error = input_register_device(pen_input_dev);
+               if (error)
+                       goto fail;
+       }
+
+       if (touch_input_dev) {
+               error = input_register_device(touch_input_dev);
+               if (error)
+                       goto fail;
+       }
+
+       if (pad_input_dev) {
                 error = input_register_device(pad_input_dev);
                 if (error)
                         goto fail;
@@ -2383,6 +2402,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
         if (error)
                 goto fail;
  
+       error = wacom_setup_inputs(wacom);
+       if (error)
+               goto fail;
+
+       if (features->type == HID_GENERIC)
+               connect_mask |= HID_CONNECT_DRIVER;
+
+       /* Regular HID work starts now */
+       error = hid_hw_start(hdev, connect_mask);
+       if (error) {
+               hid_err(hdev, "hw start failed\n");
+               goto fail;
+       }
+
         error = wacom_register_inputs(wacom);
         if (error)
                 goto fail;
@@ -2397,16 +2430,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
                         goto fail;
         }
  
-       if (features->type == HID_GENERIC)
-               connect_mask |= HID_CONNECT_DRIVER;
-
-       /* Regular HID work starts now */
-       error = hid_hw_start(hdev, connect_mask);
-       if (error) {
-               hid_err(hdev, "hw start failed\n");
-               goto fail;
-       }
-
         if (!wireless) {
                 /* Note that if query fails it is not a hard failure */
                 wacom_query_tablet_data(wacom);
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c

index da8a01fedd3944a7588aad5e2a523b44b2b2797c..fbe10fbc5769e53affe44a0826a55853b306c0ee 100644 (file)
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2575,7 +2575,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev,
                                 wacom_wac->hid_data.tipswitch);
                 input_report_key(input, wacom_wac->tool[0], sense);
                 if (wacom_wac->serial[0]) {
-                       input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]);
+                       /*
+                        * xf86-input-wacom does not accept a serial number
+                        * of '0'. Report the low 32 bits if possible, but
+                        * if they are zero, report the upper ones instead.
+                        */
+                       __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu;
+                       __u32 serial_hi = wacom_wac->serial[0] >> 32;
+                       input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi));
                         input_report_abs(input, ABS_MISC, sense ? id : 0);
                 }
  
diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c

index f6e1e55e82922be6f67a98046b4f74c3159625d9..4acc1858d8acf799c20e5c2061431d35adc8db10 100644 (file)
--- a/drivers/hwmon/aspeed-pwm-tacho.c
+++ b/drivers/hwmon/aspeed-pwm-tacho.c
@@ -195,6 +195,8 @@ struct aspeed_pwm_tacho_data {
         u8 fan_tach_ch_source[MAX_ASPEED_FAN_TACH_CHANNELS];
         struct aspeed_cooling_device *cdev[8];
         const struct attribute_group *groups[3];
+       /* protects access to shared ASPEED_PTCR_RESULT */
+       struct mutex tach_lock;
  };
  
  enum type { TYPEM, TYPEN, TYPEO };
@@ -529,6 +531,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
         u8 fan_tach_ch_source, type, mode, both;
         int ret;
  
+       mutex_lock(&priv->tach_lock);
+
         regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0);
         regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch);
  
@@ -546,6 +550,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
                 ASPEED_RPM_STATUS_SLEEP_USEC,
                 usec);
  
+       mutex_unlock(&priv->tach_lock);
+
         /* return -ETIMEDOUT if we didn't get an answer. */
         if (ret)
                 return ret;
@@ -915,6 +921,7 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev)
         priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
         if (!priv)
                 return -ENOMEM;
+       mutex_init(&priv->tach_lock);
         priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs,
                         &aspeed_pwm_tacho_regmap_config);
         if (IS_ERR(priv->regmap))
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c

index ba82d1e79c131678c0c673bd5c0d9d77b09fdf1a..b8fc8d1ef20dfcb6132a168425df2d7e2653afa4 100644 (file)
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -41,7 +41,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
  
  #define PKG_SYSFS_ATTR_NO      1       /* Sysfs attribute for package temp */
  #define BASE_SYSFS_ATTR_NO     2       /* Sysfs Base attr no for coretemp */
-#define NUM_REAL_CORES         128     /* Number of Real cores per cpu */
+#define NUM_REAL_CORES         512     /* Number of Real cores per cpu */
  #define CORETEMP_NAME_LENGTH   28      /* String Length of attrs */
  #define MAX_CORE_ATTRS         4       /* Maximum no of basic attrs */
  #define TOTAL_ATTRS            (MAX_CORE_ATTRS + 1)
@@ -419,7 +419,7 @@ static ssize_t show_temp(struct device *dev,
  }
  
  static int create_core_attrs(struct temp_data *tdata, struct device *dev,
-                            int attr_no)
+                            int index)
  {
         int i;
         static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev,
@@ -431,13 +431,20 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
         };
  
         for (i = 0; i < tdata->attr_size; i++) {
+               /*
+                * We map the attr number to core id of the CPU
+                * The attr number is always core id + 2
+                * The Pkgtemp will always show up as temp1_*, if available
+                */
+               int attr_no = tdata->is_pkg_data ? 1 : tdata->cpu_core_id + 2;
+
                 snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH,
                          "temp%d_%s", attr_no, suffixes[i]);
                 sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr);
                 tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
                 tdata->sd_attrs[i].dev_attr.attr.mode = 0444;
                 tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
-               tdata->sd_attrs[i].index = attr_no;
+               tdata->sd_attrs[i].index = index;
                 tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr;
         }
         tdata->attr_group.attrs = tdata->attrs;
@@ -495,30 +502,25 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
         struct platform_data *pdata = platform_get_drvdata(pdev);
         struct cpuinfo_x86 *c = &cpu_data(cpu);
         u32 eax, edx;
-       int err, index, attr_no;
+       int err, index;
  
         if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
                 return 0;
  
         /*
-        * Find attr number for sysfs:
-        * We map the attr number to core id of the CPU
-        * The attr number is always core id + 2
-        * The Pkgtemp will always show up as temp1_*, if available
+        * Get the index of tdata in pdata->core_data[]
+        * tdata for package: pdata->core_data[1]
+        * tdata for core: pdata->core_data[2] .. pdata->core_data[NUM_REAL_CORES + 1]
          */
         if (pkg_flag) {
-               attr_no = PKG_SYSFS_ATTR_NO;
+               index = PKG_SYSFS_ATTR_NO;
         } else {
-               index = ida_alloc(&pdata->ida, GFP_KERNEL);
+               index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL);
                 if (index < 0)
                         return index;
-               pdata->cpu_map[index] = topology_core_id(cpu);
-               attr_no = index + BASE_SYSFS_ATTR_NO;
-       }
  
-       if (attr_no > MAX_CORE_DATA - 1) {
-               err = -ERANGE;
-               goto ida_free;
+               pdata->cpu_map[index] = topology_core_id(cpu);
+               index += BASE_SYSFS_ATTR_NO;
         }
  
         tdata = init_temp_data(cpu, pkg_flag);
@@ -544,20 +546,20 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
                 if (get_ttarget(tdata, &pdev->dev) >= 0)
                         tdata->attr_size++;
  
-       pdata->core_data[attr_no] = tdata;
+       pdata->core_data[index] = tdata;
  
         /* Create sysfs interfaces */
-       err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
+       err = create_core_attrs(tdata, pdata->hwmon_dev, index);
         if (err)
                 goto exit_free;
  
         return 0;
  exit_free:
-       pdata->core_data[attr_no] = NULL;
+       pdata->core_data[index] = NULL;
         kfree(tdata);
  ida_free:
         if (!pkg_flag)
-               ida_free(&pdata->ida, index);
+               ida_free(&pdata->ida, index - BASE_SYSFS_ATTR_NO);
         return err;
  }
  
diff --git a/drivers/hwmon/gigabyte_waterforce.c b/drivers/hwmon/gigabyte_waterforce.c

index 85e5237757142a05b45fdd7006d4c7b2ca61a3c3..8129d7b3ceaf9ae2e851f39af2db7aa6eaca9ce0 100644 (file)
--- a/drivers/hwmon/gigabyte_waterforce.c
+++ b/drivers/hwmon/gigabyte_waterforce.c
@@ -146,7 +146,7 @@ static int waterforce_get_status(struct waterforce_data *priv)
         /* Send command for getting status */
         ret = waterforce_write_expanded(priv, get_status_cmd, GET_STATUS_CMD_LENGTH);
         if (ret < 0)
-               return ret;
+               goto unlock_and_return;
  
         ret = wait_for_completion_interruptible_timeout(&priv->status_report_received,
                                                         msecs_to_jiffies(STATUS_VALIDITY));
diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c

index 8d2ef3145bca3c71b0aee2d8a1fb466dd3f9cb3e..9fbab8f023340da24cf9623e8da882849358ccea 100644 (file)
--- a/drivers/hwmon/nct6775-core.c
+++ b/drivers/hwmon/nct6775-core.c
@@ -3512,6 +3512,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
         const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit;
         const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL;
         int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp;
+       int num_reg_temp_config;
         struct device *hwmon_dev;
         struct sensor_template_group tsi_temp_tg;
  
@@ -3594,6 +3595,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                 reg_temp_over = NCT6106_REG_TEMP_OVER;
                 reg_temp_hyst = NCT6106_REG_TEMP_HYST;
                 reg_temp_config = NCT6106_REG_TEMP_CONFIG;
+               num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG);
                 reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE;
                 reg_temp_crit = NCT6106_REG_TEMP_CRIT;
                 reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L;
@@ -3669,6 +3671,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                 reg_temp_over = NCT6106_REG_TEMP_OVER;
                 reg_temp_hyst = NCT6106_REG_TEMP_HYST;
                 reg_temp_config = NCT6106_REG_TEMP_CONFIG;
+               num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG);
                 reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE;
                 reg_temp_crit = NCT6106_REG_TEMP_CRIT;
                 reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L;
@@ -3746,6 +3749,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                 reg_temp_over = NCT6775_REG_TEMP_OVER;
                 reg_temp_hyst = NCT6775_REG_TEMP_HYST;
                 reg_temp_config = NCT6775_REG_TEMP_CONFIG;
+               num_reg_temp_config = ARRAY_SIZE(NCT6775_REG_TEMP_CONFIG);
                 reg_temp_alternate = NCT6775_REG_TEMP_ALTERNATE;
                 reg_temp_crit = NCT6775_REG_TEMP_CRIT;
  
@@ -3821,6 +3825,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                 reg_temp_over = NCT6775_REG_TEMP_OVER;
                 reg_temp_hyst = NCT6775_REG_TEMP_HYST;
                 reg_temp_config = NCT6776_REG_TEMP_CONFIG;
+               num_reg_temp_config = ARRAY_SIZE(NCT6776_REG_TEMP_CONFIG);
                 reg_temp_alternate = NCT6776_REG_TEMP_ALTERNATE;
                 reg_temp_crit = NCT6776_REG_TEMP_CRIT;
  
@@ -3900,6 +3905,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                 reg_temp_over = NCT6779_REG_TEMP_OVER;
                 reg_temp_hyst = NCT6779_REG_TEMP_HYST;
                 reg_temp_config = NCT6779_REG_TEMP_CONFIG;
+               num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG);
                 reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE;
                 reg_temp_crit = NCT6779_REG_TEMP_CRIT;
  
@@ -4034,6 +4040,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                 reg_temp_over = NCT6779_REG_TEMP_OVER;
                 reg_temp_hyst = NCT6779_REG_TEMP_HYST;
                 reg_temp_config = NCT6779_REG_TEMP_CONFIG;
+               num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG);
                 reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE;
                 reg_temp_crit = NCT6779_REG_TEMP_CRIT;
  
@@ -4123,6 +4130,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                 reg_temp_over = NCT6798_REG_TEMP_OVER;
                 reg_temp_hyst = NCT6798_REG_TEMP_HYST;
                 reg_temp_config = NCT6779_REG_TEMP_CONFIG;
+               num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG);
                 reg_temp_alternate = NCT6798_REG_TEMP_ALTERNATE;
                 reg_temp_crit = NCT6798_REG_TEMP_CRIT;
  
@@ -4204,7 +4212,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                                   = reg_temp_crit[src - 1];
                         if (reg_temp_crit_l && reg_temp_crit_l[i])
                                 data->reg_temp[4][src - 1] = reg_temp_crit_l[i];
-                       data->reg_temp_config[src - 1] = reg_temp_config[i];
+                       if (i < num_reg_temp_config)
+                               data->reg_temp_config[src - 1] = reg_temp_config[i];
                         data->temp_src[src - 1] = src;
                         continue;
                 }
@@ -4217,7 +4226,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                 data->reg_temp[0][s] = reg_temp[i];
                 data->reg_temp[1][s] = reg_temp_over[i];
                 data->reg_temp[2][s] = reg_temp_hyst[i];
-               data->reg_temp_config[s] = reg_temp_config[i];
+               if (i < num_reg_temp_config)
+                       data->reg_temp_config[s] = reg_temp_config[i];
                 if (reg_temp_crit_h && reg_temp_crit_h[i])
                         data->reg_temp[3][s] = reg_temp_crit_h[i];
                 else if (reg_temp_crit[src - 1])
diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c

index b9bb469e2d8febe1d056e0b8f7d0a0b743d5ff3e..e5fa10b3b8bc7184e03e6caa0701e34f81bcb7cb 100644 (file)
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -126,6 +126,21 @@ static const struct regulator_desc __maybe_unused mp2975_reg_desc[] = {
  
  #define to_mp2975_data(x)  container_of(x, struct mp2975_data, info)
  
+static int mp2975_read_byte_data(struct i2c_client *client, int page, int reg)
+{
+       switch (reg) {
+       case PMBUS_VOUT_MODE:
+               /*
+                * Report direct format as configured by MFR_DC_LOOP_CTRL.
+                * Unlike on MP2971/MP2973 the reported VOUT_MODE isn't automatically
+                * internally updated, but always reads as PB_VOUT_MODE_VID.
+                */
+               return PB_VOUT_MODE_DIRECT;
+       default:
+               return -ENODATA;
+       }
+}
+
  static int
  mp2975_read_word_helper(struct i2c_client *client, int page, int phase, u8 reg,
                         u16 mask)
@@ -869,6 +884,7 @@ static struct pmbus_driver_info mp2975_info = {
                 PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
                 PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT |
                 PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT | PMBUS_PHASE_VIRTUAL,
+       .read_byte_data = mp2975_read_byte_data,
         .read_word_data = mp2975_read_word_data,
  #if IS_ENABLED(CONFIG_SENSORS_MP2975_REGULATOR)
         .num_regulators = 1,
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile

index 3757b9391e60ae9b0e1c2ec5e564e0ae55af0c2a..aa0ee8ecd6f2f53ea109cfeacffe5e2682ae228e 100644 (file)
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -90,10 +90,8 @@ obj-$(CONFIG_I2C_NPCM)               += i2c-npcm7xx.o
  obj-$(CONFIG_I2C_OCORES)       += i2c-ocores.o
  obj-$(CONFIG_I2C_OMAP)         += i2c-omap.o
  obj-$(CONFIG_I2C_OWL)          += i2c-owl.o
-i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o
-obj-$(CONFIG_I2C_PASEMI)       += i2c-pasemi.o
-i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o
-obj-$(CONFIG_I2C_APPLE)        += i2c-apple.o
+obj-$(CONFIG_I2C_PASEMI)       += i2c-pasemi-core.o i2c-pasemi-pci.o
+obj-$(CONFIG_I2C_APPLE)                += i2c-pasemi-core.o i2c-pasemi-platform.o
  obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
  obj-$(CONFIG_I2C_PNX)          += i2c-pnx.o
  obj-$(CONFIG_I2C_PXA)          += i2c-pxa.o
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c

index 3932e8d96a17173fa3b4f7ad90ebcbb786e99370..2c36b36d7d516c851c8e9e44c8f90ce11fac0f13 100644 (file)
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -498,11 +498,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
         /* Set block buffer mode */
         outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv));
  
-       inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
-
         if (read_write == I2C_SMBUS_WRITE) {
                 len = data->block[0];
                 outb_p(len, SMBHSTDAT0(priv));
+               inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
                 for (i = 0; i < len; i++)
                         outb_p(data->block[i+1], SMBBLKDAT(priv));
         }
@@ -520,6 +519,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
                 }
  
                 data->block[0] = len;
+               inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
                 for (i = 0; i < len; i++)
                         data->block[i + 1] = inb_p(SMBBLKDAT(priv));
         }
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c

index 88a053987403cc6f59c3def73fd52cd11e2b1359..60e813137f8442895b19c6e9d871252cc32c7f24 100644 (file)
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -803,6 +803,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx,
                 ctl &= ~I2CR_MTX;
                 imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR);
                 imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
+
+               /* flag the last byte as processed */
+               i2c_imx_slave_event(i2c_imx,
+                                   I2C_SLAVE_READ_PROCESSED, &value);
+
                 i2c_imx_slave_finish_op(i2c_imx);
                 return IRQ_HANDLED;
         }
diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c

index 7d54a9f34c74b5a3b074a469dca674eb286dd50d..bd8becbdeeb28f4aa7f094df18fa7d059113dcae 100644 (file)
--- a/drivers/i2c/busses/i2c-pasemi-core.c
+++ b/drivers/i2c/busses/i2c-pasemi-core.c
@@ -369,6 +369,7 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus)
  
         return 0;
  }
+EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe);
  
  irqreturn_t pasemi_irq_handler(int irq, void *dev_id)
  {
@@ -378,3 +379,8 @@ irqreturn_t pasemi_irq_handler(int irq, void *dev_id)
         complete(&smbus->irq_completion);
         return IRQ_HANDLED;
  }
+EXPORT_SYMBOL_GPL(pasemi_irq_handler);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Olof Johansson <olof@lixom.net>");
+MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver");
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c

index 0d2e7171e3a6f94a66d2ff85861723e0f2caea00..da94df466e83c9d34c6212681c087cafc8a6b788 100644 (file)
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -613,20 +613,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i
  
                 peripheral.addr = msgs[i].addr;
  
+               ret =  geni_i2c_gpi(gi2c, &msgs[i], &config,
+                                   &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c);
+               if (ret)
+                       goto err;
+
                 if (msgs[i].flags & I2C_M_RD) {
                         ret =  geni_i2c_gpi(gi2c, &msgs[i], &config,
                                             &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c);
                         if (ret)
                                 goto err;
-               }
-
-               ret =  geni_i2c_gpi(gi2c, &msgs[i], &config,
-                                   &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c);
-               if (ret)
-                       goto err;
  
-               if (msgs[i].flags & I2C_M_RD)
                         dma_async_issue_pending(gi2c->rx_c);
+               }
+
                 dma_async_issue_pending(gi2c->tx_c);
  
                 timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig

index 91adcac875a4130d75d887b80464b76dd1f422a0..c9d7afe489e832b4a9598ffe9266084dbebd9fd6 100644 (file)
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -219,10 +219,12 @@ config BMA400
  
  config BMA400_I2C
         tristate
+       select REGMAP_I2C
         depends on BMA400
  
  config BMA400_SPI
         tristate
+       select REGMAP_SPI
         depends on BMA400
  
  config BMC150_ACCEL
diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c

index feb86fe6c422df4ad3085b1c4ffea1651ad4cfd1..62490424b6aed44698c376560550af353073b72b 100644 (file)
--- a/drivers/iio/adc/ad4130.c
+++ b/drivers/iio/adc/ad4130.c
@@ -1821,7 +1821,7 @@ static int ad4130_setup_int_clk(struct ad4130_state *st)
  {
         struct device *dev = &st->spi->dev;
         struct device_node *of_node = dev_of_node(dev);
-       struct clk_init_data init;
+       struct clk_init_data init = {};
         const char *clk_name;
         int ret;
  
@@ -1891,10 +1891,14 @@ static int ad4130_setup(struct iio_dev *indio_dev)
                 return ret;
  
         /*
-        * Configure all GPIOs for output. If configured, the interrupt function
-        * of P2 takes priority over the GPIO out function.
+        * Configure unused GPIOs for output. If configured, the interrupt
+        * function of P2 takes priority over the GPIO out function.
          */
-       val =  AD4130_IO_CONTROL_GPIO_CTRL_MASK;
+       val = 0;
+       for (i = 0; i < AD4130_MAX_GPIOS; i++)
+               if (st->pins_fn[i + AD4130_AIN2_P1] == AD4130_PIN_FN_NONE)
+                       val |= FIELD_PREP(AD4130_IO_CONTROL_GPIO_CTRL_MASK, BIT(i));
+
         val |= FIELD_PREP(AD4130_IO_CONTROL_INT_PIN_SEL_MASK, st->int_pin_sel);
  
         ret = regmap_write(st->regmap, AD4130_IO_CONTROL_REG, val);
diff --git a/drivers/iio/adc/ad7091r8.c b/drivers/iio/adc/ad7091r8.c

index 57700f12480382b82299c5822ae2fb42a8d7f391..70056430505752682f5fd2c3b2e56ef1db48c7b7 100644 (file)
--- a/drivers/iio/adc/ad7091r8.c
+++ b/drivers/iio/adc/ad7091r8.c
@@ -195,7 +195,7 @@ static int ad7091r8_gpio_setup(struct ad7091r_state *st)
         st->reset_gpio = devm_gpiod_get_optional(st->dev, "reset",
                                                  GPIOD_OUT_HIGH);
         if (IS_ERR(st->reset_gpio))
-               return dev_err_probe(st->dev, PTR_ERR(st->convst_gpio),
+               return dev_err_probe(st->dev, PTR_ERR(st->reset_gpio),
                                      "Error on requesting reset GPIO\n");
  
         if (st->reset_gpio) {
diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig

index 2de5494e7c22585aa52f016dc18e30c4f8107f37..b15b7a3b66d5a4d84bf3b48d46d88b29ec7c00d7 100644 (file)
--- a/drivers/iio/humidity/Kconfig
+++ b/drivers/iio/humidity/Kconfig
@@ -48,6 +48,18 @@ config HDC2010
           To compile this driver as a module, choose M here: the module
           will be called hdc2010.
  
+config HDC3020
+       tristate "TI HDC3020 relative humidity and temperature sensor"
+       depends on I2C
+       select CRC8
+       help
+         Say yes here to build support for the Texas Instruments
+         HDC3020, HDC3021 and HDC3022 relative humidity and temperature
+         sensors.
+
+         To compile this driver as a module, choose M here: the module
+         will be called hdc3020.
+
  config HID_SENSOR_HUMIDITY
         tristate "HID Environmental humidity sensor"
         depends on HID_SENSOR_HUB
diff --git a/drivers/iio/humidity/Makefile b/drivers/iio/humidity/Makefile

index f19ff3de97c56743f0ac51e2768e11c7a2816846..5fbeef299f61bfff07c6dd1f2215cf147d015b4f 100644 (file)
--- a/drivers/iio/humidity/Makefile
+++ b/drivers/iio/humidity/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_AM2315) += am2315.o
  obj-$(CONFIG_DHT11) += dht11.o
  obj-$(CONFIG_HDC100X) += hdc100x.o
  obj-$(CONFIG_HDC2010) += hdc2010.o
+obj-$(CONFIG_HDC3020) += hdc3020.o
  obj-$(CONFIG_HID_SENSOR_HUMIDITY) += hid-sensor-humidity.o
  
  hts221-y := hts221_core.o \
diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c

index 4e3311170725bc55fa3a5dd1c2a90dccb6c3aa19..ed70415512f687b6333078f9416b9a0fd6edbfdb 100644 (file)
--- a/drivers/iio/humidity/hdc3020.c
+++ b/drivers/iio/humidity/hdc3020.c
@@ -322,7 +322,7 @@ static int hdc3020_read_raw(struct iio_dev *indio_dev,
                 if (chan->type != IIO_TEMP)
                         return -EINVAL;
  
-               *val = 16852;
+               *val = -16852;
                 return IIO_VAL_INT;
  
         default:
diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig

index 83e53acfbe88011f4306f19438b6f31d4cad5b22..c7f5866a177d90edef7c30bfe54ada71cc17870c 100644 (file)
--- a/drivers/iio/imu/bno055/Kconfig
+++ b/drivers/iio/imu/bno055/Kconfig
@@ -8,6 +8,7 @@ config BOSCH_BNO055
  config BOSCH_BNO055_SERIAL
         tristate "Bosch BNO055 attached via UART"
         depends on SERIAL_DEV_BUS
+       select REGMAP
         select BOSCH_BNO055
         help
           Enable this to support Bosch BNO055 IMUs attached via UART.
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c

index 9a85752124ddc43b10ecb12ed2c48f605395b170..173dc00762a152e414feac8f1d8d626e01d4bde8 100644 (file)
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1584,10 +1584,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev)
         ret = iio_device_register_sysfs_group(indio_dev,
                                               &iio_dev_opaque->chan_attr_group);
         if (ret)
-               goto error_clear_attrs;
+               goto error_free_chan_attrs;
  
         return 0;
  
+error_free_chan_attrs:
+       kfree(iio_dev_opaque->chan_attr_group.attrs);
+       iio_dev_opaque->chan_attr_group.attrs = NULL;
  error_clear_attrs:
         iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list);
  
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c

index 5cd27f04b45e6d911ae53e7574a916455149c2a5..b6c4bef2a7bb22bbe42463ffc7d72e934e0a2591 100644 (file)
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -226,6 +226,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev,
         case HID_USAGE_SENSOR_TIME_TIMESTAMP:
                 als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes,
                                                                     *(s64 *)raw_data);
+               ret = 0;
                 break;
         default:
                 break;
diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c

index 69938204456f8bb0c1c4777d93ee7d0b8f2421dd..42b70cd42b39359ddd542e273163cab829128ab4 100644 (file)
--- a/drivers/iio/magnetometer/rm3100-core.c
+++ b/drivers/iio/magnetometer/rm3100-core.c
@@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq)
         struct rm3100_data *data;
         unsigned int tmp;
         int ret;
+       int samp_rate_index;
  
         indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
         if (!indio_dev)
@@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq)
         ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp);
         if (ret < 0)
                 return ret;
+
+       samp_rate_index = tmp - RM3100_TMRC_OFFSET;
+       if (samp_rate_index < 0 || samp_rate_index >=  RM3100_SAMP_NUM) {
+               dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n");
+               return -EINVAL;
+       }
         /* Initializing max wait time, which is double conversion time. */
-       data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2]
-                               * 2;
+       data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2;
  
         /* Cycle count values may not be what we want. */
         if ((tmp - RM3100_TMRC_OFFSET) == 0)
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c

index 433d6fac83c4cd95f698e1063a78c36dde79b374..e8a5fed07e88835a019924a0b60bdb16233a95a3 100644 (file)
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -87,6 +87,7 @@ static const struct of_device_id bmp280_of_spi_match[] = {
  MODULE_DEVICE_TABLE(of, bmp280_of_spi_match);
  
  static const struct spi_device_id bmp280_spi_id[] = {
+       { "bmp085", (kernel_ulong_t)&bmp180_chip_info },
         { "bmp180", (kernel_ulong_t)&bmp180_chip_info },
         { "bmp181", (kernel_ulong_t)&bmp180_chip_info },
         { "bmp280", (kernel_ulong_t)&bmp280_chip_info },
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c

index 824349659d69dc8e9ea9c1b5254d469628a5f933..ce9c5bae83bf1b934338d465ce25c5fba4e6ab2c 100644 (file)
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -401,6 +401,10 @@ static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
         struct bnxt_re_fence_data *fence = &pd->fence;
         struct ib_mr *ib_mr = &fence->mr->ib_mr;
         struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+       struct bnxt_re_dev *rdev = pd->rdev;
+
+       if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+               return;
  
         memset(wqe, 0, sizeof(*wqe));
         wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
@@ -455,6 +459,9 @@ static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
         struct device *dev = &rdev->en_dev->pdev->dev;
         struct bnxt_re_mr *mr = fence->mr;
  
+       if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+               return;
+
         if (fence->mw) {
                 bnxt_re_dealloc_mw(fence->mw);
                 fence->mw = NULL;
@@ -486,6 +493,9 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
         struct ib_mw *mw;
         int rc;
  
+       if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+               return 0;
+
         dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
                                   DMA_BIDIRECTIONAL);
         rc = dma_mapping_error(dev, dma_addr);
@@ -1817,7 +1827,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
         switch (srq_attr_mask) {
         case IB_SRQ_MAX_WR:
                 /* SRQ resize is not supported */
-               break;
+               return -EINVAL;
         case IB_SRQ_LIMIT:
                 /* Change the SRQ threshold */
                 if (srq_attr->srq_limit > srq->qplib_srq.max_wqe)
@@ -1832,13 +1842,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
                 /* On success, update the shadow */
                 srq->srq_limit = srq_attr->srq_limit;
                 /* No need to Build and send response back to udata */
-               break;
+               return 0;
         default:
                 ibdev_err(&rdev->ibdev,
                           "Unsupported srq_attr_mask 0x%x", srq_attr_mask);
                 return -EINVAL;
         }
-       return 0;
  }
  
  int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
@@ -2556,11 +2565,6 @@ static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr,
         wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
         wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
  
-       /* Need unconditional fence for local invalidate
-        * opcode to work as expected.
-        */
-       wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
-
         if (wr->send_flags & IB_SEND_SIGNALED)
                 wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
         if (wr->send_flags & IB_SEND_SOLICITED)
@@ -2583,12 +2587,6 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
         wqe->frmr.levels = qplib_frpl->hwq.level;
         wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
  
-       /* Need unconditional fence for reg_mr
-        * opcode to function as expected.
-        */
-
-       wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
-
         if (wr->wr.send_flags & IB_SEND_SIGNALED)
                 wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
  
@@ -2719,6 +2717,18 @@ bad:
         return rc;
  }
  
+static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe)
+{
+       /* Need unconditional fence for non-wire memory opcode
+        * to work as expected.
+        */
+       if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV ||
+           wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR ||
+           wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR ||
+           wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW)
+               wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+}
+
  int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
                       const struct ib_send_wr **bad_wr)
  {
@@ -2798,8 +2808,11 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
                         rc = -EINVAL;
                         goto bad;
                 }
-               if (!rc)
+               if (!rc) {
+                       if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+                               bnxt_re_legacy_set_uc_fence(&wqe);
                         rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+               }
  bad:
                 if (rc) {
                         ibdev_err(&qp->rdev->ibdev,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c

index f022c922fae5183cb6860092e5bd0662d22f1764..54b4d2f3a5d885d1f17643a2416420cb6b805b8a 100644 (file)
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -280,9 +280,6 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
  
  static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
  {
-
-       if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
-               return;
         rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
         if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
                 bnxt_re_set_resource_limits(rdev);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c

index c98e04fe2ddd477dd8457c09bef64c9339b992f4..439d0c7c5d0cab91e028b380435aaf898f9856c3 100644 (file)
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -744,7 +744,8 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
         bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
                                 sizeof(resp), 0);
         rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
-       srq->threshold = le16_to_cpu(sb->srq_limit);
+       if (!rc)
+               srq->threshold = le16_to_cpu(sb->srq_limit);
         dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
                           sbuf.sb, sbuf.dma_addr);
  
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c

index 68c621ff59d03fea9340eb50a56363d24c9bc105..5a91cbda4aee6f769385d6a4eab9aa191d0e44d4 100644 (file)
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -2086,7 +2086,7 @@ int init_credit_return(struct hfi1_devdata *dd)
                                    "Unable to allocate credit return DMA range for NUMA %d\n",
                                    i);
                         ret = -ENOMEM;
-                       goto done;
+                       goto free_cr_base;
                 }
         }
         set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2094,6 +2094,10 @@ int init_credit_return(struct hfi1_devdata *dd)
         ret = 0;
  done:
         return ret;
+
+free_cr_base:
+       free_credit_return(dd);
+       goto done;
  }
  
  void free_credit_return(struct hfi1_devdata *dd)
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c

index 6e5ac2023328a7d59d42f6532113dd9a95641b31..b67d23b1f28625c5ed7a4f15f8a07a32d074199b 100644 (file)
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
  {
         int rval = 0;
  
-       if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) {
+       if ((unlikely(tx->num_desc == tx->desc_limit))) {
                 rval = _extend_sdma_tx_descs(dd, tx);
                 if (rval) {
                         __sdma_txclean(dd, tx);
diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h

index 8fb752f2eda2999aed4f61bffcb53e105adde9a5..2cb4b96db7212163f1e207bb87dd0b1326ad26e1 100644 (file)
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -346,6 +346,7 @@ enum irdma_cqp_op_type {
  #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES                                0x050b
  #define IRDMA_AE_LLP_DOUBT_REACHABILITY                                        0x050c
  #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED                            0x050e
+#define IRDMA_AE_LLP_TOO_MANY_RNRS                                     0x050f
  #define IRDMA_AE_RESOURCE_EXHAUSTION                                   0x0520
  #define IRDMA_AE_RESET_SENT                                            0x0601
  #define IRDMA_AE_TERMINATE_SENT                                                0x0602
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c

index bd4b2b89644442341226e6c5716f5ddb221ea1a1..ad50b77282f8a1b5352e390080d208d0086152eb 100644 (file)
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -387,6 +387,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
                 case IRDMA_AE_LLP_TOO_MANY_RETRIES:
                 case IRDMA_AE_LCE_QP_CATASTROPHIC:
                 case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
+               case IRDMA_AE_LLP_TOO_MANY_RNRS:
                 case IRDMA_AE_LCE_CQ_CATASTROPHIC:
                 case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
                 default:
@@ -570,6 +571,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf,
         dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx);
         irq_update_affinity_hint(msix_vec->irq, NULL);
         free_irq(msix_vec->irq, dev_id);
+       if (rf == dev_id) {
+               tasklet_kill(&rf->dpc_tasklet);
+       } else {
+               struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id;
+
+               tasklet_kill(&iwceq->dpc_tasklet);
+       }
  }
  
  /**
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c

index b5eb8d421988c1abd73cf4eb3a93adc6f2944089..0b046c061742be140251785f60ac25cff73aa2ba 100644 (file)
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -839,7 +839,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
  
         if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline ||
             init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
-           init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags)
+           init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
+           init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta ||
+           init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta)
                 return -EINVAL;
  
         if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
@@ -2184,9 +2186,8 @@ static int irdma_create_cq(struct ib_cq *ibcq,
                 info.cq_base_pa = iwcq->kmem.pa;
         }
  
-       if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
-               info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
-                                                (u32)IRDMA_MAX_CQ_READ_THRESH);
+       info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
+                                        (u32)IRDMA_MAX_CQ_READ_THRESH);
  
         if (irdma_sc_cq_init(cq, &info)) {
                 ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n");
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c

index f87531318feb807c7c5a216c991e10f197e9f8f4..a78a067e3ce7f3abd260c09f552562050b7b78cc 100644 (file)
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -458,6 +458,12 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
         dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev));
  
         for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
+               if ((i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID ||
+                    i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP))
+                       if (!MLX5_CAP_GEN(mdev, roce) ||
+                           !MLX5_CAP_ROCE(mdev, roce_cc_general))
+                               continue;
+
                 dbg_cc_params->params[i].offset = i;
                 dbg_cc_params->params[i].dev = dev;
                 dbg_cc_params->params[i].port_num = port_num;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c

index 869369cb5b5fa4745aaca7bc5eb7032e684bb132..253fea374a72de1d1143b82601da2ce9caf1cf1f 100644 (file)
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2949,7 +2949,7 @@ DECLARE_UVERBS_NAMED_METHOD(
         MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
                         UVERBS_IDR_ANY_OBJECT,
-                       UVERBS_ACCESS_WRITE,
+                       UVERBS_ACCESS_READ,
                         UA_MANDATORY),
         UVERBS_ATTR_PTR_IN(
                 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c

index df1d1b0a3ef72bfc938c6cb61b5589e5ef7b7ff4..9947feb7fb8a0bcd1ecf9e5d136e9ea7e326e8e7 100644 (file)
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
                  */
                 copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
                                left);
-               memcpy(eseg->inline_hdr.start, pdata, copysz);
+               memcpy(eseg->inline_hdr.data, pdata, copysz);
                 stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
                                sizeof(eseg->inline_hdr.start) + copysz, 16);
                 *size += stride / 16;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c

index 7887a6786ed43d6917a97b2dfbd8770c49383fbd..f118ce0a9a617b4226d0195048299827f2a11d37 100644 (file)
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
                 /* RQ - read access only (0) */
                 rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
                                           ureq.rq_len, true, 0, alloc_and_init);
-               if (rc)
+               if (rc) {
+                       ib_umem_release(qp->usq.umem);
+                       qp->usq.umem = NULL;
+                       if (rdma_protocol_roce(&dev->ibdev, 1)) {
+                               qedr_free_pbl(dev, &qp->usq.pbl_info,
+                                             qp->usq.pbl_tbl);
+                       } else {
+                               kfree(qp->usq.pbl_tbl);
+                       }
                         return rc;
+               }
         }
  
         memset(&in_params, 0, sizeof(in_params));
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c

index 58f70cfec45a72abd8df2ba88098a92f7fcacb4a..040234c01be4d5a0cc6fb4a4124af4752f58e181 100644 (file)
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444);
  MODULE_PARM_DESC(srpt_srq_size,
                  "Shared receive queue (SRQ) size.");
  
+static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp)
+{
+       return kstrtou64(buffer, 16, (u64 *)kp->arg);
+}
  static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp)
  {
         return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg);
  }
-module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid,
-                 0444);
+module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x,
+                 &srpt_service_guid, 0444);
  MODULE_PARM_DESC(srpt_service_guid,
                  "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA.");
  
@@ -210,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s)
  /**
   * srpt_qp_event - QP event callback function
   * @event: Description of the event that occurred.
- * @ch: SRPT RDMA channel.
+ * @ptr: SRPT RDMA channel.
   */
-static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
+static void srpt_qp_event(struct ib_event *event, void *ptr)
  {
+       struct srpt_rdma_ch *ch = ptr;
+
         pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n",
                  event->event, ch, ch->sess_name, ch->qp->qp_num,
                  get_ch_state_name(ch->state));
@@ -1807,8 +1813,7 @@ retry:
         ch->cq_size = ch->rq_size + sq_size;
  
         qp_init->qp_context = (void *)ch;
-       qp_init->event_handler
-               = (void(*)(struct ib_event *, void*))srpt_qp_event;
+       qp_init->event_handler = srpt_qp_event;
         qp_init->send_cq = ch->cq;
         qp_init->recv_cq = ch->cq;
         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c

index b1244d7df6cc9e097a11257f9bb530b8954e3c12..7c4b2a5cc1b54a1c98a92b38076df6a7b0424b49 100644 (file)
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -294,6 +294,7 @@ static const struct xpad_device {
         { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 },
         { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 },
         { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 },
+       { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 },
         { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 },
         { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
         { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
@@ -491,6 +492,7 @@ static const struct usb_device_id xpad_table[] = {
         XPAD_XBOX360_VENDOR(0x15e4),            /* Numark Xbox 360 controllers */
         XPAD_XBOX360_VENDOR(0x162e),            /* Joytech Xbox 360 controllers */
         XPAD_XBOX360_VENDOR(0x1689),            /* Razer Onza */
+       XPAD_XBOX360_VENDOR(0x17ef),            /* Lenovo */
         XPAD_XBOX360_VENDOR(0x1949),            /* Amazon controllers */
         XPAD_XBOX360_VENDOR(0x1bad),            /* Harmonix Rock Band guitar and drums */
         XPAD_XBOX360_VENDOR(0x20d6),            /* PowerA controllers */
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c

index 13ef6284223da30940e5a37802d04a104d2692f6..7f67f9f2946b484317575d529ee35a385fc2882e 100644 (file)
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -811,7 +811,6 @@ static int atkbd_probe(struct atkbd *atkbd)
  {
         struct ps2dev *ps2dev = &atkbd->ps2dev;
         unsigned char param[2];
-       bool skip_getid;
  
  /*
   * Some systems, where the bit-twiddling when testing the io-lines of the
@@ -825,6 +824,11 @@ static int atkbd_probe(struct atkbd *atkbd)
                                  "keyboard reset failed on %s\n",
                                  ps2dev->serio->phys);
  
+       if (atkbd_skip_getid(atkbd)) {
+               atkbd->id = 0xab83;
+               goto deactivate_kbd;
+       }
+
  /*
   * Then we check the keyboard ID. We should get 0xab83 under normal conditions.
   * Some keyboards report different values, but the first byte is always 0xab or
@@ -833,18 +837,17 @@ static int atkbd_probe(struct atkbd *atkbd)
   */
  
         param[0] = param[1] = 0xa5;     /* initialize with invalid values */
-       skip_getid = atkbd_skip_getid(atkbd);
-       if (skip_getid || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) {
+       if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) {
  
  /*
- * If the get ID command was skipped or failed, we check if we can at least set
+ * If the get ID command failed, we check if we can at least set
   * the LEDs on the keyboard. This should work on every keyboard out there.
   * It also turns the LEDs off, which we want anyway.
   */
                 param[0] = 0;
                 if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS))
                         return -1;
-               atkbd->id = skip_getid ? 0xab83 : 0xabba;
+               atkbd->id = 0xabba;
                 return 0;
         }
  
@@ -860,6 +863,7 @@ static int atkbd_probe(struct atkbd *atkbd)
                 return -1;
         }
  
+deactivate_kbd:
  /*
   * Make sure nothing is coming from the keyboard and disturbs our
   * internal state.
diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c

index ca150618d32f1863795f390b4ebf4687ea0e36c1..953992b458e9f2c46900204e926da7c665468709 100644 (file)
--- a/drivers/input/mouse/bcm5974.c
+++ b/drivers/input/mouse/bcm5974.c
@@ -19,6 +19,7 @@
   * Copyright (C) 2006     Nicolas Boichat (nicolas@boichat.ch)
   */
  
+#include "linux/usb.h"
  #include <linux/kernel.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
@@ -193,6 +194,8 @@ enum tp_type {
  
  /* list of device capability bits */
  #define HAS_INTEGRATED_BUTTON  1
+/* maximum number of supported endpoints (currently trackpad and button) */
+#define MAX_ENDPOINTS  2
  
  /* trackpad finger data block size */
  #define FSIZE_TYPE1            (14 * sizeof(__le16))
@@ -891,6 +894,18 @@ static int bcm5974_resume(struct usb_interface *iface)
         return error;
  }
  
+static bool bcm5974_check_endpoints(struct usb_interface *iface,
+                                   const struct bcm5974_config *cfg)
+{
+       u8 ep_addr[MAX_ENDPOINTS + 1] = {0};
+
+       ep_addr[0] = cfg->tp_ep;
+       if (cfg->tp_type == TYPE1)
+               ep_addr[1] = cfg->bt_ep;
+
+       return usb_check_int_endpoints(iface, ep_addr);
+}
+
  static int bcm5974_probe(struct usb_interface *iface,
                          const struct usb_device_id *id)
  {
@@ -903,6 +918,11 @@ static int bcm5974_probe(struct usb_interface *iface,
         /* find the product index */
         cfg = bcm5974_get_config(udev);
  
+       if (!bcm5974_check_endpoints(iface, cfg)) {
+               dev_err(&iface->dev, "Unexpected non-int endpoint\n");
+               return -ENODEV;
+       }
+
         /* allocate memory for our device state and initialize it */
         dev = kzalloc(sizeof(struct bcm5974), GFP_KERNEL);
         input_dev = input_allocate_device();
diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h

index b585b1dab870e0725daa62d7b52d2c9ca406798a..dfc6c581873b7d45da63d88a216295a24fa2c13b 100644 (file)
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -634,6 +634,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
                 },
                 .driver_data = (void *)(SERIO_QUIRK_NOAUX)
         },
+       {
+               /* Fujitsu Lifebook U728 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"),
+               },
+               .driver_data = (void *)(SERIO_QUIRK_NOAUX)
+       },
         {
                 /* Gigabyte M912 */
                 .matches = {
@@ -1208,6 +1216,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
                                         SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP |
                                         SERIO_QUIRK_NOPNP)
         },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "NS5x_7xPU"),
+               },
+               .driver_data = (void *)(SERIO_QUIRK_NOAUX)
+       },
         {
                 .matches = {
                         DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"),
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c

index af32fbe57b630373f6fd8b67e3129be1d54de1d1..b068ff8afbc9ad3ba62b70cbbee20feb572c3855 100644 (file)
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
                 }
         }
  
-       if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) {
+       /* Some devices with gpio_int_idx 0 list a third unused GPIO */
+       if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) {
                 ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO;
                 gpio_mapping = acpi_goodix_int_first_gpios;
         } else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) {
diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c

index 20331e119beb694945b196df9fb2c7efff60feda..03d626776ba17a3ff18c91c1e685a0230e8fcbbb 100644 (file)
--- a/drivers/interconnect/qcom/sc8180x.c
+++ b/drivers/interconnect/qcom/sc8180x.c
@@ -1372,6 +1372,7 @@ static struct qcom_icc_bcm bcm_mm0 = {
  
  static struct qcom_icc_bcm bcm_co0 = {
         .name = "CO0",
+       .keepalive = true,
         .num_nodes = 1,
         .nodes = { &slv_qns_cdsp_mem_noc }
  };
diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c

index 629faa4c9aaee280e7514695dcd2c96e9125d1dd..fc22cecf650fc4eedaf3970a6a8f025f7e9d849e 100644 (file)
--- a/drivers/interconnect/qcom/sm8550.c
+++ b/drivers/interconnect/qcom/sm8550.c
@@ -2223,6 +2223,7 @@ static struct platform_driver qnoc_driver = {
         .driver = {
                 .name = "qnoc-sm8550",
                 .of_match_table = qnoc_of_match,
+               .sync_state = icc_sync_state,
         },
  };
  
diff --git a/drivers/interconnect/qcom/sm8650.c b/drivers/interconnect/qcom/sm8650.c

index b83de54577b6874553624390e0e0145fd966a38a..b962e6c233ef78ed3ed44cf0b0777bb62fbd50a6 100644 (file)
--- a/drivers/interconnect/qcom/sm8650.c
+++ b/drivers/interconnect/qcom/sm8650.c
@@ -1160,7 +1160,7 @@ static struct qcom_icc_node qns_gemnoc_sf = {
  
  static struct qcom_icc_bcm bcm_acv = {
         .name = "ACV",
-       .enable_mask = BIT(3),
+       .enable_mask = BIT(0),
         .num_nodes = 1,
         .nodes = { &ebi },
  };
diff --git a/drivers/interconnect/qcom/x1e80100.c b/drivers/interconnect/qcom/x1e80100.c

index d19501d913b39c696a337ff4f5d3a54aa07915c4..cbaf4f9c41be656212b50dce683273911e1e1cd6 100644 (file)
--- a/drivers/interconnect/qcom/x1e80100.c
+++ b/drivers/interconnect/qcom/x1e80100.c
@@ -1586,6 +1586,7 @@ static struct qcom_icc_node qns_pcie_south_gem_noc_pcie = {
  
  static struct qcom_icc_bcm bcm_acv = {
         .name = "ACV",
+       .enable_mask = BIT(3),
         .num_nodes = 1,
         .nodes = { &ebi },
  };
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c

index 05722121f00e70689680ce7a45cc5e953f50210b..4a27fbdb2d8446cb6af2b0e287580615c7da47c1 100644 (file)
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -292,10 +292,8 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
                           struct mm_struct *mm)
  {
         int ret;
-       unsigned long flags;
         struct arm_smmu_ctx_desc *cd;
         struct arm_smmu_mmu_notifier *smmu_mn;
-       struct arm_smmu_master *master;
  
         list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) {
                 if (smmu_mn->mn.mm == mm) {
@@ -325,28 +323,9 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
                 goto err_free_cd;
         }
  
-       spin_lock_irqsave(&smmu_domain->devices_lock, flags);
-       list_for_each_entry(master, &smmu_domain->devices, domain_head) {
-               ret = arm_smmu_write_ctx_desc(master, mm_get_enqcmd_pasid(mm),
-                                             cd);
-               if (ret) {
-                       list_for_each_entry_from_reverse(
-                               master, &smmu_domain->devices, domain_head)
-                               arm_smmu_write_ctx_desc(
-                                       master, mm_get_enqcmd_pasid(mm), NULL);
-                       break;
-               }
-       }
-       spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
-       if (ret)
-               goto err_put_notifier;
-
         list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers);
         return smmu_mn;
  
-err_put_notifier:
-       /* Frees smmu_mn */
-       mmu_notifier_put(&smmu_mn->mn);
  err_free_cd:
         arm_smmu_free_shared_cd(cd);
         return ERR_PTR(ret);
@@ -363,9 +342,6 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
  
         list_del(&smmu_mn->list);
  
-       arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm),
-                                        NULL);
-
         /*
          * If we went through clear(), we've already invalidated, and no
          * new TLB entry can have been formed.
@@ -381,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
         arm_smmu_free_shared_cd(cd);
  }
  
-static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
+static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid,
+                              struct mm_struct *mm)
  {
         int ret;
         struct arm_smmu_bond *bond;
@@ -404,9 +381,15 @@ static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
                 goto err_free_bond;
         }
  
+       ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd);
+       if (ret)
+               goto err_put_notifier;
+
         list_add(&bond->list, &master->bonds);
         return 0;
  
+err_put_notifier:
+       arm_smmu_mmu_notifier_put(bond->smmu_mn);
  err_free_bond:
         kfree(bond);
         return ret;
@@ -568,6 +551,9 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
  
         mutex_lock(&sva_lock);
+
+       arm_smmu_write_ctx_desc(master, id, NULL);
+
         list_for_each_entry(t, &master->bonds, list) {
                 if (t->mm == mm) {
                         bond = t;
@@ -590,7 +576,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
         struct mm_struct *mm = domain->mm;
  
         mutex_lock(&sva_lock);
-       ret = __arm_smmu_sva_bind(dev, mm);
+       ret = __arm_smmu_sva_bind(dev, id, mm);
         mutex_unlock(&sva_lock);
  
         return ret;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c

index 68b6bc5e7c71016b8d58a6a077e921b27fb51447..6317aaf7b3ab1c7bed6f5f33b9a4bdca14cc171e 100644 (file)
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -859,10 +859,14 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain)
         arm_smmu_rpm_put(smmu);
  }
  
-static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
+static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
  {
         struct arm_smmu_domain *smmu_domain;
  
+       if (type != IOMMU_DOMAIN_UNMANAGED) {
+               if (using_legacy_binding || type != IOMMU_DOMAIN_DMA)
+                       return NULL;
+       }
         /*
          * Allocate the domain and initialise some of its data structures.
          * We can't really do anything meaningful until we've added a
@@ -875,15 +879,6 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
         mutex_init(&smmu_domain->init_mutex);
         spin_lock_init(&smmu_domain->cb_lock);
  
-       if (dev) {
-               struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
-
-               if (arm_smmu_init_domain_context(smmu_domain, cfg->smmu, dev)) {
-                       kfree(smmu_domain);
-                       return NULL;
-               }
-       }
-
         return &smmu_domain->domain;
  }
  
@@ -1600,7 +1595,7 @@ static struct iommu_ops arm_smmu_ops = {
         .identity_domain        = &arm_smmu_identity_domain,
         .blocked_domain         = &arm_smmu_blocked_domain,
         .capable                = arm_smmu_capable,
-       .domain_alloc_paging    = arm_smmu_domain_alloc_paging,
+       .domain_alloc           = arm_smmu_domain_alloc,
         .probe_device           = arm_smmu_probe_device,
         .release_device         = arm_smmu_release_device,
         .probe_finalize         = arm_smmu_probe_finalize,
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c

index 6fb5f6fceea11fb7865d92d8451a5de98a655556..11652e0bcab3a6e3113c70fb80971853df012f57 100644 (file)
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -396,8 +396,6 @@ static int domain_update_device_node(struct dmar_domain *domain)
         return nid;
  }
  
-static void domain_update_iotlb(struct dmar_domain *domain);
-
  /* Return the super pagesize bitmap if supported. */
  static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
  {
@@ -1218,7 +1216,7 @@ domain_lookup_dev_info(struct dmar_domain *domain,
         return NULL;
  }
  
-static void domain_update_iotlb(struct dmar_domain *domain)
+void domain_update_iotlb(struct dmar_domain *domain)
  {
         struct dev_pasid_info *dev_pasid;
         struct device_domain_info *info;
@@ -1368,6 +1366,46 @@ static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
         spin_unlock_irqrestore(&domain->lock, flags);
  }
  
+static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
+                                   unsigned long pfn, unsigned int pages,
+                                   int ih)
+{
+       unsigned int aligned_pages = __roundup_pow_of_two(pages);
+       unsigned long bitmask = aligned_pages - 1;
+       unsigned int mask = ilog2(aligned_pages);
+       u64 addr = (u64)pfn << VTD_PAGE_SHIFT;
+
+       /*
+        * PSI masks the low order bits of the base address. If the
+        * address isn't aligned to the mask, then compute a mask value
+        * needed to ensure the target range is flushed.
+        */
+       if (unlikely(bitmask & pfn)) {
+               unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+               /*
+                * Since end_pfn <= pfn + bitmask, the only way bits
+                * higher than bitmask can differ in pfn and end_pfn is
+                * by carrying. This means after masking out bitmask,
+                * high bits starting with the first set bit in
+                * shared_bits are all equal in both pfn and end_pfn.
+                */
+               shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+               mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+       }
+
+       /*
+        * Fallback to domain selective flush if no PSI support or
+        * the size is too big.
+        */
+       if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
+               iommu->flush.flush_iotlb(iommu, did, 0, 0,
+                                        DMA_TLB_DSI_FLUSH);
+       else
+               iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
+                                        DMA_TLB_PSI_FLUSH);
+}
+
  static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
                                   struct dmar_domain *domain,
                                   unsigned long pfn, unsigned int pages,
@@ -1384,42 +1422,10 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
         if (ih)
                 ih = 1 << 6;
  
-       if (domain->use_first_level) {
+       if (domain->use_first_level)
                 domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
-       } else {
-               unsigned long bitmask = aligned_pages - 1;
-
-               /*
-                * PSI masks the low order bits of the base address. If the
-                * address isn't aligned to the mask, then compute a mask value
-                * needed to ensure the target range is flushed.
-                */
-               if (unlikely(bitmask & pfn)) {
-                       unsigned long end_pfn = pfn + pages - 1, shared_bits;
-
-                       /*
-                        * Since end_pfn <= pfn + bitmask, the only way bits
-                        * higher than bitmask can differ in pfn and end_pfn is
-                        * by carrying. This means after masking out bitmask,
-                        * high bits starting with the first set bit in
-                        * shared_bits are all equal in both pfn and end_pfn.
-                        */
-                       shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
-                       mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
-               }
-
-               /*
-                * Fallback to domain selective flush if no PSI support or
-                * the size is too big.
-                */
-               if (!cap_pgsel_inv(iommu->cap) ||
-                   mask > cap_max_amask_val(iommu->cap))
-                       iommu->flush.flush_iotlb(iommu, did, 0, 0,
-                                                       DMA_TLB_DSI_FLUSH);
-               else
-                       iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
-                                                       DMA_TLB_PSI_FLUSH);
-       }
+       else
+               __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih);
  
         /*
          * In caching mode, changes of pages from non-present to present require
@@ -1443,6 +1449,46 @@ static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *
                 iommu_flush_write_buffer(iommu);
  }
  
+/*
+ * Flush the relevant caches in nested translation if the domain
+ * also serves as a parent
+ */
+static void parent_domain_flush(struct dmar_domain *domain,
+                               unsigned long pfn,
+                               unsigned long pages, int ih)
+{
+       struct dmar_domain *s1_domain;
+
+       spin_lock(&domain->s1_lock);
+       list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+               struct device_domain_info *device_info;
+               struct iommu_domain_info *info;
+               unsigned long flags;
+               unsigned long i;
+
+               xa_for_each(&s1_domain->iommu_array, i, info)
+                       __iommu_flush_iotlb_psi(info->iommu, info->did,
+                                               pfn, pages, ih);
+
+               if (!s1_domain->has_iotlb_device)
+                       continue;
+
+               spin_lock_irqsave(&s1_domain->lock, flags);
+               list_for_each_entry(device_info, &s1_domain->devices, link)
+                       /*
+                        * Address translation cache in device side caches the
+                        * result of nested translation. There is no easy way
+                        * to identify the exact set of nested translations
+                        * affected by a change in S2. So just flush the entire
+                        * device cache.
+                        */
+                       __iommu_flush_dev_iotlb(device_info, 0,
+                                               MAX_AGAW_PFN_WIDTH);
+               spin_unlock_irqrestore(&s1_domain->lock, flags);
+       }
+       spin_unlock(&domain->s1_lock);
+}
+
  static void intel_flush_iotlb_all(struct iommu_domain *domain)
  {
         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
@@ -1462,6 +1508,9 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain)
                 if (!cap_caching_mode(iommu->cap))
                         iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
         }
+
+       if (dmar_domain->nested_parent)
+               parent_domain_flush(dmar_domain, 0, -1, 0);
  }
  
  static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1985,6 +2034,9 @@ static void switch_to_super_page(struct dmar_domain *domain,
                                 iommu_flush_iotlb_psi(info->iommu, domain,
                                                       start_pfn, lvl_pages,
                                                       0, 0);
+                       if (domain->nested_parent)
+                               parent_domain_flush(domain, start_pfn,
+                                                   lvl_pages, 0);
                 }
  
                 pte++;
@@ -3883,6 +3935,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
         bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
         bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
         struct intel_iommu *iommu = info->iommu;
+       struct dmar_domain *dmar_domain;
         struct iommu_domain *domain;
  
         /* Must be NESTING domain */
@@ -3908,11 +3961,16 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
         if (!domain)
                 return ERR_PTR(-ENOMEM);
  
-       if (nested_parent)
-               to_dmar_domain(domain)->nested_parent = true;
+       dmar_domain = to_dmar_domain(domain);
+
+       if (nested_parent) {
+               dmar_domain->nested_parent = true;
+               INIT_LIST_HEAD(&dmar_domain->s1_domains);
+               spin_lock_init(&dmar_domain->s1_lock);
+       }
  
         if (dirty_tracking) {
-               if (to_dmar_domain(domain)->use_first_level) {
+               if (dmar_domain->use_first_level) {
                         iommu_domain_free(domain);
                         return ERR_PTR(-EOPNOTSUPP);
                 }
@@ -3924,8 +3982,12 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
  
  static void intel_iommu_domain_free(struct iommu_domain *domain)
  {
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+       WARN_ON(dmar_domain->nested_parent &&
+               !list_empty(&dmar_domain->s1_domains));
         if (domain != &si_domain->domain)
-               domain_exit(to_dmar_domain(domain));
+               domain_exit(dmar_domain);
  }
  
  int prepare_domain_attach_device(struct iommu_domain *domain,
@@ -4107,6 +4169,9 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
                                       start_pfn, nrpages,
                                       list_empty(&gather->freelist), 0);
  
+       if (dmar_domain->nested_parent)
+               parent_domain_flush(dmar_domain, start_pfn, nrpages,
+                                   list_empty(&gather->freelist));
         put_pages_list(&gather->freelist);
  }
  
@@ -4664,21 +4729,70 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
         return vtd;
  }
  
+/*
+ * Set dirty tracking for the device list of a domain. The caller must
+ * hold the domain->lock when calling it.
+ */
+static int device_set_dirty_tracking(struct list_head *devices, bool enable)
+{
+       struct device_domain_info *info;
+       int ret = 0;
+
+       list_for_each_entry(info, devices, link) {
+               ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
+                                                      IOMMU_NO_PASID, enable);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
+                                           bool enable)
+{
+       struct dmar_domain *s1_domain;
+       unsigned long flags;
+       int ret;
+
+       spin_lock(&domain->s1_lock);
+       list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+               spin_lock_irqsave(&s1_domain->lock, flags);
+               ret = device_set_dirty_tracking(&s1_domain->devices, enable);
+               spin_unlock_irqrestore(&s1_domain->lock, flags);
+               if (ret)
+                       goto err_unwind;
+       }
+       spin_unlock(&domain->s1_lock);
+       return 0;
+
+err_unwind:
+       list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
+               spin_lock_irqsave(&s1_domain->lock, flags);
+               device_set_dirty_tracking(&s1_domain->devices,
+                                         domain->dirty_tracking);
+               spin_unlock_irqrestore(&s1_domain->lock, flags);
+       }
+       spin_unlock(&domain->s1_lock);
+       return ret;
+}
+
  static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
                                           bool enable)
  {
         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
-       struct device_domain_info *info;
         int ret;
  
         spin_lock(&dmar_domain->lock);
         if (dmar_domain->dirty_tracking == enable)
                 goto out_unlock;
  
-       list_for_each_entry(info, &dmar_domain->devices, link) {
-               ret = intel_pasid_setup_dirty_tracking(info->iommu,
-                                                      info->domain, info->dev,
-                                                      IOMMU_NO_PASID, enable);
+       ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
+       if (ret)
+               goto err_unwind;
+
+       if (dmar_domain->nested_parent) {
+               ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
                 if (ret)
                         goto err_unwind;
         }
@@ -4690,10 +4804,8 @@ out_unlock:
         return 0;
  
  err_unwind:
-       list_for_each_entry(info, &dmar_domain->devices, link)
-               intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain,
-                                                info->dev, IOMMU_NO_PASID,
-                                                dmar_domain->dirty_tracking);
+       device_set_dirty_tracking(&dmar_domain->devices,
+                                 dmar_domain->dirty_tracking);
         spin_unlock(&dmar_domain->lock);
         return ret;
  }
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h

index d02f916d8e59a914d2441fa2b81af9ac31dfbf86..4145c04cb1c6818fea0ce420d31c41acec8836a3 100644 (file)
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -627,6 +627,10 @@ struct dmar_domain {
                         int             agaw;
                         /* maximum mapped address */
                         u64             max_addr;
+                       /* Protect the s1_domains list */
+                       spinlock_t      s1_lock;
+                       /* Track s1_domains nested on this domain */
+                       struct list_head s1_domains;
                 };
  
                 /* Nested user domain */
@@ -637,6 +641,8 @@ struct dmar_domain {
                         unsigned long s1_pgtbl;
                         /* page table attributes */
                         struct iommu_hwpt_vtd_s1 s1_cfg;
+                       /* link to parent domain siblings */
+                       struct list_head s2_link;
                 };
         };
  
@@ -1060,6 +1066,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
   */
  #define QI_OPT_WAIT_DRAIN              BIT(0)
  
+void domain_update_iotlb(struct dmar_domain *domain);
  int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
  void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
  void device_block_translation(struct device *dev);
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c

index f26c7f1c46ccaf43b0a4db5209b5c85b484277ed..a7d68f3d518acd9fc5af6f03ebbf71c825a4afcc 100644 (file)
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -65,12 +65,20 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
         list_add(&info->link, &dmar_domain->devices);
         spin_unlock_irqrestore(&dmar_domain->lock, flags);
  
+       domain_update_iotlb(dmar_domain);
+
         return 0;
  }
  
  static void intel_nested_domain_free(struct iommu_domain *domain)
  {
-       kfree(to_dmar_domain(domain));
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+       struct dmar_domain *s2_domain = dmar_domain->s2_domain;
+
+       spin_lock(&s2_domain->s1_lock);
+       list_del(&dmar_domain->s2_link);
+       spin_unlock(&s2_domain->s1_lock);
+       kfree(dmar_domain);
  }
  
  static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
@@ -95,7 +103,7 @@ static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
  }
  
  static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
-                                    unsigned long npages, bool ih)
+                                    u64 npages, bool ih)
  {
         struct iommu_domain_info *info;
         unsigned int mask;
@@ -201,5 +209,9 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
         spin_lock_init(&domain->lock);
         xa_init(&domain->iommu_array);
  
+       spin_lock(&s2_domain->s1_lock);
+       list_add(&domain->s2_link, &s2_domain->s1_domains);
+       spin_unlock(&s2_domain->s1_lock);
+
         return &domain->domain;
  }
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c

index 3239cefa4c337897dda048ebec7aeb1fc075a955..108158e2b907d0744467d88e8ec35b419185555b 100644 (file)
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -428,7 +428,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
   * Set up dirty tracking on a second only or nested translation type.
   */
  int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
-                                    struct dmar_domain *domain,
                                      struct device *dev, u32 pasid,
                                      bool enabled)
  {
@@ -445,7 +444,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
                 return -ENODEV;
         }
  
-       did = domain_id_iommu(domain, iommu);
+       did = pasid_get_domain_id(pte);
         pgtt = pasid_pte_get_pgtt(pte);
         if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
             pgtt != PASID_ENTRY_PGTT_NESTED) {
@@ -658,6 +657,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
         pasid_set_domain_id(pte, did);
         pasid_set_address_width(pte, s2_domain->agaw);
         pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+       if (s2_domain->dirty_tracking)
+               pasid_set_ssade(pte);
         pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
         pasid_set_present(pte);
         spin_unlock(&iommu->lock);
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h

index 8d40d4c66e3198a7ce90c83168a3f86491d79f71..487ede039bdde5733ec1f6af0905ade24c806200 100644 (file)
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -307,7 +307,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
                                    struct dmar_domain *domain,
                                    struct device *dev, u32 pasid);
  int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
-                                    struct dmar_domain *domain,
                                      struct device *dev, u32 pasid,
                                      bool enabled);
  int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c

index c3fc9201d0be97e59395750cda0fc29940c0b844..65814cbc84020021df67d0b7dab9db2c61351b56 100644 (file)
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -41,6 +41,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
         }
         iommu_mm->pasid = pasid;
         INIT_LIST_HEAD(&iommu_mm->sva_domains);
+       INIT_LIST_HEAD(&iommu_mm->sva_handles);
         /*
          * Make sure the write to mm->iommu_mm is not reordered in front of
          * initialization to iommu_mm fields. If it does, readers may see a
@@ -82,6 +83,14 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
                 goto out_unlock;
         }
  
+       list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) {
+               if (handle->dev == dev) {
+                       refcount_inc(&handle->users);
+                       mutex_unlock(&iommu_sva_lock);
+                       return handle;
+               }
+       }
+
         handle = kzalloc(sizeof(*handle), GFP_KERNEL);
         if (!handle) {
                 ret = -ENOMEM;
@@ -111,6 +120,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
         list_add(&domain->next, &mm->iommu_mm->sva_domains);
  
  out:
+       refcount_set(&handle->users, 1);
+       list_add(&handle->handle_item, &mm->iommu_mm->sva_handles);
         mutex_unlock(&iommu_sva_lock);
         handle->dev = dev;
         handle->domain = domain;
@@ -141,6 +152,12 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
         struct device *dev = handle->dev;
  
         mutex_lock(&iommu_sva_lock);
+       if (!refcount_dec_and_test(&handle->users)) {
+               mutex_unlock(&iommu_sva_lock);
+               return;
+       }
+       list_del(&handle->handle_item);
+
         iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
         if (--domain->users == 0) {
                 list_del(&domain->next);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c

index 68e648b55767060204a8f42d1927c09ebacad39a..d14413916f93a01626e850aa72ee0c919c1f72bd 100644 (file)
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1799,7 +1799,7 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
          * domain. Do not use in new drivers.
          */
         if (ops->default_domain) {
-               if (req_type)
+               if (req_type != ops->default_domain->type)
                         return ERR_PTR(-EINVAL);
                 return ops->default_domain;
         }
@@ -1871,10 +1871,18 @@ static int iommu_get_def_domain_type(struct iommu_group *group,
         const struct iommu_ops *ops = dev_iommu_ops(dev);
         int type;
  
-       if (!ops->def_domain_type)
-               return cur_type;
-
-       type = ops->def_domain_type(dev);
+       if (ops->default_domain) {
+               /*
+                * Drivers that declare a global static default_domain will
+                * always choose that.
+                */
+               type = ops->default_domain->type;
+       } else {
+               if (ops->def_domain_type)
+                       type = ops->def_domain_type(dev);
+               else
+                       return cur_type;
+       }
         if (!type || cur_type == type)
                 return cur_type;
         if (!cur_type)
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c

index 3f3f1fa1a0a946a43eb48ee324ab4979683bb566..33d142f8057d70a77f44e842afdd84b1bee0a970 100644 (file)
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -263,7 +263,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
  
         if (cmd->__reserved)
                 return -EOPNOTSUPP;
-       if (cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len)
+       if ((cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len) ||
+           (cmd->data_type != IOMMU_HWPT_DATA_NONE && !cmd->data_len))
                 return -EINVAL;
  
         idev = iommufd_get_device(ucmd, cmd->dev_id);
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c

index 504ac1b01b2d2ab45fbc22fde2bdcf324ce2d973..05fd9d3abf1b809614cced9e9387679797866103 100644 (file)
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -1330,20 +1330,23 @@ out_unlock:
  
  int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
  {
+       u32 new_id;
         int rc;
  
         down_write(&iopt->domains_rwsem);
         down_write(&iopt->iova_rwsem);
-       rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access,
-                     xa_limit_16b, GFP_KERNEL_ACCOUNT);
+       rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b,
+                     GFP_KERNEL_ACCOUNT);
+
         if (rc)
                 goto out_unlock;
  
         rc = iopt_calculate_iova_alignment(iopt);
         if (rc) {
-               xa_erase(&iopt->access_list, access->iopt_access_list_id);
+               xa_erase(&iopt->access_list, new_id);
                 goto out_unlock;
         }
+       access->iopt_access_list_id = new_id;
  
  out_unlock:
         up_write(&iopt->iova_rwsem);
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h

index 482d4059f5db6aed38ee8aa60f25b791f1e7556d..e854d3f672051b5223e0fec8af741abf03bbffbd 100644 (file)
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -45,6 +45,7 @@ enum {
  
  enum {
         MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0,
+       MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1,
  };
  
  enum {
diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c

index 0a92c9eeaf7f50a6fe05c266b9ec39d1021844a9..db8c46bee1559ac46fb148d2474668b5a994ae15 100644 (file)
--- a/drivers/iommu/iommufd/iova_bitmap.c
+++ b/drivers/iommu/iommufd/iova_bitmap.c
@@ -100,7 +100,7 @@ struct iova_bitmap {
         struct iova_bitmap_map mapped;
  
         /* userspace address of the bitmap */
-       u64 __user *bitmap;
+       u8 __user *bitmap;
  
         /* u64 index that @mapped points to */
         unsigned long mapped_base_index;
@@ -113,6 +113,9 @@ struct iova_bitmap {
  
         /* length of the IOVA range for the whole bitmap */
         size_t length;
+
+       /* length of the IOVA range set ahead the pinned pages */
+       unsigned long set_ahead_length;
  };
  
  /*
@@ -162,7 +165,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap)
  {
         struct iova_bitmap_map *mapped = &bitmap->mapped;
         unsigned long npages;
-       u64 __user *addr;
+       u8 __user *addr;
         long ret;
  
         /*
@@ -175,18 +178,19 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap)
                                bitmap->mapped_base_index) *
                                sizeof(*bitmap->bitmap), PAGE_SIZE);
  
-       /*
-        * We always cap at max number of 'struct page' a base page can fit.
-        * This is, for example, on x86 means 2M of bitmap data max.
-        */
-       npages = min(npages,  PAGE_SIZE / sizeof(struct page *));
-
         /*
          * Bitmap address to be pinned is calculated via pointer arithmetic
          * with bitmap u64 word index.
          */
         addr = bitmap->bitmap + bitmap->mapped_base_index;
  
+       /*
+        * We always cap at max number of 'struct page' a base page can fit.
+        * This is, for example, on x86 means 2M of bitmap data max.
+        */
+       npages = min(npages + !!offset_in_page(addr),
+                    PAGE_SIZE / sizeof(struct page *));
+
         ret = pin_user_pages_fast((unsigned long)addr, npages,
                                   FOLL_WRITE, mapped->pages);
         if (ret <= 0)
@@ -247,7 +251,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
  
         mapped = &bitmap->mapped;
         mapped->pgshift = __ffs(page_size);
-       bitmap->bitmap = data;
+       bitmap->bitmap = (u8 __user *)data;
         bitmap->mapped_total_index =
                 iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
         bitmap->iova = iova;
@@ -304,7 +308,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
  
         remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
         remaining = min_t(unsigned long, remaining,
-                         bytes / sizeof(*bitmap->bitmap));
+                         DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap)));
  
         return remaining;
  }
@@ -341,6 +345,32 @@ static bool iova_bitmap_done(struct iova_bitmap *bitmap)
         return bitmap->mapped_base_index >= bitmap->mapped_total_index;
  }
  
+static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap,
+                                size_t set_ahead_length)
+{
+       int ret = 0;
+
+       while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) {
+               unsigned long length = iova_bitmap_mapped_length(bitmap);
+               unsigned long iova = iova_bitmap_mapped_iova(bitmap);
+
+               ret = iova_bitmap_get(bitmap);
+               if (ret)
+                       break;
+
+               length = min(length, set_ahead_length);
+               iova_bitmap_set(bitmap, iova, length);
+
+               set_ahead_length -= length;
+               bitmap->mapped_base_index +=
+                       iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
+               iova_bitmap_put(bitmap);
+       }
+
+       bitmap->set_ahead_length = 0;
+       return ret;
+}
+
  /*
   * Advances to the next range, releases the current pinned
   * pages and pins the next set of bitmap pages.
@@ -357,6 +387,15 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap)
         if (iova_bitmap_done(bitmap))
                 return 0;
  
+       /* Iterate, set and skip any bits requested for next iteration */
+       if (bitmap->set_ahead_length) {
+               int ret;
+
+               ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length);
+               if (ret)
+                       return ret;
+       }
+
         /* When advancing the index we pin the next set of bitmap pages */
         return iova_bitmap_get(bitmap);
  }
@@ -409,6 +448,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
                         mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
         unsigned long last_bit = (((iova + length - 1) - mapped->iova) >>
                         mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
+       unsigned long last_page_idx = mapped->npages - 1;
  
         do {
                 unsigned int page_idx = cur_bit / BITS_PER_PAGE;
@@ -417,10 +457,18 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
                                          last_bit - cur_bit + 1);
                 void *kaddr;
  
+               if (unlikely(page_idx > last_page_idx))
+                       break;
+
                 kaddr = kmap_local_page(mapped->pages[page_idx]);
                 bitmap_set(kaddr, offset, nbits);
                 kunmap_local(kaddr);
                 cur_bit += nbits;
         } while (cur_bit <= last_bit);
+
+       if (unlikely(cur_bit <= last_bit)) {
+               bitmap->set_ahead_length =
+                       ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift);
+       }
  }
  EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD);
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c

index d9e9920c7eba413eaf25b7840eefdf36a3999a9e..7a2199470f3121da91e060bca82315a6944e37b8 100644 (file)
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -36,11 +36,12 @@ static struct mock_bus_type iommufd_mock_bus_type = {
         },
  };
  
-static atomic_t mock_dev_num;
+static DEFINE_IDA(mock_dev_ida);
  
  enum {
         MOCK_DIRTY_TRACK = 1,
         MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2,
+       MOCK_HUGE_PAGE_SIZE = 512 * MOCK_IO_PAGE_SIZE,
  
         /*
          * Like a real page table alignment requires the low bits of the address
@@ -53,6 +54,7 @@ enum {
         MOCK_PFN_START_IOVA = _MOCK_PFN_START,
         MOCK_PFN_LAST_IOVA = _MOCK_PFN_START,
         MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1,
+       MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2,
  };
  
  /*
@@ -61,8 +63,8 @@ enum {
   * In syzkaller mode the 64 bit IOVA is converted into an nth area and offset
   * value. This has a much smaller randomization space and syzkaller can hit it.
   */
-static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
-                                               u64 *iova)
+static unsigned long __iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
+                                                 u64 *iova)
  {
         struct syz_layout {
                 __u32 nth_area;
@@ -86,6 +88,21 @@ static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
         return 0;
  }
  
+static unsigned long iommufd_test_syz_conv_iova(struct iommufd_access *access,
+                                               u64 *iova)
+{
+       unsigned long ret;
+
+       mutex_lock(&access->ioas_lock);
+       if (!access->ioas) {
+               mutex_unlock(&access->ioas_lock);
+               return 0;
+       }
+       ret = __iommufd_test_syz_conv_iova(&access->ioas->iopt, iova);
+       mutex_unlock(&access->ioas_lock);
+       return ret;
+}
+
  void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
                                    unsigned int ioas_id, u64 *iova, u32 *flags)
  {
@@ -98,7 +115,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
         ioas = iommufd_get_ioas(ucmd->ictx, ioas_id);
         if (IS_ERR(ioas))
                 return;
-       *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
+       *iova = __iommufd_test_syz_conv_iova(&ioas->iopt, iova);
         iommufd_put_object(ucmd->ictx, &ioas->obj);
  }
  
@@ -121,6 +138,7 @@ enum selftest_obj_type {
  struct mock_dev {
         struct device dev;
         unsigned long flags;
+       int id;
  };
  
  struct selftest_obj {
@@ -191,6 +209,34 @@ static int mock_domain_set_dirty_tracking(struct iommu_domain *domain,
         return 0;
  }
  
+static bool mock_test_and_clear_dirty(struct mock_iommu_domain *mock,
+                                     unsigned long iova, size_t page_size,
+                                     unsigned long flags)
+{
+       unsigned long cur, end = iova + page_size - 1;
+       bool dirty = false;
+       void *ent, *old;
+
+       for (cur = iova; cur < end; cur += MOCK_IO_PAGE_SIZE) {
+               ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE);
+               if (!ent || !(xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA))
+                       continue;
+
+               dirty = true;
+               /* Clear dirty */
+               if (!(flags & IOMMU_DIRTY_NO_CLEAR)) {
+                       unsigned long val;
+
+                       val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA;
+                       old = xa_store(&mock->pfns, cur / MOCK_IO_PAGE_SIZE,
+                                      xa_mk_value(val), GFP_KERNEL);
+                       WARN_ON_ONCE(ent != old);
+               }
+       }
+
+       return dirty;
+}
+
  static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
                                             unsigned long iova, size_t size,
                                             unsigned long flags,
@@ -198,31 +244,31 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain,
  {
         struct mock_iommu_domain *mock =
                 container_of(domain, struct mock_iommu_domain, domain);
-       unsigned long i, max = size / MOCK_IO_PAGE_SIZE;
-       void *ent, *old;
+       unsigned long end = iova + size;
+       void *ent;
  
         if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap)
                 return -EINVAL;
  
-       for (i = 0; i < max; i++) {
-               unsigned long cur = iova + i * MOCK_IO_PAGE_SIZE;
+       do {
+               unsigned long pgsize = MOCK_IO_PAGE_SIZE;
+               unsigned long head;
  
-               ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE);
-               if (ent && (xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) {
-                       /* Clear dirty */
-                       if (!(flags & IOMMU_DIRTY_NO_CLEAR)) {
-                               unsigned long val;
-
-                               val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA;
-                               old = xa_store(&mock->pfns,
-                                              cur / MOCK_IO_PAGE_SIZE,
-                                              xa_mk_value(val), GFP_KERNEL);
-                               WARN_ON_ONCE(ent != old);
-                       }
-                       iommu_dirty_bitmap_record(dirty, cur,
-                                                 MOCK_IO_PAGE_SIZE);
+               ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
+               if (!ent) {
+                       iova += pgsize;
+                       continue;
                 }
-       }
+
+               if (xa_to_value(ent) & MOCK_PFN_HUGE_IOVA)
+                       pgsize = MOCK_HUGE_PAGE_SIZE;
+               head = iova & ~(pgsize - 1);
+
+               /* Clear dirty */
+               if (mock_test_and_clear_dirty(mock, head, pgsize, flags))
+                       iommu_dirty_bitmap_record(dirty, head, pgsize);
+               iova = head + pgsize;
+       } while (iova < end);
  
         return 0;
  }
@@ -234,6 +280,7 @@ const struct iommu_dirty_ops dirty_ops = {
  
  static struct iommu_domain *mock_domain_alloc_paging(struct device *dev)
  {
+       struct mock_dev *mdev = container_of(dev, struct mock_dev, dev);
         struct mock_iommu_domain *mock;
  
         mock = kzalloc(sizeof(*mock), GFP_KERNEL);
@@ -242,6 +289,8 @@ static struct iommu_domain *mock_domain_alloc_paging(struct device *dev)
         mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
         mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
         mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
+       if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA)
+               mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE;
         mock->domain.ops = mock_ops.default_domain_ops;
         mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
         xa_init(&mock->pfns);
@@ -287,7 +336,7 @@ mock_domain_alloc_user(struct device *dev, u32 flags,
                         return ERR_PTR(-EOPNOTSUPP);
                 if (user_data || (has_dirty_flag && no_dirty_ops))
                         return ERR_PTR(-EOPNOTSUPP);
-               domain = mock_domain_alloc_paging(NULL);
+               domain = mock_domain_alloc_paging(dev);
                 if (!domain)
                         return ERR_PTR(-ENOMEM);
                 if (has_dirty_flag)
@@ -350,6 +399,9 @@ static int mock_domain_map_pages(struct iommu_domain *domain,
  
                         if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
                                 flags = MOCK_PFN_LAST_IOVA;
+                       if (pgsize != MOCK_IO_PAGE_SIZE) {
+                               flags |= MOCK_PFN_HUGE_IOVA;
+                       }
                         old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE,
                                        xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) |
                                                    flags),
@@ -394,20 +446,27 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain,
  
                         /*
                          * iommufd generates unmaps that must be a strict
-                        * superset of the map's performend So every starting
-                        * IOVA should have been an iova passed to map, and the
+                        * superset of the map's performend So every
+                        * starting/ending IOVA should have been an iova passed
+                        * to map.
                          *
-                        * First IOVA must be present and have been a first IOVA
-                        * passed to map_pages
+                        * This simple logic doesn't work when the HUGE_PAGE is
+                        * turned on since the core code will automatically
+                        * switch between the two page sizes creating a break in
+                        * the unmap calls. The break can land in the middle of
+                        * contiguous IOVA.
                          */
-                       if (first) {
-                               WARN_ON(ent && !(xa_to_value(ent) &
-                                                MOCK_PFN_START_IOVA));
-                               first = false;
+                       if (!(domain->pgsize_bitmap & MOCK_HUGE_PAGE_SIZE)) {
+                               if (first) {
+                                       WARN_ON(ent && !(xa_to_value(ent) &
+                                                        MOCK_PFN_START_IOVA));
+                                       first = false;
+                               }
+                               if (pgcount == 1 &&
+                                   cur + MOCK_IO_PAGE_SIZE == pgsize)
+                                       WARN_ON(ent && !(xa_to_value(ent) &
+                                                        MOCK_PFN_LAST_IOVA));
                         }
-                       if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
-                               WARN_ON(ent && !(xa_to_value(ent) &
-                                                MOCK_PFN_LAST_IOVA));
  
                         iova += MOCK_IO_PAGE_SIZE;
                         ret += MOCK_IO_PAGE_SIZE;
@@ -595,7 +654,7 @@ static void mock_dev_release(struct device *dev)
  {
         struct mock_dev *mdev = container_of(dev, struct mock_dev, dev);
  
-       atomic_dec(&mock_dev_num);
+       ida_free(&mock_dev_ida, mdev->id);
         kfree(mdev);
  }
  
@@ -604,7 +663,8 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
         struct mock_dev *mdev;
         int rc;
  
-       if (dev_flags & ~(MOCK_FLAGS_DEVICE_NO_DIRTY))
+       if (dev_flags &
+           ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA))
                 return ERR_PTR(-EINVAL);
  
         mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
@@ -616,8 +676,12 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
         mdev->dev.release = mock_dev_release;
         mdev->dev.bus = &iommufd_mock_bus_type.bus;
  
-       rc = dev_set_name(&mdev->dev, "iommufd_mock%u",
-                         atomic_inc_return(&mock_dev_num));
+       rc = ida_alloc(&mock_dev_ida, GFP_KERNEL);
+       if (rc < 0)
+               goto err_put;
+       mdev->id = rc;
+
+       rc = dev_set_name(&mdev->dev, "iommufd_mock%u", mdev->id);
         if (rc)
                 goto err_put;
  
@@ -1119,7 +1183,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
         }
  
         if (flags & MOCK_FLAGS_ACCESS_SYZ)
-               iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
+               iova = iommufd_test_syz_conv_iova(staccess->access,
                                         &cmd->access_pages.iova);
  
         npages = (ALIGN(iova + length, PAGE_SIZE) -
@@ -1221,8 +1285,8 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
         }
  
         if (flags & MOCK_FLAGS_ACCESS_SYZ)
-               iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
-                                       &cmd->access_rw.iova);
+               iova = iommufd_test_syz_conv_iova(staccess->access,
+                               &cmd->access_rw.iova);
  
         rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags);
         if (rc)
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c

index 5559c943f03f973137432de01f1972aec58f94b6..2b0b3175cea068eb571d8ec5f82a4d45a01e5719 100644 (file)
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -2,7 +2,7 @@
  /*
   * Generic Broadcom Set Top Box Level 2 Interrupt controller driver
   *
- * Copyright (C) 2014-2017 Broadcom
+ * Copyright (C) 2014-2024 Broadcom
   */
  
  #define pr_fmt(fmt)    KBUILD_MODNAME  ": " fmt
@@ -112,6 +112,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc)
                 generic_handle_domain_irq(b->domain, irq);
         } while (status);
  out:
+       /* Don't ack parent before all device writes are done */
+       wmb();
+
         chained_irq_exit(chip, desc);
  }
  
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c

index d097001c1e3ee7e1d1380a891660dfc522a37554..b822752c42617055e811f9e89bc2b3455bcc2eb8 100644 (file)
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -207,6 +207,11 @@ static bool require_its_list_vmovp(struct its_vm *vm, struct its_node *its)
         return (gic_rdists->has_rvpeid || vm->vlpi_count[its->list_nr]);
  }
  
+static bool rdists_support_shareable(void)
+{
+       return !(gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE);
+}
+
  static u16 get_its_list(struct its_vm *vm)
  {
         struct its_node *its;
@@ -2710,10 +2715,12 @@ static u64 inherit_vpe_l1_table_from_its(void)
                         break;
                 }
                 val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12);
-               val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
-                                 FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
-               val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
-                                 FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
+               if (rdists_support_shareable()) {
+                       val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK,
+                                         FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser));
+                       val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK,
+                                         FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser));
+               }
                 val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1);
  
                 return val;
@@ -2936,8 +2943,10 @@ static int allocate_vpe_l1_table(void)
         WARN_ON(!IS_ALIGNED(pa, psz));
  
         val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12);
-       val |= GICR_VPROPBASER_RaWb;
-       val |= GICR_VPROPBASER_InnerShareable;
+       if (rdists_support_shareable()) {
+               val |= GICR_VPROPBASER_RaWb;
+               val |= GICR_VPROPBASER_InnerShareable;
+       }
         val |= GICR_VPROPBASER_4_1_Z;
         val |= GICR_VPROPBASER_4_1_VALID;
  
@@ -3126,7 +3135,7 @@ static void its_cpu_init_lpis(void)
         gicr_write_propbaser(val, rbase + GICR_PROPBASER);
         tmp = gicr_read_propbaser(rbase + GICR_PROPBASER);
  
-       if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE)
+       if (!rdists_support_shareable())
                 tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK;
  
         if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
@@ -3153,7 +3162,7 @@ static void its_cpu_init_lpis(void)
         gicr_write_pendbaser(val, rbase + GICR_PENDBASER);
         tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER);
  
-       if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE)
+       if (!rdists_support_shareable())
                 tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK;
  
         if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
@@ -3172,6 +3181,7 @@ static void its_cpu_init_lpis(void)
         val |= GICR_CTLR_ENABLE_LPIS;
         writel_relaxed(val, rbase + GICR_CTLR);
  
+out:
         if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) {
                 void __iomem *vlpi_base = gic_data_rdist_vlpi_base();
  
@@ -3207,7 +3217,6 @@ static void its_cpu_init_lpis(void)
  
         /* Make sure the GIC has seen the above */
         dsb(sy);
-out:
         gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED;
         pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n",
                 smp_processor_id(),
@@ -3817,8 +3826,9 @@ static int its_vpe_set_affinity(struct irq_data *d,
                                 bool force)
  {
         struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
-       int from, cpu = cpumask_first(mask_val);
+       struct cpumask common, *table_mask;
         unsigned long flags;
+       int from, cpu;
  
         /*
          * Changing affinity is mega expensive, so let's be as lazy as
@@ -3834,19 +3844,22 @@ static int its_vpe_set_affinity(struct irq_data *d,
          * taken on any vLPI handling path that evaluates vpe->col_idx.
          */
         from = vpe_to_cpuid_lock(vpe, &flags);
-       if (from == cpu)
-               goto out;
-
-       vpe->col_idx = cpu;
+       table_mask = gic_data_rdist_cpu(from)->vpe_table_mask;
  
         /*
-        * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD
-        * is sharing its VPE table with the current one.
+        * If we are offered another CPU in the same GICv4.1 ITS
+        * affinity, pick this one. Otherwise, any CPU will do.
          */
-       if (gic_data_rdist_cpu(cpu)->vpe_table_mask &&
-           cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask))
+       if (table_mask && cpumask_and(&common, mask_val, table_mask))
+               cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common);
+       else
+               cpu = cpumask_first(mask_val);
+
+       if (from == cpu)
                 goto out;
  
+       vpe->col_idx = cpu;
+
         its_send_vmovp(vpe);
         its_vpe_db_proxy_move(vpe, from, cpu);
  
@@ -3880,14 +3893,18 @@ static void its_vpe_schedule(struct its_vpe *vpe)
         val  = virt_to_phys(page_address(vpe->its_vm->vprop_page)) &
                 GENMASK_ULL(51, 12);
         val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK;
-       val |= GICR_VPROPBASER_RaWb;
-       val |= GICR_VPROPBASER_InnerShareable;
+       if (rdists_support_shareable()) {
+               val |= GICR_VPROPBASER_RaWb;
+               val |= GICR_VPROPBASER_InnerShareable;
+       }
         gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
  
         val  = virt_to_phys(page_address(vpe->vpt_page)) &
                 GENMASK_ULL(51, 16);
-       val |= GICR_VPENDBASER_RaWaWb;
-       val |= GICR_VPENDBASER_InnerShareable;
+       if (rdists_support_shareable()) {
+               val |= GICR_VPENDBASER_RaWaWb;
+               val |= GICR_VPENDBASER_InnerShareable;
+       }
         /*
          * There is no good way of finding out if the pending table is
          * empty as we can race against the doorbell interrupt very
@@ -5078,6 +5095,8 @@ static int __init its_probe_one(struct its_node *its)
         u32 ctlr;
         int err;
  
+       its_enable_quirks(its);
+
         if (is_v4(its)) {
                 if (!(its->typer & GITS_TYPER_VMOVP)) {
                         err = its_compute_its_list_map(its);
@@ -5429,7 +5448,6 @@ static int __init its_of_probe(struct device_node *node)
                 if (!its)
                         return -ENOMEM;
  
-               its_enable_quirks(its);
                 err = its_probe_one(its);
                 if (err)  {
                         its_node_destroy(its);
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c

index 1623cd77917523f42419cb958ecbc0ce32ba8809..b3736bdd4b9f2ce0ddabd86b777f40c53c488eeb 100644 (file)
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -241,7 +241,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq,
         int ret;
         unsigned int i, type;
         unsigned long hwirq = 0;
-       struct eiointc *priv = domain->host_data;
+       struct eiointc_priv *priv = domain->host_data;
  
         ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type);
         if (ret)
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c

index 5101a3fb11df5bef53122db9db3c194669d754e7..58881d3139792074bf6ae1430a4de3760d3eb220 100644 (file)
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -235,22 +235,17 @@ static const struct irq_domain_ops mbigen_domain_ops = {
  static int mbigen_of_create_domain(struct platform_device *pdev,
                                    struct mbigen_device *mgn_chip)
  {
-       struct device *parent;
         struct platform_device *child;
         struct irq_domain *domain;
         struct device_node *np;
         u32 num_pins;
         int ret = 0;
  
-       parent = bus_get_dev_root(&platform_bus_type);
-       if (!parent)
-               return -ENODEV;
-
         for_each_child_of_node(pdev->dev.of_node, np) {
                 if (!of_property_read_bool(np, "interrupt-controller"))
                         continue;
  
-               child = of_platform_device_create(np, NULL, parent);
+               child = of_platform_device_create(np, NULL, NULL);
                 if (!child) {
                         ret = -ENOMEM;
                         break;
@@ -273,7 +268,6 @@ static int mbigen_of_create_domain(struct platform_device *pdev,
                 }
         }
  
-       put_device(parent);
         if (ret)
                 of_node_put(np);
  
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c

index cda5838d2232dc1971369b4c6e872d004263b8de..7942d8eb3d00eae5fa7e5718a05ef889bb8a82f0 100644 (file)
--- a/drivers/irqchip/irq-qcom-mpm.c
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -389,8 +389,8 @@ static int qcom_mpm_init(struct device_node *np, struct device_node *parent)
                 /* Don't use devm_ioremap_resource, as we're accessing a shared region. */
                 priv->base = devm_ioremap(dev, res.start, resource_size(&res));
                 of_node_put(msgram_np);
-               if (IS_ERR(priv->base))
-                       return PTR_ERR(priv->base);
+               if (!priv->base)
+                       return -ENOMEM;
         } else {
                 /* Otherwise, fall back to simple MMIO. */
                 priv->base = devm_platform_ioremap_resource(pdev, 0);
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c

index 5b7bc4fd9517c8972680ad7a503eebf2ca47a518..bf0b40b0fad4b23d756a22a86c8e206a7155e858 100644 (file)
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -148,7 +148,13 @@ static void plic_irq_eoi(struct irq_data *d)
  {
         struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
  
-       writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+       if (unlikely(irqd_irq_disabled(d))) {
+               plic_toggle(handler, d->hwirq, 1);
+               writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+               plic_toggle(handler, d->hwirq, 0);
+       } else {
+               writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+       }
  }
  
  #ifdef CONFIG_SMP
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h

index 095b9b49aa8250a1f56c531883cce5f6e8a24727..e6757a30dccad1fa1a6ae060b33d41a6a120dda3 100644 (file)
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -22,6 +22,8 @@
  #include "dm-ima.h"
  
  #define DM_RESERVED_MAX_IOS            1024
+#define DM_MAX_TARGETS                 1048576
+#define DM_MAX_TARGET_PARAMS           1024
  
  struct dm_io;
  
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c

index 855b482cbff1f072912e957e8c1cc1d3b4e1b319..59445763e55a65de49e79cc2436c8a03131a5a15 100644 (file)
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -53,15 +53,17 @@
  struct convert_context {
         struct completion restart;
         struct bio *bio_in;
-       struct bio *bio_out;
         struct bvec_iter iter_in;
+       struct bio *bio_out;
         struct bvec_iter iter_out;
-       u64 cc_sector;
         atomic_t cc_pending;
+       u64 cc_sector;
         union {
                 struct skcipher_request *req;
                 struct aead_request *req_aead;
         } r;
+       bool aead_recheck;
+       bool aead_failed;
  
  };
  
@@ -73,10 +75,8 @@ struct dm_crypt_io {
         struct bio *base_bio;
         u8 *integrity_metadata;
         bool integrity_metadata_from_pool:1;
-       bool in_tasklet:1;
  
         struct work_struct work;
-       struct tasklet_struct tasklet;
  
         struct convert_context ctx;
  
@@ -84,6 +84,8 @@ struct dm_crypt_io {
         blk_status_t error;
         sector_t sector;
  
+       struct bvec_iter saved_bi_iter;
+
         struct rb_node rb_node;
  } CRYPTO_MINALIGN_ATTR;
  
@@ -1372,10 +1374,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
         if (r == -EBADMSG) {
                 sector_t s = le64_to_cpu(*sector);
  
-               DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
-                           ctx->bio_in->bi_bdev, s);
-               dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
-                                ctx->bio_in, s, 0);
+               ctx->aead_failed = true;
+               if (ctx->aead_recheck) {
+                       DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+                                   ctx->bio_in->bi_bdev, s);
+                       dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
+                                        ctx->bio_in, s, 0);
+               }
         }
  
         if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
@@ -1759,10 +1764,11 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
         io->base_bio = bio;
         io->sector = sector;
         io->error = 0;
+       io->ctx.aead_recheck = false;
+       io->ctx.aead_failed = false;
         io->ctx.r.req = NULL;
         io->integrity_metadata = NULL;
         io->integrity_metadata_from_pool = false;
-       io->in_tasklet = false;
         atomic_set(&io->io_pending, 0);
  }
  
@@ -1771,12 +1777,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
         atomic_inc(&io->io_pending);
  }
  
-static void kcryptd_io_bio_endio(struct work_struct *work)
-{
-       struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
-
-       bio_endio(io->base_bio);
-}
+static void kcryptd_queue_read(struct dm_crypt_io *io);
  
  /*
   * One of the bios was finished. Check for completion of
@@ -1791,6 +1792,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
         if (!atomic_dec_and_test(&io->io_pending))
                 return;
  
+       if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) &&
+           cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) {
+               io->ctx.aead_recheck = true;
+               io->ctx.aead_failed = false;
+               io->error = 0;
+               kcryptd_queue_read(io);
+               return;
+       }
+
         if (io->ctx.r.req)
                 crypt_free_req(cc, io->ctx.r.req, base_bio);
  
@@ -1801,20 +1811,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
  
         base_bio->bi_status = error;
  
-       /*
-        * If we are running this function from our tasklet,
-        * we can't call bio_endio() here, because it will call
-        * clone_endio() from dm.c, which in turn will
-        * free the current struct dm_crypt_io structure with
-        * our tasklet. In this case we need to delay bio_endio()
-        * execution to after the tasklet is done and dequeued.
-        */
-       if (io->in_tasklet) {
-               INIT_WORK(&io->work, kcryptd_io_bio_endio);
-               queue_work(cc->io_queue, &io->work);
-               return;
-       }
-
         bio_endio(base_bio);
  }
  
@@ -1840,15 +1836,19 @@ static void crypt_endio(struct bio *clone)
         struct dm_crypt_io *io = clone->bi_private;
         struct crypt_config *cc = io->cc;
         unsigned int rw = bio_data_dir(clone);
-       blk_status_t error;
+       blk_status_t error = clone->bi_status;
+
+       if (io->ctx.aead_recheck && !error) {
+               kcryptd_queue_crypt(io);
+               return;
+       }
  
         /*
          * free the processed pages
          */
-       if (rw == WRITE)
+       if (rw == WRITE || io->ctx.aead_recheck)
                 crypt_free_buffer_pages(cc, clone);
  
-       error = clone->bi_status;
         bio_put(clone);
  
         if (rw == READ && !error) {
@@ -1869,6 +1869,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
         struct crypt_config *cc = io->cc;
         struct bio *clone;
  
+       if (io->ctx.aead_recheck) {
+               if (!(gfp & __GFP_DIRECT_RECLAIM))
+                       return 1;
+               crypt_inc_pending(io);
+               clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+               if (unlikely(!clone)) {
+                       crypt_dec_pending(io);
+                       return 1;
+               }
+               clone->bi_iter.bi_sector = cc->start + io->sector;
+               crypt_convert_init(cc, &io->ctx, clone, clone, io->sector);
+               io->saved_bi_iter = clone->bi_iter;
+               dm_submit_bio_remap(io->base_bio, clone);
+               return 0;
+       }
+
         /*
          * We need the original biovec array in order to decrypt the whole bio
          * data *afterwards* -- thanks to immutable biovecs we don't need to
@@ -2095,6 +2111,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
         io->ctx.bio_out = clone;
         io->ctx.iter_out = clone->bi_iter;
  
+       if (crypt_integrity_aead(cc)) {
+               bio_copy_data(clone, io->base_bio);
+               io->ctx.bio_in = clone;
+               io->ctx.iter_in = clone->bi_iter;
+       }
+
         sector += bio_sectors(clone);
  
         crypt_inc_pending(io);
@@ -2131,6 +2153,14 @@ dec:
  
  static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
  {
+       if (io->ctx.aead_recheck) {
+               if (!io->error) {
+                       io->ctx.bio_in->bi_iter = io->saved_bi_iter;
+                       bio_copy_data(io->base_bio, io->ctx.bio_in);
+               }
+               crypt_free_buffer_pages(io->cc, io->ctx.bio_in);
+               bio_put(io->ctx.bio_in);
+       }
         crypt_dec_pending(io);
  }
  
@@ -2160,11 +2190,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
  
         crypt_inc_pending(io);
  
-       crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
-                          io->sector);
+       if (io->ctx.aead_recheck) {
+               io->ctx.cc_sector = io->sector + cc->iv_offset;
+               r = crypt_convert(cc, &io->ctx,
+                                 test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+       } else {
+               crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
+                                  io->sector);
  
-       r = crypt_convert(cc, &io->ctx,
-                         test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+               r = crypt_convert(cc, &io->ctx,
+                                 test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+       }
         /*
          * Crypto API backlogged the request, because its queue was full
          * and we're in softirq context, so continue from a workqueue
@@ -2206,10 +2242,13 @@ static void kcryptd_async_done(void *data, int error)
         if (error == -EBADMSG) {
                 sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq));
  
-               DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
-                           ctx->bio_in->bi_bdev, s);
-               dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
-                                ctx->bio_in, s, 0);
+               ctx->aead_failed = true;
+               if (ctx->aead_recheck) {
+                       DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
+                                   ctx->bio_in->bi_bdev, s);
+                       dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
+                                        ctx->bio_in, s, 0);
+               }
                 io->error = BLK_STS_PROTECTION;
         } else if (error < 0)
                 io->error = BLK_STS_IOERR;
@@ -2246,11 +2285,6 @@ static void kcryptd_crypt(struct work_struct *work)
                 kcryptd_crypt_write_convert(io);
  }
  
-static void kcryptd_crypt_tasklet(unsigned long work)
-{
-       kcryptd_crypt((struct work_struct *)work);
-}
-
  static void kcryptd_queue_crypt(struct dm_crypt_io *io)
  {
         struct crypt_config *cc = io->cc;
@@ -2262,15 +2296,10 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
                  * irqs_disabled(): the kernel may run some IO completion from the idle thread, but
                  * it is being executed with irqs disabled.
                  */
-               if (in_hardirq() || irqs_disabled()) {
-                       io->in_tasklet = true;
-                       tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
-                       tasklet_schedule(&io->tasklet);
+               if (!(in_hardirq() || irqs_disabled())) {
+                       kcryptd_crypt(&io->work);
                         return;
                 }
-
-               kcryptd_crypt(&io->work);
-               return;
         }
  
         INIT_WORK(&io->work, kcryptd_crypt);
@@ -3144,7 +3173,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
                         sval = strchr(opt_string + strlen("integrity:"), ':') + 1;
                         if (!strcasecmp(sval, "aead")) {
                                 set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags);
-                       } else  if (strcasecmp(sval, "none")) {
+                       } else if (strcasecmp(sval, "none")) {
                                 ti->error = "Unknown integrity profile";
                                 return -EINVAL;
                         }
@@ -3673,7 +3702,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
  
  static struct target_type crypt_target = {
         .name   = "crypt",
-       .version = {1, 24, 0},
+       .version = {1, 25, 0},
         .module = THIS_MODULE,
         .ctr    = crypt_ctr,
         .dtr    = crypt_dtr,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c

index c5f03aab455256ff1b0abc606b7728438be347f0..1fc901df84eb163c833e364d21e0a48e65c06239 100644 (file)
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -278,6 +278,8 @@ struct dm_integrity_c {
  
         atomic64_t number_of_mismatches;
  
+       mempool_t recheck_pool;
+
         struct notifier_block reboot_notifier;
  };
  
@@ -1689,6 +1691,77 @@ failed:
         get_random_bytes(result, ic->tag_size);
  }
  
+static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum)
+{
+       struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
+       struct dm_integrity_c *ic = dio->ic;
+       struct bvec_iter iter;
+       struct bio_vec bv;
+       sector_t sector, logical_sector, area, offset;
+       struct page *page;
+       void *buffer;
+
+       get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
+       dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset,
+                                                            &dio->metadata_offset);
+       sector = get_data_sector(ic, area, offset);
+       logical_sector = dio->range.logical_sector;
+
+       page = mempool_alloc(&ic->recheck_pool, GFP_NOIO);
+       buffer = page_to_virt(page);
+
+       __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
+               unsigned pos = 0;
+
+               do {
+                       char *mem;
+                       int r;
+                       struct dm_io_request io_req;
+                       struct dm_io_region io_loc;
+                       io_req.bi_opf = REQ_OP_READ;
+                       io_req.mem.type = DM_IO_KMEM;
+                       io_req.mem.ptr.addr = buffer;
+                       io_req.notify.fn = NULL;
+                       io_req.client = ic->io;
+                       io_loc.bdev = ic->dev->bdev;
+                       io_loc.sector = sector;
+                       io_loc.count = ic->sectors_per_block;
+
+                       r = dm_io(&io_req, 1, &io_loc, NULL);
+                       if (unlikely(r)) {
+                               dio->bi_status = errno_to_blk_status(r);
+                               goto free_ret;
+                       }
+
+                       integrity_sector_checksum(ic, logical_sector, buffer, checksum);
+                       r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block,
+                                               &dio->metadata_offset, ic->tag_size, TAG_CMP);
+                       if (r) {
+                               if (r > 0) {
+                                       DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
+                                                   bio->bi_bdev, logical_sector);
+                                       atomic64_inc(&ic->number_of_mismatches);
+                                       dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
+                                                        bio, logical_sector, 0);
+                                       r = -EILSEQ;
+                               }
+                               dio->bi_status = errno_to_blk_status(r);
+                               goto free_ret;
+                       }
+
+                       mem = bvec_kmap_local(&bv);
+                       memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT);
+                       kunmap_local(mem);
+
+                       pos += ic->sectors_per_block << SECTOR_SHIFT;
+                       sector += ic->sectors_per_block;
+                       logical_sector += ic->sectors_per_block;
+               } while (pos < bv.bv_len);
+       }
+free_ret:
+       mempool_free(page, &ic->recheck_pool);
+}
+
  static void integrity_metadata(struct work_struct *w)
  {
         struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
@@ -1776,15 +1849,8 @@ again:
                                                 checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
                         if (unlikely(r)) {
                                 if (r > 0) {
-                                       sector_t s;
-
-                                       s = sector - ((r + ic->tag_size - 1) / ic->tag_size);
-                                       DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
-                                                   bio->bi_bdev, s);
-                                       r = -EILSEQ;
-                                       atomic64_inc(&ic->number_of_mismatches);
-                                       dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
-                                                        bio, s, 0);
+                                       integrity_recheck(dio, checksums);
+                                       goto skip_io;
                                 }
                                 if (likely(checksums != checksums_onstack))
                                         kfree(checksums);
@@ -4261,6 +4327,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
                 goto bad;
         }
  
+       r = mempool_init_page_pool(&ic->recheck_pool, 1, 0);
+       if (r) {
+               ti->error = "Cannot allocate mempool";
+               goto bad;
+       }
+
         ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
                                           WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
         if (!ic->metadata_wq) {
@@ -4609,6 +4681,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
         kvfree(ic->bbs);
         if (ic->bufio)
                 dm_bufio_client_destroy(ic->bufio);
+       mempool_exit(&ic->recheck_pool);
         mempool_exit(&ic->journal_io_mempool);
         if (ic->io)
                 dm_io_client_destroy(ic->io);
@@ -4661,7 +4734,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
  
  static struct target_type integrity_target = {
         .name                   = "integrity",
-       .version                = {1, 10, 0},
+       .version                = {1, 11, 0},
         .module                 = THIS_MODULE,
         .features               = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
         .ctr                    = dm_integrity_ctr,
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c

index e65058e0ed06ab73b9d20d26dfbf7aca55829572..3b1ad7127cb846a1b50059921241f2abe63eaf53 100644 (file)
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1941,7 +1941,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
                            minimum_data_size - sizeof(param_kernel->version)))
                 return -EFAULT;
  
-       if (param_kernel->data_size < minimum_data_size) {
+       if (unlikely(param_kernel->data_size < minimum_data_size) ||
+           unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) {
                 DMERR("Invalid data size in the ioctl structure: %u",
                       param_kernel->data_size);
                 return -EINVAL;
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c

index bdc14ec9981414c60e1dee432b97d50e82dbbc88..1e5d988f44da6919da6de094c6744bf1bb2a89be 100644 (file)
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -66,6 +66,9 @@ struct dm_stats_last_position {
         unsigned int last_rw;
  };
  
+#define DM_STAT_MAX_ENTRIES            8388608
+#define DM_STAT_MAX_HISTOGRAM_ENTRIES  134217728
+
  /*
   * A typo on the command line could possibly make the kernel run out of memory
   * and crash. To prevent the crash we account all used memory. We fail if we
@@ -285,6 +288,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
         if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
                 return -EOVERFLOW;
  
+       if (n_entries > DM_STAT_MAX_ENTRIES)
+               return -EOVERFLOW;
+
         shared_alloc_size = struct_size(s, stat_shared, n_entries);
         if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
                 return -EOVERFLOW;
@@ -297,6 +303,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
         if (histogram_alloc_size / (n_histogram_entries + 1) != (size_t)n_entries * sizeof(unsigned long long))
                 return -EOVERFLOW;
  
+       if ((n_histogram_entries + 1) * (size_t)n_entries > DM_STAT_MAX_HISTOGRAM_ENTRIES)
+               return -EOVERFLOW;
+
         if (!check_shared_memory(shared_alloc_size + histogram_alloc_size +
                                  num_possible_cpus() * (percpu_alloc_size + histogram_alloc_size)))
                 return -ENOMEM;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c

index 260b5b8f2b0d7e9352ed9ed9376a91504ee10c9d..41f1d731ae5ac275d90fbc334666438187da02b4 100644 (file)
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -129,7 +129,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
  int dm_table_create(struct dm_table **result, blk_mode_t mode,
                     unsigned int num_targets, struct mapped_device *md)
  {
-       struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
+       struct dm_table *t;
+
+       if (num_targets > DM_MAX_TARGETS)
+               return -EOVERFLOW;
+
+       t = kzalloc(sizeof(*t), GFP_KERNEL);
  
         if (!t)
                 return -ENOMEM;
@@ -144,7 +149,7 @@ int dm_table_create(struct dm_table **result, blk_mode_t mode,
  
         if (!num_targets) {
                 kfree(t);
-               return -ENOMEM;
+               return -EOVERFLOW;
         }
  
         if (alloc_targets(t, num_targets)) {
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c

index 14e58ae705218f71923b99bdfc1d195e6a45e658..1b591bfa90d5d6463016e22183dbb5f071e94a75 100644 (file)
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -482,6 +482,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
         return 0;
  }
  
+static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io,
+                              u8 *data, size_t len)
+{
+       memcpy(data, io->recheck_buffer, len);
+       io->recheck_buffer += len;
+
+       return 0;
+}
+
+static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
+                                  struct bvec_iter start, sector_t cur_block)
+{
+       struct page *page;
+       void *buffer;
+       int r;
+       struct dm_io_request io_req;
+       struct dm_io_region io_loc;
+
+       page = mempool_alloc(&v->recheck_pool, GFP_NOIO);
+       buffer = page_to_virt(page);
+
+       io_req.bi_opf = REQ_OP_READ;
+       io_req.mem.type = DM_IO_KMEM;
+       io_req.mem.ptr.addr = buffer;
+       io_req.notify.fn = NULL;
+       io_req.client = v->io;
+       io_loc.bdev = v->data_dev->bdev;
+       io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT);
+       io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT);
+       r = dm_io(&io_req, 1, &io_loc, NULL);
+       if (unlikely(r))
+               goto free_ret;
+
+       r = verity_hash(v, verity_io_hash_req(v, io), buffer,
+                       1 << v->data_dev_block_bits,
+                       verity_io_real_digest(v, io), true);
+       if (unlikely(r))
+               goto free_ret;
+
+       if (memcmp(verity_io_real_digest(v, io),
+                  verity_io_want_digest(v, io), v->digest_size)) {
+               r = -EIO;
+               goto free_ret;
+       }
+
+       io->recheck_buffer = buffer;
+       r = verity_for_bv_block(v, io, &start, verity_recheck_copy);
+       if (unlikely(r))
+               goto free_ret;
+
+       r = 0;
+free_ret:
+       mempool_free(page, &v->recheck_pool);
+
+       return r;
+}
+
  static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
                           u8 *data, size_t len)
  {
@@ -508,9 +565,7 @@ static int verity_verify_io(struct dm_verity_io *io)
  {
         bool is_zero;
         struct dm_verity *v = io->v;
-#if defined(CONFIG_DM_VERITY_FEC)
         struct bvec_iter start;
-#endif
         struct bvec_iter iter_copy;
         struct bvec_iter *iter;
         struct crypto_wait wait;
@@ -561,10 +616,7 @@ static int verity_verify_io(struct dm_verity_io *io)
                 if (unlikely(r < 0))
                         return r;
  
-#if defined(CONFIG_DM_VERITY_FEC)
-               if (verity_fec_is_enabled(v))
-                       start = *iter;
-#endif
+               start = *iter;
                 r = verity_for_io_block(v, io, iter, &wait);
                 if (unlikely(r < 0))
                         return r;
@@ -586,6 +638,10 @@ static int verity_verify_io(struct dm_verity_io *io)
                          * tasklet since it may sleep, so fallback to work-queue.
                          */
                         return -EAGAIN;
+               } else if (verity_recheck(v, io, start, cur_block) == 0) {
+                       if (v->validated_blocks)
+                               set_bit(cur_block, v->validated_blocks);
+                       continue;
  #if defined(CONFIG_DM_VERITY_FEC)
                 } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
                                              cur_block, NULL, &start) == 0) {
@@ -645,23 +701,6 @@ static void verity_work(struct work_struct *w)
         verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
  }
  
-static void verity_tasklet(unsigned long data)
-{
-       struct dm_verity_io *io = (struct dm_verity_io *)data;
-       int err;
-
-       io->in_tasklet = true;
-       err = verity_verify_io(io);
-       if (err == -EAGAIN || err == -ENOMEM) {
-               /* fallback to retrying with work-queue */
-               INIT_WORK(&io->work, verity_work);
-               queue_work(io->v->verify_wq, &io->work);
-               return;
-       }
-
-       verity_finish_io(io, errno_to_blk_status(err));
-}
-
  static void verity_end_io(struct bio *bio)
  {
         struct dm_verity_io *io = bio->bi_private;
@@ -674,13 +713,8 @@ static void verity_end_io(struct bio *bio)
                 return;
         }
  
-       if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) {
-               tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io);
-               tasklet_schedule(&io->tasklet);
-       } else {
-               INIT_WORK(&io->work, verity_work);
-               queue_work(io->v->verify_wq, &io->work);
-       }
+       INIT_WORK(&io->work, verity_work);
+       queue_work(io->v->verify_wq, &io->work);
  }
  
  /*
@@ -963,6 +997,10 @@ static void verity_dtr(struct dm_target *ti)
         if (v->verify_wq)
                 destroy_workqueue(v->verify_wq);
  
+       mempool_exit(&v->recheck_pool);
+       if (v->io)
+               dm_io_client_destroy(v->io);
+
         if (v->bufio)
                 dm_bufio_client_destroy(v->bufio);
  
@@ -1401,6 +1439,20 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         }
         v->hash_blocks = hash_position;
  
+       r = mempool_init_page_pool(&v->recheck_pool, 1, 0);
+       if (unlikely(r)) {
+               ti->error = "Cannot allocate mempool";
+               goto bad;
+       }
+
+       v->io = dm_io_client_create();
+       if (IS_ERR(v->io)) {
+               r = PTR_ERR(v->io);
+               v->io = NULL;
+               ti->error = "Cannot allocate dm io";
+               goto bad;
+       }
+
         v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
                 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
                 dm_bufio_alloc_callback, NULL,
@@ -1508,7 +1560,7 @@ int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest, unsigned i
  static struct target_type verity_target = {
         .name           = "verity",
         .features       = DM_TARGET_IMMUTABLE,
-       .version        = {1, 9, 0},
+       .version        = {1, 10, 0},
         .module         = THIS_MODULE,
         .ctr            = verity_ctr,
         .dtr            = verity_dtr,
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h

index f9d522c870e61665d87271f66c690138db42108f..db93a91169d5e6de31d344a6f37589bbc0bdb654 100644 (file)
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -11,6 +11,7 @@
  #ifndef DM_VERITY_H
  #define DM_VERITY_H
  
+#include <linux/dm-io.h>
  #include <linux/dm-bufio.h>
  #include <linux/device-mapper.h>
  #include <linux/interrupt.h>
@@ -68,6 +69,9 @@ struct dm_verity {
         unsigned long *validated_blocks; /* bitset blocks validated */
  
         char *signature_key_desc; /* signature keyring reference */
+
+       struct dm_io_client *io;
+       mempool_t recheck_pool;
  };
  
  struct dm_verity_io {
@@ -76,14 +80,15 @@ struct dm_verity_io {
         /* original value of bio->bi_end_io */
         bio_end_io_t *orig_bi_end_io;
  
+       struct bvec_iter iter;
+
         sector_t block;
         unsigned int n_blocks;
         bool in_tasklet;
  
-       struct bvec_iter iter;
-
         struct work_struct work;
-       struct tasklet_struct tasklet;
+
+       char *recheck_buffer;
  
         /*
          * Three variably-size fields follow this struct:
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c

index 074cb785eafc19172b9ebf4b6a6f2ae4591563d6..b463c28c39ad34ca23b3d2433811384901171d80 100644 (file)
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -299,7 +299,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
                 long i;
  
                 wc->memory_map = NULL;
-               pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL);
+               pages = vmalloc_array(p, sizeof(struct page *));
                 if (!pages) {
                         r = -ENOMEM;
                         goto err2;
@@ -330,7 +330,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
                         r = -ENOMEM;
                         goto err3;
                 }
-               kvfree(pages);
+               vfree(pages);
                 wc->memory_vmapped = true;
         }
  
@@ -341,7 +341,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
  
         return 0;
  err3:
-       kvfree(pages);
+       vfree(pages);
  err2:
         dax_read_unlock(id);
  err1:
@@ -962,7 +962,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc)
  
         if (wc->entries)
                 return 0;
-       wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks));
+       wc->entries = vmalloc_array(wc->n_blocks, sizeof(struct wc_entry));
         if (!wc->entries)
                 return -ENOMEM;
         for (b = 0; b < wc->n_blocks; b++) {
diff --git a/drivers/md/md.c b/drivers/md/md.c

index 2266358d807466f95d02b431d09ee39805dff5e8..9e41a9aaba8b5cab9d513ef047718071551b35f6 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -579,8 +579,12 @@ static void submit_flushes(struct work_struct *ws)
                         rcu_read_lock();
                 }
         rcu_read_unlock();
-       if (atomic_dec_and_test(&mddev->flush_pending))
+       if (atomic_dec_and_test(&mddev->flush_pending)) {
+               /* The pair is percpu_ref_get() from md_flush_request() */
+               percpu_ref_put(&mddev->active_io);
+
                 queue_work(md_wq, &mddev->flush_work);
+       }
  }
  
  static void md_submit_flush_data(struct work_struct *ws)
@@ -8788,12 +8792,16 @@ void md_do_sync(struct md_thread *thread)
         int ret;
  
         /* just incase thread restarts... */
-       if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
-           test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
+       if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
                 return;
-       if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
+
+       if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+               goto skip;
+
+       if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) ||
+           !md_is_rdwr(mddev)) {/* never try to sync a read-only array */
                 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-               return;
+               goto skip;
         }
  
         if (mddev_is_clustered(mddev)) {
@@ -9368,13 +9376,19 @@ static void md_start_sync(struct work_struct *ws)
         struct mddev *mddev = container_of(ws, struct mddev, sync_work);
         int spares = 0;
         bool suspend = false;
+       char *name;
  
-       if (md_spares_need_change(mddev))
+       /*
+        * If reshape is still in progress, spares won't be added or removed
+        * from conf until reshape is done.
+        */
+       if (mddev->reshape_position == MaxSector &&
+           md_spares_need_change(mddev)) {
                 suspend = true;
+               mddev_suspend(mddev, false);
+       }
  
-       suspend ? mddev_suspend_and_lock_nointr(mddev) :
-                 mddev_lock_nointr(mddev);
-
+       mddev_lock_nointr(mddev);
         if (!md_is_rdwr(mddev)) {
                 /*
                  * On a read-only array we can:
@@ -9400,8 +9414,10 @@ static void md_start_sync(struct work_struct *ws)
         if (spares)
                 md_bitmap_write_all(mddev->bitmap);
  
+       name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
+                       "reshape" : "resync";
         rcu_assign_pointer(mddev->sync_thread,
-                          md_register_thread(md_do_sync, mddev, "resync"));
+                          md_register_thread(md_do_sync, mddev, name));
         if (!mddev->sync_thread) {
                 pr_warn("%s: could not start resync thread...\n",
                         mdname(mddev));
@@ -9445,6 +9461,20 @@ not_running:
                 sysfs_notify_dirent_safe(mddev->sysfs_action);
  }
  
+static void unregister_sync_thread(struct mddev *mddev)
+{
+       if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
+               /* resync/recovery still happening */
+               clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               return;
+       }
+
+       if (WARN_ON_ONCE(!mddev->sync_thread))
+               return;
+
+       md_reap_sync_thread(mddev);
+}
+
  /*
   * This routine is regularly called by all per-raid-array threads to
   * deal with generic issues like resync and super-block update.
@@ -9469,9 +9499,6 @@ not_running:
   */
  void md_check_recovery(struct mddev *mddev)
  {
-       if (READ_ONCE(mddev->suspended))
-               return;
-
         if (mddev->bitmap)
                 md_bitmap_daemon_work(mddev);
  
@@ -9485,7 +9512,8 @@ void md_check_recovery(struct mddev *mddev)
         }
  
         if (!md_is_rdwr(mddev) &&
-           !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+           !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
+           !test_bit(MD_RECOVERY_DONE, &mddev->recovery))
                 return;
         if ( ! (
                 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
@@ -9507,8 +9535,7 @@ void md_check_recovery(struct mddev *mddev)
                         struct md_rdev *rdev;
  
                         if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
-                               /* sync_work already queued. */
-                               clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                               unregister_sync_thread(mddev);
                                 goto unlock;
                         }
  
@@ -9571,16 +9598,7 @@ void md_check_recovery(struct mddev *mddev)
                  * still set.
                  */
                 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
-                       if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
-                               /* resync/recovery still happening */
-                               clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-                               goto unlock;
-                       }
-
-                       if (WARN_ON_ONCE(!mddev->sync_thread))
-                               goto unlock;
-
-                       md_reap_sync_thread(mddev);
+                       unregister_sync_thread(mddev);
                         goto unlock;
                 }
  
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index 7412066ea22c7a525ed3e9ff1cfc1b5db2b2b527..a5f8419e2df1d5624f587a3615c3e46348532701 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4175,11 +4175,7 @@ static int raid10_run(struct mddev *mddev)
                 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-               set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-               rcu_assign_pointer(mddev->sync_thread,
-                       md_register_thread(md_do_sync, mddev, "reshape"));
-               if (!mddev->sync_thread)
-                       goto out_free_conf;
+               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         }
  
         return 0;
@@ -4573,16 +4569,8 @@ out:
         clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
         clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
         set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-       set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-
-       rcu_assign_pointer(mddev->sync_thread,
-                          md_register_thread(md_do_sync, mddev, "reshape"));
-       if (!mddev->sync_thread) {
-               ret = -EAGAIN;
-               goto abort;
-       }
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         conf->reshape_checkpoint = jiffies;
-       md_wakeup_thread(mddev->sync_thread);
         md_new_event();
         return 0;
  
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 8497880135ee4269ef329e58a10757870ae2df18..6a7a32f7fb912019754f75338104373009604051 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7936,11 +7936,7 @@ static int raid5_run(struct mddev *mddev)
                 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-               set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-               rcu_assign_pointer(mddev->sync_thread,
-                       md_register_thread(md_do_sync, mddev, "reshape"));
-               if (!mddev->sync_thread)
-                       goto abort;
+               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         }
  
         /* Ok, everything is just fine now */
@@ -8506,29 +8502,8 @@ static int raid5_start_reshape(struct mddev *mddev)
         clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
         clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
         set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-       set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-       rcu_assign_pointer(mddev->sync_thread,
-                          md_register_thread(md_do_sync, mddev, "reshape"));
-       if (!mddev->sync_thread) {
-               mddev->recovery = 0;
-               spin_lock_irq(&conf->device_lock);
-               write_seqcount_begin(&conf->gen_lock);
-               mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
-               mddev->new_chunk_sectors =
-                       conf->chunk_sectors = conf->prev_chunk_sectors;
-               mddev->new_layout = conf->algorithm = conf->prev_algo;
-               rdev_for_each(rdev, mddev)
-                       rdev->new_data_offset = rdev->data_offset;
-               smp_wmb();
-               conf->generation --;
-               conf->reshape_progress = MaxSector;
-               mddev->reshape_position = MaxSector;
-               write_seqcount_end(&conf->gen_lock);
-               spin_unlock_irq(&conf->device_lock);
-               return -EAGAIN;
-       }
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         conf->reshape_checkpoint = jiffies;
-       md_wakeup_thread(mddev->sync_thread);
         md_new_event();
         return 0;
  }
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c

index aebd3c12020bfd5d104c089354da0557de93547d..c381c22135a217b71e82f8282699cfbe14749ded 100644 (file)
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
@@ -725,6 +725,9 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx)
         unsigned int i;
         u32 status;
  
+       if (!rkisp1->irqs_enabled)
+               return IRQ_NONE;
+
         status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS);
         if (!status)
                 return IRQ_NONE;
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h

index 4b6b28c05b8916b7e5e079b6d2791009907c22db..b757f75edecf75256525e378151fe217c88ef2c4 100644 (file)
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
@@ -450,6 +450,7 @@ struct rkisp1_debug {
   * @debug:        debug params to be exposed on debugfs
   * @info:         version-specific ISP information
   * @irqs:          IRQ line numbers
+ * @irqs_enabled:  the hardware is enabled and can cause interrupts
   */
  struct rkisp1_device {
         void __iomem *base_addr;
@@ -471,6 +472,7 @@ struct rkisp1_device {
         struct rkisp1_debug debug;
         const struct rkisp1_info *info;
         int irqs[RKISP1_NUM_IRQS];
+       bool irqs_enabled;
  };
  
  /*
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c

index b6e47e2f1b94916e51ec86b38d12b37ae186609d..4202642e052392a946761ecb76d3cfa771956e07 100644 (file)
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c
@@ -196,6 +196,9 @@ irqreturn_t rkisp1_csi_isr(int irq, void *ctx)
         struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
         u32 val, status;
  
+       if (!rkisp1->irqs_enabled)
+               return IRQ_NONE;
+
         status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS);
         if (!status)
                 return IRQ_NONE;
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c

index f96f821a7b50d0f10db51932d2b82986dcb16957..73cf08a740118c05328fdd3f1a9d52a6e935c4e0 100644 (file)
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
@@ -305,6 +305,24 @@ static int __maybe_unused rkisp1_runtime_suspend(struct device *dev)
  {
         struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
  
+       rkisp1->irqs_enabled = false;
+       /* Make sure the IRQ handler will see the above */
+       mb();
+
+       /*
+        * Wait until any running IRQ handler has returned. The IRQ handler
+        * may get called even after this (as it's a shared interrupt line)
+        * but the 'irqs_enabled' flag will make the handler return immediately.
+        */
+       for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) {
+               if (rkisp1->irqs[il] == -1)
+                       continue;
+
+               /* Skip if the irq line is the same as previous */
+               if (il == 0 || rkisp1->irqs[il - 1] != rkisp1->irqs[il])
+                       synchronize_irq(rkisp1->irqs[il]);
+       }
+
         clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks);
         return pinctrl_pm_select_sleep_state(dev);
  }
@@ -321,6 +339,10 @@ static int __maybe_unused rkisp1_runtime_resume(struct device *dev)
         if (ret)
                 return ret;
  
+       rkisp1->irqs_enabled = true;
+       /* Make sure the IRQ handler will see the above */
+       mb();
+
         return 0;
  }
  
@@ -559,7 +581,7 @@ static int rkisp1_probe(struct platform_device *pdev)
                                 rkisp1->irqs[il] = irq;
                 }
  
-               ret = devm_request_irq(dev, irq, info->isrs[i].isr, 0,
+               ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED,
                                        dev_driver_string(dev), dev);
                 if (ret) {
                         dev_err(dev, "request irq failed: %d\n", ret);
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c

index f00873d31c42b702d239e9e9fefcb8eddb599275..78a1f7a1499be84f15b94d75b30266dfb8c720ce 100644 (file)
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
@@ -976,6 +976,9 @@ irqreturn_t rkisp1_isp_isr(int irq, void *ctx)
         struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
         u32 status, isp_err;
  
+       if (!rkisp1->irqs_enabled)
+               return IRQ_NONE;
+
         status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS);
         if (!status)
                 return IRQ_NONE;
diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig

index 2afe67ffa285e3755f35c5842827160b93f573b5..74d69ce22a33e801762bc156c8c40289b2b2d4cb 100644 (file)
--- a/drivers/media/rc/Kconfig
+++ b/drivers/media/rc/Kconfig
@@ -319,6 +319,7 @@ config IR_PWM_TX
         tristate "PWM IR transmitter"
         depends on LIRC
         depends on PWM
+       depends on HIGH_RES_TIMERS
         depends on OF
         help
            Say Y if you want to use a PWM based IR transmitter. This is
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c

index fe17c7f98e8101afdae3d608ab12a2ef7f971d0e..52d82cbe7685f5b5adadf4448a171fcb146612b8 100644 (file)
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
         if (attr->attach_flags)
                 return -EINVAL;
  
-       rcdev = rc_dev_get_from_fd(attr->target_fd);
+       rcdev = rc_dev_get_from_fd(attr->target_fd, true);
         if (IS_ERR(rcdev))
                 return PTR_ERR(rcdev);
  
@@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr)
         if (IS_ERR(prog))
                 return PTR_ERR(prog);
  
-       rcdev = rc_dev_get_from_fd(attr->target_fd);
+       rcdev = rc_dev_get_from_fd(attr->target_fd, true);
         if (IS_ERR(rcdev)) {
                 bpf_prog_put(prog);
                 return PTR_ERR(rcdev);
@@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
         if (attr->query.query_flags)
                 return -EINVAL;
  
-       rcdev = rc_dev_get_from_fd(attr->query.target_fd);
+       rcdev = rc_dev_get_from_fd(attr->query.target_fd, false);
         if (IS_ERR(rcdev))
                 return PTR_ERR(rcdev);
  
diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c

index 1968067092594979942f030af29bf4b484b5fd09..69e630d85262f65f413ee8c9d092ea85cee01c91 100644 (file)
--- a/drivers/media/rc/ir_toy.c
+++ b/drivers/media/rc/ir_toy.c
@@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count)
                             sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP);
         if (err) {
                 dev_err(irtoy->dev, "exit sample mode: %d\n", err);
+               kfree(buf);
                 return err;
         }
  
@@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count)
                             sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND);
         if (err) {
                 dev_err(irtoy->dev, "enter sample mode: %d\n", err);
+               kfree(buf);
                 return err;
         }
  
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c

index a537734832c5080498d263428a96d7b1d13dcb88..caad59f76793f750f757c8fe5e58fe569b6b4322 100644 (file)
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void)
         unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX);
  }
  
-struct rc_dev *rc_dev_get_from_fd(int fd)
+struct rc_dev *rc_dev_get_from_fd(int fd, bool write)
  {
         struct fd f = fdget(fd);
         struct lirc_fh *fh;
@@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd)
                 return ERR_PTR(-EINVAL);
         }
  
+       if (write && !(f.file->f_mode & FMODE_WRITE))
+               return ERR_PTR(-EPERM);
+
         fh = f.file->private_data;
         dev = fh->rc;
  
diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h

index ef1e95e1af7fcccda49324e375b940cc92c627f2..7df949fc65e2b68bf88c12643410330fc1ad4635 100644 (file)
--- a/drivers/media/rc/rc-core-priv.h
+++ b/drivers/media/rc/rc-core-priv.h
@@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev);
  void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc);
  int lirc_register(struct rc_dev *dev);
  void lirc_unregister(struct rc_dev *dev);
-struct rc_dev *rc_dev_get_from_fd(int fd);
+struct rc_dev *rc_dev_get_from_fd(int fd, bool write);
  #else
  static inline int lirc_dev_init(void) { return 0; }
  static inline void lirc_dev_exit(void) {}
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c

index 1c6c62a7f7f5535f4c1025ee0d957006a4c5deb4..03319a1fa97fda2bf967dd425af9aef83fc1602d 100644 (file)
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -2191,7 +2191,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev)
         int i;
  
         spin_lock_irqsave(&cctx->lock, flags);
-       for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) {
+       for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) {
                 if (cctx->session[i].sid == sess->sid) {
                         cctx->session[i].valid = false;
                         cctx->sesscount--;
diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c

index 8aea2d070a40c23e0a0ed9495d8039f9fa6804ac..d279a4f195e2a343a8332d25c25e85a89b6ac88f 100644 (file)
--- a/drivers/misc/open-dice.c
+++ b/drivers/misc/open-dice.c
@@ -140,7 +140,6 @@ static int __init open_dice_probe(struct platform_device *pdev)
                 return -ENOMEM;
  
         *drvdata = (struct open_dice_drvdata){
-               .lock = __MUTEX_INITIALIZER(drvdata->lock),
                 .rmem = rmem,
                 .misc = (struct miscdevice){
                         .parent = dev,
@@ -150,6 +149,7 @@ static int __init open_dice_probe(struct platform_device *pdev)
                         .mode   = 0600,
                 },
         };
+       mutex_init(&drvdata->lock);
  
         /* Index overflow check not needed, misc_register() will fail. */
         snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c

index f410bee501328f6af96b4f0029d4856e45e06766..58ed7193a3ca460fe58a46427306b385a40a2d3e 100644 (file)
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1015,10 +1015,12 @@ static int mmc_select_bus_width(struct mmc_card *card)
         static unsigned ext_csd_bits[] = {
                 EXT_CSD_BUS_WIDTH_8,
                 EXT_CSD_BUS_WIDTH_4,
+               EXT_CSD_BUS_WIDTH_1,
         };
         static unsigned bus_widths[] = {
                 MMC_BUS_WIDTH_8,
                 MMC_BUS_WIDTH_4,
+               MMC_BUS_WIDTH_1,
         };
         struct mmc_host *host = card->host;
         unsigned idx, bus_width = 0;
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c

index 2a2d949a9344ea78b540337d471996f2ec77d53e..39f45c2b6de8a885e12af08f302d8bc2dce15d61 100644 (file)
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -75,11 +75,15 @@ EXPORT_SYMBOL(mmc_gpio_set_cd_irq);
  int mmc_gpio_get_ro(struct mmc_host *host)
  {
         struct mmc_gpio *ctx = host->slot.handler_priv;
+       int cansleep;
  
         if (!ctx || !ctx->ro_gpio)
                 return -ENOSYS;
  
-       return gpiod_get_value_cansleep(ctx->ro_gpio);
+       cansleep = gpiod_cansleep(ctx->ro_gpio);
+       return cansleep ?
+               gpiod_get_value_cansleep(ctx->ro_gpio) :
+               gpiod_get_value(ctx->ro_gpio);
  }
  EXPORT_SYMBOL(mmc_gpio_get_ro);
  
diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c

index 35067e1e6cd8017b1bb37683f9dda6169af5cbf1..f5da7f9baa52d4b29cd396f0aa88e1ff7891666c 100644 (file)
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -225,6 +225,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
         struct scatterlist *sg;
         int i;
  
+       host->dma_in_progress = true;
+
         if (!host->variant->dma_lli || data->sg_len == 1 ||
             idma->use_bounce_buffer) {
                 u32 dma_addr;
@@ -263,9 +265,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
         return 0;
  }
  
+static void sdmmc_idma_error(struct mmci_host *host)
+{
+       struct mmc_data *data = host->data;
+       struct sdmmc_idma *idma = host->dma_priv;
+
+       if (!dma_inprogress(host))
+               return;
+
+       writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
+       host->dma_in_progress = false;
+       data->host_cookie = 0;
+
+       if (!idma->use_bounce_buffer)
+               dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+                            mmc_get_dma_dir(data));
+}
+
  static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data)
  {
+       if (!dma_inprogress(host))
+               return;
+
         writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR);
+       host->dma_in_progress = false;
  
         if (!data->host_cookie)
                 sdmmc_idma_unprep_data(host, data, 0);
@@ -676,6 +699,7 @@ static struct mmci_host_ops sdmmc_variant_ops = {
         .dma_setup = sdmmc_idma_setup,
         .dma_start = sdmmc_idma_start,
         .dma_finalize = sdmmc_idma_finalize,
+       .dma_error = sdmmc_idma_error,
         .set_clkreg = mmci_sdmmc_set_clkreg,
         .set_pwrreg = mmci_sdmmc_set_pwrreg,
         .busy_complete = sdmmc_busy_complete,
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c

index 7bfee28116af12ebdf08efb7b0e51e68cb602956..d4a02184784a3458b55b601d1fba1216e1ef3149 100644 (file)
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -693,6 +693,35 @@ static int sdhci_pci_o2_init_sd_express(struct mmc_host *mmc, struct mmc_ios *io
         return 0;
  }
  
+static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode,  unsigned short vdd)
+{
+       struct sdhci_pci_chip *chip;
+       struct sdhci_pci_slot *slot = sdhci_priv(host);
+       u32 scratch_32 = 0;
+       u8 scratch_8 = 0;
+
+       chip = slot->chip;
+
+       if (mode == MMC_POWER_OFF) {
+               /* UnLock WP */
+               pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
+               scratch_8 &= 0x7f;
+               pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+
+               /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */
+               pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32);
+               scratch_32 &= ~(O2_SD_SEL_DLL);
+               pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32);
+
+               /* Lock WP */
+               pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
+               scratch_8 |= 0x80;
+               pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+       }
+
+       sdhci_set_power(host, mode, vdd);
+}
+
  static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
  {
         struct sdhci_pci_chip *chip;
@@ -1051,6 +1080,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = {
         .set_bus_width = sdhci_set_bus_width,
         .reset = sdhci_reset,
         .set_uhs_signaling = sdhci_set_uhs_signaling,
+       .set_power = sdhci_pci_o2_set_power,
  };
  
  const struct sdhci_pci_fixes sdhci_o2 = {
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c

index 8cf3a375de659a6d98b7dcfc2f4e2be09f7c4a5d..cc9d28b75eb911733d847a1d0c19cf24d9a3f755 100644 (file)
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -11,6 +11,7 @@
  #include <linux/slab.h>
  #include <linux/delay.h>
  #include <linux/ktime.h>
+#include <linux/iopoll.h>
  #include <linux/of_address.h>
  
  #include "sdhci-pltfm.h"
@@ -109,6 +110,8 @@
  #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST     (XENON_EMMC_PHY_REG_BASE + 0x18)
  #define XENON_LOGIC_TIMING_VALUE               0x00AA8977
  
+#define XENON_MAX_PHY_TIMEOUT_LOOPS            100
+
  /*
   * List offset of PHY registers and some special register values
   * in eMMC PHY 5.0 or eMMC PHY 5.1
@@ -216,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host)
         return 0;
  }
  
+static int xenon_check_stability_internal_clk(struct sdhci_host *host)
+{
+       u32 reg;
+       int err;
+
+       err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE,
+                               1100, 20000, false, host, SDHCI_CLOCK_CONTROL);
+       if (err)
+               dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n");
+
+       return err;
+}
+
  /*
   * eMMC 5.0/5.1 PHY init/re-init.
   * eMMC PHY init should be executed after:
@@ -232,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
         struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
         struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
  
+       int ret = xenon_check_stability_internal_clk(host);
+
+       if (ret)
+               return ret;
+
         reg = sdhci_readl(host, phy_regs->timing_adj);
         reg |= XENON_PHY_INITIALIZAION;
         sdhci_writel(host, reg, phy_regs->timing_adj);
@@ -259,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host)
         /* get the wait time */
         wait /= clock;
         wait++;
-       /* wait for host eMMC PHY init completes */
-       udelay(wait);
  
-       reg = sdhci_readl(host, phy_regs->timing_adj);
-       reg &= XENON_PHY_INITIALIZAION;
-       if (reg) {
+       /*
+        * AC5X spec says bit must be polled until zero.
+        * We see cases in which timeout can take longer
+        * than the standard calculation on AC5X, which is
+        * expected following the spec comment above.
+        * According to the spec, we must wait as long as
+        * it takes for that bit to toggle on AC5X.
+        * Cap that with 100 delay loops so we won't get
+        * stuck here forever:
+        */
+
+       ret = read_poll_timeout(sdhci_readl, reg,
+                               !(reg & XENON_PHY_INITIALIZAION),
+                               wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait,
+                               false, host, phy_regs->timing_adj);
+       if (ret)
                 dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n",
-                       wait);
-               return -ETIMEDOUT;
-       }
+                       wait * XENON_MAX_PHY_TIMEOUT_LOOPS);
  
-       return 0;
+       return ret;
  }
  
  #define ARMADA_3700_SOC_PAD_1_8V       0x1
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c

index e451b28840d58b2b0e6b5fdd4d50fe809dd29de4..5887feb347a4e42aa1dcc779bc7f5b252402b16e 100644 (file)
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -621,6 +621,7 @@ static void mtd_check_of_node(struct mtd_info *mtd)
                 if (plen == mtd_name_len &&
                     !strncmp(mtd->name, pname + offset, plen)) {
                         mtd_set_of_node(mtd, mtd_dn);
+                       of_node_put(mtd_dn);
                         break;
                 }
         }
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c

index a466987448502e0b576b612f42139f878d4014ff..5b0f5a9cef81b5fbc1494cb9f00b123ea06f0d35 100644 (file)
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -290,16 +290,13 @@ static const struct marvell_hw_ecc_layout marvell_nfc_layouts[] = {
         MARVELL_LAYOUT( 2048,   512,  4,  1,  1, 2048, 32, 30,  0,  0,  0),
         MARVELL_LAYOUT( 2048,   512,  8,  2,  1, 1024,  0, 30,1024,32, 30),
         MARVELL_LAYOUT( 2048,   512,  8,  2,  1, 1024,  0, 30,1024,64, 30),
-       MARVELL_LAYOUT( 2048,   512,  12, 3,  2, 704,   0, 30,640,  0, 30),
-       MARVELL_LAYOUT( 2048,   512,  16, 5,  4, 512,   0, 30,  0, 32, 30),
+       MARVELL_LAYOUT( 2048,   512,  16, 4,  4, 512,   0, 30,  0, 32, 30),
         MARVELL_LAYOUT( 4096,   512,  4,  2,  2, 2048, 32, 30,  0,  0,  0),
-       MARVELL_LAYOUT( 4096,   512,  8,  5,  4, 1024,  0, 30,  0, 64, 30),
-       MARVELL_LAYOUT( 4096,   512,  12, 6,  5, 704,   0, 30,576, 32, 30),
-       MARVELL_LAYOUT( 4096,   512,  16, 9,  8, 512,   0, 30,  0, 32, 30),
+       MARVELL_LAYOUT( 4096,   512,  8,  4,  4, 1024,  0, 30,  0, 64, 30),
+       MARVELL_LAYOUT( 4096,   512,  16, 8,  8, 512,   0, 30,  0, 32, 30),
         MARVELL_LAYOUT( 8192,   512,  4,  4,  4, 2048,  0, 30,  0,  0,  0),
-       MARVELL_LAYOUT( 8192,   512,  8,  9,  8, 1024,  0, 30,  0, 160, 30),
-       MARVELL_LAYOUT( 8192,   512,  12, 12, 11, 704,  0, 30,448,  64, 30),
-       MARVELL_LAYOUT( 8192,   512,  16, 17, 16, 512,  0, 30,  0,  32, 30),
+       MARVELL_LAYOUT( 8192,   512,  8,  8,  8, 1024,  0, 30,  0, 160, 30),
+       MARVELL_LAYOUT( 8192,   512,  16, 16, 16, 512,  0, 30,  0,  32, 30),
  };
  
  /**
diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c

index 987710e09441adefbf238948e759fec4d049b126..6023cba748bb858373a54dd58c5808057d9841fc 100644 (file)
--- a/drivers/mtd/nand/spi/gigadevice.c
+++ b/drivers/mtd/nand/spi/gigadevice.c
@@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
  {
         u8 status2;
         struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
-                                                     &status2);
+                                                     spinand->scratchbuf);
         int ret;
  
         switch (status & STATUS_ECC_MASK) {
@@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand,
                  * report the maximum of 4 in this case
                  */
                 /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */
+               status2 = *(spinand->scratchbuf);
                 return ((status & STATUS_ECC_MASK) >> 2) |
                         ((status2 & STATUS_ECC_MASK) >> 4);
  
@@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
  {
         u8 status2;
         struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2,
-                                                     &status2);
+                                                     spinand->scratchbuf);
         int ret;
  
         switch (status & STATUS_ECC_MASK) {
@@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand,
                  * 1 ... 4 bits are flipped (and corrected)
                  */
                 /* bits sorted this way (1...0): ECCSE1, ECCSE0 */
+               status2 = *(spinand->scratchbuf);
                 return ((status2 & STATUS_ECC_MASK) >> 4) + 1;
  
         case STATUS_ECC_UNCOR_ERROR:
diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c

index 8c651fdee039aab85019b9a574b813c51aa04ef6..57f1729066f28b76de26bd3c5145cbe6bcc2f9ac 100644 (file)
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -186,4 +186,5 @@ static void __exit arcnet_raw_exit(void)
  module_init(arcnet_raw_init);
  module_exit(arcnet_raw_exit);
  
+MODULE_DESCRIPTION("ARCnet raw mode packet interface module");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c

index 8c3ccc7c83cd3cd92e73c44ebd09a6fa8f8c0c41..53d10a04d1bd0a765e34dd6e56217a59d5485cc9 100644 (file)
--- a/drivers/net/arcnet/arc-rimi.c
+++ b/drivers/net/arcnet/arc-rimi.c
@@ -312,6 +312,7 @@ module_param(node, int, 0);
  module_param(io, int, 0);
  module_param(irq, int, 0);
  module_param_string(device, device, sizeof(device), 0);
+MODULE_DESCRIPTION("ARCnet COM90xx RIM I chipset driver");
  MODULE_LICENSE("GPL");
  
  static struct net_device *my_dev;
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c

index c09b567845e1eeb025eff26c77ad5e22f6db150f..7a0a799737698f8ebb834b592d86c849aaf8f371 100644 (file)
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -265,4 +265,5 @@ static void __exit capmode_module_exit(void)
  module_init(capmode_module_init);
  module_exit(capmode_module_exit);
  
+MODULE_DESCRIPTION("ARCnet CAP mode packet interface module");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c

index 7b5c8bb02f11941f6210200c23ee2f74272d49a3..c5e571ec94c990d0a76a19e3ab418730548c0ce1 100644 (file)
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -61,6 +61,7 @@ module_param(timeout, int, 0);
  module_param(backplane, int, 0);
  module_param(clockp, int, 0);
  module_param(clockm, int, 0);
+MODULE_DESCRIPTION("ARCnet COM20020 chipset PCI driver");
  MODULE_LICENSE("GPL");
  
  static void led_tx_set(struct led_classdev *led_cdev,
diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c

index 06e1651b594ba813fc5a0d75931c12ec299c100a..a0053e3992a364ef3e1d3c4328b4baf8956d248d 100644 (file)
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -399,6 +399,7 @@ EXPORT_SYMBOL(com20020_found);
  EXPORT_SYMBOL(com20020_netdev_ops);
  #endif
  
+MODULE_DESCRIPTION("ARCnet COM20020 chipset core driver");
  MODULE_LICENSE("GPL");
  
  #ifdef MODULE
diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c

index dc3253b318dafc3e668df035880a8bccabeb0fdc..75f08aa7528b4620180da471e87e225a39fcd06b 100644 (file)
--- a/drivers/net/arcnet/com20020_cs.c
+++ b/drivers/net/arcnet/com20020_cs.c
@@ -97,6 +97,7 @@ module_param(backplane, int, 0);
  module_param(clockp, int, 0);
  module_param(clockm, int, 0);
  
+MODULE_DESCRIPTION("ARCnet COM20020 chipset PCMCIA driver");
  MODULE_LICENSE("GPL");
  
  /*====================================================================*/
diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c

index 37b47749fc8b4afb24ae60151ac5b316554ab391..3b463fbc6402114322278a6648a422f4a7ea2bff 100644 (file)
--- a/drivers/net/arcnet/com90io.c
+++ b/drivers/net/arcnet/com90io.c
@@ -350,6 +350,7 @@ static char device[9];              /* use eg. device=arc1 to change name */
  module_param_hw(io, int, ioport, 0);
  module_param_hw(irq, int, irq, 0);
  module_param_string(device, device, sizeof(device), 0);
+MODULE_DESCRIPTION("ARCnet COM90xx IO mapped chipset driver");
  MODULE_LICENSE("GPL");
  
  #ifndef MODULE
diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c

index f49dae1942846d866d11aad4d619202fd7a8cb01..b3b287c1656179b6656d62a53e488ffcd85178ac 100644 (file)
--- a/drivers/net/arcnet/com90xx.c
+++ b/drivers/net/arcnet/com90xx.c
@@ -645,6 +645,7 @@ static void com90xx_copy_from_card(struct net_device *dev, int bufnum,
         TIME(dev, "memcpy_fromio", count, memcpy_fromio(buf, memaddr, count));
  }
  
+MODULE_DESCRIPTION("ARCnet COM90xx normal chipset driver");
  MODULE_LICENSE("GPL");
  
  static int __init com90xx_init(void)
diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c

index a7752a5b647fcd4128e3e0c7b59961ad0a83c660..46519ca63a0aa5459732fd140d62ad679bc3a519 100644 (file)
--- a/drivers/net/arcnet/rfc1051.c
+++ b/drivers/net/arcnet/rfc1051.c
@@ -78,6 +78,7 @@ static void __exit arcnet_rfc1051_exit(void)
  module_init(arcnet_rfc1051_init);
  module_exit(arcnet_rfc1051_exit);
  
+MODULE_DESCRIPTION("ARCNet packet format (RFC 1051) module");
  MODULE_LICENSE("GPL");
  
  /* Determine a packet's protocol ID.
diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c

index a4c856282674b3d3d74007f43fb7c7272ea046b8..0edf35d971c56ef15a11814167a2100927975f61 100644 (file)
--- a/drivers/net/arcnet/rfc1201.c
+++ b/drivers/net/arcnet/rfc1201.c
@@ -35,6 +35,7 @@
  
  #include "arcdevice.h"
  
+MODULE_DESCRIPTION("ARCNet packet format (RFC 1201) module");
  MODULE_LICENSE("GPL");
  
  static __be16 type_trans(struct sk_buff *skb, struct net_device *dev);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c

index 4e0600c7b050f21c82a8862e224bb055e95d5039..a11748b8d69b435cf97971cec21c0340365ed6d1 100644 (file)
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1819,6 +1819,8 @@ void bond_xdp_set_features(struct net_device *bond_dev)
         bond_for_each_slave(bond, slave, iter)
                 val &= slave->dev->xdp_features;
  
+       val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY;
+
         xdp_set_features_flag(bond_dev, val);
  }
  
@@ -5909,9 +5911,6 @@ void bond_setup(struct net_device *bond_dev)
         if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
                 bond_dev->features |= BOND_XFRM_FEATURES;
  #endif /* CONFIG_XFRM_OFFLOAD */
-
-       if (bond_xdp_check(bond))
-               bond_dev->xdp_features = NETDEV_XDP_ACT_MASK;
  }
  
  /* Destroy a bonding device.
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c

index 036d85ef07f5ba676611e4a20ea470ab7691ffc1..dfdc039d92a6c114a1d5204c3a42ad170ca1b400 100644 (file)
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -346,7 +346,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
                         /* Neither of TDC parameters nor TDC flags are
                          * provided: do calculation
                          */
-                       can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming,
+                       can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt,
                                       &priv->ctrlmode, priv->ctrlmode_supported);
                 } /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly
                    * turned off. TDC is disabled: do nothing
diff --git a/drivers/net/dsa/dsa_loop_bdinfo.c b/drivers/net/dsa/dsa_loop_bdinfo.c

index 237066d307044583167923fcfc54d88a1ff53bc4..14ca42491512c62d71dac84e6e7d4dd91b2eb19b 100644 (file)
--- a/drivers/net/dsa/dsa_loop_bdinfo.c
+++ b/drivers/net/dsa/dsa_loop_bdinfo.c
@@ -32,4 +32,5 @@ static int __init dsa_loop_bdinfo_init(void)
  }
  arch_initcall(dsa_loop_bdinfo_init)
  
+MODULE_DESCRIPTION("DSA mock-up switch driver");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c

index 391c4dbdff4283d0b077608a59e4c95758eb24cf..3c1f657593a8f364e5db9500d06257f34373af8a 100644 (file)
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2838,8 +2838,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
         /* MT753x MAC works in 1G full duplex mode for all up-clocked
          * variants.
          */
-       if (interface == PHY_INTERFACE_MODE_INTERNAL ||
-           interface == PHY_INTERFACE_MODE_TRGMII ||
+       if (interface == PHY_INTERFACE_MODE_TRGMII ||
             (phy_interface_mode_is_8023z(interface))) {
                 speed = SPEED_1000;
                 duplex = DUPLEX_FULL;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c

index 383b3c4d6f599c57358d8970c9c26941231e9898..614cabb5c1b039d8d6df6789589455fe00f09e70 100644 (file)
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3659,7 +3659,7 @@ static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad,
         int err;
  
         if (!chip->info->ops->phy_read_c45)
-               return -EOPNOTSUPP;
+               return 0xffff;
  
         mv88e6xxx_reg_lock(chip);
         err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val);
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c

index c51f40960961f2b10a2f4191e4b8f5a50af9fc86..7a864329cb7267a9431a181a183c94b6f791f91e 100644 (file)
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -2051,12 +2051,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
         priv->info = of_device_get_match_data(priv->dev);
  
         priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset",
-                                                  GPIOD_ASIS);
+                                                  GPIOD_OUT_HIGH);
         if (IS_ERR(priv->reset_gpio))
                 return PTR_ERR(priv->reset_gpio);
  
         if (priv->reset_gpio) {
-               gpiod_set_value_cansleep(priv->reset_gpio, 1);
                 /* The active low duration must be greater than 10 ms
                  * and checkpatch.pl wants 20 ms.
                  */
diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig

index da3bdd3025022c3dd7286c3b7873e1a108767025..760a9a60bc15c1849f6b70e7d3f5b99c58667523 100644 (file)
--- a/drivers/net/ethernet/adi/Kconfig
+++ b/drivers/net/ethernet/adi/Kconfig
@@ -21,6 +21,7 @@ config ADIN1110
         tristate "Analog Devices ADIN1110 MAC-PHY"
         depends on SPI && NET_SWITCHDEV
         select CRC8
+       select PHYLIB
         help
           Say yes here to build support for Analog Devices ADIN1110
           Low Power 10BASE-T1L Ethernet MAC-PHY.
diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c

index 5beadabc213618314ad42da120da259415eaa7b3..ea773cfa0af67bd06d86037bc8208b4111b3bbc5 100644 (file)
--- a/drivers/net/ethernet/amd/pds_core/adminq.c
+++ b/drivers/net/ethernet/amd/pds_core/adminq.c
@@ -63,6 +63,15 @@ static int pdsc_process_notifyq(struct pdsc_qcq *qcq)
         return nq_work;
  }
  
+static bool pdsc_adminq_inc_if_up(struct pdsc *pdsc)
+{
+       if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER) ||
+           pdsc->state & BIT_ULL(PDSC_S_FW_DEAD))
+               return false;
+
+       return refcount_inc_not_zero(&pdsc->adminq_refcnt);
+}
+
  void pdsc_process_adminq(struct pdsc_qcq *qcq)
  {
         union pds_core_adminq_comp *comp;
@@ -75,9 +84,9 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq)
         int aq_work = 0;
         int credits;
  
-       /* Don't process AdminQ when shutting down */
-       if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) {
-               dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n",
+       /* Don't process AdminQ when it's not up */
+       if (!pdsc_adminq_inc_if_up(pdsc)) {
+               dev_err(pdsc->dev, "%s: called while adminq is unavailable\n",
                         __func__);
                 return;
         }
@@ -124,6 +133,7 @@ credits:
                 pds_core_intr_credits(&pdsc->intr_ctrl[qcq->intx],
                                       credits,
                                       PDS_CORE_INTR_CRED_REARM);
+       refcount_dec(&pdsc->adminq_refcnt);
  }
  
  void pdsc_work_thread(struct work_struct *work)
@@ -135,18 +145,20 @@ void pdsc_work_thread(struct work_struct *work)
  
  irqreturn_t pdsc_adminq_isr(int irq, void *data)
  {
-       struct pdsc_qcq *qcq = data;
-       struct pdsc *pdsc = qcq->pdsc;
+       struct pdsc *pdsc = data;
+       struct pdsc_qcq *qcq;
  
-       /* Don't process AdminQ when shutting down */
-       if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) {
-               dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n",
+       /* Don't process AdminQ when it's not up */
+       if (!pdsc_adminq_inc_if_up(pdsc)) {
+               dev_err(pdsc->dev, "%s: called while adminq is unavailable\n",
                         __func__);
                 return IRQ_HANDLED;
         }
  
+       qcq = &pdsc->adminqcq;
         queue_work(pdsc->wq, &qcq->work);
         pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR);
+       refcount_dec(&pdsc->adminq_refcnt);
  
         return IRQ_HANDLED;
  }
@@ -179,10 +191,16 @@ static int __pdsc_adminq_post(struct pdsc *pdsc,
  
         /* Check that the FW is running */
         if (!pdsc_is_fw_running(pdsc)) {
-               u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
-
-               dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n",
-                        __func__, fw_status);
+               if (pdsc->info_regs) {
+                       u8 fw_status =
+                               ioread8(&pdsc->info_regs->fw_status);
+
+                       dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n",
+                                __func__, fw_status);
+               } else {
+                       dev_info(pdsc->dev, "%s: post failed - BARs not setup\n",
+                                __func__);
+               }
                 ret = -ENXIO;
  
                 goto err_out_unlock;
@@ -230,6 +248,12 @@ int pdsc_adminq_post(struct pdsc *pdsc,
         int err = 0;
         int index;
  
+       if (!pdsc_adminq_inc_if_up(pdsc)) {
+               dev_dbg(pdsc->dev, "%s: preventing adminq cmd %u\n",
+                       __func__, cmd->opcode);
+               return -ENXIO;
+       }
+
         wc.qcq = &pdsc->adminqcq;
         index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc);
         if (index < 0) {
@@ -248,10 +272,16 @@ int pdsc_adminq_post(struct pdsc *pdsc,
                         break;
  
                 if (!pdsc_is_fw_running(pdsc)) {
-                       u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
-
-                       dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n",
-                               __func__, fw_status);
+                       if (pdsc->info_regs) {
+                               u8 fw_status =
+                                       ioread8(&pdsc->info_regs->fw_status);
+
+                               dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n",
+                                       __func__, fw_status);
+                       } else {
+                               dev_dbg(pdsc->dev, "%s: post wait failed - BARs not setup\n",
+                                       __func__);
+                       }
                         err = -ENXIO;
                         break;
                 }
@@ -285,6 +315,8 @@ err_out:
                         queue_work(pdsc->wq, &pdsc->health_work);
         }
  
+       refcount_dec(&pdsc->adminq_refcnt);
+
         return err;
  }
  EXPORT_SYMBOL_GPL(pdsc_adminq_post);
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c

index 0d2091e9eb283a375617828c00552cceb82768ca..7658a72867675aad5287c15989155386d3ab9de7 100644 (file)
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -125,7 +125,7 @@ static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq)
  
         snprintf(name, sizeof(name), "%s-%d-%s",
                  PDS_CORE_DRV_NAME, pdsc->pdev->bus->number, qcq->q.name);
-       index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, qcq);
+       index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, pdsc);
         if (index < 0)
                 return index;
         qcq->intx = index;
@@ -404,10 +404,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init)
         int numdescs;
         int err;
  
-       if (init)
-               err = pdsc_dev_init(pdsc);
-       else
-               err = pdsc_dev_reinit(pdsc);
+       err = pdsc_dev_init(pdsc);
         if (err)
                 return err;
  
@@ -450,6 +447,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init)
                 pdsc_debugfs_add_viftype(pdsc);
         }
  
+       refcount_set(&pdsc->adminq_refcnt, 1);
         clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
         return 0;
  
@@ -464,6 +462,8 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
  
         if (!pdsc->pdev->is_virtfn)
                 pdsc_devcmd_reset(pdsc);
+       if (pdsc->adminqcq.work.func)
+               cancel_work_sync(&pdsc->adminqcq.work);
         pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
         pdsc_qcq_free(pdsc, &pdsc->adminqcq);
  
@@ -476,10 +476,9 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
                 for (i = 0; i < pdsc->nintrs; i++)
                         pdsc_intr_free(pdsc, i);
  
-               if (removing) {
-                       kfree(pdsc->intr_info);
-                       pdsc->intr_info = NULL;
-               }
+               kfree(pdsc->intr_info);
+               pdsc->intr_info = NULL;
+               pdsc->nintrs = 0;
         }
  
         if (pdsc->kern_dbpage) {
@@ -487,6 +486,7 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
                 pdsc->kern_dbpage = NULL;
         }
  
+       pci_free_irq_vectors(pdsc->pdev);
         set_bit(PDSC_S_FW_DEAD, &pdsc->state);
  }
  
@@ -512,6 +512,24 @@ void pdsc_stop(struct pdsc *pdsc)
                                            PDS_CORE_INTR_MASK_SET);
  }
  
+static void pdsc_adminq_wait_and_dec_once_unused(struct pdsc *pdsc)
+{
+       /* The driver initializes the adminq_refcnt to 1 when the adminq is
+        * allocated and ready for use. Other users/requesters will increment
+        * the refcnt while in use. If the refcnt is down to 1 then the adminq
+        * is not in use and the refcnt can be cleared and adminq freed. Before
+        * calling this function the driver will set PDSC_S_FW_DEAD, which
+        * prevent subsequent attempts to use the adminq and increment the
+        * refcnt to fail. This guarantees that this function will eventually
+        * exit.
+        */
+       while (!refcount_dec_if_one(&pdsc->adminq_refcnt)) {
+               dev_dbg_ratelimited(pdsc->dev, "%s: adminq in use\n",
+                                   __func__);
+               cpu_relax();
+       }
+}
+
  void pdsc_fw_down(struct pdsc *pdsc)
  {
         union pds_core_notifyq_comp reset_event = {
@@ -527,6 +545,8 @@ void pdsc_fw_down(struct pdsc *pdsc)
         if (pdsc->pdev->is_virtfn)
                 return;
  
+       pdsc_adminq_wait_and_dec_once_unused(pdsc);
+
         /* Notify clients of fw_down */
         if (pdsc->fw_reporter)
                 devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
@@ -577,7 +597,13 @@ err_out:
  
  static void pdsc_check_pci_health(struct pdsc *pdsc)
  {
-       u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
+       u8 fw_status;
+
+       /* some sort of teardown already in progress */
+       if (!pdsc->info_regs)
+               return;
+
+       fw_status = ioread8(&pdsc->info_regs->fw_status);
  
         /* is PCI broken? */
         if (fw_status != PDS_RC_BAD_PCI)
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h

index e35d3e7006bfc1891a0343643910b915f31ba56a..110c4b826b22d588b33ca5cd2f0f1d38c76cf4b5 100644 (file)
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -184,6 +184,7 @@ struct pdsc {
         struct mutex devcmd_lock;       /* lock for dev_cmd operations */
         struct mutex config_lock;       /* lock for configuration operations */
         spinlock_t adminq_lock;         /* lock for adminq operations */
+       refcount_t adminq_refcnt;
         struct pds_core_dev_info_regs __iomem *info_regs;
         struct pds_core_dev_cmd_regs __iomem *cmd_regs;
         struct pds_core_intr __iomem *intr_ctrl;
@@ -280,7 +281,6 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
                        union pds_core_dev_comp *comp, int max_seconds);
  int pdsc_devcmd_init(struct pdsc *pdsc);
  int pdsc_devcmd_reset(struct pdsc *pdsc);
-int pdsc_dev_reinit(struct pdsc *pdsc);
  int pdsc_dev_init(struct pdsc *pdsc);
  
  void pdsc_reset_prepare(struct pci_dev *pdev);
diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c

index 8ec392299b7dcff9b74a0b08f45a5ccd25986cf1..4e8579ca1c8c71bd89659f041f3613113af16141 100644 (file)
--- a/drivers/net/ethernet/amd/pds_core/debugfs.c
+++ b/drivers/net/ethernet/amd/pds_core/debugfs.c
@@ -64,6 +64,10 @@ DEFINE_SHOW_ATTRIBUTE(identity);
  
  void pdsc_debugfs_add_ident(struct pdsc *pdsc)
  {
+       /* This file will already exist in the reset flow */
+       if (debugfs_lookup("identity", pdsc->dentry))
+               return;
+
         debugfs_create_file("identity", 0400, pdsc->dentry,
                             pdsc, &identity_fops);
  }
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c

index 31940b857e0e501d2d4d220a0ed6a0cfd03098c7..e65a1632df505d55de687ba781166299d865eaae 100644 (file)
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -57,6 +57,9 @@ int pdsc_err_to_errno(enum pds_core_status_code code)
  
  bool pdsc_is_fw_running(struct pdsc *pdsc)
  {
+       if (!pdsc->info_regs)
+               return false;
+
         pdsc->fw_status = ioread8(&pdsc->info_regs->fw_status);
         pdsc->last_fw_time = jiffies;
         pdsc->last_hb = ioread32(&pdsc->info_regs->fw_heartbeat);
@@ -182,13 +185,17 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
  {
         int err;
  
+       if (!pdsc->cmd_regs)
+               return -ENXIO;
+
         memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd));
         pdsc_devcmd_dbell(pdsc);
         err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds);
-       memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
  
         if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq)
                 queue_work(pdsc->wq, &pdsc->health_work);
+       else
+               memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
  
         return err;
  }
@@ -309,13 +316,6 @@ static int pdsc_identify(struct pdsc *pdsc)
         return 0;
  }
  
-int pdsc_dev_reinit(struct pdsc *pdsc)
-{
-       pdsc_init_devinfo(pdsc);
-
-       return pdsc_identify(pdsc);
-}
-
  int pdsc_dev_init(struct pdsc *pdsc)
  {
         unsigned int nintrs;
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c

index e9948ea5bbcdbaae713390cca46280e55b548956..54864f27c87a9e526524a023e444e318cc2bc0f7 100644 (file)
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -111,7 +111,8 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
  
         mutex_lock(&pdsc->devcmd_lock);
         err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout * 2);
-       memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list));
+       if (!err)
+               memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list));
         mutex_unlock(&pdsc->devcmd_lock);
         if (err && err != -EIO)
                 return err;
diff --git a/drivers/net/ethernet/amd/pds_core/fw.c b/drivers/net/ethernet/amd/pds_core/fw.c

index 90a811f3878ae974679bc5caba97e18aae04bdfb..fa626719e68d1b206fc9bbe1d038daf51984f19b 100644 (file)
--- a/drivers/net/ethernet/amd/pds_core/fw.c
+++ b/drivers/net/ethernet/amd/pds_core/fw.c
@@ -107,6 +107,9 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
  
         dev_info(pdsc->dev, "Installing firmware\n");
  
+       if (!pdsc->cmd_regs)
+               return -ENXIO;
+
         dl = priv_to_devlink(pdsc);
         devlink_flash_update_status_notify(dl, "Preparing to flash",
                                            NULL, 0, 0);
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c

index 3080898d7b95b0122701cacb8a15796ed2cc2dcb..0050c5894563b8a54c21a6b8933b844d63804098 100644 (file)
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -37,6 +37,11 @@ static void pdsc_unmap_bars(struct pdsc *pdsc)
         struct pdsc_dev_bar *bars = pdsc->bars;
         unsigned int i;
  
+       pdsc->info_regs = NULL;
+       pdsc->cmd_regs = NULL;
+       pdsc->intr_status = NULL;
+       pdsc->intr_ctrl = NULL;
+
         for (i = 0; i < PDS_CORE_BARS_MAX; i++) {
                 if (bars[i].vaddr)
                         pci_iounmap(pdsc->pdev, bars[i].vaddr);
@@ -293,7 +298,7 @@ err_out_stop:
  err_out_teardown:
         pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
  err_out_unmap_bars:
-       del_timer_sync(&pdsc->wdtimer);
+       timer_shutdown_sync(&pdsc->wdtimer);
         if (pdsc->wq)
                 destroy_workqueue(pdsc->wq);
         mutex_destroy(&pdsc->config_lock);
@@ -420,7 +425,7 @@ static void pdsc_remove(struct pci_dev *pdev)
                  */
                 pdsc_sriov_configure(pdev, 0);
  
-               del_timer_sync(&pdsc->wdtimer);
+               timer_shutdown_sync(&pdsc->wdtimer);
                 if (pdsc->wq)
                         destroy_workqueue(pdsc->wq);
  
@@ -433,7 +438,6 @@ static void pdsc_remove(struct pci_dev *pdev)
                 mutex_destroy(&pdsc->config_lock);
                 mutex_destroy(&pdsc->devcmd_lock);
  
-               pci_free_irq_vectors(pdev);
                 pdsc_unmap_bars(pdsc);
                 pci_release_regions(pdev);
         }
@@ -445,13 +449,32 @@ static void pdsc_remove(struct pci_dev *pdev)
         devlink_free(dl);
  }
  
+static void pdsc_stop_health_thread(struct pdsc *pdsc)
+{
+       if (pdsc->pdev->is_virtfn)
+               return;
+
+       timer_shutdown_sync(&pdsc->wdtimer);
+       if (pdsc->health_work.func)
+               cancel_work_sync(&pdsc->health_work);
+}
+
+static void pdsc_restart_health_thread(struct pdsc *pdsc)
+{
+       if (pdsc->pdev->is_virtfn)
+               return;
+
+       timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
+       mod_timer(&pdsc->wdtimer, jiffies + 1);
+}
+
  void pdsc_reset_prepare(struct pci_dev *pdev)
  {
         struct pdsc *pdsc = pci_get_drvdata(pdev);
  
+       pdsc_stop_health_thread(pdsc);
         pdsc_fw_down(pdsc);
  
-       pci_free_irq_vectors(pdev);
         pdsc_unmap_bars(pdsc);
         pci_release_regions(pdev);
         pci_disable_device(pdev);
@@ -486,6 +509,7 @@ void pdsc_reset_done(struct pci_dev *pdev)
         }
  
         pdsc_fw_up(pdsc);
+       pdsc_restart_health_thread(pdsc);
  }
  
  static const struct pci_error_handlers pdsc_err_handler = {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c

index abd4832e4ed21f3c2a22aed047a0331675162907..5acb3e16b5677b7826e488942ff6efb2c3cdf400 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
@@ -993,7 +993,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic)
         return 0;
  
  err_exit_hwts_rx:
-       aq_ring_free(&aq_ptp->hwts_rx);
+       aq_ring_hwts_rx_free(&aq_ptp->hwts_rx);
  err_exit_ptp_rx:
         aq_ring_free(&aq_ptp->ptp_rx);
  err_exit_ptp_tx:
@@ -1011,7 +1011,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic)
  
         aq_ring_free(&aq_ptp->ptp_tx);
         aq_ring_free(&aq_ptp->ptp_rx);
-       aq_ring_free(&aq_ptp->hwts_rx);
+       aq_ring_hwts_rx_free(&aq_ptp->hwts_rx);
  
         aq_ptp_skb_ring_release(&aq_ptp->skb_ring);
  }
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c

index cda8597b4e1469d2895f895f982f84cb97ef4506..f7433abd659159203f99fbb6cc9ed394bdedacfc 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -919,6 +919,19 @@ void aq_ring_free(struct aq_ring_s *self)
         }
  }
  
+void aq_ring_hwts_rx_free(struct aq_ring_s *self)
+{
+       if (!self)
+               return;
+
+       if (self->dx_ring) {
+               dma_free_coherent(aq_nic_get_dev(self->aq_nic),
+                                 self->size * self->dx_size + AQ_CFG_RXDS_DEF,
+                                 self->dx_ring, self->dx_ring_pa);
+               self->dx_ring = NULL;
+       }
+}
+
  unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data)
  {
         unsigned int count;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h

index 52847310740a21097dfc35a395e96dfe5de46321..d627ace850ff54201b760a079416e4d690e73184 100644 (file)
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -210,6 +210,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self);
  int aq_ring_hwts_rx_alloc(struct aq_ring_s *self,
                           struct aq_nic_s *aq_nic, unsigned int idx,
                           unsigned int size, unsigned int dx_size);
+void aq_ring_hwts_rx_free(struct aq_ring_s *self);
  void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic);
  
  unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data);
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c

index 29b04a274d077375d9658ea91c16df1ccd963969..80245c65cc904defdec4637eb66a9c1edd6eb03f 100644 (file)
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -535,9 +535,6 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
         int j = 0, i;
  
         for (i = 0; i < NUM_NET_FILTERS; i++) {
-               if (j == *rule_cnt)
-                       return -EMSGSIZE;
-
                 if (!priv->net_filters[i].claimed ||
                     priv->net_filters[i].port != intf->port)
                         continue;
@@ -547,6 +544,9 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs,
                     priv->net_filters[i - 1].wake_filter)
                         continue;
  
+               if (j == *rule_cnt)
+                       return -EMSGSIZE;
+
                 rule_locs[j++] = priv->net_filters[i].fs.location;
         }
  
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c

index 53e5428812552b56de4c7809c5c3f49f7c65379b..6ad1366270f79cba0579bac6088743b1645203ef 100644 (file)
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
@@ -684,6 +684,8 @@ static int bcmasp_init_rx(struct bcmasp_intf *intf)
  
         intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
         buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
+       if (!buffer_pg)
+               return -ENOMEM;
  
         dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
                            DMA_FROM_DEVICE);
@@ -1048,6 +1050,9 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
                         netdev_err(dev, "could not attach to PHY\n");
                         goto err_phy_disable;
                 }
+
+               /* Indicate that the MAC is responsible for PHY PM */
+               phydev->mac_managed_pm = true;
         } else if (!intf->wolopts) {
                 ret = phy_resume(dev->phydev);
                 if (ret)
@@ -1092,6 +1097,7 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
         return 0;
  
  err_reclaim_tx:
+       netif_napi_del(&intf->tx_napi);
         bcmasp_reclaim_free_all_tx(intf);
  err_phy_disconnect:
         if (phydev)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c

index adad188e38b8256ef5a1e051310abae2d5bd9b34..cc07660330f533b5e39efcb0f5dff6f865821315 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -684,7 +684,7 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb)
                 timestamp.hwtstamp = ns_to_ktime(ns);
                 skb_tstamp_tx(ptp->tx_skb, &timestamp);
         } else {
-               netdev_WARN_ONCE(bp->dev,
+               netdev_warn_once(bp->dev,
                                  "TS query for TX timer failed rc = %x\n", rc);
         }
  
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c

index 31191b520b5875a72f08e22ce316a103ccee68ea..c32174484a967ae4cf49f6025bf0892333d1fd89 100644 (file)
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -1091,10 +1091,10 @@ bnad_cb_tx_resume(struct bnad *bnad, struct bna_tx *tx)
   * Free all TxQs buffers and then notify TX_E_CLEANUP_DONE to Tx fsm.
   */
  static void
-bnad_tx_cleanup(struct delayed_work *work)
+bnad_tx_cleanup(struct work_struct *work)
  {
         struct bnad_tx_info *tx_info =
-               container_of(work, struct bnad_tx_info, tx_cleanup_work);
+               container_of(work, struct bnad_tx_info, tx_cleanup_work.work);
         struct bnad *bnad = NULL;
         struct bna_tcb *tcb;
         unsigned long flags;
@@ -1170,7 +1170,7 @@ bnad_cb_rx_stall(struct bnad *bnad, struct bna_rx *rx)
   * Free all RxQs buffers and then notify RX_E_CLEANUP_DONE to Rx fsm.
   */
  static void
-bnad_rx_cleanup(void *work)
+bnad_rx_cleanup(struct work_struct *work)
  {
         struct bnad_rx_info *rx_info =
                 container_of(work, struct bnad_rx_info, rx_cleanup_work);
@@ -1991,8 +1991,7 @@ bnad_setup_tx(struct bnad *bnad, u32 tx_id)
         }
         tx_info->tx = tx;
  
-       INIT_DELAYED_WORK(&tx_info->tx_cleanup_work,
-                       (work_func_t)bnad_tx_cleanup);
+       INIT_DELAYED_WORK(&tx_info->tx_cleanup_work, bnad_tx_cleanup);
  
         /* Register ISR for the Tx object */
         if (intr_info->intr_type == BNA_INTR_T_MSIX) {
@@ -2248,8 +2247,7 @@ bnad_setup_rx(struct bnad *bnad, u32 rx_id)
         rx_info->rx = rx;
         spin_unlock_irqrestore(&bnad->bna_lock, flags);
  
-       INIT_WORK(&rx_info->rx_cleanup_work,
-                       (work_func_t)(bnad_rx_cleanup));
+       INIT_WORK(&rx_info->rx_cleanup_work, bnad_rx_cleanup);
  
         /*
          * Init NAPI, so that state is set to NAPI_STATE_SCHED,
diff --git a/drivers/net/ethernet/cisco/enic/vnic_vic.c b/drivers/net/ethernet/cisco/enic/vnic_vic.c

index 20fcb20b42edee5129fcf6e5edde9f3a0ecd2764..66b57783533897e6399ec12505441004646b8ebc 100644 (file)
--- a/drivers/net/ethernet/cisco/enic/vnic_vic.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_vic.c
@@ -49,7 +49,8 @@ int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
  
         tlv->type = htons(type);
         tlv->length = htons(length);
-       memcpy(tlv->value, value, length);
+       unsafe_memcpy(tlv->value, value, length,
+                     /* Flexible array of flexible arrays */);
  
         vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1);
         vp->length = htonl(ntohl(vp->length) +
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c

index 9aeff2b37a61299587c22dc39b207204c3412543..64eadd3207983a671332d47cc4fdc966561d8425 100644 (file)
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -719,17 +719,25 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
  
  static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
                                 struct xdp_buff *xdp,
-                               struct netdev_queue *tx_nq, struct tsnep_tx *tx)
+                               struct netdev_queue *tx_nq, struct tsnep_tx *tx,
+                               bool zc)
  {
         struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
         bool xmit;
+       u32 type;
  
         if (unlikely(!xdpf))
                 return false;
  
+       /* no page pool for zero copy */
+       if (zc)
+               type = TSNEP_TX_TYPE_XDP_NDO;
+       else
+               type = TSNEP_TX_TYPE_XDP_TX;
+
         __netif_tx_lock(tx_nq, smp_processor_id());
  
-       xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);
+       xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, type);
  
         /* Avoid transmit queue timeout since we share it with the slow path */
         if (xmit)
@@ -1273,7 +1281,7 @@ static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
         case XDP_PASS:
                 return false;
         case XDP_TX:
-               if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
+               if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx, false))
                         goto out_failure;
                 *status |= TSNEP_XDP_TX;
                 return true;
@@ -1323,7 +1331,7 @@ static bool tsnep_xdp_run_prog_zc(struct tsnep_rx *rx, struct bpf_prog *prog,
         case XDP_PASS:
                 return false;
         case XDP_TX:
-               if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
+               if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx, true))
                         goto out_failure;
                 *status |= TSNEP_XDP_TX;
                 return true;
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c

index 9ba15d3183d75726fd88fa6b27a6efaf1fc30790..758535adc9ff5bb0a043683e1875ff1f3c9c2005 100644 (file)
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -1073,6 +1073,14 @@ int memac_initialization(struct mac_device *mac_dev,
         unsigned long            capabilities;
         unsigned long           *supported;
  
+       /* The internal connection to the serdes is XGMII, but this isn't
+        * really correct for the phy mode (which is the external connection).
+        * However, this is how all older device trees say that they want
+        * 10GBASE-R (aka XFI), so just convert it for them.
+        */
+       if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII)
+               mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER;
+
         mac_dev->phylink_ops            = &memac_mac_ops;
         mac_dev->set_promisc            = memac_set_promiscuous;
         mac_dev->change_addr            = memac_modify_mac_address;
@@ -1139,7 +1147,7 @@ int memac_initialization(struct mac_device *mac_dev,
          * (and therefore that xfi_pcs cannot be set). If we are defaulting to
          * XGMII, assume this is for XFI. Otherwise, assume it is for SGMII.
          */
-       if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII)
+       if (err && mac_dev->phy_if == PHY_INTERFACE_MODE_10GBASER)
                 memac->xfi_pcs = pcs;
         else
                 memac->sgmii_pcs = pcs;
@@ -1153,14 +1161,6 @@ int memac_initialization(struct mac_device *mac_dev,
                 goto _return_fm_mac_free;
         }
  
-       /* The internal connection to the serdes is XGMII, but this isn't
-        * really correct for the phy mode (which is the external connection).
-        * However, this is how all older device trees say that they want
-        * 10GBASE-R (aka XFI), so just convert it for them.
-        */
-       if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII)
-               mac_dev->phy_if = PHY_INTERFACE_MODE_10GBASER;
-
         /* TODO: The following interface modes are supported by (some) hardware
          * but not by this driver:
          * - 1000BASE-KX
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c

index 7a8dc5386ffff9bd99d94eced337cf276551a88f..76615d47e055aebc9fcea0d365b28b4389337c07 100644 (file)
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -356,7 +356,7 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
  
  static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
                                         struct gve_rx_slot_page_info *page_info,
-                                       u16 packet_buffer_size, u16 len,
+                                       unsigned int truesize, u16 len,
                                         struct gve_rx_ctx *ctx)
  {
         u32 offset = page_info->page_offset + page_info->pad;
@@ -389,10 +389,10 @@ static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
         if (skb != ctx->skb_head) {
                 ctx->skb_head->len += len;
                 ctx->skb_head->data_len += len;
-               ctx->skb_head->truesize += packet_buffer_size;
+               ctx->skb_head->truesize += truesize;
         }
         skb_add_rx_frag(skb, num_frags, page_info->page,
-                       offset, len, packet_buffer_size);
+                       offset, len, truesize);
  
         return ctx->skb_head;
  }
@@ -486,7 +486,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx,
  
                 memcpy(alloc_page_info.page_address, src, page_info->pad + len);
                 skb = gve_rx_add_frags(napi, &alloc_page_info,
-                                      rx->packet_buffer_size,
+                                      PAGE_SIZE,
                                        len, ctx);
  
                 u64_stats_update_begin(&rx->statss);
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h

index a187582d22994c607915f1fe26f5374031444976..ba9c19e6994c9defdf06eada37091e09d10881fa 100644 (file)
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -360,23 +360,43 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca);
   * As a result, a shift of INCVALUE_SHIFT_n is used to fit a value of
   * INCVALUE_n into the TIMINCA register allowing 32+8+(24-INCVALUE_SHIFT_n)
   * bits to count nanoseconds leaving the rest for fractional nonseconds.
+ *
+ * Any given INCVALUE also has an associated maximum adjustment value. This
+ * maximum adjustment value is the largest increase (or decrease) which can be
+ * safely applied without overflowing the INCVALUE. Since INCVALUE has
+ * a maximum range of 24 bits, its largest value is 0xFFFFFF.
+ *
+ * To understand where the maximum value comes from, consider the following
+ * equation:
+ *
+ *   new_incval = base_incval + (base_incval * adjustment) / 1billion
+ *
+ * To avoid overflow that means:
+ *   max_incval = base_incval + (base_incval * max_adj) / billion
+ *
+ * Re-arranging:
+ *   max_adj = floor(((max_incval - base_incval) * 1billion) / 1billion)
   */
  #define INCVALUE_96MHZ         125
  #define INCVALUE_SHIFT_96MHZ   17
  #define INCPERIOD_SHIFT_96MHZ  2
  #define INCPERIOD_96MHZ                (12 >> INCPERIOD_SHIFT_96MHZ)
+#define MAX_PPB_96MHZ          23999900 /* 23,999,900 ppb */
  
  #define INCVALUE_25MHZ         40
  #define INCVALUE_SHIFT_25MHZ   18
  #define INCPERIOD_25MHZ                1
+#define MAX_PPB_25MHZ          599999900 /* 599,999,900 ppb */
  
  #define INCVALUE_24MHZ         125
  #define INCVALUE_SHIFT_24MHZ   14
  #define INCPERIOD_24MHZ                3
+#define MAX_PPB_24MHZ          999999999 /* 999,999,999 ppb */
  
  #define INCVALUE_38400KHZ      26
  #define INCVALUE_SHIFT_38400KHZ        19
  #define INCPERIOD_38400KHZ     1
+#define MAX_PPB_38400KHZ       230769100 /* 230,769,100 ppb */
  
  /* Another drawback of scaling the incvalue by a large factor is the
   * 64-bit SYSTIM register overflows more quickly.  This is dealt with
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c

index 02d871bc112a739cec1baffba5b63abaf14f4a7d..bbcfd529399b0fa938037858b1e2f7912f8e5a58 100644 (file)
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -280,8 +280,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
  
         switch (hw->mac.type) {
         case e1000_pch2lan:
+               adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ;
+               break;
         case e1000_pch_lpt:
+               if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)
+                       adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ;
+               else
+                       adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ;
+               break;
         case e1000_pch_spt:
+               adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ;
+               break;
         case e1000_pch_cnp:
         case e1000_pch_tgp:
         case e1000_pch_adp:
@@ -289,15 +298,14 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
         case e1000_pch_lnp:
         case e1000_pch_ptp:
         case e1000_pch_nvp:
-               if ((hw->mac.type < e1000_pch_lpt) ||
-                   (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) {
-                       adapter->ptp_clock_info.max_adj = 24000000 - 1;
-                       break;
-               }
-               fallthrough;
+               if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)
+                       adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ;
+               else
+                       adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ;
+               break;
         case e1000_82574:
         case e1000_82583:
-               adapter->ptp_clock_info.max_adj = 600000000 - 1;
+               adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ;
                 break;
         default:
                 break;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c

index 9d88ed6105fd8f25ac8724827a9467f5043ee8b5..8db1eb0c1768c9869d7bf09820b4324b91bf788c 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -1523,7 +1523,7 @@ void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share,
                 reg = rd32(hw, I40E_PRTDCB_RETSTCC(i));
                 reg &= ~(I40E_PRTDCB_RETSTCC_BWSHARE_MASK     |
                          I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK |
-                        I40E_PRTDCB_RETSTCC_ETSTC_SHIFT);
+                        I40E_PRTDCB_RETSTCC_ETSTC_MASK);
                 reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_BWSHARE_MASK,
                                   bw_share[i]);
                 reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h

index 6b60dc9b77361a2537466c18c8d78eafbc35a01a..d76497566e40e739fd7eba7773fbb84c02ddf93b 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -43,7 +43,7 @@
  #define I40E_LLDP_TLV_SUBTYPE_SHIFT    0
  #define I40E_LLDP_TLV_SUBTYPE_MASK     (0xFF << I40E_LLDP_TLV_SUBTYPE_SHIFT)
  #define I40E_LLDP_TLV_OUI_SHIFT                8
-#define I40E_LLDP_TLV_OUI_MASK         (0xFFFFFF << I40E_LLDP_TLV_OUI_SHIFT)
+#define I40E_LLDP_TLV_OUI_MASK         (0xFFFFFFU << I40E_LLDP_TLV_OUI_SHIFT)
  
  /* Defines for IEEE ETS TLV */
  #define I40E_IEEE_ETS_MAXTC_SHIFT      0
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c

index 6e7fd473abfd001eb45e8b5bda8978fff9eec26b..54eb55464e3151f00665f9b780c65a1861f8ca07 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4926,27 +4926,23 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi)
  void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
  {
         struct i40e_pf *pf = vsi->back;
-       int pf_q, err, q_end;
+       u32 pf_q, tx_q_end, rx_q_end;
  
         /* When port TX is suspended, don't wait */
         if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
                 return i40e_vsi_stop_rings_no_wait(vsi);
  
-       q_end = vsi->base_queue + vsi->num_queue_pairs;
-       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
-               i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
+       tx_q_end = vsi->base_queue +
+               vsi->alloc_queue_pairs * (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
+       for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++)
+               i40e_pre_tx_queue_cfg(&pf->hw, pf_q, false);
  
-       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
-               err = i40e_control_wait_rx_q(pf, pf_q, false);
-               if (err)
-                       dev_info(&pf->pdev->dev,
-                                "VSI seid %d Rx ring %d disable timeout\n",
-                                vsi->seid, pf_q);
-       }
+       rx_q_end = vsi->base_queue + vsi->num_queue_pairs;
+       for (pf_q = vsi->base_queue; pf_q < rx_q_end; pf_q++)
+               i40e_control_rx_q(pf, pf_q, false);
  
         msleep(I40E_DISABLE_TX_GAP_MSEC);
-       pf_q = vsi->base_queue;
-       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+       for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++)
                 wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
  
         i40e_vsi_wait_queues_disabled(vsi);
@@ -5360,7 +5356,7 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
  {
         int v, ret = 0;
  
-       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+       for (v = 0; v < pf->num_alloc_vsi; v++) {
                 if (pf->vsi[v]) {
                         ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
                         if (ret)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

index 908cdbd3ec5d4fafe26fa03b4a41433f04ef5d31..b34c7177088745468ad33817302a631a5162322a 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2848,6 +2848,24 @@ error_param:
                                       (u8 *)&stats, sizeof(stats));
  }
  
+/**
+ * i40e_can_vf_change_mac
+ * @vf: pointer to the VF info
+ *
+ * Return true if the VF is allowed to change its MAC filters, false otherwise
+ */
+static bool i40e_can_vf_change_mac(struct i40e_vf *vf)
+{
+       /* If the VF MAC address has been set administratively (via the
+        * ndo_set_vf_mac command), then deny permission to the VF to
+        * add/delete unicast MAC addresses, unless the VF is trusted
+        */
+       if (vf->pf_set_mac && !vf->trusted)
+               return false;
+
+       return true;
+}
+
  #define I40E_MAX_MACVLAN_PER_HW 3072
  #define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW /  \
         (num_ports))
@@ -2907,8 +2925,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
                  * The VF may request to set the MAC address filter already
                  * assigned to it so do not return an error in that case.
                  */
-               if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
-                   !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
+               if (!i40e_can_vf_change_mac(vf) &&
+                   !is_multicast_ether_addr(addr) &&
                     !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
                         dev_err(&pf->pdev->dev,
                                 "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
@@ -3114,19 +3132,29 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
                         ret = -EINVAL;
                         goto error_param;
                 }
-               if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr))
-                       was_unimac_deleted = true;
         }
         vsi = pf->vsi[vf->lan_vsi_idx];
  
         spin_lock_bh(&vsi->mac_filter_hash_lock);
         /* delete addresses from the list */
-       for (i = 0; i < al->num_elements; i++)
+       for (i = 0; i < al->num_elements; i++) {
+               const u8 *addr = al->list[i].addr;
+
+               /* Allow to delete VF primary MAC only if it was not set
+                * administratively by PF or if VF is trusted.
+                */
+               if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
+                   i40e_can_vf_change_mac(vf))
+                       was_unimac_deleted = true;
+               else
+                       continue;
+
                 if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
                         ret = -EINVAL;
                         spin_unlock_bh(&vsi->mac_filter_hash_lock);
                         goto error_param;
                 }
+       }
  
         spin_unlock_bh(&vsi->mac_filter_hash_lock);
  
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c

index 7ac847718882e29b38071ca6b8adb47ca063f1d7..c979192e44d108b370ad132ec900c19d8452db32 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -190,15 +190,13 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
         q_vector = vsi->q_vectors[v_idx];
  
         ice_for_each_tx_ring(tx_ring, q_vector->tx) {
-               if (vsi->netdev)
-                       netif_queue_set_napi(vsi->netdev, tx_ring->q_index,
-                                            NETDEV_QUEUE_TYPE_TX, NULL);
+               ice_queue_set_napi(vsi, tx_ring->q_index, NETDEV_QUEUE_TYPE_TX,
+                                  NULL);
                 tx_ring->q_vector = NULL;
         }
         ice_for_each_rx_ring(rx_ring, q_vector->rx) {
-               if (vsi->netdev)
-                       netif_queue_set_napi(vsi->netdev, rx_ring->q_index,
-                                            NETDEV_QUEUE_TYPE_RX, NULL);
+               ice_queue_set_napi(vsi, rx_ring->q_index, NETDEV_QUEUE_TYPE_RX,
+                                  NULL);
                 rx_ring->q_vector = NULL;
         }
  
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c

index b9c5eced6326f8fe3958c446f3e8b8bb0c517f90..adfa1f2a80a667455ebefde181c2da9c5d73864b 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -30,6 +30,26 @@ static const char * const pin_type_name[] = {
         [ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input",
  };
  
+/**
+ * ice_dpll_is_reset - check if reset is in progress
+ * @pf: private board structure
+ * @extack: error reporting
+ *
+ * If reset is in progress, fill extack with error.
+ *
+ * Return:
+ * * false - no reset in progress
+ * * true - reset in progress
+ */
+static bool ice_dpll_is_reset(struct ice_pf *pf, struct netlink_ext_ack *extack)
+{
+       if (ice_is_reset_in_progress(pf->state)) {
+               NL_SET_ERR_MSG(extack, "PF reset in progress");
+               return true;
+       }
+       return false;
+}
+
  /**
   * ice_dpll_pin_freq_set - set pin's frequency
   * @pf: private board structure
@@ -109,6 +129,9 @@ ice_dpll_frequency_set(const struct dpll_pin *pin, void *pin_priv,
         struct ice_pf *pf = d->pf;
         int ret;
  
+       if (ice_dpll_is_reset(pf, extack))
+               return -EBUSY;
+
         mutex_lock(&pf->dplls.lock);
         ret = ice_dpll_pin_freq_set(pf, p, pin_type, frequency, extack);
         mutex_unlock(&pf->dplls.lock);
@@ -254,6 +277,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv,
   * ice_dpll_pin_enable - enable a pin on dplls
   * @hw: board private hw structure
   * @pin: pointer to a pin
+ * @dpll_idx: dpll index to connect to output pin
   * @pin_type: type of pin being enabled
   * @extack: error reporting
   *
@@ -266,7 +290,7 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv,
   */
  static int
  ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin,
-                   enum ice_dpll_pin_type pin_type,
+                   u8 dpll_idx, enum ice_dpll_pin_type pin_type,
                     struct netlink_ext_ack *extack)
  {
         u8 flags = 0;
@@ -280,10 +304,12 @@ ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin,
                 ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0);
                 break;
         case ICE_DPLL_PIN_TYPE_OUTPUT:
+               flags = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_SRC_SEL;
                 if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN)
                         flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN;
                 flags |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN;
-               ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0);
+               ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, dpll_idx,
+                                               0, 0);
                 break;
         default:
                 return -EINVAL;
@@ -370,7 +396,7 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin,
         case ICE_DPLL_PIN_TYPE_INPUT:
                 ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL,
                                                NULL, &pin->flags[0],
-                                              &pin->freq, NULL);
+                                              &pin->freq, &pin->phase_adjust);
                 if (ret)
                         goto err;
                 if (ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN & pin->flags[0]) {
@@ -398,14 +424,27 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin,
                 break;
         case ICE_DPLL_PIN_TYPE_OUTPUT:
                 ret = ice_aq_get_output_pin_cfg(&pf->hw, pin->idx,
-                                               &pin->flags[0], NULL,
+                                               &pin->flags[0], &parent,
                                                 &pin->freq, NULL);
                 if (ret)
                         goto err;
-               if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0])
-                       pin->state[0] = DPLL_PIN_STATE_CONNECTED;
-               else
-                       pin->state[0] = DPLL_PIN_STATE_DISCONNECTED;
+
+               parent &= ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL;
+               if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) {
+                       pin->state[pf->dplls.eec.dpll_idx] =
+                               parent == pf->dplls.eec.dpll_idx ?
+                               DPLL_PIN_STATE_CONNECTED :
+                               DPLL_PIN_STATE_DISCONNECTED;
+                       pin->state[pf->dplls.pps.dpll_idx] =
+                               parent == pf->dplls.pps.dpll_idx ?
+                               DPLL_PIN_STATE_CONNECTED :
+                               DPLL_PIN_STATE_DISCONNECTED;
+               } else {
+                       pin->state[pf->dplls.eec.dpll_idx] =
+                               DPLL_PIN_STATE_DISCONNECTED;
+                       pin->state[pf->dplls.pps.dpll_idx] =
+                               DPLL_PIN_STATE_DISCONNECTED;
+               }
                 break;
         case ICE_DPLL_PIN_TYPE_RCLK_INPUT:
                 for (parent = 0; parent < pf->dplls.rclk.num_parents;
@@ -568,9 +607,13 @@ ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv,
         struct ice_pf *pf = d->pf;
         int ret;
  
+       if (ice_dpll_is_reset(pf, extack))
+               return -EBUSY;
+
         mutex_lock(&pf->dplls.lock);
         if (enable)
-               ret = ice_dpll_pin_enable(&pf->hw, p, pin_type, extack);
+               ret = ice_dpll_pin_enable(&pf->hw, p, d->dpll_idx, pin_type,
+                                         extack);
         else
                 ret = ice_dpll_pin_disable(&pf->hw, p, pin_type, extack);
         if (!ret)
@@ -603,6 +646,11 @@ ice_dpll_output_state_set(const struct dpll_pin *pin, void *pin_priv,
                           struct netlink_ext_ack *extack)
  {
         bool enable = state == DPLL_PIN_STATE_CONNECTED;
+       struct ice_dpll_pin *p = pin_priv;
+       struct ice_dpll *d = dpll_priv;
+
+       if (!enable && p->state[d->dpll_idx] == DPLL_PIN_STATE_DISCONNECTED)
+               return 0;
  
         return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable,
                                       extack, ICE_DPLL_PIN_TYPE_OUTPUT);
@@ -665,14 +713,16 @@ ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv,
         struct ice_pf *pf = d->pf;
         int ret;
  
+       if (ice_dpll_is_reset(pf, extack))
+               return -EBUSY;
+
         mutex_lock(&pf->dplls.lock);
         ret = ice_dpll_pin_state_update(pf, p, pin_type, extack);
         if (ret)
                 goto unlock;
-       if (pin_type == ICE_DPLL_PIN_TYPE_INPUT)
+       if (pin_type == ICE_DPLL_PIN_TYPE_INPUT ||
+           pin_type == ICE_DPLL_PIN_TYPE_OUTPUT)
                 *state = p->state[d->dpll_idx];
-       else if (pin_type == ICE_DPLL_PIN_TYPE_OUTPUT)
-               *state = p->state[0];
         ret = 0;
  unlock:
         mutex_unlock(&pf->dplls.lock);
@@ -790,6 +840,9 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv,
         struct ice_pf *pf = d->pf;
         int ret;
  
+       if (ice_dpll_is_reset(pf, extack))
+               return -EBUSY;
+
         mutex_lock(&pf->dplls.lock);
         ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack);
         mutex_unlock(&pf->dplls.lock);
@@ -910,6 +963,9 @@ ice_dpll_pin_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv,
         u8 flag, flags_en = 0;
         int ret;
  
+       if (ice_dpll_is_reset(pf, extack))
+               return -EBUSY;
+
         mutex_lock(&pf->dplls.lock);
         switch (type) {
         case ICE_DPLL_PIN_TYPE_INPUT:
@@ -1069,6 +1125,9 @@ ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv,
         int ret = -EINVAL;
         u32 hw_idx;
  
+       if (ice_dpll_is_reset(pf, extack))
+               return -EBUSY;
+
         mutex_lock(&pf->dplls.lock);
         hw_idx = parent->idx - pf->dplls.base_rclk_idx;
         if (hw_idx >= pf->dplls.num_inputs)
@@ -1123,6 +1182,9 @@ ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv,
         int ret = -EINVAL;
         u32 hw_idx;
  
+       if (ice_dpll_is_reset(pf, extack))
+               return -EBUSY;
+
         mutex_lock(&pf->dplls.lock);
         hw_idx = parent->idx - pf->dplls.base_rclk_idx;
         if (hw_idx >= pf->dplls.num_inputs)
@@ -1305,8 +1367,10 @@ static void ice_dpll_periodic_work(struct kthread_work *work)
         struct ice_pf *pf = container_of(d, struct ice_pf, dplls);
         struct ice_dpll *de = &pf->dplls.eec;
         struct ice_dpll *dp = &pf->dplls.pps;
-       int ret;
+       int ret = 0;
  
+       if (ice_is_reset_in_progress(pf->state))
+               goto resched;
         mutex_lock(&pf->dplls.lock);
         ret = ice_dpll_update_state(pf, de, false);
         if (!ret)
@@ -1326,6 +1390,7 @@ static void ice_dpll_periodic_work(struct kthread_work *work)
         ice_dpll_notify_changes(de);
         ice_dpll_notify_changes(dp);
  
+resched:
         /* Run twice a second or reschedule if update failed */
         kthread_queue_delayed_work(d->kworker, &d->work,
                                    ret ? msecs_to_jiffies(10) :
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c

index 2a25323105e5b9bd5a1dbf072f097ddd90872210..467372d541d21f9c26416275f945b12e684079ef 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -151,6 +151,27 @@ ice_lag_find_hw_by_lport(struct ice_lag *lag, u8 lport)
         return NULL;
  }
  
+/**
+ * ice_pkg_has_lport_extract - check if lport extraction supported
+ * @hw: HW struct
+ */
+static bool ice_pkg_has_lport_extract(struct ice_hw *hw)
+{
+       int i;
+
+       for (i = 0; i < hw->blk[ICE_BLK_SW].es.count; i++) {
+               u16 offset;
+               u8 fv_prot;
+
+               ice_find_prot_off(hw, ICE_BLK_SW, ICE_SW_DEFAULT_PROFILE, i,
+                                 &fv_prot, &offset);
+               if (fv_prot == ICE_FV_PROT_MDID &&
+                   offset == ICE_LP_EXT_BUF_OFFSET)
+                       return true;
+       }
+       return false;
+}
+
  /**
   * ice_lag_find_primary - returns pointer to primary interfaces lag struct
   * @lag: local interfaces lag struct
@@ -1206,7 +1227,7 @@ static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
  }
  
  /**
- * ice_lag_init_feature_support_flag - Check for NVM support for LAG
+ * ice_lag_init_feature_support_flag - Check for package and NVM support for LAG
   * @pf: PF struct
   */
  static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
@@ -1219,7 +1240,7 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
         else
                 ice_clear_feature_support(pf, ICE_F_ROCE_LAG);
  
-       if (caps->sriov_lag)
+       if (caps->sriov_lag && ice_pkg_has_lport_extract(&pf->hw))
                 ice_set_feature_support(pf, ICE_F_SRIOV_LAG);
         else
                 ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h

index ede833dfa65866da00d8f4a6d77a90470906f863..183b38792ef22d9ac54daa0ea4a8156039681e7c 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_lag.h
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -17,6 +17,9 @@ enum ice_lag_role {
  #define ICE_LAG_INVALID_PORT 0xFF
  
  #define ICE_LAG_RESET_RETRIES          5
+#define ICE_SW_DEFAULT_PROFILE         0
+#define ICE_FV_PROT_MDID               255
+#define ICE_LP_EXT_BUF_OFFSET          32
  
  struct ice_pf;
  struct ice_vf;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c

index 9be724291ef82ac7e05c198d9febe029b4946a5e..097bf8fd6bf0edc5599edbf3fe9f3cf2e05e212a 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2426,7 +2426,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
                 ice_vsi_map_rings_to_vectors(vsi);
  
                 /* Associate q_vector rings to napi */
-               ice_vsi_set_napi_queues(vsi, true);
+               ice_vsi_set_napi_queues(vsi);
  
                 vsi->stat_offsets_loaded = false;
  
@@ -2904,19 +2904,19 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi)
  }
  
  /**
- * ice_queue_set_napi - Set the napi instance for the queue
+ * __ice_queue_set_napi - Set the napi instance for the queue
   * @dev: device to which NAPI and queue belong
   * @queue_index: Index of queue
   * @type: queue type as RX or TX
   * @napi: NAPI context
   * @locked: is the rtnl_lock already held
   *
- * Set the napi instance for the queue
+ * Set the napi instance for the queue. Caller indicates the lock status.
   */
  static void
-ice_queue_set_napi(struct net_device *dev, unsigned int queue_index,
-                  enum netdev_queue_type type, struct napi_struct *napi,
-                  bool locked)
+__ice_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+                    enum netdev_queue_type type, struct napi_struct *napi,
+                    bool locked)
  {
         if (!locked)
                 rtnl_lock();
@@ -2926,26 +2926,79 @@ ice_queue_set_napi(struct net_device *dev, unsigned int queue_index,
  }
  
  /**
- * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi
+ * ice_queue_set_napi - Set the napi instance for the queue
+ * @vsi: VSI being configured
+ * @queue_index: Index of queue
+ * @type: queue type as RX or TX
+ * @napi: NAPI context
+ *
+ * Set the napi instance for the queue. The rtnl lock state is derived from the
+ * execution path.
+ */
+void
+ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index,
+                  enum netdev_queue_type type, struct napi_struct *napi)
+{
+       struct ice_pf *pf = vsi->back;
+
+       if (!vsi->netdev)
+               return;
+
+       if (current_work() == &pf->serv_task ||
+           test_bit(ICE_PREPARED_FOR_RESET, pf->state) ||
+           test_bit(ICE_DOWN, pf->state) ||
+           test_bit(ICE_SUSPENDED, pf->state))
+               __ice_queue_set_napi(vsi->netdev, queue_index, type, napi,
+                                    false);
+       else
+               __ice_queue_set_napi(vsi->netdev, queue_index, type, napi,
+                                    true);
+}
+
+/**
+ * __ice_q_vector_set_napi_queues - Map queue[s] associated with the napi
   * @q_vector: q_vector pointer
   * @locked: is the rtnl_lock already held
   *
+ * Associate the q_vector napi with all the queue[s] on the vector.
+ * Caller indicates the lock status.
+ */
+void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked)
+{
+       struct ice_rx_ring *rx_ring;
+       struct ice_tx_ring *tx_ring;
+
+       ice_for_each_rx_ring(rx_ring, q_vector->rx)
+               __ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index,
+                                    NETDEV_QUEUE_TYPE_RX, &q_vector->napi,
+                                    locked);
+
+       ice_for_each_tx_ring(tx_ring, q_vector->tx)
+               __ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index,
+                                    NETDEV_QUEUE_TYPE_TX, &q_vector->napi,
+                                    locked);
+       /* Also set the interrupt number for the NAPI */
+       netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq);
+}
+
+/**
+ * ice_q_vector_set_napi_queues - Map queue[s] associated with the napi
+ * @q_vector: q_vector pointer
+ *
   * Associate the q_vector napi with all the queue[s] on the vector
   */
-void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked)
+void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector)
  {
         struct ice_rx_ring *rx_ring;
         struct ice_tx_ring *tx_ring;
  
         ice_for_each_rx_ring(rx_ring, q_vector->rx)
-               ice_queue_set_napi(q_vector->vsi->netdev, rx_ring->q_index,
-                                  NETDEV_QUEUE_TYPE_RX, &q_vector->napi,
-                                  locked);
+               ice_queue_set_napi(q_vector->vsi, rx_ring->q_index,
+                                  NETDEV_QUEUE_TYPE_RX, &q_vector->napi);
  
         ice_for_each_tx_ring(tx_ring, q_vector->tx)
-               ice_queue_set_napi(q_vector->vsi->netdev, tx_ring->q_index,
-                                  NETDEV_QUEUE_TYPE_TX, &q_vector->napi,
-                                  locked);
+               ice_queue_set_napi(q_vector->vsi, tx_ring->q_index,
+                                  NETDEV_QUEUE_TYPE_TX, &q_vector->napi);
         /* Also set the interrupt number for the NAPI */
         netif_napi_set_irq(&q_vector->napi, q_vector->irq.virq);
  }
@@ -2953,11 +3006,10 @@ void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked)
  /**
   * ice_vsi_set_napi_queues
   * @vsi: VSI pointer
- * @locked: is the rtnl_lock already held
   *
   * Associate queue[s] with napi for all vectors
   */
-void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked)
+void ice_vsi_set_napi_queues(struct ice_vsi *vsi)
  {
         int i;
  
@@ -2965,7 +3017,7 @@ void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked)
                 return;
  
         ice_for_each_q_vector(vsi, i)
-               ice_q_vector_set_napi_queues(vsi->q_vectors[i], locked);
+               ice_q_vector_set_napi_queues(vsi->q_vectors[i]);
  }
  
  /**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h

index 71bd27244941d549d9253af900629ccb36278072..bfcfc582a4c04ff143390e394d0b65a1d0970391 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -91,9 +91,15 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
  struct ice_vsi *
  ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params);
  
-void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked);
+void
+ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index,
+                  enum netdev_queue_type type, struct napi_struct *napi);
+
+void __ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector, bool locked);
+
+void ice_q_vector_set_napi_queues(struct ice_q_vector *q_vector);
  
-void ice_vsi_set_napi_queues(struct ice_vsi *vsi, bool locked);
+void ice_vsi_set_napi_queues(struct ice_vsi *vsi);
  
  int ice_vsi_release(struct ice_vsi *vsi);
  
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c

index dd4a9bc0dfdc661b2d2f3c48a2df5b773e4f75bb..59c7e37f175fe80f060b4aff05df572529543f5d 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3495,7 +3495,7 @@ static void ice_napi_add(struct ice_vsi *vsi)
         ice_for_each_q_vector(vsi, v_idx) {
                 netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
                                ice_napi_poll);
-               ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false);
+               __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false);
         }
  }
  
@@ -5447,6 +5447,7 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
                 if (ret)
                         goto err_reinit;
                 ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+               ice_vsi_set_napi_queues(pf->vsi[v]);
         }
  
         ret = ice_req_irq_msix_misc(pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h

index 82bc54fec7f36400a9be1f6603da2770ab5bb2e5..a2562f04267f23695af92be0bca5c1174702bef3 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_osdep.h
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -24,7 +24,7 @@
  #define rd64(a, reg)           readq((a)->hw_addr + (reg))
  
  #define ice_flush(a)           rd32((a), GLGEN_STAT)
-#define ICE_M(m, s)            ((m) << (s))
+#define ICE_M(m, s)            ((m ## U) << (s))
  
  struct ice_dma_mem {
         void *va;
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h

index 41ab6d7bbd9ef923fb766555ba48c5533e989f93..a508e917ce5ffab9e092a62337fbe70b27efbc5e 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -1072,7 +1072,7 @@ struct ice_aq_get_set_rss_lut_params {
  #define ICE_OROM_VER_BUILD_SHIFT       8
  #define ICE_OROM_VER_BUILD_MASK                (0xffff << ICE_OROM_VER_BUILD_SHIFT)
  #define ICE_OROM_VER_SHIFT             24
-#define ICE_OROM_VER_MASK              (0xff << ICE_OROM_VER_SHIFT)
+#define ICE_OROM_VER_MASK              (0xffU << ICE_OROM_VER_SHIFT)
  #define ICE_SR_PFA_PTR                 0x40
  #define ICE_SR_1ST_NVM_BANK_PTR                0x42
  #define ICE_SR_NVM_BANK_SIZE           0x43
diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h

index 8dc837889723c8a8976fd537e79e7d6acd49c4a8..4a3c4454d25abad18582ea7b93c74b616ef5cf75 100644 (file)
--- a/drivers/net/ethernet/intel/idpf/virtchnl2.h
+++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h
@@ -978,7 +978,7 @@ struct virtchnl2_ptype {
         u8 proto_id_count;
         __le16 pad;
         __le16 proto_id[];
-};
+} __packed __aligned(2);
  VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype);
  
  /**
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h

index a2b759531cb7ba44720f000018597d5222bec900..3c2dc7bdebb50eb9f08ec49ce6590f2b35445e53 100644 (file)
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -637,7 +637,7 @@ struct igb_adapter {
                 struct timespec64 period;
         } perout[IGB_N_PEROUT];
  
-       char fw_version[32];
+       char fw_version[48];
  #ifdef CONFIG_IGB_HWMON
         struct hwmon_buff *igb_hwmon_buff;
         bool ets;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c

index 4df8d4153aa5f5ce7ac9dd566180d552be9f5b4f..cebb44f51d5f5bbd1177b0caeb1e08f7a2fc30db 100644 (file)
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3069,7 +3069,6 @@ void igb_set_fw_version(struct igb_adapter *adapter)
  {
         struct e1000_hw *hw = &adapter->hw;
         struct e1000_fw_version fw;
-       char *lbuf;
  
         igb_get_fw_version(hw, &fw);
  
@@ -3077,34 +3076,36 @@ void igb_set_fw_version(struct igb_adapter *adapter)
         case e1000_i210:
         case e1000_i211:
                 if (!(igb_get_flash_presence_i210(hw))) {
-                       lbuf = kasprintf(GFP_KERNEL, "%2d.%2d-%d",
-                                        fw.invm_major, fw.invm_minor,
-                                        fw.invm_img_type);
+                       snprintf(adapter->fw_version,
+                                sizeof(adapter->fw_version),
+                                "%2d.%2d-%d",
+                                fw.invm_major, fw.invm_minor,
+                                fw.invm_img_type);
                         break;
                 }
                 fallthrough;
         default:
                 /* if option rom is valid, display its version too */
                 if (fw.or_valid) {
-                       lbuf = kasprintf(GFP_KERNEL, "%d.%d, 0x%08x, %d.%d.%d",
-                                        fw.eep_major, fw.eep_minor,
-                                        fw.etrack_id, fw.or_major, fw.or_build,
-                                        fw.or_patch);
+                       snprintf(adapter->fw_version,
+                                sizeof(adapter->fw_version),
+                                "%d.%d, 0x%08x, %d.%d.%d",
+                                fw.eep_major, fw.eep_minor, fw.etrack_id,
+                                fw.or_major, fw.or_build, fw.or_patch);
                 /* no option rom */
                 } else if (fw.etrack_id != 0X0000) {
-                       lbuf = kasprintf(GFP_KERNEL, "%d.%d, 0x%08x",
-                                        fw.eep_major, fw.eep_minor,
-                                        fw.etrack_id);
+                       snprintf(adapter->fw_version,
+                                sizeof(adapter->fw_version),
+                                "%d.%d, 0x%08x",
+                                fw.eep_major, fw.eep_minor, fw.etrack_id);
                 } else {
-                       lbuf = kasprintf(GFP_KERNEL, "%d.%d.%d", fw.eep_major,
-                                        fw.eep_minor, fw.eep_build);
+                       snprintf(adapter->fw_version,
+                                sizeof(adapter->fw_version),
+                                "%d.%d.%d",
+                                fw.eep_major, fw.eep_minor, fw.eep_build);
                 }
                 break;
         }
-
-       /* the truncate happens here if it doesn't fit */
-       strscpy(adapter->fw_version, lbuf, sizeof(adapter->fw_version));
-       kfree(lbuf);
  }
  
  /**
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c

index 319c544b9f04ce5e9ef6f09a9fa3e3f641583f47..f9457055612004c10f74379122063e8136fe7d76 100644 (file)
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -957,7 +957,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
  
         igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
         /* adjust timestamp for the TX latency based on link speed */
-       if (adapter->hw.mac.type == e1000_i210) {
+       if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
                 switch (adapter->link_speed) {
                 case SPEED_10:
                         adjust = IGB_I210_TX_LATENCY_10;
@@ -1003,6 +1003,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
                         ktime_t *timestamp)
  {
         struct igb_adapter *adapter = q_vector->adapter;
+       struct e1000_hw *hw = &adapter->hw;
         struct skb_shared_hwtstamps ts;
         __le64 *regval = (__le64 *)va;
         int adjust = 0;
@@ -1022,7 +1023,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
         igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));
  
         /* adjust timestamp for the RX latency based on link speed */
-       if (adapter->hw.mac.type == e1000_i210) {
+       if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) {
                 switch (adapter->link_speed) {
                 case SPEED_10:
                         adjust = IGB_I210_RX_LATENCY_10;
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c

index 7cd8716d2ffa3a90b35cea6218922a8cf656b9eb..861f37076861655df235fed77f6e7d0cb4bb3dc4 100644 (file)
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -130,11 +130,7 @@ void igc_power_down_phy_copper(struct igc_hw *hw)
         /* The PHY will retain its settings across a power down/up cycle */
         hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
         mii_reg |= MII_CR_POWER_DOWN;
-
-       /* Temporary workaround - should be removed when PHY will implement
-        * IEEE registers as properly
-        */
-       /* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/
+       hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
         usleep_range(1000, 2000);
  }
  
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c

index 6208923e29a2b861363317b983b577b383bbeeb1..c1adc94a5a657a6ac432a52016436479020673f3 100644 (file)
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -716,7 +716,8 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
         if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
                 error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command);
                 hw_dbg(hw, "Failed to read, error %x\n", error);
-               return -EIO;
+               ret = -EIO;
+               goto out;
         }
  
         if (!ret)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c

index 167145bdcb75d3f852134fcaa44fc2f307c42478..516adb50f9f6b2b8d4c43f12b51d83da30aae904 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -61,28 +61,6 @@ int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena)
         return 0;
  }
  
-static int npc_mcam_verify_pf_func(struct rvu *rvu,
-                                  struct mcam_entry *entry_data, u8 intf,
-                                  u16 pcifunc)
-{
-       u16 pf_func, pf_func_mask;
-
-       if (is_npc_intf_rx(intf))
-               return 0;
-
-       pf_func_mask = (entry_data->kw_mask[0] >> 32) &
-               NPC_KEX_PF_FUNC_MASK;
-       pf_func = (entry_data->kw[0] >> 32) & NPC_KEX_PF_FUNC_MASK;
-
-       pf_func = be16_to_cpu((__force __be16)pf_func);
-       if (pf_func_mask != NPC_KEX_PF_FUNC_MASK ||
-           ((pf_func & ~RVU_PFVF_FUNC_MASK) !=
-            (pcifunc & ~RVU_PFVF_FUNC_MASK)))
-               return -EINVAL;
-
-       return 0;
-}
-
  void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf)
  {
         int blkaddr;
@@ -437,6 +415,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
                         return;
         }
  
+       /* AF modifies given action iff PF/VF has requested for it */
+       if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT)
+               return;
+
         /* copy VF default entry action to the VF mcam entry */
         rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr,
                                                  target_func);
@@ -1850,8 +1832,8 @@ void npc_mcam_rsrcs_deinit(struct rvu *rvu)
  {
         struct npc_mcam *mcam = &rvu->hw->mcam;
  
-       kfree(mcam->bmap);
-       kfree(mcam->bmap_reverse);
+       bitmap_free(mcam->bmap);
+       bitmap_free(mcam->bmap_reverse);
         kfree(mcam->entry2pfvf_map);
         kfree(mcam->cntr2pfvf_map);
         kfree(mcam->entry2cntr_map);
@@ -1904,21 +1886,20 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
         mcam->pf_offset = mcam->nixlf_offset + nixlf_count;
  
         /* Allocate bitmaps for managing MCAM entries */
-       mcam->bmap = kmalloc_array(BITS_TO_LONGS(mcam->bmap_entries),
-                                  sizeof(long), GFP_KERNEL);
+       mcam->bmap = bitmap_zalloc(mcam->bmap_entries, GFP_KERNEL);
         if (!mcam->bmap)
                 return -ENOMEM;
  
-       mcam->bmap_reverse = kmalloc_array(BITS_TO_LONGS(mcam->bmap_entries),
-                                          sizeof(long), GFP_KERNEL);
+       mcam->bmap_reverse = bitmap_zalloc(mcam->bmap_entries, GFP_KERNEL);
         if (!mcam->bmap_reverse)
                 goto free_bmap;
  
         mcam->bmap_fcnt = mcam->bmap_entries;
  
         /* Alloc memory for saving entry to RVU PFFUNC allocation mapping */
-       mcam->entry2pfvf_map = kmalloc_array(mcam->bmap_entries,
-                                            sizeof(u16), GFP_KERNEL);
+       mcam->entry2pfvf_map = kcalloc(mcam->bmap_entries, sizeof(u16),
+                                      GFP_KERNEL);
+
         if (!mcam->entry2pfvf_map)
                 goto free_bmap_reverse;
  
@@ -1941,21 +1922,21 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
         if (err)
                 goto free_entry_map;
  
-       mcam->cntr2pfvf_map = kmalloc_array(mcam->counters.max,
-                                           sizeof(u16), GFP_KERNEL);
+       mcam->cntr2pfvf_map = kcalloc(mcam->counters.max, sizeof(u16),
+                                     GFP_KERNEL);
         if (!mcam->cntr2pfvf_map)
                 goto free_cntr_bmap;
  
         /* Alloc memory for MCAM entry to counter mapping and for tracking
          * counter's reference count.
          */
-       mcam->entry2cntr_map = kmalloc_array(mcam->bmap_entries,
-                                            sizeof(u16), GFP_KERNEL);
+       mcam->entry2cntr_map = kcalloc(mcam->bmap_entries, sizeof(u16),
+                                      GFP_KERNEL);
         if (!mcam->entry2cntr_map)
                 goto free_cntr_map;
  
-       mcam->cntr_refcnt = kmalloc_array(mcam->counters.max,
-                                         sizeof(u16), GFP_KERNEL);
+       mcam->cntr_refcnt = kcalloc(mcam->counters.max, sizeof(u16),
+                                   GFP_KERNEL);
         if (!mcam->cntr_refcnt)
                 goto free_entry_cntr_map;
  
@@ -1988,9 +1969,9 @@ free_cntr_bmap:
  free_entry_map:
         kfree(mcam->entry2pfvf_map);
  free_bmap_reverse:
-       kfree(mcam->bmap_reverse);
+       bitmap_free(mcam->bmap_reverse);
  free_bmap:
-       kfree(mcam->bmap);
+       bitmap_free(mcam->bmap);
  
         return -ENOMEM;
  }
@@ -2852,12 +2833,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
         else
                 nix_intf = pfvf->nix_rx_intf;
  
-       if (!is_pffunc_af(pcifunc) &&
-           npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) {
-               rc = NPC_MCAM_INVALID_REQ;
-               goto exit;
-       }
-
         /* For AF installed rules, the nix_intf should be set to target NIX */
         if (is_pffunc_af(req->hdr.pcifunc))
                 nix_intf = req->intf;
@@ -3209,10 +3184,6 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
         if (!is_npc_interface_valid(rvu, req->intf))
                 return NPC_MCAM_INVALID_REQ;
  
-       if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf,
-                                   req->hdr.pcifunc))
-               return NPC_MCAM_INVALID_REQ;
-
         /* Try to allocate a MCAM entry */
         entry_req.hdr.pcifunc = req->hdr.pcifunc;
         entry_req.contig = true;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c

index 7ca6941ea0b9b4d684ba45482b88db066a728f98..02d0b707aea5bd6b9dea286180914b5aaba4a51d 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -951,8 +951,11 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
         if (pfvf->ptp && qidx < pfvf->hw.tx_queues) {
                 err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt,
                                  sizeof(*sq->timestamps));
-               if (err)
+               if (err) {
+                       kfree(sq->sg);
+                       sq->sg = NULL;
                         return err;
+               }
         }
  
         sq->head = 0;
@@ -968,7 +971,14 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
         sq->stats.bytes = 0;
         sq->stats.pkts = 0;
  
-       return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura);
+       err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura);
+       if (err) {
+               kfree(sq->sg);
+               sq->sg = NULL;
+               return err;
+       }
+
+       return 0;
  
  }
  
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c

index 2928898c7f8df89c45092c209f9a3dd25b43ee21..7f786de6101483a775c8aa4f12789631040fdd95 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -314,7 +314,6 @@ static int otx2_set_channels(struct net_device *dev,
         pfvf->hw.tx_queues = channel->tx_count;
         if (pfvf->xdp_prog)
                 pfvf->hw.xdp_queues = channel->rx_count;
-       pfvf->hw.non_qos_queues =  pfvf->hw.tx_queues + pfvf->hw.xdp_queues;
  
         if (if_up)
                 err = dev->netdev_ops->ndo_open(dev);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c

index a57455aebff6fc58e24c4a4da2d60d78e59f439f..e5fe67e7386551e321949dc3b42074067eb4b3a9 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1744,6 +1744,7 @@ int otx2_open(struct net_device *netdev)
         /* RQ and SQs are mapped to different CQs,
          * so find out max CQ IRQs (i.e CINTs) needed.
          */
+       pf->hw.non_qos_queues =  pf->hw.tx_queues + pf->hw.xdp_queues;
         pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues,
                                pf->hw.tc_tx_queues);
  
@@ -2643,8 +2644,6 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog)
                 xdp_features_clear_redirect_target(dev);
         }
  
-       pf->hw.non_qos_queues += pf->hw.xdp_queues;
-
         if (if_up)
                 otx2_open(pf->netdev);
  
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c

index 4d519ea833b2c7c4fa439ee56fdd07962221030c..f828d32737af02f6a1492e015a1a3d77a732e732 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -1403,7 +1403,7 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
                                      struct otx2_cq_queue *cq,
                                      bool *need_xdp_flush)
  {
-       unsigned char *hard_start, *data;
+       unsigned char *hard_start;
         int qidx = cq->cq_idx;
         struct xdp_buff xdp;
         struct page *page;
@@ -1417,9 +1417,8 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
  
         xdp_init_buff(&xdp, pfvf->rbsize, &cq->xdp_rxq);
  
-       data = (unsigned char *)phys_to_virt(pa);
-       hard_start = page_address(page);
-       xdp_prepare_buff(&xdp, hard_start, data - hard_start,
+       hard_start = (unsigned char *)phys_to_virt(pa);
+       xdp_prepare_buff(&xdp, hard_start, OTX2_HEAD_ROOM,
                          cqe->sg.seg_size, false);
  
         act = bpf_prog_run_xdp(prog, &xdp);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c

index a6e91573f8dae8368f7667f5f5caa5636d881a60..de123350bd46b6e55ee5ea83737f79a4bceb6867 100644 (file)
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -4761,7 +4761,10 @@ static int mtk_probe(struct platform_device *pdev)
         }
  
         if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) {
-               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36));
+               err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(36));
+               if (!err)
+                       err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+
                 if (err) {
                         dev_err(&pdev->dev, "Wrong DMA config\n");
                         return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c

index 18fed2b34fb1cad6319f972ca5b6d604701bbce5..928bf24d4b123945afc9df29ea5d758792d269cb 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
@@ -389,7 +389,7 @@ static void mlx5_dpll_remove(struct auxiliary_device *adev)
         struct mlx5_dpll *mdpll = auxiliary_get_drvdata(adev);
         struct mlx5_core_dev *mdev = mdpll->mdev;
  
-       cancel_delayed_work(&mdpll->work);
+       cancel_delayed_work_sync(&mdpll->work);
         mlx5_dpll_mdev_netdev_untrack(mdpll, mdev);
         destroy_workqueue(mdpll->wq);
         dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c

index 41fa2523d91d3bf57479dd7d66c1903786aa98b2..5f2cd9a8cf8fb39cef3d6be0806f3a6f9b7cdac7 100644 (file)
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
@@ -37,19 +37,24 @@ static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x)
  
         /* Now, set PGIDs for each active LAG */
         for (lag = 0; lag < lan966x->num_phys_ports; ++lag) {
-               struct net_device *bond = lan966x->ports[lag]->bond;
+               struct lan966x_port *port = lan966x->ports[lag];
                 int num_active_ports = 0;
+               struct net_device *bond;
                 unsigned long bond_mask;
                 u8 aggr_idx[16];
  
-               if (!bond || (visited & BIT(lag)))
+               if (!port || !port->bond || (visited & BIT(lag)))
                         continue;
  
+               bond = port->bond;
                 bond_mask = lan966x_lag_get_mask(lan966x, bond);
  
                 for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) {
                         struct lan966x_port *port = lan966x->ports[p];
  
+                       if (!port)
+                               continue;
+
                         lan_wr(ANA_PGID_PGID_SET(bond_mask),
                                lan966x, ANA_PGID(p));
                         if (port->lag_tx_active)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c

index 92108d354051c31c44c64b207fb11411d0b4295b..2e83bbb9477e0693f236e83be30277d3e92df235 100644 (file)
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
@@ -168,9 +168,10 @@ static void lan966x_port_link_up(struct lan966x_port *port)
         lan966x_taprio_speed_set(port, config->speed);
  
         /* Also the GIGA_MODE_ENA(1) needs to be set regardless of the
-        * port speed for QSGMII ports.
+        * port speed for QSGMII or SGMII ports.
          */
-       if (phy_interface_num_ports(config->portmode) == 4)
+       if (phy_interface_num_ports(config->portmode) == 4 ||
+           config->portmode == PHY_INTERFACE_MODE_SGMII)
                 mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1);
  
         lan_wr(config->duplex | mode,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c

index d1f7fc8b1b71ab68775f40ad592ebc8aa0e57211..3c066b62e68947cf81fc34c1169cc2edd2991d5a 100644 (file)
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -757,6 +757,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
         platform_set_drvdata(pdev, sparx5);
         sparx5->pdev = pdev;
         sparx5->dev = &pdev->dev;
+       spin_lock_init(&sparx5->tx_lock);
  
         /* Do switch core reset if available */
         reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch");
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h

index 6f565c0c0c3dcd3d3889abb1bf8eac72899037fc..316fed5f27355207146875ee80b3636420ca4945 100644 (file)
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -280,6 +280,7 @@ struct sparx5 {
         int xtr_irq;
         /* Frame DMA */
         int fdma_irq;
+       spinlock_t tx_lock; /* lock for frame transmission */
         struct sparx5_rx rx;
         struct sparx5_tx tx;
         /* PTP */
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c

index 6db6ac6a3bbc26db972e2f611ddd7c72fac29c16..ac7e1cffbcecf0ccc4f89e394730d90ec2ada2f8 100644 (file)
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
@@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
         }
  
         skb_tx_timestamp(skb);
+       spin_lock(&sparx5->tx_lock);
         if (sparx5->fdma_irq > 0)
                 ret = sparx5_fdma_xmit(sparx5, ifh, skb);
         else
                 ret = sparx5_inject(sparx5, ifh, skb, dev);
+       spin_unlock(&sparx5->tx_lock);
  
         if (ret == -EBUSY)
                 goto busy;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c

index 2967bab72505617abcf59f0b16f5a1d5bb9d127c..15180538b80a1535a8646b407bcc1b06b632b43c 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -1424,10 +1424,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_
                 mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask);
                 return;
  
+       /* Both struct tcphdr and struct udphdr start with
+        *      __be16 source;
+        *      __be16 dest;
+        * so we can use the same code for both.
+        */
         case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
         case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
-               mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val);
-               mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask);
+               if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) {
+                       mangle_action->mangle.val =
+                               (__force u32)cpu_to_be32(mangle_action->mangle.val << 16);
+                       /* The mask of mangle action is inverse mask,
+                        * so clear the dest tp port with 0xFFFF to
+                        * instead of rotate-left operation.
+                        */
+                       mangle_action->mangle.mask =
+                               (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF);
+               }
+               if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) {
+                       mangle_action->mangle.offset = 0;
+                       mangle_action->mangle.val =
+                               (__force u32)cpu_to_be32(mangle_action->mangle.val);
+                       mangle_action->mangle.mask =
+                               (__force u32)cpu_to_be32(mangle_action->mangle.mask);
+               }
                 return;
  
         default:
@@ -1864,10 +1884,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
  {
         struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
         struct nfp_fl_ct_flow_entry *ct_entry;
+       struct flow_action_entry *ct_goto;
         struct nfp_fl_ct_zone_entry *zt;
+       struct flow_action_entry *act;
         bool wildcarded = false;
         struct flow_match_ct ct;
-       struct flow_action_entry *ct_goto;
+       int i;
+
+       flow_action_for_each(i, act, &rule->action) {
+               switch (act->id) {
+               case FLOW_ACTION_REDIRECT:
+               case FLOW_ACTION_REDIRECT_INGRESS:
+               case FLOW_ACTION_MIRRED:
+               case FLOW_ACTION_MIRRED_INGRESS:
+                       if (act->dev->rtnl_link_ops &&
+                           !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) {
+                               NL_SET_ERR_MSG_MOD(extack,
+                                                  "unsupported offload: out port is openvswitch internal port");
+                               return -EOPNOTSUPP;
+                       }
+                       break;
+               default:
+                       break;
+               }
+       }
  
         flow_rule_match_ct(rule, &ct);
         if (!ct.mask->ct_zone) {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c

index e522845c7c211619a252bb995dec65160d7a1ae5..0d7d138d6e0d7e4f468f66683707cd22d750b64a 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -1084,7 +1084,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev,
         u16 nfp_mac_idx = 0;
  
         entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr);
-       if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) {
+       if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) {
                 if (entry->bridge_count ||
                     !nfp_flower_is_supported_bridge(netdev)) {
                         nfp_tunnel_offloaded_macs_inc_ref_and_link(entry,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c

index 3b3210d823e8038704391e085b9d7951031dd309..f28e769e6fdadab091d447f3de4cb8df1d2b4d3e 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2776,6 +2776,7 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
         case NFP_NFD_VER_NFD3:
                 netdev->netdev_ops = &nfp_nfd3_netdev_ops;
                 netdev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+               netdev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
                 break;
         case NFP_NFD_VER_NFDK:
                 netdev->netdev_ops = &nfp_nfdk_netdev_ops;
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c

index 33b4c28563162eeab3938da414b32cfd480c13d7..3f10c5365c80ebb2fe079b779fee644a46ed33da 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface)
         const u32 barcfg_msix_general =
                 NFP_PCIE_BAR_PCIE2CPP_MapType(
                         NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) |
-               NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT;
+               NFP_PCIE_BAR_PCIE2CPP_LengthSelect(
+                       NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT);
         const u32 barcfg_msix_xpb =
                 NFP_PCIE_BAR_PCIE2CPP_MapType(
                         NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) |
-               NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT |
+               NFP_PCIE_BAR_PCIE2CPP_LengthSelect(
+                       NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) |
                 NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress(
                         NFP_CPP_TARGET_ISLAND_XPB);
         const u32 barcfg_explicit[4] = {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c

index c49aa358e42444de33b3a3dd08832bf8f56af394..6ba8d4aca0a038b88e7f3ae3a8299ea0a55bd7e0 100644 (file)
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -93,6 +93,7 @@ static void ionic_unmap_bars(struct ionic *ionic)
                         bars[i].len = 0;
                 }
         }
+       ionic->num_bars = 0;
  }
  
  void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num)
@@ -215,15 +216,17 @@ out:
  
  static void ionic_clear_pci(struct ionic *ionic)
  {
-       ionic->idev.dev_info_regs = NULL;
-       ionic->idev.dev_cmd_regs = NULL;
-       ionic->idev.intr_status = NULL;
-       ionic->idev.intr_ctrl = NULL;
-
-       ionic_unmap_bars(ionic);
-       pci_release_regions(ionic->pdev);
+       if (ionic->num_bars) {
+               ionic->idev.dev_info_regs = NULL;
+               ionic->idev.dev_cmd_regs = NULL;
+               ionic->idev.intr_status = NULL;
+               ionic->idev.intr_ctrl = NULL;
+
+               ionic_unmap_bars(ionic);
+               pci_release_regions(ionic->pdev);
+       }
  
-       if (atomic_read(&ionic->pdev->enable_cnt) > 0)
+       if (pci_is_enabled(ionic->pdev))
                 pci_disable_device(ionic->pdev);
  }
  
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c

index 1e7c71f7f081b159e83271eeeb47eb35ac401d69..746072b4dbd0e0d37352bc771aa0c7e963eaa26f 100644 (file)
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -319,22 +319,32 @@ do_check_time:
  
  u8 ionic_dev_cmd_status(struct ionic_dev *idev)
  {
+       if (!idev->dev_cmd_regs)
+               return (u8)PCI_ERROR_RESPONSE;
         return ioread8(&idev->dev_cmd_regs->comp.comp.status);
  }
  
  bool ionic_dev_cmd_done(struct ionic_dev *idev)
  {
+       if (!idev->dev_cmd_regs)
+               return false;
         return ioread32(&idev->dev_cmd_regs->done) & IONIC_DEV_CMD_DONE;
  }
  
  void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp)
  {
+       if (!idev->dev_cmd_regs)
+               return;
         memcpy_fromio(comp, &idev->dev_cmd_regs->comp, sizeof(*comp));
  }
  
  void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd)
  {
         idev->opcode = cmd->cmd.opcode;
+
+       if (!idev->dev_cmd_regs)
+               return;
+
         memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd));
         iowrite32(0, &idev->dev_cmd_regs->done);
         iowrite32(1, &idev->dev_cmd_regs->doorbell);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c

index cd3c0b01402e64360c9104a069f0a9bd5b23b65f..0ffc9c4904ac80320cc9c26f51ea6e52abf60784 100644 (file)
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -90,18 +90,23 @@ static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
                            void *p)
  {
         struct ionic_lif *lif = netdev_priv(netdev);
+       struct ionic_dev *idev;
         unsigned int offset;
         unsigned int size;
  
         regs->version = IONIC_DEV_CMD_REG_VERSION;
  
+       idev = &lif->ionic->idev;
+       if (!idev->dev_info_regs)
+               return;
+
         offset = 0;
         size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32);
         memcpy_fromio(p + offset, lif->ionic->idev.dev_info_regs->words, size);
  
         offset += size;
         size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32);
-       memcpy_fromio(p + offset, lif->ionic->idev.dev_cmd_regs->words, size);
+       memcpy_fromio(p + offset, idev->dev_cmd_regs->words, size);
  }
  
  static void ionic_get_link_ext_stats(struct net_device *netdev,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_fw.c b/drivers/net/ethernet/pensando/ionic/ionic_fw.c

index 5f40324cd243fe2f2f79b924920951304d25df45..3c209c1a23373339b8455387105128f2dd9057be 100644 (file)
--- a/drivers/net/ethernet/pensando/ionic/ionic_fw.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_fw.c
@@ -109,6 +109,11 @@ int ionic_firmware_update(struct ionic_lif *lif, const struct firmware *fw,
         dl = priv_to_devlink(ionic);
         devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0);
  
+       if (!idev->dev_cmd_regs) {
+               err = -ENXIO;
+               goto err_out;
+       }
+
         buf_sz = sizeof(idev->dev_cmd_regs->data);
  
         netdev_dbg(netdev,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c

index cf2d5ad7b68cc85195e516697d82238c6a7f5924..fcb44ceeb6aa51d944a12b411d904f2715a43be7 100644 (file)
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -3559,7 +3559,10 @@ int ionic_lif_init(struct ionic_lif *lif)
                         goto err_out_notifyq_deinit;
         }
  
-       err = ionic_init_nic_features(lif);
+       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+               err = ionic_set_nic_features(lif, lif->netdev->features);
+       else
+               err = ionic_init_nic_features(lif);
         if (err)
                 goto err_out_notifyq_deinit;
  
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c

index 165ab08ad2dda8ea15cca7aba88f586b0010c3f2..2f479de329fec5ef039c5e4ebaa3ea79d88a04a5 100644 (file)
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -416,6 +416,9 @@ static void ionic_dev_cmd_clean(struct ionic *ionic)
  {
         struct ionic_dev *idev = &ionic->idev;
  
+       if (!idev->dev_cmd_regs)
+               return;
+
         iowrite32(0, &idev->dev_cmd_regs->doorbell);
         memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd));
  }
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c

index 54cd96b035d680a61297723b46adc16bf10ab3fa..6f47767598637ed3d2a961f6815cc183f066067b 100644 (file)
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -579,6 +579,9 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
         work_done = ionic_cq_service(cq, budget,
                                      ionic_tx_service, NULL, NULL);
  
+       if (unlikely(!budget))
+               return budget;
+
         if (work_done < budget && napi_complete_done(napi, work_done)) {
                 ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR);
                 flags |= IONIC_INTR_CRED_UNMASK;
@@ -607,6 +610,9 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
         u32 work_done = 0;
         u32 flags = 0;
  
+       if (unlikely(!budget))
+               return budget;
+
         lif = cq->bound_q->lif;
         idev = &lif->ionic->idev;
  
@@ -656,6 +662,9 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
         tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT,
                                         ionic_tx_service, NULL, NULL);
  
+       if (unlikely(!budget))
+               return budget;
+
         rx_work_done = ionic_cq_service(rxcq, budget,
                                         ionic_rx_service, NULL, NULL);
  
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c

index 0e3731f50fc2873dc3c4c06c16ffe1f4a8707e83..f7566cfa45ca37a3cfd02331c24f49bf576393a7 100644 (file)
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -772,29 +772,25 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
         struct ravb_rx_desc *desc;
         struct sk_buff *skb;
         dma_addr_t dma_addr;
+       int rx_packets = 0;
         u8  desc_status;
-       int boguscnt;
         u16 pkt_len;
         u8  die_dt;
         int entry;
         int limit;
+       int i;
  
         entry = priv->cur_rx[q] % priv->num_rx_ring[q];
-       boguscnt = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
+       limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
         stats = &priv->stats[q];
  
-       boguscnt = min(boguscnt, *quota);
-       limit = boguscnt;
         desc = &priv->gbeth_rx_ring[entry];
-       while (desc->die_dt != DT_FEMPTY) {
+       for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) {
                 /* Descriptor type must be checked before all other reads */
                 dma_rmb();
                 desc_status = desc->msc;
                 pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
  
-               if (--boguscnt < 0)
-                       break;
-
                 /* We use 0-byte descriptors to mark the DMA mapping errors */
                 if (!pkt_len)
                         continue;
@@ -820,7 +816,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
                                 skb_put(skb, pkt_len);
                                 skb->protocol = eth_type_trans(skb, ndev);
                                 napi_gro_receive(&priv->napi[q], skb);
-                               stats->rx_packets++;
+                               rx_packets++;
                                 stats->rx_bytes += pkt_len;
                                 break;
                         case DT_FSTART:
@@ -848,7 +844,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
                                         eth_type_trans(priv->rx_1st_skb, ndev);
                                 napi_gro_receive(&priv->napi[q],
                                                  priv->rx_1st_skb);
-                               stats->rx_packets++;
+                               rx_packets++;
                                 stats->rx_bytes += pkt_len;
                                 break;
                         }
@@ -887,9 +883,9 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
                 desc->die_dt = DT_FEMPTY;
         }
  
-       *quota -= limit - (++boguscnt);
-
-       return boguscnt <= 0;
+       stats->rx_packets += rx_packets;
+       *quota -= rx_packets;
+       return *quota == 0;
  }
  
  /* Packet receive function for Ethernet AVB */
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h

index 721c1f8e892fc56ed1e9144619aa32ac676226b1..5ba606a596e779bc17081c55e5bf5c52555ada0b 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -59,28 +59,51 @@
  #undef FRAME_FILTER_DEBUG
  /* #define FRAME_FILTER_DEBUG */
  
+struct stmmac_q_tx_stats {
+       u64_stats_t tx_bytes;
+       u64_stats_t tx_set_ic_bit;
+       u64_stats_t tx_tso_frames;
+       u64_stats_t tx_tso_nfrags;
+};
+
+struct stmmac_napi_tx_stats {
+       u64_stats_t tx_packets;
+       u64_stats_t tx_pkt_n;
+       u64_stats_t poll;
+       u64_stats_t tx_clean;
+       u64_stats_t tx_set_ic_bit;
+};
+
  struct stmmac_txq_stats {
-       u64 tx_bytes;
-       u64 tx_packets;
-       u64 tx_pkt_n;
-       u64 tx_normal_irq_n;
-       u64 napi_poll;
-       u64 tx_clean;
-       u64 tx_set_ic_bit;
-       u64 tx_tso_frames;
-       u64 tx_tso_nfrags;
-       struct u64_stats_sync syncp;
+       /* Updates protected by tx queue lock. */
+       struct u64_stats_sync q_syncp;
+       struct stmmac_q_tx_stats q;
+
+       /* Updates protected by NAPI poll logic. */
+       struct u64_stats_sync napi_syncp;
+       struct stmmac_napi_tx_stats napi;
  } ____cacheline_aligned_in_smp;
  
+struct stmmac_napi_rx_stats {
+       u64_stats_t rx_bytes;
+       u64_stats_t rx_packets;
+       u64_stats_t rx_pkt_n;
+       u64_stats_t poll;
+};
+
  struct stmmac_rxq_stats {
-       u64 rx_bytes;
-       u64 rx_packets;
-       u64 rx_pkt_n;
-       u64 rx_normal_irq_n;
-       u64 napi_poll;
-       struct u64_stats_sync syncp;
+       /* Updates protected by NAPI poll logic. */
+       struct u64_stats_sync napi_syncp;
+       struct stmmac_napi_rx_stats napi;
  } ____cacheline_aligned_in_smp;
  
+/* Updates on each CPU protected by not allowing nested irqs. */
+struct stmmac_pcpu_stats {
+       struct u64_stats_sync syncp;
+       u64_stats_t rx_normal_irq_n[MTL_MAX_TX_QUEUES];
+       u64_stats_t tx_normal_irq_n[MTL_MAX_RX_QUEUES];
+};
+
  /* Extra statistic and debug information exposed by ethtool */
  struct stmmac_extra_stats {
         /* Transmit errors */
@@ -205,6 +228,7 @@ struct stmmac_extra_stats {
         /* per queue statistics */
         struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
         struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
+       struct stmmac_pcpu_stats __percpu *pcpu_stats;
         unsigned long rx_dropped;
         unsigned long rx_errors;
         unsigned long tx_dropped;
@@ -216,6 +240,7 @@ struct stmmac_safety_stats {
         unsigned long mac_errors[32];
         unsigned long mtl_errors[32];
         unsigned long dma_errors[32];
+       unsigned long dma_dpp_errors[32];
  };
  
  /* Number of fields in Safety Stats */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c

index 8f730ada71f91d70b5c1d2707601b927f20aeb79..6b65420e11b5c518251565ca94bfb4a849068436 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -353,6 +353,10 @@ static int imx_dwmac_probe(struct platform_device *pdev)
         if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY)
                 plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY;
  
+       /* Default TX Q0 to use TSO and rest TXQ for TBS */
+       for (int i = 1; i < plat_dat->tx_queues_to_use; i++)
+               plat_dat->tx_queues_cfg[i].tbs_en = 1;
+
         plat_dat->host_dma_width = dwmac->ops->addr_width;
         plat_dat->init = imx_dwmac_init;
         plat_dat->exit = imx_dwmac_exit;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c

index 137741b94122e5e99320eea5cad9909e6394dc7d..b21d99faa2d04c985427af61724dd073e3a2fe79 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -441,8 +441,7 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
                                      struct stmmac_extra_stats *x, u32 chan,
                                      u32 dir)
  {
-       struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
-       struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+       struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
         int ret = 0;
         u32 v;
  
@@ -455,9 +454,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
  
         if (v & EMAC_TX_INT) {
                 ret |= handle_tx;
-               u64_stats_update_begin(&txq_stats->syncp);
-               txq_stats->tx_normal_irq_n++;
-               u64_stats_update_end(&txq_stats->syncp);
+               u64_stats_update_begin(&stats->syncp);
+               u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+               u64_stats_update_end(&stats->syncp);
         }
  
         if (v & EMAC_TX_DMA_STOP_INT)
@@ -479,9 +478,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
  
         if (v & EMAC_RX_INT) {
                 ret |= handle_rx;
-               u64_stats_update_begin(&rxq_stats->syncp);
-               rxq_stats->rx_normal_irq_n++;
-               u64_stats_update_end(&rxq_stats->syncp);
+               u64_stats_update_begin(&stats->syncp);
+               u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+               u64_stats_update_end(&stats->syncp);
         }
  
         if (v & EMAC_RX_BUF_UA_INT)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c

index 9470d3fd2dede2bb436c05f6a92d87824c2db733..0d185e54eb7e24cfd4ef8de38e976aabd3ee9084 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -171,8 +171,7 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
         const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
         u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
         u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
-       struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
-       struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+       struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
         int ret = 0;
  
         if (dir == DMA_DIR_RX)
@@ -201,15 +200,15 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
         }
         /* TX/RX NORMAL interrupts */
         if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
-               u64_stats_update_begin(&rxq_stats->syncp);
-               rxq_stats->rx_normal_irq_n++;
-               u64_stats_update_end(&rxq_stats->syncp);
+               u64_stats_update_begin(&stats->syncp);
+               u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+               u64_stats_update_end(&stats->syncp);
                 ret |= handle_rx;
         }
         if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
-               u64_stats_update_begin(&txq_stats->syncp);
-               txq_stats->tx_normal_irq_n++;
-               u64_stats_update_end(&txq_stats->syncp);
+               u64_stats_update_begin(&stats->syncp);
+               u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+               u64_stats_update_end(&stats->syncp);
                 ret |= handle_tx;
         }
  
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c

index 7907d62d343759d661e00452198ef8e6cfef3601..85e18f9a22f92091bb98f1892d7bb1f5f08bcf2a 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -162,8 +162,7 @@ static void show_rx_process_state(unsigned int status)
  int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
                         struct stmmac_extra_stats *x, u32 chan, u32 dir)
  {
-       struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
-       struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+       struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
         int ret = 0;
         /* read the status register (CSR5) */
         u32 intr_status = readl(ioaddr + DMA_STATUS);
@@ -215,16 +214,16 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
                         u32 value = readl(ioaddr + DMA_INTR_ENA);
                         /* to schedule NAPI on real RIE event. */
                         if (likely(value & DMA_INTR_ENA_RIE)) {
-                               u64_stats_update_begin(&rxq_stats->syncp);
-                               rxq_stats->rx_normal_irq_n++;
-                               u64_stats_update_end(&rxq_stats->syncp);
+                               u64_stats_update_begin(&stats->syncp);
+                               u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+                               u64_stats_update_end(&stats->syncp);
                                 ret |= handle_rx;
                         }
                 }
                 if (likely(intr_status & DMA_STATUS_TI)) {
-                       u64_stats_update_begin(&txq_stats->syncp);
-                       txq_stats->tx_normal_irq_n++;
-                       u64_stats_update_end(&txq_stats->syncp);
+                       u64_stats_update_begin(&stats->syncp);
+                       u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+                       u64_stats_update_end(&stats->syncp);
                         ret |= handle_tx;
                 }
                 if (unlikely(intr_status & DMA_STATUS_ERI))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h

index 207ff1799f2c712fe1e10033fa4d2b32dbd197c6..6a2c7d22df1eb81dd216e00e7525bc8a9092c048 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -303,6 +303,8 @@
  #define XGMAC_RXCEIE                   BIT(4)
  #define XGMAC_TXCEIE                   BIT(0)
  #define XGMAC_MTL_ECC_INT_STATUS       0x000010cc
+#define XGMAC_MTL_DPP_CONTROL          0x000010e0
+#define XGMAC_DPP_DISABLE              BIT(0)
  #define XGMAC_MTL_TXQ_OPMODE(x)                (0x00001100 + (0x80 * (x)))
  #define XGMAC_TQS                      GENMASK(25, 16)
  #define XGMAC_TQS_SHIFT                        16
@@ -385,6 +387,7 @@
  #define XGMAC_DCEIE                    BIT(1)
  #define XGMAC_TCEIE                    BIT(0)
  #define XGMAC_DMA_ECC_INT_STATUS       0x0000306c
+#define XGMAC_DMA_DPP_INT_STATUS       0x00003074
  #define XGMAC_DMA_CH_CONTROL(x)                (0x00003100 + (0x80 * (x)))
  #define XGMAC_SPH                      BIT(24)
  #define XGMAC_PBLx8                    BIT(16)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c

index eb48211d9b0eb7013b436b0336c75f512fbf638a..1af2f89a0504ab4c7ad6042e52f5898ba064df6c 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -830,6 +830,44 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
         { false, "UNKNOWN", "Unknown Error" }, /* 31 */
  };
  
+#define DPP_RX_ERR "Read Rx Descriptor Parity checker Error"
+#define DPP_TX_ERR "Read Tx Descriptor Parity checker Error"
+
+static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = {
+       { true, "TDPES0", DPP_TX_ERR },
+       { true, "TDPES1", DPP_TX_ERR },
+       { true, "TDPES2", DPP_TX_ERR },
+       { true, "TDPES3", DPP_TX_ERR },
+       { true, "TDPES4", DPP_TX_ERR },
+       { true, "TDPES5", DPP_TX_ERR },
+       { true, "TDPES6", DPP_TX_ERR },
+       { true, "TDPES7", DPP_TX_ERR },
+       { true, "TDPES8", DPP_TX_ERR },
+       { true, "TDPES9", DPP_TX_ERR },
+       { true, "TDPES10", DPP_TX_ERR },
+       { true, "TDPES11", DPP_TX_ERR },
+       { true, "TDPES12", DPP_TX_ERR },
+       { true, "TDPES13", DPP_TX_ERR },
+       { true, "TDPES14", DPP_TX_ERR },
+       { true, "TDPES15", DPP_TX_ERR },
+       { true, "RDPES0", DPP_RX_ERR },
+       { true, "RDPES1", DPP_RX_ERR },
+       { true, "RDPES2", DPP_RX_ERR },
+       { true, "RDPES3", DPP_RX_ERR },
+       { true, "RDPES4", DPP_RX_ERR },
+       { true, "RDPES5", DPP_RX_ERR },
+       { true, "RDPES6", DPP_RX_ERR },
+       { true, "RDPES7", DPP_RX_ERR },
+       { true, "RDPES8", DPP_RX_ERR },
+       { true, "RDPES9", DPP_RX_ERR },
+       { true, "RDPES10", DPP_RX_ERR },
+       { true, "RDPES11", DPP_RX_ERR },
+       { true, "RDPES12", DPP_RX_ERR },
+       { true, "RDPES13", DPP_RX_ERR },
+       { true, "RDPES14", DPP_RX_ERR },
+       { true, "RDPES15", DPP_RX_ERR },
+};
+
  static void dwxgmac3_handle_dma_err(struct net_device *ndev,
                                     void __iomem *ioaddr, bool correctable,
                                     struct stmmac_safety_stats *stats)
@@ -841,6 +879,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev,
  
         dwxgmac3_log_error(ndev, value, correctable, "DMA",
                            dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
+
+       value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS);
+       writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS);
+
+       dwxgmac3_log_error(ndev, value, false, "DMA_DPP",
+                          dwxgmac3_dma_dpp_errors,
+                          STAT_OFF(dma_dpp_errors), stats);
  }
  
  static int
@@ -881,6 +926,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
         value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */
         writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL);
  
+       /* 5. Enable Data Path Parity Protection */
+       value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL);
+       /* already enabled by default, explicit enable it again */
+       value &= ~XGMAC_DPP_DISABLE;
+       writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL);
+
         return 0;
  }
  
@@ -914,7 +965,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev,
                 ret |= !corr;
         }
  
-       err = dma & (XGMAC_DEUIS | XGMAC_DECIS);
+       /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in
+        * DMA_Safety_Interrupt_Status, so we handle DMA Data Path
+        * Parity Errors here
+        */
+       err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS);
         corr = dma & XGMAC_DECIS;
         if (err) {
                 dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats);
@@ -930,6 +985,7 @@ static const struct dwxgmac3_error {
         { dwxgmac3_mac_errors },
         { dwxgmac3_mtl_errors },
         { dwxgmac3_dma_errors },
+       { dwxgmac3_dma_dpp_errors },
  };
  
  static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c

index 3cde695fec91bd7592e23e725517f0cccee08a42..dd2ab6185c40e813ee4401857875d3e8478303e7 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -337,8 +337,7 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
                                   struct stmmac_extra_stats *x, u32 chan,
                                   u32 dir)
  {
-       struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
-       struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+       struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
         u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
         u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
         int ret = 0;
@@ -367,15 +366,15 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
         /* TX/RX NORMAL interrupts */
         if (likely(intr_status & XGMAC_NIS)) {
                 if (likely(intr_status & XGMAC_RI)) {
-                       u64_stats_update_begin(&rxq_stats->syncp);
-                       rxq_stats->rx_normal_irq_n++;
-                       u64_stats_update_end(&rxq_stats->syncp);
+                       u64_stats_update_begin(&stats->syncp);
+                       u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+                       u64_stats_update_end(&stats->syncp);
                         ret |= handle_rx;
                 }
                 if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
-                       u64_stats_update_begin(&txq_stats->syncp);
-                       txq_stats->tx_normal_irq_n++;
-                       u64_stats_update_end(&txq_stats->syncp);
+                       u64_stats_update_begin(&stats->syncp);
+                       u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+                       u64_stats_update_end(&stats->syncp);
                         ret |= handle_tx;
                 }
         }
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c

index 1bd34b2a47e81494eeddf72814b585e47d0b8c60..29367105df548271d3aa22cfad80a40dece256c1 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -224,7 +224,7 @@ static const struct stmmac_hwif_entry {
                 .regs = {
                         .ptp_off = PTP_GMAC4_OFFSET,
                         .mmc_off = MMC_GMAC4_OFFSET,
-                       .est_off = EST_XGMAC_OFFSET,
+                       .est_off = EST_GMAC4_OFFSET,
                 },
                 .desc = &dwmac4_desc_ops,
                 .dma = &dwmac410_dma_ops,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c

index 42d27b97dd1d036e1410131060b65220b0ab2180..ec44becf0e2d289c4f6aeab983c54e93d70faf75 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -549,44 +549,79 @@ stmmac_set_pauseparam(struct net_device *netdev,
         }
  }
  
+static u64 stmmac_get_rx_normal_irq_n(struct stmmac_priv *priv, int q)
+{
+       u64 total;
+       int cpu;
+
+       total = 0;
+       for_each_possible_cpu(cpu) {
+               struct stmmac_pcpu_stats *pcpu;
+               unsigned int start;
+               u64 irq_n;
+
+               pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu);
+               do {
+                       start = u64_stats_fetch_begin(&pcpu->syncp);
+                       irq_n = u64_stats_read(&pcpu->rx_normal_irq_n[q]);
+               } while (u64_stats_fetch_retry(&pcpu->syncp, start));
+               total += irq_n;
+       }
+       return total;
+}
+
+static u64 stmmac_get_tx_normal_irq_n(struct stmmac_priv *priv, int q)
+{
+       u64 total;
+       int cpu;
+
+       total = 0;
+       for_each_possible_cpu(cpu) {
+               struct stmmac_pcpu_stats *pcpu;
+               unsigned int start;
+               u64 irq_n;
+
+               pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu);
+               do {
+                       start = u64_stats_fetch_begin(&pcpu->syncp);
+                       irq_n = u64_stats_read(&pcpu->tx_normal_irq_n[q]);
+               } while (u64_stats_fetch_retry(&pcpu->syncp, start));
+               total += irq_n;
+       }
+       return total;
+}
+
  static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
  {
         u32 tx_cnt = priv->plat->tx_queues_to_use;
         u32 rx_cnt = priv->plat->rx_queues_to_use;
         unsigned int start;
-       int q, stat;
-       char *p;
+       int q;
  
         for (q = 0; q < tx_cnt; q++) {
                 struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q];
-               struct stmmac_txq_stats snapshot;
+               u64 pkt_n;
  
                 do {
-                       start = u64_stats_fetch_begin(&txq_stats->syncp);
-                       snapshot = *txq_stats;
-               } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+                       start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+                       pkt_n = u64_stats_read(&txq_stats->napi.tx_pkt_n);
+               } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
  
-               p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
-               for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
-                       *data++ = (*(u64 *)p);
-                       p += sizeof(u64);
-               }
+               *data++ = pkt_n;
+               *data++ = stmmac_get_tx_normal_irq_n(priv, q);
         }
  
         for (q = 0; q < rx_cnt; q++) {
                 struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q];
-               struct stmmac_rxq_stats snapshot;
+               u64 pkt_n;
  
                 do {
-                       start = u64_stats_fetch_begin(&rxq_stats->syncp);
-                       snapshot = *rxq_stats;
-               } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+                       start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+                       pkt_n = u64_stats_read(&rxq_stats->napi.rx_pkt_n);
+               } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
  
-               p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
-               for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
-                       *data++ = (*(u64 *)p);
-                       p += sizeof(u64);
-               }
+               *data++ = pkt_n;
+               *data++ = stmmac_get_rx_normal_irq_n(priv, q);
         }
  }
  
@@ -645,39 +680,49 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
         pos = j;
         for (i = 0; i < rx_queues_count; i++) {
                 struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i];
-               struct stmmac_rxq_stats snapshot;
+               struct stmmac_napi_rx_stats snapshot;
+               u64 n_irq;
  
                 j = pos;
                 do {
-                       start = u64_stats_fetch_begin(&rxq_stats->syncp);
-                       snapshot = *rxq_stats;
-               } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
-
-               data[j++] += snapshot.rx_pkt_n;
-               data[j++] += snapshot.rx_normal_irq_n;
-               normal_irq_n += snapshot.rx_normal_irq_n;
-               napi_poll += snapshot.napi_poll;
+                       start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+                       snapshot = rxq_stats->napi;
+               } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
+
+               data[j++] += u64_stats_read(&snapshot.rx_pkt_n);
+               n_irq = stmmac_get_rx_normal_irq_n(priv, i);
+               data[j++] += n_irq;
+               normal_irq_n += n_irq;
+               napi_poll += u64_stats_read(&snapshot.poll);
         }
  
         pos = j;
         for (i = 0; i < tx_queues_count; i++) {
                 struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i];
-               struct stmmac_txq_stats snapshot;
+               struct stmmac_napi_tx_stats napi_snapshot;
+               struct stmmac_q_tx_stats q_snapshot;
+               u64 n_irq;
  
                 j = pos;
                 do {
-                       start = u64_stats_fetch_begin(&txq_stats->syncp);
-                       snapshot = *txq_stats;
-               } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
-
-               data[j++] += snapshot.tx_pkt_n;
-               data[j++] += snapshot.tx_normal_irq_n;
-               normal_irq_n += snapshot.tx_normal_irq_n;
-               data[j++] += snapshot.tx_clean;
-               data[j++] += snapshot.tx_set_ic_bit;
-               data[j++] += snapshot.tx_tso_frames;
-               data[j++] += snapshot.tx_tso_nfrags;
-               napi_poll += snapshot.napi_poll;
+                       start = u64_stats_fetch_begin(&txq_stats->q_syncp);
+                       q_snapshot = txq_stats->q;
+               } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start));
+               do {
+                       start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+                       napi_snapshot = txq_stats->napi;
+               } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
+
+               data[j++] += u64_stats_read(&napi_snapshot.tx_pkt_n);
+               n_irq = stmmac_get_tx_normal_irq_n(priv, i);
+               data[j++] += n_irq;
+               normal_irq_n += n_irq;
+               data[j++] += u64_stats_read(&napi_snapshot.tx_clean);
+               data[j++] += u64_stats_read(&q_snapshot.tx_set_ic_bit) +
+                       u64_stats_read(&napi_snapshot.tx_set_ic_bit);
+               data[j++] += u64_stats_read(&q_snapshot.tx_tso_frames);
+               data[j++] += u64_stats_read(&q_snapshot.tx_tso_nfrags);
+               napi_poll += u64_stats_read(&napi_snapshot.poll);
         }
         normal_irq_n += priv->xstats.rx_early_irq;
         data[j++] = normal_irq_n;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

index b334eb16da23aa49af2a0849dc86127a0a69494a..7c6aef033a456455e4334466bf276755f33dbd47 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2482,7 +2482,6 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
         struct xdp_desc xdp_desc;
         bool work_done = true;
         u32 tx_set_ic_bit = 0;
-       unsigned long flags;
  
         /* Avoids TX time-out as we are sharing with slow path */
         txq_trans_cond_update(nq);
@@ -2566,9 +2565,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
                 tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size);
                 entry = tx_q->cur_tx;
         }
-       flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
-       txq_stats->tx_set_ic_bit += tx_set_ic_bit;
-       u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+       u64_stats_update_begin(&txq_stats->napi_syncp);
+       u64_stats_add(&txq_stats->napi.tx_set_ic_bit, tx_set_ic_bit);
+       u64_stats_update_end(&txq_stats->napi_syncp);
  
         if (tx_desc) {
                 stmmac_flush_tx_descriptors(priv, queue);
@@ -2616,7 +2615,6 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
         unsigned int bytes_compl = 0, pkts_compl = 0;
         unsigned int entry, xmits = 0, count = 0;
         u32 tx_packets = 0, tx_errors = 0;
-       unsigned long flags;
  
         __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
  
@@ -2674,7 +2672,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
                         }
                         if (skb) {
                                 stmmac_get_tx_hwtstamp(priv, p, skb);
-                       } else {
+                       } else if (tx_q->xsk_pool &&
+                                  xp_tx_metadata_enabled(tx_q->xsk_pool)) {
                                 struct stmmac_xsk_tx_complete tx_compl = {
                                         .priv = priv,
                                         .desc = p,
@@ -2782,11 +2781,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
         if (tx_q->dirty_tx != tx_q->cur_tx)
                 *pending_packets = true;
  
-       flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
-       txq_stats->tx_packets += tx_packets;
-       txq_stats->tx_pkt_n += tx_packets;
-       txq_stats->tx_clean++;
-       u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+       u64_stats_update_begin(&txq_stats->napi_syncp);
+       u64_stats_add(&txq_stats->napi.tx_packets, tx_packets);
+       u64_stats_add(&txq_stats->napi.tx_pkt_n, tx_packets);
+       u64_stats_inc(&txq_stats->napi.tx_clean);
+       u64_stats_update_end(&txq_stats->napi_syncp);
  
         priv->xstats.tx_errors += tx_errors;
  
@@ -3932,6 +3931,9 @@ static int __stmmac_open(struct net_device *dev,
         priv->rx_copybreak = STMMAC_RX_COPYBREAK;
  
         buf_sz = dma_conf->dma_buf_sz;
+       for (int i = 0; i < MTL_MAX_TX_QUEUES; i++)
+               if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN)
+                       dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs;
         memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));
  
         stmmac_reset_queues_param(priv);
@@ -4004,8 +4006,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
  {
         set_bit(__FPE_REMOVING, &priv->fpe_task_state);
  
-       if (priv->fpe_wq)
+       if (priv->fpe_wq) {
                 destroy_workqueue(priv->fpe_wq);
+               priv->fpe_wq = NULL;
+       }
  
         netdev_info(priv->dev, "FPE workqueue stop");
  }
@@ -4210,7 +4214,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
         struct stmmac_tx_queue *tx_q;
         bool has_vlan, set_ic;
         u8 proto_hdr_len, hdr;
-       unsigned long flags;
         u32 pay_len, mss;
         dma_addr_t des;
         int i;
@@ -4375,13 +4378,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                 netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
         }
  
-       flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
-       txq_stats->tx_bytes += skb->len;
-       txq_stats->tx_tso_frames++;
-       txq_stats->tx_tso_nfrags += nfrags;
+       u64_stats_update_begin(&txq_stats->q_syncp);
+       u64_stats_add(&txq_stats->q.tx_bytes, skb->len);
+       u64_stats_inc(&txq_stats->q.tx_tso_frames);
+       u64_stats_add(&txq_stats->q.tx_tso_nfrags, nfrags);
         if (set_ic)
-               txq_stats->tx_set_ic_bit++;
-       u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+               u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+       u64_stats_update_end(&txq_stats->q_syncp);
  
         if (priv->sarc_type)
                 stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4480,7 +4483,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
         struct stmmac_tx_queue *tx_q;
         bool has_vlan, set_ic;
         int entry, first_tx;
-       unsigned long flags;
         dma_addr_t des;
  
         tx_q = &priv->dma_conf.tx_queue[queue];
@@ -4650,11 +4652,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                 netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
         }
  
-       flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
-       txq_stats->tx_bytes += skb->len;
+       u64_stats_update_begin(&txq_stats->q_syncp);
+       u64_stats_add(&txq_stats->q.tx_bytes, skb->len);
         if (set_ic)
-               txq_stats->tx_set_ic_bit++;
-       u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+               u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+       u64_stats_update_end(&txq_stats->q_syncp);
  
         if (priv->sarc_type)
                 stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4918,12 +4920,11 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
                 set_ic = false;
  
         if (set_ic) {
-               unsigned long flags;
                 tx_q->tx_count_frames = 0;
                 stmmac_set_tx_ic(priv, tx_desc);
-               flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
-               txq_stats->tx_set_ic_bit++;
-               u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+               u64_stats_update_begin(&txq_stats->q_syncp);
+               u64_stats_inc(&txq_stats->q.tx_set_ic_bit);
+               u64_stats_update_end(&txq_stats->q_syncp);
         }
  
         stmmac_enable_dma_transmission(priv, priv->ioaddr);
@@ -5073,7 +5074,6 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
         unsigned int len = xdp->data_end - xdp->data;
         enum pkt_hash_types hash_type;
         int coe = priv->hw->rx_csum;
-       unsigned long flags;
         struct sk_buff *skb;
         u32 hash;
  
@@ -5103,10 +5103,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
         skb_record_rx_queue(skb, queue);
         napi_gro_receive(&ch->rxtx_napi, skb);
  
-       flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
-       rxq_stats->rx_pkt_n++;
-       rxq_stats->rx_bytes += len;
-       u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+       u64_stats_update_begin(&rxq_stats->napi_syncp);
+       u64_stats_inc(&rxq_stats->napi.rx_pkt_n);
+       u64_stats_add(&rxq_stats->napi.rx_bytes, len);
+       u64_stats_update_end(&rxq_stats->napi_syncp);
  }
  
  static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
@@ -5188,7 +5188,6 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
         unsigned int desc_size;
         struct bpf_prog *prog;
         bool failure = false;
-       unsigned long flags;
         int xdp_status = 0;
         int status = 0;
  
@@ -5343,9 +5342,9 @@ read_again:
  
         stmmac_finalize_xdp_rx(priv, xdp_status);
  
-       flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
-       rxq_stats->rx_pkt_n += count;
-       u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+       u64_stats_update_begin(&rxq_stats->napi_syncp);
+       u64_stats_add(&rxq_stats->napi.rx_pkt_n, count);
+       u64_stats_update_end(&rxq_stats->napi_syncp);
  
         priv->xstats.rx_dropped += rx_dropped;
         priv->xstats.rx_errors += rx_errors;
@@ -5383,7 +5382,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
         unsigned int desc_size;
         struct sk_buff *skb = NULL;
         struct stmmac_xdp_buff ctx;
-       unsigned long flags;
         int xdp_status = 0;
         int buf_sz;
  
@@ -5643,11 +5641,11 @@ drain_data:
  
         stmmac_rx_refill(priv, queue);
  
-       flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
-       rxq_stats->rx_packets += rx_packets;
-       rxq_stats->rx_bytes += rx_bytes;
-       rxq_stats->rx_pkt_n += count;
-       u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+       u64_stats_update_begin(&rxq_stats->napi_syncp);
+       u64_stats_add(&rxq_stats->napi.rx_packets, rx_packets);
+       u64_stats_add(&rxq_stats->napi.rx_bytes, rx_bytes);
+       u64_stats_add(&rxq_stats->napi.rx_pkt_n, count);
+       u64_stats_update_end(&rxq_stats->napi_syncp);
  
         priv->xstats.rx_dropped += rx_dropped;
         priv->xstats.rx_errors += rx_errors;
@@ -5662,13 +5660,12 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
         struct stmmac_priv *priv = ch->priv_data;
         struct stmmac_rxq_stats *rxq_stats;
         u32 chan = ch->index;
-       unsigned long flags;
         int work_done;
  
         rxq_stats = &priv->xstats.rxq_stats[chan];
-       flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
-       rxq_stats->napi_poll++;
-       u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+       u64_stats_update_begin(&rxq_stats->napi_syncp);
+       u64_stats_inc(&rxq_stats->napi.poll);
+       u64_stats_update_end(&rxq_stats->napi_syncp);
  
         work_done = stmmac_rx(priv, budget, chan);
         if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -5690,13 +5687,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
         struct stmmac_txq_stats *txq_stats;
         bool pending_packets = false;
         u32 chan = ch->index;
-       unsigned long flags;
         int work_done;
  
         txq_stats = &priv->xstats.txq_stats[chan];
-       flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
-       txq_stats->napi_poll++;
-       u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+       u64_stats_update_begin(&txq_stats->napi_syncp);
+       u64_stats_inc(&txq_stats->napi.poll);
+       u64_stats_update_end(&txq_stats->napi_syncp);
  
         work_done = stmmac_tx_clean(priv, budget, chan, &pending_packets);
         work_done = min(work_done, budget);
@@ -5726,17 +5722,16 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
         struct stmmac_rxq_stats *rxq_stats;
         struct stmmac_txq_stats *txq_stats;
         u32 chan = ch->index;
-       unsigned long flags;
  
         rxq_stats = &priv->xstats.rxq_stats[chan];
-       flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
-       rxq_stats->napi_poll++;
-       u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
+       u64_stats_update_begin(&rxq_stats->napi_syncp);
+       u64_stats_inc(&rxq_stats->napi.poll);
+       u64_stats_update_end(&rxq_stats->napi_syncp);
  
         txq_stats = &priv->xstats.txq_stats[chan];
-       flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
-       txq_stats->napi_poll++;
-       u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
+       u64_stats_update_begin(&txq_stats->napi_syncp);
+       u64_stats_inc(&txq_stats->napi.poll);
+       u64_stats_update_end(&txq_stats->napi_syncp);
  
         tx_done = stmmac_tx_clean(priv, budget, chan, &tx_pending_packets);
         tx_done = min(tx_done, budget);
@@ -6067,11 +6062,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id)
         struct net_device *dev = (struct net_device *)dev_id;
         struct stmmac_priv *priv = netdev_priv(dev);
  
-       if (unlikely(!dev)) {
-               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
-               return IRQ_NONE;
-       }
-
         /* Check if adapter is up */
         if (test_bit(STMMAC_DOWN, &priv->state))
                 return IRQ_HANDLED;
@@ -6087,11 +6077,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id)
         struct net_device *dev = (struct net_device *)dev_id;
         struct stmmac_priv *priv = netdev_priv(dev);
  
-       if (unlikely(!dev)) {
-               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
-               return IRQ_NONE;
-       }
-
         /* Check if adapter is up */
         if (test_bit(STMMAC_DOWN, &priv->state))
                 return IRQ_HANDLED;
@@ -6113,11 +6098,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data)
         dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]);
         priv = container_of(dma_conf, struct stmmac_priv, dma_conf);
  
-       if (unlikely(!data)) {
-               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
-               return IRQ_NONE;
-       }
-
         /* Check if adapter is up */
         if (test_bit(STMMAC_DOWN, &priv->state))
                 return IRQ_HANDLED;
@@ -6144,11 +6124,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data)
         dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]);
         priv = container_of(dma_conf, struct stmmac_priv, dma_conf);
  
-       if (unlikely(!data)) {
-               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
-               return IRQ_NONE;
-       }
-
         /* Check if adapter is up */
         if (test_bit(STMMAC_DOWN, &priv->state))
                 return IRQ_HANDLED;
@@ -7062,10 +7037,13 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64
                 u64 tx_bytes;
  
                 do {
-                       start = u64_stats_fetch_begin(&txq_stats->syncp);
-                       tx_packets = txq_stats->tx_packets;
-                       tx_bytes   = txq_stats->tx_bytes;
-               } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+                       start = u64_stats_fetch_begin(&txq_stats->q_syncp);
+                       tx_bytes   = u64_stats_read(&txq_stats->q.tx_bytes);
+               } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start));
+               do {
+                       start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+                       tx_packets = u64_stats_read(&txq_stats->napi.tx_packets);
+               } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
  
                 stats->tx_packets += tx_packets;
                 stats->tx_bytes += tx_bytes;
@@ -7077,10 +7055,10 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64
                 u64 rx_bytes;
  
                 do {
-                       start = u64_stats_fetch_begin(&rxq_stats->syncp);
-                       rx_packets = rxq_stats->rx_packets;
-                       rx_bytes   = rxq_stats->rx_bytes;
-               } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+                       start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+                       rx_packets = u64_stats_read(&rxq_stats->napi.rx_packets);
+                       rx_bytes   = u64_stats_read(&rxq_stats->napi.rx_bytes);
+               } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
  
                 stats->rx_packets += rx_packets;
                 stats->rx_bytes += rx_bytes;
@@ -7474,9 +7452,16 @@ int stmmac_dvr_probe(struct device *device,
         priv->dev = ndev;
  
         for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
-               u64_stats_init(&priv->xstats.rxq_stats[i].syncp);
-       for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
-               u64_stats_init(&priv->xstats.txq_stats[i].syncp);
+               u64_stats_init(&priv->xstats.rxq_stats[i].napi_syncp);
+       for (i = 0; i < MTL_MAX_TX_QUEUES; i++) {
+               u64_stats_init(&priv->xstats.txq_stats[i].q_syncp);
+               u64_stats_init(&priv->xstats.txq_stats[i].napi_syncp);
+       }
+
+       priv->xstats.pcpu_stats =
+               devm_netdev_alloc_pcpu_stats(device, struct stmmac_pcpu_stats);
+       if (!priv->xstats.pcpu_stats)
+               return -ENOMEM;
  
         stmmac_set_ethtool_ops(ndev);
         priv->pause = pause;
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig

index be01450c20dc0199ebc5d1d731eca04a47781539..1530d13984d42606f6e4b4d1d28ca3f8c6461ac0 100644 (file)
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -189,6 +189,7 @@ config TI_ICSSG_PRUETH
         select TI_K3_CPPI_DESC_POOL
         depends on PRU_REMOTEPROC
         depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER
+       depends on PTP_1588_CLOCK_OPTIONAL
         help
           Support dual Gigabit Ethernet ports over the ICSSG PRU Subsystem.
           This subsystem is available starting with the AM65 platform.
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c

index ea85c6dd5484617a038e565312e8ad0ccdce6c75..c0a5abd8d9a8e6e0d113c36a9557a1de1c360993 100644 (file)
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
                 }
         }
  
+       phy->mac_managed_pm = true;
+
         slave->phy = phy;
  
         phy_attached_info(slave->phy);
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c

index 498c50c6d1a701b86596b9148dbdc4523176cee7..087dcb67505a2da5995963d5d67d36dadb580a47 100644 (file)
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -773,6 +773,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
                         slave->slave_num);
                 return;
         }
+
+       phy->mac_managed_pm = true;
+
         slave->phy = phy;
  
         phy_attached_info(slave->phy);
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c

index bcccf43d368b7e2a9efc3a6e855a5b2299e9cdc4..dbbea914604057ca97823c5c6e164be50303df08 100644 (file)
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -638,6 +638,16 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
                  freq, cpts->cc.mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
  }
  
+static void cpts_clk_unregister(void *clk)
+{
+       clk_hw_unregister_mux(clk);
+}
+
+static void cpts_clk_del_provider(void *np)
+{
+       of_clk_del_provider(np);
+}
+
  static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
  {
         struct device_node *refclk_np;
@@ -687,9 +697,7 @@ static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
                 goto mux_fail;
         }
  
-       ret = devm_add_action_or_reset(cpts->dev,
-                                      (void(*)(void *))clk_hw_unregister_mux,
-                                      clk_hw);
+       ret = devm_add_action_or_reset(cpts->dev, cpts_clk_unregister, clk_hw);
         if (ret) {
                 dev_err(cpts->dev, "add clkmux unreg action %d", ret);
                 goto mux_fail;
@@ -699,8 +707,7 @@ static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node)
         if (ret)
                 goto mux_fail;
  
-       ret = devm_add_action_or_reset(cpts->dev,
-                                      (void(*)(void *))of_clk_del_provider,
+       ret = devm_add_action_or_reset(cpts->dev, cpts_clk_del_provider,
                                        refclk_np);
         if (ret) {
                 dev_err(cpts->dev, "add clkmux provider unreg action %d", ret);
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c

index d5b75af163d35e6b257e9d3dcb48ada80f8a0f20..c1b0d35c8d05207b351b9313f6ae24b986ff3ca1 100644 (file)
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -384,18 +384,18 @@ static int gelic_descr_prepare_rx(struct gelic_card *card,
         if (gelic_descr_get_status(descr) !=  GELIC_DESCR_DMA_NOT_IN_USE)
                 dev_info(ctodev(card), "%s: ERROR status\n", __func__);
  
-       descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size);
-       if (!descr->skb) {
-               descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */
-               return -ENOMEM;
-       }
         descr->hw_regs.dmac_cmd_status = 0;
         descr->hw_regs.result_size = 0;
         descr->hw_regs.valid_size = 0;
         descr->hw_regs.data_error = 0;
         descr->hw_regs.payload.dev_addr = 0;
         descr->hw_regs.payload.size = 0;
-       descr->skb = NULL;
+
+       descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size);
+       if (!descr->skb) {
+               descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */
+               return -ENOMEM;
+       }
  
         offset = ((unsigned long)descr->skb->data) &
                 (GELIC_NET_RXBUF_ALIGN - 1);
diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c

index 2b6a607ac0b78848d8694af42df349aaa24f16d8..a273362c9e703ce8f807ae86e4705565ab51c605 100644 (file)
--- a/drivers/net/fddi/skfp/skfddi.c
+++ b/drivers/net/fddi/skfp/skfddi.c
@@ -153,6 +153,7 @@ static const struct pci_device_id skfddi_pci_tbl[] = {
         { }                     /* Terminating entry */
  };
  MODULE_DEVICE_TABLE(pci, skfddi_pci_tbl);
+MODULE_DESCRIPTION("SysKonnect FDDI PCI driver");
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Mirko Lindner <mlindner@syskonnect.de>");
  
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c

index b1919278e931f4e9fb6b2d2ec2feb2193b2cda61..2b5357d94ff5683049510c71c932be05abe0f211 100644 (file)
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1903,26 +1903,26 @@ static int __init gtp_init(void)
  
         get_random_bytes(&gtp_h_initval, sizeof(gtp_h_initval));
  
-       err = rtnl_link_register(&gtp_link_ops);
+       err = register_pernet_subsys(&gtp_net_ops);
         if (err < 0)
                 goto error_out;
  
-       err = genl_register_family(&gtp_genl_family);
+       err = rtnl_link_register(&gtp_link_ops);
         if (err < 0)
-               goto unreg_rtnl_link;
+               goto unreg_pernet_subsys;
  
-       err = register_pernet_subsys(&gtp_net_ops);
+       err = genl_register_family(&gtp_genl_family);
         if (err < 0)
-               goto unreg_genl_family;
+               goto unreg_rtnl_link;
  
         pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
                 sizeof(struct pdp_ctx));
         return 0;
  
-unreg_genl_family:
-       genl_unregister_family(&gtp_genl_family);
  unreg_rtnl_link:
         rtnl_link_unregister(&gtp_link_ops);
+unreg_pernet_subsys:
+       unregister_pernet_subsys(&gtp_net_ops);
  error_out:
         pr_err("error loading GTP module loaded\n");
         return err;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c

index 1dafa44155d0eb31dfaea9cacdc3954ebee75f4b..a6fcbda64ecc60e5beccf20f2043ab00870cbd5d 100644 (file)
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -708,7 +708,10 @@ void netvsc_device_remove(struct hv_device *device)
         /* Disable NAPI and disassociate its context from the device. */
         for (i = 0; i < net_device->num_chn; i++) {
                 /* See also vmbus_reset_channel_cb(). */
-               napi_disable(&net_device->chan_table[i].napi);
+               /* only disable enabled NAPI channel */
+               if (i < ndev->real_num_rx_queues)
+                       napi_disable(&net_device->chan_table[i].napi);
+
                 netif_napi_del(&net_device->chan_table[i].napi);
         }
  
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c

index 273bd8a20122cdbec238326febb0227ca7889c8d..11831a1c97623985401317e690b66f6985abb750 100644 (file)
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -42,6 +42,10 @@
  #define LINKCHANGE_INT (2 * HZ)
  #define VF_TAKEOVER_INT (HZ / 10)
  
+/* Macros to define the context of vf registration */
+#define VF_REG_IN_PROBE                1
+#define VF_REG_IN_NOTIFIER     2
+
  static unsigned int ring_size __ro_after_init = 128;
  module_param(ring_size, uint, 0444);
  MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)");
@@ -2185,7 +2189,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
  }
  
  static int netvsc_vf_join(struct net_device *vf_netdev,
-                         struct net_device *ndev)
+                         struct net_device *ndev, int context)
  {
         struct net_device_context *ndev_ctx = netdev_priv(ndev);
         int ret;
@@ -2208,7 +2212,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
                 goto upper_link_failed;
         }
  
-       schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+       /* If this registration is called from probe context vf_takeover
+        * is taken care of later in probe itself.
+        */
+       if (context == VF_REG_IN_NOTIFIER)
+               schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
  
         call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
  
@@ -2346,7 +2354,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev)
         return NOTIFY_DONE;
  }
  
-static int netvsc_register_vf(struct net_device *vf_netdev)
+static int netvsc_register_vf(struct net_device *vf_netdev, int context)
  {
         struct net_device_context *net_device_ctx;
         struct netvsc_device *netvsc_dev;
@@ -2386,7 +2394,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
  
         netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
  
-       if (netvsc_vf_join(vf_netdev, ndev) != 0)
+       if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
                 return NOTIFY_DONE;
  
         dev_hold(vf_netdev);
@@ -2484,10 +2492,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
         return NOTIFY_OK;
  }
  
+static int check_dev_is_matching_vf(struct net_device *event_ndev)
+{
+       /* Skip NetVSC interfaces */
+       if (event_ndev->netdev_ops == &device_ops)
+               return -ENODEV;
+
+       /* Avoid non-Ethernet type devices */
+       if (event_ndev->type != ARPHRD_ETHER)
+               return -ENODEV;
+
+       /* Avoid Vlan dev with same MAC registering as VF */
+       if (is_vlan_dev(event_ndev))
+               return -ENODEV;
+
+       /* Avoid Bonding master dev with same MAC registering as VF */
+       if (netif_is_bond_master(event_ndev))
+               return -ENODEV;
+
+       return 0;
+}
+
  static int netvsc_probe(struct hv_device *dev,
                         const struct hv_vmbus_device_id *dev_id)
  {
-       struct net_device *net = NULL;
+       struct net_device *net = NULL, *vf_netdev;
         struct net_device_context *net_device_ctx;
         struct netvsc_device_info *device_info = NULL;
         struct netvsc_device *nvdev;
@@ -2599,6 +2628,30 @@ static int netvsc_probe(struct hv_device *dev,
         }
  
         list_add(&net_device_ctx->list, &netvsc_dev_list);
+
+       /* When the hv_netvsc driver is unloaded and reloaded, the
+        * NET_DEVICE_REGISTER for the vf device is replayed before probe
+        * is complete. This is because register_netdevice_notifier() gets
+        * registered before vmbus_driver_register() so that callback func
+        * is set before probe and we don't miss events like NETDEV_POST_INIT
+        * So, in this section we try to register the matching vf device that
+        * is present as a netdevice, knowing that its register call is not
+        * processed in the netvsc_netdev_notifier(as probing is progress and
+        * get_netvsc_byslot fails).
+        */
+       for_each_netdev(dev_net(net), vf_netdev) {
+               ret = check_dev_is_matching_vf(vf_netdev);
+               if (ret != 0)
+                       continue;
+
+               if (net != get_netvsc_byslot(vf_netdev))
+                       continue;
+
+               netvsc_prepare_bonding(vf_netdev);
+               netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+               __netvsc_vf_setup(net, vf_netdev);
+               break;
+       }
         rtnl_unlock();
  
         netvsc_devinfo_put(device_info);
@@ -2754,28 +2807,17 @@ static int netvsc_netdev_event(struct notifier_block *this,
                                unsigned long event, void *ptr)
  {
         struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+       int ret = 0;
  
-       /* Skip our own events */
-       if (event_dev->netdev_ops == &device_ops)
-               return NOTIFY_DONE;
-
-       /* Avoid non-Ethernet type devices */
-       if (event_dev->type != ARPHRD_ETHER)
-               return NOTIFY_DONE;
-
-       /* Avoid Vlan dev with same MAC registering as VF */
-       if (is_vlan_dev(event_dev))
-               return NOTIFY_DONE;
-
-       /* Avoid Bonding master dev with same MAC registering as VF */
-       if (netif_is_bond_master(event_dev))
+       ret = check_dev_is_matching_vf(event_dev);
+       if (ret != 0)
                 return NOTIFY_DONE;
  
         switch (event) {
         case NETDEV_POST_INIT:
                 return netvsc_prepare_bonding(event_dev);
         case NETDEV_REGISTER:
-               return netvsc_register_vf(event_dev);
+               return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER);
         case NETDEV_UNREGISTER:
                 return netvsc_unregister_vf(event_dev);
         case NETDEV_UP:
diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c

index 35e55f198e05cea45ddb747dff34963eabfac92e..2930141d7dd2d30201e4bd1d4492cbd681fb7c0a 100644 (file)
--- a/drivers/net/ieee802154/fakelb.c
+++ b/drivers/net/ieee802154/fakelb.c
@@ -259,4 +259,5 @@ static __exit void fake_remove_module(void)
  
  module_init(fakelb_init_module);
  module_exit(fake_remove_module);
+MODULE_DESCRIPTION("IEEE 802.15.4 loopback driver");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c

index 4bc05948f772d8b009e692a62fec564c7380aae3..a78c692f2d3c5dde24879254cab725bc97072634 100644 (file)
--- a/drivers/net/ipa/ipa_interrupt.c
+++ b/drivers/net/ipa/ipa_interrupt.c
@@ -212,7 +212,7 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt)
         u32 unit_count;
         u32 unit;
  
-       unit_count = roundup(ipa->endpoint_count, 32);
+       unit_count = DIV_ROUND_UP(ipa->endpoint_count, 32);
         for (unit = 0; unit < unit_count; unit++) {
                 const struct reg *reg;
                 u32 val;
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c

index 60944a4beadae611b2c2dd012683cb30052d0f6b..1afc4c47be73f906f58721aa227dd35fd30eff73 100644 (file)
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -237,4 +237,5 @@ static void __exit ipvtap_exit(void)
  module_exit(ipvtap_exit);
  MODULE_ALIAS_RTNL_LINK("ipvtap");
  MODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>");
+MODULE_DESCRIPTION("IP-VLAN based tap driver");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c

index b4d3b9cde8bd685202f135cf9c845d1be76ef428..92a7a36b93ac0cc1b02a551b974fb390254ac484 100644 (file)
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -835,14 +835,14 @@ static void nsim_dev_trap_report_work(struct work_struct *work)
                                       trap_report_dw.work);
         nsim_dev = nsim_trap_data->nsim_dev;
  
-       /* For each running port and enabled packet trap, generate a UDP
-        * packet with a random 5-tuple and report it.
-        */
         if (!devl_trylock(priv_to_devlink(nsim_dev))) {
-               schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 0);
+               schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 1);
                 return;
         }
  
+       /* For each running port and enabled packet trap, generate a UDP
+        * packet with a random 5-tuple and report it.
+        */
         list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
                 if (!netif_running(nsim_dev_port->ns->netdev))
                         continue;
diff --git a/drivers/net/phy/mdio_devres.c b/drivers/net/phy/mdio_devres.c

index 69b829e6ab35b84a07f0063f3a6f7b48ea1a6de1..7fd3377dbd7960adb2af9f19999b1a940b55fc33 100644 (file)
--- a/drivers/net/phy/mdio_devres.c
+++ b/drivers/net/phy/mdio_devres.c
@@ -131,4 +131,5 @@ int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
  EXPORT_SYMBOL(__devm_of_mdiobus_register);
  #endif /* CONFIG_OF_MDIO */
  
+MODULE_DESCRIPTION("Network MDIO bus devres helpers");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c

index 8a20d9889f105bc609f56a2632132e0ef2c08504..0f3a1538a8b8ee045953a3c5ff308dc824ea7c0a 100644 (file)
--- a/drivers/net/phy/mediatek-ge-soc.c
+++ b/drivers/net/phy/mediatek-ge-soc.c
@@ -489,7 +489,7 @@ static int tx_r50_fill_result(struct phy_device *phydev, u16 tx_r50_cal_val,
         u16 reg, val;
  
         if (phydev->drv->phy_id == MTK_GPHY_ID_MT7988)
-               bias = -2;
+               bias = -1;
  
         val = clamp_val(bias + tx_r50_cal_val, 0, 63);
  
@@ -705,6 +705,11 @@ restore:
  static void mt798x_phy_common_finetune(struct phy_device *phydev)
  {
         phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
+       /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */
+       __phy_write(phydev, 0x11, 0xc71);
+       __phy_write(phydev, 0x12, 0xc);
+       __phy_write(phydev, 0x10, 0x8fae);
+
         /* EnabRandUpdTrig = 1 */
         __phy_write(phydev, 0x11, 0x2f00);
         __phy_write(phydev, 0x12, 0xe);
@@ -715,15 +720,56 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev)
         __phy_write(phydev, 0x12, 0x0);
         __phy_write(phydev, 0x10, 0x83aa);
  
-       /* TrFreeze = 0 */
+       /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */
+       __phy_write(phydev, 0x11, 0x240);
+       __phy_write(phydev, 0x12, 0x0);
+       __phy_write(phydev, 0x10, 0x9680);
+
+       /* TrFreeze = 0 (mt7988 default) */
         __phy_write(phydev, 0x11, 0x0);
         __phy_write(phydev, 0x12, 0x0);
         __phy_write(phydev, 0x10, 0x9686);
  
+       /* SSTrKp100 = 5 */
+       /* SSTrKf100 = 6 */
+       /* SSTrKp1000Mas = 5 */
+       /* SSTrKf1000Mas = 6 */
         /* SSTrKp1000Slv = 5 */
+       /* SSTrKf1000Slv = 6 */
         __phy_write(phydev, 0x11, 0xbaef);
         __phy_write(phydev, 0x12, 0x2e);
         __phy_write(phydev, 0x10, 0x968c);
+       phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+}
+
+static void mt7981_phy_finetune(struct phy_device *phydev)
+{
+       u16 val[8] = { 0x01ce, 0x01c1,
+                      0x020f, 0x0202,
+                      0x03d0, 0x03c0,
+                      0x0013, 0x0005 };
+       int i, k;
+
+       /* 100M eye finetune:
+        * Keep middle level of TX MLT3 shapper as default.
+        * Only change TX MLT3 overshoot level here.
+        */
+       for (k = 0, i = 1; i < 12; i++) {
+               if (i % 3 == 0)
+                       continue;
+               phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]);
+       }
+
+       phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
+       /* ResetSyncOffset = 6 */
+       __phy_write(phydev, 0x11, 0x600);
+       __phy_write(phydev, 0x12, 0x0);
+       __phy_write(phydev, 0x10, 0x8fc0);
+
+       /* VgaDecRate = 1 */
+       __phy_write(phydev, 0x11, 0x4c2a);
+       __phy_write(phydev, 0x12, 0x3e);
+       __phy_write(phydev, 0x10, 0x8fa4);
  
         /* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2,
          * MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2
@@ -738,7 +784,7 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev)
         __phy_write(phydev, 0x10, 0x8ec0);
         phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
  
-       /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9*/
+       /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */
         phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234,
                        MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK,
                        BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0x9));
@@ -771,48 +817,6 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev)
         phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_LDO_OUTPUT_V, 0x2222);
  }
  
-static void mt7981_phy_finetune(struct phy_device *phydev)
-{
-       u16 val[8] = { 0x01ce, 0x01c1,
-                      0x020f, 0x0202,
-                      0x03d0, 0x03c0,
-                      0x0013, 0x0005 };
-       int i, k;
-
-       /* 100M eye finetune:
-        * Keep middle level of TX MLT3 shapper as default.
-        * Only change TX MLT3 overshoot level here.
-        */
-       for (k = 0, i = 1; i < 12; i++) {
-               if (i % 3 == 0)
-                       continue;
-               phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]);
-       }
-
-       phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
-       /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */
-       __phy_write(phydev, 0x11, 0xc71);
-       __phy_write(phydev, 0x12, 0xc);
-       __phy_write(phydev, 0x10, 0x8fae);
-
-       /* ResetSyncOffset = 6 */
-       __phy_write(phydev, 0x11, 0x600);
-       __phy_write(phydev, 0x12, 0x0);
-       __phy_write(phydev, 0x10, 0x8fc0);
-
-       /* VgaDecRate = 1 */
-       __phy_write(phydev, 0x11, 0x4c2a);
-       __phy_write(phydev, 0x12, 0x3e);
-       __phy_write(phydev, 0x10, 0x8fa4);
-
-       /* FfeUpdGainForce = 4 */
-       __phy_write(phydev, 0x11, 0x240);
-       __phy_write(phydev, 0x12, 0x0);
-       __phy_write(phydev, 0x10, 0x9680);
-
-       phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
-}
-
  static void mt7988_phy_finetune(struct phy_device *phydev)
  {
         u16 val[12] = { 0x0187, 0x01cd, 0x01c8, 0x0182,
@@ -827,17 +831,7 @@ static void mt7988_phy_finetune(struct phy_device *phydev)
         /* TCT finetune */
         phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5);
  
-       /* Disable TX power saving */
-       phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7,
-                      MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8);
-
         phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
-
-       /* SlvDSPreadyTime = 24, MasDSPreadyTime = 12 */
-       __phy_write(phydev, 0x11, 0x671);
-       __phy_write(phydev, 0x12, 0xc);
-       __phy_write(phydev, 0x10, 0x8fae);
-
         /* ResetSyncOffset = 5 */
         __phy_write(phydev, 0x11, 0x500);
         __phy_write(phydev, 0x12, 0x0);
@@ -845,13 +839,27 @@ static void mt7988_phy_finetune(struct phy_device *phydev)
  
         /* VgaDecRate is 1 at default on mt7988 */
  
-       phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+       /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7,
+        * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7
+        */
+       __phy_write(phydev, 0x11, 0xb90a);
+       __phy_write(phydev, 0x12, 0x6f);
+       __phy_write(phydev, 0x10, 0x8f82);
+
+       /* RemAckCntLimitCtrl = 1 */
+       __phy_write(phydev, 0x11, 0xfbba);
+       __phy_write(phydev, 0x12, 0xc3);
+       __phy_write(phydev, 0x10, 0x87f8);
  
-       phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_2A30);
-       /* TxClkOffset = 2 */
-       __phy_modify(phydev, MTK_PHY_ANARG_RG, MTK_PHY_TCLKOFFSET_MASK,
-                    FIELD_PREP(MTK_PHY_TCLKOFFSET_MASK, 0x2));
         phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+
+       /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */
+       phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234,
+                      MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK,
+                      BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0xa));
+
+       /* rg_tr_lpf_cnt_val = 1023 */
+       phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_LPF_CNT_VAL, 0x3ff);
  }
  
  static void mt798x_phy_eee(struct phy_device *phydev)
@@ -884,11 +892,11 @@ static void mt798x_phy_eee(struct phy_device *phydev)
                        MTK_PHY_LPI_SLV_SEND_TX_EN,
                        FIELD_PREP(MTK_PHY_LPI_SLV_SEND_TX_TIMER_MASK, 0x120));
  
-       phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239,
-                      MTK_PHY_LPI_SEND_LOC_TIMER_MASK |
-                      MTK_PHY_LPI_TXPCS_LOC_RCV,
-                      FIELD_PREP(MTK_PHY_LPI_SEND_LOC_TIMER_MASK, 0x117));
+       /* Keep MTK_PHY_LPI_SEND_LOC_TIMER as 375 */
+       phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239,
+                          MTK_PHY_LPI_TXPCS_LOC_RCV);
  
+       /* This also fixes some IoT issues, such as CH340 */
         phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG2C7,
                        MTK_PHY_MAX_GAIN_MASK | MTK_PHY_MIN_GAIN_MASK,
                        FIELD_PREP(MTK_PHY_MAX_GAIN_MASK, 0x8) |
@@ -922,7 +930,7 @@ static void mt798x_phy_eee(struct phy_device *phydev)
         __phy_write(phydev, 0x12, 0x0);
         __phy_write(phydev, 0x10, 0x9690);
  
-       /* REG_EEE_st2TrKf1000 = 3 */
+       /* REG_EEE_st2TrKf1000 = 2 */
         __phy_write(phydev, 0x11, 0x114f);
         __phy_write(phydev, 0x12, 0x2);
         __phy_write(phydev, 0x10, 0x969a);
@@ -947,7 +955,7 @@ static void mt798x_phy_eee(struct phy_device *phydev)
         __phy_write(phydev, 0x12, 0x0);
         __phy_write(phydev, 0x10, 0x96b8);
  
-       /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 1 */
+       /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */
         __phy_write(phydev, 0x11, 0x1463);
         __phy_write(phydev, 0x12, 0x0);
         __phy_write(phydev, 0x10, 0x96ca);
@@ -1459,6 +1467,13 @@ static int mt7988_phy_probe(struct phy_device *phydev)
         if (err)
                 return err;
  
+       /* Disable TX power saving at probing to:
+        * 1. Meet common mode compliance test criteria
+        * 2. Make sure that TX-VCM calibration works fine
+        */
+       phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7,
+                      MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8);
+
         return mt798x_phy_calibration(phydev);
  }
  
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c

index 894172a3e15fe8a6a86e38b64246ebefcb65362b..337899c69738ec46c2b585db76e11fa25738560e 100644 (file)
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -421,9 +421,11 @@ static int rtl8211f_config_init(struct phy_device *phydev)
                                 ERR_PTR(ret));
                         return ret;
                 }
+
+               return genphy_soft_reset(phydev);
         }
  
-       return genphy_soft_reset(phydev);
+       return 0;
  }
  
  static int rtl821x_suspend(struct phy_device *phydev)
diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c

index 40ce8abe699954d106b216e8351925ec1cd9d3a1..cc7d1113ece0ee7d6cfa0e1830bbbdc664c28514 100644 (file)
--- a/drivers/net/plip/plip.c
+++ b/drivers/net/plip/plip.c
@@ -1437,4 +1437,5 @@ static int __init plip_init (void)
  
  module_init(plip_init);
  module_exit(plip_cleanup_module);
+MODULE_DESCRIPTION("PLIP (parallel port) network module");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/ppp/bsd_comp.c b/drivers/net/ppp/bsd_comp.c

index db0dc36d12e33ed7a481319c2d3a219ff88f3782..55954594e157e2eb6a4331e6da3b2d2ff547a4a3 100644 (file)
--- a/drivers/net/ppp/bsd_comp.c
+++ b/drivers/net/ppp/bsd_comp.c
@@ -1166,5 +1166,6 @@ static void __exit bsdcomp_cleanup(void)
  
  module_init(bsdcomp_init);
  module_exit(bsdcomp_cleanup);
+MODULE_DESCRIPTION("PPP BSD-Compress compression module");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_ALIAS("ppp-compress-" __stringify(CI_BSD_COMPRESS));
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c

index 840da924708b393b16a82ab4e07746538214c0f9..c33c3db3cc0896d9b033aa2b188fbf46be8afd68 100644 (file)
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -87,6 +87,7 @@ struct asyncppp {
  static int flag_time = HZ;
  module_param(flag_time, int, 0);
  MODULE_PARM_DESC(flag_time, "ppp_async: interval between flagged packets (in clock ticks)");
+MODULE_DESCRIPTION("PPP async serial channel module");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_LDISC(N_PPP);
  
@@ -460,6 +461,10 @@ ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg)
         case PPPIOCSMRU:
                 if (get_user(val, p))
                         break;
+               if (val > U16_MAX) {
+                       err = -EINVAL;
+                       break;
+               }
                 if (val < PPP_MRU)
                         val = PPP_MRU;
                 ap->mru = val;
diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c

index e6d48e5c65a3379e12bbbd4679b1d0b326d3e93b..4d2ff63f2ee2f6bb02a07419513549890956d32e 100644 (file)
--- a/drivers/net/ppp/ppp_deflate.c
+++ b/drivers/net/ppp/ppp_deflate.c
@@ -630,6 +630,7 @@ static void __exit deflate_cleanup(void)
  
  module_init(deflate_init);
  module_exit(deflate_cleanup);
+MODULE_DESCRIPTION("PPP Deflate compression module");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_ALIAS("ppp-compress-" __stringify(CI_DEFLATE));
  MODULE_ALIAS("ppp-compress-" __stringify(CI_DEFLATE_DRAFT));
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c

index 0193af2d31c9bcf5dc8864da49ba4e75ba0192fc..3dd52bf28f15bf9f260719f1bf61e45d6d48b3f7 100644 (file)
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -3604,6 +3604,7 @@ EXPORT_SYMBOL(ppp_input_error);
  EXPORT_SYMBOL(ppp_output_wakeup);
  EXPORT_SYMBOL(ppp_register_compressor);
  EXPORT_SYMBOL(ppp_unregister_compressor);
+MODULE_DESCRIPTION("Generic PPP layer driver");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_CHARDEV(PPP_MAJOR, 0);
  MODULE_ALIAS_RTNL_LINK("ppp");
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c

index 52d05ce4a2819815963eebf4df399058835ff350..45bf59ac8f5711867ed1ba433d3f5e7800b769e4 100644 (file)
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -724,5 +724,6 @@ ppp_sync_cleanup(void)
  
  module_init(ppp_sync_init);
  module_exit(ppp_sync_cleanup);
+MODULE_DESCRIPTION("PPP synchronous TTY channel module");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_LDISC(N_SYNC_PPP);
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c

index 8e7238e97d0a71708ebcddda9b1e1a50ab28c17d..2ea4f4890d23b5f1c5229c7f8b303ee85a954037 100644 (file)
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1007,26 +1007,21 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
         struct sk_buff *skb;
         int error = 0;
  
-       if (sk->sk_state & PPPOX_BOUND) {
-               error = -EIO;
-               goto end;
-       }
+       if (sk->sk_state & PPPOX_BOUND)
+               return -EIO;
  
         skb = skb_recv_datagram(sk, flags, &error);
-       if (error < 0)
-               goto end;
+       if (!skb)
+               return error;
  
-       if (skb) {
-               total_len = min_t(size_t, total_len, skb->len);
-               error = skb_copy_datagram_msg(skb, 0, m, total_len);
-               if (error == 0) {
-                       consume_skb(skb);
-                       return total_len;
-               }
+       total_len = min_t(size_t, total_len, skb->len);
+       error = skb_copy_datagram_msg(skb, 0, m, total_len);
+       if (error == 0) {
+               consume_skb(skb);
+               return total_len;
         }
  
         kfree_skb(skb);
-end:
         return error;
  }
  
diff --git a/drivers/net/tun.c b/drivers/net/tun.c

index 4a4f8c8e79fa12dc84a8c83cefbf964dd40e1aa2..8f95a562b8d0c471c44591629e04809f7faef9b2 100644 (file)
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
                                    tun->tfiles[tun->numqueues - 1]);
                 ntfile = rtnl_dereference(tun->tfiles[index]);
                 ntfile->queue_index = index;
+               ntfile->xdp_rxq.queue_index = index;
                 rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
                                    NULL);
  
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c

index 99ec1d4a972db8c1232ce8ee8eb8d97385a9b5f0..8b6d6a1b3c2eca086e77915e26428c1110127f4d 100644 (file)
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc)
         err = dm_read_shared_word(dev, 1, loc, &res);
         if (err < 0) {
                 netdev_err(dev->net, "MDIO read error: %d\n", err);
-               return err;
+               return 0;
         }
  
         netdev_dbg(dev->net,
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c

index a6d653ff552a261ca50d331dd7d7aa875ca3c362..ba6c8ac2a736f501736419f9e8640e89695515cc 100644 (file)
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
  
                 lan78xx_rx_urb_submit_all(dev);
  
+               local_bh_disable();
                 napi_schedule(&dev->napi);
+               local_bh_enable();
         }
  
         return 0;
@@ -3033,7 +3035,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
         if (dev->chipid == ID_REV_CHIP_ID_7801_)
                 buf &= ~MAC_CR_GMII_EN_;
  
-       if (dev->chipid == ID_REV_CHIP_ID_7800_) {
+       if (dev->chipid == ID_REV_CHIP_ID_7800_ ||
+           dev->chipid == ID_REV_CHIP_ID_7850_) {
                 ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig);
                 if (!ret && sig != EEPROM_INDICATOR) {
                         /* Implies there is no external eeprom. Set mac speed */
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c

index a530f20ee257550141e5ec7c17b5fba0087db248..2fa46baa589e5e87e12e145fe46268bdaf9fc219 100644 (file)
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -2104,6 +2104,11 @@ static const struct usb_device_id products[] = {
                 USB_DEVICE(0x0424, 0x9E08),
                 .driver_info = (unsigned long) &smsc95xx_info,
         },
+       {
+               /* SYSTEC USB-SPEmodule1 10BASE-T1L Ethernet Device */
+               USB_DEVICE(0x0878, 0x1400),
+               .driver_info = (unsigned long)&smsc95xx_info,
+       },
         {
                 /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */
                 USB_DEVICE(0x184F, 0x0051),
diff --git a/drivers/net/veth.c b/drivers/net/veth.c

index 578e36ea1589c11f1ca26b6e05a84b455d22999e..cd4a6fe458f95d7bbc3c468ae8585d06cf0ac097 100644 (file)
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1208,14 +1208,6 @@ static int veth_enable_xdp(struct net_device *dev)
                                 veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
                                 return err;
                         }
-
-                       if (!veth_gro_requested(dev)) {
-                               /* user-space did not require GRO, but adding XDP
-                                * is supposed to get GRO working
-                                */
-                               dev->features |= NETIF_F_GRO;
-                               netdev_features_change(dev);
-                       }
                 }
         }
  
@@ -1235,18 +1227,9 @@ static void veth_disable_xdp(struct net_device *dev)
         for (i = 0; i < dev->real_num_rx_queues; i++)
                 rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
  
-       if (!netif_running(dev) || !veth_gro_requested(dev)) {
+       if (!netif_running(dev) || !veth_gro_requested(dev))
                 veth_napi_del(dev);
  
-               /* if user-space did not require GRO, since adding XDP
-                * enabled it, clear it now
-                */
-               if (!veth_gro_requested(dev) && netif_running(dev)) {
-                       dev->features &= ~NETIF_F_GRO;
-                       netdev_features_change(dev);
-               }
-       }
-
         veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
  }
  
@@ -1478,7 +1461,8 @@ static int veth_alloc_queues(struct net_device *dev)
         struct veth_priv *priv = netdev_priv(dev);
         int i;
  
-       priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
+       priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq),
+                           GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
         if (!priv->rq)
                 return -ENOMEM;
  
@@ -1494,7 +1478,7 @@ static void veth_free_queues(struct net_device *dev)
  {
         struct veth_priv *priv = netdev_priv(dev);
  
-       kfree(priv->rq);
+       kvfree(priv->rq);
  }
  
  static int veth_dev_init(struct net_device *dev)
@@ -1654,6 +1638,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                 }
  
                 if (!old_prog) {
+                       if (!veth_gro_requested(dev)) {
+                               /* user-space did not require GRO, but adding
+                                * XDP is supposed to get GRO working
+                                */
+                               dev->features |= NETIF_F_GRO;
+                               netdev_features_change(dev);
+                       }
+
                         peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
                         peer->max_mtu = max_mtu;
                 }
@@ -1669,6 +1661,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                         if (dev->flags & IFF_UP)
                                 veth_disable_xdp(dev);
  
+                       /* if user-space did not require GRO, since adding XDP
+                        * enabled it, clear it now
+                        */
+                       if (!veth_gro_requested(dev)) {
+                               dev->features &= ~NETIF_F_GRO;
+                               netdev_features_change(dev);
+                       }
+
                         if (peer) {
                                 peer->hw_features |= NETIF_F_GSO_SOFTWARE;
                                 peer->max_mtu = ETH_MAX_MTU;
diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c

index 43e0db78d42beccfc2883050bb2665c191e675f8..a742cec44e3db823ae3fa85d6161e20d10dc64fb 100644 (file)
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -1803,5 +1803,6 @@ static struct usb_driver ar5523_driver = {
  
  module_usb_driver(ar5523_driver);
  
+MODULE_DESCRIPTION("Atheros AR5523 wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_FIRMWARE(AR5523_FIRMWARE_FILE);
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c

index 41119fb177e306f30280d1a1d83ae5583976668d..4e6b4df8562f632e34089619f7a9b485b5e71595 100644 (file)
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1685,6 +1685,7 @@ static struct platform_driver wcn36xx_driver = {
  
  module_platform_driver(wcn36xx_driver);
  
+MODULE_DESCRIPTION("Qualcomm Atheros WCN3660/3680 wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_AUTHOR("Eugene Krasnikov k.eugene.e@gmail.com");
  MODULE_FIRMWARE(WLAN_NV_FILE);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c

index d55f3271d6190234220afd12ac8f6eb7a1d78f64..4f0c1e1a8e605daa4bcf907006bbd8e9a07490fa 100644 (file)
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c
@@ -20,6 +20,7 @@ static void __exit brcmf_bca_exit(void)
         brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_BCA, THIS_MODULE);
  }
  
+MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Broadcom AP chipsets");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_IMPORT_NS(BRCMFMAC);
  
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c

index 133c5ea6429cd0e17baea181209c4d701e662d0c..28d6a30cc0106d6a38b51e35c1f518accfdbe987 100644 (file)
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -3779,8 +3779,10 @@ static int brcmf_internal_escan_add_info(struct cfg80211_scan_request *req,
                 if (req->channels[i] == chan)
                         break;
         }
-       if (i == req->n_channels)
-               req->channels[req->n_channels++] = chan;
+       if (i == req->n_channels) {
+               req->n_channels++;
+               req->channels[i] = chan;
+       }
  
         for (i = 0; i < req->n_ssids; i++) {
                 if (req->ssids[i].ssid_len == ssid_len &&
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c

index f82fbbe3ecefb7af1019281b3f031f45b9ec30e6..90d06cda03a2f007e9f00c636a22a4a130670dff 100644 (file)
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c
@@ -20,6 +20,7 @@ static void __exit brcmf_cyw_exit(void)
         brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_CYW, THIS_MODULE);
  }
  
+MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Cypress/Infineon chipsets");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_IMPORT_NS(BRCMFMAC);
  
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c

index 02918d434556b04d797a4141f3dcaede15a7b494..b66135e3cff476a95c5482e099975fd01849bedf 100644 (file)
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c
@@ -20,6 +20,7 @@ static void __exit brcmf_wcc_exit(void)
         brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_WCC, THIS_MODULE);
  }
  
+MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Broadcom mobility chipsets");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_IMPORT_NS(BRCMFMAC);
  
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c

index b96f30d11644e24eb3886e5f2536d6eaad4d01db..dcc4810cb32472dbe8ee374091a2d58241af80c8 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -618,7 +618,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
                                          &tbl_rev);
         if (!IS_ERR(wifi_pkg)) {
                 if (tbl_rev != 2) {
-                       ret = PTR_ERR(wifi_pkg);
+                       ret = -EINVAL;
                         goto out_free;
                 }
  
@@ -634,7 +634,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
                                          &tbl_rev);
         if (!IS_ERR(wifi_pkg)) {
                 if (tbl_rev != 1) {
-                       ret = PTR_ERR(wifi_pkg);
+                       ret = -EINVAL;
                         goto out_free;
                 }
  
@@ -650,7 +650,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
                                          &tbl_rev);
         if (!IS_ERR(wifi_pkg)) {
                 if (tbl_rev != 0) {
-                       ret = PTR_ERR(wifi_pkg);
+                       ret = -EINVAL;
                         goto out_free;
                 }
  
@@ -707,7 +707,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
                                          &tbl_rev);
         if (!IS_ERR(wifi_pkg)) {
                 if (tbl_rev != 2) {
-                       ret = PTR_ERR(wifi_pkg);
+                       ret = -EINVAL;
                         goto out_free;
                 }
  
@@ -723,7 +723,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
                                          &tbl_rev);
         if (!IS_ERR(wifi_pkg)) {
                 if (tbl_rev != 1) {
-                       ret = PTR_ERR(wifi_pkg);
+                       ret = -EINVAL;
                         goto out_free;
                 }
  
@@ -739,7 +739,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
                                          &tbl_rev);
         if (!IS_ERR(wifi_pkg)) {
                 if (tbl_rev != 0) {
-                       ret = PTR_ERR(wifi_pkg);
+                       ret = -EINVAL;
                         goto out_free;
                 }
  
@@ -1116,6 +1116,9 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt)
                 goto read_table;
         }
  
+       ret = PTR_ERR(wifi_pkg);
+       goto out_free;
+
  read_table:
         fwrt->ppag_ver = tbl_rev;
         flags = &wifi_pkg->package.elements[1];
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h

index 798731ecbefde7f625d0cf00ef688f10281727be..b740c65a7dca25807ac648873a0df14796984d1b 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
@@ -537,7 +537,7 @@ enum iwl_fw_dbg_config_cmd_type {
  }; /* LDBG_CFG_CMD_TYPE_API_E_VER_1 */
  
  /* this token disables debug asserts in the firmware */
-#define IWL_FW_DBG_CONFIG_TOKEN 0x00011301
+#define IWL_FW_DBG_CONFIG_TOKEN 0x00010001
  
  /**
   * struct iwl_fw_dbg_config_cmd - configure FW debug
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h

index 9c69d3674384609b8a7c376900e07a04441c24b0..e6c0f928a6bbf338ca240214635313c05c4e8751 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
@@ -1,6 +1,6 @@
  /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
  /*
- * Copyright (C) 2005-2014, 2019-2021, 2023 Intel Corporation
+ * Copyright (C) 2005-2014, 2019-2021, 2023-2024 Intel Corporation
   * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
   * Copyright (C) 2016-2017 Intel Deutschland GmbH
   */
@@ -66,6 +66,16 @@ enum iwl_gen2_tx_fifo {
         IWL_GEN2_TRIG_TX_FIFO_VO,
  };
  
+enum iwl_bz_tx_fifo {
+       IWL_BZ_EDCA_TX_FIFO_BK,
+       IWL_BZ_EDCA_TX_FIFO_BE,
+       IWL_BZ_EDCA_TX_FIFO_VI,
+       IWL_BZ_EDCA_TX_FIFO_VO,
+       IWL_BZ_TRIG_TX_FIFO_BK,
+       IWL_BZ_TRIG_TX_FIFO_BE,
+       IWL_BZ_TRIG_TX_FIFO_VI,
+       IWL_BZ_TRIG_TX_FIFO_VO,
+};
  /**
   * enum iwl_tx_queue_cfg_actions - TXQ config options
   * @TX_QUEUE_CFG_ENABLE_QUEUE: enable a queue
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c

index e27774e7ed74d82bbbb9821f24a2bc3a1578395b..80fda056e46a698458ee4ecf1230f7ef315e3a2a 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1,6 +1,6 @@
  // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  /*
- * Copyright (C) 2005-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2024 Intel Corporation
   * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
   * Copyright (C) 2015-2017 Intel Deutschland GmbH
   */
@@ -19,7 +19,6 @@
   * @fwrt_ptr: pointer to the buffer coming from fwrt
   * @trans_ptr: pointer to struct %iwl_trans_dump_data which contains the
   *     transport's data.
- * @trans_len: length of the valid data in trans_ptr
   * @fwrt_len: length of the valid data in fwrt_ptr
   */
  struct iwl_fw_dump_ptrs {
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c

index ffe2670720c9257c30cd86aa3f5edf7386bce602..abf8001bdac179b7e4b40897182f8b54532d6c97 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -128,6 +128,7 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv)
         kfree(drv->fw.ucode_capa.cmd_versions);
         kfree(drv->fw.phy_integration_ver);
         kfree(drv->trans->dbg.pc_data);
+       drv->trans->dbg.pc_data = NULL;
  
         for (i = 0; i < IWL_UCODE_TYPE_MAX; i++)
                 iwl_free_fw_img(drv, drv->fw.img + i);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c

index 402896988686990fdd7ea9410f8e01819a7a1bc4..2f6774ec37b2286f1fb72e120a18435e804fcb58 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -668,7 +668,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
                         .has_eht = true,
                         .eht_cap_elem = {
                                 .mac_cap_info[0] =
-                                       IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS |
                                         IEEE80211_EHT_MAC_CAP0_OM_CONTROL |
                                         IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 |
                                         IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 |
@@ -793,7 +792,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = {
                         .has_eht = true,
                         .eht_cap_elem = {
                                 .mac_cap_info[0] =
-                                       IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS |
                                         IEEE80211_EHT_MAC_CAP0_OM_CONTROL |
                                         IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 |
                                         IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2,
@@ -1020,8 +1018,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
         if (CSR_HW_REV_TYPE(trans->hw_rev) == IWL_CFG_MAC_TYPE_GL &&
             iftype_data->eht_cap.has_eht) {
                 iftype_data->eht_cap.eht_cap_elem.mac_cap_info[0] &=
-                       ~(IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS |
-                         IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 |
+                       ~(IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 |
                           IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2);
                 iftype_data->eht_cap.eht_cap_elem.phy_cap_info[3] &=
                         ~(IEEE80211_EHT_PHY_CAP0_PARTIAL_BW_UL_MU_MIMO |
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c

index 4582afb149d720d077f30c0e7bb1814e5106d453..05b64176859e809986082c002f91eef247e83add 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1279,7 +1279,9 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
  
                 mvm->net_detect = true;
         } else {
-               struct iwl_wowlan_config_cmd wowlan_config_cmd = {};
+               struct iwl_wowlan_config_cmd wowlan_config_cmd = {
+                       .offloading_tid = 0,
+               };
  
                 wowlan_config_cmd.sta_id = mvmvif->deflink.ap_sta_id;
  
@@ -1291,6 +1293,11 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
                         goto out_noreset;
                 }
  
+               ret = iwl_mvm_sta_ensure_queue(
+                       mvm, ap_sta->txq[wowlan_config_cmd.offloading_tid]);
+               if (ret)
+                       goto out_noreset;
+
                 ret = iwl_mvm_get_wowlan_config(mvm, wowlan, &wowlan_config_cmd,
                                                 vif, mvmvif, ap_sta);
                 if (ret)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c

index c4f96125cf33af0eb066c3950e6dba18d505c4f4..25a5a31e63c2a33a0fc0bbe7317f65df62b9e8de 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -31,6 +31,17 @@ const u8 iwl_mvm_ac_to_gen2_tx_fifo[] = {
         IWL_GEN2_TRIG_TX_FIFO_BK,
  };
  
+const u8 iwl_mvm_ac_to_bz_tx_fifo[] = {
+       IWL_BZ_EDCA_TX_FIFO_VO,
+       IWL_BZ_EDCA_TX_FIFO_VI,
+       IWL_BZ_EDCA_TX_FIFO_BE,
+       IWL_BZ_EDCA_TX_FIFO_BK,
+       IWL_BZ_TRIG_TX_FIFO_VO,
+       IWL_BZ_TRIG_TX_FIFO_VI,
+       IWL_BZ_TRIG_TX_FIFO_BE,
+       IWL_BZ_TRIG_TX_FIFO_BK,
+};
+
  struct iwl_mvm_mac_iface_iterator_data {
         struct iwl_mvm *mvm;
         struct ieee80211_vif *vif;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c

index 7f13dff04b265caf265f24662d7609f60289120d..53e26c3c3a9af616ac057428503edf6270d53b3d 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1600,7 +1600,8 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
          */
         if (vif->type == NL80211_IFTYPE_AP ||
             vif->type == NL80211_IFTYPE_ADHOC) {
-               iwl_mvm_vif_dbgfs_add_link(mvm, vif);
+               if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
+                       iwl_mvm_vif_dbgfs_add_link(mvm, vif);
                 ret = 0;
                 goto out;
         }
@@ -1640,7 +1641,8 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
                         iwl_mvm_chandef_get_primary_80(&vif->bss_conf.chandef);
         }
  
-       iwl_mvm_vif_dbgfs_add_link(mvm, vif);
+       if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
+               iwl_mvm_vif_dbgfs_add_link(mvm, vif);
  
         if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
             vif->type == NL80211_IFTYPE_STATION && !vif->p2p &&
@@ -3685,6 +3687,9 @@ iwl_mvm_sta_state_notexist_to_none(struct iwl_mvm *mvm,
                                            NL80211_TDLS_SETUP);
         }
  
+       if (ret)
+               return ret;
+
         for_each_sta_active_link(vif, sta, link_sta, i)
                 link_sta->agg.max_rc_amsdu_len = 1;
  
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c

index 61170173f917a00707fc63956b8d7f252737c809..893b69fc841b896b234078240631d72c792a4c7e 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
@@ -81,7 +81,8 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw,
                 ieee80211_hw_set(mvm->hw, RX_INCLUDES_FCS);
         }
  
-       iwl_mvm_vif_dbgfs_add_link(mvm, vif);
+       if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
+               iwl_mvm_vif_dbgfs_add_link(mvm, vif);
  
         if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
             vif->type == NL80211_IFTYPE_STATION && !vif->p2p &&
@@ -437,6 +438,9 @@ __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm,
                 mvmvif->ap_ibss_active = false;
         }
  
+       iwl_mvm_link_changed(mvm, vif, link_conf,
+                            LINK_CONTEXT_MODIFY_ACTIVE, false);
+
         if (iwl_mvm_is_esr_supported(mvm->fwrt.trans) && n_active > 1) {
                 int ret = iwl_mvm_esr_mode_inactive(mvm, vif);
  
@@ -448,9 +452,6 @@ __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm,
         if (vif->type == NL80211_IFTYPE_MONITOR)
                 iwl_mvm_mld_rm_snif_sta(mvm, vif);
  
-       iwl_mvm_link_changed(mvm, vif, link_conf,
-                            LINK_CONTEXT_MODIFY_ACTIVE, false);
-
         if (switching_chanctx)
                 return;
         mvmvif->link[link_id]->phy_ctxt = NULL;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h

index 40627961b834a2ee860445b4557cf19498f4e166..81dbef6947f5578dd50e12f157124fa48fcdb728 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1581,12 +1581,16 @@ static inline int iwl_mvm_max_active_links(struct iwl_mvm *mvm,
  
  extern const u8 iwl_mvm_ac_to_tx_fifo[];
  extern const u8 iwl_mvm_ac_to_gen2_tx_fifo[];
+extern const u8 iwl_mvm_ac_to_bz_tx_fifo[];
  
  static inline u8 iwl_mvm_mac_ac_to_tx_fifo(struct iwl_mvm *mvm,
                                            enum ieee80211_ac_numbers ac)
  {
-       return iwl_mvm_has_new_tx_api(mvm) ?
-               iwl_mvm_ac_to_gen2_tx_fifo[ac] : iwl_mvm_ac_to_tx_fifo[ac];
+       if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
+               return iwl_mvm_ac_to_bz_tx_fifo[ac];
+       if (iwl_mvm_has_new_tx_api(mvm))
+               return iwl_mvm_ac_to_gen2_tx_fifo[ac];
+       return iwl_mvm_ac_to_tx_fifo[ac];
  }
  
  struct iwl_rate_info {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c

index 886d0009852872a5fc91a5f2f11476b3fdda78d8..af15d470c69bd60ea3737753b70832b9ffcf7d7f 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -505,6 +505,10 @@ static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue,
                 return false;
  
         mvm_sta = iwl_mvm_sta_from_mac80211(sta);
+
+       if (WARN_ON_ONCE(!mvm_sta->dup_data))
+               return false;
+
         dup_data = &mvm_sta->dup_data[queue];
  
         /*
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c

index 2a3ca97859749749fff954e097a9d62ae86f5d24..c2e0cff740e9281ee7f73a2a9db4d0add160fee1 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1502,6 +1502,34 @@ out_err:
         return ret;
  }
  
+int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm,
+                            struct ieee80211_txq *txq)
+{
+       struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq);
+       int ret = -EINVAL;
+
+       lockdep_assert_held(&mvm->mutex);
+
+       if (likely(test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) ||
+           !txq->sta) {
+               return 0;
+       }
+
+       if (!iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, txq->tid)) {
+               set_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state);
+               ret = 0;
+       }
+
+       local_bh_disable();
+       spin_lock(&mvm->add_stream_lock);
+       if (!list_empty(&mvmtxq->list))
+               list_del_init(&mvmtxq->list);
+       spin_unlock(&mvm->add_stream_lock);
+       local_bh_enable();
+
+       return ret;
+}
+
  void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk)
  {
         struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h

index b33a0ce096d46c2f92eb127d8942062b42f39345..3cf8a70274ce888833014b4492348c233be0a4c0 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -1,6 +1,6 @@
  /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
  /*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
   * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
   * Copyright (C) 2015-2016 Intel Deutschland GmbH
   */
@@ -571,6 +571,7 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm,
                                        bool disable);
  
  void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
+int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, struct ieee80211_txq *txq);
  void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk);
  int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
                          struct iwl_mvm_int_sta *sta, u8 *addr, u32 cipher,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c

index 218fdf1ed5304f333008c8015ae796ff59583db0..2e653a417d6269b333ec12e7d15f43f3c66be46b 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -1,6 +1,6 @@
  // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  /*
- * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
   * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
   * Copyright (C) 2017 Intel Deutschland GmbH
   */
@@ -972,6 +972,7 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
         if (!le32_to_cpu(notif->status) || !le32_to_cpu(notif->start)) {
                 /* End TE, notify mac80211 */
                 mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID;
+               mvmvif->time_event_data.link_id = -1;
                 iwl_mvm_p2p_roc_finished(mvm);
                 ieee80211_remain_on_channel_expired(mvm->hw);
         } else if (le32_to_cpu(notif->start)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c

index db986bfc4dc3fe4374e22b071ecbf178fbb796dd..461f26d9214e4ab81e32033c00629b92f7e31700 100644 (file)
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -520,13 +520,24 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm,
         }
  }
  
+static void iwl_mvm_copy_hdr(void *cmd, const void *hdr, int hdrlen,
+                            const u8 *addr3_override)
+{
+       struct ieee80211_hdr *out_hdr = cmd;
+
+       memcpy(cmd, hdr, hdrlen);
+       if (addr3_override)
+               memcpy(out_hdr->addr3, addr3_override, ETH_ALEN);
+}
+
  /*
   * Allocates and sets the Tx cmd the driver data pointers in the skb
   */
  static struct iwl_device_tx_cmd *
  iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
                       struct ieee80211_tx_info *info, int hdrlen,
-                     struct ieee80211_sta *sta, u8 sta_id)
+                     struct ieee80211_sta *sta, u8 sta_id,
+                     const u8 *addr3_override)
  {
         struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
         struct iwl_device_tx_cmd *dev_cmd;
@@ -584,7 +595,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
                         cmd->len = cpu_to_le16((u16)skb->len);
  
                         /* Copy MAC header from skb into command buffer */
-                       memcpy(cmd->hdr, hdr, hdrlen);
+                       iwl_mvm_copy_hdr(cmd->hdr, hdr, hdrlen, addr3_override);
  
                         cmd->flags = cpu_to_le16(flags);
                         cmd->rate_n_flags = cpu_to_le32(rate_n_flags);
@@ -599,7 +610,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
                         cmd->len = cpu_to_le16((u16)skb->len);
  
                         /* Copy MAC header from skb into command buffer */
-                       memcpy(cmd->hdr, hdr, hdrlen);
+                       iwl_mvm_copy_hdr(cmd->hdr, hdr, hdrlen, addr3_override);
  
                         cmd->flags = cpu_to_le32(flags);
                         cmd->rate_n_flags = cpu_to_le32(rate_n_flags);
@@ -617,7 +628,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb,
         iwl_mvm_set_tx_cmd_rate(mvm, tx_cmd, info, sta, hdr->frame_control);
  
         /* Copy MAC header from skb into command buffer */
-       memcpy(tx_cmd->hdr, hdr, hdrlen);
+       iwl_mvm_copy_hdr(tx_cmd->hdr, hdr, hdrlen, addr3_override);
  
  out:
         return dev_cmd;
@@ -820,7 +831,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
  
         IWL_DEBUG_TX(mvm, "station Id %d, queue=%d\n", sta_id, queue);
  
-       dev_cmd = iwl_mvm_set_tx_params(mvm, skb, &info, hdrlen, NULL, sta_id);
+       dev_cmd = iwl_mvm_set_tx_params(mvm, skb, &info, hdrlen, NULL, sta_id,
+                                       NULL);
         if (!dev_cmd)
                 return -1;
  
@@ -1140,7 +1152,8 @@ static int iwl_mvm_tx_pkt_queued(struct iwl_mvm *mvm,
   */
  static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
                            struct ieee80211_tx_info *info,
-                          struct ieee80211_sta *sta)
+                          struct ieee80211_sta *sta,
+                          const u8 *addr3_override)
  {
         struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
         struct iwl_mvm_sta *mvmsta;
@@ -1172,7 +1185,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
                 iwl_mvm_probe_resp_set_noa(mvm, skb);
  
         dev_cmd = iwl_mvm_set_tx_params(mvm, skb, info, hdrlen,
-                                       sta, mvmsta->deflink.sta_id);
+                                       sta, mvmsta->deflink.sta_id,
+                                       addr3_override);
         if (!dev_cmd)
                 goto drop;
  
@@ -1294,9 +1308,11 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb,
         struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
         struct ieee80211_tx_info info;
         struct sk_buff_head mpdus_skbs;
+       struct ieee80211_vif *vif;
         unsigned int payload_len;
         int ret;
         struct sk_buff *orig_skb = skb;
+       const u8 *addr3;
  
         if (WARN_ON_ONCE(!mvmsta))
                 return -1;
@@ -1307,26 +1323,59 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb,
         memcpy(&info, skb->cb, sizeof(info));
  
         if (!skb_is_gso(skb))
-               return iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
+               return iwl_mvm_tx_mpdu(mvm, skb, &info, sta, NULL);
  
         payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) -
                 tcp_hdrlen(skb) + skb->data_len;
  
         if (payload_len <= skb_shinfo(skb)->gso_size)
-               return iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
+               return iwl_mvm_tx_mpdu(mvm, skb, &info, sta, NULL);
  
         __skb_queue_head_init(&mpdus_skbs);
  
+       vif = info.control.vif;
+       if (!vif)
+               return -1;
+
         ret = iwl_mvm_tx_tso(mvm, skb, &info, sta, &mpdus_skbs);
         if (ret)
                 return ret;
  
         WARN_ON(skb_queue_empty(&mpdus_skbs));
  
+       /*
+        * As described in IEEE sta 802.11-2020, table 9-30 (Address
+        * field contents), A-MSDU address 3 should contain the BSSID
+        * address.
+        * Pass address 3 down to iwl_mvm_tx_mpdu() and further to set it
+        * in the command header. We need to preserve the original
+        * address 3 in the skb header to correctly create all the
+        * A-MSDU subframe headers from it.
+        */
+       switch (vif->type) {
+       case NL80211_IFTYPE_STATION:
+               addr3 = vif->cfg.ap_addr;
+               break;
+       case NL80211_IFTYPE_AP:
+               addr3 = vif->addr;
+               break;
+       default:
+               addr3 = NULL;
+               break;
+       }
+
         while (!skb_queue_empty(&mpdus_skbs)) {
+               struct ieee80211_hdr *hdr;
+               bool amsdu;
+
                 skb = __skb_dequeue(&mpdus_skbs);
+               hdr = (void *)skb->data;
+               amsdu = ieee80211_is_data_qos(hdr->frame_control) &&
+                       (*ieee80211_get_qos_ctl(hdr) &
+                        IEEE80211_QOS_CTL_A_MSDU_PRESENT);
  
-               ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta);
+               ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta,
+                                     amsdu ? addr3 : NULL);
                 if (ret) {
                         /* Free skbs created as part of TSO logic that have not yet been dequeued */
                         __skb_queue_purge(&mpdus_skbs);
diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c

index ce0179b8ab368fa7138a394afbc32678b05d20fc..0073b5e0f9c90ba473e71f1902724d8979bdc792 100644 (file)
--- a/drivers/net/wireless/intersil/p54/p54spi.c
+++ b/drivers/net/wireless/intersil/p54/p54spi.c
@@ -700,6 +700,7 @@ static struct spi_driver p54spi_driver = {
  
  module_spi_driver(p54spi_driver);
  
+MODULE_DESCRIPTION("Prism54 SPI wireless driver");
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Christian Lamparter <chunkeey@web.de>");
  MODULE_ALIAS("spi:cx3110x");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c

index 89d738deea62e9ed4d4f9044e1a193548833deff..e2146d30e55363ecdfec2ed26bb7753384f04317 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
@@ -728,6 +728,7 @@ const struct ieee80211_ops mt7603_ops = {
         .set_sar_specs = mt7603_set_sar_specs,
  };
  
+MODULE_DESCRIPTION("MediaTek MT7603E and MT76x8 wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
  
  static int __init mt7603_init(void)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c

index dab16b5fc3861198f9eccd3ae48c776bb03a66a1..0971c164b57e926d2d22dd1ef4f0559d45420db2 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -1375,4 +1375,5 @@ const struct ieee80211_ops mt7615_ops = {
  };
  EXPORT_SYMBOL_GPL(mt7615_ops);
  
+MODULE_DESCRIPTION("MediaTek MT7615E and MT7663E wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c

index ac036a072439d5d0a2c7eb8aad38048b4098b25f..87a956ea3ad74f6fb62a873eba0f499e04a99c32 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -270,4 +270,5 @@ static void __exit mt7615_exit(void)
  
  module_init(mt7615_init);
  module_exit(mt7615_exit);
+MODULE_DESCRIPTION("MediaTek MT7615E MMIO helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c

index 67cedd2555f973fc53cd0e37312771c3e0fa1116..9692890ba51b7b61c43991cb20a59d8394188c1e 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c
@@ -253,4 +253,5 @@ module_sdio_driver(mt7663s_driver);
  
  MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7663S (SDIO) wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c

index 04963b9f749838c41717884ecb818e9d0894667b..df737e1ff27b79a21c1b92ca899272eb961800f0 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -281,4 +281,5 @@ module_usb_driver(mt7663u_driver);
  
  MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7663U (USB) wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c

index 0052d103e276a895e3eddb81edb4397e0149281b..820b395900275a700da43e7139c3a22effa17140 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
@@ -349,4 +349,5 @@ EXPORT_SYMBOL_GPL(mt7663_usb_sdio_register_device);
  
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
  MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek MT7663 SDIO/USB helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c

index 96494ba2fdf767ba89d25b505ca7ee86b0797c19..3a20ba0d2492840304f9f5b108a8f82643ce7396 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -3160,4 +3160,5 @@ exit:
  EXPORT_SYMBOL_GPL(mt76_connac2_mcu_fill_message);
  
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT76x connac layer helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c

index c3a392a1a659e8a0581809a3a5e03d9847085496..bcd24c9072ec9e52f68c7dac0f124279d525eba0 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
@@ -342,4 +342,5 @@ int mt76x0_eeprom_init(struct mt76x02_dev *dev)
         return 0;
  }
  
+MODULE_DESCRIPTION("MediaTek MT76x EEPROM helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c

index 9277ff38b7a228fd778e8c9909eed0182eb14306..293e66fa83d5d0669d4f26acaff150a41f35debc 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
@@ -302,6 +302,7 @@ static const struct pci_device_id mt76x0e_device_table[] = {
  MODULE_DEVICE_TABLE(pci, mt76x0e_device_table);
  MODULE_FIRMWARE(MT7610E_FIRMWARE);
  MODULE_FIRMWARE(MT7650E_FIRMWARE);
+MODULE_DESCRIPTION("MediaTek MT76x0E (PCIe) wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
  
  static struct pci_driver mt76x0e_driver = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c

index 0422c332354a131dab040c72a8961ec6f1b79515..dd042949cf82bc6c87f4aaee8b7c5d912faf2162 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
@@ -336,6 +336,7 @@ err:
  MODULE_DEVICE_TABLE(usb, mt76x0_device_table);
  MODULE_FIRMWARE(MT7610E_FIRMWARE);
  MODULE_FIRMWARE(MT7610U_FIRMWARE);
+MODULE_DESCRIPTION("MediaTek MT76x0U (USB) wireless driver");
  MODULE_LICENSE("GPL");
  
  static struct usb_driver mt76x0_driver = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c

index 02da543dfc5cf381f6edac1753c0d627504e3e48..b2cc449142945f585a7d50ca0d68da21e35531e7 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
@@ -293,4 +293,5 @@ void mt76x02u_init_mcu(struct mt76_dev *dev)
  EXPORT_SYMBOL_GPL(mt76x02u_init_mcu);
  
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>");
+MODULE_DESCRIPTION("MediaTek MT76x02 MCU helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c

index 8a0e8124b894003ed80aad02ff6c59be9f3e457f..8020446be37bd99c15b9410b697060a304ef18af 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -696,4 +696,5 @@ void mt76x02_config_mac_addr_list(struct mt76x02_dev *dev)
  }
  EXPORT_SYMBOL_GPL(mt76x02_config_mac_addr_list);
  
+MODULE_DESCRIPTION("MediaTek MT76x02 helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c

index 8c01855885ce3949a16e63fe4169db3345a62044..1fe5f5a02f937783c669205e286e917ec0872db1 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
@@ -506,4 +506,5 @@ int mt76x2_eeprom_init(struct mt76x02_dev *dev)
  }
  EXPORT_SYMBOL_GPL(mt76x2_eeprom_init);
  
+MODULE_DESCRIPTION("MediaTek MT76x2 EEPROM helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c

index df85ebc6e1df07a7c2e48c30efa50cdeec9f993f..30959746e9242712e8196724157db8c93caa96f3 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
@@ -165,6 +165,7 @@ mt76x2e_resume(struct pci_dev *pdev)
  MODULE_DEVICE_TABLE(pci, mt76x2e_device_table);
  MODULE_FIRMWARE(MT7662_FIRMWARE);
  MODULE_FIRMWARE(MT7662_ROM_PATCH);
+MODULE_DESCRIPTION("MediaTek MT76x2E (PCIe) wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
  
  static struct pci_driver mt76pci_driver = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c

index 55068f3252ef341f4fbdc6d7dd382296e47b17f7..ca78e14251c2f5cda524c046b9c80a96b4481167 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
@@ -147,4 +147,5 @@ static struct usb_driver mt76x2u_driver = {
  module_usb_driver(mt76x2u_driver);
  
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>");
+MODULE_DESCRIPTION("MediaTek MT76x2U (USB) wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c

index aff4f21e843d29ae24b1ef094b407e344597bf36..3039f53e224546a406a2fc4cc0b2f8e07884d456 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -958,4 +958,5 @@ static void __exit mt7915_exit(void)
  
  module_init(mt7915_init);
  module_exit(mt7915_exit);
+MODULE_DESCRIPTION("MediaTek MT7915E MMIO helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c

index 0645417e05825f709e19e392e48544d36d2e3534..0d5adc5ddae38283eb618ba00284fb4b527c677c 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -1418,5 +1418,6 @@ const struct ieee80211_ops mt7921_ops = {
  };
  EXPORT_SYMBOL_GPL(mt7921_ops);
  
+MODULE_DESCRIPTION("MediaTek MT7921 core driver");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c

index 57903c6e4f11f0735fd80c4a3c54c4299ca48be0..dde26f3274783d9a7dc84da9be5e8fa214fba21e 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -544,4 +544,5 @@ MODULE_FIRMWARE(MT7922_FIRMWARE_WM);
  MODULE_FIRMWARE(MT7922_ROM_PATCH);
  MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7921E (PCIe) wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c

index 7591e54d289733472740a5afe747833070363f2e..a9ce1e746b954bc7c7599f23ec6a6c23031dd384 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c
@@ -323,5 +323,6 @@ static struct sdio_driver mt7921s_driver = {
         .drv.pm         = pm_sleep_ptr(&mt7921s_pm_ops),
  };
  module_sdio_driver(mt7921s_driver);
+MODULE_DESCRIPTION("MediaTek MT7921S (SDIO) wireless driver");
  MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/usb.c b/drivers/net/wireless/mediatek/mt76/mt7921/usb.c

index e5258c74fc077ac69b310b5bab0e56c02ebfcef5..8b7c03c47598de7bf9037ed40cb370607a837af4 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7921/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/usb.c
@@ -336,5 +336,6 @@ static struct usb_driver mt7921u_driver = {
  };
  module_usb_driver(mt7921u_driver);
  
+MODULE_DESCRIPTION("MediaTek MT7921U (USB) wireless driver");
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c

index 8f1075da4903908b5f149e12530d21f6735be07f..125a1be3cb64c6a1a14bb6aaac28ec0fbc11e889 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c
@@ -1450,4 +1450,5 @@ const struct ieee80211_ops mt7925_ops = {
  EXPORT_SYMBOL_GPL(mt7925_ops);
  
  MODULE_AUTHOR("Deren Wu <deren.wu@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek MT7925 core driver");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c

index 734f31ee40d3f740873dc0c53356a63fa11ff976..1fd99a856541589b1f3859795e8b23bcc0c06cdf 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c
@@ -583,4 +583,5 @@ MODULE_FIRMWARE(MT7925_FIRMWARE_WM);
  MODULE_FIRMWARE(MT7925_ROM_PATCH);
  MODULE_AUTHOR("Deren Wu <deren.wu@mediatek.com>");
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7925E (PCIe) wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c

index 9b885c5b3ed594ddc9c5b62f47dfd4522430f469..1e0f094fc9059dbb02585bdc168b6b763f198004 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
@@ -329,4 +329,5 @@ static struct usb_driver mt7925u_driver = {
  module_usb_driver(mt7925u_driver);
  
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT7925U (USB) wireless driver");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c

index 502be22dbe3677fb475371b7e4c564be074899d8..c42101aa9e45e958f605e37e763bb090c63aeea5 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c
@@ -862,5 +862,6 @@ int mt792x_load_firmware(struct mt792x_dev *dev)
  }
  EXPORT_SYMBOL_GPL(mt792x_load_firmware);
  
+MODULE_DESCRIPTION("MediaTek MT792x core driver");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c

index 2dd283caed36bf056127d17a6cd3e93e9f6664d4..589a3efb9f8c30bbce14ec288e8ad66ecf0acf99 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
@@ -314,5 +314,6 @@ void mt792xu_disconnect(struct usb_interface *usb_intf)
  }
  EXPORT_SYMBOL_GPL(mt792xu_disconnect);
  
+MODULE_DESCRIPTION("MediaTek MT792x USB helpers");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c

index 3c729b563edc5dd6f964e0e72c46f276367c3d65..699be57309c2e4db6d8ff3f456d55fe5a75be291 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
@@ -4477,7 +4477,8 @@ int mt7996_mcu_set_txpower_sku(struct mt7996_phy *phy)
  
         skb_put_data(skb, &req, sizeof(req));
         /* cck and ofdm */
-       skb_put_data(skb, &la.cck, sizeof(la.cck) + sizeof(la.ofdm));
+       skb_put_data(skb, &la.cck, sizeof(la.cck));
+       skb_put_data(skb, &la.ofdm, sizeof(la.ofdm));
         /* ht20 */
         skb_put_data(skb, &la.mcs[0], 8);
         /* ht40 */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c

index c50d89a445e9560672aeab8752de112220c9ab1c..9f2abfa273c9b060a793ae2594963f5c123fc5b0 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c
@@ -650,4 +650,5 @@ static void __exit mt7996_exit(void)
  
  module_init(mt7996_init);
  module_exit(mt7996_exit);
+MODULE_DESCRIPTION("MediaTek MT7996 MMIO helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c

index c52d550f0c32aac260e3163f14ec4989efbacc53..3e88798df0178c17cce2e94f588feb43711ad0a0 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/sdio.c
@@ -672,4 +672,5 @@ EXPORT_SYMBOL_GPL(mt76s_init);
  
  MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_DESCRIPTION("MediaTek MT76x SDIO helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c

index 1584665fe3cb68d890bd7606e3eedcd05a470a2c..5a0bcb5071bd7d5ee8c22ef54ad3e05b3ea96cf4 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -1128,4 +1128,5 @@ int mt76u_init(struct mt76_dev *dev, struct usb_interface *intf)
  EXPORT_SYMBOL_GPL(mt76u_init);
  
  MODULE_AUTHOR("Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>");
+MODULE_DESCRIPTION("MediaTek MT76x USB helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c

index fc76c66ff1a5a58f1c73e9ff50ceeea4926b8063..d6c01a2dd1988c5a9ef50bc96ceb0e30b453302d 100644 (file)
--- a/drivers/net/wireless/mediatek/mt76/util.c
+++ b/drivers/net/wireless/mediatek/mt76/util.c
@@ -138,4 +138,5 @@ int __mt76_worker_fn(void *ptr)
  }
  EXPORT_SYMBOL_GPL(__mt76_worker_fn);
  
+MODULE_DESCRIPTION("MediaTek MT76x helpers");
  MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c

index 91d71e0f7ef2332354a0b950eea3e8795387ed7b..81e8f25863f5bdc957fa5b4ee53a78c31f81eba6 100644 (file)
--- a/drivers/net/wireless/microchip/wilc1000/netdev.c
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.c
@@ -1018,5 +1018,6 @@ unregister_netdev:
         return ERR_PTR(ret);
  }
  
+MODULE_DESCRIPTION("Atmel WILC1000 core wireless driver");
  MODULE_LICENSE("GPL");
  MODULE_FIRMWARE(WILC1000_FW(WILC1000_API_VER));
diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c

index 0d13e3e46e98e4b59852811324792793fc27f78e..d6d3946930905275ba021611307d6a16324a0bb8 100644 (file)
--- a/drivers/net/wireless/microchip/wilc1000/sdio.c
+++ b/drivers/net/wireless/microchip/wilc1000/sdio.c
@@ -984,4 +984,5 @@ static struct sdio_driver wilc_sdio_driver = {
  module_driver(wilc_sdio_driver,
               sdio_register_driver,
               sdio_unregister_driver);
+MODULE_DESCRIPTION("Atmel WILC1000 SDIO wireless driver");
  MODULE_LICENSE("GPL");
diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c

index 77b4cdff73c370bf1bbd1e1ebec77eb0cac318b7..1d8b241ce43cae3329eb9fee84afc63a1027447e 100644 (file)
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -273,6 +273,7 @@ static struct spi_driver wilc_spi_driver = {
         .remove = wilc_bus_remove,
  };
  module_spi_driver(wilc_spi_driver);
+MODULE_DESCRIPTION("Atmel WILC1000 SPI wireless driver");
  MODULE_LICENSE("GPL");
  
  static int wilc_spi_tx(struct wilc *wilc, u8 *b, u32 len)
diff --git a/drivers/net/wireless/ti/wl1251/sdio.c b/drivers/net/wireless/ti/wl1251/sdio.c

index 301bd0043a4354032ceac6c45768c347b473163b..4e5b351f80f0922cabc3f1ce57fa44960c090e3c 100644 (file)
--- a/drivers/net/wireless/ti/wl1251/sdio.c
+++ b/drivers/net/wireless/ti/wl1251/sdio.c
@@ -343,5 +343,6 @@ static void __exit wl1251_sdio_exit(void)
  module_init(wl1251_sdio_init);
  module_exit(wl1251_sdio_exit);
  
+MODULE_DESCRIPTION("TI WL1251 SDIO helpers");
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Kalle Valo <kvalo@adurom.com>");
diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c

index 29292f06bd3dcb191bee70af1b295a6a62abf841..1936bb3af54ab6509edff7a6afdf23f9e9a9b728 100644 (file)
--- a/drivers/net/wireless/ti/wl1251/spi.c
+++ b/drivers/net/wireless/ti/wl1251/spi.c
@@ -342,6 +342,7 @@ static struct spi_driver wl1251_spi_driver = {
  
  module_spi_driver(wl1251_spi_driver);
  
+MODULE_DESCRIPTION("TI WL1251 SPI helpers");
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Kalle Valo <kvalo@adurom.com>");
  MODULE_ALIAS("spi:wl1251");
diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c

index de045fe4ca1eb982105a4a7a2d502f142efd5d02..b26d42b4e3cc0fbdc21e55b4b9682b0a81e1ec8e 100644 (file)
--- a/drivers/net/wireless/ti/wl12xx/main.c
+++ b/drivers/net/wireless/ti/wl12xx/main.c
@@ -1955,6 +1955,7 @@ module_param_named(tcxo, tcxo_param, charp, 0);
  MODULE_PARM_DESC(tcxo,
                  "TCXO clock: 19.2, 26, 38.4, 52, 16.368, 32.736, 16.8, 33.6");
  
+MODULE_DESCRIPTION("TI WL12xx wireless driver");
  MODULE_LICENSE("GPL v2");
  MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
  MODULE_FIRMWARE(WL127X_FW_NAME_SINGLE);
diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c

index 20d9181b3410c40b555d7f06836469357782fc9f..2ccac1cdec0120c1709d09add0bcdf390f57e7e7 100644 (file)
--- a/drivers/net/wireless/ti/wl18xx/main.c
+++ b/drivers/net/wireless/ti/wl18xx/main.c
@@ -2086,6 +2086,7 @@ module_param_named(num_rx_desc, num_rx_desc_param, int, 0400);
  MODULE_PARM_DESC(num_rx_desc_param,
                  "Number of Rx descriptors: u8 (default is 32)");
  
+MODULE_DESCRIPTION("TI WiLink 8 wireless driver");
  MODULE_LICENSE("GPL v2");
  MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
  MODULE_FIRMWARE(WL18XX_FW_NAME);
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c

index fb9ed97774c7a29ab1a27689f7e78bf3740c0c3f..5736acb4d2063cbd1ca9f9f23187102d1d30c599 100644 (file)
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -6793,6 +6793,7 @@ MODULE_PARM_DESC(bug_on_recovery, "BUG() on fw recovery");
  module_param(no_recovery, int, 0600);
  MODULE_PARM_DESC(no_recovery, "Prevent HW recovery. FW will remain stuck.");
  
+MODULE_DESCRIPTION("TI WLAN core driver");
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
  MODULE_AUTHOR("Juuso Oikarinen <juuso.oikarinen@nokia.com>");
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c

index f0686635db46e1246f3d06a8814f50d4c93c85ff..eb5482ed76ae48488ef5f55d1731c080c25b9919 100644 (file)
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -447,6 +447,7 @@ module_sdio_driver(wl1271_sdio_driver);
  module_param(dump, bool, 0600);
  MODULE_PARM_DESC(dump, "Enable sdio read/write dumps.");
  
+MODULE_DESCRIPTION("TI WLAN SDIO helpers");
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
  MODULE_AUTHOR("Juuso Oikarinen <juuso.oikarinen@nokia.com>");
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c

index 7d9a139db59e1552e3f4cd6feae526c4a5211e69..0aa2b2f3c5c914160d05198c3fc8c6ed07c6e999 100644 (file)
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -562,6 +562,7 @@ static struct spi_driver wl1271_spi_driver = {
  };
  
  module_spi_driver(wl1271_spi_driver);
+MODULE_DESCRIPTION("TI WLAN SPI helpers");
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
  MODULE_AUTHOR("Juuso Oikarinen <juuso.oikarinen@nokia.com>");
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c

index d7503aef599f04bec326900fe918a974e55bc5cc..ef76850d9bcd232e84f00c4576c5a98ace51458f 100644 (file)
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -104,13 +104,12 @@ bool provides_xdp_headroom = true;
  module_param(provides_xdp_headroom, bool, 0644);
  
  static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
-                              u8 status);
+                              s8 status);
  
  static void make_tx_response(struct xenvif_queue *queue,
-                            struct xen_netif_tx_request *txp,
+                            const struct xen_netif_tx_request *txp,
                              unsigned int extra_count,
-                            s8       st);
-static void push_tx_responses(struct xenvif_queue *queue);
+                            s8 status);
  
  static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
  
@@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue,
                           unsigned int extra_count, RING_IDX end)
  {
         RING_IDX cons = queue->tx.req_cons;
-       unsigned long flags;
  
         do {
-               spin_lock_irqsave(&queue->response_lock, flags);
                 make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
-               push_tx_responses(queue);
-               spin_unlock_irqrestore(&queue->response_lock, flags);
                 if (cons == end)
                         break;
                 RING_COPY_REQUEST(&queue->tx, cons++, txp);
@@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
         for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
              nr_slots--) {
                 if (unlikely(!txp->size)) {
-                       unsigned long flags;
-
-                       spin_lock_irqsave(&queue->response_lock, flags);
                         make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY);
-                       push_tx_responses(queue);
-                       spin_unlock_irqrestore(&queue->response_lock, flags);
                         ++txp;
                         continue;
                 }
@@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
  
                 for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) {
                         if (unlikely(!txp->size)) {
-                               unsigned long flags;
-
-                               spin_lock_irqsave(&queue->response_lock, flags);
                                 make_tx_response(queue, txp, 0,
                                                  XEN_NETIF_RSP_OKAY);
-                               push_tx_responses(queue);
-                               spin_unlock_irqrestore(&queue->response_lock,
-                                                      flags);
                                 continue;
                         }
  
@@ -995,7 +979,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
                                          (ret == 0) ?
                                          XEN_NETIF_RSP_OKAY :
                                          XEN_NETIF_RSP_ERROR);
-                       push_tx_responses(queue);
                         continue;
                 }
  
@@ -1007,7 +990,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
  
                         make_tx_response(queue, &txreq, extra_count,
                                          XEN_NETIF_RSP_OKAY);
-                       push_tx_responses(queue);
                         continue;
                 }
  
@@ -1433,8 +1415,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
         return work_done;
  }
  
+static void _make_tx_response(struct xenvif_queue *queue,
+                            const struct xen_netif_tx_request *txp,
+                            unsigned int extra_count,
+                            s8 status)
+{
+       RING_IDX i = queue->tx.rsp_prod_pvt;
+       struct xen_netif_tx_response *resp;
+
+       resp = RING_GET_RESPONSE(&queue->tx, i);
+       resp->id     = txp->id;
+       resp->status = status;
+
+       while (extra_count-- != 0)
+               RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+
+       queue->tx.rsp_prod_pvt = ++i;
+}
+
+static void push_tx_responses(struct xenvif_queue *queue)
+{
+       int notify;
+
+       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
+       if (notify)
+               notify_remote_via_irq(queue->tx_irq);
+}
+
  static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
-                              u8 status)
+                              s8 status)
  {
         struct pending_tx_info *pending_tx_info;
         pending_ring_idx_t index;
@@ -1444,8 +1453,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
  
         spin_lock_irqsave(&queue->response_lock, flags);
  
-       make_tx_response(queue, &pending_tx_info->req,
-                        pending_tx_info->extra_count, status);
+       _make_tx_response(queue, &pending_tx_info->req,
+                         pending_tx_info->extra_count, status);
  
         /* Release the pending index before pusing the Tx response so
          * its available before a new Tx request is pushed by the
@@ -1459,32 +1468,19 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
         spin_unlock_irqrestore(&queue->response_lock, flags);
  }
  
-
  static void make_tx_response(struct xenvif_queue *queue,
-                            struct xen_netif_tx_request *txp,
+                            const struct xen_netif_tx_request *txp,
                              unsigned int extra_count,
-                            s8       st)
+                            s8 status)
  {
-       RING_IDX i = queue->tx.rsp_prod_pvt;
-       struct xen_netif_tx_response *resp;
-
-       resp = RING_GET_RESPONSE(&queue->tx, i);
-       resp->id     = txp->id;
-       resp->status = st;
-
-       while (extra_count-- != 0)
-               RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+       unsigned long flags;
  
-       queue->tx.rsp_prod_pvt = ++i;
-}
+       spin_lock_irqsave(&queue->response_lock, flags);
  
-static void push_tx_responses(struct xenvif_queue *queue)
-{
-       int notify;
+       _make_tx_response(queue, txp, extra_count, status);
+       push_tx_responses(queue);
  
-       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
-       if (notify)
-               notify_remote_via_irq(queue->tx_irq);
+       spin_unlock_irqrestore(&queue->response_lock, flags);
  }
  
  static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
@@ -1782,5 +1778,6 @@ static void __exit netback_fini(void)
  }
  module_exit(netback_fini);
  
+MODULE_DESCRIPTION("Xen backend network device module");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_ALIAS("xen-backend:vif");
diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c

index a23ab5c968b9457bee89f14cc1f158e377ffa084..a3455f1d67fae20268a0e9e02a4c5c34ff054afe 100644 (file)
--- a/drivers/nvme/common/auth.c
+++ b/drivers/nvme/common/auth.c
@@ -471,4 +471,5 @@ int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key)
  }
  EXPORT_SYMBOL_GPL(nvme_auth_generate_key);
  
+MODULE_DESCRIPTION("NVMe Authentication framework");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c

index a5c0431c101cf3775509145e3bc7f12c6b64ccd0..6f7e7a8fa5ae470c463586fb0c638b5dc6f7313e 100644 (file)
--- a/drivers/nvme/common/keyring.c
+++ b/drivers/nvme/common/keyring.c
@@ -181,5 +181,6 @@ static void __exit nvme_keyring_exit(void)
  
  MODULE_LICENSE("GPL v2");
  MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
+MODULE_DESCRIPTION("NVMe Keyring implementation");
  module_init(nvme_keyring_init);
  module_exit(nvme_keyring_exit);
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c

index 596bb11eeba5a9d0a4d1637f2c061775956bb84e..c727cd1f264bf6221d2043d1f65bb70a51f00c1d 100644 (file)
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -797,6 +797,7 @@ static int apple_nvme_init_request(struct blk_mq_tag_set *set,
  
  static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
  {
+       enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl);
         u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS);
         bool dead = false, freeze = false;
         unsigned long flags;
@@ -808,8 +809,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
         if (csts & NVME_CSTS_CFS)
                 dead = true;
  
-       if (anv->ctrl.state == NVME_CTRL_LIVE ||
-           anv->ctrl.state == NVME_CTRL_RESETTING) {
+       if (state == NVME_CTRL_LIVE ||
+           state == NVME_CTRL_RESETTING) {
                 freeze = true;
                 nvme_start_freeze(&anv->ctrl);
         }
@@ -881,7 +882,7 @@ static enum blk_eh_timer_return apple_nvme_timeout(struct request *req)
         unsigned long flags;
         u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS);
  
-       if (anv->ctrl.state != NVME_CTRL_LIVE) {
+       if (nvme_ctrl_state(&anv->ctrl) != NVME_CTRL_LIVE) {
                 /*
                  * From rdma.c:
                  * If we are resetting, connecting or deleting we should
@@ -985,10 +986,10 @@ static void apple_nvme_reset_work(struct work_struct *work)
         u32 boot_status, aqa;
         struct apple_nvme *anv =
                 container_of(work, struct apple_nvme, ctrl.reset_work);
+       enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl);
  
-       if (anv->ctrl.state != NVME_CTRL_RESETTING) {
-               dev_warn(anv->dev, "ctrl state %d is not RESETTING\n",
-                        anv->ctrl.state);
+       if (state != NVME_CTRL_RESETTING) {
+               dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", state);
                 ret = -ENODEV;
                 goto out;
         }
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c

index 72c0525c75f503bb56c7c246c733f9eea57e44ab..a264b3ae078b8c4c28382c7d8f8757c7e3eec594 100644 (file)
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -48,11 +48,6 @@ struct nvme_dhchap_queue_context {
  
  static struct workqueue_struct *nvme_auth_wq;
  
-#define nvme_auth_flags_from_qid(qid) \
-       (qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED
-#define nvme_auth_queue_from_qid(ctrl, qid) \
-       (qid == 0) ? (ctrl)->fabrics_q : (ctrl)->connect_q
-
  static inline int ctrl_max_dhchaps(struct nvme_ctrl *ctrl)
  {
         return ctrl->opts->nr_io_queues + ctrl->opts->nr_write_queues +
@@ -63,10 +58,15 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
                             void *data, size_t data_len, bool auth_send)
  {
         struct nvme_command cmd = {};
-       blk_mq_req_flags_t flags = nvme_auth_flags_from_qid(qid);
-       struct request_queue *q = nvme_auth_queue_from_qid(ctrl, qid);
+       nvme_submit_flags_t flags = NVME_SUBMIT_RETRY;
+       struct request_queue *q = ctrl->fabrics_q;
         int ret;
  
+       if (qid != 0) {
+               flags |= NVME_SUBMIT_NOWAIT | NVME_SUBMIT_RESERVED;
+               q = ctrl->connect_q;
+       }
+
         cmd.auth_common.opcode = nvme_fabrics_command;
         cmd.auth_common.secp = NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER;
         cmd.auth_common.spsp0 = 0x01;
@@ -80,8 +80,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
         }
  
         ret = __nvme_submit_sync_cmd(q, &cmd, NULL, data, data_len,
-                                    qid == 0 ? NVME_QID_ANY : qid,
-                                    0, flags);
+                                    qid == 0 ? NVME_QID_ANY : qid, flags);
         if (ret > 0)
                 dev_warn(ctrl->device,
                         "qid %d auth_send failed with status %d\n", qid, ret);
@@ -897,7 +896,7 @@ static void nvme_ctrl_auth_work(struct work_struct *work)
          * If the ctrl is no connected, bail as reconnect will handle
          * authentication.
          */
-       if (ctrl->state != NVME_CTRL_LIVE)
+       if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
                 return;
  
         /* Authenticate admin queue first */
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c

index 20f46c230885c10f2a82bc87f7091645e2d02db5..6f2ebb5fcdb05e1e65971643c9aff2c3f2271c19 100644 (file)
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -171,15 +171,15 @@ static const char * const nvme_statuses[] = {
         [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command",
  };
  
-const unsigned char *nvme_get_error_status_str(u16 status)
+const char *nvme_get_error_status_str(u16 status)
  {
         status &= 0x7ff;
         if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status])
-               return nvme_statuses[status & 0x7ff];
+               return nvme_statuses[status];
         return "Unknown";
  }
  
-const unsigned char *nvme_get_opcode_str(u8 opcode)
+const char *nvme_get_opcode_str(u8 opcode)
  {
         if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode])
                 return nvme_ops[opcode];
@@ -187,7 +187,7 @@ const unsigned char *nvme_get_opcode_str(u8 opcode)
  }
  EXPORT_SYMBOL_GPL(nvme_get_opcode_str);
  
-const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
+const char *nvme_get_admin_opcode_str(u8 opcode)
  {
         if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode])
                 return nvme_admin_ops[opcode];
@@ -195,7 +195,7 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
  }
  EXPORT_SYMBOL_GPL(nvme_get_admin_opcode_str);
  
-const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) {
+const char *nvme_get_fabrics_opcode_str(u8 opcode) {
         if (opcode < ARRAY_SIZE(nvme_fabrics_ops) && nvme_fabrics_ops[opcode])
                 return nvme_fabrics_ops[opcode];
         return "Unknown";
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c

index 85ab0fcf9e886451fb070b75dcd53be4a4f88f62..0a96362912ceda0a035b916f432b24afb261f6a7 100644 (file)
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -338,6 +338,30 @@ static void nvme_log_error(struct request *req)
                            nr->status & NVME_SC_DNR  ? "DNR "  : "");
  }
  
+static void nvme_log_err_passthru(struct request *req)
+{
+       struct nvme_ns *ns = req->q->queuedata;
+       struct nvme_request *nr = nvme_req(req);
+
+       pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s"
+               "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x cdw15=0x%x\n",
+               ns ? ns->disk->disk_name : dev_name(nr->ctrl->device),
+               ns ? nvme_get_opcode_str(nr->cmd->common.opcode) :
+                    nvme_get_admin_opcode_str(nr->cmd->common.opcode),
+               nr->cmd->common.opcode,
+               nvme_get_error_status_str(nr->status),
+               nr->status >> 8 & 7,    /* Status Code Type */
+               nr->status & 0xff,      /* Status Code */
+               nr->status & NVME_SC_MORE ? "MORE " : "",
+               nr->status & NVME_SC_DNR  ? "DNR "  : "",
+               nr->cmd->common.cdw10,
+               nr->cmd->common.cdw11,
+               nr->cmd->common.cdw12,
+               nr->cmd->common.cdw13,
+               nr->cmd->common.cdw14,
+               nr->cmd->common.cdw14);
+}
+
  enum nvme_disposition {
         COMPLETE,
         RETRY,
@@ -385,8 +409,12 @@ static inline void nvme_end_req(struct request *req)
  {
         blk_status_t status = nvme_error_status(nvme_req(req)->status);
  
-       if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET)))
-               nvme_log_error(req);
+       if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) {
+               if (blk_rq_is_passthrough(req))
+                       nvme_log_err_passthru(req);
+               else
+                       nvme_log_error(req);
+       }
         nvme_end_req_zoned(req);
         nvme_trace_bio_complete(req);
         if (req->cmd_flags & REQ_NVME_MPATH)
@@ -679,10 +707,21 @@ static inline void nvme_clear_nvme_request(struct request *req)
  /* initialize a passthrough request */
  void nvme_init_request(struct request *req, struct nvme_command *cmd)
  {
-       if (req->q->queuedata)
+       struct nvme_request *nr = nvme_req(req);
+       bool logging_enabled;
+
+       if (req->q->queuedata) {
+               struct nvme_ns *ns = req->q->disk->private_data;
+
+               logging_enabled = ns->head->passthru_err_log_enabled;
                 req->timeout = NVME_IO_TIMEOUT;
-       else /* no queuedata implies admin queue */
+       } else { /* no queuedata implies admin queue */
+               logging_enabled = nr->ctrl->passthru_err_log_enabled;
                 req->timeout = NVME_ADMIN_TIMEOUT;
+       }
+
+       if (!logging_enabled)
+               req->rq_flags |= RQF_QUIET;
  
         /* passthru commands should let the driver set the SGL flags */
         cmd->common.flags &= ~NVME_CMD_SGL_ALL;
@@ -691,8 +730,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd)
         if (req->mq_hctx->type == HCTX_TYPE_POLL)
                 req->cmd_flags |= REQ_POLLED;
         nvme_clear_nvme_request(req);
-       req->rq_flags |= RQF_QUIET;
-       memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
+       memcpy(nr->cmd, cmd, sizeof(*cmd));
  }
  EXPORT_SYMBOL_GPL(nvme_init_request);
  
@@ -721,7 +759,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
  EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
  
  bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
-               bool queue_live)
+               bool queue_live, enum nvme_ctrl_state state)
  {
         struct nvme_request *req = nvme_req(rq);
  
@@ -742,7 +780,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
                  * command, which is require to set the queue live in the
                  * appropinquate states.
                  */
-               switch (nvme_ctrl_state(ctrl)) {
+               switch (state) {
                 case NVME_CTRL_CONNECTING:
                         if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
                             (req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@@ -1051,20 +1089,27 @@ EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU);
   */
  int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                 union nvme_result *result, void *buffer, unsigned bufflen,
-               int qid, int at_head, blk_mq_req_flags_t flags)
+               int qid, nvme_submit_flags_t flags)
  {
         struct request *req;
         int ret;
+       blk_mq_req_flags_t blk_flags = 0;
  
+       if (flags & NVME_SUBMIT_NOWAIT)
+               blk_flags |= BLK_MQ_REQ_NOWAIT;
+       if (flags & NVME_SUBMIT_RESERVED)
+               blk_flags |= BLK_MQ_REQ_RESERVED;
         if (qid == NVME_QID_ANY)
-               req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
+               req = blk_mq_alloc_request(q, nvme_req_op(cmd), blk_flags);
         else
-               req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
+               req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), blk_flags,
                                                 qid - 1);
  
         if (IS_ERR(req))
                 return PTR_ERR(req);
         nvme_init_request(req, cmd);
+       if (flags & NVME_SUBMIT_RETRY)
+               req->cmd_flags &= ~REQ_FAILFAST_DRIVER;
  
         if (buffer && bufflen) {
                 ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@@ -1072,7 +1117,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                         goto out;
         }
  
-       ret = nvme_execute_rq(req, at_head);
+       ret = nvme_execute_rq(req, flags & NVME_SUBMIT_AT_HEAD);
         if (result && ret >= 0)
                 *result = nvme_req(req)->result;
   out:
@@ -1085,7 +1130,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                 void *buffer, unsigned bufflen)
  {
         return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen,
-                       NVME_QID_ANY, 0, 0);
+                       NVME_QID_ANY, 0);
  }
  EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
  
@@ -1108,6 +1153,10 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
                 effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
         } else {
                 effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+
+               /* Ignore execution restrictions if any relaxation bits are set */
+               if (effects & NVME_CMD_EFFECTS_CSER_MASK)
+                       effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
         }
  
         return effects;
@@ -1560,7 +1609,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
         c.features.dword11 = cpu_to_le32(dword11);
  
         ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res,
-                       buffer, buflen, NVME_QID_ANY, 0, 0);
+                       buffer, buflen, NVME_QID_ANY, 0);
         if (ret >= 0 && result)
                 *result = le32_to_cpu(res.u32);
         return ret;
@@ -2172,7 +2221,7 @@ static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t l
         cmd.common.cdw11 = cpu_to_le32(len);
  
         return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
-                       NVME_QID_ANY, 1, 0);
+                       NVME_QID_ANY, NVME_SUBMIT_AT_HEAD);
  }
  
  static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
@@ -3714,6 +3763,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
         nvme_mpath_add_disk(ns, info->anagrpid);
         nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
  
+       /*
+        * Set ns->disk->device->driver_data to ns so we can access
+        * ns->head->passthru_err_log_enabled in
+        * nvme_io_passthru_err_log_enabled_[store | show]().
+        */
+       dev_set_drvdata(disk_to_dev(ns->disk), ns);
+
         return;
  
   out_cleanup_ns_from_list:
@@ -4138,6 +4194,7 @@ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
  static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
  {
         struct nvme_fw_slot_info_log *log;
+       u8 next_fw_slot, cur_fw_slot;
  
         log = kmalloc(sizeof(*log), GFP_KERNEL);
         if (!log)
@@ -4149,13 +4206,15 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
                 goto out_free_log;
         }
  
-       if (log->afi & 0x70 || !(log->afi & 0x7)) {
+       cur_fw_slot = log->afi & 0x7;
+       next_fw_slot = (log->afi & 0x70) >> 4;
+       if (!cur_fw_slot || (next_fw_slot && (cur_fw_slot != next_fw_slot))) {
                 dev_info(ctrl->device,
                          "Firmware is activated after next Controller Level Reset\n");
                 goto out_free_log;
         }
  
-       memcpy(ctrl->subsys->firmware_rev, &log->frs[(log->afi & 0x7) - 1],
+       memcpy(ctrl->subsys->firmware_rev, &log->frs[cur_fw_slot - 1],
                 sizeof(ctrl->subsys->firmware_rev));
  
  out_free_log:
@@ -4514,6 +4573,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
         int ret;
  
         WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
+       ctrl->passthru_err_log_enabled = false;
         clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
         spin_lock_init(&ctrl->lock);
         mutex_init(&ctrl->scan_lock);
@@ -4851,5 +4911,6 @@ static void __exit nvme_core_exit(void)
  
  MODULE_LICENSE("GPL");
  MODULE_VERSION("1.0");
+MODULE_DESCRIPTION("NVMe host core framework");
  module_init(nvme_core_init);
  module_exit(nvme_core_exit);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c

index b5752a77ad989f04a14ef9416f6244cf5255441d..495c171daead11395a97de82901b5b71662f8f17 100644 (file)
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -180,7 +180,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
         cmd.prop_get.offset = cpu_to_le32(off);
  
         ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
-                       NVME_QID_ANY, 0, 0);
+                       NVME_QID_ANY, 0);
  
         if (ret >= 0)
                 *val = le64_to_cpu(res.u64);
@@ -226,7 +226,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
         cmd.prop_get.offset = cpu_to_le32(off);
  
         ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
-                       NVME_QID_ANY, 0, 0);
+                       NVME_QID_ANY, 0);
  
         if (ret >= 0)
                 *val = le64_to_cpu(res.u64);
@@ -271,7 +271,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
         cmd.prop_set.value = cpu_to_le64(val);
  
         ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0,
-                       NVME_QID_ANY, 0, 0);
+                       NVME_QID_ANY, 0);
         if (unlikely(ret))
                 dev_err(ctrl->device,
                         "Property Set error: %d, offset %#x\n",
@@ -450,8 +450,10 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
                 return -ENOMEM;
  
         ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
-                       data, sizeof(*data), NVME_QID_ANY, 1,
-                       BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+                       data, sizeof(*data), NVME_QID_ANY,
+                       NVME_SUBMIT_AT_HEAD |
+                       NVME_SUBMIT_NOWAIT |
+                       NVME_SUBMIT_RESERVED);
         if (ret) {
                 nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
                                        &cmd, data);
@@ -525,11 +527,14 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
                 return -ENOMEM;
  
         ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
-                       data, sizeof(*data), qid, 1,
-                       BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+                       data, sizeof(*data), qid,
+                       NVME_SUBMIT_AT_HEAD |
+                       NVME_SUBMIT_RESERVED |
+                       NVME_SUBMIT_NOWAIT);
         if (ret) {
                 nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
                                        &cmd, data);
+               goto out_free_data;
         }
         result = le32_to_cpu(res.u32);
         if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
@@ -1488,6 +1493,7 @@ static void __exit nvmf_exit(void)
  }
  
  MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("NVMe host fabrics library");
  
  module_init(nvmf_init);
  module_exit(nvmf_exit);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h

index fbaee5a7be196c08483a41b6673f00e5032bec10..06cc54851b1be39615cdfa6eed1a935dec472f82 100644 (file)
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -185,9 +185,11 @@ static inline bool
  nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
                         struct nvmf_ctrl_options *opts)
  {
-       if (ctrl->state == NVME_CTRL_DELETING ||
-           ctrl->state == NVME_CTRL_DELETING_NOIO ||
-           ctrl->state == NVME_CTRL_DEAD ||
+       enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+       if (state == NVME_CTRL_DELETING ||
+           state == NVME_CTRL_DELETING_NOIO ||
+           state == NVME_CTRL_DEAD ||
             strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
             strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
             !uuid_equal(&opts->host->id, &ctrl->opts->host->id))
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c

index 16847a316421f393cfbf410f083cd97c657f062e..68a5d971657bb5080f717f5ae1ec5645830aadd5 100644 (file)
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list);
  static DEFINE_IDA(nvme_fc_local_port_cnt);
  static DEFINE_IDA(nvme_fc_ctrl_cnt);
  
-static struct workqueue_struct *nvme_fc_wq;
-
-static bool nvme_fc_waiting_to_unload;
-static DECLARE_COMPLETION(nvme_fc_unload_proceed);
-
  /*
   * These items are short-term. They will eventually be moved into
   * a generic FC class. See comments in module init.
@@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref)
         /* remove from transport list */
         spin_lock_irqsave(&nvme_fc_lock, flags);
         list_del(&lport->port_list);
-       if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
-               complete(&nvme_fc_unload_proceed);
         spin_unlock_irqrestore(&nvme_fc_lock, flags);
  
         ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num);
@@ -2574,6 +2567,7 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq)
  {
         struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
         struct nvme_fc_ctrl *ctrl = op->ctrl;
+       u16 qnum = op->queue->qnum;
         struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
         struct nvme_command *sqe = &cmdiu->sqe;
  
@@ -2582,10 +2576,11 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq)
          * will detect the aborted io and will fail the connection.
          */
         dev_info(ctrl->ctrl.device,
-               "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
+               "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d (%s) w10/11: "
                 "x%08x/x%08x\n",
-               ctrl->cnum, op->queue->qnum, sqe->common.opcode,
-               sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
+               ctrl->cnum, qnum, sqe->common.opcode, sqe->fabrics.fctype,
+               nvme_fabrics_opcode_str(qnum, sqe),
+               sqe->common.cdw10, sqe->common.cdw11);
         if (__nvme_fc_abort_op(ctrl, op))
                 nvme_fc_error_recovery(ctrl, "io timeout abort failed");
  
@@ -3575,8 +3570,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         flush_delayed_work(&ctrl->connect_work);
  
         dev_info(ctrl->ctrl.device,
-               "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
-               ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl));
+               "NVME-FC{%d}: new ctrl: NQN \"%s\", hostnqn: %s\n",
+               ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl), opts->host->nqn);
  
         return &ctrl->ctrl;
  
@@ -3894,10 +3889,6 @@ static int __init nvme_fc_init_module(void)
  {
         int ret;
  
-       nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
-       if (!nvme_fc_wq)
-               return -ENOMEM;
-
         /*
          * NOTE:
          * It is expected that in the future the kernel will combine
@@ -3915,7 +3906,7 @@ static int __init nvme_fc_init_module(void)
         ret = class_register(&fc_class);
         if (ret) {
                 pr_err("couldn't register class fc\n");
-               goto out_destroy_wq;
+               return ret;
         }
  
         /*
@@ -3939,8 +3930,6 @@ out_destroy_device:
         device_destroy(&fc_class, MKDEV(0, 0));
  out_destroy_class:
         class_unregister(&fc_class);
-out_destroy_wq:
-       destroy_workqueue(nvme_fc_wq);
  
         return ret;
  }
@@ -3960,48 +3949,27 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
         spin_unlock(&rport->lock);
  }
  
-static void
-nvme_fc_cleanup_for_unload(void)
+static void __exit nvme_fc_exit_module(void)
  {
         struct nvme_fc_lport *lport;
         struct nvme_fc_rport *rport;
-
-       list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
-               list_for_each_entry(rport, &lport->endp_list, endp_list) {
-                       nvme_fc_delete_controllers(rport);
-               }
-       }
-}
-
-static void __exit nvme_fc_exit_module(void)
-{
         unsigned long flags;
-       bool need_cleanup = false;
  
         spin_lock_irqsave(&nvme_fc_lock, flags);
-       nvme_fc_waiting_to_unload = true;
-       if (!list_empty(&nvme_fc_lport_list)) {
-               need_cleanup = true;
-               nvme_fc_cleanup_for_unload();
-       }
+       list_for_each_entry(lport, &nvme_fc_lport_list, port_list)
+               list_for_each_entry(rport, &lport->endp_list, endp_list)
+                       nvme_fc_delete_controllers(rport);
         spin_unlock_irqrestore(&nvme_fc_lock, flags);
-       if (need_cleanup) {
-               pr_info("%s: waiting for ctlr deletes\n", __func__);
-               wait_for_completion(&nvme_fc_unload_proceed);
-               pr_info("%s: ctrl deletes complete\n", __func__);
-       }
+       flush_workqueue(nvme_delete_wq);
  
         nvmf_unregister_transport(&nvme_fc_transport);
  
-       ida_destroy(&nvme_fc_local_port_cnt);
-       ida_destroy(&nvme_fc_ctrl_cnt);
-
         device_destroy(&fc_class, MKDEV(0, 0));
         class_unregister(&fc_class);
-       destroy_workqueue(nvme_fc_wq);
  }
  
  module_init(nvme_fc_init_module);
  module_exit(nvme_fc_exit_module);
  
+MODULE_DESCRIPTION("NVMe host FC transport driver");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c

index 18f5c1be5d67e50ecef131bfe5b223e4e5eda5bd..3dfd5ae99ae05e892eb793cb3b21ba0b75dd6e98 100644 (file)
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -228,7 +228,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
         length = (io.nblocks + 1) << ns->head->lba_shift;
  
         if ((io.control & NVME_RW_PRINFO_PRACT) &&
-           ns->head->ms == sizeof(struct t10_pi_tuple)) {
+           (ns->head->ms == ns->head->pi_size)) {
                 /*
                  * Protection information is stripped/inserted by the
                  * controller.
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c

index 2dd4137a08b284df64788972a067d4282fa92ac7..74de1e64aeead77c604ec31e30bac179a0de245b 100644 (file)
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -156,7 +156,7 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
                 if (!ns->head->disk)
                         continue;
                 kblockd_schedule_work(&ns->head->requeue_work);
-               if (ctrl->state == NVME_CTRL_LIVE)
+               if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
                         disk_uevent(ns->head->disk, KOBJ_CHANGE);
         }
         up_read(&ctrl->namespaces_rwsem);
@@ -223,13 +223,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
  
  static bool nvme_path_is_disabled(struct nvme_ns *ns)
  {
+       enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl);
+
         /*
          * We don't treat NVME_CTRL_DELETING as a disabled path as I/O should
          * still be able to complete assuming that the controller is connected.
          * Otherwise it will fail immediately and return to the requeue list.
          */
-       if (ns->ctrl->state != NVME_CTRL_LIVE &&
-           ns->ctrl->state != NVME_CTRL_DELETING)
+       if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING)
                 return true;
         if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
             !test_bit(NVME_NS_READY, &ns->flags))
@@ -331,7 +332,7 @@ out:
  
  static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
  {
-       return ns->ctrl->state == NVME_CTRL_LIVE &&
+       return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE &&
                 ns->ana_state == NVME_ANA_OPTIMIZED;
  }
  
@@ -358,7 +359,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
         list_for_each_entry_rcu(ns, &head->list, siblings) {
                 if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
                         continue;
-               switch (ns->ctrl->state) {
+               switch (nvme_ctrl_state(ns->ctrl)) {
                 case NVME_CTRL_LIVE:
                 case NVME_CTRL_RESETTING:
                 case NVME_CTRL_CONNECTING:
@@ -667,7 +668,7 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
          * controller is ready.
          */
         if (nvme_state_is_live(ns->ana_state) &&
-           ns->ctrl->state == NVME_CTRL_LIVE)
+           nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
                 nvme_mpath_set_live(ns);
  }
  
@@ -748,7 +749,7 @@ static void nvme_ana_work(struct work_struct *work)
  {
         struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
  
-       if (ctrl->state != NVME_CTRL_LIVE)
+       if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
                 return;
  
         nvme_read_ana_log(ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h

index 030c8081824065e7fa3d14e1a4918f1c94080565..7b87763e2f8a69f5edef68e2e657ed417e911cb8 100644 (file)
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -263,6 +263,7 @@ enum nvme_ctrl_flags {
  struct nvme_ctrl {
         bool comp_seen;
         bool identified;
+       bool passthru_err_log_enabled;
         enum nvme_ctrl_state state;
         spinlock_t lock;
         struct mutex scan_lock;
@@ -454,6 +455,7 @@ struct nvme_ns_head {
         struct list_head        entry;
         struct kref             ref;
         bool                    shared;
+       bool                    passthru_err_log_enabled;
         int                     instance;
         struct nvme_effects_log *effects;
         u64                     nuse;
@@ -522,7 +524,6 @@ struct nvme_ns {
         struct device           cdev_device;
  
         struct nvme_fault_inject fault_inject;
-
  };
  
  /* NVMe ns supports metadata actions by the controller (generate/strip) */
@@ -805,17 +806,18 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
  blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
                 struct request *req);
  bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
-               bool queue_live);
+               bool queue_live, enum nvme_ctrl_state state);
  
  static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
                 bool queue_live)
  {
-       if (likely(ctrl->state == NVME_CTRL_LIVE))
+       enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+       if (likely(state == NVME_CTRL_LIVE))
                 return true;
-       if (ctrl->ops->flags & NVME_F_FABRICS &&
-           ctrl->state == NVME_CTRL_DELETING)
+       if (ctrl->ops->flags & NVME_F_FABRICS && state == NVME_CTRL_DELETING)
                 return queue_live;
-       return __nvme_check_ready(ctrl, rq, queue_live);
+       return __nvme_check_ready(ctrl, rq, queue_live, state);
  }
  
  /*
@@ -836,12 +838,27 @@ static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl,
                 (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS);
  }
  
+/*
+ * Flags for __nvme_submit_sync_cmd()
+ */
+typedef __u32 __bitwise nvme_submit_flags_t;
+
+enum {
+       /* Insert request at the head of the queue */
+       NVME_SUBMIT_AT_HEAD  = (__force nvme_submit_flags_t)(1 << 0),
+       /* Set BLK_MQ_REQ_NOWAIT when allocating request */
+       NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1),
+       /* Set BLK_MQ_REQ_RESERVED when allocating request */
+       NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2),
+       /* Retry command when NVME_SC_DNR is not set in the result */
+       NVME_SUBMIT_RETRY = (__force nvme_submit_flags_t)(1 << 3),
+};
+
  int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                 void *buf, unsigned bufflen);
  int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                 union nvme_result *result, void *buffer, unsigned bufflen,
-               int qid, int at_head,
-               blk_mq_req_flags_t flags);
+               int qid, nvme_submit_flags_t flags);
  int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
                       unsigned int dword11, void *buffer, size_t buflen,
                       u32 *result);
@@ -1124,35 +1141,42 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
  }
  
  #ifdef CONFIG_NVME_VERBOSE_ERRORS
-const unsigned char *nvme_get_error_status_str(u16 status);
-const unsigned char *nvme_get_opcode_str(u8 opcode);
-const unsigned char *nvme_get_admin_opcode_str(u8 opcode);
-const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode);
+const char *nvme_get_error_status_str(u16 status);
+const char *nvme_get_opcode_str(u8 opcode);
+const char *nvme_get_admin_opcode_str(u8 opcode);
+const char *nvme_get_fabrics_opcode_str(u8 opcode);
  #else /* CONFIG_NVME_VERBOSE_ERRORS */
-static inline const unsigned char *nvme_get_error_status_str(u16 status)
+static inline const char *nvme_get_error_status_str(u16 status)
  {
         return "I/O Error";
  }
-static inline const unsigned char *nvme_get_opcode_str(u8 opcode)
+static inline const char *nvme_get_opcode_str(u8 opcode)
  {
         return "I/O Cmd";
  }
-static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
+static inline const char *nvme_get_admin_opcode_str(u8 opcode)
  {
         return "Admin Cmd";
  }
  
-static inline const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode)
+static inline const char *nvme_get_fabrics_opcode_str(u8 opcode)
  {
         return "Fabrics Cmd";
  }
  #endif /* CONFIG_NVME_VERBOSE_ERRORS */
  
-static inline const unsigned char *nvme_opcode_str(int qid, u8 opcode, u8 fctype)
+static inline const char *nvme_opcode_str(int qid, u8 opcode)
  {
-       if (opcode == nvme_fabrics_command)
-               return nvme_get_fabrics_opcode_str(fctype);
         return qid ? nvme_get_opcode_str(opcode) :
                 nvme_get_admin_opcode_str(opcode);
  }
+
+static inline const char *nvme_fabrics_opcode_str(
+               int qid, const struct nvme_command *cmd)
+{
+       if (nvme_is_fabrics(cmd))
+               return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype);
+
+       return nvme_opcode_str(qid, cmd->common.opcode);
+}
  #endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c

index c1d6357ec98a0107acacdae47024c3110b3cfb9f..e6267a6aa3801e5d76e7d1dc4a509ba0e9fc0159 100644 (file)
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1349,7 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
                 dev_warn(dev->ctrl.device,
                          "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
                          req->tag, nvme_cid(req), opcode,
-                        nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid);
+                        nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid);
                 nvme_req(req)->flags |= NVME_REQ_CANCELLED;
                 goto disable;
         }
@@ -3543,5 +3543,6 @@ static void __exit nvme_exit(void)
  MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
  MODULE_LICENSE("GPL");
  MODULE_VERSION("1.0");
+MODULE_DESCRIPTION("NVMe host PCIe transport driver");
  module_init(nvme_init);
  module_exit(nvme_exit);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c

index 11dde0d830442df31c74499655e86566ab995a66..20fdd40b1879f5796ab768acade2096eefde9e93 100644 (file)
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1410,6 +1410,8 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
         struct nvme_ns *ns = rq->q->queuedata;
         struct bio *bio = rq->bio;
         struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
+       struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+       u32 xfer_len;
         int nr;
  
         req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
@@ -1422,8 +1424,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
         if (unlikely(nr))
                 goto mr_put;
  
-       nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
-                               req->mr->sig_attrs, ns->head->pi_type);
+       nvme_rdma_set_sig_attrs(bi, c, req->mr->sig_attrs, ns->head->pi_type);
         nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
  
         ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
@@ -1441,7 +1442,11 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
                      IB_ACCESS_REMOTE_WRITE;
  
         sg->addr = cpu_to_le64(req->mr->iova);
-       put_unaligned_le24(req->mr->length, sg->length);
+       xfer_len = req->mr->length;
+       /* Check if PI is added by the HW */
+       if (!pi_count)
+               xfer_len += (xfer_len >> bi->interval_exp) * ns->head->pi_size;
+       put_unaligned_le24(xfer_len, sg->length);
         put_unaligned_le32(req->mr->rkey, sg->key);
         sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
  
@@ -1946,14 +1951,13 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
         struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
         struct nvme_rdma_queue *queue = req->queue;
         struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-       u8 opcode = req->req.cmd->common.opcode;
-       u8 fctype = req->req.cmd->fabrics.fctype;
+       struct nvme_command *cmd = req->req.cmd;
         int qid = nvme_rdma_queue_idx(queue);
  
         dev_warn(ctrl->ctrl.device,
                  "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n",
-                rq->tag, nvme_cid(rq), opcode,
-                nvme_opcode_str(qid, opcode, fctype), qid);
+                rq->tag, nvme_cid(rq), cmd->common.opcode,
+                nvme_fabrics_opcode_str(qid, cmd), qid);
  
         if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
                 /*
@@ -2296,8 +2300,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
         if (ret)
                 goto out_uninit_ctrl;
  
-       dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
-               nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
+       dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs, hostnqn: %s\n",
+               nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn);
  
         mutex_lock(&nvme_rdma_ctrl_mutex);
         list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
@@ -2400,4 +2404,5 @@ static void __exit nvme_rdma_cleanup_module(void)
  module_init(nvme_rdma_init_module);
  module_exit(nvme_rdma_cleanup_module);
  
+MODULE_DESCRIPTION("NVMe host RDMA transport driver");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c

index 754e911110420f5f30074762c7787a88b183830a..f2832f70e7e0a861070d066bf1ee71c19fbf5ae2 100644 (file)
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -35,6 +35,31 @@ static ssize_t nvme_sysfs_rescan(struct device *dev,
  }
  static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
  
+static ssize_t nvme_adm_passthru_err_log_enabled_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+       return sysfs_emit(buf,
+                         ctrl->passthru_err_log_enabled ? "on\n" : "off\n");
+}
+
+static ssize_t nvme_adm_passthru_err_log_enabled_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       bool passthru_err_log_enabled;
+       int err;
+
+       err = kstrtobool(buf, &passthru_err_log_enabled);
+       if (err)
+               return -EINVAL;
+
+       ctrl->passthru_err_log_enabled = passthru_err_log_enabled;
+
+       return count;
+}
+
  static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
  {
         struct gendisk *disk = dev_to_disk(dev);
@@ -44,6 +69,37 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
         return nvme_get_ns_from_dev(dev)->head;
  }
  
+static ssize_t nvme_io_passthru_err_log_enabled_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvme_ns_head *head = dev_to_ns_head(dev);
+
+       return sysfs_emit(buf, head->passthru_err_log_enabled ? "on\n" : "off\n");
+}
+
+static ssize_t nvme_io_passthru_err_log_enabled_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct nvme_ns_head *head = dev_to_ns_head(dev);
+       bool passthru_err_log_enabled;
+       int err;
+
+       err = kstrtobool(buf, &passthru_err_log_enabled);
+       if (err)
+               return -EINVAL;
+       head->passthru_err_log_enabled = passthru_err_log_enabled;
+
+       return count;
+}
+
+static struct device_attribute dev_attr_adm_passthru_err_log_enabled = \
+       __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \
+       nvme_adm_passthru_err_log_enabled_show, nvme_adm_passthru_err_log_enabled_store);
+
+static struct device_attribute dev_attr_io_passthru_err_log_enabled = \
+       __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \
+       nvme_io_passthru_err_log_enabled_show, nvme_io_passthru_err_log_enabled_store);
+
  static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
                 char *buf)
  {
@@ -208,6 +264,7 @@ static struct attribute *nvme_ns_attrs[] = {
         &dev_attr_ana_grpid.attr,
         &dev_attr_ana_state.attr,
  #endif
+       &dev_attr_io_passthru_err_log_enabled.attr,
         NULL,
  };
  
@@ -311,6 +368,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
                                      char *buf)
  {
         struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       unsigned state = (unsigned)nvme_ctrl_state(ctrl);
         static const char *const state_name[] = {
                 [NVME_CTRL_NEW]         = "new",
                 [NVME_CTRL_LIVE]        = "live",
@@ -321,9 +379,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
                 [NVME_CTRL_DEAD]        = "dead",
         };
  
-       if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
-           state_name[ctrl->state])
-               return sysfs_emit(buf, "%s\n", state_name[ctrl->state]);
+       if (state < ARRAY_SIZE(state_name) && state_name[state])
+               return sysfs_emit(buf, "%s\n", state_name[state]);
  
         return sysfs_emit(buf, "unknown state\n");
  }
@@ -655,6 +712,7 @@ static struct attribute *nvme_dev_attrs[] = {
  #ifdef CONFIG_NVME_TCP_TLS
         &dev_attr_tls_key.attr,
  #endif
+       &dev_attr_adm_passthru_err_log_enabled.attr,
         NULL
  };
  
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c

index d058d990532bfcf6dd521cfa51f411f60f5913fd..a6d596e05602117ff9c38fbcb86645bda4016c59 100644 (file)
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2428,13 +2428,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
         struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
         struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
         struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
-       u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype;
+       struct nvme_command *cmd = &pdu->cmd;
         int qid = nvme_tcp_queue_id(req->queue);
  
         dev_warn(ctrl->device,
                  "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n",
-                rq->tag, nvme_cid(rq), pdu->hdr.type, opc,
-                nvme_opcode_str(qid, opc, fctype), qid);
+                rq->tag, nvme_cid(rq), pdu->hdr.type, cmd->common.opcode,
+                nvme_fabrics_opcode_str(qid, cmd), qid);
  
         if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
                 /*
@@ -2753,8 +2753,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
         if (ret)
                 goto out_uninit_ctrl;
  
-       dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
-               nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
+       dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp, hostnqn: %s\n",
+               nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn);
  
         mutex_lock(&nvme_tcp_ctrl_mutex);
         list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
@@ -2826,4 +2826,5 @@ static void __exit nvme_tcp_cleanup_module(void)
  module_init(nvme_tcp_init_module);
  module_exit(nvme_tcp_cleanup_module);
  
+MODULE_DESCRIPTION("NVMe host TCP transport driver");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c

index d26aa30f87026058fb23a1df97d10c1fe7fafbda..8658e9c08534df50c466314c6c70d18d79525324 100644 (file)
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -248,7 +248,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
                 nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
                 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
                         continue;
-               nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+               nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
                                 NVME_AER_NOTICE_NS_CHANGED,
                                 NVME_LOG_CHANGED_NS);
         }
@@ -265,7 +265,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys,
                         continue;
                 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
                         continue;
-               nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+               nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
                                 NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
         }
         mutex_unlock(&subsys->lock);
@@ -1705,4 +1705,5 @@ static void __exit nvmet_exit(void)
  module_init(nvmet_init);
  module_exit(nvmet_exit);
  
+MODULE_DESCRIPTION("NVMe target core framework");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c

index 668d257fa98636dc1785e7b5f6bb6b35e8188ab9..68e82ccc0e4e38ffcb2018cce0080741a5984925 100644 (file)
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -21,7 +21,7 @@ static void __nvmet_disc_changed(struct nvmet_port *port,
         if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE))
                 return;
  
-       nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+       nvmet_add_async_event(ctrl, NVME_AER_NOTICE,
                               NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC);
  }
  
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c

index d8da840a1c0ed1e9c383d59c11227f7fddfe607d..9964ffe347d2ada3cca88e517d294205e77d29ab 100644 (file)
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -209,7 +209,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
         struct nvmf_connect_command *c = &req->cmd->connect;
         struct nvmf_connect_data *d;
         struct nvmet_ctrl *ctrl = NULL;
-       u16 status = 0;
+       u16 status;
         int ret;
  
         if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
@@ -290,7 +290,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
         struct nvmf_connect_data *d;
         struct nvmet_ctrl *ctrl;
         u16 qid = le16_to_cpu(c->qid);
-       u16 status = 0;
+       u16 status;
  
         if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
                 return;
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c

index bda7a3009e85127ca27f99e107d61fbf1f3995f2..fd229f310c931fbfd6c3132185f2b73c135cd633 100644 (file)
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -111,6 +111,8 @@ struct nvmet_fc_tgtport {
         struct nvmet_fc_port_entry      *pe;
         struct kref                     ref;
         u32                             max_sg_cnt;
+
+       struct work_struct              put_work;
  };
  
  struct nvmet_fc_port_entry {
@@ -145,7 +147,6 @@ struct nvmet_fc_tgt_queue {
         struct list_head                avail_defer_list;
         struct workqueue_struct         *work_q;
         struct kref                     ref;
-       struct rcu_head                 rcu;
         /* array of fcp_iods */
         struct nvmet_fc_fcp_iod         fod[] __counted_by(sqsize);
  } __aligned(sizeof(unsigned long long));
@@ -166,10 +167,9 @@ struct nvmet_fc_tgt_assoc {
         struct nvmet_fc_hostport        *hostport;
         struct nvmet_fc_ls_iod          *rcv_disconn;
         struct list_head                a_list;
-       struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1];
+       struct nvmet_fc_tgt_queue       *queues[NVMET_NR_QUEUES + 1];
         struct kref                     ref;
         struct work_struct              del_work;
-       struct rcu_head                 rcu;
  };
  
  
@@ -249,6 +249,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
  static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
  static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
  static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
+static void nvmet_fc_put_tgtport_work(struct work_struct *work)
+{
+       struct nvmet_fc_tgtport *tgtport =
+               container_of(work, struct nvmet_fc_tgtport, put_work);
+
+       nvmet_fc_tgtport_put(tgtport);
+}
  static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
  static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
                                         struct nvmet_fc_fcp_iod *fod);
@@ -360,7 +367,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
  
         if (!lsop->req_queued) {
                 spin_unlock_irqrestore(&tgtport->lock, flags);
-               return;
+               goto out_putwork;
         }
  
         list_del(&lsop->lsreq_list);
@@ -373,7 +380,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
                                   (lsreq->rqstlen + lsreq->rsplen),
                                   DMA_BIDIRECTIONAL);
  
-       nvmet_fc_tgtport_put(tgtport);
+out_putwork:
+       queue_work(nvmet_wq, &tgtport->put_work);
  }
  
  static int
@@ -489,8 +497,7 @@ nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc)
          * message is normal. Otherwise, send unless the hostport has
          * already been invalidated by the lldd.
          */
-       if (!tgtport->ops->ls_req || !assoc->hostport ||
-           assoc->hostport->invalid)
+       if (!tgtport->ops->ls_req || assoc->hostport->invalid)
                 return;
  
         lsop = kzalloc((sizeof(*lsop) +
@@ -802,14 +809,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
         if (!queue)
                 return NULL;
  
-       if (!nvmet_fc_tgt_a_get(assoc))
-               goto out_free_queue;
-
         queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
                                 assoc->tgtport->fc_target_port.port_num,
                                 assoc->a_id, qid);
         if (!queue->work_q)
-               goto out_a_put;
+               goto out_free_queue;
  
         queue->qid = qid;
         queue->sqsize = sqsize;
@@ -831,15 +835,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
                 goto out_fail_iodlist;
  
         WARN_ON(assoc->queues[qid]);
-       rcu_assign_pointer(assoc->queues[qid], queue);
+       assoc->queues[qid] = queue;
  
         return queue;
  
  out_fail_iodlist:
         nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
         destroy_workqueue(queue->work_q);
-out_a_put:
-       nvmet_fc_tgt_a_put(assoc);
  out_free_queue:
         kfree(queue);
         return NULL;
@@ -852,15 +854,11 @@ nvmet_fc_tgt_queue_free(struct kref *ref)
         struct nvmet_fc_tgt_queue *queue =
                 container_of(ref, struct nvmet_fc_tgt_queue, ref);
  
-       rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL);
-
         nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue);
  
-       nvmet_fc_tgt_a_put(queue->assoc);
-
         destroy_workqueue(queue->work_q);
  
-       kfree_rcu(queue, rcu);
+       kfree(queue);
  }
  
  static void
@@ -969,7 +967,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
         rcu_read_lock();
         list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
                 if (association_id == assoc->association_id) {
-                       queue = rcu_dereference(assoc->queues[qid]);
+                       queue = assoc->queues[qid];
                         if (queue &&
                             (!atomic_read(&queue->connected) ||
                              !nvmet_fc_tgt_q_get(queue)))
@@ -1078,8 +1076,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
                 /* new allocation not needed */
                 kfree(newhost);
                 newhost = match;
-               /* no new allocation - release reference */
-               nvmet_fc_tgtport_put(tgtport);
         } else {
                 newhost->tgtport = tgtport;
                 newhost->hosthandle = hosthandle;
@@ -1094,23 +1090,54 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
  }
  
  static void
-nvmet_fc_delete_assoc(struct work_struct *work)
+nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc)
+{
+       nvmet_fc_delete_target_assoc(assoc);
+       nvmet_fc_tgt_a_put(assoc);
+}
+
+static void
+nvmet_fc_delete_assoc_work(struct work_struct *work)
  {
         struct nvmet_fc_tgt_assoc *assoc =
                 container_of(work, struct nvmet_fc_tgt_assoc, del_work);
+       struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
  
-       nvmet_fc_delete_target_assoc(assoc);
-       nvmet_fc_tgt_a_put(assoc);
+       nvmet_fc_delete_assoc(assoc);
+       nvmet_fc_tgtport_put(tgtport);
+}
+
+static void
+nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc)
+{
+       nvmet_fc_tgtport_get(assoc->tgtport);
+       queue_work(nvmet_wq, &assoc->del_work);
+}
+
+static bool
+nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id)
+{
+       struct nvmet_fc_tgt_assoc *a;
+
+       list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) {
+               if (association_id == a->association_id)
+                       return true;
+       }
+
+       return false;
  }
  
  static struct nvmet_fc_tgt_assoc *
  nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
  {
-       struct nvmet_fc_tgt_assoc *assoc, *tmpassoc;
+       struct nvmet_fc_tgt_assoc *assoc;
         unsigned long flags;
+       bool done;
         u64 ran;
         int idx;
-       bool needrandom = true;
+
+       if (!tgtport->pe)
+               return NULL;
  
         assoc = kzalloc(sizeof(*assoc), GFP_KERNEL);
         if (!assoc)
@@ -1120,43 +1147,35 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
         if (idx < 0)
                 goto out_free_assoc;
  
-       if (!nvmet_fc_tgtport_get(tgtport))
-               goto out_ida;
-
         assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle);
         if (IS_ERR(assoc->hostport))
-               goto out_put;
+               goto out_ida;
  
         assoc->tgtport = tgtport;
         assoc->a_id = idx;
         INIT_LIST_HEAD(&assoc->a_list);
         kref_init(&assoc->ref);
-       INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc);
+       INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work);
         atomic_set(&assoc->terminating, 0);
  
-       while (needrandom) {
+       done = false;
+       do {
                 get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID);
                 ran = ran << BYTES_FOR_QID_SHIFT;
  
                 spin_lock_irqsave(&tgtport->lock, flags);
-               needrandom = false;
-               list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) {
-                       if (ran == tmpassoc->association_id) {
-                               needrandom = true;
-                               break;
-                       }
-               }
-               if (!needrandom) {
+               rcu_read_lock();
+               if (!nvmet_fc_assoc_exits(tgtport, ran)) {
                         assoc->association_id = ran;
                         list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list);
+                       done = true;
                 }
+               rcu_read_unlock();
                 spin_unlock_irqrestore(&tgtport->lock, flags);
-       }
+       } while (!done);
  
         return assoc;
  
-out_put:
-       nvmet_fc_tgtport_put(tgtport);
  out_ida:
         ida_free(&tgtport->assoc_cnt, idx);
  out_free_assoc:
@@ -1172,13 +1191,18 @@ nvmet_fc_target_assoc_free(struct kref *ref)
         struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
         struct nvmet_fc_ls_iod  *oldls;
         unsigned long flags;
+       int i;
+
+       for (i = NVMET_NR_QUEUES; i >= 0; i--) {
+               if (assoc->queues[i])
+                       nvmet_fc_delete_target_queue(assoc->queues[i]);
+       }
  
         /* Send Disconnect now that all i/o has completed */
         nvmet_fc_xmt_disconnect_assoc(assoc);
  
         nvmet_fc_free_hostport(assoc->hostport);
         spin_lock_irqsave(&tgtport->lock, flags);
-       list_del_rcu(&assoc->a_list);
         oldls = assoc->rcv_disconn;
         spin_unlock_irqrestore(&tgtport->lock, flags);
         /* if pending Rcv Disconnect Association LS, send rsp now */
@@ -1188,8 +1212,7 @@ nvmet_fc_target_assoc_free(struct kref *ref)
         dev_info(tgtport->dev,
                 "{%d:%d} Association freed\n",
                 tgtport->fc_target_port.port_num, assoc->a_id);
-       kfree_rcu(assoc, rcu);
-       nvmet_fc_tgtport_put(tgtport);
+       kfree(assoc);
  }
  
  static void
@@ -1208,7 +1231,7 @@ static void
  nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
  {
         struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
-       struct nvmet_fc_tgt_queue *queue;
+       unsigned long flags;
         int i, terminating;
  
         terminating = atomic_xchg(&assoc->terminating, 1);
@@ -1217,29 +1240,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
         if (terminating)
                 return;
  
+       spin_lock_irqsave(&tgtport->lock, flags);
+       list_del_rcu(&assoc->a_list);
+       spin_unlock_irqrestore(&tgtport->lock, flags);
  
-       for (i = NVMET_NR_QUEUES; i >= 0; i--) {
-               rcu_read_lock();
-               queue = rcu_dereference(assoc->queues[i]);
-               if (!queue) {
-                       rcu_read_unlock();
-                       continue;
-               }
+       synchronize_rcu();
  
-               if (!nvmet_fc_tgt_q_get(queue)) {
-                       rcu_read_unlock();
-                       continue;
-               }
-               rcu_read_unlock();
-               nvmet_fc_delete_target_queue(queue);
-               nvmet_fc_tgt_q_put(queue);
+       /* ensure all in-flight I/Os have been processed */
+       for (i = NVMET_NR_QUEUES; i >= 0; i--) {
+               if (assoc->queues[i])
+                       flush_workqueue(assoc->queues[i]->work_q);
         }
  
         dev_info(tgtport->dev,
                 "{%d:%d} Association deleted\n",
                 tgtport->fc_target_port.port_num, assoc->a_id);
-
-       nvmet_fc_tgt_a_put(assoc);
  }
  
  static struct nvmet_fc_tgt_assoc *
@@ -1415,6 +1430,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
         kref_init(&newrec->ref);
         ida_init(&newrec->assoc_cnt);
         newrec->max_sg_cnt = template->max_sgl_segments;
+       INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work);
  
         ret = nvmet_fc_alloc_ls_iodlist(newrec);
         if (ret) {
@@ -1492,9 +1508,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
         list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
                 if (!nvmet_fc_tgt_a_get(assoc))
                         continue;
-               if (!queue_work(nvmet_wq, &assoc->del_work))
-                       /* already deleting - release local reference */
-                       nvmet_fc_tgt_a_put(assoc);
+               nvmet_fc_schedule_delete_assoc(assoc);
+               nvmet_fc_tgt_a_put(assoc);
         }
         rcu_read_unlock();
  }
@@ -1540,16 +1555,14 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
         spin_lock_irqsave(&tgtport->lock, flags);
         list_for_each_entry_safe(assoc, next,
                                 &tgtport->assoc_list, a_list) {
-               if (!assoc->hostport ||
-                   assoc->hostport->hosthandle != hosthandle)
+               if (assoc->hostport->hosthandle != hosthandle)
                         continue;
                 if (!nvmet_fc_tgt_a_get(assoc))
                         continue;
                 assoc->hostport->invalid = 1;
                 noassoc = false;
-               if (!queue_work(nvmet_wq, &assoc->del_work))
-                       /* already deleting - release local reference */
-                       nvmet_fc_tgt_a_put(assoc);
+               nvmet_fc_schedule_delete_assoc(assoc);
+               nvmet_fc_tgt_a_put(assoc);
         }
         spin_unlock_irqrestore(&tgtport->lock, flags);
  
@@ -1581,7 +1594,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
  
                 rcu_read_lock();
                 list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
-                       queue = rcu_dereference(assoc->queues[0]);
+                       queue = assoc->queues[0];
                         if (queue && queue->nvme_sq.ctrl == ctrl) {
                                 if (nvmet_fc_tgt_a_get(assoc))
                                         found_ctrl = true;
@@ -1593,9 +1606,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
                 nvmet_fc_tgtport_put(tgtport);
  
                 if (found_ctrl) {
-                       if (!queue_work(nvmet_wq, &assoc->del_work))
-                               /* already deleting - release local reference */
-                               nvmet_fc_tgt_a_put(assoc);
+                       nvmet_fc_schedule_delete_assoc(assoc);
+                       nvmet_fc_tgt_a_put(assoc);
                         return;
                 }
  
@@ -1625,6 +1637,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
         /* terminate any outstanding associations */
         __nvmet_fc_free_assocs(tgtport);
  
+       flush_workqueue(nvmet_wq);
+
         /*
          * should terminate LS's as well. However, LS's will be generated
          * at the tail end of association termination, so they likely don't
@@ -1870,9 +1884,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
                                 sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
                         FCNVME_LS_DISCONNECT_ASSOC);
  
-       /* release get taken in nvmet_fc_find_target_assoc */
-       nvmet_fc_tgt_a_put(assoc);
-
         /*
          * The rules for LS response says the response cannot
          * go back until ABTS's have been sent for all outstanding
@@ -1887,8 +1898,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
         assoc->rcv_disconn = iod;
         spin_unlock_irqrestore(&tgtport->lock, flags);
  
-       nvmet_fc_delete_target_assoc(assoc);
-
         if (oldls) {
                 dev_info(tgtport->dev,
                         "{%d:%d} Multiple Disconnect Association LS's "
@@ -1904,6 +1913,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
                 nvmet_fc_xmt_ls_rsp(tgtport, oldls);
         }
  
+       nvmet_fc_schedule_delete_assoc(assoc);
+       nvmet_fc_tgt_a_put(assoc);
+
         return false;
  }
  
@@ -2540,8 +2552,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
  
         fod->req.cmd = &fod->cmdiubuf.sqe;
         fod->req.cqe = &fod->rspiubuf.cqe;
-       if (tgtport->pe)
-               fod->req.port = tgtport->pe->port;
+       if (!tgtport->pe)
+               goto transport_error;
+       fod->req.port = tgtport->pe->port;
  
         /* clear any response payload */
         memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
@@ -2902,6 +2915,9 @@ nvmet_fc_remove_port(struct nvmet_port *port)
  
         nvmet_fc_portentry_unbind(pe);
  
+       /* terminate any outstanding associations */
+       __nvmet_fc_free_assocs(pe->tgtport);
+
         kfree(pe);
  }
  
@@ -2933,6 +2949,9 @@ static int __init nvmet_fc_init_module(void)
  
  static void __exit nvmet_fc_exit_module(void)
  {
+       /* ensure any shutdown operation, e.g. delete ctrls have finished */
+       flush_workqueue(nvmet_wq);
+
         /* sanity check - all lports should be removed */
         if (!list_empty(&nvmet_fc_target_list))
                 pr_warn("%s: targetport list not empty\n", __func__);
@@ -2945,4 +2964,5 @@ static void __exit nvmet_fc_exit_module(void)
  module_init(nvmet_fc_init_module);
  module_exit(nvmet_fc_exit_module);
  
+MODULE_DESCRIPTION("NVMe target FC transport driver");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c

index ead349af30f1e0c87ee0adde980aa98b5fdb0e8a..1471af250ea62267a812bdb402b05ff9099cfcb6 100644 (file)
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport,
         if (!rport->targetport) {
                 tls_req->status = -ECONNREFUSED;
                 spin_lock(&rport->lock);
-               list_add_tail(&rport->ls_list, &tls_req->ls_list);
+               list_add_tail(&tls_req->ls_list, &rport->ls_list);
                 spin_unlock(&rport->lock);
                 queue_work(nvmet_wq, &rport->ls_work);
                 return ret;
@@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
         if (remoteport) {
                 rport = remoteport->private;
                 spin_lock(&rport->lock);
-               list_add_tail(&rport->ls_list, &tls_req->ls_list);
+               list_add_tail(&tls_req->ls_list, &rport->ls_list);
                 spin_unlock(&rport->lock);
                 queue_work(nvmet_wq, &rport->ls_work);
         }
@@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
         if (!tport->remoteport) {
                 tls_req->status = -ECONNREFUSED;
                 spin_lock(&tport->lock);
-               list_add_tail(&tport->ls_list, &tls_req->ls_list);
+               list_add_tail(&tls_req->ls_list, &tport->ls_list);
                 spin_unlock(&tport->lock);
                 queue_work(nvmet_wq, &tport->ls_work);
                 return ret;
@@ -1650,4 +1650,5 @@ static void __exit fcloop_exit(void)
  module_init(fcloop_init);
  module_exit(fcloop_exit);
  
+MODULE_DESCRIPTION("NVMe target FC loop transport driver");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c

index 9cb434c5807514813afe91eada69c0a925daf83a..e589915ddef85cf5f67fcba50deed724b37616d3 100644 (file)
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -400,7 +400,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
         }
  
         nvme_quiesce_admin_queue(&ctrl->ctrl);
-       if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+       if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
                 nvme_disable_ctrl(&ctrl->ctrl, true);
  
         nvme_cancel_admin_tagset(&ctrl->ctrl);
@@ -434,8 +434,10 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
         nvme_loop_shutdown_ctrl(ctrl);
  
         if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
-               if (ctrl->ctrl.state != NVME_CTRL_DELETING &&
-                   ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO)
+               enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
+               if (state != NVME_CTRL_DELETING &&
+                   state != NVME_CTRL_DELETING_NOIO)
                         /* state change failure for non-deleted ctrl? */
                         WARN_ON_ONCE(1);
                 return;
@@ -688,5 +690,6 @@ static void __exit nvme_loop_cleanup_module(void)
  module_init(nvme_loop_init_module);
  module_exit(nvme_loop_cleanup_module);
  
+MODULE_DESCRIPTION("NVMe target loop transport driver");
  MODULE_LICENSE("GPL v2");
  MODULE_ALIAS("nvmet-transport-254"); /* 254 == NVMF_TRTYPE_LOOP */
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c

index 667f9c04f35d538bb361f733e50c62bb7c52d9c3..3a0f2c170f4c16f6c1fa6f09c2cac948403550fb 100644 (file)
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -2104,5 +2104,6 @@ static void __exit nvmet_rdma_exit(void)
  module_init(nvmet_rdma_init);
  module_exit(nvmet_rdma_exit);
  
+MODULE_DESCRIPTION("NVMe target RDMA transport driver");
  MODULE_LICENSE("GPL v2");
  MODULE_ALIAS("nvmet-transport-1"); /* 1 == NVMF_TRTYPE_RDMA */
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c

index 6a1e6bb80062d4753501e07cbcba43870fc00eeb..c8655fc5aa5b8aac838cb4c2e4c0a76c8ebbc174 100644 (file)
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -2216,10 +2216,12 @@ static void __exit nvmet_tcp_exit(void)
         flush_workqueue(nvmet_wq);
  
         destroy_workqueue(nvmet_tcp_wq);
+       ida_destroy(&nvmet_tcp_queue_ida);
  }
  
  module_init(nvmet_tcp_init);
  module_exit(nvmet_tcp_exit);
  
+MODULE_DESCRIPTION("NVMe target TCP transport driver");
  MODULE_LICENSE("GPL v2");
  MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c

index 980123fb4dde05d0e5cd4e0cfe5645b24a8d55dc..eb357ac2e54a2a827ad07b9a073d3e76415a000f 100644 (file)
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -460,8 +460,9 @@ static int nvmem_populate_sysfs_cells(struct nvmem_device *nvmem)
         list_for_each_entry(entry, &nvmem->cells, node) {
                 sysfs_bin_attr_init(&attrs[i]);
                 attrs[i].attr.name = devm_kasprintf(&nvmem->dev, GFP_KERNEL,
-                                                   "%s@%x", entry->name,
-                                                   entry->offset);
+                                                   "%s@%x,%x", entry->name,
+                                                   entry->offset,
+                                                   entry->bit_offset);
                 attrs[i].attr.mode = 0444;
                 attrs[i].size = entry->bytes;
                 attrs[i].read = &nvmem_cell_attr_read;
diff --git a/drivers/of/property.c b/drivers/of/property.c

index 641a40cf5cf34a7d0aa3bf94362a95ae58065fe1..fa8cd33be1312dc57f075cf6557270794dcc2939 100644 (file)
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -763,7 +763,9 @@ struct device_node *of_graph_get_port_parent(struct device_node *node)
         /* Walk 3 levels up only if there is 'ports' node. */
         for (depth = 3; depth && node; depth--) {
                 node = of_get_next_parent(node);
-               if (depth == 2 && !of_node_name_eq(node, "ports"))
+               if (depth == 2 && !of_node_name_eq(node, "ports") &&
+                   !of_node_name_eq(node, "in-ports") &&
+                   !of_node_name_eq(node, "out-ports"))
                         break;
         }
         return node;
@@ -1063,36 +1065,6 @@ of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
         return of_device_get_match_data(dev);
  }
  
-static struct device_node *of_get_compat_node(struct device_node *np)
-{
-       of_node_get(np);
-
-       while (np) {
-               if (!of_device_is_available(np)) {
-                       of_node_put(np);
-                       np = NULL;
-               }
-
-               if (of_property_present(np, "compatible"))
-                       break;
-
-               np = of_get_next_parent(np);
-       }
-
-       return np;
-}
-
-static struct device_node *of_get_compat_node_parent(struct device_node *np)
-{
-       struct device_node *parent, *node;
-
-       parent = of_get_parent(np);
-       node = of_get_compat_node(parent);
-       of_node_put(parent);
-
-       return node;
-}
-
  static void of_link_to_phandle(struct device_node *con_np,
                               struct device_node *sup_np)
  {
@@ -1222,10 +1194,10 @@ static struct device_node *parse_##fname(struct device_node *np,             \
   *  parse_prop.prop_name: Name of property holding a phandle value
   *  parse_prop.index: For properties holding a list of phandles, this is the
   *                   index into the list
+ * @get_con_dev: If the consumer node containing the property is never converted
+ *              to a struct device, implement this ops so fw_devlink can use it
+ *              to find the true consumer.
   * @optional: Describes whether a supplier is mandatory or not
- * @node_not_dev: The consumer node containing the property is never converted
- *               to a struct device. Instead, parse ancestor nodes for the
- *               compatible property to find a node corresponding to a device.
   *
   * Returns:
   * parse_prop() return values are
@@ -1236,15 +1208,15 @@ static struct device_node *parse_##fname(struct device_node *np,             \
  struct supplier_bindings {
         struct device_node *(*parse_prop)(struct device_node *np,
                                           const char *prop_name, int index);
+       struct device_node *(*get_con_dev)(struct device_node *np);
         bool optional;
-       bool node_not_dev;
  };
  
  DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells")
  DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells")
  DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells")
  DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells")
-DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells")
+DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells")
  DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL)
  DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells")
  DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells")
@@ -1262,7 +1234,6 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL)
  DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL)
  DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL)
  DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL)
-DEFINE_SIMPLE_PROP(remote_endpoint, "remote-endpoint", NULL)
  DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells")
  DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells")
  DEFINE_SIMPLE_PROP(leds, "leds", NULL)
@@ -1328,6 +1299,17 @@ static struct device_node *parse_interrupts(struct device_node *np,
         return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np;
  }
  
+static struct device_node *parse_remote_endpoint(struct device_node *np,
+                                                const char *prop_name,
+                                                int index)
+{
+       /* Return NULL for index > 0 to signify end of remote-endpoints. */
+       if (index > 0 || strcmp(prop_name, "remote-endpoint"))
+               return NULL;
+
+       return of_graph_get_remote_port_parent(np);
+}
+
  static const struct supplier_bindings of_supplier_bindings[] = {
         { .parse_prop = parse_clocks, },
         { .parse_prop = parse_interconnects, },
@@ -1352,7 +1334,10 @@ static const struct supplier_bindings of_supplier_bindings[] = {
         { .parse_prop = parse_pinctrl6, },
         { .parse_prop = parse_pinctrl7, },
         { .parse_prop = parse_pinctrl8, },
-       { .parse_prop = parse_remote_endpoint, .node_not_dev = true, },
+       {
+               .parse_prop = parse_remote_endpoint,
+               .get_con_dev = of_graph_get_port_parent,
+       },
         { .parse_prop = parse_pwms, },
         { .parse_prop = parse_resets, },
         { .parse_prop = parse_leds, },
@@ -1403,8 +1388,8 @@ static int of_link_property(struct device_node *con_np, const char *prop_name)
                 while ((phandle = s->parse_prop(con_np, prop_name, i))) {
                         struct device_node *con_dev_np;
  
-                       con_dev_np = s->node_not_dev
-                                       ? of_get_compat_node_parent(con_np)
+                       con_dev_np = s->get_con_dev
+                                       ? s->get_con_dev(con_np)
                                         : of_node_get(con_np);
                         matched = true;
                         i++;
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c

index cfd60e35a8992d7d1bf7ee1ea42c10b6f43a7a2e..d7593bde2d02f39c2532ae4d0be41cccaec38526 100644 (file)
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -50,6 +50,12 @@ static struct unittest_results {
         failed; \
  })
  
+#ifdef CONFIG_OF_KOBJ
+#define OF_KREF_READ(NODE) kref_read(&(NODE)->kobj.kref)
+#else
+#define OF_KREF_READ(NODE) 1
+#endif
+
  /*
   * Expected message may have a message level other than KERN_INFO.
   * Print the expected message only if the current loglevel will allow
@@ -570,7 +576,7 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
                         pr_err("missing testcase data\n");
                         return;
                 }
-               prefs[i] = kref_read(&p[i]->kobj.kref);
+               prefs[i] = OF_KREF_READ(p[i]);
         }
  
         rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells");
@@ -693,9 +699,9 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
         unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
  
         for (i = 0; i < ARRAY_SIZE(p); ++i) {
-               unittest(prefs[i] == kref_read(&p[i]->kobj.kref),
+               unittest(prefs[i] == OF_KREF_READ(p[i]),
                          "provider%d: expected:%d got:%d\n",
-                        i, prefs[i], kref_read(&p[i]->kobj.kref));
+                        i, prefs[i], OF_KREF_READ(p[i]));
                 of_node_put(p[i]);
         }
  }
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c

index 9c2137dae429aa26cd69bfaadb9706193946b2b8..826b5016a101022b990045fa7b68afe85be80c7a 100644 (file)
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pci_bus *bus)
  }
  EXPORT_SYMBOL(pci_bus_add_devices);
  
-/** pci_walk_bus - walk devices on/under bus, calling callback.
- *  @top      bus whose devices should be walked
- *  @cb       callback to be called for each device found
- *  @userdata arbitrary pointer to be passed to callback.
- *
- *  Walk the given bus, including any bridged devices
- *  on buses under this bus.  Call the provided callback
- *  on each device found.
- *
- *  We check the return of @cb each time. If it returns anything
- *  other than 0, we break out.
- *
- */
-void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
-                 void *userdata)
+static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
+                          void *userdata, bool locked)
  {
         struct pci_dev *dev;
         struct pci_bus *bus;
@@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
         int retval;
  
         bus = top;
-       down_read(&pci_bus_sem);
+       if (!locked)
+               down_read(&pci_bus_sem);
         next = top->devices.next;
         for (;;) {
                 if (next == &bus->devices) {
@@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
                 if (retval)
                         break;
         }
-       up_read(&pci_bus_sem);
+       if (!locked)
+               up_read(&pci_bus_sem);
+}
+
+/**
+ *  pci_walk_bus - walk devices on/under bus, calling callback.
+ *  @top: bus whose devices should be walked
+ *  @cb: callback to be called for each device found
+ *  @userdata: arbitrary pointer to be passed to callback
+ *
+ *  Walk the given bus, including any bridged devices
+ *  on buses under this bus.  Call the provided callback
+ *  on each device found.
+ *
+ *  We check the return of @cb each time. If it returns anything
+ *  other than 0, we break out.
+ */
+void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
+{
+       __pci_walk_bus(top, cb, userdata, false);
  }
  EXPORT_SYMBOL_GPL(pci_walk_bus);
  
+void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
+{
+       lockdep_assert_held(&pci_bus_sem);
+
+       __pci_walk_bus(top, cb, userdata, true);
+}
+EXPORT_SYMBOL_GPL(pci_walk_bus_locked);
+
  struct pci_bus *pci_bus_get(struct pci_bus *bus)
  {
         if (bus)
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c

index 5befed2dc02b70bc5f4593b6e04a173e8e7ded08..9a437cfce073c16996927af40fa0e3816b7c7b32 100644 (file)
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -6,6 +6,7 @@
   * Author: Kishon Vijay Abraham I <kishon@ti.com>
   */
  
+#include <linux/align.h>
  #include <linux/bitfield.h>
  #include <linux/of.h>
  #include <linux/platform_device.h>
@@ -482,9 +483,10 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
                 reg = ep_func->msi_cap + PCI_MSI_DATA_32;
                 msg_data = dw_pcie_ep_readw_dbi(ep, func_no, reg);
         }
-       aligned_offset = msg_addr_lower & (epc->mem->window.page_size - 1);
-       msg_addr = ((u64)msg_addr_upper) << 32 |
-                       (msg_addr_lower & ~aligned_offset);
+       msg_addr = ((u64)msg_addr_upper) << 32 | msg_addr_lower;
+
+       aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
+       msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size);
         ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
                                   epc->mem->window.page_size);
         if (ret)
@@ -551,7 +553,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
         }
  
         aligned_offset = msg_addr & (epc->mem->window.page_size - 1);
-       msg_addr &= ~aligned_offset;
+       msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size);
         ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
                                   epc->mem->window.page_size);
         if (ret)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c

index 10f2d0bb86bec008e82e6a86211161c693ae568e..2ce2a3bd932bd7e3824b69cc9450135895b7a89e 100644 (file)
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -972,7 +972,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata)
          * Downstream devices need to be in D0 state before enabling PCI PM
          * substates.
          */
-       pci_set_power_state(pdev, PCI_D0);
+       pci_set_power_state_locked(pdev, PCI_D0);
         pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
  
         return 0;
diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c

index c8be056c248ded75cae622f1d8cd82bcc81e5500..cfd84a899c82d881f9ed5c446aed0c204bfd3cd4 100644 (file)
--- a/drivers/pci/msi/irqdomain.c
+++ b/drivers/pci/msi/irqdomain.c
@@ -61,7 +61,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc)
  
         return (irq_hw_number_t)desc->msi_index |
                 pci_dev_id(dev) << 11 |
-               (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27;
+               ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27;
  }
  
  static void pci_msi_domain_set_desc(msi_alloc_info_t *arg,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c

index d8f11a078924c1336326456b0e3f37f7b0e66df9..c3585229c12a2145401d675ff84c20288b8f158e 100644 (file)
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1354,6 +1354,7 @@ end:
  /**
   * pci_set_full_power_state - Put a PCI device into D0 and update its state
   * @dev: PCI device to power up
+ * @locked: whether pci_bus_sem is held
   *
   * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
   * to confirm the state change, restore its BARs if they might be lost and
@@ -1363,7 +1364,7 @@ end:
   * to D0, it is more efficient to use pci_power_up() directly instead of this
   * function.
   */
-static int pci_set_full_power_state(struct pci_dev *dev)
+static int pci_set_full_power_state(struct pci_dev *dev, bool locked)
  {
         u16 pmcsr;
         int ret;
@@ -1399,7 +1400,7 @@ static int pci_set_full_power_state(struct pci_dev *dev)
         }
  
         if (dev->bus->self)
-               pcie_aspm_pm_state_change(dev->bus->self);
+               pcie_aspm_pm_state_change(dev->bus->self, locked);
  
         return 0;
  }
@@ -1428,10 +1429,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
                 pci_walk_bus(bus, __pci_dev_set_current_state, &state);
  }
  
+static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked)
+{
+       if (!bus)
+               return;
+
+       if (locked)
+               pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state);
+       else
+               pci_walk_bus(bus, __pci_dev_set_current_state, &state);
+}
+
  /**
   * pci_set_low_power_state - Put a PCI device into a low-power state.
   * @dev: PCI device to handle.
   * @state: PCI power state (D1, D2, D3hot) to put the device into.
+ * @locked: whether pci_bus_sem is held
   *
   * Use the device's PCI_PM_CTRL register to put it into a low-power state.
   *
@@ -1442,7 +1455,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
   * 0 if device already is in the requested state.
   * 0 if device's power state has been successfully changed.
   */
-static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
+static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
  {
         u16 pmcsr;
  
@@ -1496,29 +1509,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
                                      pci_power_name(state));
  
         if (dev->bus->self)
-               pcie_aspm_pm_state_change(dev->bus->self);
+               pcie_aspm_pm_state_change(dev->bus->self, locked);
  
         return 0;
  }
  
-/**
- * pci_set_power_state - Set the power state of a PCI device
- * @dev: PCI device to handle.
- * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
- *
- * Transition a device to a new power state, using the platform firmware and/or
- * the device's PCI PM registers.
- *
- * RETURN VALUE:
- * -EINVAL if the requested state is invalid.
- * -EIO if device does not support PCI PM or its PM capabilities register has a
- * wrong version, or device doesn't support the requested state.
- * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
- * 0 if device already is in the requested state.
- * 0 if the transition is to D3 but D3 is not supported.
- * 0 if device's power state has been successfully changed.
- */
-int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
  {
         int error;
  
@@ -1542,7 +1538,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
                 return 0;
  
         if (state == PCI_D0)
-               return pci_set_full_power_state(dev);
+               return pci_set_full_power_state(dev, locked);
  
         /*
          * This device is quirked not to be put into D3, so don't put it in
@@ -1556,16 +1552,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
                  * To put the device in D3cold, put it into D3hot in the native
                  * way, then put it into D3cold using platform ops.
                  */
-               error = pci_set_low_power_state(dev, PCI_D3hot);
+               error = pci_set_low_power_state(dev, PCI_D3hot, locked);
  
                 if (pci_platform_power_transition(dev, PCI_D3cold))
                         return error;
  
                 /* Powering off a bridge may power off the whole hierarchy */
                 if (dev->current_state == PCI_D3cold)
-                       pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
+                       __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked);
         } else {
-               error = pci_set_low_power_state(dev, state);
+               error = pci_set_low_power_state(dev, state, locked);
  
                 if (pci_platform_power_transition(dev, state))
                         return error;
@@ -1573,8 +1569,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
  
         return 0;
  }
+
+/**
+ * pci_set_power_state - Set the power state of a PCI device
+ * @dev: PCI device to handle.
+ * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
+ *
+ * Transition a device to a new power state, using the platform firmware and/or
+ * the device's PCI PM registers.
+ *
+ * RETURN VALUE:
+ * -EINVAL if the requested state is invalid.
+ * -EIO if device does not support PCI PM or its PM capabilities register has a
+ * wrong version, or device doesn't support the requested state.
+ * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
+ * 0 if device already is in the requested state.
+ * 0 if the transition is to D3 but D3 is not supported.
+ * 0 if device's power state has been successfully changed.
+ */
+int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+{
+       return __pci_set_power_state(dev, state, false);
+}
  EXPORT_SYMBOL(pci_set_power_state);
  
+int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
+{
+       lockdep_assert_held(&pci_bus_sem);
+
+       return __pci_set_power_state(dev, state, true);
+}
+EXPORT_SYMBOL(pci_set_power_state_locked);
+
  #define PCI_EXP_SAVE_REGS      7
  
  static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,
@@ -2496,29 +2522,36 @@ static void pci_pme_list_scan(struct work_struct *work)
                 if (pdev->pme_poll) {
                         struct pci_dev *bridge = pdev->bus->self;
                         struct device *dev = &pdev->dev;
-                       int pm_status;
+                       struct device *bdev = bridge ? &bridge->dev : NULL;
+                       int bref = 0;
  
                         /*
-                        * If bridge is in low power state, the
-                        * configuration space of subordinate devices
-                        * may be not accessible
+                        * If we have a bridge, it should be in an active/D0
+                        * state or the configuration space of subordinate
+                        * devices may not be accessible or stable over the
+                        * course of the call.
                          */
-                       if (bridge && bridge->current_state != PCI_D0)
-                               continue;
+                       if (bdev) {
+                               bref = pm_runtime_get_if_active(bdev, true);
+                               if (!bref)
+                                       continue;
+
+                               if (bridge->current_state != PCI_D0)
+                                       goto put_bridge;
+                       }
  
                         /*
-                        * If the device is in a low power state it
-                        * should not be polled either.
+                        * The device itself should be suspended but config
+                        * space must be accessible, therefore it cannot be in
+                        * D3cold.
                          */
-                       pm_status = pm_runtime_get_if_active(dev, true);
-                       if (!pm_status)
-                               continue;
-
-                       if (pdev->current_state != PCI_D3cold)
+                       if (pm_runtime_suspended(dev) &&
+                           pdev->current_state != PCI_D3cold)
                                 pci_pme_wakeup(pdev, NULL);
  
-                       if (pm_status > 0)
-                               pm_runtime_put(dev);
+put_bridge:
+                       if (bref > 0)
+                               pm_runtime_put(bdev);
                 } else {
                         list_del(&pme_dev->list);
                         kfree(pme_dev);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h

index 2336a8d1edab27646220794a3a4cdd085ba7b3e9..e9750b1b19bad5bfc500909f390f1d890f5eab73 100644 (file)
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -571,12 +571,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
  #ifdef CONFIG_PCIEASPM
  void pcie_aspm_init_link_state(struct pci_dev *pdev);
  void pcie_aspm_exit_link_state(struct pci_dev *pdev);
-void pcie_aspm_pm_state_change(struct pci_dev *pdev);
+void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
  void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
  #else
  static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
  static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
-static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
+static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { }
  static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
  #endif
  
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c

index 5a0066ecc3c5adcc97e14f08f166c783f254f6e9..bc0bd86695ec62a2d43428b69eb562f771334bb3 100644 (file)
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1003,8 +1003,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
         up_read(&pci_bus_sem);
  }
  
-/* @pdev: the root port or switch downstream port */
-void pcie_aspm_pm_state_change(struct pci_dev *pdev)
+/*
+ * @pdev: the root port or switch downstream port
+ * @locked: whether pci_bus_sem is held
+ */
+void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
  {
         struct pcie_link_state *link = pdev->link_state;
  
@@ -1014,12 +1017,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev)
          * Devices changed PM state, we should recheck if latency
          * meets all functions' requirement
          */
-       down_read(&pci_bus_sem);
+       if (!locked)
+               down_read(&pci_bus_sem);
         mutex_lock(&aspm_lock);
         pcie_update_aspm_capable(link->root);
         pcie_config_aspm_path(link);
         mutex_unlock(&aspm_lock);
-       up_read(&pci_bus_sem);
+       if (!locked)
+               up_read(&pci_bus_sem);
  }
  
  void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c

index c584165b13babd946eb7fcd84300bdf68abe31af..7e3aa7e2345fa3a9d7d3b1cb9c5dd40cc15498ff 100644 (file)
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -2305,6 +2305,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                                 dev_dbg(cmn->dev, "ignoring external node %llx\n", reg);
                                 continue;
                         }
+                       /*
+                        * AmpereOneX erratum AC04_MESH_1 makes some XPs report a bogus
+                        * child count larger than the number of valid child pointers.
+                        * A child offset of 0 can only occur on CMN-600; otherwise it
+                        * would imply the root node being its own grandchild, which
+                        * we can safely dismiss in general.
+                        */
+                       if (reg == 0 && cmn->part != PART_CMN600) {
+                               dev_dbg(cmn->dev, "bogus child pointer?\n");
+                               continue;
+                       }
  
                         arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn);
  
diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c

index 365d964b0f6a6d7382455f3b07035fafe1de1fa2..308c9969642e1f149cdebd9f8aed7812adbc5f1f 100644 (file)
--- a/drivers/perf/cxl_pmu.c
+++ b/drivers/perf/cxl_pmu.c
@@ -59,7 +59,7 @@
  #define   CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK     GENMASK_ULL(63, 59)
  
  #define CXL_PMU_FILTER_CFG_REG(n, f)   (0x400 + 4 * ((f) + (n) * 8))
-#define   CXL_PMU_FILTER_CFG_VALUE_MSK                 GENMASK(15, 0)
+#define   CXL_PMU_FILTER_CFG_VALUE_MSK                 GENMASK(31, 0)
  
  #define CXL_PMU_COUNTER_REG(n)         (0xc00 + 8 * (n))
  
@@ -314,9 +314,9 @@ static bool cxl_pmu_config1_get_edge(struct perf_event *event)
  }
  
  /*
- * CPMU specification allows for 8 filters, each with a 16 bit value...
- * So we need to find 8x16bits to store it in.
- * As the value used for disable is 0xffff, a separate enable switch
+ * CPMU specification allows for 8 filters, each with a 32 bit value...
+ * So we need to find 8x32bits to store it in.
+ * As the value used for disable is 0xffff_ffff, a separate enable switch
   * is needed.
   */
  
@@ -419,7 +419,7 @@ static struct attribute *cxl_pmu_event_attrs[] = {
         CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp,                     CXL_PMU_GID_S2M_NDR, BIT(0)),
         CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps,                    CXL_PMU_GID_S2M_NDR, BIT(1)),
         CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe,                    CXL_PMU_GID_S2M_NDR, BIT(2)),
-       CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack,           CXL_PMU_GID_S2M_NDR, BIT(3)),
+       CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack,           CXL_PMU_GID_S2M_NDR, BIT(4)),
         /* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
         CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata,                 CXL_PMU_GID_S2M_DRS, BIT(0)),
         CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm,              CXL_PMU_GID_S2M_DRS, BIT(1)),
@@ -642,7 +642,7 @@ static void cxl_pmu_event_start(struct perf_event *event, int flags)
                 if (cxl_pmu_config1_hdm_filter_en(event))
                         cfg = cxl_pmu_config2_get_hdm_decoder(event);
                 else
-                       cfg = GENMASK(15, 0); /* No filtering if 0xFFFF_FFFF */
+                       cfg = GENMASK(31, 0); /* No filtering if 0xFFFF_FFFF */
                 writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0));
         }
  
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c

index 0dda70e1ef90a19017c902689f970dea684b4f4c..c78a6fd6c57f612221749d44673d47845911231f 100644 (file)
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -150,19 +150,11 @@ u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
         struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
         struct hw_perf_event *hwc = &event->hw;
  
-       if (!rvpmu->ctr_get_width)
-       /**
-        * If the pmu driver doesn't support counter width, set it to default
-        * maximum allowed by the specification.
-        */
-               cwidth = 63;
-       else {
-               if (hwc->idx == -1)
-                       /* Handle init case where idx is not initialized yet */
-                       cwidth = rvpmu->ctr_get_width(0);
-               else
-                       cwidth = rvpmu->ctr_get_width(hwc->idx);
-       }
+       if (hwc->idx == -1)
+               /* Handle init case where idx is not initialized yet */
+               cwidth = rvpmu->ctr_get_width(0);
+       else
+               cwidth = rvpmu->ctr_get_width(hwc->idx);
  
         return GENMASK_ULL(cwidth, 0);
  }
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c

index 79fdd667922e812612aae1f597714bbefa0d4899..fa0bccf4edf2ea6172c7ee72d577cb0904073ea7 100644 (file)
--- a/drivers/perf/riscv_pmu_legacy.c
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -37,6 +37,12 @@ static int pmu_legacy_event_map(struct perf_event *event, u64 *config)
         return pmu_legacy_ctr_get_idx(event);
  }
  
+/* cycle & instret are always 64 bit, one bit less according to SBI spec */
+static int pmu_legacy_ctr_get_width(int idx)
+{
+       return 63;
+}
+
  static u64 pmu_legacy_read_ctr(struct perf_event *event)
  {
         struct hw_perf_event *hwc = &event->hw;
@@ -111,12 +117,14 @@ static void pmu_legacy_init(struct riscv_pmu *pmu)
         pmu->ctr_stop = NULL;
         pmu->event_map = pmu_legacy_event_map;
         pmu->ctr_get_idx = pmu_legacy_ctr_get_idx;
-       pmu->ctr_get_width = NULL;
+       pmu->ctr_get_width = pmu_legacy_ctr_get_width;
         pmu->ctr_clear_idx = NULL;
         pmu->ctr_read = pmu_legacy_read_ctr;
         pmu->event_mapped = pmu_legacy_event_mapped;
         pmu->event_unmapped = pmu_legacy_event_unmapped;
         pmu->csr_index = pmu_legacy_csr_index;
+       pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+       pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
  
         perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
  }
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c

index 16acd4dcdb96c75e07b45a3745a71842f2d7d2b8..452aab49db1e8ccc35a6bb0b76661ca7cb6fb71f 100644 (file)
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -512,7 +512,7 @@ static void pmu_sbi_set_scounteren(void *arg)
  
         if (event->hw.idx != -1)
                 csr_write(CSR_SCOUNTEREN,
-                         csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
+                         csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event)));
  }
  
  static void pmu_sbi_reset_scounteren(void *arg)
@@ -521,7 +521,7 @@ static void pmu_sbi_reset_scounteren(void *arg)
  
         if (event->hw.idx != -1)
                 csr_write(CSR_SCOUNTEREN,
-                         csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
+                         csr_read(CSR_SCOUNTEREN) & ~BIT(pmu_sbi_csr_index(event)));
  }
  
  static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
@@ -731,14 +731,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
                 /* compute hardware counter index */
                 hidx = info->csr - CSR_CYCLE;
                 /* check if the corresponding bit is set in sscountovf */
-               if (!(overflow & (1 << hidx)))
+               if (!(overflow & BIT(hidx)))
                         continue;
  
                 /*
                  * Keep a track of overflowed counters so that they can be started
                  * with updated initial value.
                  */
-               overflowed_ctrs |= 1 << lidx;
+               overflowed_ctrs |= BIT(lidx);
                 hw_evt = &event->hw;
                 riscv_pmu_event_update(event);
                 perf_sample_data_init(&data, 0, hw_evt->last_period);
diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c

index e625b32889bfceaef9846db42e594e971cccb54d..0928a526e2ab3692eaeb1e4abaa45e23eee4cf5b 100644 (file)
--- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
+++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c
@@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev)
                         return ret;
                 }
  
-               priv->id = of_alias_get_id(np, "mipi_dphy");
+               priv->id = of_alias_get_id(np, "mipi-dphy");
                 if (priv->id < 0) {
                         dev_err(dev, "Failed to get phy node alias id: %d\n",
                                 priv->id);
diff --git a/drivers/phy/microchip/lan966x_serdes.c b/drivers/phy/microchip/lan966x_serdes.c

index c1a41b6cd29b1d8f785134627547d68e38079747..b5ac2b7995e7156b73e348814ce4e29b6f71a874 100644 (file)
--- a/drivers/phy/microchip/lan966x_serdes.c
+++ b/drivers/phy/microchip/lan966x_serdes.c
@@ -96,6 +96,8 @@ static const struct serdes_mux lan966x_serdes_muxes[] = {
         SERDES_MUX_SGMII(SERDES6G(1), 3, HSIO_HW_CFG_SD6G_1_CFG,
                          HSIO_HW_CFG_SD6G_1_CFG_SET(1)),
  
+       SERDES_MUX_SGMII(SERDES6G(2), 4, 0, 0),
+
         SERDES_MUX_RGMII(RGMII(0), 2, HSIO_HW_CFG_RGMII_0_CFG |
                          HSIO_HW_CFG_RGMII_ENA |
                          HSIO_HW_CFG_GMII_ENA,
diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c

index a623f092b11f642bd3d35655e162a94a454bb14f..a43e20abb10d54a2ff2bbe29907f5c4597d6871d 100644 (file)
--- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c
+++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c
@@ -37,56 +37,28 @@
  #define EUSB2_TUNE_EUSB_EQU            0x5A
  #define EUSB2_TUNE_EUSB_HS_COMP_CUR    0x5B
  
-#define QCOM_EUSB2_REPEATER_INIT_CFG(r, v)     \
-       {                                       \
-               .reg = r,                       \
-               .val = v,                       \
-       }
-
-enum reg_fields {
-       F_TUNE_EUSB_HS_COMP_CUR,
-       F_TUNE_EUSB_EQU,
-       F_TUNE_EUSB_SLEW,
-       F_TUNE_USB2_HS_COMP_CUR,
-       F_TUNE_USB2_PREEM,
-       F_TUNE_USB2_EQU,
-       F_TUNE_USB2_SLEW,
-       F_TUNE_SQUELCH_U,
-       F_TUNE_HSDISC,
-       F_TUNE_RES_FSDIF,
-       F_TUNE_IUSB2,
-       F_TUNE_USB2_CROSSOVER,
-       F_NUM_TUNE_FIELDS,
-
-       F_FORCE_VAL_5 = F_NUM_TUNE_FIELDS,
-       F_FORCE_EN_5,
-
-       F_EN_CTL1,
-
-       F_RPTR_STATUS,
-       F_NUM_FIELDS,
-};
-
-static struct reg_field eusb2_repeater_tune_reg_fields[F_NUM_FIELDS] = {
-       [F_TUNE_EUSB_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_EUSB_HS_COMP_CUR, 0, 1),
-       [F_TUNE_EUSB_EQU] = REG_FIELD(EUSB2_TUNE_EUSB_EQU, 0, 1),
-       [F_TUNE_EUSB_SLEW] = REG_FIELD(EUSB2_TUNE_EUSB_SLEW, 0, 1),
-       [F_TUNE_USB2_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_USB2_HS_COMP_CUR, 0, 1),
-       [F_TUNE_USB2_PREEM] = REG_FIELD(EUSB2_TUNE_USB2_PREEM, 0, 2),
-       [F_TUNE_USB2_EQU] = REG_FIELD(EUSB2_TUNE_USB2_EQU, 0, 1),
-       [F_TUNE_USB2_SLEW] = REG_FIELD(EUSB2_TUNE_USB2_SLEW, 0, 1),
-       [F_TUNE_SQUELCH_U] = REG_FIELD(EUSB2_TUNE_SQUELCH_U, 0, 2),
-       [F_TUNE_HSDISC] = REG_FIELD(EUSB2_TUNE_HSDISC, 0, 2),
-       [F_TUNE_RES_FSDIF] = REG_FIELD(EUSB2_TUNE_RES_FSDIF, 0, 2),
-       [F_TUNE_IUSB2] = REG_FIELD(EUSB2_TUNE_IUSB2, 0, 3),
-       [F_TUNE_USB2_CROSSOVER] = REG_FIELD(EUSB2_TUNE_USB2_CROSSOVER, 0, 2),
-
-       [F_FORCE_VAL_5] = REG_FIELD(EUSB2_FORCE_VAL_5, 0, 7),
-       [F_FORCE_EN_5] = REG_FIELD(EUSB2_FORCE_EN_5, 0, 7),
-
-       [F_EN_CTL1] = REG_FIELD(EUSB2_EN_CTL1, 0, 7),
-
-       [F_RPTR_STATUS] = REG_FIELD(EUSB2_RPTR_STATUS, 0, 7),
+enum eusb2_reg_layout {
+       TUNE_EUSB_HS_COMP_CUR,
+       TUNE_EUSB_EQU,
+       TUNE_EUSB_SLEW,
+       TUNE_USB2_HS_COMP_CUR,
+       TUNE_USB2_PREEM,
+       TUNE_USB2_EQU,
+       TUNE_USB2_SLEW,
+       TUNE_SQUELCH_U,
+       TUNE_HSDISC,
+       TUNE_RES_FSDIF,
+       TUNE_IUSB2,
+       TUNE_USB2_CROSSOVER,
+       NUM_TUNE_FIELDS,
+
+       FORCE_VAL_5 = NUM_TUNE_FIELDS,
+       FORCE_EN_5,
+
+       EN_CTL1,
+
+       RPTR_STATUS,
+       LAYOUT_SIZE,
  };
  
  struct eusb2_repeater_cfg {
@@ -98,10 +70,11 @@ struct eusb2_repeater_cfg {
  
  struct eusb2_repeater {
         struct device *dev;
-       struct regmap_field *regs[F_NUM_FIELDS];
+       struct regmap *regmap;
         struct phy *phy;
         struct regulator_bulk_data *vregs;
         const struct eusb2_repeater_cfg *cfg;
+       u32 base;
         enum phy_mode mode;
  };
  
@@ -109,10 +82,10 @@ static const char * const pm8550b_vreg_l[] = {
         "vdd18", "vdd3",
  };
  
-static const u32 pm8550b_init_tbl[F_NUM_TUNE_FIELDS] = {
-       [F_TUNE_IUSB2] = 0x8,
-       [F_TUNE_SQUELCH_U] = 0x3,
-       [F_TUNE_USB2_PREEM] = 0x5,
+static const u32 pm8550b_init_tbl[NUM_TUNE_FIELDS] = {
+       [TUNE_IUSB2] = 0x8,
+       [TUNE_SQUELCH_U] = 0x3,
+       [TUNE_USB2_PREEM] = 0x5,
  };
  
  static const struct eusb2_repeater_cfg pm8550b_eusb2_cfg = {
@@ -140,47 +113,42 @@ static int eusb2_repeater_init_vregs(struct eusb2_repeater *rptr)
  
  static int eusb2_repeater_init(struct phy *phy)
  {
-       struct reg_field *regfields = eusb2_repeater_tune_reg_fields;
         struct eusb2_repeater *rptr = phy_get_drvdata(phy);
         struct device_node *np = rptr->dev->of_node;
-       u32 init_tbl[F_NUM_TUNE_FIELDS] = { 0 };
-       u8 override;
+       struct regmap *regmap = rptr->regmap;
+       const u32 *init_tbl = rptr->cfg->init_tbl;
+       u8 tune_usb2_preem = init_tbl[TUNE_USB2_PREEM];
+       u8 tune_hsdisc = init_tbl[TUNE_HSDISC];
+       u8 tune_iusb2 = init_tbl[TUNE_IUSB2];
+       u32 base = rptr->base;
         u32 val;
         int ret;
-       int i;
+
+       of_property_read_u8(np, "qcom,tune-usb2-amplitude", &tune_iusb2);
+       of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &tune_hsdisc);
+       of_property_read_u8(np, "qcom,tune-usb2-preem", &tune_usb2_preem);
  
         ret = regulator_bulk_enable(rptr->cfg->num_vregs, rptr->vregs);
         if (ret)
                 return ret;
  
-       regmap_field_update_bits(rptr->regs[F_EN_CTL1], EUSB2_RPTR_EN, EUSB2_RPTR_EN);
+       regmap_write(regmap, base + EUSB2_EN_CTL1, EUSB2_RPTR_EN);
  
-       for (i = 0; i < F_NUM_TUNE_FIELDS; i++) {
-               if (init_tbl[i]) {
-                       regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]);
-               } else {
-                       /* Write 0 if there's no value set */
-                       u32 mask = GENMASK(regfields[i].msb, regfields[i].lsb);
-
-                       regmap_field_update_bits(rptr->regs[i], mask, 0);
-               }
-       }
-       memcpy(init_tbl, rptr->cfg->init_tbl, sizeof(init_tbl));
+       regmap_write(regmap, base + EUSB2_TUNE_EUSB_HS_COMP_CUR, init_tbl[TUNE_EUSB_HS_COMP_CUR]);
+       regmap_write(regmap, base + EUSB2_TUNE_EUSB_EQU, init_tbl[TUNE_EUSB_EQU]);
+       regmap_write(regmap, base + EUSB2_TUNE_EUSB_SLEW, init_tbl[TUNE_EUSB_SLEW]);
+       regmap_write(regmap, base + EUSB2_TUNE_USB2_HS_COMP_CUR, init_tbl[TUNE_USB2_HS_COMP_CUR]);
+       regmap_write(regmap, base + EUSB2_TUNE_USB2_EQU, init_tbl[TUNE_USB2_EQU]);
+       regmap_write(regmap, base + EUSB2_TUNE_USB2_SLEW, init_tbl[TUNE_USB2_SLEW]);
+       regmap_write(regmap, base + EUSB2_TUNE_SQUELCH_U, init_tbl[TUNE_SQUELCH_U]);
+       regmap_write(regmap, base + EUSB2_TUNE_RES_FSDIF, init_tbl[TUNE_RES_FSDIF]);
+       regmap_write(regmap, base + EUSB2_TUNE_USB2_CROSSOVER, init_tbl[TUNE_USB2_CROSSOVER]);
  
-       if (!of_property_read_u8(np, "qcom,tune-usb2-amplitude", &override))
-               init_tbl[F_TUNE_IUSB2] = override;
+       regmap_write(regmap, base + EUSB2_TUNE_USB2_PREEM, tune_usb2_preem);
+       regmap_write(regmap, base + EUSB2_TUNE_HSDISC, tune_hsdisc);
+       regmap_write(regmap, base + EUSB2_TUNE_IUSB2, tune_iusb2);
  
-       if (!of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &override))
-               init_tbl[F_TUNE_HSDISC] = override;
-
-       if (!of_property_read_u8(np, "qcom,tune-usb2-preem", &override))
-               init_tbl[F_TUNE_USB2_PREEM] = override;
-
-       for (i = 0; i < F_NUM_TUNE_FIELDS; i++)
-               regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]);
-
-       ret = regmap_field_read_poll_timeout(rptr->regs[F_RPTR_STATUS],
-                                            val, val & RPTR_OK, 10, 5);
+       ret = regmap_read_poll_timeout(regmap, base + EUSB2_RPTR_STATUS, val, val & RPTR_OK, 10, 5);
         if (ret)
                 dev_err(rptr->dev, "initialization timed-out\n");
  
@@ -191,6 +159,8 @@ static int eusb2_repeater_set_mode(struct phy *phy,
                                    enum phy_mode mode, int submode)
  {
         struct eusb2_repeater *rptr = phy_get_drvdata(phy);
+       struct regmap *regmap = rptr->regmap;
+       u32 base = rptr->base;
  
         switch (mode) {
         case PHY_MODE_USB_HOST:
@@ -199,10 +169,8 @@ static int eusb2_repeater_set_mode(struct phy *phy,
                  * per eUSB 1.2 Spec. Below implement software workaround until
                  * PHY and controller is fixing seen observation.
                  */
-               regmap_field_update_bits(rptr->regs[F_FORCE_EN_5],
-                                        F_CLK_19P2M_EN, F_CLK_19P2M_EN);
-               regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5],
-                                        V_CLK_19P2M_EN, V_CLK_19P2M_EN);
+               regmap_write(regmap, base + EUSB2_FORCE_EN_5, F_CLK_19P2M_EN);
+               regmap_write(regmap, base + EUSB2_FORCE_VAL_5, V_CLK_19P2M_EN);
                 break;
         case PHY_MODE_USB_DEVICE:
                 /*
@@ -211,10 +179,8 @@ static int eusb2_repeater_set_mode(struct phy *phy,
                  * repeater doesn't clear previous value due to shared
                  * regulators (say host <-> device mode switch).
                  */
-               regmap_field_update_bits(rptr->regs[F_FORCE_EN_5],
-                                        F_CLK_19P2M_EN, 0);
-               regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5],
-                                        V_CLK_19P2M_EN, 0);
+               regmap_write(regmap, base + EUSB2_FORCE_EN_5, 0);
+               regmap_write(regmap, base + EUSB2_FORCE_VAL_5, 0);
                 break;
         default:
                 return -EINVAL;
@@ -243,9 +209,8 @@ static int eusb2_repeater_probe(struct platform_device *pdev)
         struct device *dev = &pdev->dev;
         struct phy_provider *phy_provider;
         struct device_node *np = dev->of_node;
-       struct regmap *regmap;
-       int i, ret;
         u32 res;
+       int ret;
  
         rptr = devm_kzalloc(dev, sizeof(*rptr), GFP_KERNEL);
         if (!rptr)
@@ -258,22 +223,15 @@ static int eusb2_repeater_probe(struct platform_device *pdev)
         if (!rptr->cfg)
                 return -EINVAL;
  
-       regmap = dev_get_regmap(dev->parent, NULL);
-       if (!regmap)
+       rptr->regmap = dev_get_regmap(dev->parent, NULL);
+       if (!rptr->regmap)
                 return -ENODEV;
  
         ret = of_property_read_u32(np, "reg", &res);
         if (ret < 0)
                 return ret;
  
-       for (i = 0; i < F_NUM_FIELDS; i++)
-               eusb2_repeater_tune_reg_fields[i].reg += res;
-
-       ret = devm_regmap_field_bulk_alloc(dev, regmap, rptr->regs,
-                                          eusb2_repeater_tune_reg_fields,
-                                          F_NUM_FIELDS);
-       if (ret)
-               return ret;
+       rptr->base = res;
  
         ret = eusb2_repeater_init_vregs(rptr);
         if (ret < 0) {
diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c

index c2590579190a935d76abc9cde99964c9958d3d07..03fb0d4b75d744492e4646af65287f61e7927f1b 100644 (file)
--- a/drivers/phy/qualcomm/phy-qcom-m31.c
+++ b/drivers/phy/qualcomm/phy-qcom-m31.c
@@ -299,7 +299,7 @@ static int m31usb_phy_probe(struct platform_device *pdev)
  
         qphy->vreg = devm_regulator_get(dev, "vdda-phy");
         if (IS_ERR(qphy->vreg))
-               return dev_err_probe(dev, PTR_ERR(qphy->phy),
+               return dev_err_probe(dev, PTR_ERR(qphy->vreg),
                                      "failed to get vreg\n");
  
         phy_set_drvdata(qphy->phy, qphy);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c

index 243cc2b9a0fb6d1fadc7384a9e93f453efad6351..5c003988c35d38cead7cc6b3e1e2af04a07bdb28 100644 (file)
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1556,6 +1556,14 @@ static const char * const qmp_phy_vreg_l[] = {
         "vdda-phy", "vdda-pll",
  };
  
+static const struct qmp_usb_offsets qmp_usb_offsets_v3 = {
+       .serdes         = 0,
+       .pcs            = 0x800,
+       .pcs_misc       = 0x600,
+       .tx             = 0x200,
+       .rx             = 0x400,
+};
+
  static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = {
         .serdes         = 0,
         .pcs            = 0x800,
@@ -1564,7 +1572,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = {
         .rx             = 0x400,
  };
  
-static const struct qmp_usb_offsets qmp_usb_offsets_v3 = {
+static const struct qmp_usb_offsets qmp_usb_offsets_v3_msm8996 = {
         .serdes         = 0,
         .pcs            = 0x600,
         .tx             = 0x200,
@@ -1613,6 +1621,24 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = {
         .rx             = 0x1000,
  };
  
+static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = {
+       .lanes                  = 1,
+
+       .offsets                = &qmp_usb_offsets_v3,
+
+       .serdes_tbl             = ipq9574_usb3_serdes_tbl,
+       .serdes_tbl_num         = ARRAY_SIZE(ipq9574_usb3_serdes_tbl),
+       .tx_tbl                 = msm8996_usb3_tx_tbl,
+       .tx_tbl_num             = ARRAY_SIZE(msm8996_usb3_tx_tbl),
+       .rx_tbl                 = ipq8074_usb3_rx_tbl,
+       .rx_tbl_num             = ARRAY_SIZE(ipq8074_usb3_rx_tbl),
+       .pcs_tbl                = ipq8074_usb3_pcs_tbl,
+       .pcs_tbl_num            = ARRAY_SIZE(ipq8074_usb3_pcs_tbl),
+       .vreg_list              = qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
+       .regs                   = qmp_v3_usb3phy_regs_layout,
+};
+
  static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = {
         .lanes                  = 1,
  
@@ -1652,7 +1678,7 @@ static const struct qmp_phy_cfg ipq9574_usb3phy_cfg = {
  static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
         .lanes                  = 1,
  
-       .offsets                = &qmp_usb_offsets_v3,
+       .offsets                = &qmp_usb_offsets_v3_msm8996,
  
         .serdes_tbl             = msm8996_usb3_serdes_tbl,
         .serdes_tbl_num         = ARRAY_SIZE(msm8996_usb3_serdes_tbl),
@@ -2563,7 +2589,7 @@ err_node_put:
  static const struct of_device_id qmp_usb_of_match_table[] = {
         {
                 .compatible = "qcom,ipq6018-qmp-usb3-phy",
-               .data = &ipq8074_usb3phy_cfg,
+               .data = &ipq6018_usb3phy_cfg,
         }, {
                 .compatible = "qcom,ipq8074-qmp-usb3-phy",
                 .data = &ipq8074_usb3phy_cfg,
diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c

index e53eace7c91e372e60d0fcbb6032e2e8fd510595..6387c0d34c551c0e4e28e09af0792cee69eb2952 100644 (file)
--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c
@@ -673,8 +673,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
         channel->irq = platform_get_irq_optional(pdev, 0);
         channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node);
         if (channel->dr_mode != USB_DR_MODE_UNKNOWN) {
-               int ret;
-
                 channel->is_otg_channel = true;
                 channel->uses_otg_pins = !of_property_read_bool(dev->of_node,
                                                         "renesas,no-otg-pins");
@@ -738,8 +736,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev)
                 ret = PTR_ERR(provider);
                 goto error;
         } else if (channel->is_otg_channel) {
-               int ret;
-
                 ret = device_create_file(dev, &dev_attr_role);
                 if (ret < 0)
                         goto error;
diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c

index dd2913ac0fa28cea0cabf82c491e2ba49dfcb80e..78e19b128962a9a504986c7d0e8135da50527aa3 100644 (file)
--- a/drivers/phy/ti/phy-omap-usb2.c
+++ b/drivers/phy/ti/phy-omap-usb2.c
@@ -117,7 +117,7 @@ static int omap_usb_set_vbus(struct usb_otg *otg, bool enabled)
  {
         struct omap_usb *phy = phy_to_omapusb(otg->usb_phy);
  
-       if (!phy->comparator)
+       if (!phy->comparator || !phy->comparator->set_vbus)
                 return -ENODEV;
  
         return phy->comparator->set_vbus(phy->comparator, enabled);
@@ -127,7 +127,7 @@ static int omap_usb_start_srp(struct usb_otg *otg)
  {
         struct omap_usb *phy = phy_to_omapusb(otg->usb_phy);
  
-       if (!phy->comparator)
+       if (!phy->comparator || !phy->comparator->start_srp)
                 return -ENODEV;
  
         return phy->comparator->start_srp(phy->comparator);
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c

index 03ecb3d1aaf60da974f32bb344203b418969064f..49f89b70dcecb4a4465b62aecded05aa3e0b19f7 100644 (file)
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
         }
  
         ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler,
-                              IRQF_SHARED, KBUILD_MODNAME, gpio_dev);
+                              IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev);
         if (ret)
                 goto out2;
  
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c

index feaa09f5b35a125c9c704a432f82c7672b7bc139..4f734e049f4a46b60b139cf38ec7c7a2e193a4f6 100644 (file)
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -296,7 +296,8 @@ static int amd_pmf_suspend_handler(struct device *dev)
  {
         struct amd_pmf_dev *pdev = dev_get_drvdata(dev);
  
-       kfree(pdev->buf);
+       if (pdev->smart_pc_enabled)
+               cancel_delayed_work_sync(&pdev->pb_work);
  
         return 0;
  }
@@ -312,6 +313,9 @@ static int amd_pmf_resume_handler(struct device *dev)
                         return ret;
         }
  
+       if (pdev->smart_pc_enabled)
+               schedule_delayed_work(&pdev->pb_work, msecs_to_jiffies(2000));
+
         return 0;
  }
  
@@ -330,9 +334,14 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev)
                 dev_dbg(dev->dev, "SPS enabled and Platform Profiles registered\n");
         }
  
-       if (!amd_pmf_init_smart_pc(dev)) {
+       amd_pmf_init_smart_pc(dev);
+       if (dev->smart_pc_enabled) {
                 dev_dbg(dev->dev, "Smart PC Solution Enabled\n");
-       } else if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) {
+               /* If Smart PC is enabled, no need to check for other features */
+               return;
+       }
+
+       if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) {
                 amd_pmf_init_auto_mode(dev);
                 dev_dbg(dev->dev, "Auto Mode Init done\n");
         } else if (is_apmf_func_supported(dev, APMF_FUNC_DYN_SLIDER_AC) ||
@@ -351,7 +360,7 @@ static void amd_pmf_deinit_features(struct amd_pmf_dev *dev)
                 amd_pmf_deinit_sps(dev);
         }
  
-       if (!dev->smart_pc_enabled) {
+       if (dev->smart_pc_enabled) {
                 amd_pmf_deinit_smart_pc(dev);
         } else if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) {
                 amd_pmf_deinit_auto_mode(dev);
diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h

index 16999c5b334fd44537404c56ab325aff00ede667..66cae1cca73cc16b73210e49af1c836c3da4d260 100644 (file)
--- a/drivers/platform/x86/amd/pmf/pmf.h
+++ b/drivers/platform/x86/amd/pmf/pmf.h
@@ -441,11 +441,6 @@ struct apmf_dyn_slider_output {
         struct apmf_cnqf_power_set ps[APMF_CNQF_MAX];
  } __packed;
  
-enum smart_pc_status {
-       PMF_SMART_PC_ENABLED,
-       PMF_SMART_PC_DISABLED,
-};
-
  /* Smart PC - TA internals */
  enum system_state {
         SYSTEM_STATE_S0i3,
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c

index f8c0177afb0dae60d4f67f2876ba98c6100d1ceb..8527dca9cf5604dc140ac22c5f06870bd32be296 100644 (file)
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -252,15 +252,17 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev)
         cookie = readl(dev->policy_buf + POLICY_COOKIE_OFFSET);
         length = readl(dev->policy_buf + POLICY_COOKIE_LEN);
  
-       if (cookie != POLICY_SIGN_COOKIE || !length)
+       if (cookie != POLICY_SIGN_COOKIE || !length) {
+               dev_dbg(dev->dev, "cookie doesn't match\n");
                 return -EINVAL;
+       }
  
         /* Update the actual length */
         dev->policy_sz = length + 512;
         res = amd_pmf_invoke_cmd_init(dev);
         if (res == TA_PMF_TYPE_SUCCESS) {
                 /* Now its safe to announce that smart pc is enabled */
-               dev->smart_pc_enabled = PMF_SMART_PC_ENABLED;
+               dev->smart_pc_enabled = true;
                 /*
                  * Start collecting the data from TA FW after a small delay
                  * or else, we might end up getting stale values.
@@ -268,7 +270,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev)
                 schedule_delayed_work(&dev->pb_work, msecs_to_jiffies(pb_actions_ms * 3));
         } else {
                 dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res);
-               dev->smart_pc_enabled = PMF_SMART_PC_DISABLED;
+               dev->smart_pc_enabled = false;
                 return res;
         }
  
@@ -336,25 +338,6 @@ static void amd_pmf_remove_pb(struct amd_pmf_dev *dev) {}
  static void amd_pmf_hex_dump_pb(struct amd_pmf_dev *dev) {}
  #endif
  
-static int amd_pmf_get_bios_buffer(struct amd_pmf_dev *dev)
-{
-       dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL);
-       if (!dev->policy_buf)
-               return -ENOMEM;
-
-       dev->policy_base = devm_ioremap(dev->dev, dev->policy_addr, dev->policy_sz);
-       if (!dev->policy_base)
-               return -ENOMEM;
-
-       memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz);
-
-       amd_pmf_hex_dump_pb(dev);
-       if (pb_side_load)
-               amd_pmf_open_pb(dev, dev->dbgfs_dir);
-
-       return amd_pmf_start_policy_engine(dev);
-}
-
  static int amd_pmf_amdtee_ta_match(struct tee_ioctl_version_data *ver, const void *data)
  {
         return ver->impl_id == TEE_IMPL_ID_AMDTEE;
@@ -453,22 +436,57 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
                 return ret;
  
         INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd);
-       amd_pmf_set_dram_addr(dev, true);
-       amd_pmf_get_bios_buffer(dev);
+
+       ret = amd_pmf_set_dram_addr(dev, true);
+       if (ret)
+               goto error;
+
+       dev->policy_base = devm_ioremap(dev->dev, dev->policy_addr, dev->policy_sz);
+       if (!dev->policy_base) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL);
+       if (!dev->policy_buf) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz);
+
+       amd_pmf_hex_dump_pb(dev);
+
         dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
         if (!dev->prev_data)
-               return -ENOMEM;
+               goto error;
+
+       ret = amd_pmf_start_policy_engine(dev);
+       if (ret)
+               goto error;
  
-       return dev->smart_pc_enabled;
+       if (pb_side_load)
+               amd_pmf_open_pb(dev, dev->dbgfs_dir);
+
+       return 0;
+
+error:
+       amd_pmf_deinit_smart_pc(dev);
+
+       return ret;
  }
  
  void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev)
  {
-       if (pb_side_load)
+       if (pb_side_load && dev->esbin)
                 amd_pmf_remove_pb(dev);
  
+       cancel_delayed_work_sync(&dev->pb_work);
         kfree(dev->prev_data);
+       dev->prev_data = NULL;
         kfree(dev->policy_buf);
-       cancel_delayed_work_sync(&dev->pb_work);
+       dev->policy_buf = NULL;
+       kfree(dev->buf);
+       dev->buf = NULL;
         amd_pmf_tee_deinit(dev);
  }
diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c

index b6708bab7c53d5afae8b4cd5d4fb07450a1c92ed..527d8fbc7cc1108da998e86d0d8dd970d9c5b179 100644 (file)
--- a/drivers/platform/x86/intel/int0002_vgpio.c
+++ b/drivers/platform/x86/intel/int0002_vgpio.c
@@ -196,7 +196,7 @@ static int int0002_probe(struct platform_device *pdev)
          * IRQs into gpiolib.
          */
         ret = devm_request_irq(dev, irq, int0002_irq,
-                              IRQF_SHARED, "INT0002", chip);
+                              IRQF_ONESHOT | IRQF_SHARED, "INT0002", chip);
         if (ret) {
                 dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret);
                 return ret;
diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c

index 210b0a81b7ecbe3ec28499c3c8dbd52cbbf1c3fb..084c355c86f5fa9050ccb881a7efa6682b538773 100644 (file)
--- a/drivers/platform/x86/intel/vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
         autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE);
  
         sparse_keymap_report_event(input_dev, event, val, autorelease);
-
-       /* Some devices need this to report further events */
-       acpi_evaluate_object(handle, "VBDL", NULL, NULL);
  }
  
  /*
diff --git a/drivers/platform/x86/serdev_helpers.h b/drivers/platform/x86/serdev_helpers.h

new file mode 100644 (file)

index 0000000..bcf3a0c
--- /dev/null
+++ b/drivers/platform/x86/serdev_helpers.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * In some cases UART attached devices which require an in kernel driver,
+ * e.g. UART attached Bluetooth HCIs are described in the ACPI tables
+ * by an ACPI device with a broken or missing UartSerialBusV2() resource.
+ *
+ * This causes the kernel to create a /dev/ttyS# char-device for the UART
+ * instead of creating an in kernel serdev-controller + serdev-device pair
+ * for the in kernel driver.
+ *
+ * The quirk handling in acpi_quirk_skip_serdev_enumeration() makes the kernel
+ * create a serdev-controller device for these UARTs instead of a /dev/ttyS#.
+ *
+ * Instantiating the actual serdev-device to bind to is up to pdx86 code,
+ * this header provides a helper for getting the serdev-controller device.
+ */
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/printk.h>
+#include <linux/sprintf.h>
+#include <linux/string.h>
+
+static inline struct device *
+get_serdev_controller(const char *serial_ctrl_hid,
+                     const char *serial_ctrl_uid,
+                     int serial_ctrl_port,
+                     const char *serdev_ctrl_name)
+{
+       struct device *ctrl_dev, *child;
+       struct acpi_device *ctrl_adev;
+       char name[32];
+       int i;
+
+       ctrl_adev = acpi_dev_get_first_match_dev(serial_ctrl_hid, serial_ctrl_uid, -1);
+       if (!ctrl_adev) {
+               pr_err("error could not get %s/%s serial-ctrl adev\n",
+                      serial_ctrl_hid, serial_ctrl_uid);
+               return ERR_PTR(-ENODEV);
+       }
+
+       /* get_first_physical_node() returns a weak ref */
+       ctrl_dev = get_device(acpi_get_first_physical_node(ctrl_adev));
+       if (!ctrl_dev) {
+               pr_err("error could not get %s/%s serial-ctrl physical node\n",
+                      serial_ctrl_hid, serial_ctrl_uid);
+               ctrl_dev = ERR_PTR(-ENODEV);
+               goto put_ctrl_adev;
+       }
+
+       /* Walk host -> uart-ctrl -> port -> serdev-ctrl */
+       for (i = 0; i < 3; i++) {
+               switch (i) {
+               case 0:
+                       snprintf(name, sizeof(name), "%s:0", dev_name(ctrl_dev));
+                       break;
+               case 1:
+                       snprintf(name, sizeof(name), "%s.%d",
+                                dev_name(ctrl_dev), serial_ctrl_port);
+                       break;
+               case 2:
+                       strscpy(name, serdev_ctrl_name, sizeof(name));
+                       break;
+               }
+
+               child = device_find_child_by_name(ctrl_dev, name);
+               put_device(ctrl_dev);
+               if (!child) {
+                       pr_err("error could not find '%s' device\n", name);
+                       ctrl_dev = ERR_PTR(-ENODEV);
+                       goto put_ctrl_adev;
+               }
+
+               ctrl_dev = child;
+       }
+
+put_ctrl_adev:
+       acpi_dev_put(ctrl_adev);
+       return ctrl_dev;
+}
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c

index 3a396b763c4963d1f965e1d635967bd3f3d60f18..ce3e08815a8e647f2bf5578d0383dd4621d8526f 100644 (file)
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -1009,7 +1009,16 @@ static ssize_t current_value_store(struct kobject *kobj,
                  * Note - this sets the variable and then the password as separate
                  * WMI calls. Function tlmi_save_bios_settings will error if the
                  * password is incorrect.
+                * Workstation's require the opcode to be set before changing the
+                * attribute.
                  */
+               if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) {
+                       ret = tlmi_opcode_setting("WmiOpcodePasswordAdmin",
+                                                 tlmi_priv.pwd_admin->password);
+                       if (ret)
+                               goto out;
+               }
+
                 set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name,
                                     new_setting);
                 if (!set_str) {
@@ -1021,17 +1030,10 @@ static ssize_t current_value_store(struct kobject *kobj,
                 if (ret)
                         goto out;
  
-               if (tlmi_priv.save_mode == TLMI_SAVE_BULK) {
+               if (tlmi_priv.save_mode == TLMI_SAVE_BULK)
                         tlmi_priv.save_required = true;
-               } else {
-                       if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) {
-                               ret = tlmi_opcode_setting("WmiOpcodePasswordAdmin",
-                                                         tlmi_priv.pwd_admin->password);
-                               if (ret)
-                                       goto out;
-                       }
+               else
                         ret = tlmi_save_bios_settings("");
-               }
         } else { /* old non-opcode based authentication method (deprecated) */
                 if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) {
                         auth_str = kasprintf(GFP_KERNEL, "%s,%s,%s;",
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c

index c4895e9bc7148ae991a541508a0672a9ae0345bf..5ecd9d33250d78f3a38c7f99b8b6c5c903cdf25d 100644 (file)
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -10308,6 +10308,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode,
                 return 0;
         default:
                 /* Unknown function */
+               pr_debug("unknown function 0x%x\n", funcmode);
                 return -EOPNOTSUPP;
         }
         return 0;
@@ -10493,8 +10494,8 @@ static void dytc_profile_refresh(void)
                 return;
  
         perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF;
-       convert_dytc_to_profile(funcmode, perfmode, &profile);
-       if (profile != dytc_current_profile) {
+       err = convert_dytc_to_profile(funcmode, perfmode, &profile);
+       if (!err && profile != dytc_current_profile) {
                 dytc_current_profile = profile;
                 platform_profile_notify();
         }
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c

index 7aee5e9ff2b8dd5810f83cc0317ed329b4361d2e..975cf24ae359a882974f35762894108d4a117fb8 100644 (file)
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -81,7 +81,7 @@ static const struct property_entry chuwi_hi8_air_props[] = {
  };
  
  static const struct ts_dmi_data chuwi_hi8_air_data = {
-       .acpi_name      = "MSSL1680:00",
+       .acpi_name      = "MSSL1680",
         .properties     = chuwi_hi8_air_props,
  };
  
@@ -415,18 +415,13 @@ static const struct property_entry gdix1001_upside_down_props[] = {
         { }
  };
  
-static const struct ts_dmi_data gdix1001_00_upside_down_data = {
-       .acpi_name      = "GDIX1001:00",
-       .properties     = gdix1001_upside_down_props,
-};
-
-static const struct ts_dmi_data gdix1001_01_upside_down_data = {
-       .acpi_name      = "GDIX1001:01",
+static const struct ts_dmi_data gdix1001_upside_down_data = {
+       .acpi_name      = "GDIX1001",
         .properties     = gdix1001_upside_down_props,
  };
  
-static const struct ts_dmi_data gdix1002_00_upside_down_data = {
-       .acpi_name      = "GDIX1002:00",
+static const struct ts_dmi_data gdix1002_upside_down_data = {
+       .acpi_name      = "GDIX1002",
         .properties     = gdix1001_upside_down_props,
  };
  
@@ -1412,7 +1407,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
         },
         {
                 /* Juno Tablet */
-               .driver_data = (void *)&gdix1002_00_upside_down_data,
+               .driver_data = (void *)&gdix1002_upside_down_data,
                 .matches = {
                         DMI_MATCH(DMI_SYS_VENDOR, "Default string"),
                         /* Both product- and board-name being "Default string" is somewhat rare */
@@ -1658,7 +1653,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
         },
         {
                 /* Teclast X89 (Android version / BIOS) */
-               .driver_data = (void *)&gdix1001_00_upside_down_data,
+               .driver_data = (void *)&gdix1001_upside_down_data,
                 .matches = {
                         DMI_MATCH(DMI_BOARD_VENDOR, "WISKY"),
                         DMI_MATCH(DMI_BOARD_NAME, "3G062i"),
@@ -1666,7 +1661,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
         },
         {
                 /* Teclast X89 (Windows version / BIOS) */
-               .driver_data = (void *)&gdix1001_01_upside_down_data,
+               .driver_data = (void *)&gdix1001_upside_down_data,
                 .matches = {
                         /* tPAD is too generic, also match on bios date */
                         DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
@@ -1684,7 +1679,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
         },
         {
                 /* Teclast X98 Pro */
-               .driver_data = (void *)&gdix1001_00_upside_down_data,
+               .driver_data = (void *)&gdix1001_upside_down_data,
                 .matches = {
                         /*
                          * Only match BIOS date, because the manufacturers
@@ -1788,7 +1783,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
         },
         {
                 /* "WinBook TW100" */
-               .driver_data = (void *)&gdix1001_00_upside_down_data,
+               .driver_data = (void *)&gdix1001_upside_down_data,
                 .matches = {
                         DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
                         DMI_MATCH(DMI_PRODUCT_NAME, "TW100")
@@ -1796,7 +1791,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
         },
         {
                 /* WinBook TW700 */
-               .driver_data = (void *)&gdix1001_00_upside_down_data,
+               .driver_data = (void *)&gdix1001_upside_down_data,
                 .matches = {
                         DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
                         DMI_MATCH(DMI_PRODUCT_NAME, "TW700")
@@ -1821,7 +1816,7 @@ static void ts_dmi_add_props(struct i2c_client *client)
         int error;
  
         if (has_acpi_companion(dev) &&
-           !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) {
+           strstarts(client->name, ts_data->acpi_name)) {
                 error = device_create_managed_software_node(dev, ts_data->properties, NULL);
                 if (error)
                         dev_err(dev, "failed to add properties: %d\n", error);
diff --git a/drivers/platform/x86/x86-android-tablets/core.c b/drivers/platform/x86/x86-android-tablets/core.c

index f8221a15575b327c78df4edb191fdc28c52fe2c1..a3415f1c0b5f82a3f54d4a6eb6fed54d98d5e366 100644 (file)
--- a/drivers/platform/x86/x86-android-tablets/core.c
+++ b/drivers/platform/x86/x86-android-tablets/core.c
@@ -21,6 +21,7 @@
  #include <linux/string.h>
  
  #include "x86-android-tablets.h"
+#include "../serdev_helpers.h"
  
  static struct platform_device *x86_android_tablet_device;
  
@@ -113,6 +114,9 @@ int x86_acpi_irq_helper_get(const struct x86_acpi_irq_data *data)
                 if (irq_type != IRQ_TYPE_NONE && irq_type != irq_get_trigger_type(irq))
                         irq_set_irq_type(irq, irq_type);
  
+               if (data->free_gpio)
+                       devm_gpiod_put(&x86_android_tablet_device->dev, gpiod);
+
                 return irq;
         case X86_ACPI_IRQ_TYPE_PMIC:
                 status = acpi_get_handle(NULL, data->chip, &handle);
@@ -229,38 +233,20 @@ static __init int x86_instantiate_spi_dev(const struct x86_dev_info *dev_info, i
  
  static __init int x86_instantiate_serdev(const struct x86_serdev_info *info, int idx)
  {
-       struct acpi_device *ctrl_adev, *serdev_adev;
+       struct acpi_device *serdev_adev;
         struct serdev_device *serdev;
         struct device *ctrl_dev;
         int ret = -ENODEV;
  
-       ctrl_adev = acpi_dev_get_first_match_dev(info->ctrl_hid, info->ctrl_uid, -1);
-       if (!ctrl_adev) {
-               pr_err("error could not get %s/%s ctrl adev\n",
-                      info->ctrl_hid, info->ctrl_uid);
-               return -ENODEV;
-       }
+       ctrl_dev = get_serdev_controller(info->ctrl_hid, info->ctrl_uid, 0,
+                                        info->ctrl_devname);
+       if (IS_ERR(ctrl_dev))
+               return PTR_ERR(ctrl_dev);
  
         serdev_adev = acpi_dev_get_first_match_dev(info->serdev_hid, NULL, -1);
         if (!serdev_adev) {
                 pr_err("error could not get %s serdev adev\n", info->serdev_hid);
-               goto put_ctrl_adev;
-       }
-
-       /* get_first_physical_node() returns a weak ref, no need to put() it */
-       ctrl_dev = acpi_get_first_physical_node(ctrl_adev);
-       if (!ctrl_dev)  {
-               pr_err("error could not get %s/%s ctrl physical dev\n",
-                      info->ctrl_hid, info->ctrl_uid);
-               goto put_serdev_adev;
-       }
-
-       /* ctrl_dev now points to the controller's parent, get the controller */
-       ctrl_dev = device_find_child_by_name(ctrl_dev, info->ctrl_devname);
-       if (!ctrl_dev) {
-               pr_err("error could not get %s/%s %s ctrl dev\n",
-                      info->ctrl_hid, info->ctrl_uid, info->ctrl_devname);
-               goto put_serdev_adev;
+               goto put_ctrl_dev;
         }
  
         serdev = serdev_device_alloc(to_serdev_controller(ctrl_dev));
@@ -283,8 +269,8 @@ static __init int x86_instantiate_serdev(const struct x86_serdev_info *info, int
  
  put_serdev_adev:
         acpi_dev_put(serdev_adev);
-put_ctrl_adev:
-       acpi_dev_put(ctrl_adev);
+put_ctrl_dev:
+       put_device(ctrl_dev);
         return ret;
  }
  
diff --git a/drivers/platform/x86/x86-android-tablets/lenovo.c b/drivers/platform/x86/x86-android-tablets/lenovo.c

index f1c66a61bfc52786f1a6cd49da8ee88c423adbea..c297391955adbcb9a6b076dfb8f009ae4bce2bcb 100644 (file)
--- a/drivers/platform/x86/x86-android-tablets/lenovo.c
+++ b/drivers/platform/x86/x86-android-tablets/lenovo.c
@@ -116,6 +116,7 @@ static const struct x86_i2c_client_info lenovo_yb1_x90_i2c_clients[] __initconst
                         .trigger = ACPI_EDGE_SENSITIVE,
                         .polarity = ACPI_ACTIVE_LOW,
                         .con_id = "goodix_ts_irq",
+                       .free_gpio = true,
                 },
         }, {
                 /* Wacom Digitizer in keyboard half */
diff --git a/drivers/platform/x86/x86-android-tablets/other.c b/drivers/platform/x86/x86-android-tablets/other.c

index bc6bbf7ec6ea137101394b59d38fb7471675b00c..278402dcb808c5f2b7e25a894c117177867250d0 100644 (file)
--- a/drivers/platform/x86/x86-android-tablets/other.c
+++ b/drivers/platform/x86/x86-android-tablets/other.c
@@ -68,7 +68,7 @@ static const struct x86_i2c_client_info acer_b1_750_i2c_clients[] __initconst =
         },
  };
  
-static struct gpiod_lookup_table acer_b1_750_goodix_gpios = {
+static struct gpiod_lookup_table acer_b1_750_nvt_ts_gpios = {
         .dev_id = "i2c-NVT-ts",
         .table = {
                 GPIO_LOOKUP("INT33FC:01", 26, "reset", GPIO_ACTIVE_LOW),
@@ -77,7 +77,7 @@ static struct gpiod_lookup_table acer_b1_750_goodix_gpios = {
  };
  
  static struct gpiod_lookup_table * const acer_b1_750_gpios[] = {
-       &acer_b1_750_goodix_gpios,
+       &acer_b1_750_nvt_ts_gpios,
         &int3496_reference_gpios,
         NULL
  };
diff --git a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h

index 49fed9410adbadad39d397a7b541f52b13c03564..468993edfeee25bcb541daedbe6006ccc7fc44bb 100644 (file)
--- a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h
+++ b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h
@@ -39,6 +39,7 @@ struct x86_acpi_irq_data {
         int index;
         int trigger;  /* ACPI_EDGE_SENSITIVE / ACPI_LEVEL_SENSITIVE */
         int polarity; /* ACPI_ACTIVE_HIGH / ACPI_ACTIVE_LOW / ACPI_ACTIVE_BOTH */
+       bool free_gpio; /* Release GPIO after getting IRQ (for TYPE_GPIOINT) */
         const char *con_id;
  };
  
diff --git a/drivers/pmdomain/arm/scmi_perf_domain.c b/drivers/pmdomain/arm/scmi_perf_domain.c

index 709bbc448fad431d894479146982664002578584..d7ef46ccd9b8a414f8066f7fe2718f9867c89003 100644 (file)
--- a/drivers/pmdomain/arm/scmi_perf_domain.c
+++ b/drivers/pmdomain/arm/scmi_perf_domain.c
@@ -159,6 +159,9 @@ static void scmi_perf_domain_remove(struct scmi_device *sdev)
         struct genpd_onecell_data *scmi_pd_data = dev_get_drvdata(dev);
         int i;
  
+       if (!scmi_pd_data)
+               return;
+
         of_genpd_del_provider(dev->of_node);
  
         for (i = 0; i < scmi_pd_data->num_domains; i++)
diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c

index a1f6cba3ae6c86a386ab68bf46866ee95663eb51..18e232b5ed53d73ab24bd4fe3dab94c69235436d 100644 (file)
--- a/drivers/pmdomain/core.c
+++ b/drivers/pmdomain/core.c
@@ -1109,7 +1109,7 @@ static int __init genpd_power_off_unused(void)
  
         return 0;
  }
-late_initcall(genpd_power_off_unused);
+late_initcall_sync(genpd_power_off_unused);
  
  #ifdef CONFIG_PM_SLEEP
  
diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c

index e26dc17d07ad71d8398044670227c93a6bdd4427..e274e3315fe7a60887bec6a1fa85db69156e7fd6 100644 (file)
--- a/drivers/pmdomain/mediatek/mtk-pm-domains.c
+++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c
@@ -561,6 +561,11 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren
                         goto err_put_node;
                 }
  
+               /* recursive call to add all subdomains */
+               ret = scpsys_add_subdomain(scpsys, child);
+               if (ret)
+                       goto err_put_node;
+
                 ret = pm_genpd_add_subdomain(parent_pd, child_pd);
                 if (ret) {
                         dev_err(scpsys->dev, "failed to add %s subdomain to parent %s\n",
@@ -570,11 +575,6 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren
                         dev_dbg(scpsys->dev, "%s add subdomain: %s\n", parent_pd->name,
                                 child_pd->name);
                 }
-
-               /* recursive call to add all subdomains */
-               ret = scpsys_add_subdomain(scpsys, child);
-               if (ret)
-                       goto err_put_node;
         }
  
         return 0;
@@ -588,9 +588,6 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd)
  {
         int ret;
  
-       if (scpsys_domain_is_on(pd))
-               scpsys_power_off(&pd->genpd);
-
         /*
          * We're in the error cleanup already, so we only complain,
          * but won't emit another error on top of the original one.
@@ -600,6 +597,8 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd)
                 dev_err(pd->scpsys->dev,
                         "failed to remove domain '%s' : %d - state may be inconsistent\n",
                         pd->genpd.name, ret);
+       if (scpsys_domain_is_on(pd))
+               scpsys_power_off(&pd->genpd);
  
         clk_bulk_put(pd->num_clks, pd->clks);
         clk_bulk_put(pd->num_subsys_clks, pd->subsys_clks);
diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c

index 3078896b13008865816edc575fe0d769b44c9453..47df910645f6680ab4a17948700f426904007b86 100644 (file)
--- a/drivers/pmdomain/qcom/rpmhpd.c
+++ b/drivers/pmdomain/qcom/rpmhpd.c
@@ -692,6 +692,7 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
         unsigned int active_corner, sleep_corner;
         unsigned int this_active_corner = 0, this_sleep_corner = 0;
         unsigned int peer_active_corner = 0, peer_sleep_corner = 0;
+       unsigned int peer_enabled_corner;
  
         if (pd->state_synced) {
                 to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner);
@@ -701,9 +702,11 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner)
                 this_sleep_corner = pd->level_count - 1;
         }
  
-       if (peer && peer->enabled)
-               to_active_sleep(peer, peer->corner, &peer_active_corner,
+       if (peer && peer->enabled) {
+               peer_enabled_corner = max(peer->corner, peer->enable_corner);
+               to_active_sleep(peer, peer_enabled_corner, &peer_active_corner,
                                 &peer_sleep_corner);
+       }
  
         active_corner = max(this_active_corner, peer_active_corner);
  
diff --git a/drivers/pmdomain/renesas/r8a77980-sysc.c b/drivers/pmdomain/renesas/r8a77980-sysc.c

index 39ca84a67daadd21202e1ba80f13ec6cbc671a7a..621e411fc9991a4050cd6da699695912f18a46b0 100644 (file)
--- a/drivers/pmdomain/renesas/r8a77980-sysc.c
+++ b/drivers/pmdomain/renesas/r8a77980-sysc.c
@@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77980_areas[] __initconst = {
           PD_CPU_NOCR },
         { "ca53-cpu3",  0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU,
           PD_CPU_NOCR },
-       { "cr7",        0x240, 0, R8A77980_PD_CR7,      R8A77980_PD_ALWAYS_ON },
+       { "cr7",        0x240, 0, R8A77980_PD_CR7,      R8A77980_PD_ALWAYS_ON,
+         PD_CPU_NOCR },
         { "a3ir",       0x180, 0, R8A77980_PD_A3IR,     R8A77980_PD_ALWAYS_ON },
         { "a2ir0",      0x400, 0, R8A77980_PD_A2IR0,    R8A77980_PD_A3IR },
         { "a2ir1",      0x400, 1, R8A77980_PD_A2IR1,    R8A77980_PD_A3IR },
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig

index f21cb05815ec6391cc5e11c7edc5190b7163aa94..3e31375491d58055b19f1b61b57dcac3d849b363 100644 (file)
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -978,6 +978,7 @@ config CHARGER_QCOM_SMB2
  config FUEL_GAUGE_MM8013
         tristate "Mitsumi MM8013 fuel gauge driver"
         depends on I2C
+       select REGMAP_I2C
         help
           Say Y here to enable the Mitsumi MM8013 fuel gauge driver.
           It enables the monitoring of many battery parameters, including
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c

index 3a1798b0c1a79f3ed3a3fd0be4d84f6df390b3b4..9910c600743ebd9b9e01a1cb393c0378ae837807 100644 (file)
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client)
  {
         struct bq27xxx_device_info *di = i2c_get_clientdata(client);
  
-       free_irq(client->irq, di);
+       if (client->irq)
+               free_irq(client->irq, di);
+
         bq27xxx_battery_teardown(di);
  
         mutex_lock(&battery_mutex);
diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c

index a12e2a66d516f9de6e4b7ccc3f8048861322624a..ec163d1bcd189192abcecbcb4e29e0e4251b2e38 100644 (file)
--- a/drivers/power/supply/qcom_battmgr.c
+++ b/drivers/power/supply/qcom_battmgr.c
@@ -282,7 +282,6 @@ struct qcom_battmgr_wireless {
  
  struct qcom_battmgr {
         struct device *dev;
-       struct auxiliary_device *adev;
         struct pmic_glink_client *client;
  
         enum qcom_battmgr_variant variant;
@@ -1294,69 +1293,11 @@ static void qcom_battmgr_enable_worker(struct work_struct *work)
                 dev_err(battmgr->dev, "failed to request power notifications\n");
  }
  
-static char *qcom_battmgr_battery[] = { "battery" };
-
-static void qcom_battmgr_register_psy(struct qcom_battmgr *battmgr)
-{
-       struct power_supply_config psy_cfg_supply = {};
-       struct auxiliary_device *adev = battmgr->adev;
-       struct power_supply_config psy_cfg = {};
-       struct device *dev = &adev->dev;
-
-       psy_cfg.drv_data = battmgr;
-       psy_cfg.of_node = adev->dev.of_node;
-
-       psy_cfg_supply.drv_data = battmgr;
-       psy_cfg_supply.of_node = adev->dev.of_node;
-       psy_cfg_supply.supplied_to = qcom_battmgr_battery;
-       psy_cfg_supply.num_supplicants = 1;
-
-       if (battmgr->variant == QCOM_BATTMGR_SC8280XP) {
-               battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg);
-               if (IS_ERR(battmgr->bat_psy))
-                       dev_err(dev, "failed to register battery power supply (%ld)\n",
-                               PTR_ERR(battmgr->bat_psy));
-
-               battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply);
-               if (IS_ERR(battmgr->ac_psy))
-                       dev_err(dev, "failed to register AC power supply (%ld)\n",
-                               PTR_ERR(battmgr->ac_psy));
-
-               battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply);
-               if (IS_ERR(battmgr->usb_psy))
-                       dev_err(dev, "failed to register USB power supply (%ld)\n",
-                               PTR_ERR(battmgr->usb_psy));
-
-               battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply);
-               if (IS_ERR(battmgr->wls_psy))
-                       dev_err(dev, "failed to register wireless charing power supply (%ld)\n",
-                               PTR_ERR(battmgr->wls_psy));
-       } else {
-               battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg);
-               if (IS_ERR(battmgr->bat_psy))
-                       dev_err(dev, "failed to register battery power supply (%ld)\n",
-                               PTR_ERR(battmgr->bat_psy));
-
-               battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply);
-               if (IS_ERR(battmgr->usb_psy))
-                       dev_err(dev, "failed to register USB power supply (%ld)\n",
-                               PTR_ERR(battmgr->usb_psy));
-
-               battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply);
-               if (IS_ERR(battmgr->wls_psy))
-                       dev_err(dev, "failed to register wireless charing power supply (%ld)\n",
-                               PTR_ERR(battmgr->wls_psy));
-       }
-}
-
  static void qcom_battmgr_pdr_notify(void *priv, int state)
  {
         struct qcom_battmgr *battmgr = priv;
  
         if (state == SERVREG_SERVICE_STATE_UP) {
-               if (!battmgr->bat_psy)
-                       qcom_battmgr_register_psy(battmgr);
-
                 battmgr->service_up = true;
                 schedule_work(&battmgr->enable_work);
         } else {
@@ -1371,9 +1312,13 @@ static const struct of_device_id qcom_battmgr_of_variants[] = {
         {}
  };
  
+static char *qcom_battmgr_battery[] = { "battery" };
+
  static int qcom_battmgr_probe(struct auxiliary_device *adev,
                               const struct auxiliary_device_id *id)
  {
+       struct power_supply_config psy_cfg_supply = {};
+       struct power_supply_config psy_cfg = {};
         const struct of_device_id *match;
         struct qcom_battmgr *battmgr;
         struct device *dev = &adev->dev;
@@ -1383,7 +1328,14 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev,
                 return -ENOMEM;
  
         battmgr->dev = dev;
-       battmgr->adev = adev;
+
+       psy_cfg.drv_data = battmgr;
+       psy_cfg.of_node = adev->dev.of_node;
+
+       psy_cfg_supply.drv_data = battmgr;
+       psy_cfg_supply.of_node = adev->dev.of_node;
+       psy_cfg_supply.supplied_to = qcom_battmgr_battery;
+       psy_cfg_supply.num_supplicants = 1;
  
         INIT_WORK(&battmgr->enable_work, qcom_battmgr_enable_worker);
         mutex_init(&battmgr->lock);
@@ -1395,6 +1347,43 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev,
         else
                 battmgr->variant = QCOM_BATTMGR_SM8350;
  
+       if (battmgr->variant == QCOM_BATTMGR_SC8280XP) {
+               battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg);
+               if (IS_ERR(battmgr->bat_psy))
+                       return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy),
+                                            "failed to register battery power supply\n");
+
+               battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply);
+               if (IS_ERR(battmgr->ac_psy))
+                       return dev_err_probe(dev, PTR_ERR(battmgr->ac_psy),
+                                            "failed to register AC power supply\n");
+
+               battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply);
+               if (IS_ERR(battmgr->usb_psy))
+                       return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy),
+                                            "failed to register USB power supply\n");
+
+               battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply);
+               if (IS_ERR(battmgr->wls_psy))
+                       return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy),
+                                            "failed to register wireless charing power supply\n");
+       } else {
+               battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg);
+               if (IS_ERR(battmgr->bat_psy))
+                       return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy),
+                                            "failed to register battery power supply\n");
+
+               battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply);
+               if (IS_ERR(battmgr->usb_psy))
+                       return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy),
+                                            "failed to register USB power supply\n");
+
+               battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply);
+               if (IS_ERR(battmgr->wls_psy))
+                       return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy),
+                                            "failed to register wireless charing power supply\n");
+       }
+
         battmgr->client = devm_pmic_glink_register_client(dev,
                                                           PMIC_GLINK_OWNER_BATTMGR,
                                                           qcom_battmgr_callback,
diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c

index bc88a40a88d4cac0bbe61efc4725703b1136654e..8bbcd983a74aa8d8e5db6ae9e9cb7480a9220575 100644 (file)
--- a/drivers/regulator/max5970-regulator.c
+++ b/drivers/regulator/max5970-regulator.c
@@ -29,8 +29,8 @@ struct max5970_regulator {
  };
  
  enum max597x_regulator_id {
-       MAX597X_SW0,
-       MAX597X_SW1,
+       MAX597X_sw0,
+       MAX597X_sw1,
  };
  
  static int max5970_read_adc(struct regmap *regmap, int reg, long *val)
@@ -378,8 +378,8 @@ static int max597x_dt_parse(struct device_node *np,
  }
  
  static const struct regulator_desc regulators[] = {
-       MAX597X_SWITCH(SW0, MAX5970_REG_CHXEN, 0, "vss1"),
-       MAX597X_SWITCH(SW1, MAX5970_REG_CHXEN, 1, "vss2"),
+       MAX597X_SWITCH(sw0, MAX5970_REG_CHXEN, 0, "vss1"),
+       MAX597X_SWITCH(sw1, MAX5970_REG_CHXEN, 1, "vss2"),
  };
  
  static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg,
@@ -392,7 +392,7 @@ static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg,
                 return ret;
  
         if (*val)
-               return regmap_write(map, reg, *val);
+               return regmap_write(map, reg, 0);
  
         return 0;
  }
diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c

index 698c420e0869bd464f1368305b16d26a18360fa4..60cfcd741c2af31ce7e351cdf8cfb35f996264cf 100644 (file)
--- a/drivers/regulator/pwm-regulator.c
+++ b/drivers/regulator/pwm-regulator.c
@@ -157,7 +157,17 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev)
  
         pwm_get_state(drvdata->pwm, &pstate);
  
+       if (!pstate.enabled) {
+               if (pstate.polarity == PWM_POLARITY_INVERSED)
+                       pstate.duty_cycle = pstate.period;
+               else
+                       pstate.duty_cycle = 0;
+       }
+
         voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit);
+       if (voltage < min(max_uV_duty, min_uV_duty) ||
+           voltage > max(max_uV_duty, min_uV_duty))
+               return -ENOTRECOVERABLE;
  
         /*
          * The dutycycle for min_uV might be greater than the one for max_uV.
@@ -313,6 +323,32 @@ static int pwm_regulator_init_continuous(struct platform_device *pdev,
         return 0;
  }
  
+static int pwm_regulator_init_boot_on(struct platform_device *pdev,
+                                     struct pwm_regulator_data *drvdata,
+                                     const struct regulator_init_data *init_data)
+{
+       struct pwm_state pstate;
+
+       if (!init_data->constraints.boot_on || drvdata->enb_gpio)
+               return 0;
+
+       pwm_get_state(drvdata->pwm, &pstate);
+       if (pstate.enabled)
+               return 0;
+
+       /*
+        * Update the duty cycle so the output does not change
+        * when the regulator core enables the regulator (and
+        * thus the PWM channel).
+        */
+       if (pstate.polarity == PWM_POLARITY_INVERSED)
+               pstate.duty_cycle = pstate.period;
+       else
+               pstate.duty_cycle = 0;
+
+       return pwm_apply_might_sleep(drvdata->pwm, &pstate);
+}
+
  static int pwm_regulator_probe(struct platform_device *pdev)
  {
         const struct regulator_init_data *init_data;
@@ -372,6 +408,13 @@ static int pwm_regulator_probe(struct platform_device *pdev)
         if (ret)
                 return ret;
  
+       ret = pwm_regulator_init_boot_on(pdev, drvdata, init_data);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to apply boot_on settings: %d\n",
+                       ret);
+               return ret;
+       }
+
         regulator = devm_regulator_register(&pdev->dev,
                                             &drvdata->desc, &config);
         if (IS_ERR(regulator)) {
diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c

index f48214e2c3b46000eb2e833b422be4474a08f920..04133510e5af7dee68f7d4cb8f10f7af02ff44ab 100644 (file)
--- a/drivers/regulator/ti-abb-regulator.c
+++ b/drivers/regulator/ti-abb-regulator.c
@@ -726,9 +726,25 @@ static int ti_abb_probe(struct platform_device *pdev)
                         return PTR_ERR(abb->setup_reg);
         }
  
-       abb->int_base = devm_platform_ioremap_resource_byname(pdev, "int-address");
-       if (IS_ERR(abb->int_base))
-               return PTR_ERR(abb->int_base);
+       pname = "int-address";
+       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, pname);
+       if (!res) {
+               dev_err(dev, "Missing '%s' IO resource\n", pname);
+               return -ENODEV;
+       }
+       /*
+        * The MPU interrupt status register (PRM_IRQSTATUS_MPU) is
+        * shared between regulator-abb-{ivahd,dspeve,gpu} driver
+        * instances. Therefore use devm_ioremap() rather than
+        * devm_platform_ioremap_resource_byname() to avoid busy
+        * resource region conflicts.
+        */
+       abb->int_base = devm_ioremap(dev, res->start,
+                                            resource_size(res));
+       if (!abb->int_base) {
+               dev_err(dev, "Unable to map '%s'\n", pname);
+               return -ENOMEM;
+       }
  
         /* Map Optional resources */
         pname = "efuse-address";
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c

index c533d1dadc6bbb0f3f388ac62b01049ac99a72c5..a5dba3829769c7954ed2d3ba38800bc768fb0019 100644 (file)
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
                 return -EINVAL;
         if (cdev->private->state == DEV_STATE_NOT_OPER)
                 return -ENODEV;
-       if (cdev->private->state == DEV_STATE_VERIFY) {
+       if (cdev->private->state == DEV_STATE_VERIFY ||
+           cdev->private->flags.doverify) {
                 /* Remember to fake irb when finished. */
                 if (!cdev->private->flags.fake_irb) {
                         cdev->private->flags.fake_irb = FAKE_CMD_IRB;
@@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
         }
         if (cdev->private->state != DEV_STATE_ONLINE ||
             ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) &&
-            !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) ||
-           cdev->private->flags.doverify)
+            !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)))
                 return -EBUSY;
         ret = cio_set_options (sch, flags);
         if (ret)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c

index b92a32b4b1141670cc2f3c2e8b91a8e1d526b26a..04c64ce0a1ca1a2006d31ca5c7ee819598f155c4 100644 (file)
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -255,9 +255,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
                 if (!recover) {
                         hash_del(&addr->hnode);
                         kfree(addr);
-                       continue;
+               } else {
+                       /* prepare for recovery */
+                       addr->disp_flag = QETH_DISP_ADDR_ADD;
                 }
-               addr->disp_flag = QETH_DISP_ADDR_ADD;
         }
  
         mutex_unlock(&card->ip_lock);
@@ -278,9 +279,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
                 if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
                         rc = qeth_l3_register_addr_entry(card, addr);
  
-                       if (!rc) {
+                       if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) {
+                               /* keep it in the records */
                                 addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
                         } else {
+                               /* bad address */
                                 hash_del(&addr->hnode);
                                 kfree(addr);
                         }
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig

index addac7fbe37b9870380cc715acf923344071e6e6..9ce27092729c30a2791b329c117fa9314b268352 100644 (file)
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1270,7 +1270,7 @@ source "drivers/scsi/arm/Kconfig"
  
  config JAZZ_ESP
         bool "MIPS JAZZ FAS216 SCSI support"
-       depends on MACH_JAZZ && SCSI
+       depends on MACH_JAZZ && SCSI=y
         select SCSI_SPI_ATTRS
         help
           This is the driver for the onboard SCSI host adapter of MIPS Magnum
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c

index 19eee108db02145e55d6bebc33a03e4fffba1ef5..5c8d1ba3f8f3c9c2de41e7f111db57f2b4c3e63a 100644 (file)
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip)
  {
         struct fcoe_fcf *sel;
         struct fcoe_fcf *fcf;
-       unsigned long flags;
  
         mutex_lock(&fip->ctlr_mutex);
-       spin_lock_irqsave(&fip->ctlr_lock, flags);
+       spin_lock_bh(&fip->ctlr_lock);
  
         kfree_skb(fip->flogi_req);
         fip->flogi_req = NULL;
         list_for_each_entry(fcf, &fip->fcfs, list)
                 fcf->flogi_sent = 0;
  
-       spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+       spin_unlock_bh(&fip->ctlr_lock);
         sel = fip->sel_fcf;
  
         if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr))
@@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
  {
         struct fc_frame *fp;
         struct fc_frame_header *fh;
-       unsigned long flags;
         u16 old_xid;
         u8 op;
         u8 mac[ETH_ALEN];
@@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport,
                 op = FIP_DT_FLOGI;
                 if (fip->mode == FIP_MODE_VN2VN)
                         break;
-               spin_lock_irqsave(&fip->ctlr_lock, flags);
+               spin_lock_bh(&fip->ctlr_lock);
                 kfree_skb(fip->flogi_req);
                 fip->flogi_req = skb;
                 fip->flogi_req_send = 1;
-               spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+               spin_unlock_bh(&fip->ctlr_lock);
                 schedule_work(&fip->timer_work);
                 return -EINPROGRESS;
         case ELS_FDISC:
@@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip)
  static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
  {
         struct fcoe_fcf *fcf;
-       unsigned long flags;
         int error;
  
         mutex_lock(&fip->ctlr_mutex);
-       spin_lock_irqsave(&fip->ctlr_lock, flags);
+       spin_lock_bh(&fip->ctlr_lock);
         LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n");
         fcf = fcoe_ctlr_select(fip);
         if (!fcf || fcf->flogi_sent) {
@@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
                 fcoe_ctlr_solicit(fip, NULL);
                 error = fcoe_ctlr_flogi_send_locked(fip);
         }
-       spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+       spin_unlock_bh(&fip->ctlr_lock);
         mutex_unlock(&fip->ctlr_mutex);
         return error;
  }
@@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip)
  static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip)
  {
         struct fcoe_fcf *fcf;
-       unsigned long flags;
  
-       spin_lock_irqsave(&fip->ctlr_lock, flags);
+       spin_lock_bh(&fip->ctlr_lock);
         fcf = fip->sel_fcf;
         if (!fcf || !fip->flogi_req_send)
                 goto unlock;
@@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip)
         } else /* XXX */
                 LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n");
  unlock:
-       spin_unlock_irqrestore(&fip->ctlr_lock, flags);
+       spin_unlock_bh(&fip->ctlr_lock);
  }
  
  /**
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h

index 2074937c05bc855dea5a580079b84fd677460fb5..ce73f08ee889f1409c43583d959baeae6f0fb895 100644 (file)
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -305,6 +305,7 @@ struct fnic {
         unsigned int copy_wq_base;
         struct work_struct link_work;
         struct work_struct frame_work;
+       struct work_struct flush_work;
         struct sk_buff_head frame_queue;
         struct sk_buff_head tx_queue;
  
@@ -363,7 +364,7 @@ void fnic_handle_event(struct work_struct *work);
  int fnic_rq_cmpl_handler(struct fnic *fnic, int);
  int fnic_alloc_rq_frame(struct vnic_rq *rq);
  void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf);
-void fnic_flush_tx(struct fnic *);
+void fnic_flush_tx(struct work_struct *work);
  void fnic_eth_send(struct fcoe_ctlr *, struct sk_buff *skb);
  void fnic_set_port_id(struct fc_lport *, u32, struct fc_frame *);
  void fnic_update_mac(struct fc_lport *, u8 *new);
diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c

index 5e312a55cc7da0c73811b0fc26aed17d8c6a034c..a08293b2ad9f59031d5220aba3480a84461a0e8a 100644 (file)
--- a/drivers/scsi/fnic/fnic_fcs.c
+++ b/drivers/scsi/fnic/fnic_fcs.c
@@ -1182,7 +1182,7 @@ int fnic_send(struct fc_lport *lp, struct fc_frame *fp)
  
  /**
   * fnic_flush_tx() - send queued frames.
- * @fnic: fnic device
+ * @work: pointer to work element
   *
   * Send frames that were waiting to go out in FC or Ethernet mode.
   * Whenever changing modes we purge queued frames, so these frames should
@@ -1190,8 +1190,9 @@ int fnic_send(struct fc_lport *lp, struct fc_frame *fp)
   *
   * Called without fnic_lock held.
   */
-void fnic_flush_tx(struct fnic *fnic)
+void fnic_flush_tx(struct work_struct *work)
  {
+       struct fnic *fnic = container_of(work, struct fnic, flush_work);
         struct sk_buff *skb;
         struct fc_frame *fp;
  
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c

index 5ed1d897311a88c0d1194bff7b36e9677a45166f..29eead383eb9a478bb71643eaac1a4e302418f0f 100644 (file)
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -830,6 +830,7 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                 spin_lock_init(&fnic->vlans_lock);
                 INIT_WORK(&fnic->fip_frame_work, fnic_handle_fip_frame);
                 INIT_WORK(&fnic->event_work, fnic_handle_event);
+               INIT_WORK(&fnic->flush_work, fnic_flush_tx);
                 skb_queue_head_init(&fnic->fip_frame_queue);
                 INIT_LIST_HEAD(&fnic->evlist);
                 INIT_LIST_HEAD(&fnic->vlans);
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c

index 8d7fc5284293b5283523b049ba38387857ebb09e..fc4cee91b175c14d0950337ac8928b659cbc8cef 100644 (file)
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -680,7 +680,7 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic,
  
         spin_unlock_irqrestore(&fnic->fnic_lock, flags);
  
-       fnic_flush_tx(fnic);
+       queue_work(fnic_event_queue, &fnic->flush_work);
  
   reset_cmpl_handler_end:
         fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET);
@@ -736,7 +736,7 @@ static int fnic_fcpio_flogi_reg_cmpl_handler(struct fnic *fnic,
                 }
                 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
  
-               fnic_flush_tx(fnic);
+               queue_work(fnic_event_queue, &fnic->flush_work);
                 queue_work(fnic_event_queue, &fnic->frame_work);
         } else {
                 spin_unlock_irqrestore(&fnic->fnic_lock, flags);
diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c

index 2a50fda3a628c3fdc9daa79d73437de565bc5891..625fd547ee60a79c3a8bae9e985cb74d06b9073f 100644 (file)
--- a/drivers/scsi/initio.c
+++ b/drivers/scsi/initio.c
@@ -371,7 +371,6 @@ static u16 initio_se2_rd(unsigned long base, u8 addr)
   */
  static void initio_se2_wr(unsigned long base, u8 addr, u16 val)
  {
-       u8 rb;
         u8 instr;
         int i;
  
@@ -400,7 +399,7 @@ static void initio_se2_wr(unsigned long base, u8 addr, u16 val)
                 udelay(30);
                 outb(SE2CS, base + TUL_NVRAM);                  /* -CLK */
                 udelay(30);
-               if ((rb = inb(base + TUL_NVRAM)) & SE2DI)
+               if (inb(base + TUL_NVRAM) & SE2DI)
                         break;  /* write complete */
         }
         outb(0, base + TUL_NVRAM);                              /* -CS */
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c

index 71f711cb0628a70d40efc99520ef2dc807494e75..355a0bc0828e749a45513309b942cfdae4878a7c 100644 (file)
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -3387,7 +3387,7 @@ static enum sci_status isci_io_request_build(struct isci_host *ihost,
                 return SCI_FAILURE;
         }
  
-       return SCI_SUCCESS;
+       return status;
  }
  
  static struct isci_request *isci_request_from_tag(struct isci_host *ihost, u16 tag)
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c

index d26941b131fdb81e6bc9fe48ccc57b75a0055af5..bf879d81846b69379f34b91759a45ef8d5af89fb 100644 (file)
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1918,7 +1918,7 @@ out:
   *
   * Returns the number of SGEs added to the SGL.
   **/
-static int
+static uint32_t
  lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
                 struct sli4_sge *sgl, int datasegcnt,
                 struct lpfc_io_buf *lpfc_cmd)
@@ -1926,8 +1926,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
         struct scatterlist *sgde = NULL; /* s/g data entry */
         struct sli4_sge_diseed *diseed = NULL;
         dma_addr_t physaddr;
-       int i = 0, num_sge = 0, status;
-       uint32_t reftag;
+       int i = 0, status;
+       uint32_t reftag, num_sge = 0;
         uint8_t txop, rxop;
  #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
         uint32_t rc;
@@ -2099,7 +2099,7 @@ out:
   *
   * Returns the number of SGEs added to the SGL.
   **/
-static int
+static uint32_t
  lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
                 struct sli4_sge *sgl, int datacnt, int protcnt,
                 struct lpfc_io_buf *lpfc_cmd)
@@ -2123,8 +2123,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
         uint32_t rc;
  #endif
         uint32_t checking = 1;
-       uint32_t dma_offset = 0;
-       int num_sge = 0, j = 2;
+       uint32_t dma_offset = 0, num_sge = 0;
+       int j = 2;
         struct sli4_hybrid_sgl *sgl_xtra = NULL;
  
         sgpe = scsi_prot_sglist(sc);
diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c

index c0c8ab5869572f77fa11f1c2154e85802ff8a4e5..d32ad46318cb09af970085b3ab00fc376a934e4f 100644 (file)
--- a/drivers/scsi/mpi3mr/mpi3mr_transport.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c
@@ -1671,7 +1671,7 @@ mpi3mr_update_mr_sas_port(struct mpi3mr_ioc *mrioc, struct host_port *h_port,
  void
  mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc)
  {
-       struct host_port h_port[64];
+       struct host_port *h_port = NULL;
         int i, j, found, host_port_count = 0, port_idx;
         u16 sz, attached_handle, ioc_status;
         struct mpi3_sas_io_unit_page0 *sas_io_unit_pg0 = NULL;
@@ -1685,6 +1685,10 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc)
         sas_io_unit_pg0 = kzalloc(sz, GFP_KERNEL);
         if (!sas_io_unit_pg0)
                 return;
+       h_port = kcalloc(64, sizeof(struct host_port), GFP_KERNEL);
+       if (!h_port)
+               goto out;
+
         if (mpi3mr_cfg_get_sas_io_unit_pg0(mrioc, sas_io_unit_pg0, sz)) {
                 ioc_err(mrioc, "failure at %s:%d/%s()!\n",
                     __FILE__, __LINE__, __func__);
@@ -1814,6 +1818,7 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc)
                 }
         }
  out:
+       kfree(h_port);
         kfree(sas_io_unit_pg0);
  }
  
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c

index 8761bc58d965f0f6eb6776a4272ca856e8724463..b8120ca93c79740d7827ebff1652b4b22b296421 100644 (file)
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -7378,7 +7378,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout)
                 return -EFAULT;
         }
  
- issue_diag_reset:
+       return 0;
+
+issue_diag_reset:
         rc = _base_diag_reset(ioc);
         return rc;
  }
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c

index 76d369343c7a9c2457e7d7bc16aa518815cbfc8f..8cad9792a56275b38f70595baf7fb095882a6c1f 100644 (file)
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer,
         return result + 4;
  }
  
+enum scsi_vpd_parameters {
+       SCSI_VPD_HEADER_SIZE = 4,
+       SCSI_VPD_LIST_SIZE = 36,
+};
+
  static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)
  {
-       unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4);
+       unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4);
         int result;
  
         if (sdev->no_vpd_size)
                 return SCSI_DEFAULT_VPD_LEN;
  
+       /*
+        * Fetch the supported pages VPD and validate that the requested page
+        * number is present.
+        */
+       if (page != 0) {
+               result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd));
+               if (result < SCSI_VPD_HEADER_SIZE)
+                       return 0;
+
+               result -= SCSI_VPD_HEADER_SIZE;
+               if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result))
+                       return 0;
+       }
         /*
          * Fetch the VPD page header to find out how big the page
          * is. This is done to prevent problems on legacy devices
          * which can not handle allocation lengths as large as
          * potentially requested by the caller.
          */
-       result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header));
+       result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE);
         if (result < 0)
                 return 0;
  
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index 79da4b1c1df0adc649954a45f2d630989f12a6d6..612489afe8d2467965759c80562562e26919f704 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -61,11 +61,11 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
  static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *,
                                                    struct scsi_cmnd *);
  
-void scsi_eh_wakeup(struct Scsi_Host *shost)
+void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy)
  {
         lockdep_assert_held(shost->host_lock);
  
-       if (scsi_host_busy(shost) == shost->host_failed) {
+       if (busy == shost->host_failed) {
                 trace_scsi_eh_wakeup(shost);
                 wake_up_process(shost->ehandler);
                 SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost,
@@ -88,7 +88,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost)
         if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 ||
             scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {
                 shost->host_eh_scheduled++;
-               scsi_eh_wakeup(shost);
+               scsi_eh_wakeup(shost, scsi_host_busy(shost));
         }
  
         spin_unlock_irqrestore(shost->host_lock, flags);
@@ -282,11 +282,12 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head)
  {
         struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
         struct Scsi_Host *shost = scmd->device->host;
+       unsigned int busy = scsi_host_busy(shost);
         unsigned long flags;
  
         spin_lock_irqsave(shost->host_lock, flags);
         shost->host_failed++;
-       scsi_eh_wakeup(shost);
+       scsi_eh_wakeup(shost, busy);
         spin_unlock_irqrestore(shost->host_lock, flags);
  }
  
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c

index cf3864f720930988fbadc77b3c91c77fe2d3bb62..df5ac03d5d6c2eb5233ad7fcfdad37a1e487b4e6 100644 (file)
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -278,9 +278,11 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
         rcu_read_lock();
         __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
         if (unlikely(scsi_host_in_recovery(shost))) {
+               unsigned int busy = scsi_host_busy(shost);
+
                 spin_lock_irqsave(shost->host_lock, flags);
                 if (shost->host_failed || shost->host_eh_scheduled)
-                       scsi_eh_wakeup(shost);
+                       scsi_eh_wakeup(shost, busy);
                 spin_unlock_irqrestore(shost->host_lock, flags);
         }
         rcu_read_unlock();
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h

index 3f0dfb97db6bd1b88755db1fb50dd6e968e385c6..1fbfe1b52c9f1a906ea6b0da7a6b273e2972a903 100644 (file)
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -92,7 +92,7 @@ extern void scmd_eh_abort_handler(struct work_struct *work);
  extern enum blk_eh_timer_return scsi_timeout(struct request *req);
  extern int scsi_error_handler(void *host);
  extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd);
-extern void scsi_eh_wakeup(struct Scsi_Host *shost);
+extern void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy);
  extern void scsi_eh_scmd_add(struct scsi_cmnd *);
  void scsi_eh_ready_devs(struct Scsi_Host *shost,
                         struct list_head *work_q,
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c

index 0833b3e6aa6e8f35b791d3f75fe208fb0f888914..bdd0acf7fa3cb130e64fac2aacf684aa5a91da8b 100644 (file)
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3407,6 +3407,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp,
         return true;
  }
  
+static void sd_read_block_zero(struct scsi_disk *sdkp)
+{
+       unsigned int buf_len = sdkp->device->sector_size;
+       char *buffer, cmd[10] = { };
+
+       buffer = kmalloc(buf_len, GFP_KERNEL);
+       if (!buffer)
+               return;
+
+       cmd[0] = READ_10;
+       put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */
+       put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */
+
+       scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len,
+                        SD_TIMEOUT, sdkp->max_retries, NULL);
+       kfree(buffer);
+}
+
  /**
   *     sd_revalidate_disk - called the first time a new disk is seen,
   *     performs disk spin up, read_capacity, etc.
@@ -3446,7 +3464,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
          */
         if (sdkp->media_present) {
                 sd_read_capacity(sdkp, buffer);
-
+               /*
+                * Some USB/UAS devices return generic values for mode pages
+                * until the media has been accessed. Trigger a READ operation
+                * to force the device to populate mode pages.
+                */
+               if (sdp->read_before_ms)
+                       sd_read_block_zero(sdkp);
                 /*
                  * set the default to rotational.  All non-rotational devices
                  * support the block characteristics VPD page, which will
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c

index ceff1ec13f9ea9ea056da947d3939c51f4797522..385180c98be496989dbf469926f52c974609a013 100644 (file)
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -6533,8 +6533,11 @@ static void pqi_map_queues(struct Scsi_Host *shost)
  {
         struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost);
  
-       blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+       if (!ctrl_info->disable_managed_interrupts)
+               return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
                               ctrl_info->pci_dev, 0);
+       else
+               return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]);
  }
  
  static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c

index a95936b18f695e3ef796098866ea07101e9e346d..7ceb982040a5dfe5d490f9a4bd306e99e5140a53 100644 (file)
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -330,6 +330,7 @@ enum storvsc_request_type {
   */
  
  static int storvsc_ringbuffer_size = (128 * 1024);
+static int aligned_ringbuffer_size;
  static u32 max_outstanding_req_per_channel;
  static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth);
  
@@ -687,8 +688,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
         new_sc->next_request_id_callback = storvsc_next_request_id;
  
         ret = vmbus_open(new_sc,
-                        storvsc_ringbuffer_size,
-                        storvsc_ringbuffer_size,
+                        aligned_ringbuffer_size,
+                        aligned_ringbuffer_size,
                          (void *)&props,
                          sizeof(struct vmstorage_channel_properties),
                          storvsc_on_channel_callback, new_sc);
@@ -1973,7 +1974,7 @@ static int storvsc_probe(struct hv_device *device,
         dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1);
  
         stor_device->port_number = host->host_no;
-       ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc);
+       ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc);
         if (ret)
                 goto err_out1;
  
@@ -2164,7 +2165,7 @@ static int storvsc_resume(struct hv_device *hv_dev)
  {
         int ret;
  
-       ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size,
+       ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size,
                                      hv_dev_is_fc(hv_dev));
         return ret;
  }
@@ -2198,8 +2199,9 @@ static int __init storvsc_drv_init(void)
          * the ring buffer indices) by the max request size (which is
          * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64)
          */
+       aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size);
         max_outstanding_req_per_channel =
-               ((storvsc_ringbuffer_size - PAGE_SIZE) /
+               ((aligned_ringbuffer_size - PAGE_SIZE) /
                 ALIGN(MAX_MULTIPAGE_BUFFER_PACKET +
                 sizeof(struct vstor_packet) + sizeof(u64),
                 sizeof(u64)));
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c

index 4cf20be668a6021c6acfae56c19f0914586a7bf6..617eb892f4ad457feb5d4de3d9c1ceb88a010c61 100644 (file)
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -188,8 +188,6 @@ static void virtscsi_vq_done(struct virtio_scsi *vscsi,
                 while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
                         fn(vscsi, buf);
  
-               if (unlikely(virtqueue_is_broken(vq)))
-                       break;
         } while (!virtqueue_enable_cb(vq));
         spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags);
  }
diff --git a/drivers/soc/apple/mailbox.c b/drivers/soc/apple/mailbox.c

index 780199bf351efbfb24422880ef39510c77def68f..49a0955e82d6cf5eef83e5f63ba8d31194c65324 100644 (file)
--- a/drivers/soc/apple/mailbox.c
+++ b/drivers/soc/apple/mailbox.c
@@ -296,14 +296,14 @@ struct apple_mbox *apple_mbox_get(struct device *dev, int index)
         of_node_put(args.np);
  
         if (!pdev)
-               return ERR_PTR(EPROBE_DEFER);
+               return ERR_PTR(-EPROBE_DEFER);
  
         mbox = platform_get_drvdata(pdev);
         if (!mbox)
-               return ERR_PTR(EPROBE_DEFER);
+               return ERR_PTR(-EPROBE_DEFER);
  
         if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER))
-               return ERR_PTR(ENODEV);
+               return ERR_PTR(-ENODEV);
  
         return mbox;
  }
diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig

index 9b0fdd95276e4e017d32012d1f2de3107556e242..19f4b576f822b2e57309308f5294914af27df570 100644 (file)
--- a/drivers/soc/microchip/Kconfig
+++ b/drivers/soc/microchip/Kconfig
@@ -1,5 +1,5 @@
  config POLARFIRE_SOC_SYS_CTRL
-       tristate "POLARFIRE_SOC_SYS_CTRL"
+       tristate "Microchip PolarFire SoC (MPFS) system controller support"
         depends on POLARFIRE_SOC_MAILBOX
         depends on MTD
         help
diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c

index f4bfd24386f1b5d2defe9aad6ffcd7123035158d..f913e9bd57ed4a7aa6d1b99d27a40552713b2536 100644 (file)
--- a/drivers/soc/qcom/pmic_glink.c
+++ b/drivers/soc/qcom/pmic_glink.c
@@ -265,10 +265,17 @@ static int pmic_glink_probe(struct platform_device *pdev)
  
         pg->client_mask = *match_data;
  
+       pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg);
+       if (IS_ERR(pg->pdr)) {
+               ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr),
+                                   "failed to initialize pdr\n");
+               return ret;
+       }
+
         if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) {
                 ret = pmic_glink_add_aux_device(pg, &pg->ucsi_aux, "ucsi");
                 if (ret)
-                       return ret;
+                       goto out_release_pdr_handle;
         }
         if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_ALTMODE)) {
                 ret = pmic_glink_add_aux_device(pg, &pg->altmode_aux, "altmode");
@@ -281,17 +288,11 @@ static int pmic_glink_probe(struct platform_device *pdev)
                         goto out_release_altmode_aux;
         }
  
-       pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg);
-       if (IS_ERR(pg->pdr)) {
-               ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), "failed to initialize pdr\n");
-               goto out_release_aux_devices;
-       }
-
         service = pdr_add_lookup(pg->pdr, "tms/servreg", "msm/adsp/charger_pd");
         if (IS_ERR(service)) {
                 ret = dev_err_probe(&pdev->dev, PTR_ERR(service),
                                     "failed adding pdr lookup for charger_pd\n");
-               goto out_release_pdr_handle;
+               goto out_release_aux_devices;
         }
  
         mutex_lock(&__pmic_glink_lock);
@@ -300,8 +301,6 @@ static int pmic_glink_probe(struct platform_device *pdev)
  
         return 0;
  
-out_release_pdr_handle:
-       pdr_handle_release(pg->pdr);
  out_release_aux_devices:
         if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_BATT))
                 pmic_glink_del_aux_device(pg, &pg->ps_aux);
@@ -311,6 +310,8 @@ out_release_altmode_aux:
  out_release_ucsi_aux:
         if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI))
                 pmic_glink_del_aux_device(pg, &pg->ucsi_aux);
+out_release_pdr_handle:
+       pdr_handle_release(pg->pdr);
  
         return ret;
  }
diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c

index 5fcd0fdd2faa2d087fc03e001dffe4f5016c80a9..b3808fc24c695e89fa10f46b93e0fcfabc3b4d61 100644 (file)
--- a/drivers/soc/qcom/pmic_glink_altmode.c
+++ b/drivers/soc/qcom/pmic_glink_altmode.c
@@ -76,7 +76,7 @@ struct pmic_glink_altmode_port {
  
         struct work_struct work;
  
-       struct device *bridge;
+       struct auxiliary_device *bridge;
  
         enum typec_orientation orientation;
         u16 svid;
@@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work)
         else
                 pmic_glink_altmode_enable_usb(altmode, alt_port);
  
-       drm_aux_hpd_bridge_notify(alt_port->bridge,
+       drm_aux_hpd_bridge_notify(&alt_port->bridge->dev,
                                   alt_port->hpd_state ?
                                   connector_status_connected :
                                   connector_status_disconnected);
@@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
                 alt_port->index = port;
                 INIT_WORK(&alt_port->work, pmic_glink_altmode_worker);
  
-               alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode));
+               alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode));
                 if (IS_ERR(alt_port->bridge)) {
                         fwnode_handle_put(fwnode);
                         return PTR_ERR(alt_port->bridge);
@@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev,
                 }
         }
  
+       for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) {
+               alt_port = &altmode->ports[port];
+               if (!alt_port->bridge)
+                       continue;
+
+               ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge);
+               if (ret)
+                       return ret;
+       }
+
         altmode->client = devm_pmic_glink_register_client(dev,
                                                           altmode->owner_id,
                                                           pmic_glink_altmode_callback,
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c

index f94e0d370d466e9742261a84a567593b8073f169..1a8d03958dffbfb77a4cd183d8a18fbd3ed53d63 100644 (file)
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -1927,24 +1927,18 @@ static void cqspi_remove(struct platform_device *pdev)
         pm_runtime_disable(&pdev->dev);
  }
  
-static int cqspi_suspend(struct device *dev)
+static int cqspi_runtime_suspend(struct device *dev)
  {
         struct cqspi_st *cqspi = dev_get_drvdata(dev);
-       struct spi_controller *host = dev_get_drvdata(dev);
-       int ret;
  
-       ret = spi_controller_suspend(host);
         cqspi_controller_enable(cqspi, 0);
-
         clk_disable_unprepare(cqspi->clk);
-
-       return ret;
+       return 0;
  }
  
-static int cqspi_resume(struct device *dev)
+static int cqspi_runtime_resume(struct device *dev)
  {
         struct cqspi_st *cqspi = dev_get_drvdata(dev);
-       struct spi_controller *host = dev_get_drvdata(dev);
  
         clk_prepare_enable(cqspi->clk);
         cqspi_wait_idle(cqspi);
@@ -1952,12 +1946,27 @@ static int cqspi_resume(struct device *dev)
  
         cqspi->current_cs = -1;
         cqspi->sclk = 0;
+       return 0;
+}
+
+static int cqspi_suspend(struct device *dev)
+{
+       struct cqspi_st *cqspi = dev_get_drvdata(dev);
+
+       return spi_controller_suspend(cqspi->host);
+}
  
-       return spi_controller_resume(host);
+static int cqspi_resume(struct device *dev)
+{
+       struct cqspi_st *cqspi = dev_get_drvdata(dev);
+
+       return spi_controller_resume(cqspi->host);
  }
  
-static DEFINE_RUNTIME_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_suspend,
-                                cqspi_resume, NULL);
+static const struct dev_pm_ops cqspi_dev_pm_ops = {
+       RUNTIME_PM_OPS(cqspi_runtime_suspend, cqspi_runtime_resume, NULL)
+       SYSTEM_SLEEP_PM_OPS(cqspi_suspend, cqspi_resume)
+};
  
  static const struct cqspi_driver_platdata cdns_qspi = {
         .quirks = CQSPI_DISABLE_DAC_MODE,
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c

index 546cdce525fc5b1b49b305b872e81d2b0aed0cb5..833a1bb7a91438e02c2d5a176e1afdaba4159552 100644 (file)
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -2,6 +2,7 @@
  // Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
  // Copyright (C) 2008 Juergen Beisert
  
+#include <linux/bits.h>
  #include <linux/clk.h>
  #include <linux/completion.h>
  #include <linux/delay.h>
@@ -660,15 +661,15 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
                         << MX51_ECSPI_CTRL_BL_OFFSET;
         else {
                 if (spi_imx->usedma) {
-                       ctrl |= (spi_imx->bits_per_word *
-                               spi_imx_bytes_per_word(spi_imx->bits_per_word) - 1)
+                       ctrl |= (spi_imx->bits_per_word - 1)
                                 << MX51_ECSPI_CTRL_BL_OFFSET;
                 } else {
                         if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST)
-                               ctrl |= (MX51_ECSPI_CTRL_MAX_BURST - 1)
+                               ctrl |= (MX51_ECSPI_CTRL_MAX_BURST * BITS_PER_BYTE - 1)
                                                 << MX51_ECSPI_CTRL_BL_OFFSET;
                         else
-                               ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1)
+                               ctrl |= spi_imx->count / DIV_ROUND_UP(spi_imx->bits_per_word,
+                                               BITS_PER_BYTE) * spi_imx->bits_per_word
                                                 << MX51_ECSPI_CTRL_BL_OFFSET;
                 }
         }
diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c

index 07d20ca1164c357813e075b7a1a6763da735ab0a..4337ca51d7aa21555684f62295a39a52772cce3d 100644 (file)
--- a/drivers/spi/spi-intel-pci.c
+++ b/drivers/spi/spi-intel-pci.c
@@ -85,6 +85,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
         { PCI_VDEVICE(INTEL, 0xa2a4), (unsigned long)&cnl_info },
         { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info },
         { PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info },
+       { PCI_VDEVICE(INTEL, 0xa823), (unsigned long)&cnl_info },
         { },
  };
  MODULE_DEVICE_TABLE(pci, intel_spi_pci_ids);
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c

index 1bf080339b5a722b8ec6abb356f8bad231d19d9f..88cbe4f00cc3b11e5fef822a60114a90a88149f8 100644 (file)
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -39,6 +39,7 @@
  #include <linux/spi/spi.h>
  #include <linux/spi/mxs-spi.h>
  #include <trace/events/spi.h>
+#include <linux/dma/mxs-dma.h>
  
  #define DRIVER_NAME            "mxs-spi"
  
@@ -252,7 +253,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi,
                 desc = dmaengine_prep_slave_sg(ssp->dmach,
                                 &dma_xfer[sg_count].sg, 1,
                                 (flags & TXRX_WRITE) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
-                               DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+                               DMA_PREP_INTERRUPT | MXS_DMA_CTRL_WAIT4END);
  
                 if (!desc) {
                         dev_err(ssp->dev,
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c

index a0c9fea908f553e0bf007a7a78a35746f60a25f4..ddf1c684bcc7d863ede340aa196506a3a3505654 100644 (file)
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -53,8 +53,6 @@
  
  /* per-register bitmasks: */
  #define OMAP2_MCSPI_IRQSTATUS_EOW      BIT(17)
-#define OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY    BIT(0)
-#define OMAP2_MCSPI_IRQSTATUS_RX0_FULL    BIT(2)
  
  #define OMAP2_MCSPI_MODULCTRL_SINGLE   BIT(0)
  #define OMAP2_MCSPI_MODULCTRL_MS       BIT(2)
@@ -293,7 +291,7 @@ static void omap2_mcspi_set_mode(struct spi_controller *ctlr)
  }
  
  static void omap2_mcspi_set_fifo(const struct spi_device *spi,
-                               struct spi_transfer *t, int enable, int dma_enabled)
+                               struct spi_transfer *t, int enable)
  {
         struct spi_controller *ctlr = spi->controller;
         struct omap2_mcspi_cs *cs = spi->controller_state;
@@ -314,28 +312,20 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi,
                         max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH / 2;
                 else
                         max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH;
-               if (dma_enabled)
-                       wcnt = t->len / bytes_per_word;
-               else
-                       wcnt = 0;
+
+               wcnt = t->len / bytes_per_word;
                 if (wcnt > OMAP2_MCSPI_MAX_FIFOWCNT)
                         goto disable_fifo;
  
                 xferlevel = wcnt << 16;
                 if (t->rx_buf != NULL) {
                         chconf |= OMAP2_MCSPI_CHCONF_FFER;
-                       if (dma_enabled)
-                               xferlevel |= (bytes_per_word - 1) << 8;
-                       else
-                               xferlevel |= (max_fifo_depth - 1) << 8;
+                       xferlevel |= (bytes_per_word - 1) << 8;
                 }
  
                 if (t->tx_buf != NULL) {
                         chconf |= OMAP2_MCSPI_CHCONF_FFET;
-                       if (dma_enabled)
-                               xferlevel |= bytes_per_word - 1;
-                       else
-                               xferlevel |= (max_fifo_depth - 1);
+                       xferlevel |= bytes_per_word - 1;
                 }
  
                 mcspi_write_reg(ctlr, OMAP2_MCSPI_XFERLEVEL, xferlevel);
@@ -892,113 +882,6 @@ out:
         return count - c;
  }
  
-static unsigned
-omap2_mcspi_txrx_piofifo(struct spi_device *spi, struct spi_transfer *xfer)
-{
-       struct omap2_mcspi_cs   *cs = spi->controller_state;
-       struct omap2_mcspi    *mcspi;
-       unsigned int            count, c;
-       unsigned int            iter, cwc;
-       int last_request;
-       void __iomem            *base = cs->base;
-       void __iomem            *tx_reg;
-       void __iomem            *rx_reg;
-       void __iomem            *chstat_reg;
-       void __iomem        *irqstat_reg;
-       int                     word_len, bytes_per_word;
-       u8              *rx;
-       const u8        *tx;
-
-       mcspi = spi_controller_get_devdata(spi->controller);
-       count = xfer->len;
-       c = count;
-       word_len = cs->word_len;
-       bytes_per_word = mcspi_bytes_per_word(word_len);
-
-       /*
-        * We store the pre-calculated register addresses on stack to speed
-        * up the transfer loop.
-        */
-       tx_reg          = base + OMAP2_MCSPI_TX0;
-       rx_reg          = base + OMAP2_MCSPI_RX0;
-       chstat_reg      = base + OMAP2_MCSPI_CHSTAT0;
-       irqstat_reg    = base + OMAP2_MCSPI_IRQSTATUS;
-
-       if (c < (word_len >> 3))
-               return 0;
-
-       rx = xfer->rx_buf;
-       tx = xfer->tx_buf;
-
-       do {
-               /* calculate number of words in current iteration */
-               cwc = min((unsigned int)mcspi->fifo_depth / bytes_per_word,
-                         c / bytes_per_word);
-               last_request = cwc != (mcspi->fifo_depth / bytes_per_word);
-               if (tx) {
-                       if (mcspi_wait_for_reg_bit(irqstat_reg,
-                                                  OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY) < 0) {
-                               dev_err(&spi->dev, "TX Empty timed out\n");
-                               goto out;
-                       }
-                       writel_relaxed(OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY, irqstat_reg);
-
-                       for (iter = 0; iter < cwc; iter++, tx += bytes_per_word) {
-                               if (bytes_per_word == 1)
-                                       writel_relaxed(*tx, tx_reg);
-                               else if (bytes_per_word == 2)
-                                       writel_relaxed(*((u16 *)tx), tx_reg);
-                               else if (bytes_per_word == 4)
-                                       writel_relaxed(*((u32 *)tx), tx_reg);
-                       }
-               }
-
-               if (rx) {
-                       if (!last_request &&
-                           mcspi_wait_for_reg_bit(irqstat_reg,
-                                                  OMAP2_MCSPI_IRQSTATUS_RX0_FULL) < 0) {
-                               dev_err(&spi->dev, "RX_FULL timed out\n");
-                               goto out;
-                       }
-                       writel_relaxed(OMAP2_MCSPI_IRQSTATUS_RX0_FULL, irqstat_reg);
-
-                       for (iter = 0; iter < cwc; iter++, rx += bytes_per_word) {
-                               if (last_request &&
-                                   mcspi_wait_for_reg_bit(chstat_reg,
-                                                          OMAP2_MCSPI_CHSTAT_RXS) < 0) {
-                                       dev_err(&spi->dev, "RXS timed out\n");
-                                       goto out;
-                               }
-                               if (bytes_per_word == 1)
-                                       *rx = readl_relaxed(rx_reg);
-                               else if (bytes_per_word == 2)
-                                       *((u16 *)rx) = readl_relaxed(rx_reg);
-                               else if (bytes_per_word == 4)
-                                       *((u32 *)rx) = readl_relaxed(rx_reg);
-                       }
-               }
-
-               if (last_request) {
-                       if (mcspi_wait_for_reg_bit(chstat_reg,
-                                                  OMAP2_MCSPI_CHSTAT_EOT) < 0) {
-                               dev_err(&spi->dev, "EOT timed out\n");
-                               goto out;
-                       }
-                       if (mcspi_wait_for_reg_bit(chstat_reg,
-                                                  OMAP2_MCSPI_CHSTAT_TXFFE) < 0) {
-                               dev_err(&spi->dev, "TXFFE timed out\n");
-                               goto out;
-                       }
-                       omap2_mcspi_set_enable(spi, 0);
-               }
-               c -= cwc * bytes_per_word;
-       } while (c >= bytes_per_word);
-
-out:
-       omap2_mcspi_set_enable(spi, 1);
-       return count - c;
-}
-
  static u32 omap2_mcspi_calc_divisor(u32 speed_hz, u32 ref_clk_hz)
  {
         u32 div;
@@ -1323,9 +1206,7 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr,
                 if ((mcspi_dma->dma_rx && mcspi_dma->dma_tx) &&
                     ctlr->cur_msg_mapped &&
                     ctlr->can_dma(ctlr, spi, t))
-                       omap2_mcspi_set_fifo(spi, t, 1, 1);
-               else if (t->len > OMAP2_MCSPI_MAX_FIFODEPTH)
-                       omap2_mcspi_set_fifo(spi, t, 1, 0);
+                       omap2_mcspi_set_fifo(spi, t, 1);
  
                 omap2_mcspi_set_enable(spi, 1);
  
@@ -1338,8 +1219,6 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr,
                     ctlr->cur_msg_mapped &&
                     ctlr->can_dma(ctlr, spi, t))
                         count = omap2_mcspi_txrx_dma(spi, t);
-               else if (mcspi->fifo_depth > 0)
-                       count = omap2_mcspi_txrx_piofifo(spi, t);
                 else
                         count = omap2_mcspi_txrx_pio(spi, t);
  
@@ -1352,7 +1231,7 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr,
         omap2_mcspi_set_enable(spi, 0);
  
         if (mcspi->fifo_depth > 0)
-               omap2_mcspi_set_fifo(spi, t, 0, 0);
+               omap2_mcspi_set_fifo(spi, t, 0);
  
  out:
         /* Restore defaults if they were overriden */
@@ -1375,7 +1254,7 @@ out:
                 omap2_mcspi_set_cs(spi, !(spi->mode & SPI_CS_HIGH));
  
         if (mcspi->fifo_depth > 0 && t)
-               omap2_mcspi_set_fifo(spi, t, 0, 0);
+               omap2_mcspi_set_fifo(spi, t, 0);
  
         return status;
  }
diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c

index 03aab661be9d33af1ff6ad93052171f206402b83..82d6264841fc7f090a5541235569e40023330483 100644 (file)
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -25,11 +25,13 @@
  #include <linux/slab.h>
  #include <linux/errno.h>
  #include <linux/wait.h>
+#include <linux/platform_device.h>
  #include <linux/of_address.h>
  #include <linux/of_irq.h>
  #include <linux/of_platform.h>
  #include <linux/interrupt.h>
  #include <linux/delay.h>
+#include <linux/platform_device.h>
  
  #include <linux/spi/spi.h>
  #include <linux/spi/spi_bitbang.h>
@@ -166,10 +168,8 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t)
         int scr;
         u8 cdm = 0;
         u32 speed;
-       u8 bits_per_word;
  
         /* Start with the generic configuration for this device. */
-       bits_per_word = spi->bits_per_word;
         speed = spi->max_speed_hz;
  
         /*
@@ -177,9 +177,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t)
          * the transfer to overwrite the generic configuration with zeros.
          */
         if (t) {
-               if (t->bits_per_word)
-                       bits_per_word = t->bits_per_word;
-
                 if (t->speed_hz)
                         speed = min(t->speed_hz, spi->max_speed_hz);
         }
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c

index cfc3b1ddbd229f04885db1b610298e63b623132f..6f12e4fb2e2e184f1bb4cf9fe12e5437384fc4ac 100644 (file)
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -136,14 +136,14 @@ struct sh_msiof_spi_priv {
  
  /* SIFCTR */
  #define SIFCTR_TFWM_MASK       GENMASK(31, 29) /* Transmit FIFO Watermark */
-#define SIFCTR_TFWM_64         (0 << 29)       /*  Transfer Request when 64 empty stages */
-#define SIFCTR_TFWM_32         (1 << 29)       /*  Transfer Request when 32 empty stages */
-#define SIFCTR_TFWM_24         (2 << 29)       /*  Transfer Request when 24 empty stages */
-#define SIFCTR_TFWM_16         (3 << 29)       /*  Transfer Request when 16 empty stages */
-#define SIFCTR_TFWM_12         (4 << 29)       /*  Transfer Request when 12 empty stages */
-#define SIFCTR_TFWM_8          (5 << 29)       /*  Transfer Request when 8 empty stages */
-#define SIFCTR_TFWM_4          (6 << 29)       /*  Transfer Request when 4 empty stages */
-#define SIFCTR_TFWM_1          (7 << 29)       /*  Transfer Request when 1 empty stage */
+#define SIFCTR_TFWM_64         (0UL << 29)     /*  Transfer Request when 64 empty stages */
+#define SIFCTR_TFWM_32         (1UL << 29)     /*  Transfer Request when 32 empty stages */
+#define SIFCTR_TFWM_24         (2UL << 29)     /*  Transfer Request when 24 empty stages */
+#define SIFCTR_TFWM_16         (3UL << 29)     /*  Transfer Request when 16 empty stages */
+#define SIFCTR_TFWM_12         (4UL << 29)     /*  Transfer Request when 12 empty stages */
+#define SIFCTR_TFWM_8          (5UL << 29)     /*  Transfer Request when 8 empty stages */
+#define SIFCTR_TFWM_4          (6UL << 29)     /*  Transfer Request when 4 empty stages */
+#define SIFCTR_TFWM_1          (7UL << 29)     /*  Transfer Request when 1 empty stage */
  #define SIFCTR_TFUA_MASK       GENMASK(26, 20) /* Transmit FIFO Usable Area */
  #define SIFCTR_TFUA_SHIFT      20
  #define SIFCTR_TFUA(i)         ((i) << SIFCTR_TFUA_SHIFT)
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c

index e748a5d04e970598c4bee3b52c19a57fcbbd1c12..9149d41fe65b7ed48785f80bc712902278eccec3 100644 (file)
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work)
                 struct ad5933_state, work.work);
         struct iio_dev *indio_dev = i2c_get_clientdata(st->client);
         __be16 buf[2];
-       int val[2];
+       u16 val[2];
         unsigned char status;
         int ret;
  
diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c

index f44e6412f4e31a4ee3ab22cc650eee5a49ee31a1..d0db2efe004525e9d88531e0115c8edd2fa9c4df 100644 (file)
--- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
@@ -3723,12 +3723,10 @@ apply_min_padding:
  
  static int atomisp_set_crop(struct atomisp_device *isp,
                             const struct v4l2_mbus_framefmt *format,
+                           struct v4l2_subdev_state *sd_state,
                             int which)
  {
         struct atomisp_input_subdev *input = &isp->inputs[isp->asd.input_curr];
-       struct v4l2_subdev_state pad_state = {
-               .pads = &input->pad_cfg,
-       };
         struct v4l2_subdev_selection sel = {
                 .which = which,
                 .target = V4L2_SEL_TGT_CROP,
@@ -3754,7 +3752,7 @@ static int atomisp_set_crop(struct atomisp_device *isp,
         sel.r.left = ((input->native_rect.width - sel.r.width) / 2) & ~1;
         sel.r.top = ((input->native_rect.height - sel.r.height) / 2) & ~1;
  
-       ret = v4l2_subdev_call(input->camera, pad, set_selection, &pad_state, &sel);
+       ret = v4l2_subdev_call(input->camera, pad, set_selection, sd_state, &sel);
         if (ret)
                 dev_err(isp->dev, "Error setting crop to %ux%u @%ux%u: %d\n",
                         sel.r.width, sel.r.height, sel.r.left, sel.r.top, ret);
@@ -3770,9 +3768,6 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f,
         const struct atomisp_format_bridge *fmt, *snr_fmt;
         struct atomisp_sub_device *asd = &isp->asd;
         struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
-       struct v4l2_subdev_state pad_state = {
-               .pads = &input->pad_cfg,
-       };
         struct v4l2_subdev_format format = {
                 .which = V4L2_SUBDEV_FORMAT_TRY,
         };
@@ -3809,11 +3804,16 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f,
         dev_dbg(isp->dev, "try_mbus_fmt: asking for %ux%u\n",
                 format.format.width, format.format.height);
  
-       ret = atomisp_set_crop(isp, &format.format, V4L2_SUBDEV_FORMAT_TRY);
-       if (ret)
-               return ret;
+       v4l2_subdev_lock_state(input->try_sd_state);
+
+       ret = atomisp_set_crop(isp, &format.format, input->try_sd_state,
+                              V4L2_SUBDEV_FORMAT_TRY);
+       if (ret == 0)
+               ret = v4l2_subdev_call(input->camera, pad, set_fmt,
+                                      input->try_sd_state, &format);
+
+       v4l2_subdev_unlock_state(input->try_sd_state);
  
-       ret = v4l2_subdev_call(input->camera, pad, set_fmt, &pad_state, &format);
         if (ret)
                 return ret;
  
@@ -4238,9 +4238,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
         struct atomisp_device *isp = asd->isp;
         struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
         const struct atomisp_format_bridge *format;
-       struct v4l2_subdev_state pad_state = {
-               .pads = &input->pad_cfg,
-       };
+       struct v4l2_subdev_state *act_sd_state;
         struct v4l2_subdev_format vformat = {
                 .which = V4L2_SUBDEV_FORMAT_TRY,
         };
@@ -4268,12 +4266,18 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
  
         /* Disable dvs if resolution can't be supported by sensor */
         if (asd->params.video_dis_en && asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) {
-               ret = atomisp_set_crop(isp, &vformat.format, V4L2_SUBDEV_FORMAT_TRY);
-               if (ret)
-                       return ret;
+               v4l2_subdev_lock_state(input->try_sd_state);
+
+               ret = atomisp_set_crop(isp, &vformat.format, input->try_sd_state,
+                                      V4L2_SUBDEV_FORMAT_TRY);
+               if (ret == 0) {
+                       vformat.which = V4L2_SUBDEV_FORMAT_TRY;
+                       ret = v4l2_subdev_call(input->camera, pad, set_fmt,
+                                              input->try_sd_state, &vformat);
+               }
+
+               v4l2_subdev_unlock_state(input->try_sd_state);
  
-               vformat.which = V4L2_SUBDEV_FORMAT_TRY;
-               ret = v4l2_subdev_call(input->camera, pad, set_fmt, &pad_state, &vformat);
                 if (ret)
                         return ret;
  
@@ -4291,12 +4295,18 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
                 }
         }
  
-       ret = atomisp_set_crop(isp, &vformat.format, V4L2_SUBDEV_FORMAT_ACTIVE);
-       if (ret)
-               return ret;
+       act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+
+       ret = atomisp_set_crop(isp, &vformat.format, act_sd_state,
+                              V4L2_SUBDEV_FORMAT_ACTIVE);
+       if (ret == 0) {
+               vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+               ret = v4l2_subdev_call(input->camera, pad, set_fmt, act_sd_state, &vformat);
+       }
+
+       if (act_sd_state)
+               v4l2_subdev_unlock_state(act_sd_state);
  
-       vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE;
-       ret = v4l2_subdev_call(input->camera, pad, set_fmt, NULL, &vformat);
         if (ret)
                 return ret;
  
diff --git a/drivers/staging/media/atomisp/pci/atomisp_internal.h b/drivers/staging/media/atomisp/pci/atomisp_internal.h

index f7b4bee9574bdb8ea330bec4e04074515e71f5a5..d5b077e602caec6ac2863780f660f7aac751ff02 100644 (file)
--- a/drivers/staging/media/atomisp/pci/atomisp_internal.h
+++ b/drivers/staging/media/atomisp/pci/atomisp_internal.h
@@ -132,8 +132,8 @@ struct atomisp_input_subdev {
         /* Sensor rects for sensors which support crop */
         struct v4l2_rect native_rect;
         struct v4l2_rect active_rect;
-       /* Sensor pad_cfg for which == V4L2_SUBDEV_FORMAT_TRY calls */
-       struct v4l2_subdev_pad_config pad_cfg;
+       /* Sensor state for which == V4L2_SUBDEV_FORMAT_TRY calls */
+       struct v4l2_subdev_state *try_sd_state;
  
         struct v4l2_subdev *motor;
  
diff --git a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c

index 01b7fa9b56a21378459f3aa4101eab6195558546..5b2d88c02d36a083376ee21660923635e9ff70c4 100644 (file)
--- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c
@@ -781,12 +781,20 @@ static int atomisp_enum_framesizes(struct file *file, void *priv,
                 .which = V4L2_SUBDEV_FORMAT_ACTIVE,
                 .code = input->code,
         };
+       struct v4l2_subdev_state *act_sd_state;
         int ret;
  
+       if (!input->camera)
+               return -EINVAL;
+
         if (input->crop_support)
                 return atomisp_enum_framesizes_crop(isp, fsize);
  
-       ret = v4l2_subdev_call(input->camera, pad, enum_frame_size, NULL, &fse);
+       act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+       ret = v4l2_subdev_call(input->camera, pad, enum_frame_size,
+                              act_sd_state, &fse);
+       if (act_sd_state)
+               v4l2_subdev_unlock_state(act_sd_state);
         if (ret)
                 return ret;
  
@@ -803,18 +811,25 @@ static int atomisp_enum_frameintervals(struct file *file, void *priv,
         struct video_device *vdev = video_devdata(file);
         struct atomisp_device *isp = video_get_drvdata(vdev);
         struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
+       struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
         struct v4l2_subdev_frame_interval_enum fie = {
-               .code   = atomisp_in_fmt_conv[0].code,
+               .code = atomisp_in_fmt_conv[0].code,
                 .index = fival->index,
                 .width = fival->width,
                 .height = fival->height,
                 .which = V4L2_SUBDEV_FORMAT_ACTIVE,
         };
+       struct v4l2_subdev_state *act_sd_state;
         int ret;
  
-       ret = v4l2_subdev_call(isp->inputs[asd->input_curr].camera,
-                              pad, enum_frame_interval, NULL,
-                              &fie);
+       if (!input->camera)
+               return -EINVAL;
+
+       act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+       ret = v4l2_subdev_call(input->camera, pad, enum_frame_interval,
+                              act_sd_state, &fie);
+       if (act_sd_state)
+               v4l2_subdev_unlock_state(act_sd_state);
         if (ret)
                 return ret;
  
@@ -830,30 +845,25 @@ static int atomisp_enum_fmt_cap(struct file *file, void *fh,
         struct video_device *vdev = video_devdata(file);
         struct atomisp_device *isp = video_get_drvdata(vdev);
         struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
+       struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
         struct v4l2_subdev_mbus_code_enum code = {
                 .which = V4L2_SUBDEV_FORMAT_ACTIVE,
         };
         const struct atomisp_format_bridge *format;
-       struct v4l2_subdev *camera;
+       struct v4l2_subdev_state *act_sd_state;
         unsigned int i, fi = 0;
-       int rval;
+       int ret;
  
-       camera = isp->inputs[asd->input_curr].camera;
-       if(!camera) {
-               dev_err(isp->dev, "%s(): camera is NULL, device is %s\n",
-                       __func__, vdev->name);
+       if (!input->camera)
                 return -EINVAL;
-       }
  
-       rval = v4l2_subdev_call(camera, pad, enum_mbus_code, NULL, &code);
-       if (rval == -ENOIOCTLCMD) {
-               dev_warn(isp->dev,
-                        "enum_mbus_code pad op not supported by %s. Please fix your sensor driver!\n",
-                        camera->name);
-       }
-
-       if (rval)
-               return rval;
+       act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+       ret = v4l2_subdev_call(input->camera, pad, enum_mbus_code,
+                              act_sd_state, &code);
+       if (act_sd_state)
+               v4l2_subdev_unlock_state(act_sd_state);
+       if (ret)
+               return ret;
  
         for (i = 0; i < ARRAY_SIZE(atomisp_output_fmts); i++) {
                 format = &atomisp_output_fmts[i];
diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c

index c1c8501ec61f57046af5027c8f9a4170597210d5..547e1444ad9733569816c1e43c74e542089f82fa 100644 (file)
--- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
@@ -862,6 +862,9 @@ static void atomisp_unregister_entities(struct atomisp_device *isp)
         v4l2_device_unregister(&isp->v4l2_dev);
         media_device_unregister(&isp->media_dev);
         media_device_cleanup(&isp->media_dev);
+
+       for (i = 0; i < isp->input_cnt; i++)
+               __v4l2_subdev_state_free(isp->inputs[i].try_sd_state);
  }
  
  static int atomisp_register_entities(struct atomisp_device *isp)
@@ -933,32 +936,49 @@ v4l2_device_failed:
  
  static void atomisp_init_sensor(struct atomisp_input_subdev *input)
  {
+       static struct lock_class_key try_sd_state_key;
         struct v4l2_subdev_mbus_code_enum mbus_code_enum = { };
         struct v4l2_subdev_frame_size_enum fse = { };
-       struct v4l2_subdev_state sd_state = {
-               .pads = &input->pad_cfg,
-       };
         struct v4l2_subdev_selection sel = { };
+       struct v4l2_subdev_state *try_sd_state, *act_sd_state;
         int i, err;
  
+       /*
+        * FIXME: Drivers are not supposed to use __v4l2_subdev_state_alloc()
+        * but atomisp needs this for try_fmt on its /dev/video# node since
+        * it emulates a normal v4l2 device there, passing through try_fmt /
+        * set_fmt to the sensor.
+        */
+       try_sd_state = __v4l2_subdev_state_alloc(input->camera,
+                               "atomisp:try_sd_state->lock", &try_sd_state_key);
+       if (IS_ERR(try_sd_state))
+               return;
+
+       input->try_sd_state = try_sd_state;
+
+       act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
+
         mbus_code_enum.which = V4L2_SUBDEV_FORMAT_ACTIVE;
-       err = v4l2_subdev_call(input->camera, pad, enum_mbus_code, NULL, &mbus_code_enum);
+       err = v4l2_subdev_call(input->camera, pad, enum_mbus_code,
+                              act_sd_state, &mbus_code_enum);
         if (!err)
                 input->code = mbus_code_enum.code;
  
         sel.which = V4L2_SUBDEV_FORMAT_ACTIVE;
         sel.target = V4L2_SEL_TGT_NATIVE_SIZE;
-       err = v4l2_subdev_call(input->camera, pad, get_selection, NULL, &sel);
+       err = v4l2_subdev_call(input->camera, pad, get_selection,
+                              act_sd_state, &sel);
         if (err)
-               return;
+               goto unlock_act_sd_state;
  
         input->native_rect = sel.r;
  
         sel.which = V4L2_SUBDEV_FORMAT_ACTIVE;
         sel.target = V4L2_SEL_TGT_CROP_DEFAULT;
-       err = v4l2_subdev_call(input->camera, pad, get_selection, NULL, &sel);
+       err = v4l2_subdev_call(input->camera, pad, get_selection,
+                              act_sd_state, &sel);
         if (err)
-               return;
+               goto unlock_act_sd_state;
  
         input->active_rect = sel.r;
  
@@ -973,7 +993,8 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input)
                 fse.code = input->code;
                 fse.which = V4L2_SUBDEV_FORMAT_ACTIVE;
  
-               err = v4l2_subdev_call(input->camera, pad, enum_frame_size, NULL, &fse);
+               err = v4l2_subdev_call(input->camera, pad, enum_frame_size,
+                                      act_sd_state, &fse);
                 if (err)
                         break;
  
@@ -989,22 +1010,26 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input)
          * for padding, set the crop rect to cover the entire sensor instead
          * of only the default active area.
          *
-        * Do this for both try and active formats since the try_crop rect in
-        * pad_cfg may influence (clamp) future try_fmt calls with which == try.
+        * Do this for both try and active formats since the crop rect in
+        * try_sd_state may influence (clamp size) in calls with which == try.
          */
         sel.which = V4L2_SUBDEV_FORMAT_TRY;
         sel.target = V4L2_SEL_TGT_CROP;
         sel.r = input->native_rect;
-       err = v4l2_subdev_call(input->camera, pad, set_selection, &sd_state, &sel);
+       v4l2_subdev_lock_state(input->try_sd_state);
+       err = v4l2_subdev_call(input->camera, pad, set_selection,
+                              input->try_sd_state, &sel);
+       v4l2_subdev_unlock_state(input->try_sd_state);
         if (err)
-               return;
+               goto unlock_act_sd_state;
  
         sel.which = V4L2_SUBDEV_FORMAT_ACTIVE;
         sel.target = V4L2_SEL_TGT_CROP;
         sel.r = input->native_rect;
-       err = v4l2_subdev_call(input->camera, pad, set_selection, NULL, &sel);
+       err = v4l2_subdev_call(input->camera, pad, set_selection,
+                              act_sd_state, &sel);
         if (err)
-               return;
+               goto unlock_act_sd_state;
  
         dev_info(input->camera->dev, "Supports crop native %dx%d active %dx%d binning %d\n",
                  input->native_rect.width, input->native_rect.height,
@@ -1012,6 +1037,10 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input)
                  input->binning_support);
  
         input->crop_support = true;
+
+unlock_act_sd_state:
+       if (act_sd_state)
+               v4l2_subdev_unlock_state(act_sd_state);
  }
  
  int atomisp_register_device_nodes(struct atomisp_device *isp)
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c

index a5f58988130a15c921e45570298edfbe212273ba..c1fbcdd1618264f0cd09f5e4078ac600ad6dc22a 100644 (file)
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -759,6 +759,29 @@ static ssize_t emulate_tas_store(struct config_item *item,
         return count;
  }
  
+static int target_try_configure_unmap(struct se_device *dev,
+                                     const char *config_opt)
+{
+       if (!dev->transport->configure_unmap) {
+               pr_err("Generic Block Discard not supported\n");
+               return -ENOSYS;
+       }
+
+       if (!target_dev_configured(dev)) {
+               pr_err("Generic Block Discard setup for %s requires device to be configured\n",
+                      config_opt);
+               return -ENODEV;
+       }
+
+       if (!dev->transport->configure_unmap(dev)) {
+               pr_err("Generic Block Discard setup for %s failed\n",
+                      config_opt);
+               return -ENOSYS;
+       }
+
+       return 0;
+}
+
  static ssize_t emulate_tpu_store(struct config_item *item,
                 const char *page, size_t count)
  {
@@ -776,11 +799,9 @@ static ssize_t emulate_tpu_store(struct config_item *item,
          * Discard supported is detected iblock_create_virtdevice().
          */
         if (flag && !da->max_unmap_block_desc_count) {
-               if (!dev->transport->configure_unmap ||
-                   !dev->transport->configure_unmap(dev)) {
-                       pr_err("Generic Block Discard not supported\n");
-                       return -ENOSYS;
-               }
+               ret = target_try_configure_unmap(dev, "emulate_tpu");
+               if (ret)
+                       return ret;
         }
  
         da->emulate_tpu = flag;
@@ -806,11 +827,9 @@ static ssize_t emulate_tpws_store(struct config_item *item,
          * Discard supported is detected iblock_create_virtdevice().
          */
         if (flag && !da->max_unmap_block_desc_count) {
-               if (!dev->transport->configure_unmap ||
-                   !dev->transport->configure_unmap(dev)) {
-                       pr_err("Generic Block Discard not supported\n");
-                       return -ENOSYS;
-               }
+               ret = target_try_configure_unmap(dev, "emulate_tpws");
+               if (ret)
+                       return ret;
         }
  
         da->emulate_tpws = flag;
@@ -1022,12 +1041,9 @@ static ssize_t unmap_zeroes_data_store(struct config_item *item,
          * Discard supported is detected iblock_configure_device().
          */
         if (flag && !da->max_unmap_block_desc_count) {
-               if (!dev->transport->configure_unmap ||
-                   !dev->transport->configure_unmap(dev)) {
-                       pr_err("dev[%p]: Thin Provisioning LBPRZ will not be set because max_unmap_block_desc_count is zero\n",
-                              da->da_dev);
-                       return -ENOSYS;
-               }
+               ret = target_try_configure_unmap(dev, "unmap_zeroes_data");
+               if (ret)
+                       return ret;
         }
         da->unmap_zeroes_data = flag;
         pr_debug("dev[%p]: SE Device Thin Provisioning LBPRZ bit: %d\n",
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c

index 41b7489d37ce95e059ec4849ae7039949c6e6ff1..ed4fd22eac6e0412821a11b1914e4f8e43153576 100644 (file)
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -907,12 +907,15 @@ new_bio:
  
         return 0;
  fail:
-       if (bio)
-               bio_put(bio);
+       if (bio) {
+               bio_uninit(bio);
+               kfree(bio);
+       }
         while (req->bio) {
                 bio = req->bio;
                 req->bio = bio->bi_next;
-               bio_put(bio);
+               bio_uninit(bio);
+               kfree(bio);
         }
         req->biotail = NULL;
         return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h

index 87e4795275fe6772e0497e8c50650d4a1835b1af..6f798f6a2b8488ca5011fe813733ffc8b5942f48 100644 (file)
--- a/drivers/thunderbolt/tb_regs.h
+++ b/drivers/thunderbolt/tb_regs.h
@@ -203,7 +203,7 @@ struct tb_regs_switch_header {
  #define ROUTER_CS_5_WOP                                BIT(1)
  #define ROUTER_CS_5_WOU                                BIT(2)
  #define ROUTER_CS_5_WOD                                BIT(3)
-#define ROUTER_CS_5_C3S                                BIT(23)
+#define ROUTER_CS_5_CNS                                BIT(23)
  #define ROUTER_CS_5_PTO                                BIT(24)
  #define ROUTER_CS_5_UTO                                BIT(25)
  #define ROUTER_CS_5_HCO                                BIT(26)
diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c

index f8f0d24ff6e4629856ea8a59b3941b79633d1781..1515eff8cc3e23434202fead2a9aa038080111d6 100644 (file)
--- a/drivers/thunderbolt/usb4.c
+++ b/drivers/thunderbolt/usb4.c
@@ -290,7 +290,7 @@ int usb4_switch_setup(struct tb_switch *sw)
         }
  
         /* TBT3 supported by the CM */
-       val |= ROUTER_CS_5_C3S;
+       val &= ~ROUTER_CS_5_CNS;
  
         return tb_sw_write(sw, &val, TB_CFG_SWITCH, ROUTER_CS_5, 1);
  }
diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig

index 6e05c5c7bca1ad258502eaf158b94534c1cdd23d..c2a4e88b328f35888cb44c0fe1ca5f57f2040e66 100644 (file)
--- a/drivers/tty/hvc/Kconfig
+++ b/drivers/tty/hvc/Kconfig
@@ -108,13 +108,15 @@ config HVC_DCC_SERIALIZE_SMP
  
  config HVC_RISCV_SBI
         bool "RISC-V SBI console support"
-       depends on RISCV_SBI
+       depends on RISCV_SBI && NONPORTABLE
         select HVC_DRIVER
         help
           This enables support for console output via RISC-V SBI calls, which
-         is normally used only during boot to output printk.
+         is normally used only during boot to output printk.  This driver
+         conflicts with real console drivers and should not be enabled on
+         systems that directly access the console.
  
-         If you don't know what do to here, say Y.
+         If you don't know what do to here, say N.
  
  config HVCS
         tristate "IBM Hypervisor Virtual Console Server support"
diff --git a/drivers/tty/serial/8250/8250_pci1xxxx.c b/drivers/tty/serial/8250/8250_pci1xxxx.c

index 558c4c7f3104ead7e7fe420c2634428651c88dd2..2dda737b1660bd7bd6f576d2146440d97e945047 100644 (file)
--- a/drivers/tty/serial/8250/8250_pci1xxxx.c
+++ b/drivers/tty/serial/8250/8250_pci1xxxx.c
@@ -311,7 +311,7 @@ static void pci1xxxx_process_read_data(struct uart_port *port,
         }
  
         while (*valid_byte_count) {
-               if (*buff_index > RX_BUF_SIZE)
+               if (*buff_index >= RX_BUF_SIZE)
                         break;
                 rx_buff[*buff_index] = readb(port->membase +
                                              UART_RX_BYTE_FIFO);
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c

index fccec1698a54104c1487ea65536dce7729123c61..cf2c890a560f05204e249b931668deca04b3cb27 100644 (file)
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1339,11 +1339,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap)
         }
  }
  
+static void pl011_rs485_tx_start(struct uart_amba_port *uap)
+{
+       struct uart_port *port = &uap->port;
+       u32 cr;
+
+       /* Enable transmitter */
+       cr = pl011_read(uap, REG_CR);
+       cr |= UART011_CR_TXE;
+
+       /* Disable receiver if half-duplex */
+       if (!(port->rs485.flags & SER_RS485_RX_DURING_TX))
+               cr &= ~UART011_CR_RXE;
+
+       if (port->rs485.flags & SER_RS485_RTS_ON_SEND)
+               cr &= ~UART011_CR_RTS;
+       else
+               cr |= UART011_CR_RTS;
+
+       pl011_write(cr, uap, REG_CR);
+
+       if (port->rs485.delay_rts_before_send)
+               mdelay(port->rs485.delay_rts_before_send);
+
+       uap->rs485_tx_started = true;
+}
+
  static void pl011_start_tx(struct uart_port *port)
  {
         struct uart_amba_port *uap =
             container_of(port, struct uart_amba_port, port);
  
+       if ((uap->port.rs485.flags & SER_RS485_ENABLED) &&
+           !uap->rs485_tx_started)
+               pl011_rs485_tx_start(uap);
+
         if (!pl011_dma_tx_start(uap))
                 pl011_start_tx_pio(uap);
  }
@@ -1424,42 +1454,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c,
         return true;
  }
  
-static void pl011_rs485_tx_start(struct uart_amba_port *uap)
-{
-       struct uart_port *port = &uap->port;
-       u32 cr;
-
-       /* Enable transmitter */
-       cr = pl011_read(uap, REG_CR);
-       cr |= UART011_CR_TXE;
-
-       /* Disable receiver if half-duplex */
-       if (!(port->rs485.flags & SER_RS485_RX_DURING_TX))
-               cr &= ~UART011_CR_RXE;
-
-       if (port->rs485.flags & SER_RS485_RTS_ON_SEND)
-               cr &= ~UART011_CR_RTS;
-       else
-               cr |= UART011_CR_RTS;
-
-       pl011_write(cr, uap, REG_CR);
-
-       if (port->rs485.delay_rts_before_send)
-               mdelay(port->rs485.delay_rts_before_send);
-
-       uap->rs485_tx_started = true;
-}
-
  /* Returns true if tx interrupts have to be (kept) enabled  */
  static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq)
  {
         struct circ_buf *xmit = &uap->port.state->xmit;
         int count = uap->fifosize >> 1;
  
-       if ((uap->port.rs485.flags & SER_RS485_ENABLED) &&
-           !uap->rs485_tx_started)
-               pl011_rs485_tx_start(uap);
-
         if (uap->port.x_char) {
                 if (!pl011_tx_char(uap, uap->port.x_char, from_irq))
                         return true;
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c

index f3a99daebdaa0e59d0d81211fad5a1f85947b011..10bf6d75bf9ee7f9ee13a36796a8af5439ac5eb9 100644 (file)
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -237,6 +237,14 @@
  #define MAX310x_REV_MASK               (0xf8)
  #define MAX310X_WRITE_BIT              0x80
  
+/* Port startup definitions */
+#define MAX310X_PORT_STARTUP_WAIT_RETRIES      20 /* Number of retries */
+#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS     10 /* Delay between retries */
+
+/* Crystal-related definitions */
+#define MAX310X_XTAL_WAIT_RETRIES      20 /* Number of retries */
+#define MAX310X_XTAL_WAIT_DELAY_MS     10 /* Delay between retries */
+
  /* MAX3107 specific */
  #define MAX3107_REV_ID                 (0xa0)
  
@@ -583,7 +591,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr)
         return 1;
  }
  
-static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
+static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
                                unsigned long freq, bool xtal)
  {
         unsigned int div, clksrc, pllcfg = 0;
@@ -641,12 +649,20 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s,
  
         /* Wait for crystal */
         if (xtal) {
-               unsigned int val;
-               msleep(10);
-               regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val);
-               if (!(val & MAX310X_STS_CLKREADY_BIT)) {
-                       dev_warn(dev, "clock is not stable yet\n");
-               }
+               bool stable = false;
+               unsigned int try = 0, val = 0;
+
+               do {
+                       msleep(MAX310X_XTAL_WAIT_DELAY_MS);
+                       regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val);
+
+                       if (val & MAX310X_STS_CLKREADY_BIT)
+                               stable = true;
+               } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES));
+
+               if (!stable)
+                       return dev_err_probe(dev, -EAGAIN,
+                                            "clock is not stable\n");
         }
  
         return bestfreq;
@@ -1271,7 +1287,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
  {
         int i, ret, fmin, fmax, freq;
         struct max310x_port *s;
-       u32 uartclk = 0;
+       s32 uartclk = 0;
         bool xtal;
  
         for (i = 0; i < devtype->nr; i++)
@@ -1334,6 +1350,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
                 goto out_clk;
  
         for (i = 0; i < devtype->nr; i++) {
+               bool started = false;
+               unsigned int try = 0, val = 0;
+
                 /* Reset port */
                 regmap_write(regmaps[i], MAX310X_MODE2_REG,
                              MAX310X_MODE2_RST_BIT);
@@ -1342,13 +1361,27 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
  
                 /* Wait for port startup */
                 do {
-                       regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret);
-               } while (ret != 0x01);
+                       msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS);
+                       regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val);
+
+                       if (val == 0x01)
+                               started = true;
+               } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES));
+
+               if (!started) {
+                       ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n");
+                       goto out_uart;
+               }
  
                 regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1);
         }
  
         uartclk = max310x_set_ref_clk(dev, s, freq, xtal);
+       if (uartclk < 0) {
+               ret = uartclk;
+               goto out_uart;
+       }
+
         dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk);
  
         for (i = 0; i < devtype->nr; i++) {
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c

index 3ec725555bcc1e6eb2843a186a5c49cba1ab0b42..4749331fe618cad7c0af98630f90021b8244bd07 100644 (file)
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -605,13 +605,16 @@ static void mxs_auart_tx_chars(struct mxs_auart_port *s)
                 return;
         }
  
-       pending = uart_port_tx(&s->port, ch,
+       pending = uart_port_tx_flags(&s->port, ch, UART_TX_NOSTOP,
                 !(mxs_read(s, REG_STAT) & AUART_STAT_TXFF),
                 mxs_write(ch, s, REG_DATA));
         if (pending)
                 mxs_set(AUART_INTR_TXIEN, s, REG_INTR);
         else
                 mxs_clr(AUART_INTR_TXIEN, s, REG_INTR);
+
+       if (uart_tx_stopped(&s->port))
+               mxs_auart_stop_tx(&s->port);
  }
  
  static void mxs_auart_rx_char(struct mxs_auart_port *s)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c

index b56ed8c376b22fc5ec8c0e833a39dd976a7a58da..d6a58a9e072a1dad7938fbb53627f4d5e5374adc 100644 (file)
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1084,8 +1084,8 @@ static int uart_tiocmget(struct tty_struct *tty)
                 goto out;
  
         if (!tty_io_error(tty)) {
-               result = uport->mctrl;
                 uart_port_lock_irq(uport);
+               result = uport->mctrl;
                 result |= uport->ops->get_mctrl(uport);
                 uart_port_unlock_irq(uport);
         }
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c

index 794b7751274034848c65a7e3374b694bcf61c42d..693e932d6feb5842467d1408e04c8d574342cb1f 100644 (file)
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -251,7 +251,9 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter
                 writel_relaxed(cr3, port->membase + ofs->cr3);
                 writel_relaxed(cr1, port->membase + ofs->cr1);
  
-               rs485conf->flags |= SER_RS485_RX_DURING_TX;
+               if (!port->rs485_rx_during_tx_gpio)
+                       rs485conf->flags |= SER_RS485_RX_DURING_TX;
+
         } else {
                 stm32_usart_clr_bits(port, ofs->cr3,
                                      USART_CR3_DEM | USART_CR3_DEP);
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c

index 029d017fc1b66b5c6695096016b54983e26b3e5f..3b89c9d4aa404e48f7f5e95c545652c40ad13127 100644 (file)
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1469,7 +1469,7 @@ static int ufshcd_devfreq_target(struct device *dev,
         int ret = 0;
         struct ufs_hba *hba = dev_get_drvdata(dev);
         ktime_t start;
-       bool scale_up, sched_clk_scaling_suspend_work = false;
+       bool scale_up = false, sched_clk_scaling_suspend_work = false;
         struct list_head *clk_list = &hba->clk_list_head;
         struct ufs_clk_info *clki;
         unsigned long irq_flags;
@@ -3057,7 +3057,7 @@ bool ufshcd_cmd_inflight(struct scsi_cmnd *cmd)
   */
  static int ufshcd_clear_cmd(struct ufs_hba *hba, u32 task_tag)
  {
-       u32 mask = 1U << task_tag;
+       u32 mask;
         unsigned long flags;
         int err;
  
@@ -3075,6 +3075,8 @@ static int ufshcd_clear_cmd(struct ufs_hba *hba, u32 task_tag)
                 return 0;
         }
  
+       mask = 1U << task_tag;
+
         /* clear outstanding transaction before retry */
         spin_lock_irqsave(hba->host->host_lock, flags);
         ufshcd_utrl_clear(hba, mask);
@@ -6352,7 +6354,6 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
                 ufshcd_hold(hba);
                 if (!ufshcd_is_clkgating_allowed(hba))
                         ufshcd_setup_clocks(hba, true);
-               ufshcd_release(hba);
                 pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
                 ufshcd_vops_resume(hba, pm_op);
         } else {
diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c

index aeca902ab6cc427b0946cf13ea9b8c725eb3f287..fd1beb10bba726cef258e7438d642f31d6567dfe 100644 (file)
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -828,7 +828,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep,
                         return;
         }
  
-       if (request->complete) {
+       /*
+        * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify
+        * gadget composite driver.
+        */
+       if (request->complete && request->buf != priv_dev->zlp_buf) {
                 spin_unlock(&priv_dev->lock);
                 usb_gadget_giveback_request(&priv_ep->endpoint,
                                             request);
@@ -2540,11 +2544,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep)
  
         while (!list_empty(&priv_ep->wa2_descmiss_req_list)) {
                 priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list);
+               list_del_init(&priv_req->list);
  
                 kfree(priv_req->request.buf);
                 cdns3_gadget_ep_free_request(&priv_ep->endpoint,
                                              &priv_req->request);
-               list_del_init(&priv_req->list);
                 --priv_ep->wa2_counter;
         }
  
diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c

index 33548771a0d3a7212781ff39814fedb7d01f0ab4..465e9267b49c12768ac72ecb818f731fc8787641 100644 (file)
--- a/drivers/usb/cdns3/core.c
+++ b/drivers/usb/cdns3/core.c
@@ -395,7 +395,6 @@ pm_put:
         return ret;
  }
  
-
  /**
   * cdns_wakeup_irq - interrupt handler for wakeup events
   * @irq: irq number for cdns3/cdnsp core device
diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c

index 04b6d12f2b9a39b9bfad76fe1909b22f7c010990..ee917f1b091c893ebccad19bd5a62aea9e65c721 100644 (file)
--- a/drivers/usb/cdns3/drd.c
+++ b/drivers/usb/cdns3/drd.c
@@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns)
   */
  static void cdns_otg_disable_irq(struct cdns *cdns)
  {
-       writel(0, &cdns->otg_irq_regs->ien);
+       if (cdns->version)
+               writel(0, &cdns->otg_irq_regs->ien);
  }
  
  /**
@@ -422,15 +423,20 @@ int cdns_drd_init(struct cdns *cdns)
  
                 cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd;
  
-               if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) {
+               state = readl(&cdns->otg_cdnsp_regs->did);
+
+               if (OTG_CDNSP_CHECK_DID(state)) {
                         cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *)
                                               &cdns->otg_cdnsp_regs->ien;
                         cdns->version  = CDNSP_CONTROLLER_V2;
-               } else {
+               } else if (OTG_CDNS3_CHECK_DID(state)) {
                         cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *)
                                               &cdns->otg_v1_regs->ien;
                         writel(1, &cdns->otg_v1_regs->simulate);
                         cdns->version  = CDNS3_CONTROLLER_V1;
+               } else {
+                       dev_err(cdns->dev, "not supporte DID=0x%08x\n", state);
+                       return -EINVAL;
                 }
  
                 dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n",
@@ -483,7 +489,6 @@ int cdns_drd_exit(struct cdns *cdns)
         return 0;
  }
  
-
  /* Indicate the cdns3 core was power lost before */
  bool cdns_power_is_lost(struct cdns *cdns)
  {
diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h

index cbdf94f73ed917bb14baf23a9087b10aca2f7015..d72370c321d3929fc477854585d9e46be6848fef 100644 (file)
--- a/drivers/usb/cdns3/drd.h
+++ b/drivers/usb/cdns3/drd.h
@@ -79,7 +79,11 @@ struct cdnsp_otg_regs {
         __le32 susp_timing_ctrl;
  };
  
-#define OTG_CDNSP_DID  0x0004034E
+/* CDNSP driver supports 0x000403xx Cadence USB controller family. */
+#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300)
+
+/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */
+#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200)
  
  /*
   * Common registers interface for both CDNS3 and CDNSP version of DRD.
diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c

index 6164fc4c96a49b60b73f772bdc92b8acf383269c..ceca4d839dfd42b87167f4de3019ab63776fa6c2 100644 (file)
--- a/drivers/usb/cdns3/host.c
+++ b/drivers/usb/cdns3/host.c
@@ -18,6 +18,11 @@
  #include "../host/xhci.h"
  #include "../host/xhci-plat.h"
  
+/*
+ * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only
+ * in Cadence USB3 dual-role controller, so it can't be used
+ * with Cadence CDNSP dual-role controller.
+ */
  #define XECP_PORT_CAP_REG      0x8000
  #define XECP_AUX_CTRL_REG1     0x8120
  
@@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = {
         .resume_quirk = xhci_cdns3_resume_quirk,
  };
  
+static const struct xhci_plat_priv xhci_plat_cdnsp_xhci;
+
  static int __cdns_host_init(struct cdns *cdns)
  {
         struct platform_device *xhci;
@@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns)
                 goto err1;
         }
  
-       cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci,
-                       sizeof(struct xhci_plat_priv), GFP_KERNEL);
+       if (cdns->version < CDNSP_CONTROLLER_V2)
+               cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci,
+                               sizeof(struct xhci_plat_priv), GFP_KERNEL);
+       else
+               cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci,
+                               sizeof(struct xhci_plat_priv), GFP_KERNEL);
+
         if (!cdns->xhci_plat_data) {
                 ret = -ENOMEM;
                 goto err1;
diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h

index d9bb3d3f026e68cae40de5dee4fa9d81ed391f10..2a38e1eb65466c82a6eb9e4f2feba8fc59ee7dfc 100644 (file)
--- a/drivers/usb/chipidea/ci.h
+++ b/drivers/usb/chipidea/ci.h
@@ -176,6 +176,7 @@ struct hw_bank {
   * @enabled_otg_timer_bits: bits of enabled otg timers
   * @next_otg_timer: next nearest enabled timer to be expired
   * @work: work for role changing
+ * @power_lost_work: work for power lost handling
   * @wq: workqueue thread
   * @qh_pool: allocation pool for queue heads
   * @td_pool: allocation pool for transfer descriptors
@@ -226,6 +227,7 @@ struct ci_hdrc {
         enum otg_fsm_timer              next_otg_timer;
         struct usb_role_switch          *role_switch;
         struct work_struct              work;
+       struct work_struct              power_lost_work;
         struct workqueue_struct         *wq;
  
         struct dma_pool                 *qh_pool;
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c

index 41014f93cfdf35ee42e859244e995a43ede7e777..835bf2428dc6eccee263b05024d42885884cd94d 100644 (file)
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -856,6 +856,27 @@ static int ci_extcon_register(struct ci_hdrc *ci)
         return 0;
  }
  
+static void ci_power_lost_work(struct work_struct *work)
+{
+       struct ci_hdrc *ci = container_of(work, struct ci_hdrc, power_lost_work);
+       enum ci_role role;
+
+       disable_irq_nosync(ci->irq);
+       pm_runtime_get_sync(ci->dev);
+       if (!ci_otg_is_fsm_mode(ci)) {
+               role = ci_get_role(ci);
+
+               if (ci->role != role) {
+                       ci_handle_id_switch(ci);
+               } else if (role == CI_ROLE_GADGET) {
+                       if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
+                               usb_gadget_vbus_connect(&ci->gadget);
+               }
+       }
+       pm_runtime_put_sync(ci->dev);
+       enable_irq(ci->irq);
+}
+
  static DEFINE_IDA(ci_ida);
  
  struct platform_device *ci_hdrc_add_device(struct device *dev,
@@ -1045,6 +1066,8 @@ static int ci_hdrc_probe(struct platform_device *pdev)
  
         spin_lock_init(&ci->lock);
         mutex_init(&ci->mutex);
+       INIT_WORK(&ci->power_lost_work, ci_power_lost_work);
+
         ci->dev = dev;
         ci->platdata = dev_get_platdata(dev);
         ci->imx28_write_fix = !!(ci->platdata->flags &
@@ -1396,25 +1419,6 @@ static int ci_suspend(struct device *dev)
         return 0;
  }
  
-static void ci_handle_power_lost(struct ci_hdrc *ci)
-{
-       enum ci_role role;
-
-       disable_irq_nosync(ci->irq);
-       if (!ci_otg_is_fsm_mode(ci)) {
-               role = ci_get_role(ci);
-
-               if (ci->role != role) {
-                       ci_handle_id_switch(ci);
-               } else if (role == CI_ROLE_GADGET) {
-                       if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
-                               usb_gadget_vbus_connect(&ci->gadget);
-               }
-       }
-
-       enable_irq(ci->irq);
-}
-
  static int ci_resume(struct device *dev)
  {
         struct ci_hdrc *ci = dev_get_drvdata(dev);
@@ -1446,7 +1450,7 @@ static int ci_resume(struct device *dev)
                 ci_role(ci)->resume(ci, power_lost);
  
         if (power_lost)
-               ci_handle_power_lost(ci);
+               queue_work(system_freezable_wq, &ci->power_lost_work);
  
         if (ci->supports_runtime_pm) {
                 pm_runtime_disable(dev);
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c

index 84d91b1c1eed53e11539b69ccaa80080870ab043..0886b19d2e1c8f2b1c0f4e8bf85d6240f7cf19d1 100644 (file)
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi)
                 return ret;
         }
  
-       root = debugfs_create_dir(dev_name(dev), ulpi_root);
+       root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root);
         debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops);
  
         dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n",
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c

index ffd7c99e24a3624fba07e8277866a5526c6695eb..e38a4124f6102a5ff2a47107a8286815cfc5c8e2 100644 (file)
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2053,9 +2053,19 @@ static void update_port_device_state(struct usb_device *udev)
  
         if (udev->parent) {
                 hub = usb_hub_to_struct_hub(udev->parent);
-               port_dev = hub->ports[udev->portnum - 1];
-               WRITE_ONCE(port_dev->state, udev->state);
-               sysfs_notify_dirent(port_dev->state_kn);
+
+               /*
+                * The Link Layer Validation System Driver (lvstest)
+                * has a test step to unbind the hub before running the
+                * rest of the procedure. This triggers hub_disconnect
+                * which will set the hub's maxchild to 0, further
+                * resulting in usb_hub_to_struct_hub returning NULL.
+                */
+               if (hub) {
+                       port_dev = hub->ports[udev->portnum - 1];
+                       WRITE_ONCE(port_dev->state, udev->state);
+                       sysfs_notify_dirent(port_dev->state_kn);
+               }
         }
  }
  
@@ -2388,17 +2398,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev)
                         }
                 } else if (desc->bLength == sizeof
                                 (struct usb_otg_descriptor)) {
-                       /* Set a_alt_hnp_support for legacy otg device */
-                       err = usb_control_msg(udev,
-                               usb_sndctrlpipe(udev, 0),
-                               USB_REQ_SET_FEATURE, 0,
-                               USB_DEVICE_A_ALT_HNP_SUPPORT,
-                               0, NULL, 0,
-                               USB_CTRL_SET_TIMEOUT);
-                       if (err < 0)
-                               dev_err(&udev->dev,
-                                       "set a_alt_hnp_support failed: %d\n",
-                                       err);
+                       /*
+                        * We are operating on a legacy OTP device
+                        * These should be told that they are operating
+                        * on the wrong port if we have another port that does
+                        * support HNP
+                        */
+                       if (bus->otg_port != 0) {
+                               /* Set a_alt_hnp_support for legacy otg device */
+                               err = usb_control_msg(udev,
+                                       usb_sndctrlpipe(udev, 0),
+                                       USB_REQ_SET_FEATURE, 0,
+                                       USB_DEVICE_A_ALT_HNP_SUPPORT,
+                                       0, NULL, 0,
+                                       USB_CTRL_SET_TIMEOUT);
+                               if (err < 0)
+                                       dev_err(&udev->dev,
+                                               "set a_alt_hnp_support failed: %d\n",
+                                               err);
+                       }
                 }
         }
  #endif
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h

index e3eea965e57bfd3d32fa6b1cb52fd4072734a30d..e120611a5174f7589ac124641a7b279654babff6 100644 (file)
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -376,7 +376,6 @@
  /* Global HWPARAMS4 Register */
  #define DWC3_GHWPARAMS4_HIBER_SCRATCHBUFS(n)   (((n) & (0x0f << 13)) >> 13)
  #define DWC3_MAX_HIBER_SCRATCHBUFS             15
-#define DWC3_EXT_BUFF_CONTROL          BIT(21)
  
  /* Global HWPARAMS6 Register */
  #define DWC3_GHWPARAMS6_BCSUPPORT              BIT(14)
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c

index 6604845c397cd2171ee55966fc3ba80f3f2538d1..39564e17f3b07a228d54e503f0926c7b9bb810cf 100644 (file)
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -51,6 +51,8 @@
  #define PCI_DEVICE_ID_INTEL_MTLP               0x7ec1
  #define PCI_DEVICE_ID_INTEL_MTLS               0x7f6f
  #define PCI_DEVICE_ID_INTEL_MTL                        0x7e7e
+#define PCI_DEVICE_ID_INTEL_ARLH               0x7ec1
+#define PCI_DEVICE_ID_INTEL_ARLH_PCH           0x777e
  #define PCI_DEVICE_ID_INTEL_TGL                        0x9a15
  #define PCI_DEVICE_ID_AMD_MR                   0x163a
  
@@ -421,6 +423,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
         { PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) },
         { PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) },
         { PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) },
+       { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) },
+       { PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) },
         { PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) },
  
         { PCI_DEVICE_DATA(AMD, NL_USB, &dwc3_pci_amd_swnode) },
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c

index 019368f8e9c4c3b2c26778eecc39fb23c6d614e6..28f49400f3e8b178e23c881120577da461178c35 100644 (file)
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -673,12 +673,6 @@ static int dwc3_gadget_set_ep_config(struct dwc3_ep *dep, unsigned int action)
                 params.param1 |= DWC3_DEPCFG_BINTERVAL_M1(bInterval_m1);
         }
  
-       if (dep->endpoint.fifo_mode) {
-               if (!(dwc->hwparams.hwparams4 & DWC3_EXT_BUFF_CONTROL))
-                       return -EINVAL;
-               params.param1 |= DWC3_DEPCFG_EBC_HWO_NOWB | DWC3_DEPCFG_USE_EBC;
-       }
-
         return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETEPCONFIG, &params);
  }
  
@@ -2656,6 +2650,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
         int ret;
  
         spin_lock_irqsave(&dwc->lock, flags);
+       if (!dwc->pullups_connected) {
+               spin_unlock_irqrestore(&dwc->lock, flags);
+               return 0;
+       }
+
         dwc->connected = false;
  
         /*
@@ -4709,15 +4708,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
         unsigned long flags;
         int ret;
  
-       if (!dwc->gadget_driver)
-               return 0;
-
         ret = dwc3_gadget_soft_disconnect(dwc);
         if (ret)
                 goto err;
  
         spin_lock_irqsave(&dwc->lock, flags);
-       dwc3_disconnect_gadget(dwc);
+       if (dwc->gadget_driver)
+               dwc3_disconnect_gadget(dwc);
         spin_unlock_irqrestore(&dwc->lock, flags);
  
         return 0;
diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h

index fd7a4e94397e64ccc74e362e5d73319918fdbae6..55a56cf67d7364998f9f4a42fd95e5d856cd105c 100644 (file)
--- a/drivers/usb/dwc3/gadget.h
+++ b/drivers/usb/dwc3/gadget.h
@@ -26,8 +26,6 @@ struct dwc3;
  #define DWC3_DEPCFG_XFER_NOT_READY_EN  BIT(10)
  #define DWC3_DEPCFG_FIFO_ERROR_EN      BIT(11)
  #define DWC3_DEPCFG_STREAM_EVENT_EN    BIT(13)
-#define DWC3_DEPCFG_EBC_HWO_NOWB       BIT(14)
-#define DWC3_DEPCFG_USE_EBC            BIT(15)
  #define DWC3_DEPCFG_BINTERVAL_M1(n)    (((n) & 0xff) << 16)
  #define DWC3_DEPCFG_STREAM_CAPABLE     BIT(24)
  #define DWC3_DEPCFG_EP_NUMBER(n)       (((n) & 0x1f) << 25)
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c

index 61f57fe5bb783bcf676cdb47177c66bb2a2e81be..43230915323c7dfa6625bfbbe67b1f8df238dcd4 100644 (file)
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -61,7 +61,7 @@ out:
  
  int dwc3_host_init(struct dwc3 *dwc)
  {
-       struct property_entry   props[4];
+       struct property_entry   props[5];
         struct platform_device  *xhci;
         int                     ret, irq;
         int                     prop_idx = 0;
@@ -89,6 +89,8 @@ int dwc3_host_init(struct dwc3 *dwc)
  
         memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props));
  
+       props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-sg-trb-cache-size-quirk");
+
         if (dwc->usb3_lpm_capable)
                 props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb3-lpm-capable");
  
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c

index 722a3ab2b337935e546806e21d1eab357f8f54e7..c265a1f62fc1451dacba18723e0ff75dbbebfc6d 100644 (file)
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -545,21 +545,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep,
  
  static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh)
  {
+       int rc;
+
         if (!fsg_is_set(common))
                 return false;
         bh->state = BUF_STATE_SENDING;
-       if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq))
+       rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq);
+       if (rc) {
                 bh->state = BUF_STATE_EMPTY;
+               if (rc == -ESHUTDOWN) {
+                       common->running = 0;
+                       return false;
+               }
+       }
         return true;
  }
  
  static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh)
  {
+       int rc;
+
         if (!fsg_is_set(common))
                 return false;
         bh->state = BUF_STATE_RECEIVING;
-       if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq))
+       rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq);
+       if (rc) {
                 bh->state = BUF_STATE_FULL;
+               if (rc == -ESHUTDOWN) {
+                       common->running = 0;
+                       return false;
+               }
+       }
         return true;
  }
  
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c

index a1575a0ca568d7c46bc23bd605404096db14e6e5..e2a059cfda2cdf8f82b7f280148f15e774c772f0 100644 (file)
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -105,8 +105,8 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f)
  
  /*
   * Although max mtu as dictated by u_ether is 15412 bytes, setting
- * max_segment_sizeto 15426 would not be efficient. If user chooses segment
- * size to be (>= 8192), then we can't aggregate more than one  buffer in each
+ * max_segment_size to 15426 would not be efficient. If user chooses segment
+ * size to be (>= 8192), then we can't aggregate more than one buffer in each
   * NTB (assuming each packet coming from network layer is >= 8192 bytes) as ep
   * maxpacket limit is 16384. So let max_segment_size be limited to 8000 to allow
   * at least 2 packets to be aggregated reducing wastage of NTB buffer space
@@ -1338,7 +1338,15 @@ parse_ntb:
              "Parsed NTB with %d frames\n", dgram_counter);
  
         to_process -= block_len;
-       if (to_process != 0) {
+
+       /*
+        * Windows NCM driver avoids USB ZLPs by adding a 1-byte
+        * zero pad as needed.
+        */
+       if (to_process == 1 &&
+           (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) {
+               to_process--;
+       } else if (to_process > 0) {
                 ntb_ptr = (unsigned char *)(ntb_ptr + block_len);
                 goto parse_ntb;
         }
@@ -1489,7 +1497,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f)
         ncm_data_intf.bInterfaceNumber = status;
         ncm_union_desc.bSlaveInterface0 = status;
  
-       ecm_desc.wMaxSegmentSize = ncm_opts->max_segment_size;
+       ecm_desc.wMaxSegmentSize = cpu_to_le16(ncm_opts->max_segment_size);
  
         status = -ENODEV;
  
@@ -1685,7 +1693,7 @@ static struct usb_function_instance *ncm_alloc_inst(void)
                 kfree(opts);
                 return ERR_CAST(net);
         }
-       opts->max_segment_size = cpu_to_le16(ETH_FRAME_LEN);
+       opts->max_segment_size = ETH_FRAME_LEN;
         INIT_LIST_HEAD(&opts->ncm_os_desc.ext_prop);
  
         descs[0] = &opts->ncm_os_desc;
diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c

index 10c5d7f726a1fdd967d058bcc60302db8d839009..f90eeecf27de110ee4abc9d4cebef8cf73306193 100644 (file)
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2036,7 +2036,8 @@ static irqreturn_t omap_udc_iso_irq(int irq, void *_dev)
  
  static inline int machine_without_vbus_sense(void)
  {
-       return  machine_is_omap_osk() || machine_is_sx1();
+       return  machine_is_omap_osk() || machine_is_omap_palmte() ||
+               machine_is_sx1();
  }
  
  static int omap_udc_start(struct usb_gadget *g,
diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c

index 4f8617210d852643e55a5b8463d0f03b6d9e9c78..169f72665739feca100e5a65786ddd6af2cbd675 100644 (file)
--- a/drivers/usb/gadget/udc/pch_udc.c
+++ b/drivers/usb/gadget/udc/pch_udc.c
@@ -274,7 +274,6 @@ struct pch_udc_cfg_data {
   * @td_data:           for data request
   * @dev:               reference to device struct
   * @offset_addr:       offset address of ep register
- * @desc:              for this ep
   * @queue:             queue for requests
   * @num:               endpoint number
   * @in:                        endpoint is IN
diff --git a/drivers/usb/host/uhci-grlib.c b/drivers/usb/host/uhci-grlib.c

index ac3fc597031573199a141e60e2b54432d2a2782e..cfebb833668e4b014633d0919be1aa1777c25140 100644 (file)
--- a/drivers/usb/host/uhci-grlib.c
+++ b/drivers/usb/host/uhci-grlib.c
@@ -22,6 +22,7 @@
  #include <linux/of_irq.h>
  #include <linux/of_address.h>
  #include <linux/of_platform.h>
+#include <linux/platform_device.h>
  
  static int uhci_grlib_init(struct usb_hcd *hcd)
  {
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c

index 4460fa7e9fab9e2e8412e9ed2844da3ad8c7c866..a7716202a8dd58d74f3d31fd48d0833de89db2be 100644 (file)
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1861,14 +1861,14 @@ void xhci_remove_secondary_interrupter(struct usb_hcd *hcd, struct xhci_interrup
         struct xhci_hcd *xhci = hcd_to_xhci(hcd);
         unsigned int intr_num;
  
+       spin_lock_irq(&xhci->lock);
+
         /* interrupter 0 is primary interrupter, don't touch it */
-       if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters)
+       if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) {
                 xhci_dbg(xhci, "Invalid secondary interrupter, can't remove\n");
-
-       /* fixme, should we check xhci->interrupter[intr_num] == ir */
-       /* fixme locking */
-
-       spin_lock_irq(&xhci->lock);
+               spin_unlock_irq(&xhci->lock);
+               return;
+       }
  
         intr_num = ir->intr_num;
  
@@ -2322,7 +2322,7 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir,
         u64 erst_base;
         u32 erst_size;
  
-       if (intr_num > xhci->max_interrupters) {
+       if (intr_num >= xhci->max_interrupters) {
                 xhci_warn(xhci, "Can't add interrupter %d, max interrupters %d\n",
                           intr_num, xhci->max_interrupters);
                 return -EINVAL;
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c

index f04fde19f5514bed72d25aca8e6f07582b563ebb..3d071b8753088a5437c2e9f82031a6db4ad91208 100644 (file)
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -253,6 +253,9 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
                 if (device_property_read_bool(tmpdev, "quirk-broken-port-ped"))
                         xhci->quirks |= XHCI_BROKEN_PORT_PED;
  
+               if (device_property_read_bool(tmpdev, "xhci-sg-trb-cache-size-quirk"))
+                       xhci->quirks |= XHCI_SG_TRB_CACHE_SIZE_QUIRK;
+
                 device_property_read_u32(tmpdev, "imod-interval-ns",
                                          &xhci->imod_interval);
         }
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c

index 33806ae966f90c2167c967c38b17571fca0b179d..f0d8a607ff214f86ba33b2e9126ccb186e6c1853 100644 (file)
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2376,6 +2376,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
         /* handle completion code */
         switch (trb_comp_code) {
         case COMP_SUCCESS:
+               /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
+               if (td->error_mid_td)
+                       break;
                 if (remaining) {
                         frame->status = short_framestatus;
                         if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
@@ -2391,9 +2394,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
         case COMP_BANDWIDTH_OVERRUN_ERROR:
                 frame->status = -ECOMM;
                 break;
-       case COMP_ISOCH_BUFFER_OVERRUN:
         case COMP_BABBLE_DETECTED_ERROR:
+               sum_trbs_for_length = true;
+               fallthrough;
+       case COMP_ISOCH_BUFFER_OVERRUN:
                 frame->status = -EOVERFLOW;
+               if (ep_trb != td->last_trb)
+                       td->error_mid_td = true;
                 break;
         case COMP_INCOMPATIBLE_DEVICE_ERROR:
         case COMP_STALL_ERROR:
@@ -2401,8 +2408,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
                 break;
         case COMP_USB_TRANSACTION_ERROR:
                 frame->status = -EPROTO;
+               sum_trbs_for_length = true;
                 if (ep_trb != td->last_trb)
-                       return 0;
+                       td->error_mid_td = true;
                 break;
         case COMP_STOPPED:
                 sum_trbs_for_length = true;
@@ -2422,6 +2430,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
                 break;
         }
  
+       if (td->urb_length_set)
+               goto finish_td;
+
         if (sum_trbs_for_length)
                 frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) +
                         ep_trb_len - remaining;
@@ -2430,6 +2441,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
  
         td->urb->actual_length += frame->actual_length;
  
+finish_td:
+       /* Don't give back TD yet if we encountered an error mid TD */
+       if (td->error_mid_td && ep_trb != td->last_trb) {
+               xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
+               td->urb_length_set = true;
+               return 0;
+       }
+
         return finish_td(xhci, ep, ep_ring, td, trb_comp_code);
  }
  
@@ -2808,17 +2827,51 @@ static int handle_tx_event(struct xhci_hcd *xhci,
                 }
  
                 if (!ep_seg) {
-                       if (!ep->skip ||
-                           !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
-                               /* Some host controllers give a spurious
-                                * successful event after a short transfer.
-                                * Ignore it.
-                                */
-                               if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
-                                               ep_ring->last_td_was_short) {
-                                       ep_ring->last_td_was_short = false;
-                                       goto cleanup;
+
+                       if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) {
+                               skip_isoc_td(xhci, td, ep, status);
+                               goto cleanup;
+                       }
+
+                       /*
+                        * Some hosts give a spurious success event after a short
+                        * transfer. Ignore it.
+                        */
+                       if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) &&
+                           ep_ring->last_td_was_short) {
+                               ep_ring->last_td_was_short = false;
+                               goto cleanup;
+                       }
+
+                       /*
+                        * xhci 4.10.2 states isoc endpoints should continue
+                        * processing the next TD if there was an error mid TD.
+                        * So host like NEC don't generate an event for the last
+                        * isoc TRB even if the IOC flag is set.
+                        * xhci 4.9.1 states that if there are errors in mult-TRB
+                        * TDs xHC should generate an error for that TRB, and if xHC
+                        * proceeds to the next TD it should genete an event for
+                        * any TRB with IOC flag on the way. Other host follow this.
+                        * So this event might be for the next TD.
+                        */
+                       if (td->error_mid_td &&
+                           !list_is_last(&td->td_list, &ep_ring->td_list)) {
+                               struct xhci_td *td_next = list_next_entry(td, td_list);
+
+                               ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb,
+                                                  td_next->last_trb, ep_trb_dma, false);
+                               if (ep_seg) {
+                                       /* give back previous TD, start handling new */
+                                       xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
+                                       ep_ring->dequeue = td->last_trb;
+                                       ep_ring->deq_seg = td->last_trb_seg;
+                                       inc_deq(xhci, ep_ring);
+                                       xhci_td_cleanup(xhci, td, ep_ring, td->status);
+                                       td = td_next;
                                 }
+                       }
+
+                       if (!ep_seg) {
                                 /* HC is busted, give up! */
                                 xhci_err(xhci,
                                         "ERROR Transfer event TRB DMA ptr not "
@@ -2830,9 +2883,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
                                           ep_trb_dma, true);
                                 return -ESHUTDOWN;
                         }
-
-                       skip_isoc_td(xhci, td, ep, status);
-                       goto cleanup;
                 }
                 if (trb_comp_code == COMP_SHORT_PACKET)
                         ep_ring->last_td_was_short = true;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h

index a5c72a634e6a91a262dbbaf316daf58baa0f8dcd..6f82d404883f9accf627c057a96702a7d8d65a80 100644 (file)
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1549,6 +1549,7 @@ struct xhci_td {
         struct xhci_segment     *bounce_seg;
         /* actual_length of the URB has already been set */
         bool                    urb_length_set;
+       bool                    error_mid_td;
         unsigned int            num_trbs;
  };
  
diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c

index ae41578bd0149900b0a867f71a0cf6080e238566..70165dd86b5de958ab4f5fe0d1573988977be425 100644 (file)
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -21,7 +21,9 @@ static const struct class role_class = {
  struct usb_role_switch {
         struct device dev;
         struct mutex lock; /* device lock*/
+       struct module *module; /* the module this device depends on */
         enum usb_role role;
+       bool registered;
  
         /* From descriptor */
         struct device *usb2_port;
@@ -48,6 +50,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role)
         if (IS_ERR_OR_NULL(sw))
                 return 0;
  
+       if (!sw->registered)
+               return -EOPNOTSUPP;
+
         mutex_lock(&sw->lock);
  
         ret = sw->set(sw, role);
@@ -73,7 +78,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw)
  {
         enum usb_role role;
  
-       if (IS_ERR_OR_NULL(sw))
+       if (IS_ERR_OR_NULL(sw) || !sw->registered)
                 return USB_ROLE_NONE;
  
         mutex_lock(&sw->lock);
@@ -135,7 +140,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev)
                                                   usb_role_switch_match);
  
         if (!IS_ERR_OR_NULL(sw))
-               WARN_ON(!try_module_get(sw->dev.parent->driver->owner));
+               WARN_ON(!try_module_get(sw->module));
  
         return sw;
  }
@@ -157,7 +162,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode)
                 sw = fwnode_connection_find_match(fwnode, "usb-role-switch",
                                                   NULL, usb_role_switch_match);
         if (!IS_ERR_OR_NULL(sw))
-               WARN_ON(!try_module_get(sw->dev.parent->driver->owner));
+               WARN_ON(!try_module_get(sw->module));
  
         return sw;
  }
@@ -172,7 +177,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get);
  void usb_role_switch_put(struct usb_role_switch *sw)
  {
         if (!IS_ERR_OR_NULL(sw)) {
-               module_put(sw->dev.parent->driver->owner);
+               module_put(sw->module);
                 put_device(&sw->dev);
         }
  }
@@ -189,15 +194,18 @@ struct usb_role_switch *
  usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode)
  {
         struct device *dev;
+       struct usb_role_switch *sw = NULL;
  
         if (!fwnode)
                 return NULL;
  
         dev = class_find_device_by_fwnode(&role_class, fwnode);
-       if (dev)
-               WARN_ON(!try_module_get(dev->parent->driver->owner));
+       if (dev) {
+               sw = to_role_switch(dev);
+               WARN_ON(!try_module_get(sw->module));
+       }
  
-       return dev ? to_role_switch(dev) : NULL;
+       return sw;
  }
  EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode);
  
@@ -338,6 +346,7 @@ usb_role_switch_register(struct device *parent,
         sw->set = desc->set;
         sw->get = desc->get;
  
+       sw->module = parent->driver->owner;
         sw->dev.parent = parent;
         sw->dev.fwnode = desc->fwnode;
         sw->dev.class = &role_class;
@@ -352,6 +361,8 @@ usb_role_switch_register(struct device *parent,
                 return ERR_PTR(ret);
         }
  
+       sw->registered = true;
+
         /* TODO: Symlinks for the host port and the device controller. */
  
         return sw;
@@ -366,8 +377,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register);
   */
  void usb_role_switch_unregister(struct usb_role_switch *sw)
  {
-       if (!IS_ERR_OR_NULL(sw))
+       if (!IS_ERR_OR_NULL(sw)) {
+               sw->registered = false;
                 device_unregister(&sw->dev);
+       }
  }
  EXPORT_SYMBOL_GPL(usb_role_switch_unregister);
  
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c

index 1e61fe04317158c3a5e877bfb0d89cb5572ce4ef..923e0ed85444be9fde31e0b0d965813fc99c5acf 100644 (file)
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -146,6 +146,7 @@ static const struct usb_device_id id_table[] = {
         { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
         { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */
         { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */
+       { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */
         { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */
         { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */
         { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c

index 72390dbf0769282e8efb289023ca2b6915494160..2ae124c49d448f63b6d6a3078ad08fffee3ad2d0 100644 (file)
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -2269,6 +2269,7 @@ static const struct usb_device_id option_ids[] = {
         { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) },                   /* Fibocom FM160 (MBIM mode) */
         { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) },                   /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
         { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) },                   /* Fibocom FM101-GL (laptop MBIM) */
+       { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) },                   /* Fibocom FM101-GL (laptop MBIM) */
         { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff),                     /* Fibocom FM101-GL (laptop MBIM) */
           .driver_info = RSVD(4) },
         { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) },                   /* LongSung M5710 */
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c

index b1e844bf31f81f7984a976bf4ca5dcd8f01b3a97..703a9c56355731c158801f89996937f7ea760d35 100644 (file)
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -184,6 +184,8 @@ static const struct usb_device_id id_table[] = {
         {DEVICE_SWI(0x413c, 0x81d0)},   /* Dell Wireless 5819 */
         {DEVICE_SWI(0x413c, 0x81d1)},   /* Dell Wireless 5818 */
         {DEVICE_SWI(0x413c, 0x81d2)},   /* Dell Wireless 5818 */
+       {DEVICE_SWI(0x413c, 0x8217)},   /* Dell Wireless DW5826e */
+       {DEVICE_SWI(0x413c, 0x8218)},   /* Dell Wireless DW5826e QDL */
  
         /* Huawei devices */
         {DEVICE_HWI(0x03f0, 0x581d)},   /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c

index c54e9805da536a0ec139ad789017b79131c88561..12cf9940e5b6759167f9ae7450df8af92a85c63a 100644 (file)
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev)
                  */
                 sdev->use_192_bytes_for_3f = 1;
  
+               /*
+                * Some devices report generic values until the media has been
+                * accessed. Force a READ(10) prior to querying device
+                * characteristics.
+                */
+               sdev->read_before_ms = 1;
+
                 /*
                  * Some devices don't like MODE SENSE with page=0x3f,
                  * which is the command used for checking if a device
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c

index 9707f53cfda9c08507082ac33b69b5d146c6927f..71ace274761f182f0cbb942676e74d7e2c26d7a1 100644 (file)
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev)
         if (devinfo->flags & US_FL_CAPACITY_HEURISTICS)
                 sdev->guess_capacity = 1;
  
+       /*
+        * Some devices report generic values until the media has been
+        * accessed. Force a READ(10) prior to querying device
+        * characteristics.
+        */
+       sdev->read_before_ms = 1;
+
         /*
          * Some devices don't like MODE SENSE with page=0x3f,
          * which is the command used for checking if a device
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c

index 5945e3a2b0f78f30c526a2a31ff1932f7e96531d..66e532edcece68a713e9ebe7355e3cda38b9e465 100644 (file)
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -3743,9 +3743,6 @@ static void tcpm_detach(struct tcpm_port *port)
         if (tcpm_port_is_disconnected(port))
                 port->hard_reset_count = 0;
  
-       port->try_src_count = 0;
-       port->try_snk_count = 0;
-
         if (!port->attached)
                 return;
  
@@ -6848,7 +6845,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
         if (err)
                 goto out_role_sw_put;
  
-       port->typec_caps.pd = port->pds[0];
+       if (port->pds)
+               port->typec_caps.pd = port->pds[0];
  
         port->typec_port = typec_register_port(port->dev, &port->typec_caps);
         if (IS_ERR(port->typec_port)) {
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c

index 5392ec6989592041f87b96a7af1479621be799a0..14f5a7bfae2e92873e405b369ca8ce5620d856c0 100644 (file)
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -938,7 +938,9 @@ static void ucsi_handle_connector_change(struct work_struct *work)
  
         clear_bit(EVENT_PENDING, &con->ucsi->flags);
  
+       mutex_lock(&ucsi->ppm_lock);
         ret = ucsi_acknowledge_connector_change(ucsi);
+       mutex_unlock(&ucsi->ppm_lock);
         if (ret)
                 dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret);
  
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c

index 6bbf490ac4010e9ad31a140bd484ec40077f0af6..928eacbeb21ac4cc5b8857644969bff7aba7a8a1 100644 (file)
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -25,6 +25,8 @@ struct ucsi_acpi {
         unsigned long flags;
         guid_t guid;
         u64 cmd;
+       bool dell_quirk_probed;
+       bool dell_quirk_active;
  };
  
  static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func)
@@ -73,9 +75,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
                                 const void *val, size_t val_len)
  {
         struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+       bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI;
         int ret;
  
-       set_bit(COMMAND_PENDING, &ua->flags);
+       if (ack)
+               set_bit(ACK_PENDING, &ua->flags);
+       else
+               set_bit(COMMAND_PENDING, &ua->flags);
  
         ret = ucsi_acpi_async_write(ucsi, offset, val, val_len);
         if (ret)
@@ -85,7 +91,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
                 ret = -ETIMEDOUT;
  
  out_clear_bit:
-       clear_bit(COMMAND_PENDING, &ua->flags);
+       if (ack)
+               clear_bit(ACK_PENDING, &ua->flags);
+       else
+               clear_bit(COMMAND_PENDING, &ua->flags);
  
         return ret;
  }
@@ -119,12 +128,73 @@ static const struct ucsi_operations ucsi_zenbook_ops = {
         .async_write = ucsi_acpi_async_write
  };
  
-static const struct dmi_system_id zenbook_dmi_id[] = {
+/*
+ * Some Dell laptops expect that an ACK command with the
+ * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate)
+ * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set.
+ * If this is not done events are not delivered to OSPM and
+ * subsequent commands will timeout.
+ */
+static int
+ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset,
+                    const void *val, size_t val_len)
+{
+       struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+       u64 cmd = *(u64 *)val, ack = 0;
+       int ret;
+
+       if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI &&
+           cmd & UCSI_ACK_CONNECTOR_CHANGE)
+               ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE;
+
+       ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len);
+       if (ret != 0)
+               return ret;
+       if (ack == 0)
+               return ret;
+
+       if (!ua->dell_quirk_probed) {
+               ua->dell_quirk_probed = true;
+
+               cmd = UCSI_GET_CAPABILITY;
+               ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd,
+                                          sizeof(cmd));
+               if (ret == 0)
+                       return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL,
+                                                   &ack, sizeof(ack));
+               if (ret != -ETIMEDOUT)
+                       return ret;
+
+               ua->dell_quirk_active = true;
+               dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n");
+               dev_err(ua->dev, "Firmware bug: Enabling workaround\n");
+       }
+
+       if (!ua->dell_quirk_active)
+               return ret;
+
+       return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack));
+}
+
+static const struct ucsi_operations ucsi_dell_ops = {
+       .read = ucsi_acpi_read,
+       .sync_write = ucsi_dell_sync_write,
+       .async_write = ucsi_acpi_async_write
+};
+
+static const struct dmi_system_id ucsi_acpi_quirks[] = {
         {
                 .matches = {
                         DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
                         DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"),
                 },
+               .driver_data = (void *)&ucsi_zenbook_ops,
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+               },
+               .driver_data = (void *)&ucsi_dell_ops,
         },
         { }
  };
@@ -142,8 +212,10 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data)
         if (UCSI_CCI_CONNECTOR(cci))
                 ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci));
  
-       if (test_bit(COMMAND_PENDING, &ua->flags) &&
-           cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE))
+       if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags))
+               complete(&ua->complete);
+       if (cci & UCSI_CCI_COMMAND_COMPLETE &&
+           test_bit(COMMAND_PENDING, &ua->flags))
                 complete(&ua->complete);
  }
  
@@ -151,6 +223,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev)
  {
         struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
         const struct ucsi_operations *ops = &ucsi_acpi_ops;
+       const struct dmi_system_id *id;
         struct ucsi_acpi *ua;
         struct resource *res;
         acpi_status status;
@@ -180,8 +253,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev)
         init_completion(&ua->complete);
         ua->dev = &pdev->dev;
  
-       if (dmi_check_system(zenbook_dmi_id))
-               ops = &ucsi_zenbook_ops;
+       id = dmi_first_match(ucsi_acpi_quirks);
+       if (id)
+               ops = id->driver_data;
  
         ua->ucsi = ucsi_create(&pdev->dev, ops);
         if (IS_ERR(ua->ucsi))
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c

index 1cbc990d42e07cf41904dc726f83c08c72922b77..df6f99bdf70d7c9f8075ca05478bd8c968f7e57b 100644 (file)
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1862,8 +1862,25 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
         /*
          * See remap_pfn_range(), called from vfio_pci_fault() but we can't
          * change vm_flags within the fault handler.  Set them now.
+        *
+        * VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64,
+        * allowing KVM stage 2 device mapping attributes to use Normal-NC
+        * rather than DEVICE_nGnRE, which allows guest mappings
+        * supporting write-combining attributes (WC). ARM does not
+        * architecturally guarantee this is safe, and indeed some MMIO
+        * regions like the GICv2 VCPU interface can trigger uncontained
+        * faults if Normal-NC is used.
+        *
+        * To safely use VFIO in KVM the platform must guarantee full
+        * safety in the guest where no action taken against a MMIO
+        * mapping can trigger an uncontained failure. The assumption is
+        * that most VFIO PCI platforms support this for both mapping types,
+        * at least in common flows, based on some expectations of how
+        * PCI IP is integrated. Hence VM_ALLOW_ANY_UNCACHED is set in
+        * the VMA flags.
          */
-       vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+       vm_flags_set(vma, VM_ALLOW_ANY_UNCACHED | VM_IO | VM_PFNMAP |
+                       VM_DONTEXPAND | VM_DONTDUMP);
         vma->vm_ops = &vfio_pci_mmap_ops;
  
         return 0;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h

index bde84ad344e50181685f5fbc2620c20b7b33f5a0..50128da18bcaf95f44f933018a59c1450f4d0d07 100644 (file)
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -434,7 +434,7 @@ static inline void vfio_virqfd_exit(void)
  }
  #endif
  
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
  void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm);
  void vfio_device_put_kvm(struct vfio_device *device);
  #else
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c

index 1cc93aac99a290d903819635284860b48600ab5d..e97d796a54fbaf8da0fc3860b8bbb5ca5039acce 100644 (file)
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -16,7 +16,7 @@
  #include <linux/fs.h>
  #include <linux/idr.h>
  #include <linux/iommu.h>
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
  #include <linux/kvm_host.h>
  #endif
  #include <linux/list.h>
@@ -385,7 +385,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
  }
  EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
  
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
  void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
  {
         void (*pfn)(struct kvm *kvm);
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c

index 1183e7a871f8b270a9ff2106cef15e44720184a4..46823c2e2ba1207e327607fa0ca0c757bc0968aa 100644 (file)
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2399,11 +2399,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
         struct fbcon_ops *ops = info->fbcon_par;
         struct fbcon_display *p = &fb_display[vc->vc_num];
         int resize, ret, old_userfont, old_width, old_height, old_charcount;
-       char *old_data = NULL;
+       u8 *old_data = vc->vc_font.data;
  
         resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
-       if (p->userfont)
-               old_data = vc->vc_font.data;
         vc->vc_font.data = (void *)(p->fontdata = data);
         old_userfont = p->userfont;
         if ((p->userfont = userfont))
@@ -2437,13 +2435,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
                 update_screen(vc);
         }
  
-       if (old_data && (--REFCOUNT(old_data) == 0))
+       if (old_userfont && (--REFCOUNT(old_data) == 0))
                 kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
         return 0;
  
  err_out:
         p->fontdata = old_data;
-       vc->vc_font.data = (void *)old_data;
+       vc->vc_font.data = old_data;
  
         if (userfont) {
                 p->userfont = old_userfont;
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c

index b8cfea7812d6b61110cc5e42fe4249d4578dc721..3b9f080109d7e46da11e4efb73a46554d7ff416f 100644 (file)
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -923,8 +923,8 @@ static void shutdown_pirq(struct irq_data *data)
                 return;
  
         do_mask(info, EVT_MASK_REASON_EXPLICIT);
-       xen_evtchn_close(evtchn);
         xen_irq_info_cleanup(info);
+       xen_evtchn_close(evtchn);
  }
  
  static void enable_pirq(struct irq_data *data)
@@ -956,6 +956,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
  static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
  {
         evtchn_port_t evtchn;
+       bool close_evtchn = false;
  
         if (!info) {
                 xen_irq_free_desc(irq);
@@ -975,7 +976,7 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
                 struct xenbus_device *dev;
  
                 if (!info->is_static)
-                       xen_evtchn_close(evtchn);
+                       close_evtchn = true;
  
                 switch (info->type) {
                 case IRQT_VIRQ:
@@ -995,6 +996,9 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
                 }
  
                 xen_irq_info_cleanup(info);
+
+               if (close_evtchn)
+                       xen_evtchn_close(evtchn);
         }
  
         xen_free_irq(info);
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c

index 26ffb8755ffb5da27bd1eb38ceeb59ca06c473ed..f93f73ecefeee4b2b052a3ac758a8fbf9fdc11ae 100644 (file)
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -317,7 +317,7 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
                 rc = -EFAULT;
                 goto out_free;
         }
-       if (copy_to_user(arg->gref_ids, gref_ids,
+       if (copy_to_user(arg->gref_ids_flex, gref_ids,
                         sizeof(gref_ids[0]) * op.count)) {
                 rc = -EFAULT;
                 goto out_free;
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c

index 50865527314538a8bedbde0f2590fbbb4afce3ce..c63f317e3df3de111b63a6f1b58feefca7de998b 100644 (file)
--- a/drivers/xen/pcpu.c
+++ b/drivers/xen/pcpu.c
@@ -65,7 +65,7 @@ struct pcpu {
         uint32_t flags;
  };
  
-static struct bus_type xen_pcpu_subsys = {
+static const struct bus_type xen_pcpu_subsys = {
         .name = "xen_cpu",
         .dev_name = "xen_cpu",
  };
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c

index 35b6e306026a4bfa1829f39f9b63ee8568f332d3..67dfa47788649328f6ad5783902f7dbcee9efa48 100644 (file)
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -1223,18 +1223,13 @@ struct privcmd_kernel_ioreq *alloc_ioreq(struct privcmd_ioeventfd *ioeventfd)
         kioreq->ioreq = (struct ioreq *)(page_to_virt(pages[0]));
         mmap_write_unlock(mm);
  
-       size = sizeof(*ports) * kioreq->vcpus;
-       ports = kzalloc(size, GFP_KERNEL);
-       if (!ports) {
-               ret = -ENOMEM;
+       ports = memdup_array_user(u64_to_user_ptr(ioeventfd->ports),
+                                 kioreq->vcpus, sizeof(*ports));
+       if (IS_ERR(ports)) {
+               ret = PTR_ERR(ports);
                 goto error_kfree;
         }
  
-       if (copy_from_user(ports, u64_to_user_ptr(ioeventfd->ports), size)) {
-               ret = -EFAULT;
-               goto error_kfree_ports;
-       }
-
         for (i = 0; i < kioreq->vcpus; i++) {
                 kioreq->ports[i].vcpu = i;
                 kioreq->ports[i].port = ports[i];
@@ -1256,7 +1251,7 @@ struct privcmd_kernel_ioreq *alloc_ioreq(struct privcmd_ioeventfd *ioeventfd)
  error_unbind:
         while (--i >= 0)
                 unbind_from_irqhandler(irq_from_evtchn(ports[i]), &kioreq->ports[i]);
-error_kfree_ports:
+
         kfree(ports);
  error_kfree:
         kfree(kioreq);
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c

index 8cd583db20b1737144dbfb562f83a505d72c1f3d..b293d7652f15593532b9483422bcc56cbaae8cd9 100644 (file)
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -237,7 +237,7 @@ static const struct attribute_group *balloon_groups[] = {
         NULL
  };
  
-static struct bus_type balloon_subsys = {
+static const struct bus_type balloon_subsys = {
         .name = BALLOON_CLASS_NAME,
         .dev_name = BALLOON_CLASS_NAME,
  };
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c

index 32835b4b9bc5030ad3e81ae411d56635f8d6a696..51b3124b0d56c98c316c58fa73d92c07e59b726a 100644 (file)
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -116,14 +116,15 @@ EXPORT_SYMBOL_GPL(xenbus_strstate);
   * @dev: xenbus device
   * @path: path to watch
   * @watch: watch to register
+ * @will_handle: events queuing determine callback
   * @callback: callback to register
   *
   * Register a @watch on the given path, using the given xenbus_watch structure
- * for storage, and the given @callback function as the callback.  On success,
- * the given @path will be saved as @watch->node, and remains the
- * caller's to free.  On error, @watch->node will
- * be NULL, the device will switch to %XenbusStateClosing, and the error will
- * be saved in the store.
+ * for storage, @will_handle function as the callback to determine if each
+ * event need to be queued, and the given @callback function as the callback.
+ * On success, the given @path will be saved as @watch->node, and remains the
+ * caller's to free.  On error, @watch->node will be NULL, the device will
+ * switch to %XenbusStateClosing, and the error will be saved in the store.
   *
   * Returns: %0 on success or -errno on error
   */
@@ -158,11 +159,13 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path);
   * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
   * @dev: xenbus device
   * @watch: watch to register
+ * @will_handle: events queuing determine callback
   * @callback: callback to register
   * @pathfmt: format of path to watch
   *
   * Register a watch on the given @path, using the given xenbus_watch
- * structure for storage, and the given @callback function as the
+ * structure for storage, @will_handle function as the callback to determine if
+ * each event need to be queued, and the given @callback function as the
   * callback.  On success, the watched path (@path/@path2) will be saved
   * as @watch->node, and becomes the caller's to kfree().
   * On error, watch->node will be NULL, so the caller has nothing to
diff --git a/fs/affs/affs.h b/fs/affs/affs.h

index 60685ec76d983523f15da7c7e97b38634b750c07..2e612834329ac127ae6e63d802a7b31b89558b0c 100644 (file)
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -105,6 +105,7 @@ struct affs_sb_info {
         int work_queued;                /* non-zero delayed work is queued */
         struct delayed_work sb_work;    /* superblock flush delayed work */
         spinlock_t work_lock;           /* protects sb_work and work_queued */
+       struct rcu_head rcu;
  };
  
  #define AFFS_MOUNT_SF_INTL             0x0001 /* International filesystem. */
diff --git a/fs/affs/super.c b/fs/affs/super.c

index 58b391446ae1fd97e48891c82ec8d88f32314303..b56a95cf414a44277783e7242c33cba5cb818707 100644 (file)
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -640,7 +640,7 @@ static void affs_kill_sb(struct super_block *sb)
                 affs_brelse(sbi->s_root_bh);
                 kfree(sbi->s_prefix);
                 mutex_destroy(&sbi->s_bmlock);
-               kfree(sbi);
+               kfree_rcu(sbi, rcu);
         }
  }
  
diff --git a/fs/afs/dir.c b/fs/afs/dir.c

index b5b8de521f99b26ba6c9b2fd707fb794a62612ae..8a67fc427e748a0840d9e92c1c0e8e4a3d7c4fdc 100644 (file)
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -479,8 +479,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
                     dire->u.name[0] == '.' &&
                     ctx->actor != afs_lookup_filldir &&
                     ctx->actor != afs_lookup_one_filldir &&
-                   memcmp(dire->u.name, ".__afs", 6) == 0)
+                   memcmp(dire->u.name, ".__afs", 6) == 0) {
+                       ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
                         continue;
+               }
  
                 /* found the next entry */
                 if (!dir_emit(ctx, dire->u.name, nlen,
diff --git a/fs/afs/file.c b/fs/afs/file.c

index 3d33b221d9ca256a3b3d978a835d2db9fff2e284..ef2cc8f565d25b15e086d2fc64c6f565bac7a16b 100644 (file)
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -417,13 +417,17 @@ static void afs_add_open_mmap(struct afs_vnode *vnode)
  
  static void afs_drop_open_mmap(struct afs_vnode *vnode)
  {
-       if (!atomic_dec_and_test(&vnode->cb_nr_mmap))
+       if (atomic_add_unless(&vnode->cb_nr_mmap, -1, 1))
                 return;
  
         down_write(&vnode->volume->open_mmaps_lock);
  
-       if (atomic_read(&vnode->cb_nr_mmap) == 0)
+       read_seqlock_excl(&vnode->cb_lock);
+       // the only place where ->cb_nr_mmap may hit 0
+       // see __afs_break_callback() for the other side...
+       if (atomic_dec_and_test(&vnode->cb_nr_mmap))
                 list_del_init(&vnode->cb_mmap_link);
+       read_sequnlock_excl(&vnode->cb_lock);
  
         up_write(&vnode->volume->open_mmaps_lock);
         flush_work(&vnode->cb_work);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h

index 9c03fcf7ffaa84e9f7604444209bd934b64db466..6ce5a612937c61e2021b32cad1f68a22b7c501ca 100644 (file)
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -321,8 +321,7 @@ struct afs_net {
         struct list_head        fs_probe_slow;  /* List of afs_server to probe at 5m intervals */
         struct hlist_head       fs_proc;        /* procfs servers list */
  
-       struct hlist_head       fs_addresses4;  /* afs_server (by lowest IPv4 addr) */
-       struct hlist_head       fs_addresses6;  /* afs_server (by lowest IPv6 addr) */
+       struct hlist_head       fs_addresses;   /* afs_server (by lowest IPv6 addr) */
         seqlock_t               fs_addr_lock;   /* For fs_addresses[46] */
  
         struct work_struct      fs_manager;
@@ -561,8 +560,7 @@ struct afs_server {
         struct afs_server __rcu *uuid_next;     /* Next server with same UUID */
         struct afs_server       *uuid_prev;     /* Previous server with same UUID */
         struct list_head        probe_link;     /* Link in net->fs_probe_list */
-       struct hlist_node       addr4_link;     /* Link in net->fs_addresses4 */
-       struct hlist_node       addr6_link;     /* Link in net->fs_addresses6 */
+       struct hlist_node       addr_link;      /* Link in net->fs_addresses6 */
         struct hlist_node       proc_link;      /* Link in net->fs_proc */
         struct list_head        volumes;        /* RCU list of afs_server_entry objects */
         struct afs_server       *gc_next;       /* Next server in manager's list */
diff --git a/fs/afs/main.c b/fs/afs/main.c

index 1b3bd21c168acc223bfaf39fa454d2cb49ae3fbb..a14f6013e316d964bfa6eef3e09befe62d591411 100644 (file)
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -90,8 +90,7 @@ static int __net_init afs_net_init(struct net *net_ns)
         INIT_LIST_HEAD(&net->fs_probe_slow);
         INIT_HLIST_HEAD(&net->fs_proc);
  
-       INIT_HLIST_HEAD(&net->fs_addresses4);
-       INIT_HLIST_HEAD(&net->fs_addresses6);
+       INIT_HLIST_HEAD(&net->fs_addresses);
         seqlock_init(&net->fs_addr_lock);
  
         INIT_WORK(&net->fs_manager, afs_manage_servers);
diff --git a/fs/afs/server.c b/fs/afs/server.c

index e169121f603e28d5679a895d0ca0f136270a6f56..038f9d0ae3af8ee1df24dc163c972e826c5d62fb 100644 (file)
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -38,7 +38,7 @@ struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer
                 seq++; /* 2 on the 1st/lockless path, otherwise odd */
                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
  
-               hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
+               hlist_for_each_entry_rcu(server, &net->fs_addresses, addr_link) {
                         estate = rcu_dereference(server->endpoint_state);
                         alist = estate->addresses;
                         for (i = 0; i < alist->nr_addrs; i++)
@@ -177,10 +177,8 @@ added_dup:
          * bit, but anything we might want to do gets messy and memory
          * intensive.
          */
-       if (alist->nr_ipv4 > 0)
-               hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
-       if (alist->nr_addrs > alist->nr_ipv4)
-               hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
+       if (alist->nr_addrs > 0)
+               hlist_add_head_rcu(&server->addr_link, &net->fs_addresses);
  
         write_sequnlock(&net->fs_addr_lock);
  
@@ -511,10 +509,8 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
  
                         list_del(&server->probe_link);
                         hlist_del_rcu(&server->proc_link);
-                       if (!hlist_unhashed(&server->addr4_link))
-                               hlist_del_rcu(&server->addr4_link);
-                       if (!hlist_unhashed(&server->addr6_link))
-                               hlist_del_rcu(&server->addr6_link);
+                       if (!hlist_unhashed(&server->addr_link))
+                               hlist_del_rcu(&server->addr_link);
                 }
                 write_sequnlock(&net->fs_lock);
  
diff --git a/fs/afs/volume.c b/fs/afs/volume.c

index 020ecd45e476214f08b9867412ec4b379889344d..af3a3f57c1b3f9512bcaa08ce37a0f8173e809d0 100644 (file)
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -353,7 +353,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
  {
         struct afs_server_list *new, *old, *discard;
         struct afs_vldb_entry *vldb;
-       char idbuf[16];
+       char idbuf[24];
         int ret, idsz;
  
         _enter("");
@@ -361,7 +361,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
         /* We look up an ID by passing it as a decimal string in the
          * operation's name parameter.
          */
-       idsz = sprintf(idbuf, "%llu", volume->vid);
+       idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
  
         vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
         if (IS_ERR(vldb)) {
diff --git a/fs/aio.c b/fs/aio.c

index bb2ff48991f35ed59479a004641e1452c7bad3ea..28223f51193123020166bb4a8a24dbac316943c6 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -593,6 +593,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
         struct kioctx *ctx = req->ki_ctx;
         unsigned long flags;
  
+       /*
+        * kiocb didn't come from aio or is neither a read nor a write, hence
+        * ignore it.
+        */
+       if (!(iocb->ki_flags & IOCB_AIO_RW))
+               return;
+
         if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
                 return;
  
@@ -1509,7 +1516,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
         req->ki_complete = aio_complete_rw;
         req->private = NULL;
         req->ki_pos = iocb->aio_offset;
-       req->ki_flags = req->ki_filp->f_iocb_flags;
+       req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW;
         if (iocb->aio_flags & IOCB_FLAG_RESFD)
                 req->ki_flags |= IOCB_EVENTFD;
         if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
@@ -2158,14 +2165,11 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
  #endif
  
  /* sys_io_cancel:
- *     Attempts to cancel an iocb previously passed to io_submit.  If
- *     the operation is successfully cancelled, the resulting event is
- *     copied into the memory pointed to by result without being placed
- *     into the completion queue and 0 is returned.  May fail with
- *     -EFAULT if any of the data structures pointed to are invalid.
- *     May fail with -EINVAL if aio_context specified by ctx_id is
- *     invalid.  May fail with -EAGAIN if the iocb specified was not
- *     cancelled.  Will fail with -ENOSYS if not implemented.
+ *     Attempts to cancel an iocb previously passed to io_submit(). If the
+ *     operation is successfully cancelled 0 is returned. May fail with
+ *     -EFAULT if any of the data structures pointed to are invalid. May
+ *     fail with -EINVAL if aio_context specified by ctx_id is invalid. Will
+ *     fail with -ENOSYS if not implemented.
   */
  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
                 struct io_event __user *, result)
@@ -2196,14 +2200,12 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
         }
         spin_unlock_irq(&ctx->ctx_lock);
  
-       if (!ret) {
-               /*
-                * The result argument is no longer used - the io_event is
-                * always delivered via the ring buffer. -EINPROGRESS indicates
-                * cancellation is progress:
-                */
-               ret = -EINPROGRESS;
-       }
+       /*
+        * The result argument is no longer used - the io_event is always
+        * delivered via the ring buffer.
+        */
+       if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
+               aio_complete_rw(&kiocb->rw, -EINTR);
  
         percpu_ref_put(&ctx->users);
  
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c

index b4dc319bcb2bc0a5363e74f6d2096d3b5652599d..569b97904da42eec8975e8662dd78895d41d62fe 100644 (file)
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -68,9 +68,11 @@ void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer
  
  void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
  {
-       prt_str(out, "bucket=");
-       bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
-       prt_str(out, " ");
+       if (bch2_dev_exists2(c, k.k->p.inode)) {
+               prt_str(out, "bucket=");
+               bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
+               prt_str(out, " ");
+       }
  
         bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
  }
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h

index b80c6c9efd8cef95b46b5b45b21f639e18373755..69d0d60d50e366edf9e56ba101dda047536f5338 100644 (file)
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -1249,6 +1249,18 @@ static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
         return stdio;
  }
  
+static inline unsigned metadata_replicas_required(struct bch_fs *c)
+{
+       return min(c->opts.metadata_replicas,
+                  c->opts.metadata_replicas_required);
+}
+
+static inline unsigned data_replicas_required(struct bch_fs *c)
+{
+       return min(c->opts.data_replicas,
+                  c->opts.data_replicas_required);
+}
+
  #define BKEY_PADDED_ONSTACK(key, pad)                          \
         struct { struct bkey_i key; __u64 key ## _pad[pad]; }
  
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c

index 5467a8635be113102c56bb6f02986209533c35ac..3ef338df82f5e46228f583a85a7cacdba233a64b 100644 (file)
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2156,7 +2156,9 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
                  * isn't monotonically increasing before FILTER_SNAPSHOTS, and
                  * that's what we check against in extents mode:
                  */
-               if (k.k->p.inode > end.inode)
+               if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
+                            ? bkey_gt(k.k->p, end)
+                            : k.k->p.inode > end.inode))
                         goto end;
  
                 if (iter->update_path &&
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c

index 17a5938aa71a6b43b45c12383e4690df146ee2a3..4530b14ff2c3717ec15e92615385c04e185e28e1 100644 (file)
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -280,7 +280,8 @@ retry:
                                       writepoint_ptr(&c->btree_write_point),
                                       &devs_have,
                                       res->nr_replicas,
-                                     c->opts.metadata_replicas_required,
+                                     min(res->nr_replicas,
+                                         c->opts.metadata_replicas_required),
                                       watermark, 0, cl, &wp);
         if (unlikely(ret))
                 return ERR_PTR(ret);
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c

index 73c12e565af50a465260856baaa831eb2a542caa..27710cdd5710ec5bba9ff9a11cad92f7cf14bc09 100644 (file)
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -303,18 +303,6 @@ void bch2_readahead(struct readahead_control *ractl)
         darray_exit(&readpages_iter.folios);
  }
  
-static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio,
-                            subvol_inum inum, struct folio *folio)
-{
-       bch2_folio_create(folio, __GFP_NOFAIL);
-
-       rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
-       rbio->bio.bi_iter.bi_sector = folio_sector(folio);
-       BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
-
-       bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0));
-}
-
  static void bch2_read_single_folio_end_io(struct bio *bio)
  {
         complete(bio->bi_private);
@@ -329,6 +317,9 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
         int ret;
         DECLARE_COMPLETION_ONSTACK(done);
  
+       if (!bch2_folio_create(folio, GFP_KERNEL))
+               return -ENOMEM;
+
         bch2_inode_opts_get(&opts, c, &inode->ei_inode);
  
         rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read),
@@ -336,7 +327,11 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
         rbio->bio.bi_private = &done;
         rbio->bio.bi_end_io = bch2_read_single_folio_end_io;
  
-       __bchfs_readfolio(c, rbio, inode_inum(inode), folio);
+       rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
+       rbio->bio.bi_iter.bi_sector = folio_sector(folio);
+       BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
+
+       bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0));
         wait_for_completion(&done);
  
         ret = blk_status_to_errno(rbio->bio.bi_status);
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c

index e3b219e19e1008ccfe1ff61e966115795f9c1831..33cb6da3a5ad28f2c014c2ef12408937933d49c3 100644 (file)
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -88,6 +88,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
                 return ret;
  
         shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c));
+       if (shorten >= iter->count)
+               shorten = 0;
         iter->count -= shorten;
  
         bio = bio_alloc_bioset(NULL,
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c

index 3a4c24c28e7fa06deff38f6bb0b240a5daacda8c..3dc8630ff9fe139bd44317d72502ed9bf1f73751 100644 (file)
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -455,6 +455,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
         if (IS_ERR(victim))
                 return PTR_ERR(victim);
  
+       dir = d_inode(path.dentry);
         if (victim->d_sb->s_fs_info != c) {
                 ret = -EXDEV;
                 goto err;
@@ -463,14 +464,13 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
                 ret = -ENOENT;
                 goto err;
         }
-       dir = d_inode(path.dentry);
         ret = __bch2_unlink(dir, victim, true);
         if (!ret) {
                 fsnotify_rmdir(dir, victim);
                 d_delete(victim);
         }
-       inode_unlock(dir);
  err:
+       inode_unlock(dir);
         dput(victim);
         path_put(&path);
         return ret;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c

index ec419b8e2c43123b42e0d84c837611fc5f6e2314..77ae65542db9166a4168a78a55064295bb1d9ebf 100644 (file)
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -435,7 +435,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
                 bch2_subvol_is_ro(c, inode->ei_subvol) ?:
                 __bch2_link(c, inode, dir, dentry);
         if (unlikely(ret))
-               return ret;
+               return bch2_err_class(ret);
  
         ihold(&inode->v);
         d_instantiate(dentry, &inode->v);
@@ -487,8 +487,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
         struct bch_inode_info *dir= to_bch_ei(vdir);
         struct bch_fs *c = dir->v.i_sb->s_fs_info;
  
-       return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+       int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
                 __bch2_unlink(vdir, dentry, false);
+       return bch2_err_class(ret);
  }
  
  static int bch2_symlink(struct mnt_idmap *idmap,
@@ -523,7 +524,7 @@ static int bch2_symlink(struct mnt_idmap *idmap,
         return 0;
  err:
         iput(&inode->v);
-       return ret;
+       return bch2_err_class(ret);
  }
  
  static int bch2_mkdir(struct mnt_idmap *idmap,
@@ -641,7 +642,7 @@ err:
                            src_inode,
                            dst_inode);
  
-       return ret;
+       return bch2_err_class(ret);
  }
  
  static void bch2_setattr_copy(struct mnt_idmap *idmap,
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c

index ef3a53f9045af2591ab1f9e272dd9d6151250444..2c098ac017b30b6a4b5d016e9f5dde93ee258f2f 100644 (file)
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -1564,6 +1564,7 @@ CLOSURE_CALLBACK(bch2_write)
         BUG_ON(!op->write_point.v);
         BUG_ON(bkey_eq(op->pos, POS_MAX));
  
+       op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas);
         op->start_time = local_clock();
         bch2_keylist_init(&op->insert_keys, op->inline_keys);
         wbio_init(bio)->put_bio = false;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c

index bfd6585e746da45880da9b5ad8fb502586cbf933..47805193f18cc72c941f72f5b82cfb461eb8982c 100644 (file)
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1478,6 +1478,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
                 c->opts.foreground_target;
         unsigned i, replicas = 0, replicas_want =
                 READ_ONCE(c->opts.metadata_replicas);
+       unsigned replicas_need = min_t(unsigned, replicas_want,
+                                      READ_ONCE(c->opts.metadata_replicas_required));
  
         rcu_read_lock();
  retry:
@@ -1526,7 +1528,7 @@ done:
  
         BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
  
-       return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
+       return replicas >= replicas_need ? 0 : -EROFS;
  }
  
  static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c

index 820d25e19e5fe3ee6a45e70f23eb74fc1d558e88..c33dca641575dffc58b6db8354e71c879ed5cf26 100644 (file)
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -205,7 +205,7 @@ void bch2_journal_space_available(struct journal *j)
  
         j->can_discard = can_discard;
  
-       if (nr_online < c->opts.metadata_replicas_required) {
+       if (nr_online < metadata_replicas_required(c)) {
                 ret = JOURNAL_ERR_insufficient_devices;
                 goto out;
         }
@@ -892,9 +892,11 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
                                          journal_seq_pin(j, seq)->devs);
                 seq++;
  
-               spin_unlock(&j->lock);
-               ret = bch2_mark_replicas(c, &replicas.e);
-               spin_lock(&j->lock);
+               if (replicas.e.nr_devs) {
+                       spin_unlock(&j->lock);
+                       ret = bch2_mark_replicas(c, &replicas.e);
+                       spin_lock(&j->lock);
+               }
         }
         spin_unlock(&j->lock);
  err:
diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h

index b2be565bb8f214bc2ac4ebd6efac324ac20b7241..64df11ab422bf455560bad095973cc6e5a296697 100644 (file)
--- a/fs/bcachefs/mean_and_variance.h
+++ b/fs/bcachefs/mean_and_variance.h
@@ -17,7 +17,7 @@
   * Rust and rustc has issues with u128.
   */
  
-#if defined(__SIZEOF_INT128__) && defined(__KERNEL__)
+#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) && !defined(CONFIG_PARISC)
  
  typedef struct {
         unsigned __int128 v;
diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c

index accf246c32330919869bccff32a1ecfcc6d97856..b27d22925929a6554079fb8731f82dfb3dd0421c 100644 (file)
--- a/fs/bcachefs/printbuf.c
+++ b/fs/bcachefs/printbuf.c
@@ -56,6 +56,7 @@ void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
  
                 va_copy(args2, args);
                 len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2);
+               va_end(args2);
         } while (len + 1 >= printbuf_remaining(out) &&
                  !bch2_printbuf_make_room(out, len + 1));
  
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c

index 9127d0e3ca2f6a3fd44e076b42f01ee6f7736427..21e13bb4335be3b6d48005282000c2f0a7c4e2bd 100644 (file)
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -577,8 +577,9 @@ u64 bch2_recovery_passes_from_stable(u64 v)
  
  static bool check_version_upgrade(struct bch_fs *c)
  {
-       unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
         unsigned latest_version = bcachefs_metadata_version_current;
+       unsigned latest_compatible = min(latest_version,
+                                        bch2_latest_compatible_version(c->sb.version));
         unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
         unsigned new_version = 0;
  
@@ -597,7 +598,7 @@ static bool check_version_upgrade(struct bch_fs *c)
                         new_version = latest_version;
                         break;
                 case BCH_VERSION_UPGRADE_none:
-                       new_version = old_version;
+                       new_version = min(old_version, latest_version);
                         break;
                 }
         }
@@ -774,7 +775,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                 goto err;
         }
  
-       if (!(c->opts.nochanges && c->opts.norecovery)) {
+       if (!c->opts.nochanges) {
                 mutex_lock(&c->sb_lock);
                 bool write_sb = false;
  
@@ -804,7 +805,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                 if (bch2_check_version_downgrade(c)) {
                         struct printbuf buf = PRINTBUF;
  
-                       prt_str(&buf, "Version downgrade required:\n");
+                       prt_str(&buf, "Version downgrade required:");
  
                         __le64 passes = ext->recovery_passes_required[0];
                         bch2_sb_set_downgrade(c,
@@ -812,7 +813,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                                         BCH_VERSION_MINOR(c->sb.version));
                         passes = ext->recovery_passes_required[0] & ~passes;
                         if (passes) {
-                               prt_str(&buf, "  running recovery passes: ");
+                               prt_str(&buf, "\n  running recovery passes: ");
                                 prt_bitflags(&buf, bch2_recovery_passes,
                                              bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
                         }
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c

index a45354d2acde9f3ad0b149247c8ff4c7c869fb15..eff5ce18c69c0600047c1fef688a5980af33c678 100644 (file)
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -421,7 +421,7 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
         m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
         for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
                 m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
-       m->errors_reset_time = ktime_get_real_seconds();
+       m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
  
         bch2_write_super(c);
         mutex_unlock(&c->sb_lock);
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c

index 45f67e8b29eb67f188e5cfb32aa39e0b1ad1d625..ac6ba04d5521714ece2e2cb00400fff60ec05eb6 100644 (file)
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -728,7 +728,7 @@ static int check_snapshot(struct btree_trans *trans,
                 return 0;
  
         memset(&s, 0, sizeof(s));
-       memcpy(&s, k.v, bkey_val_bytes(k.k));
+       memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k)));
  
         id = le32_to_cpu(s.parent);
         if (id) {
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c

index d60c7d27a0477cb0de116675671d5c888d8f1c86..36988add581fb57ceb69fd564e770a059f184c42 100644 (file)
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -717,7 +717,7 @@ retry:
  
         if (IS_ERR(sb->bdev_handle)) {
                 ret = PTR_ERR(sb->bdev_handle);
-               goto out;
+               goto err;
         }
         sb->bdev = sb->bdev_handle->bdev;
  
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c

index b9911402b1753baa986a1673339c4454eba87431..6b23e11825e6d47ef46c7f294add46fa455e6a8f 100644 (file)
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1428,10 +1428,10 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
  
                 required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
                                ? c->opts.metadata_replicas
-                              : c->opts.metadata_replicas_required,
+                              : metadata_replicas_required(c),
                                !(flags & BCH_FORCE_IF_DATA_DEGRADED)
                                ? c->opts.data_replicas
-                              : c->opts.data_replicas_required);
+                              : data_replicas_required(c));
  
                 return nr_rw >= required;
         case BCH_MEMBER_STATE_failed:
diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c

index b1c867aa2b58e6f097cba1e4eedc37f55a58cc93..9220d7de10db67f6cd4a36040af7fe557756230b 100644 (file)
--- a/fs/bcachefs/thread_with_file.c
+++ b/fs/bcachefs/thread_with_file.c
@@ -53,9 +53,9 @@ int bch2_run_thread_with_file(struct thread_with_file *thr,
         if (ret)
                 goto err;
  
-       fd_install(fd, file);
         get_task_struct(thr->task);
         wake_up_process(thr->task);
+       fd_install(fd, file);
         return fd;
  err:
         if (fd >= 0)
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c

index 56b815fd9fc6ee5a541aa8e7007f3c00025c493d..3a32faa86b5c4a2eee98de32951c18dc73052041 100644 (file)
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -289,7 +289,7 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne
         do {
                 nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
         } while (nr_entries == stack->size &&
-                !(ret = darray_make_room(stack, stack->size * 2)));
+                !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp)));
  
         stack->nr = nr_entries;
         up_read(&task->signal->exec_update_lock);
@@ -418,14 +418,15 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
                 bch2_quantiles_update(&stats->quantiles, duration);
         }
  
-       if (time_after64(end, stats->last_event)) {
+       if (stats->last_event && time_after64(end, stats->last_event)) {
                 freq = end - stats->last_event;
                 mean_and_variance_update(&stats->freq_stats, freq);
                 mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
                 stats->max_freq = max(stats->max_freq, freq);
                 stats->min_freq = min(stats->min_freq, freq);
-               stats->last_event = end;
         }
+
+       stats->last_event = end;
  }
  
  static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c

index a9be9ac9922225bb32801aec5834c9e9d87ffc97..378d9103a2072b1628e66d850a42b9254be72b36 100644 (file)
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1455,6 +1455,7 @@ out:
   */
  void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
  {
+       LIST_HEAD(retry_list);
         struct btrfs_block_group *block_group;
         struct btrfs_space_info *space_info;
         struct btrfs_trans_handle *trans;
@@ -1476,6 +1477,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
  
         spin_lock(&fs_info->unused_bgs_lock);
         while (!list_empty(&fs_info->unused_bgs)) {
+               u64 used;
                 int trimming;
  
                 block_group = list_first_entry(&fs_info->unused_bgs,
@@ -1511,9 +1513,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                         goto next;
                 }
  
+               spin_lock(&space_info->lock);
                 spin_lock(&block_group->lock);
-               if (block_group->reserved || block_group->pinned ||
-                   block_group->used || block_group->ro ||
+               if (btrfs_is_block_group_used(block_group) || block_group->ro ||
                     list_is_singular(&block_group->list)) {
                         /*
                          * We want to bail if we made new allocations or have
@@ -1523,10 +1525,49 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                          */
                         trace_btrfs_skip_unused_block_group(block_group);
                         spin_unlock(&block_group->lock);
+                       spin_unlock(&space_info->lock);
                         up_write(&space_info->groups_sem);
                         goto next;
                 }
+
+               /*
+                * The block group may be unused but there may be space reserved
+                * accounting with the existence of that block group, that is,
+                * space_info->bytes_may_use was incremented by a task but no
+                * space was yet allocated from the block group by the task.
+                * That space may or may not be allocated, as we are generally
+                * pessimistic about space reservation for metadata as well as
+                * for data when using compression (as we reserve space based on
+                * the worst case, when data can't be compressed, and before
+                * actually attempting compression, before starting writeback).
+                *
+                * So check if the total space of the space_info minus the size
+                * of this block group is less than the used space of the
+                * space_info - if that's the case, then it means we have tasks
+                * that might be relying on the block group in order to allocate
+                * extents, and add back the block group to the unused list when
+                * we finish, so that we retry later in case no tasks ended up
+                * needing to allocate extents from the block group.
+                */
+               used = btrfs_space_info_used(space_info, true);
+               if (space_info->total_bytes - block_group->length < used) {
+                       /*
+                        * Add a reference for the list, compensate for the ref
+                        * drop under the "next" label for the
+                        * fs_info->unused_bgs list.
+                        */
+                       btrfs_get_block_group(block_group);
+                       list_add_tail(&block_group->bg_list, &retry_list);
+
+                       trace_btrfs_skip_unused_block_group(block_group);
+                       spin_unlock(&block_group->lock);
+                       spin_unlock(&space_info->lock);
+                       up_write(&space_info->groups_sem);
+                       goto next;
+               }
+
                 spin_unlock(&block_group->lock);
+               spin_unlock(&space_info->lock);
  
                 /* We don't want to force the issue, only flip if it's ok. */
                 ret = inc_block_group_ro(block_group, 0);
@@ -1650,12 +1691,16 @@ next:
                 btrfs_put_block_group(block_group);
                 spin_lock(&fs_info->unused_bgs_lock);
         }
+       list_splice_tail(&retry_list, &fs_info->unused_bgs);
         spin_unlock(&fs_info->unused_bgs_lock);
         mutex_unlock(&fs_info->reclaim_bgs_lock);
         return;
  
  flip_async:
         btrfs_end_transaction(trans);
+       spin_lock(&fs_info->unused_bgs_lock);
+       list_splice_tail(&retry_list, &fs_info->unused_bgs);
+       spin_unlock(&fs_info->unused_bgs_lock);
         mutex_unlock(&fs_info->reclaim_bgs_lock);
         btrfs_put_block_group(block_group);
         btrfs_discard_punt_unused_bgs_list(fs_info);
@@ -2684,6 +2729,37 @@ next:
                 btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info);
                 list_del_init(&block_group->bg_list);
                 clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
+
+               /*
+                * If the block group is still unused, add it to the list of
+                * unused block groups. The block group may have been created in
+                * order to satisfy a space reservation, in which case the
+                * extent allocation only happens later. But often we don't
+                * actually need to allocate space that we previously reserved,
+                * so the block group may become unused for a long time. For
+                * example for metadata we generally reserve space for a worst
+                * possible scenario, but then don't end up allocating all that
+                * space or none at all (due to no need to COW, extent buffers
+                * were already COWed in the current transaction and still
+                * unwritten, tree heights lower than the maximum possible
+                * height, etc). For data we generally reserve the axact amount
+                * of space we are going to allocate later, the exception is
+                * when using compression, as we must reserve space based on the
+                * uncompressed data size, because the compression is only done
+                * when writeback triggered and we don't know how much space we
+                * are actually going to need, so we reserve the uncompressed
+                * size because the data may be uncompressible in the worst case.
+                */
+               if (ret == 0) {
+                       bool used;
+
+                       spin_lock(&block_group->lock);
+                       used = btrfs_is_block_group_used(block_group);
+                       spin_unlock(&block_group->lock);
+
+                       if (!used)
+                               btrfs_mark_bg_unused(block_group);
+               }
         }
         btrfs_trans_release_chunk_metadata(trans);
  }
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h

index c4a1f01cc1c240d108702fc8899de9efe00da613..962b11983901a86ae16add7962c5ea5a26796b6f 100644 (file)
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -257,6 +257,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
         return (block_group->start + block_group->length);
  }
  
+static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg)
+{
+       lockdep_assert_held(&bg->lock);
+
+       return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0);
+}
+
  static inline bool btrfs_is_block_group_data_only(
                                         struct btrfs_block_group *block_group)
  {
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c

index ceb5f586a2d55571d53db2de227f4ef0f5ec1c27..1043a8142351b2692587f4a5e6d11147ee7fde99 100644 (file)
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -494,7 +494,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
  
         block_rsv = get_block_rsv(trans, root);
  
-       if (unlikely(block_rsv->size == 0))
+       if (unlikely(btrfs_block_rsv_size(block_rsv) == 0))
                 goto try_reserve;
  again:
         ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize);
diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h

index b0bd12b8652f4f51e467a95b4bfa36ec8d894837..43a9a6b5a79f4622607529393eaced15cb1409ac 100644 (file)
--- a/fs/btrfs/block-rsv.h
+++ b/fs/btrfs/block-rsv.h
@@ -101,4 +101,36 @@ static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv)
         return data_race(rsv->full);
  }
  
+/*
+ * Get the reserved mount of a block reserve in a context where getting a stale
+ * value is acceptable, instead of accessing it directly and trigger data race
+ * warning from KCSAN.
+ */
+static inline u64 btrfs_block_rsv_reserved(struct btrfs_block_rsv *rsv)
+{
+       u64 ret;
+
+       spin_lock(&rsv->lock);
+       ret = rsv->reserved;
+       spin_unlock(&rsv->lock);
+
+       return ret;
+}
+
+/*
+ * Get the size of a block reserve in a context where getting a stale value is
+ * acceptable, instead of accessing it directly and trigger data race warning
+ * from KCSAN.
+ */
+static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv)
+{
+       u64 ret;
+
+       spin_lock(&rsv->lock);
+       ret = rsv->size;
+       spin_unlock(&rsv->lock);
+
+       return ret;
+}
+
  #endif /* BTRFS_BLOCK_RSV_H */
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c

index c276b136ab63a16d5278a654ab26a670098806c0..5b0b645714183a7adcbe093d48964ee7380c4b24 100644 (file)
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
                         goto add;
  
                 /* Skip too large extent */
-               if (range_len >= extent_thresh)
+               if (em->len >= extent_thresh)
                         goto next;
  
                 /*
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c

index 2833e8ef4c098f680a4883d41a1e925dc477bc2f..acf9f4b6c044025fe2ef288e99716d0373d01f31 100644 (file)
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -245,7 +245,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
         struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
         u64 reserve_size = 0;
         u64 qgroup_rsv_size = 0;
-       u64 csum_leaves;
         unsigned outstanding_extents;
  
         lockdep_assert_held(&inode->lock);
@@ -260,10 +259,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
                                                 outstanding_extents);
                 reserve_size += btrfs_calc_metadata_size(fs_info, 1);
         }
-       csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
-                                                inode->csum_bytes);
-       reserve_size += btrfs_calc_insert_metadata_size(fs_info,
-                                                       csum_leaves);
+       if (!(inode->flags & BTRFS_INODE_NODATASUM)) {
+               u64 csum_leaves;
+
+               csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
+               reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves);
+       }
         /*
          * For qgroup rsv, the calculation is very simple:
          * account one nodesize for each outstanding extent
@@ -278,14 +279,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
         spin_unlock(&block_rsv->lock);
  }
  
-static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+static void calc_inode_reservations(struct btrfs_inode *inode,
                                     u64 num_bytes, u64 disk_num_bytes,
                                     u64 *meta_reserve, u64 *qgroup_reserve)
  {
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
         u64 nr_extents = count_max_extents(fs_info, num_bytes);
-       u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+       u64 csum_leaves;
         u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
  
+       if (inode->flags & BTRFS_INODE_NODATASUM)
+               csum_leaves = 0;
+       else
+               csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+
         *meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
                                                 nr_extents + csum_leaves);
  
@@ -337,7 +344,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
          * everything out and try again, which is bad.  This way we just
          * over-reserve slightly, and clean up the mess when we are done.
          */
-       calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
+       calc_inode_reservations(inode, num_bytes, disk_num_bytes,
                                 &meta_reserve, &qgroup_reserve);
         ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true,
                                                  noflush);
@@ -359,7 +366,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
         nr_extents = count_max_extents(fs_info, num_bytes);
         spin_lock(&inode->lock);
         btrfs_mod_outstanding_extents(inode, nr_extents);
-       inode->csum_bytes += disk_num_bytes;
+       if (!(inode->flags & BTRFS_INODE_NODATASUM))
+               inode->csum_bytes += disk_num_bytes;
         btrfs_calculate_inode_block_rsv_size(fs_info, inode);
         spin_unlock(&inode->lock);
  
@@ -393,7 +401,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
  
         num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
         spin_lock(&inode->lock);
-       inode->csum_bytes -= num_bytes;
+       if (!(inode->flags & BTRFS_INODE_NODATASUM))
+               inode->csum_bytes -= num_bytes;
         btrfs_calculate_inode_block_rsv_size(fs_info, inode);
         spin_unlock(&inode->lock);
  
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c

index 1502d664c89273eb54ba3516528b74eab094f3b3..79c4293ddf373f7d452b2cd05aeec4dd1d9fb5f7 100644 (file)
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -725,6 +725,23 @@ leave:
         return ret;
  }
  
+static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args)
+{
+       if (args->start.srcdevid == 0) {
+               if (memchr(args->start.srcdev_name, 0,
+                          sizeof(args->start.srcdev_name)) == NULL)
+                       return -ENAMETOOLONG;
+       } else {
+               args->start.srcdev_name[0] = 0;
+       }
+
+       if (memchr(args->start.tgtdev_name, 0,
+                  sizeof(args->start.tgtdev_name)) == NULL)
+           return -ENAMETOOLONG;
+
+       return 0;
+}
+
  int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
                             struct btrfs_ioctl_dev_replace_args *args)
  {
@@ -737,10 +754,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
         default:
                 return -EINVAL;
         }
-
-       if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
-           args->start.tgtdev_name[0] == '\0')
-               return -EINVAL;
+       ret = btrfs_check_replace_dev_names(args);
+       if (ret < 0)
+               return ret;
  
         ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name,
                                         args->start.srcdevid,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index c6907d533fe83912576fd92283658539e0abbb81..c843563914cad08e2dd84ef1741e19d933f092ee 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
   *
   * @objectid:  root id
   * @anon_dev:  preallocated anonymous block device number for new roots,
- *             pass 0 for new allocation.
+ *             pass NULL for a new allocation.
   * @check_ref: whether to check root item references, If true, return -ENOENT
   *             for orphan roots
   */
  static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
-                                            u64 objectid, dev_t anon_dev,
+                                            u64 objectid, dev_t *anon_dev,
                                              bool check_ref)
  {
         struct btrfs_root *root;
@@ -1336,8 +1336,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
  again:
         root = btrfs_lookup_fs_root(fs_info, objectid);
         if (root) {
-               /* Shouldn't get preallocated anon_dev for cached roots */
-               ASSERT(!anon_dev);
+               /*
+                * Some other caller may have read out the newly inserted
+                * subvolume already (for things like backref walk etc).  Not
+                * that common but still possible.  In that case, we just need
+                * to free the anon_dev.
+                */
+               if (unlikely(anon_dev && *anon_dev)) {
+                       free_anon_bdev(*anon_dev);
+                       *anon_dev = 0;
+               }
+
                 if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
                         btrfs_put_root(root);
                         return ERR_PTR(-ENOENT);
@@ -1357,7 +1366,7 @@ again:
                 goto fail;
         }
  
-       ret = btrfs_init_fs_root(root, anon_dev);
+       ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
         if (ret)
                 goto fail;
  
@@ -1393,7 +1402,7 @@ fail:
          * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
          * and once again by our caller.
          */
-       if (anon_dev)
+       if (anon_dev && *anon_dev)
                 root->anon_dev = 0;
         btrfs_put_root(root);
         return ERR_PTR(ret);
@@ -1409,7 +1418,7 @@ fail:
  struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
                                      u64 objectid, bool check_ref)
  {
-       return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
+       return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
  }
  
  /*
@@ -1417,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
   * the anonymous block device id
   *
   * @objectid:  tree objectid
- * @anon_dev:  if zero, allocate a new anonymous block device or use the
- *             parameter value
+ * @anon_dev:  if NULL, allocate a new anonymous block device or use the
+ *             parameter value if not NULL
   */
  struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
-                                        u64 objectid, dev_t anon_dev)
+                                        u64 objectid, dev_t *anon_dev)
  {
         return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
  }
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h

index 9413726b329bb123202a66cf341320ca2d99e410..eb3473d1c1ac1b239092a594cf0f788f961b4943 100644 (file)
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
  struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
                                      u64 objectid, bool check_ref);
  struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
-                                        u64 objectid, dev_t anon_dev);
+                                        u64 objectid, dev_t *anon_dev);
  struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
                                                  struct btrfs_path *path,
                                                  u64 objectid);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index cfd2967f04a293cf3d38956e9e21ce9e6656b498..8b4bef05e22217cfe43af497060889e0e5b02d0a 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
                                 struct fiemap_cache *cache,
                                 u64 offset, u64 phys, u64 len, u32 flags)
  {
+       u64 cache_end;
         int ret = 0;
  
         /* Set at the end of extent_fiemap(). */
@@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
                 goto assign;
  
         /*
-        * Sanity check, extent_fiemap() should have ensured that new
-        * fiemap extent won't overlap with cached one.
-        * Not recoverable.
+        * When iterating the extents of the inode, at extent_fiemap(), we may
+        * find an extent that starts at an offset behind the end offset of the
+        * previous extent we processed. This happens if fiemap is called
+        * without FIEMAP_FLAG_SYNC and there are ordered extents completing
+        * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
          *
-        * NOTE: Physical address can overlap, due to compression
+        * For example we are in leaf X processing its last item, which is the
+        * file extent item for file range [512K, 1M[, and after
+        * btrfs_next_leaf() releases the path, there's an ordered extent that
+        * completes for the file range [768K, 2M[, and that results in trimming
+        * the file extent item so that it now corresponds to the file range
+        * [512K, 768K[ and a new file extent item is inserted for the file
+        * range [768K, 2M[, which may end up as the last item of leaf X or as
+        * the first item of the next leaf - in either case btrfs_next_leaf()
+        * will leave us with a path pointing to the new extent item, for the
+        * file range [768K, 2M[, since that's the first key that follows the
+        * last one we processed. So in order not to report overlapping extents
+        * to user space, we trim the length of the previously cached extent and
+        * emit it.
+        *
+        * Upon calling btrfs_next_leaf() we may also find an extent with an
+        * offset smaller than or equals to cache->offset, and this happens
+        * when we had a hole or prealloc extent with several delalloc ranges in
+        * it, but after btrfs_next_leaf() released the path, delalloc was
+        * flushed and the resulting ordered extents were completed, so we can
+        * now have found a file extent item for an offset that is smaller than
+        * or equals to what we have in cache->offset. We deal with this as
+        * described below.
          */
-       if (cache->offset + cache->len > offset) {
-               WARN_ON(1);
-               return -EINVAL;
+       cache_end = cache->offset + cache->len;
+       if (cache_end > offset) {
+               if (offset == cache->offset) {
+                       /*
+                        * We cached a dealloc range (found in the io tree) for
+                        * a hole or prealloc extent and we have now found a
+                        * file extent item for the same offset. What we have
+                        * now is more recent and up to date, so discard what
+                        * we had in the cache and use what we have just found.
+                        */
+                       goto assign;
+               } else if (offset > cache->offset) {
+                       /*
+                        * The extent range we previously found ends after the
+                        * offset of the file extent item we found and that
+                        * offset falls somewhere in the middle of that previous
+                        * extent range. So adjust the range we previously found
+                        * to end at the offset of the file extent item we have
+                        * just found, since this extent is more up to date.
+                        * Emit that adjusted range and cache the file extent
+                        * item we have just found. This corresponds to the case
+                        * where a previously found file extent item was split
+                        * due to an ordered extent completing.
+                        */
+                       cache->len = offset - cache->offset;
+                       goto emit;
+               } else {
+                       const u64 range_end = offset + len;
+
+                       /*
+                        * The offset of the file extent item we have just found
+                        * is behind the cached offset. This means we were
+                        * processing a hole or prealloc extent for which we
+                        * have found delalloc ranges (in the io tree), so what
+                        * we have in the cache is the last delalloc range we
+                        * found while the file extent item we found can be
+                        * either for a whole delalloc range we previously
+                        * emmitted or only a part of that range.
+                        *
+                        * We have two cases here:
+                        *
+                        * 1) The file extent item's range ends at or behind the
+                        *    cached extent's end. In this case just ignore the
+                        *    current file extent item because we don't want to
+                        *    overlap with previous ranges that may have been
+                        *    emmitted already;
+                        *
+                        * 2) The file extent item starts behind the currently
+                        *    cached extent but its end offset goes beyond the
+                        *    end offset of the cached extent. We don't want to
+                        *    overlap with a previous range that may have been
+                        *    emmitted already, so we emit the currently cached
+                        *    extent and then partially store the current file
+                        *    extent item's range in the cache, for the subrange
+                        *    going the cached extent's end to the end of the
+                        *    file extent item.
+                        */
+                       if (range_end <= cache_end)
+                               return 0;
+
+                       if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC)))
+                               phys += cache_end - offset;
+
+                       offset = cache_end;
+                       len = range_end - cache_end;
+                       goto emit;
+               }
         }
  
         /*
@@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
                 return 0;
         }
  
+emit:
         /* Not mergeable, need to submit cached one */
         ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
                                       cache->len, cache->flags);
@@ -2689,16 +2778,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
          * it beyond i_size.
          */
         while (cur_offset < end && cur_offset < i_size) {
+               struct extent_state *cached_state = NULL;
                 u64 delalloc_start;
                 u64 delalloc_end;
                 u64 prealloc_start;
+               u64 lockstart;
+               u64 lockend;
                 u64 prealloc_len = 0;
                 bool delalloc;
  
+               lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
+               lockend = round_up(end, inode->root->fs_info->sectorsize);
+
+               /*
+                * We are only locking for the delalloc range because that's the
+                * only thing that can change here.  With fiemap we have a lock
+                * on the inode, so no buffered or direct writes can happen.
+                *
+                * However mmaps and normal page writeback will cause this to
+                * change arbitrarily.  We have to lock the extent lock here to
+                * make sure that nobody messes with the tree while we're doing
+                * btrfs_find_delalloc_in_range.
+                */
+               lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
                 delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
                                                         delalloc_cached_state,
                                                         &delalloc_start,
                                                         &delalloc_end);
+               unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
                 if (!delalloc)
                         break;
  
@@ -2866,15 +2973,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
                   u64 start, u64 len)
  {
         const u64 ino = btrfs_ino(inode);
-       struct extent_state *cached_state = NULL;
         struct extent_state *delalloc_cached_state = NULL;
         struct btrfs_path *path;
         struct fiemap_cache cache = { 0 };
         struct btrfs_backref_share_check_ctx *backref_ctx;
         u64 last_extent_end;
         u64 prev_extent_end;
-       u64 lockstart;
-       u64 lockend;
+       u64 range_start;
+       u64 range_end;
+       const u64 sectorsize = inode->root->fs_info->sectorsize;
         bool stopped = false;
         int ret;
  
@@ -2885,22 +2992,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
                 goto out;
         }
  
-       lockstart = round_down(start, inode->root->fs_info->sectorsize);
-       lockend = round_up(start + len, inode->root->fs_info->sectorsize);
-       prev_extent_end = lockstart;
-
-       btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
-       lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+       range_start = round_down(start, sectorsize);
+       range_end = round_up(start + len, sectorsize);
+       prev_extent_end = range_start;
  
         ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
         if (ret < 0)
-               goto out_unlock;
+               goto out;
         btrfs_release_path(path);
  
         path->reada = READA_FORWARD;
-       ret = fiemap_search_slot(inode, path, lockstart);
+       ret = fiemap_search_slot(inode, path, range_start);
         if (ret < 0) {
-               goto out_unlock;
+               goto out;
         } else if (ret > 0) {
                 /*
                  * No file extent item found, but we may have delalloc between
@@ -2910,7 +3014,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
                 goto check_eof_delalloc;
         }
  
-       while (prev_extent_end < lockend) {
+       while (prev_extent_end < range_end) {
                 struct extent_buffer *leaf = path->nodes[0];
                 struct btrfs_file_extent_item *ei;
                 struct btrfs_key key;
@@ -2933,21 +3037,21 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
                  * The first iteration can leave us at an extent item that ends
                  * before our range's start. Move to the next item.
                  */
-               if (extent_end <= lockstart)
+               if (extent_end <= range_start)
                         goto next_item;
  
                 backref_ctx->curr_leaf_bytenr = leaf->start;
  
                 /* We have in implicit hole (NO_HOLES feature enabled). */
                 if (prev_extent_end < key.offset) {
-                       const u64 range_end = min(key.offset, lockend) - 1;
+                       const u64 hole_end = min(key.offset, range_end) - 1;
  
                         ret = fiemap_process_hole(inode, fieinfo, &cache,
                                                   &delalloc_cached_state,
                                                   backref_ctx, 0, 0, 0,
-                                                 prev_extent_end, range_end);
+                                                 prev_extent_end, hole_end);
                         if (ret < 0) {
-                               goto out_unlock;
+                               goto out;
                         } else if (ret > 0) {
                                 /* fiemap_fill_next_extent() told us to stop. */
                                 stopped = true;
@@ -2955,7 +3059,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
                         }
  
                         /* We've reached the end of the fiemap range, stop. */
-                       if (key.offset >= lockend) {
+                       if (key.offset >= range_end) {
                                 stopped = true;
                                 break;
                         }
@@ -3003,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
                                                                   extent_gen,
                                                                   backref_ctx);
                                 if (ret < 0)
-                                       goto out_unlock;
+                                       goto out;
                                 else if (ret > 0)
                                         flags |= FIEMAP_EXTENT_SHARED;
                         }
@@ -3014,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
                 }
  
                 if (ret < 0) {
-                       goto out_unlock;
+                       goto out;
                 } else if (ret > 0) {
                         /* fiemap_fill_next_extent() told us to stop. */
                         stopped = true;
@@ -3025,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
  next_item:
                 if (fatal_signal_pending(current)) {
                         ret = -EINTR;
-                       goto out_unlock;
+                       goto out;
                 }
  
                 ret = fiemap_next_leaf_item(inode, path);
                 if (ret < 0) {
-                       goto out_unlock;
+                       goto out;
                 } else if (ret > 0) {
                         /* No more file extent items for this inode. */
                         break;
@@ -3049,29 +3153,41 @@ check_eof_delalloc:
         btrfs_free_path(path);
         path = NULL;
  
-       if (!stopped && prev_extent_end < lockend) {
+       if (!stopped && prev_extent_end < range_end) {
                 ret = fiemap_process_hole(inode, fieinfo, &cache,
                                           &delalloc_cached_state, backref_ctx,
-                                         0, 0, 0, prev_extent_end, lockend - 1);
+                                         0, 0, 0, prev_extent_end, range_end - 1);
                 if (ret < 0)
-                       goto out_unlock;
-               prev_extent_end = lockend;
+                       goto out;
+               prev_extent_end = range_end;
         }
  
         if (cache.cached && cache.offset + cache.len >= last_extent_end) {
                 const u64 i_size = i_size_read(&inode->vfs_inode);
  
                 if (prev_extent_end < i_size) {
+                       struct extent_state *cached_state = NULL;
                         u64 delalloc_start;
                         u64 delalloc_end;
+                       u64 lockstart;
+                       u64 lockend;
                         bool delalloc;
  
+                       lockstart = round_down(prev_extent_end, sectorsize);
+                       lockend = round_up(i_size, sectorsize);
+
+                       /*
+                        * See the comment in fiemap_process_hole as to why
+                        * we're doing the locking here.
+                        */
+                       lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
                         delalloc = btrfs_find_delalloc_in_range(inode,
                                                                 prev_extent_end,
                                                                 i_size - 1,
                                                                 &delalloc_cached_state,
                                                                 &delalloc_start,
                                                                 &delalloc_end);
+                       unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
                         if (!delalloc)
                                 cache.flags |= FIEMAP_EXTENT_LAST;
                 } else {
@@ -3080,10 +3196,6 @@ check_eof_delalloc:
         }
  
         ret = emit_last_fiemap_cache(fieinfo, &cache);
-
-out_unlock:
-       unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
-       btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
  out:
         free_extent_state(delalloc_cached_state);
         btrfs_free_backref_share_ctx(backref_ctx);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 1eb93d3962aac4608cda0255ea31d7e53dbc8da2..4795738d5785bce730fad21b68a00ff729b97915 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ out:
                         unwritten_start += logical_len;
                 clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
  
-               /* Drop extent maps for the part of the extent we didn't write. */
-               btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+               /*
+                * Drop extent maps for the part of the extent we didn't write.
+                *
+                * We have an exception here for the free_space_inode, this is
+                * because when we do btrfs_get_extent() on the free space inode
+                * we will search the commit root.  If this is a new block group
+                * we won't find anything, and we will trip over the assert in
+                * writepage where we do ASSERT(em->block_start !=
+                * EXTENT_MAP_HOLE).
+                *
+                * Theoretically we could also skip this for any NOCOW extent as
+                * we don't mess with the extent map tree in the NOCOW case, but
+                * for now simply skip this if we are the free space inode.
+                */
+               if (!btrfs_is_free_space_inode(inode))
+                       btrfs_drop_extent_map_range(inode, unwritten_start,
+                                                   end, false);
  
                 /*
                  * If the ordered extent had an IOERR or something else went
@@ -7820,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
  static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                         u64 start, u64 len)
  {
+       struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
         int     ret;
  
         ret = fiemap_prep(inode, fieinfo, start, &len, 0);
@@ -7845,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                         return ret;
         }
  
-       return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
+       btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
+
+       /*
+        * We did an initial flush to avoid holding the inode's lock while
+        * triggering writeback and waiting for the completion of IO and ordered
+        * extents. Now after we locked the inode we do it again, because it's
+        * possible a new write may have happened in between those two steps.
+        */
+       if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
+               ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
+               if (ret) {
+                       btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
+                       return ret;
+               }
+       }
+
+       ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
+       btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
+
+       return ret;
  }
  
  static int btrfs_writepages(struct address_space *mapping,
@@ -10273,6 +10308,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
         if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE)
                 return -EINVAL;
  
+       /*
+        * Compressed extents should always have checksums, so error out if we
+        * have a NOCOW file or inode was created while mounted with NODATASUM.
+        */
+       if (inode->flags & BTRFS_INODE_NODATASUM)
+               return -EINVAL;
+
         orig_count = iov_iter_count(from);
  
         /* The extent size must be sane. */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index dfed9dd9c2d75b8205531b030c220b42820e77ce..9d1eac15e09e141212cf0edadffcca873cba58f1 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
         free_extent_buffer(leaf);
         leaf = NULL;
  
-       new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
+       new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
         if (IS_ERR(new_root)) {
                 ret = PTR_ERR(new_root);
                 btrfs_abort_transaction(trans, ret);
@@ -3815,6 +3815,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
                 goto out;
         }
  
+       if (sa->create && is_fstree(sa->qgroupid)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
         trans = btrfs_join_transaction(root);
         if (IS_ERR(trans)) {
                 ret = PTR_ERR(trans);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c

index 63b426cc77989670e0f7890a1cb75a17348e96cb..5470e1cdf10c5348df676cd290bef45811a46019 100644 (file)
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1736,6 +1736,15 @@ out:
         return ret;
  }
  
+static bool qgroup_has_usage(struct btrfs_qgroup *qgroup)
+{
+       return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 ||
+               qgroup->excl > 0 || qgroup->excl_cmpr > 0 ||
+               qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 ||
+               qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
+               qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0);
+}
+
  int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
  {
         struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1755,6 +1764,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
                 goto out;
         }
  
+       if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) {
+               ret = -EBUSY;
+               goto out;
+       }
+
         /* Check if there are no children of this qgroup */
         if (!list_empty(&qgroup->members)) {
                 ret = -EBUSY;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

index 2d7519a6ce72d3c58e70b1cb567258e642604a87..e48a063ef0851f9476fd37a00572c1dd6c6fe379 100644 (file)
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6705,11 +6705,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
                                 if (ret)
                                         goto out;
                         }
-                       if (sctx->cur_inode_last_extent <
-                           sctx->cur_inode_size) {
-                               ret = send_hole(sctx, sctx->cur_inode_size);
-                               if (ret)
+                       if (sctx->cur_inode_last_extent < sctx->cur_inode_size) {
+                               ret = range_is_hole_in_parent(sctx,
+                                                     sctx->cur_inode_last_extent,
+                                                     sctx->cur_inode_size);
+                               if (ret < 0) {
                                         goto out;
+                               } else if (ret == 0) {
+                                       ret = send_hole(sctx, sctx->cur_inode_size);
+                                       if (ret < 0)
+                                               goto out;
+                               } else {
+                                       /* Range is already a hole, skip. */
+                                       ret = 0;
+                               }
                         }
                 }
                 if (need_truncate) {
@@ -8111,7 +8120,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
         }
  
         if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
-               ret = -EINVAL;
+               ret = -EOPNOTSUPP;
                 goto out;
         }
  
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c

index 571bb13587d5e7aabc1c40feab093af5257a6782..3b54eb5834746be51807d3e13023fba3f2701981 100644 (file)
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -856,7 +856,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
  static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
                                     struct btrfs_space_info *space_info)
  {
-       u64 global_rsv_size = fs_info->global_block_rsv.reserved;
+       const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv);
         u64 ordered, delalloc;
         u64 thresh;
         u64 used;
@@ -956,8 +956,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
         ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1;
         delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes);
         if (ordered >= delalloc)
-               used += fs_info->delayed_refs_rsv.reserved +
-                       fs_info->delayed_block_rsv.reserved;
+               used += btrfs_block_rsv_reserved(&fs_info->delayed_refs_rsv) +
+                       btrfs_block_rsv_reserved(&fs_info->delayed_block_rsv);
         else
                 used += space_info->bytes_may_use - global_rsv_size;
  
@@ -1173,7 +1173,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
                 enum btrfs_flush_state flush;
                 u64 delalloc_size = 0;
                 u64 to_reclaim, block_rsv_size;
-               u64 global_rsv_size = global_rsv->reserved;
+               const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv);
  
                 loops++;
  
@@ -1185,9 +1185,9 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
                  * assume it's tied up in delalloc reservations.
                  */
                 block_rsv_size = global_rsv_size +
-                       delayed_block_rsv->reserved +
-                       delayed_refs_rsv->reserved +
-                       trans_rsv->reserved;
+                       btrfs_block_rsv_reserved(delayed_block_rsv) +
+                       btrfs_block_rsv_reserved(delayed_refs_rsv) +
+                       btrfs_block_rsv_reserved(trans_rsv);
                 if (block_rsv_size < space_info->bytes_may_use)
                         delalloc_size = space_info->bytes_may_use - block_rsv_size;
  
@@ -1207,16 +1207,16 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
                         to_reclaim = delalloc_size;
                         flush = FLUSH_DELALLOC;
                 } else if (space_info->bytes_pinned >
-                          (delayed_block_rsv->reserved +
-                           delayed_refs_rsv->reserved)) {
+                          (btrfs_block_rsv_reserved(delayed_block_rsv) +
+                           btrfs_block_rsv_reserved(delayed_refs_rsv))) {
                         to_reclaim = space_info->bytes_pinned;
                         flush = COMMIT_TRANS;
-               } else if (delayed_block_rsv->reserved >
-                          delayed_refs_rsv->reserved) {
-                       to_reclaim = delayed_block_rsv->reserved;
+               } else if (btrfs_block_rsv_reserved(delayed_block_rsv) >
+                          btrfs_block_rsv_reserved(delayed_refs_rsv)) {
+                       to_reclaim = btrfs_block_rsv_reserved(delayed_block_rsv);
                         flush = FLUSH_DELAYED_ITEMS_NR;
                 } else {
-                       to_reclaim = delayed_refs_rsv->reserved;
+                       to_reclaim = btrfs_block_rsv_reserved(delayed_refs_rsv);
                         flush = FLUSH_DELAYED_REFS_NR;
                 }
  
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 5b3333ceef04818dbf98270da4bb84c99e5c70f8..bf8e64c766b63b4c8b424f4437791eaea12f24a2 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -564,56 +564,22 @@ static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info,
                                         u64 num_bytes,
                                         u64 *delayed_refs_bytes)
  {
-       struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
         struct btrfs_space_info *si = fs_info->trans_block_rsv.space_info;
-       u64 extra_delayed_refs_bytes = 0;
-       u64 bytes;
+       u64 bytes = num_bytes + *delayed_refs_bytes;
         int ret;
  
-       /*
-        * If there's a gap between the size of the delayed refs reserve and
-        * its reserved space, than some tasks have added delayed refs or bumped
-        * its size otherwise (due to block group creation or removal, or block
-        * group item update). Also try to allocate that gap in order to prevent
-        * using (and possibly abusing) the global reserve when committing the
-        * transaction.
-        */
-       if (flush == BTRFS_RESERVE_FLUSH_ALL &&
-           !btrfs_block_rsv_full(delayed_refs_rsv)) {
-               spin_lock(&delayed_refs_rsv->lock);
-               if (delayed_refs_rsv->size > delayed_refs_rsv->reserved)
-                       extra_delayed_refs_bytes = delayed_refs_rsv->size -
-                               delayed_refs_rsv->reserved;
-               spin_unlock(&delayed_refs_rsv->lock);
-       }
-
-       bytes = num_bytes + *delayed_refs_bytes + extra_delayed_refs_bytes;
-
         /*
          * We want to reserve all the bytes we may need all at once, so we only
          * do 1 enospc flushing cycle per transaction start.
          */
         ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
-       if (ret == 0) {
-               if (extra_delayed_refs_bytes > 0)
-                       btrfs_migrate_to_delayed_refs_rsv(fs_info,
-                                                         extra_delayed_refs_bytes);
-               return 0;
-       }
-
-       if (extra_delayed_refs_bytes > 0) {
-               bytes -= extra_delayed_refs_bytes;
-               ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
-               if (ret == 0)
-                       return 0;
-       }
  
         /*
          * If we are an emergency flush, which can steal from the global block
          * reserve, then attempt to not reserve space for the delayed refs, as
          * we will consume space for them from the global block reserve.
          */
-       if (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
+       if (ret && flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
                 bytes -= *delayed_refs_bytes;
                 *delayed_refs_bytes = 0;
                 ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
@@ -1868,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         }
  
         key.offset = (u64)-1;
-       pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
+       pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
         if (IS_ERR(pending->snap)) {
                 ret = PTR_ERR(pending->snap);
                 pending->snap = NULL;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c

index 168af9d000d168324fcc8355781517ddeedeefd1..5f750fa53a2b2a88c81e9ec05c9a7ab22d44a674 100644 (file)
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1639,6 +1639,15 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
         }
  
  out:
+       /* Reject non SINGLE data profiles without RST */
+       if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
+           (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+           !fs_info->stripe_root) {
+               btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
+                         btrfs_bg_type_to_raid_name(map->type));
+               return -EINVAL;
+       }
+
         if (cache->alloc_offset > cache->zone_capacity) {
                 btrfs_err(fs_info,
  "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
@@ -1670,6 +1679,7 @@ out:
         }
         bitmap_free(active);
         kfree(zone_info);
+       btrfs_free_chunk_map(map);
  
         return ret;
  }
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c

index 7077f72e6f4747c2a1f1bd6c898221d4bdaab380..f449f7340aad0811ae2cea3134731e1a2111f5ff 100644 (file)
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -168,6 +168,8 @@ error_unsupported:
         dput(root);
  error_open_root:
         cachefiles_end_secure(cache, saved_cred);
+       put_cred(cache->cache_cred);
+       cache->cache_cred = NULL;
  error_getsec:
         fscache_relinquish_cache(cache_cookie);
         cache->cache = NULL;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c

index 3f24905f40661302936f08122394947d55e3d5f3..6465e257423091d5183a6bf4c7963a2e8e900766 100644 (file)
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -816,6 +816,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
         cachefiles_put_directory(cache->graveyard);
         cachefiles_put_directory(cache->store);
         mntput(cache->mnt);
+       put_cred(cache->cache_cred);
  
         kfree(cache->rootdirname);
         kfree(cache->secctx);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c

index 9c02f328c966cbdd12b8af17d7ddb9d5bb19ea38..7fb4aae97412464c54b42037f75016085d214bd3 100644 (file)
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1452,7 +1452,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
         if (flushing & CEPH_CAP_XATTR_EXCL) {
                 arg->old_xattr_buf = __ceph_build_xattrs_blob(ci);
                 arg->xattr_version = ci->i_xattrs.version;
-               arg->xattr_buf = ci->i_xattrs.blob;
+               arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob);
         } else {
                 arg->xattr_buf = NULL;
                 arg->old_xattr_buf = NULL;
@@ -1553,6 +1553,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
         encode_cap_msg(msg, arg);
         ceph_con_send(&arg->session->s_con, msg);
         ceph_buffer_put(arg->old_xattr_buf);
+       ceph_buffer_put(arg->xattr_buf);
         if (arg->wake)
                 wake_up_all(&ci->i_cap_wq);
  }
@@ -2155,6 +2156,30 @@ retry:
                       ceph_cap_string(cap->implemented),
                       ceph_cap_string(revoking));
  
+               /* completed revocation? going down and there are no caps? */
+               if (revoking) {
+                       if ((revoking & cap_used) == 0) {
+                               doutc(cl, "completed revocation of %s\n",
+                                     ceph_cap_string(cap->implemented & ~cap->issued));
+                               goto ack;
+                       }
+
+                       /*
+                        * If the "i_wrbuffer_ref" was increased by mmap or generic
+                        * cache write just before the ceph_check_caps() is called,
+                        * the Fb capability revoking will fail this time. Then we
+                        * must wait for the BDI's delayed work to flush the dirty
+                        * pages and to release the "i_wrbuffer_ref", which will cost
+                        * at most 5 seconds. That means the MDS needs to wait at
+                        * most 5 seconds to finished the Fb capability's revocation.
+                        *
+                        * Let's queue a writeback for it.
+                        */
+                       if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
+                           (revoking & CEPH_CAP_FILE_BUFFER))
+                               queue_writeback = true;
+               }
+
                 if (cap == ci->i_auth_cap &&
                     (cap->issued & CEPH_CAP_FILE_WR)) {
                         /* request larger max_size from MDS? */
@@ -2182,30 +2207,6 @@ retry:
                         }
                 }
  
-               /* completed revocation? going down and there are no caps? */
-               if (revoking) {
-                       if ((revoking & cap_used) == 0) {
-                               doutc(cl, "completed revocation of %s\n",
-                                     ceph_cap_string(cap->implemented & ~cap->issued));
-                               goto ack;
-                       }
-
-                       /*
-                        * If the "i_wrbuffer_ref" was increased by mmap or generic
-                        * cache write just before the ceph_check_caps() is called,
-                        * the Fb capability revoking will fail this time. Then we
-                        * must wait for the BDI's delayed work to flush the dirty
-                        * pages and to release the "i_wrbuffer_ref", which will cost
-                        * at most 5 seconds. That means the MDS needs to wait at
-                        * most 5 seconds to finished the Fb capability's revocation.
-                        *
-                        * Let's queue a writeback for it.
-                        */
-                       if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
-                           (revoking & CEPH_CAP_FILE_BUFFER))
-                               queue_writeback = true;
-               }
-
                 /* want more caps from mds? */
                 if (want & ~cap->mds_wanted) {
                         if (want & ~(cap->mds_wanted | cap->issued))
@@ -3215,7 +3216,6 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
  
  enum put_cap_refs_mode {
         PUT_CAP_REFS_SYNC = 0,
-       PUT_CAP_REFS_NO_CHECK,
         PUT_CAP_REFS_ASYNC,
  };
  
@@ -3331,11 +3331,6 @@ void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had)
         __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_ASYNC);
  }
  
-void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
-{
-       __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_NO_CHECK);
-}
-
  /*
   * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
   * context.  Adjust per-snap dirty page accounting as appropriate.
@@ -4777,7 +4772,22 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
                 if (__ceph_caps_dirty(ci)) {
                         struct ceph_mds_client *mdsc =
                                 ceph_inode_to_fs_client(inode)->mdsc;
-                       __cap_delay_requeue_front(mdsc, ci);
+
+                       doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
+                             ceph_vinop(inode));
+                       spin_lock(&mdsc->cap_unlink_delay_lock);
+                       ci->i_ceph_flags |= CEPH_I_FLUSH;
+                       if (!list_empty(&ci->i_cap_delay_list))
+                               list_del_init(&ci->i_cap_delay_list);
+                       list_add_tail(&ci->i_cap_delay_list,
+                                     &mdsc->cap_unlink_delay_list);
+                       spin_unlock(&mdsc->cap_unlink_delay_lock);
+
+                       /*
+                        * Fire the work immediately, because the MDS maybe
+                        * waiting for caps release.
+                        */
+                       ceph_queue_cap_unlink_work(mdsc);
                 }
         }
         spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c

index 0c25d326afc41d9d4d8ba98d3c6c5976647bb3fe..7b2e77517f235ecd47264061c04bae2e6d0b7c83 100644 (file)
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -78,6 +78,8 @@ struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
         if (!inode)
                 return ERR_PTR(-ENOMEM);
  
+       inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT;
+
         if (!S_ISLNK(*mode)) {
                 err = ceph_pre_init_acls(dir, mode, as_ctx);
                 if (err < 0)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

index 548d1de379f3570b729af9e50b67aaff65e36e14..3ab9c268a8bb398b779cc93d3da98f3d13df8fe3 100644 (file)
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1089,7 +1089,7 @@ void ceph_mdsc_release_request(struct kref *kref)
         struct ceph_mds_request *req = container_of(kref,
                                                     struct ceph_mds_request,
                                                     r_kref);
-       ceph_mdsc_release_dir_caps_no_check(req);
+       ceph_mdsc_release_dir_caps_async(req);
         destroy_reply_info(&req->r_reply_info);
         if (req->r_request)
                 ceph_msg_put(req->r_request);
@@ -2484,6 +2484,50 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
         }
  }
  
+void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc)
+{
+       struct ceph_client *cl = mdsc->fsc->client;
+       if (mdsc->stopping)
+               return;
+
+        if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_unlink_work)) {
+                doutc(cl, "caps unlink work queued\n");
+        } else {
+                doutc(cl, "failed to queue caps unlink work\n");
+        }
+}
+
+static void ceph_cap_unlink_work(struct work_struct *work)
+{
+       struct ceph_mds_client *mdsc =
+               container_of(work, struct ceph_mds_client, cap_unlink_work);
+       struct ceph_client *cl = mdsc->fsc->client;
+
+       doutc(cl, "begin\n");
+       spin_lock(&mdsc->cap_unlink_delay_lock);
+       while (!list_empty(&mdsc->cap_unlink_delay_list)) {
+               struct ceph_inode_info *ci;
+               struct inode *inode;
+
+               ci = list_first_entry(&mdsc->cap_unlink_delay_list,
+                                     struct ceph_inode_info,
+                                     i_cap_delay_list);
+               list_del_init(&ci->i_cap_delay_list);
+
+               inode = igrab(&ci->netfs.inode);
+               if (inode) {
+                       spin_unlock(&mdsc->cap_unlink_delay_lock);
+                       doutc(cl, "on %p %llx.%llx\n", inode,
+                             ceph_vinop(inode));
+                       ceph_check_caps(ci, CHECK_CAPS_FLUSH);
+                       iput(inode);
+                       spin_lock(&mdsc->cap_unlink_delay_lock);
+               }
+       }
+       spin_unlock(&mdsc->cap_unlink_delay_lock);
+       doutc(cl, "done\n");
+}
+
  /*
   * requests
   */
@@ -4261,7 +4305,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
         }
  }
  
-void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
+void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req)
  {
         struct ceph_client *cl = req->r_mdsc->fsc->client;
         int dcaps;
@@ -4269,8 +4313,7 @@ void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
         dcaps = xchg(&req->r_dir_caps, 0);
         if (dcaps) {
                 doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
-               ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent),
-                                               dcaps);
+               ceph_put_cap_refs_async(ceph_inode(req->r_parent), dcaps);
         }
  }
  
@@ -4306,7 +4349,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
                 if (req->r_session->s_mds != session->s_mds)
                         continue;
  
-               ceph_mdsc_release_dir_caps_no_check(req);
+               ceph_mdsc_release_dir_caps_async(req);
  
                 __send_request(session, req, true);
         }
@@ -5360,6 +5403,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
         INIT_LIST_HEAD(&mdsc->cap_delay_list);
         INIT_LIST_HEAD(&mdsc->cap_wait_list);
         spin_lock_init(&mdsc->cap_delay_lock);
+       INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
+       spin_lock_init(&mdsc->cap_unlink_delay_lock);
         INIT_LIST_HEAD(&mdsc->snap_flush_list);
         spin_lock_init(&mdsc->snap_flush_lock);
         mdsc->last_cap_flush_tid = 1;
@@ -5368,6 +5413,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
         spin_lock_init(&mdsc->cap_dirty_lock);
         init_waitqueue_head(&mdsc->cap_flushing_wq);
         INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
+       INIT_WORK(&mdsc->cap_unlink_work, ceph_cap_unlink_work);
         err = ceph_metric_init(&mdsc->metric);
         if (err)
                 goto err_mdsmap;
@@ -5641,6 +5687,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
         ceph_cleanup_global_and_empty_realms(mdsc);
  
         cancel_work_sync(&mdsc->cap_reclaim_work);
+       cancel_work_sync(&mdsc->cap_unlink_work);
         cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
  
         doutc(cl, "done\n");
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h

index 2e6ddaa13d725016dc9a93c6ad1838806eac547e..03f8ff00874f727adff8b88cc8d538fc989692d8 100644 (file)
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -462,6 +462,8 @@ struct ceph_mds_client {
         unsigned long    last_renew_caps;  /* last time we renewed our caps */
         struct list_head cap_delay_list;   /* caps with delayed release */
         spinlock_t       cap_delay_lock;   /* protects cap_delay_list */
+       struct list_head cap_unlink_delay_list;  /* caps with delayed release for unlink */
+       spinlock_t       cap_unlink_delay_lock;  /* protects cap_unlink_delay_list */
         struct list_head snap_flush_list;  /* cap_snaps ready to flush */
         spinlock_t       snap_flush_lock;
  
@@ -475,6 +477,8 @@ struct ceph_mds_client {
         struct work_struct cap_reclaim_work;
         atomic_t           cap_reclaim_pending;
  
+       struct work_struct cap_unlink_work;
+
         /*
          * Cap reservations
          *
@@ -552,7 +556,7 @@ extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
                                 struct inode *dir,
                                 struct ceph_mds_request *req);
  extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req);
-extern void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req);
+extern void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req);
  static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
  {
         kref_get(&req->r_kref);
@@ -574,6 +578,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
                                     struct ceph_mds_session *session);
  extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
  extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
+extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc);
  extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
                                      int (*cb)(struct inode *, int mds, void *),
                                      void *arg);
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c

index fae97c25ce58d5b268b7e3d73c5d4c94def4946d..8109aba66e023eb0d3dd5cdf06f3060c5cbf4b1a 100644 (file)
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -380,10 +380,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p,
                 ceph_decode_skip_8(p, end, bad_ext);
                 /* required_client_features */
                 ceph_decode_skip_set(p, end, 64, bad_ext);
+               /* bal_rank_mask */
+               ceph_decode_skip_string(p, end, bad_ext);
+       }
+       if (mdsmap_ev >= 18) {
                 ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext);
-       } else {
-               /* This forces the usage of the (sync) SETXATTR Op */
-               m->m_max_xattr_size = 0;
         }
  bad_ext:
         doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h

index 89f1931f1ba6c9643a4098b1255c240e00f0c38e..1f2171dd01bfa34a404eef00113646bdcb978980 100644 (file)
--- a/fs/ceph/mdsmap.h
+++ b/fs/ceph/mdsmap.h
@@ -27,7 +27,11 @@ struct ceph_mdsmap {
         u32 m_session_timeout;          /* seconds */
         u32 m_session_autoclose;        /* seconds */
         u64 m_max_file_size;
-       u64 m_max_xattr_size;           /* maximum size for xattrs blob */
+       /*
+        * maximum size for xattrs blob.
+        * Zeroed by default to force the usage of the (sync) SETXATTR Op.
+        */
+       u64 m_max_xattr_size;
         u32 m_max_mds;                  /* expected up:active mds number */
         u32 m_num_active_mds;           /* actual up:active mds number */
         u32 possible_max_rank;          /* possible max rank index */
diff --git a/fs/ceph/super.h b/fs/ceph/super.h

index b06e2bc86221bf02fe54b2aa3304be80bedc5214..b63b4cd9b5b685a930bc33673f28e7c48a93d605 100644 (file)
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1255,8 +1255,6 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
  extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
  extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
  extern void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had);
-extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
-                                           int had);
  extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                                        struct ceph_snap_context *snapc);
  extern void __ceph_remove_capsnap(struct inode *inode,
diff --git a/fs/dcache.c b/fs/dcache.c

index b813528fb147784c6f308e67d47f3069e3a96e33..6ebccba333368d06667eb6c1ee433046bd0ab7d8 100644 (file)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3061,7 +3061,10 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
                 if (d_unhashed(dentry) || !dentry->d_inode)
                         return D_WALK_SKIP;
  
-               dentry->d_lockref.count--;
+               if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
+                       dentry->d_flags |= DCACHE_GENOCIDE;
+                       dentry->d_lockref.count--;
+               }
         }
         return D_WALK_CONTINUE;
  }
diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h

index 169252e6dc4616c7712126adde4a36ce8e2d6922..f7206158ee81385eeaab387fd16b05aea5a7634b 100644 (file)
--- a/fs/efivarfs/internal.h
+++ b/fs/efivarfs/internal.h
@@ -38,7 +38,7 @@ struct efivar_entry {
  
  int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
                             struct list_head *),
-               void *data, bool duplicates, struct list_head *head);
+               void *data, struct list_head *head);
  
  int efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
  void __efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c

index 6038dd39367abe41430c55b04448ced7727dd287..bb14462f6d992a5506f4fda2158952cb410c96f3 100644 (file)
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -343,12 +343,7 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc)
         if (err)
                 return err;
  
-       err = efivar_init(efivarfs_callback, (void *)sb, true,
-                         &sfi->efivarfs_list);
-       if (err)
-               efivar_entry_iter(efivarfs_destroy, &sfi->efivarfs_list, NULL);
-
-       return err;
+       return efivar_init(efivarfs_callback, sb, &sfi->efivarfs_list);
  }
  
  static int efivarfs_get_tree(struct fs_context *fc)
diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c

index 114ff0fd4e55732e2ebe0cdc8b20d82436571abf..4d722af1014f2a18198cc3e831d1fea68d46e251 100644 (file)
--- a/fs/efivarfs/vars.c
+++ b/fs/efivarfs/vars.c
@@ -361,7 +361,6 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
   * efivar_init - build the initial list of EFI variables
   * @func: callback function to invoke for every variable
   * @data: function-specific data to pass to @func
- * @duplicates: error if we encounter duplicates on @head?
   * @head: initialised head of variable list
   *
   * Get every EFI variable from the firmware and invoke @func. @func
@@ -371,9 +370,9 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
   */
  int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
                             struct list_head *),
-               void *data, bool duplicates, struct list_head *head)
+               void *data, struct list_head *head)
  {
-       unsigned long variable_name_size = 1024;
+       unsigned long variable_name_size = 512;
         efi_char16_t *variable_name;
         efi_status_t status;
         efi_guid_t vendor_guid;
@@ -390,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
                 goto free;
  
         /*
-        * Per EFI spec, the maximum storage allocated for both
-        * the variable name and variable data is 1024 bytes.
+        * A small set of old UEFI implementations reject sizes
+        * above a certain threshold, the lowest seen in the wild
+        * is 512.
          */
  
         do {
-               variable_name_size = 1024;
+               variable_name_size = 512;
  
                 status = efivar_get_next_variable(&variable_name_size,
                                                   variable_name,
@@ -413,8 +413,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
                          * we'll ever see a different variable name,
                          * and may end up looping here forever.
                          */
-                       if (duplicates &&
-                           variable_is_present(variable_name, &vendor_guid,
+                       if (variable_is_present(variable_name, &vendor_guid,
                                                 head)) {
                                 dup_variable_bug(variable_name, &vendor_guid,
                                                  variable_name_size);
@@ -432,9 +431,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *,
                         break;
                 case EFI_NOT_FOUND:
                         break;
+               case EFI_BUFFER_TOO_SMALL:
+                       pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n",
+                               variable_name_size);
+                       status = EFI_NOT_FOUND;
+                       break;
                 default:
-                       printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n",
-                               status);
+                       pr_warn("efivars: get_next_variable: status=%lx\n", status);
                         status = EFI_NOT_FOUND;
                         break;
                 }
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h

index 279933e007d21798549df035b4aa595597f225b6..7cc5841577b240f90f9a623e64adc87c3fb24982 100644 (file)
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -11,13 +11,12 @@
  struct z_erofs_decompress_req {
         struct super_block *sb;
         struct page **in, **out;
-
         unsigned short pageofs_in, pageofs_out;
         unsigned int inputsize, outputsize;
  
-       /* indicate the algorithm will be used for decompression */
-       unsigned int alg;
+       unsigned int alg;       /* the algorithm for decompression */
         bool inplace_io, partial_decoding, fillgaps;
+       gfp_t gfp;      /* allocation flags for extra temporary buffers */
  };
  
  struct z_erofs_decompressor {
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c

index 072ef6a66823ef351923f2c0514c9ddec50e5d8f..d4cee95af14c7490e85706589853059b99b7e688 100644 (file)
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
                         victim = availables[--top];
                         get_page(victim);
                 } else {
-                       victim = erofs_allocpage(pagepool,
-                                                GFP_KERNEL | __GFP_NOFAIL);
+                       victim = erofs_allocpage(pagepool, rq->gfp);
+                       if (!victim)
+                               return -ENOMEM;
                         set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
                 }
                 rq->out[i] = victim;
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c

index 4a64a9c91dd322379d2c4be2268f6c4c24f995ee..b98872058abe82d4034b84c1c93c46645b50968b 100644 (file)
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb,
  }
  
  int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
-                              struct page **pagepool)
+                              struct page **pgpl)
  {
         const unsigned int nrpages_out =
                 PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -158,8 +158,12 @@ again:
                         strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
                         outsz -= strm->z.avail_out;
                         if (!rq->out[no]) {
-                               rq->out[no] = erofs_allocpage(pagepool,
-                                               GFP_KERNEL | __GFP_NOFAIL);
+                               rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
+                               if (!rq->out[no]) {
+                                       kout = NULL;
+                                       err = -ENOMEM;
+                                       break;
+                               }
                                 set_page_private(rq->out[no],
                                                  Z_EROFS_SHORTLIVED_PAGE);
                         }
@@ -211,8 +215,11 @@ again:
  
                         DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
                                                         rq->in[j]));
-                       tmppage = erofs_allocpage(pagepool,
-                                                 GFP_KERNEL | __GFP_NOFAIL);
+                       tmppage = erofs_allocpage(pgpl, rq->gfp);
+                       if (!tmppage) {
+                               err = -ENOMEM;
+                               goto failed;
+                       }
                         set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
                         copy_highpage(tmppage, rq->in[j]);
                         rq->in[j] = tmppage;
@@ -230,7 +237,7 @@ again:
                         break;
                 }
         }
-
+failed:
         if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
                 err = -EIO;
         if (kout)
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c

index 2dd14f99c1dc10eeea57eedfccbb649bf184828f..6ca357d83cfa458225f20e2d6f6a45307fef2194 100644 (file)
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -148,7 +148,7 @@ again:
  }
  
  int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
-                           struct page **pagepool)
+                           struct page **pgpl)
  {
         const unsigned int nrpages_out =
                 PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -215,8 +215,11 @@ again:
                                                    PAGE_SIZE - pageofs);
                         outlen -= strm->buf.out_size;
                         if (!rq->out[no] && rq->fillgaps) {     /* deduped */
-                               rq->out[no] = erofs_allocpage(pagepool,
-                                               GFP_KERNEL | __GFP_NOFAIL);
+                               rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
+                               if (!rq->out[no]) {
+                                       err = -ENOMEM;
+                                       break;
+                               }
                                 set_page_private(rq->out[no],
                                                  Z_EROFS_SHORTLIVED_PAGE);
                         }
@@ -258,8 +261,11 @@ again:
  
                         DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
                                                         rq->in[j]));
-                       tmppage = erofs_allocpage(pagepool,
-                                                 GFP_KERNEL | __GFP_NOFAIL);
+                       tmppage = erofs_allocpage(pgpl, rq->gfp);
+                       if (!tmppage) {
+                               err = -ENOMEM;
+                               goto failed;
+                       }
                         set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
                         copy_highpage(tmppage, rq->in[j]);
                         rq->in[j] = tmppage;
@@ -277,6 +283,7 @@ again:
                         break;
                 }
         }
+failed:
         if (no < nrpages_out && strm->buf.out)
                 kunmap(rq->out[no]);
         if (ni < nrpages_in)
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c

index bc12030393b24f26231fb363ac07e3150cd6babb..89a7c2453aae6f130e679af1459673397d581842 100644 (file)
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -381,11 +381,12 @@ static int erofs_fscache_init_domain(struct super_block *sb)
                 goto out;
  
         if (!erofs_pseudo_mnt) {
-               erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
-               if (IS_ERR(erofs_pseudo_mnt)) {
-                       err = PTR_ERR(erofs_pseudo_mnt);
+               struct vfsmount *mnt = kern_mount(&erofs_fs_type);
+               if (IS_ERR(mnt)) {
+                       err = PTR_ERR(mnt);
                         goto out;
                 }
+               erofs_pseudo_mnt = mnt;
         }
  
         domain->volume = sbi->volume;
@@ -459,7 +460,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb
  
         inode->i_size = OFFSET_MAX;
         inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
-       mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+       mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
         inode->i_blkbits = EROFS_SB(sb)->blkszbits;
         inode->i_private = ctx;
  
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c

index 3d616dea55dc3dbccbac495988f865947b0d2a96..36e638e8b53a3d290fcb7ade23a40dc4805be9e6 100644 (file)
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -60,7 +60,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                 } else {
                         const unsigned int gotten = sb->s_blocksize - *ofs;
  
-                       copied = kmalloc(vi->inode_isize, GFP_NOFS);
+                       copied = kmalloc(vi->inode_isize, GFP_KERNEL);
                         if (!copied) {
                                 err = -ENOMEM;
                                 goto err_out;
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c

index d4f631d39f0fa83141eafcb13951bb3fd36598bd..f0110a78acb2078aa2ce6eae13e39481e46b7ea9 100644 (file)
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -130,24 +130,24 @@ static void *erofs_find_target_block(struct erofs_buf *target,
                         /* string comparison without already matched prefix */
                         diff = erofs_dirnamecmp(name, &dname, &matched);
  
-                       if (!diff) {
-                               *_ndirents = 0;
-                               goto out;
-                       } else if (diff > 0) {
-                               head = mid + 1;
-                               startprfx = matched;
-
-                               if (!IS_ERR(candidate))
-                                       erofs_put_metabuf(target);
-                               *target = buf;
-                               candidate = de;
-                               *_ndirents = ndirents;
-                       } else {
+                       if (diff < 0) {
                                 erofs_put_metabuf(&buf);
-
                                 back = mid - 1;
                                 endprfx = matched;
+                               continue;
+                       }
+
+                       if (!IS_ERR(candidate))
+                               erofs_put_metabuf(target);
+                       *target = buf;
+                       if (!diff) {
+                               *_ndirents = 0;
+                               return de;
                         }
+                       head = mid + 1;
+                       startprfx = matched;
+                       candidate = de;
+                       *_ndirents = ndirents;
                         continue;
                 }
  out:           /* free if the candidate is valid */
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c

index 5dea308764b45038f8236bf31b004067f0f297a6..e146d09151af4188efe4cb7bf2ad4a938b8596af 100644 (file)
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -81,7 +81,7 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
  repeat:
         xa_lock(&sbi->managed_pslots);
         pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
-                          NULL, grp, GFP_NOFS);
+                          NULL, grp, GFP_KERNEL);
         if (pre) {
                 if (xa_is_err(pre)) {
                         pre = ERR_PTR(xa_err(pre));
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c

index 692c0c39be638dc4b2454b63968a0467043ddc7a..ff0aa72b0db342f10ed7c1b565d2cc7bd6a540ff 100644 (file)
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -82,6 +82,9 @@ struct z_erofs_pcluster {
         /* L: indicate several pageofs_outs or not */
         bool multibases;
  
+       /* L: whether extra buffer allocations are best-effort */
+       bool besteffort;
+
         /* A: compressed bvecs (can be cached or inplaced pages) */
         struct z_erofs_bvec compressed_bvecs[];
  };
@@ -230,7 +233,7 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
                 struct page *nextpage = *candidate_bvpage;
  
                 if (!nextpage) {
-                       nextpage = erofs_allocpage(pagepool, GFP_NOFS);
+                       nextpage = erofs_allocpage(pagepool, GFP_KERNEL);
                         if (!nextpage)
                                 return -ENOMEM;
                         set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE);
@@ -302,7 +305,7 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
                 if (nrpages > pcs->maxpages)
                         continue;
  
-               pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS);
+               pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
                 if (!pcl)
                         return ERR_PTR(-ENOMEM);
                 pcl->pclustersize = size;
@@ -563,21 +566,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
                         __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
         unsigned int i;
  
-       if (i_blocksize(fe->inode) != PAGE_SIZE)
-               return;
-       if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
+       if (i_blocksize(fe->inode) != PAGE_SIZE ||
+           fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
                 return;
  
         for (i = 0; i < pclusterpages; ++i) {
                 struct page *page, *newpage;
                 void *t;        /* mark pages just found for debugging */
  
-               /* the compressed page was loaded before */
+               /* Inaccurate check w/o locking to avoid unneeded lookups */
                 if (READ_ONCE(pcl->compressed_bvecs[i].page))
                         continue;
  
                 page = find_get_page(mc, pcl->obj.index + i);
-
                 if (page) {
                         t = (void *)((unsigned long)page | 1);
                         newpage = NULL;
@@ -597,9 +598,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
                         set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
                         t = (void *)((unsigned long)newpage | 1);
                 }
-
-               if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t))
+               spin_lock(&pcl->obj.lockref.lock);
+               if (!pcl->compressed_bvecs[i].page) {
+                       pcl->compressed_bvecs[i].page = t;
+                       spin_unlock(&pcl->obj.lockref.lock);
                         continue;
+               }
+               spin_unlock(&pcl->obj.lockref.lock);
  
                 if (page)
                         put_page(page);
@@ -694,7 +699,7 @@ static void z_erofs_cache_invalidate_folio(struct folio *folio,
         DBG_BUGON(stop > folio_size(folio) || stop < length);
  
         if (offset == 0 && stop == folio_size(folio))
-               while (!z_erofs_cache_release_folio(folio, GFP_NOFS))
+               while (!z_erofs_cache_release_folio(folio, 0))
                         cond_resched();
  }
  
@@ -713,36 +718,30 @@ int erofs_init_managed_cache(struct super_block *sb)
         set_nlink(inode, 1);
         inode->i_size = OFFSET_MAX;
         inode->i_mapping->a_ops = &z_erofs_cache_aops;
-       mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+       mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
         EROFS_SB(sb)->managed_cache = inode;
         return 0;
  }
  
-static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
-                                  struct z_erofs_bvec *bvec)
-{
-       struct z_erofs_pcluster *const pcl = fe->pcl;
-
-       while (fe->icur > 0) {
-               if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
-                            NULL, bvec->page)) {
-                       pcl->compressed_bvecs[fe->icur] = *bvec;
-                       return true;
-               }
-       }
-       return false;
-}
-
  /* callers must be with pcluster lock held */
  static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
                                struct z_erofs_bvec *bvec, bool exclusive)
  {
+       struct z_erofs_pcluster *pcl = fe->pcl;
         int ret;
  
         if (exclusive) {
                 /* give priority for inplaceio to use file pages first */
-               if (z_erofs_try_inplace_io(fe, bvec))
+               spin_lock(&pcl->obj.lockref.lock);
+               while (fe->icur > 0) {
+                       if (pcl->compressed_bvecs[--fe->icur].page)
+                               continue;
+                       pcl->compressed_bvecs[fe->icur] = *bvec;
+                       spin_unlock(&pcl->obj.lockref.lock);
                         return 0;
+               }
+               spin_unlock(&pcl->obj.lockref.lock);
+
                 /* otherwise, check if it can be used as a bvpage */
                 if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
                     !fe->candidate_bvpage)
@@ -964,7 +963,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
  }
  
  static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
-                               struct page *page)
+                               struct page *page, bool ra)
  {
         struct inode *const inode = fe->inode;
         struct erofs_map_blocks *const map = &fe->map;
@@ -1014,6 +1013,7 @@ repeat:
                 err = z_erofs_pcluster_begin(fe);
                 if (err)
                         goto out;
+               fe->pcl->besteffort |= !ra;
         }
  
         /*
@@ -1280,6 +1280,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
                                         .inplace_io = overlapped,
                                         .partial_decoding = pcl->partial,
                                         .fillgaps = pcl->multibases,
+                                       .gfp = pcl->besteffort ?
+                                               GFP_KERNEL | __GFP_NOFAIL :
+                                               GFP_NOWAIT | __GFP_NORETRY
                                  }, be->pagepool);
  
         /* must handle all compressed pages before actual file pages */
@@ -1322,6 +1325,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
         pcl->length = 0;
         pcl->partial = true;
         pcl->multibases = false;
+       pcl->besteffort = false;
         pcl->bvset.nextpage = NULL;
         pcl->vcnt = 0;
  
@@ -1423,23 +1427,26 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
  {
         gfp_t gfp = mapping_gfp_mask(mc);
         bool tocache = false;
-       struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr;
+       struct z_erofs_bvec zbv;
         struct address_space *mapping;
-       struct page *page, *oldpage;
+       struct page *page;
         int justfound, bs = i_blocksize(f->inode);
  
         /* Except for inplace pages, the entire page can be used for I/Os */
         bvec->bv_offset = 0;
         bvec->bv_len = PAGE_SIZE;
  repeat:
-       oldpage = READ_ONCE(zbv->page);
-       if (!oldpage)
+       spin_lock(&pcl->obj.lockref.lock);
+       zbv = pcl->compressed_bvecs[nr];
+       page = zbv.page;
+       justfound = (unsigned long)page & 1UL;
+       page = (struct page *)((unsigned long)page & ~1UL);
+       pcl->compressed_bvecs[nr].page = page;
+       spin_unlock(&pcl->obj.lockref.lock);
+       if (!page)
                 goto out_allocpage;
  
-       justfound = (unsigned long)oldpage & 1UL;
-       page = (struct page *)((unsigned long)oldpage & ~1UL);
         bvec->bv_page = page;
-
         DBG_BUGON(z_erofs_is_shortlived_page(page));
         /*
          * Handle preallocated cached pages.  We tried to allocate such pages
@@ -1448,7 +1455,6 @@ repeat:
          */
         if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
                 set_page_private(page, 0);
-               WRITE_ONCE(zbv->page, page);
                 tocache = true;
                 goto out_tocache;
         }
@@ -1459,9 +1465,9 @@ repeat:
          * therefore it is impossible for `mapping` to be NULL.
          */
         if (mapping && mapping != mc) {
-               if (zbv->offset < 0)
-                       bvec->bv_offset = round_up(-zbv->offset, bs);
-               bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset;
+               if (zbv.offset < 0)
+                       bvec->bv_offset = round_up(-zbv.offset, bs);
+               bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset;
                 return;
         }
  
@@ -1471,7 +1477,6 @@ repeat:
  
         /* the cached page is still in managed cache */
         if (page->mapping == mc) {
-               WRITE_ONCE(zbv->page, page);
                 /*
                  * The cached page is still available but without a valid
                  * `->private` pcluster hint.  Let's reconnect them.
@@ -1503,11 +1508,15 @@ repeat:
         put_page(page);
  out_allocpage:
         page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
-       if (oldpage != cmpxchg(&zbv->page, oldpage, page)) {
+       spin_lock(&pcl->obj.lockref.lock);
+       if (pcl->compressed_bvecs[nr].page) {
                 erofs_pagepool_add(&f->pagepool, page);
+               spin_unlock(&pcl->obj.lockref.lock);
                 cond_resched();
                 goto repeat;
         }
+       pcl->compressed_bvecs[nr].page = page;
+       spin_unlock(&pcl->obj.lockref.lock);
         bvec->bv_page = page;
  out_tocache:
         if (!tocache || bs != PAGE_SIZE ||
@@ -1685,6 +1694,7 @@ submit_bio_retry:
  
                         if (cur + bvec.bv_len > end)
                                 bvec.bv_len = end - cur;
+                       DBG_BUGON(bvec.bv_len < sb->s_blocksize);
                         if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
                                           bvec.bv_offset))
                                 goto submit_bio_retry;
@@ -1785,7 +1795,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
                         if (PageUptodate(page))
                                 unlock_page(page);
                         else
-                               (void)z_erofs_do_read_page(f, page);
+                               (void)z_erofs_do_read_page(f, page, !!rac);
                         put_page(page);
                 }
  
@@ -1806,7 +1816,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
         f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
  
         z_erofs_pcluster_readmore(&f, NULL, true);
-       err = z_erofs_do_read_page(&f, &folio->page);
+       err = z_erofs_do_read_page(&f, &folio->page, false);
         z_erofs_pcluster_readmore(&f, NULL, false);
         z_erofs_pcluster_end(&f);
  
@@ -1847,7 +1857,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
                 folio = head;
                 head = folio_get_private(folio);
  
-               err = z_erofs_do_read_page(&f, &folio->page);
+               err = z_erofs_do_read_page(&f, &folio->page, true);
                 if (err && err != -EINTR)
                         erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
                                   folio->index, EROFS_I(inode)->nid);
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h

index 9474cd50da6d4fd8b9fba92f1f3d8717f19245dc..361595433480c46562765ad4d5c886a071005c25 100644 (file)
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -275,6 +275,7 @@ struct exfat_sb_info {
  
         spinlock_t inode_hash_lock;
         struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
+       struct rcu_head rcu;
  };
  
  #define EXFAT_CACHE_VALID      0
diff --git a/fs/exfat/file.c b/fs/exfat/file.c

index d25a96a148af4cdb966c5d20f720aa944cab10c2..cc00f1a7a1e18082af9e0e8ff28f5995de75f1ba 100644 (file)
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -35,13 +35,18 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
         if (new_num_clusters == num_clusters)
                 goto out;
  
-       exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags);
-       ret = exfat_find_last_cluster(sb, &clu, &last_clu);
-       if (ret)
-               return ret;
+       if (num_clusters) {
+               exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags);
+               ret = exfat_find_last_cluster(sb, &clu, &last_clu);
+               if (ret)
+                       return ret;
+
+               clu.dir = last_clu + 1;
+       } else {
+               last_clu = EXFAT_EOF_CLUSTER;
+               clu.dir = EXFAT_EOF_CLUSTER;
+       }
  
-       clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ?
-                       EXFAT_EOF_CLUSTER : last_clu + 1;
         clu.size = 0;
         clu.flags = ei->flags;
  
@@ -51,17 +56,19 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
                 return ret;
  
         /* Append new clusters to chain */
-       if (clu.flags != ei->flags) {
-               exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters);
-               ei->flags = ALLOC_FAT_CHAIN;
-       }
-       if (clu.flags == ALLOC_FAT_CHAIN)
-               if (exfat_ent_set(sb, last_clu, clu.dir))
-                       goto free_clu;
-
-       if (num_clusters == 0)
+       if (num_clusters) {
+               if (clu.flags != ei->flags)
+                       if (exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters))
+                               goto free_clu;
+
+               if (clu.flags == ALLOC_FAT_CHAIN)
+                       if (exfat_ent_set(sb, last_clu, clu.dir))
+                               goto free_clu;
+       } else
                 ei->start_clu = clu.dir;
  
+       ei->flags = clu.flags;
+
  out:
         inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
         /* Expanded range not zeroed, do not update valid_size */
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c

index 522edcbb2ce4d17a7f219e6016bff31f5d466fdd..0687f952956c34b6d85e785ee13f231d49679e64 100644 (file)
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -501,7 +501,7 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
         struct inode *inode = mapping->host;
         struct exfat_inode_info *ei = EXFAT_I(inode);
         loff_t pos = iocb->ki_pos;
-       loff_t size = iocb->ki_pos + iov_iter_count(iter);
+       loff_t size = pos + iov_iter_count(iter);
         int rw = iov_iter_rw(iter);
         ssize_t ret;
  
@@ -525,11 +525,10 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
          */
         ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block);
         if (ret < 0) {
-               if (rw == WRITE)
+               if (rw == WRITE && ret != -EIOCBQUEUED)
                         exfat_write_failed(mapping, size);
  
-               if (ret != -EIOCBQUEUED)
-                       return ret;
+               return ret;
         } else
                 size = pos + ret;
  
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c

index 705710f93e2ddd3c911df119b2c8e0ca5a9152cd..afdf13c34ff526fb423f322d3503b571e2d153e8 100644 (file)
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -655,7 +655,6 @@ static int exfat_load_upcase_table(struct super_block *sb,
         unsigned int sect_size = sb->s_blocksize;
         unsigned int i, index = 0;
         u32 chksum = 0;
-       int ret;
         unsigned char skip = false;
         unsigned short *upcase_table;
  
@@ -673,8 +672,7 @@ static int exfat_load_upcase_table(struct super_block *sb,
                 if (!bh) {
                         exfat_err(sb, "failed to read sector(0x%llx)",
                                   (unsigned long long)sector);
-                       ret = -EIO;
-                       goto free_table;
+                       return -EIO;
                 }
                 sector++;
                 for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) {
@@ -701,15 +699,12 @@ static int exfat_load_upcase_table(struct super_block *sb,
  
         exfat_err(sb, "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)",
                   index, chksum, utbl_checksum);
-       ret = -EINVAL;
-free_table:
-       exfat_free_upcase_table(sbi);
-       return ret;
+       return -EINVAL;
  }
  
  static int exfat_load_default_upcase_table(struct super_block *sb)
  {
-       int i, ret = -EIO;
+       int i;
         struct exfat_sb_info *sbi = EXFAT_SB(sb);
         unsigned char skip = false;
         unsigned short uni = 0, *upcase_table;
@@ -740,8 +735,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb)
                 return 0;
  
         /* FATAL error: default upcase table has error */
-       exfat_free_upcase_table(sbi);
-       return ret;
+       return -EIO;
  }
  
  int exfat_create_upcase_table(struct super_block *sb)
diff --git a/fs/exfat/super.c b/fs/exfat/super.c

index d9d4fa91010bb1d226b1d00afdbb841e73911d33..fcb6582677650bd1462e501e9c5bb67a032befd4 100644 (file)
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -39,9 +39,6 @@ static void exfat_put_super(struct super_block *sb)
         exfat_free_bitmap(sbi);
         brelse(sbi->boot_bh);
         mutex_unlock(&sbi->s_lock);
-
-       unload_nls(sbi->nls_io);
-       exfat_free_upcase_table(sbi);
  }
  
  static int exfat_sync_fs(struct super_block *sb, int wait)
@@ -600,7 +597,7 @@ static int __exfat_fill_super(struct super_block *sb)
         ret = exfat_load_bitmap(sb);
         if (ret) {
                 exfat_err(sb, "failed to load alloc-bitmap");
-               goto free_upcase_table;
+               goto free_bh;
         }
  
         ret = exfat_count_used_clusters(sb, &sbi->used_clusters);
@@ -613,8 +610,6 @@ static int __exfat_fill_super(struct super_block *sb)
  
  free_alloc_bitmap:
         exfat_free_bitmap(sbi);
-free_upcase_table:
-       exfat_free_upcase_table(sbi);
  free_bh:
         brelse(sbi->boot_bh);
         return ret;
@@ -701,12 +696,10 @@ put_inode:
         sb->s_root = NULL;
  
  free_table:
-       exfat_free_upcase_table(sbi);
         exfat_free_bitmap(sbi);
         brelse(sbi->boot_bh);
  
  check_nls_io:
-       unload_nls(sbi->nls_io);
         return err;
  }
  
@@ -771,13 +764,22 @@ static int exfat_init_fs_context(struct fs_context *fc)
         return 0;
  }
  
+static void delayed_free(struct rcu_head *p)
+{
+       struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu);
+
+       unload_nls(sbi->nls_io);
+       exfat_free_upcase_table(sbi);
+       exfat_free_sbi(sbi);
+}
+
  static void exfat_kill_sb(struct super_block *sb)
  {
         struct exfat_sb_info *sbi = sb->s_fs_info;
  
         kill_block_super(sb);
         if (sbi)
-               exfat_free_sbi(sbi);
+               call_rcu(&sbi->rcu, delayed_free);
  }
  
  static struct file_system_type exfat_fs_type = {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index a5d784872303ddb6731f2bf0f8579170809b36fd..023571f8dd1b43887b691c4dd61742d07ac1b356 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -252,8 +252,10 @@ struct ext4_allocation_request {
  #define EXT4_MAP_MAPPED                BIT(BH_Mapped)
  #define EXT4_MAP_UNWRITTEN     BIT(BH_Unwritten)
  #define EXT4_MAP_BOUNDARY      BIT(BH_Boundary)
+#define EXT4_MAP_DELAYED       BIT(BH_Delay)
  #define EXT4_MAP_FLAGS         (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
-                                EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
+                                EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
+                                EXT4_MAP_DELAYED)
  
  struct ext4_map_blocks {
         ext4_fsblk_t m_pblk;
@@ -2912,10 +2914,10 @@ extern const struct seq_operations ext4_mb_seq_groups_ops;
  extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
  extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
  extern int ext4_mb_init(struct super_block *);
-extern int ext4_mb_release(struct super_block *);
+extern void ext4_mb_release(struct super_block *);
  extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
                                 struct ext4_allocation_request *, int *);
-extern void ext4_discard_preallocations(struct inode *, unsigned int);
+extern void ext4_discard_preallocations(struct inode *);
  extern int __init ext4_init_mballoc(void);
  extern void ext4_exit_mballoc(void);
  extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c

index 01299b55a567aa41fe7147c3d625e5087b11a0fc..7669d154c05e0c1c86c725c2bab490753317d632 100644 (file)
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
          * i_rwsem. So we can safely drop the i_data_sem here.
          */
         BUG_ON(EXT4_JOURNAL(inode) == NULL);
-       ext4_discard_preallocations(inode, 0);
+       ext4_discard_preallocations(inode);
         up_write(&EXT4_I(inode)->i_data_sem);
         *dropped = 1;
         return 0;
@@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode,
  
  
  /*
- * ext4_ext_determine_hole - determine hole around given block
+ * ext4_ext_find_hole - find hole around given block according to the given path
   * @inode:     inode we lookup in
   * @path:      path in extent tree to @lblk
   * @lblk:      pointer to logical block around which we want to determine hole
@@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode,
   * The function returns the length of a hole starting at @lblk. We update @lblk
   * to the beginning of the hole if we managed to find it.
   */
-static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
-                                          struct ext4_ext_path *path,
-                                          ext4_lblk_t *lblk)
+static ext4_lblk_t ext4_ext_find_hole(struct inode *inode,
+                                     struct ext4_ext_path *path,
+                                     ext4_lblk_t *lblk)
  {
         int depth = ext_depth(inode);
         struct ext4_extent *ex;
@@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
         return len;
  }
  
-/*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
- */
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
-                         ext4_lblk_t hole_len)
-{
-       struct extent_status es;
-
-       ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
-                                 hole_start + hole_len - 1, &es);
-       if (es.es_len) {
-               /* There's delayed extent containing lblock? */
-               if (es.es_lblk <= hole_start)
-                       return;
-               hole_len = min(es.es_lblk - hole_start, hole_len);
-       }
-       ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
-       ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
-                             EXTENT_STATUS_HOLE);
-}
-
  /*
   * ext4_ext_rm_idx:
   * removes index from the index block.
@@ -4062,6 +4038,72 @@ static int get_implied_cluster_alloc(struct super_block *sb,
         return 0;
  }
  
+/*
+ * Determine hole length around the given logical block, first try to
+ * locate and expand the hole from the given @path, and then adjust it
+ * if it's partially or completely converted to delayed extents, insert
+ * it into the extent cache tree if it's indeed a hole, finally return
+ * the length of the determined extent.
+ */
+static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode,
+                                                 struct ext4_ext_path *path,
+                                                 ext4_lblk_t lblk)
+{
+       ext4_lblk_t hole_start, len;
+       struct extent_status es;
+
+       hole_start = lblk;
+       len = ext4_ext_find_hole(inode, path, &hole_start);
+again:
+       ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
+                                 hole_start + len - 1, &es);
+       if (!es.es_len)
+               goto insert_hole;
+
+       /*
+        * There's a delalloc extent in the hole, handle it if the delalloc
+        * extent is in front of, behind and straddle the queried range.
+        */
+       if (lblk >= es.es_lblk + es.es_len) {
+               /*
+                * The delalloc extent is in front of the queried range,
+                * find again from the queried start block.
+                */
+               len -= lblk - hole_start;
+               hole_start = lblk;
+               goto again;
+       } else if (in_range(lblk, es.es_lblk, es.es_len)) {
+               /*
+                * The delalloc extent containing lblk, it must have been
+                * added after ext4_map_blocks() checked the extent status
+                * tree so we are not holding i_rwsem and delalloc info is
+                * only stabilized by i_data_sem we are going to release
+                * soon. Don't modify the extent status tree and report
+                * extent as a hole, just adjust the length to the delalloc
+                * extent's after lblk.
+                */
+               len = es.es_lblk + es.es_len - lblk;
+               return len;
+       } else {
+               /*
+                * The delalloc extent is partially or completely behind
+                * the queried range, update hole length until the
+                * beginning of the delalloc extent.
+                */
+               len = min(es.es_lblk - hole_start, len);
+       }
+
+insert_hole:
+       /* Put just found gap into cache to speed up subsequent requests */
+       ext_debug(inode, " -> %u:%u\n", hole_start, len);
+       ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE);
+
+       /* Update hole_len to reflect hole size after lblk */
+       if (hole_start != lblk)
+               len -= lblk - hole_start;
+
+       return len;
+}
  
  /*
   * Block allocation/map/preallocation routine for extents based files
@@ -4179,22 +4221,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
          * we couldn't try to create block if create flag is zero
          */
         if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
-               ext4_lblk_t hole_start, hole_len;
+               ext4_lblk_t len;
  
-               hole_start = map->m_lblk;
-               hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
-               /*
-                * put just found gap into cache to speed up
-                * subsequent requests
-                */
-               ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+               len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk);
  
-               /* Update hole_len to reflect hole size after map->m_lblk */
-               if (hole_start != map->m_lblk)
-                       hole_len -= map->m_lblk - hole_start;
                 map->m_pblk = 0;
-               map->m_len = min_t(unsigned int, map->m_len, hole_len);
-
+               map->m_len = min_t(unsigned int, map->m_len, len);
                 goto out;
         }
  
@@ -4313,7 +4345,7 @@ got_allocated_blocks:
                          * not a good idea to call discard here directly,
                          * but otherwise we'd need to call it every free().
                          */
-                       ext4_discard_preallocations(inode, 0);
+                       ext4_discard_preallocations(inode);
                         if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                                 fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
                         ext4_free_blocks(handle, inode, NULL, newblock,
@@ -5357,7 +5389,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
         ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
  
         down_write(&EXT4_I(inode)->i_data_sem);
-       ext4_discard_preallocations(inode, 0);
+       ext4_discard_preallocations(inode);
         ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
  
         ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
@@ -5365,7 +5397,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
                 up_write(&EXT4_I(inode)->i_data_sem);
                 goto out_stop;
         }
-       ext4_discard_preallocations(inode, 0);
+       ext4_discard_preallocations(inode);
  
         ret = ext4_ext_shift_extents(inode, handle, punch_stop,
                                      punch_stop - punch_start, SHIFT_LEFT);
@@ -5497,7 +5529,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
                 goto out_stop;
  
         down_write(&EXT4_I(inode)->i_data_sem);
-       ext4_discard_preallocations(inode, 0);
+       ext4_discard_preallocations(inode);
  
         path = ext4_find_extent(inode, offset_lblk, NULL, 0);
         if (IS_ERR(path)) {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c

index 6aa15dafc67786559d3b68ebfefd8f90e119b3fc..54d6ff22585cf1835e8aced5548dbac7c1b89757 100644 (file)
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -174,7 +174,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
                         (atomic_read(&inode->i_writecount) == 1) &&
                         !EXT4_I(inode)->i_reserved_data_blocks) {
                 down_write(&EXT4_I(inode)->i_data_sem);
-               ext4_discard_preallocations(inode, 0);
+               ext4_discard_preallocations(inode);
                 up_write(&EXT4_I(inode)->i_data_sem);
         }
         if (is_dx(inode) && filp->private_data)
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c

index a9f3716119d37249de9cd1c12f02abf5c8db08cb..d8ca7f64f9523412a264dc5e266a72a7e6027e04 100644 (file)
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -714,7 +714,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
          * i_rwsem. So we can safely drop the i_data_sem here.
          */
         BUG_ON(EXT4_JOURNAL(inode) == NULL);
-       ext4_discard_preallocations(inode, 0);
+       ext4_discard_preallocations(inode);
         up_write(&EXT4_I(inode)->i_data_sem);
         *dropped = 1;
         return 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 5af1b0b8680e9fa5f34f94f4a49127b554691f00..2ccf3b5e3a7c4dcb1b0c6a9d27a3c8a77a145730 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -371,7 +371,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
          */
         if ((ei->i_reserved_data_blocks == 0) &&
             !inode_is_open_for_write(inode))
-               ext4_discard_preallocations(inode, 0);
+               ext4_discard_preallocations(inode);
  }
  
  static int __check_block_validity(struct inode *inode, const char *func,
@@ -515,6 +515,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                         map->m_len = retval;
                 } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
                         map->m_pblk = 0;
+                       map->m_flags |= ext4_es_is_delayed(&es) ?
+                                       EXT4_MAP_DELAYED : 0;
                         retval = es.es_len - (map->m_lblk - es.es_lblk);
                         if (retval > map->m_len)
                                 retval = map->m_len;
@@ -1703,11 +1705,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
  
         /* Lookup extent status tree firstly */
         if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
-               if (ext4_es_is_hole(&es)) {
-                       retval = 0;
-                       down_read(&EXT4_I(inode)->i_data_sem);
+               if (ext4_es_is_hole(&es))
                         goto add_delayed;
-               }
  
                 /*
                  * Delayed extent could be allocated by fallocate.
@@ -1749,26 +1748,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                 retval = ext4_ext_map_blocks(NULL, inode, map, 0);
         else
                 retval = ext4_ind_map_blocks(NULL, inode, map, 0);
-
-add_delayed:
-       if (retval == 0) {
-               int ret;
-
-               /*
-                * XXX: __block_prepare_write() unmaps passed block,
-                * is it OK?
-                */
-
-               ret = ext4_insert_delayed_block(inode, map->m_lblk);
-               if (ret != 0) {
-                       retval = ret;
-                       goto out_unlock;
-               }
-
-               map_bh(bh, inode->i_sb, invalid_block);
-               set_buffer_new(bh);
-               set_buffer_delay(bh);
-       } else if (retval > 0) {
+       if (retval < 0) {
+               up_read(&EXT4_I(inode)->i_data_sem);
+               return retval;
+       }
+       if (retval > 0) {
                 unsigned int status;
  
                 if (unlikely(retval != map->m_len)) {
@@ -1783,11 +1767,21 @@ add_delayed:
                                 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                 ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                       map->m_pblk, status);
+               up_read(&EXT4_I(inode)->i_data_sem);
+               return retval;
         }
+       up_read(&EXT4_I(inode)->i_data_sem);
  
-out_unlock:
-       up_read((&EXT4_I(inode)->i_data_sem));
+add_delayed:
+       down_write(&EXT4_I(inode)->i_data_sem);
+       retval = ext4_insert_delayed_block(inode, map->m_lblk);
+       up_write(&EXT4_I(inode)->i_data_sem);
+       if (retval)
+               return retval;
  
+       map_bh(bh, inode->i_sb, invalid_block);
+       set_buffer_new(bh);
+       set_buffer_delay(bh);
         return retval;
  }
  
@@ -3268,6 +3262,9 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
                 iomap->addr = (u64) map->m_pblk << blkbits;
                 if (flags & IOMAP_DAX)
                         iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
+       } else if (map->m_flags & EXT4_MAP_DELAYED) {
+               iomap->type = IOMAP_DELALLOC;
+               iomap->addr = IOMAP_NULL_ADDR;
         } else {
                 iomap->type = IOMAP_HOLE;
                 iomap->addr = IOMAP_NULL_ADDR;
@@ -3430,35 +3427,11 @@ const struct iomap_ops ext4_iomap_overwrite_ops = {
         .iomap_end              = ext4_iomap_end,
  };
  
-static bool ext4_iomap_is_delalloc(struct inode *inode,
-                                  struct ext4_map_blocks *map)
-{
-       struct extent_status es;
-       ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1;
-
-       ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
-                                 map->m_lblk, end, &es);
-
-       if (!es.es_len || es.es_lblk > end)
-               return false;
-
-       if (es.es_lblk > map->m_lblk) {
-               map->m_len = es.es_lblk - map->m_lblk;
-               return false;
-       }
-
-       offset = map->m_lblk - es.es_lblk;
-       map->m_len = es.es_len - offset;
-
-       return true;
-}
-
  static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
                                    loff_t length, unsigned int flags,
                                    struct iomap *iomap, struct iomap *srcmap)
  {
         int ret;
-       bool delalloc = false;
         struct ext4_map_blocks map;
         u8 blkbits = inode->i_blkbits;
  
@@ -3499,13 +3472,8 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
         ret = ext4_map_blocks(NULL, inode, &map, 0);
         if (ret < 0)
                 return ret;
-       if (ret == 0)
-               delalloc = ext4_iomap_is_delalloc(inode, &map);
-
  set_iomap:
         ext4_set_iomap(inode, iomap, &map, offset, length, flags);
-       if (delalloc && iomap->type == IOMAP_HOLE)
-               iomap->type = IOMAP_DELALLOC;
  
         return 0;
  }
@@ -4015,12 +3983,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
  
         /* If there are blocks to remove, do it */
         if (stop_block > first_block) {
+               ext4_lblk_t hole_len = stop_block - first_block;
  
                 down_write(&EXT4_I(inode)->i_data_sem);
-               ext4_discard_preallocations(inode, 0);
+               ext4_discard_preallocations(inode);
  
-               ext4_es_remove_extent(inode, first_block,
-                                     stop_block - first_block);
+               ext4_es_remove_extent(inode, first_block, hole_len);
  
                 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                         ret = ext4_ext_remove_space(inode, first_block,
@@ -4029,6 +3997,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
                         ret = ext4_ind_remove_space(handle, inode, first_block,
                                                     stop_block);
  
+               ext4_es_insert_extent(inode, first_block, hole_len, ~0,
+                                     EXTENT_STATUS_HOLE);
                 up_write(&EXT4_I(inode)->i_data_sem);
         }
         ext4_fc_track_range(handle, inode, first_block, stop_block);
@@ -4170,7 +4140,7 @@ int ext4_truncate(struct inode *inode)
  
         down_write(&EXT4_I(inode)->i_data_sem);
  
-       ext4_discard_preallocations(inode, 0);
+       ext4_discard_preallocations(inode);
  
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                 err = ext4_ext_truncate(handle, inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c

index aa6be510eb8f578f09faf937a3debaabb9c3b499..7160a71044c88a8fe409111ec51cd597408f98f4 100644 (file)
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -467,7 +467,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
         ext4_reset_inode_seed(inode);
         ext4_reset_inode_seed(inode_bl);
  
-       ext4_discard_preallocations(inode, 0);
+       ext4_discard_preallocations(inode);
  
         err = ext4_mark_inode_dirty(handle, inode);
         if (err < 0) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c

index f44f668e407f2bda9fe325631c9a4ab62649b9dc..e4f7cf9d89c45a881d6c403fd50fcc499db0b708 100644 (file)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
  
                         blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
                         blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
+                       ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT);
                         ext4_grp_locked_error(sb, e4b->bd_group,
                                               inode ? inode->i_ino : 0,
                                               blocknr,
                                               "freeing block already freed "
                                               "(bit %u)",
                                               first + i);
-                       ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT);
                 }
                 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
         }
@@ -677,7 +677,7 @@ do {                                                                        \
         }                                                               \
  } while (0)
  
-static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
+static void __mb_check_buddy(struct ext4_buddy *e4b, char *file,
                                 const char *function, int line)
  {
         struct super_block *sb = e4b->bd_sb;
@@ -696,7 +696,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
         void *buddy2;
  
         if (e4b->bd_info->bb_check_counter++ % 10)
-               return 0;
+               return;
  
         while (order > 1) {
                 buddy = mb_find_buddy(e4b, order, &max);
@@ -758,7 +758,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
  
         grp = ext4_get_group_info(sb, e4b->bd_group);
         if (!grp)
-               return NULL;
+               return;
         list_for_each(cur, &grp->bb_prealloc_list) {
                 ext4_group_t groupnr;
                 struct ext4_prealloc_space *pa;
@@ -768,7 +768,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
                 for (i = 0; i < pa->pa_len; i++)
                         MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
         }
-       return 0;
  }
  #undef MB_CHECK_ASSERT
  #define mb_check_buddy(e4b) __mb_check_buddy(e4b,      \
@@ -842,7 +841,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         int new_order;
  
-       if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
+       if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
                 return;
  
         new_order = mb_avg_fragment_size_order(sb,
@@ -871,7 +870,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
   * cr level needs an update.
   */
  static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac,
-                       enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+                       enum criteria *new_cr, ext4_group_t *group)
  {
         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
         struct ext4_group_info *iter;
@@ -945,7 +944,7 @@ ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int o
   * order. Updates *new_cr if cr level needs an update.
   */
  static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac,
-               enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+               enum criteria *new_cr, ext4_group_t *group)
  {
         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
         struct ext4_group_info *grp = NULL;
@@ -990,7 +989,7 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *
   * much and fall to CR_GOAL_LEN_SLOW in that case.
   */
  static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac,
-               enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
+               enum criteria *new_cr, ext4_group_t *group)
  {
         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
         struct ext4_group_info *grp = NULL;
@@ -1125,11 +1124,11 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
         }
  
         if (*new_cr == CR_POWER2_ALIGNED) {
-               ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups);
+               ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
         } else if (*new_cr == CR_GOAL_LEN_FAST) {
-               ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups);
+               ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
         } else if (*new_cr == CR_BEST_AVAIL_LEN) {
-               ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups);
+               ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
         } else {
                 /*
                  * TODO: For CR=2, we can arrange groups in an rb tree sorted by
@@ -1233,6 +1232,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
         atomic64_add(period, &sbi->s_mb_generation_time);
  }
  
+static void mb_regenerate_buddy(struct ext4_buddy *e4b)
+{
+       int count;
+       int order = 1;
+       void *buddy;
+
+       while ((buddy = mb_find_buddy(e4b, order++, &count)))
+               mb_set_bits(buddy, 0, count);
+
+       e4b->bd_info->bb_fragments = 0;
+       memset(e4b->bd_info->bb_counters, 0,
+               sizeof(*e4b->bd_info->bb_counters) *
+               (e4b->bd_sb->s_blocksize_bits + 2));
+
+       ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
+               e4b->bd_bitmap, e4b->bd_group, e4b->bd_info);
+}
+
  /* The buddy information is attached the buddy cache inode
   * for convenience. The information regarding each group
   * is loaded via ext4_mb_load_buddy. The information involve
@@ -1891,11 +1908,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
         mb_check_buddy(e4b);
         mb_free_blocks_double(inode, e4b, first, count);
  
-       this_cpu_inc(discard_pa_seq);
-       e4b->bd_info->bb_free += count;
-       if (first < e4b->bd_info->bb_first_free)
-               e4b->bd_info->bb_first_free = first;
-
         /* access memory sequentially: check left neighbour,
          * clear range and then check right neighbour
          */
@@ -1909,21 +1921,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                 struct ext4_sb_info *sbi = EXT4_SB(sb);
                 ext4_fsblk_t blocknr;
  
+               /*
+                * Fastcommit replay can free already freed blocks which
+                * corrupts allocation info. Regenerate it.
+                */
+               if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+                       mb_regenerate_buddy(e4b);
+                       goto check;
+               }
+
                 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
                 blocknr += EXT4_C2B(sbi, block);
-               if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
-                       ext4_grp_locked_error(sb, e4b->bd_group,
-                                             inode ? inode->i_ino : 0,
-                                             blocknr,
-                                             "freeing already freed block (bit %u); block bitmap corrupt.",
-                                             block);
-                       ext4_mark_group_bitmap_corrupted(
-                               sb, e4b->bd_group,
+               ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
                                 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
-               }
-               goto done;
+               ext4_grp_locked_error(sb, e4b->bd_group,
+                                     inode ? inode->i_ino : 0, blocknr,
+                                     "freeing already freed block (bit %u); block bitmap corrupt.",
+                                     block);
+               return;
         }
  
+       this_cpu_inc(discard_pa_seq);
+       e4b->bd_info->bb_free += count;
+       if (first < e4b->bd_info->bb_first_free)
+               e4b->bd_info->bb_first_free = first;
+
         /* let's maintain fragments counter */
         if (left_is_free && right_is_free)
                 e4b->bd_info->bb_fragments--;
@@ -1948,9 +1970,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
         if (first <= last)
                 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
  
-done:
         mb_set_largest_free_order(sb, e4b->bd_info);
         mb_update_avg_fragment_size(sb, e4b->bd_info);
+check:
         mb_check_buddy(e4b);
  }
  
@@ -2276,6 +2298,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac,
                 return;
  
         ext4_lock_group(ac->ac_sb, group);
+       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+               goto out;
+
         max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
  
         if (max > 0) {
@@ -2283,6 +2308,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac,
                 ext4_mb_use_best_found(ac, e4b);
         }
  
+out:
         ext4_unlock_group(ac->ac_sb, group);
         ext4_mb_unload_buddy(e4b);
  }
@@ -2309,12 +2335,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
         if (err)
                 return err;
  
-       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
-               ext4_mb_unload_buddy(e4b);
-               return 0;
-       }
-
         ext4_lock_group(ac->ac_sb, group);
+       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+               goto out;
+
         max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
                              ac->ac_g_ex.fe_len, &ex);
         ex.fe_logical = 0xDEADFA11; /* debug value */
@@ -2347,6 +2371,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
                 ac->ac_b_ex = ex;
                 ext4_mb_use_best_found(ac, e4b);
         }
+out:
         ext4_unlock_group(ac->ac_sb, group);
         ext4_mb_unload_buddy(e4b);
  
@@ -2380,12 +2405,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
  
                 k = mb_find_next_zero_bit(buddy, max, 0);
                 if (k >= max) {
+                       ext4_mark_group_bitmap_corrupted(ac->ac_sb,
+                                       e4b->bd_group,
+                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT);
                         ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
                                 "%d free clusters of order %d. But found 0",
                                 grp->bb_counters[i], i);
-                       ext4_mark_group_bitmap_corrupted(ac->ac_sb,
-                                        e4b->bd_group,
-                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT);
                         break;
                 }
                 ac->ac_found++;
@@ -2436,12 +2461,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                          * free blocks even though group info says we
                          * have free blocks
                          */
+                       ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT);
                         ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
                                         "%d free clusters as per "
                                         "group info. But bitmap says 0",
                                         free);
-                       ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT);
                         break;
                 }
  
@@ -2467,12 +2492,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                 if (WARN_ON(ex.fe_len <= 0))
                         break;
                 if (free < ex.fe_len) {
+                       ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT);
                         ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
                                         "%d free clusters as per "
                                         "group info. But got %d blocks",
                                         free, ex.fe_len);
-                       ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
-                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT);
                         /*
                          * The number of free blocks differs. This mostly
                          * indicate that the bitmap is corrupt. So exit
@@ -3725,7 +3750,7 @@ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
         return count;
  }
  
-int ext4_mb_release(struct super_block *sb)
+void ext4_mb_release(struct super_block *sb)
  {
         ext4_group_t ngroups = ext4_get_groups_count(sb);
         ext4_group_t i;
@@ -3801,8 +3826,6 @@ int ext4_mb_release(struct super_block *sb)
         }
  
         free_percpu(sbi->s_locality_groups);
-
-       return 0;
  }
  
  static inline int ext4_issue_discard(struct super_block *sb,
@@ -5284,7 +5307,7 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
   * the caller MUST hold group/inode locks.
   * TODO: optimize the case when there are no in-core structures yet
   */
-static noinline_for_stack int
+static noinline_for_stack void
  ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                         struct ext4_prealloc_space *pa)
  {
@@ -5334,11 +5357,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                  */
         }
         atomic_add(free, &sbi->s_mb_discarded);
-
-       return 0;
  }
  
-static noinline_for_stack int
+static noinline_for_stack void
  ext4_mb_release_group_pa(struct ext4_buddy *e4b,
                                 struct ext4_prealloc_space *pa)
  {
@@ -5352,13 +5373,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
         if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
                 ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
                              e4b->bd_group, group, pa->pa_pstart);
-               return 0;
+               return;
         }
         mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
         atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
         trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
-
-       return 0;
  }
  
  /*
@@ -5479,7 +5498,7 @@ out_dbg:
   *
   * FIXME!! Make sure it is valid at all the call sites
   */
-void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
+void ext4_discard_preallocations(struct inode *inode)
  {
         struct ext4_inode_info *ei = EXT4_I(inode);
         struct super_block *sb = inode->i_sb;
@@ -5491,9 +5510,8 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
         struct rb_node *iter;
         int err;
  
-       if (!S_ISREG(inode->i_mode)) {
+       if (!S_ISREG(inode->i_mode))
                 return;
-       }
  
         if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
                 return;
@@ -5501,15 +5519,12 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
         mb_debug(sb, "discard preallocation for inode %lu\n",
                  inode->i_ino);
         trace_ext4_discard_preallocations(inode,
-                       atomic_read(&ei->i_prealloc_active), needed);
-
-       if (needed == 0)
-               needed = UINT_MAX;
+                       atomic_read(&ei->i_prealloc_active));
  
  repeat:
         /* first, collect all pa's in the inode */
         write_lock(&ei->i_prealloc_lock);
-       for (iter = rb_first(&ei->i_prealloc_node); iter && needed;
+       for (iter = rb_first(&ei->i_prealloc_node); iter;
              iter = rb_next(iter)) {
                 pa = rb_entry(iter, struct ext4_prealloc_space,
                               pa_node.inode_node);
@@ -5533,7 +5548,6 @@ repeat:
                         spin_unlock(&pa->pa_lock);
                         rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node);
                         list_add(&pa->u.pa_tmp_list, &list);
-                       needed--;
                         continue;
                 }
  
@@ -5943,7 +5957,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
  /*
   * release all resource we used in allocation
   */
-static int ext4_mb_release_context(struct ext4_allocation_context *ac)
+static void ext4_mb_release_context(struct ext4_allocation_context *ac)
  {
         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
         struct ext4_prealloc_space *pa = ac->ac_pa;
@@ -5980,7 +5994,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
         if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
                 mutex_unlock(&ac->ac_lg->lg_mutex);
         ext4_mb_collect_stats(ac);
-       return 0;
  }
  
  static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
@@ -6761,6 +6774,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
         bool set_trimmed = false;
         void *bitmap;
  
+       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+               return 0;
+
         last = ext4_last_grp_cluster(sb, e4b->bd_group);
         bitmap = e4b->bd_bitmap;
         if (start == 0 && max >= last)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h

index d7aeb5da7d86768c10efdc7ef38b4a6cee38ca78..56938532b4ce258e210178d2406e187eec5ef8cc 100644 (file)
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -192,7 +192,6 @@ struct ext4_allocation_context {
          */
         ext4_grpblk_t   ac_orig_goal_len;
  
-       __u32 ac_groups_considered;
         __u32 ac_flags;         /* allocation hints */
         __u16 ac_groups_scanned;
         __u16 ac_groups_linear_remaining;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c

index 3aa57376d9c2ecbba3d272b572bf56b924b33103..7cd4afa4de1d3127a34ec02f166e90876ad5c6e4 100644 (file)
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -618,6 +618,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
                 goto out;
         o_end = o_start + len;
  
+       *moved_len = 0;
         while (o_start < o_end) {
                 struct ext4_extent *ex;
                 ext4_lblk_t cur_blk, next_blk;
@@ -672,7 +673,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
                  */
                 ext4_double_up_write_data_sem(orig_inode, donor_inode);
                 /* Swap original branches with new branches */
-               move_extent_per_page(o_filp, donor_inode,
+               *moved_len += move_extent_per_page(o_filp, donor_inode,
                                      orig_page_index, donor_page_index,
                                      offset_in_page, cur_len,
                                      unwritten, &ret);
@@ -682,14 +683,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
                 o_start += cur_len;
                 d_start += cur_len;
         }
-       *moved_len = o_start - orig_blk;
-       if (*moved_len > len)
-               *moved_len = len;
  
  out:
         if (*moved_len) {
-               ext4_discard_preallocations(orig_inode, 0);
-               ext4_discard_preallocations(donor_inode, 0);
+               ext4_discard_preallocations(orig_inode);
+               ext4_discard_preallocations(donor_inode);
         }
  
         ext4_free_ext_path(path);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index dcba0f85dfe245ab83598d5a451783b02044be52..0f931d0c227daa8b00950667d8b8bb42a7a28a48 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1525,7 +1525,7 @@ void ext4_clear_inode(struct inode *inode)
         ext4_fc_del(inode);
         invalidate_inode_buffers(inode);
         clear_inode(inode);
-       ext4_discard_preallocations(inode, 0);
+       ext4_discard_preallocations(inode);
         ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
         dquot_drop(inode);
         if (EXT4_I(inode)->jinode) {
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c

index 75bf1f88843c4ce96c285423228a70ff77c08517..645240cc0229fe4a2eda4499ae4a834fe3bd3a66 100644 (file)
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -92,10 +92,12 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
  
         if (!dentry) {
                 bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
-               if (IS_ERR(bh))
-                       return ERR_CAST(bh);
-               if (!bh || !ext4_buffer_uptodate(bh))
+               if (IS_ERR(bh) || !bh)
                         return ERR_PTR(-ECHILD);
+               if (!ext4_buffer_uptodate(bh)) {
+                       brelse(bh);
+                       return ERR_PTR(-ECHILD);
+               }
         } else {
                 bh = ext4_bread(NULL, inode, 0, 0);
                 if (IS_ERR(bh))
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c

index 91e89e68177ee4bd686a920b9dfad4978d8d5062..b6cad106c37e44258bd6e4433cd4aaedfbb98f65 100644 (file)
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -474,8 +474,7 @@ err:
  
  static void cuse_fc_release(struct fuse_conn *fc)
  {
-       struct cuse_conn *cc = fc_to_cc(fc);
-       kfree_rcu(cc, fc.rcu);
+       kfree(fc_to_cc(fc));
  }
  
  /**
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h

index 1df83eebda92771d20a42ea2aaefa118effcbc77..bcbe34488862752154ca2284386baacadf972744 100644 (file)
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -888,6 +888,7 @@ struct fuse_mount {
  
         /* Entry on fc->mounts */
         struct list_head fc_entry;
+       struct rcu_head rcu;
  };
  
  static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c

index 2a6d44f91729bbd7e3bf1c955a952ecdd695bd0f..516ea2979a90ff2d0eff63a71dc6b8edc4c91b98 100644 (file)
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -930,6 +930,14 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
  }
  EXPORT_SYMBOL_GPL(fuse_conn_init);
  
+static void delayed_release(struct rcu_head *p)
+{
+       struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);
+
+       put_user_ns(fc->user_ns);
+       fc->release(fc);
+}
+
  void fuse_conn_put(struct fuse_conn *fc)
  {
         if (refcount_dec_and_test(&fc->count)) {
@@ -941,13 +949,12 @@ void fuse_conn_put(struct fuse_conn *fc)
                 if (fiq->ops->release)
                         fiq->ops->release(fiq);
                 put_pid_ns(fc->pid_ns);
-               put_user_ns(fc->user_ns);
                 bucket = rcu_dereference_protected(fc->curr_bucket, 1);
                 if (bucket) {
                         WARN_ON(atomic_read(&bucket->count) != 1);
                         kfree(bucket);
                 }
-               fc->release(fc);
+               call_rcu(&fc->rcu, delayed_release);
         }
  }
  EXPORT_SYMBOL_GPL(fuse_conn_put);
@@ -1366,7 +1373,7 @@ EXPORT_SYMBOL_GPL(fuse_send_init);
  void fuse_free_conn(struct fuse_conn *fc)
  {
         WARN_ON(!list_empty(&fc->devices));
-       kfree_rcu(fc, rcu);
+       kfree(fc);
  }
  EXPORT_SYMBOL_GPL(fuse_free_conn);
  
@@ -1902,7 +1909,7 @@ static void fuse_sb_destroy(struct super_block *sb)
  void fuse_mount_destroy(struct fuse_mount *fm)
  {
         fuse_conn_put(fm->fc);
-       kfree(fm);
+       kfree_rcu(fm, rcu);
  }
  EXPORT_SYMBOL(fuse_mount_destroy);
  
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c

index 177f1f41f225458344cd000147d71079c19689ab..2e215e8c3c88e57d6ed17ba6cc5cb22420e99af6 100644 (file)
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -32,25 +32,21 @@
  
  static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
  {
-       struct dentry *parent = NULL;
+       struct dentry *parent;
         struct gfs2_sbd *sdp;
         struct gfs2_inode *dip;
-       struct inode *dinode, *inode;
+       struct inode *inode;
         struct gfs2_holder d_gh;
         struct gfs2_inode *ip = NULL;
         int error, valid = 0;
         int had_lock = 0;
  
-       if (flags & LOOKUP_RCU) {
-               dinode = d_inode_rcu(READ_ONCE(dentry->d_parent));
-               if (!dinode)
-                       return -ECHILD;
-       } else {
-               parent = dget_parent(dentry);
-               dinode = d_inode(parent);
-       }
-       sdp = GFS2_SB(dinode);
-       dip = GFS2_I(dinode);
+       if (flags & LOOKUP_RCU)
+               return -ECHILD;
+
+       parent = dget_parent(dentry);
+       sdp = GFS2_SB(d_inode(parent));
+       dip = GFS2_I(d_inode(parent));
         inode = d_inode(dentry);
  
         if (inode) {
@@ -66,8 +62,7 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
  
         had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
         if (!had_lock) {
-               error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
-                                          flags & LOOKUP_RCU ? GL_NOBLOCK : 0, &d_gh);
+               error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
                 if (error)
                         goto out;
         }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c

index 6bfc9383b7b8eca60aad0d88c341904b572681bb..1b95db2c3aac3c9a9d5d881985e70622342b52ab 100644 (file)
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1882,10 +1882,10 @@ int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode,
                 WARN_ON_ONCE(!may_not_block);
                 return -ECHILD;
          }
-       if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-               int noblock = may_not_block ? GL_NOBLOCK : 0;
-               error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
-                                          LM_FLAG_ANY | noblock, &i_gh);
+       if (gfs2_glock_is_locked_by_me(gl) == NULL) {
+               if (may_not_block)
+                       return -ECHILD;
+               error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
                 if (error)
                         return error;
         }
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h

index 7ededcb720c121794eb3782dd84ccff74685296b..012a3d003fbe6162db231058a647cb07f78ef0a9 100644 (file)
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -190,6 +190,7 @@ struct hfsplus_sb_info {
         int work_queued;               /* non-zero delayed work is queued */
         struct delayed_work sync_work; /* FS sync delayed work */
         spinlock_t work_lock;          /* protects sync_work and work_queued */
+       struct rcu_head rcu;
  };
  
  #define HFSPLUS_SB_WRITEBACKUP 0
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c

index 1986b4f18a9013ee27f056b7c871df215f05f862..97920202790f944f0d03dda35cc1a83f27201470 100644 (file)
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -277,6 +277,14 @@ void hfsplus_mark_mdb_dirty(struct super_block *sb)
         spin_unlock(&sbi->work_lock);
  }
  
+static void delayed_free(struct rcu_head *p)
+{
+       struct hfsplus_sb_info *sbi = container_of(p, struct hfsplus_sb_info, rcu);
+
+       unload_nls(sbi->nls);
+       kfree(sbi);
+}
+
  static void hfsplus_put_super(struct super_block *sb)
  {
         struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
@@ -302,9 +310,7 @@ static void hfsplus_put_super(struct super_block *sb)
         hfs_btree_close(sbi->ext_tree);
         kfree(sbi->s_vhdr_buf);
         kfree(sbi->s_backup_vhdr_buf);
-       unload_nls(sbi->nls);
-       kfree(sb->s_fs_info);
-       sb->s_fs_info = NULL;
+       call_rcu(&sbi->rcu, delayed_free);
  }
  
  static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index ea5b8e57d904e20b964fb5e627c4bae894370401..d746866ae3b6ba79a4ed1d8b6600c29cfc28e005 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -100,6 +100,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         loff_t len, vma_len;
         int ret;
         struct hstate *h = hstate_file(file);
+       vm_flags_t vm_flags;
  
         /*
          * vma address alignment (but not the pgoff alignment) has
@@ -141,10 +142,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         file_accessed(file);
  
         ret = -ENOMEM;
+
+       vm_flags = vma->vm_flags;
+       /*
+        * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
+        * reserving here. Note: only for SHM hugetlbfs file, the inode
+        * flag S_PRIVATE is set.
+        */
+       if (inode->i_flags & S_PRIVATE)
+               vm_flags |= VM_NORESERVE;
+
         if (!hugetlb_reserve_pages(inode,
                                 vma->vm_pgoff >> huge_page_order(h),
                                 len >> huge_page_shift(h), vma,
-                               vma->vm_flags))
+                               vm_flags))
                 goto out;
  
         ret = 0;
@@ -340,7 +351,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
                 } else {
                         folio_unlock(folio);
  
-                       if (!folio_test_has_hwpoisoned(folio))
+                       if (!folio_test_hwpoison(folio))
                                 want = nr;
                         else {
                                 /*
@@ -1354,6 +1365,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
  {
         struct hugetlbfs_fs_context *ctx = fc->fs_private;
         struct fs_parse_result result;
+       struct hstate *h;
         char *rest;
         unsigned long ps;
         int opt;
@@ -1398,11 +1410,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
  
         case Opt_pagesize:
                 ps = memparse(param->string, &rest);
-               ctx->hstate = size_to_hstate(ps);
-               if (!ctx->hstate) {
+               h = size_to_hstate(ps);
+               if (!h) {
                         pr_err("Unsupported page size %lu MB\n", ps / SZ_1M);
                         return -EINVAL;
                 }
+               ctx->hstate = h;
                 return 0;
  
         case Opt_min_size:
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c

index 8eec84c651bfba2da05af6a834c4ad3fe7a60f2b..cb3cda1390adb16e1ad8031783849ba59022db87 100644 (file)
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -2763,9 +2763,7 @@ static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl)
   *     leafno  - the number of the leaf to be updated.
   *     newval  - the new value for the leaf.
   *
- * RETURN VALUES:
- *  0          - success
- *     -EIO    - i/o error
+ * RETURN VALUES: none
   */
  static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
  {
@@ -2792,10 +2790,6 @@ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
                  * get the buddy size (number of words covered) of
                  * the new value.
                  */
-
-               if ((newval - tp->dmt_budmin) > BUDMIN)
-                       return -EIO;
-
                 budsz = BUDSIZE(newval, tp->dmt_budmin);
  
                 /* try to join.
diff --git a/fs/namei.c b/fs/namei.c

index 4e0de939fea127034c24d7badb18253a9351b52e..9342fa6a38c2bad85c13144b8d8ae4940e88e7e6 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1717,7 +1717,11 @@ static inline int may_lookup(struct mnt_idmap *idmap,
  {
         if (nd->flags & LOOKUP_RCU) {
                 int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
-               if (err != -ECHILD || !try_to_unlazy(nd))
+               if (!err)               // success, keep going
+                       return 0;
+               if (!try_to_unlazy(nd))
+                       return -ECHILD; // redo it all non-lazy
+               if (err != -ECHILD)     // hard error
                         return err;
         }
         return inode_permission(idmap, nd->inode, MAY_EXEC);
diff --git a/fs/namespace.c b/fs/namespace.c

index 437f60e96d405861683f7e3596063d26c0e55038..5a51315c6678145467520800ceedc3378df5e7da 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4472,10 +4472,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
         /*
          * If this is an attached mount make sure it's located in the callers
          * mount namespace. If it's not don't let the caller interact with it.
-        * If this is a detached mount make sure it has an anonymous mount
-        * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE.
+        *
+        * If this mount doesn't have a parent it's most often simply a
+        * detached mount with an anonymous mount namespace. IOW, something
+        * that's simply not attached yet. But there are apparently also users
+        * that do change mount properties on the rootfs itself. That obviously
+        * neither has a parent nor is it a detached mount so we cannot
+        * unconditionally check for detached mounts.
          */
-       if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns)))
+       if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt))
                 goto out;
  
         /*
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c

index a3059b3168fd95756c7e57986ed999e205dfa8aa..9a0d32e4b422ad09518a6c6143638d0c68fb8b84 100644 (file)
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -477,6 +477,9 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
  
         _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
  
+       if (!iov_iter_count(from))
+               return 0;
+
         if ((iocb->ki_flags & IOCB_DIRECT) ||
             test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
                 return netfs_unbuffered_write_iter(iocb, from);
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c

index 60a40d293c87f5fd1088830f07488775b8725bb4..bee047e20f5d6933e3af452eb150e4eb2e97d941 100644 (file)
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -139,6 +139,9 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
  
         _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
  
+       if (!iov_iter_count(from))
+               return 0;
+
         trace_netfs_write_iter(iocb, from);
         netfs_stat(&netfs_n_rh_dio_write);
  
@@ -146,7 +149,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
         if (ret < 0)
                 return ret;
         ret = generic_write_checks(iocb, from);
-       if (ret < 0)
+       if (ret <= 0)
                 goto out;
         ret = file_remove_privs(file);
         if (ret < 0)
diff --git a/fs/netfs/io.c b/fs/netfs/io.c

index e8ff1e61ce79b7f67e1252f4b66aa461bfe1d4b8..4261ad6c55b664a7e3da006d007de03664790641 100644 (file)
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -748,6 +748,8 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
  
         if (!rreq->submitted) {
                 netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+               if (rreq->origin == NETFS_DIO_READ)
+                       inode_dio_end(rreq->inode);
                 ret = 0;
                 goto out;
         }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c

index 44eca51b28085d9deff764bfe6f9286388e93983..fbdc9ca80f714bdf3d3cad54e63d7c858612e5f1 100644 (file)
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -246,7 +246,7 @@ void nfs_free_client(struct nfs_client *clp)
         put_nfs_version(clp->cl_nfs_mod);
         kfree(clp->cl_hostname);
         kfree(clp->cl_acceptor);
-       kfree(clp);
+       kfree_rcu(clp, rcu);
  }
  EXPORT_SYMBOL_GPL(nfs_free_client);
  
@@ -1006,6 +1006,14 @@ struct nfs_server *nfs_alloc_server(void)
  }
  EXPORT_SYMBOL_GPL(nfs_alloc_server);
  
+static void delayed_free(struct rcu_head *p)
+{
+       struct nfs_server *server = container_of(p, struct nfs_server, rcu);
+
+       nfs_free_iostats(server->io_stats);
+       kfree(server);
+}
+
  /*
   * Free up a server record
   */
@@ -1031,10 +1039,9 @@ void nfs_free_server(struct nfs_server *server)
  
         ida_destroy(&server->lockowner_id);
         ida_destroy(&server->openowner_id);
-       nfs_free_iostats(server->io_stats);
         put_cred(server->cred);
-       kfree(server);
         nfs_release_automount_timer();
+       call_rcu(&server->rcu, delayed_free);
  }
  EXPORT_SYMBOL_GPL(nfs_free_server);
  
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index c8ecbe99905960ccd63b7128f273fc38543d876d..ac505671efbdb7a91a346e4f300e352261562eae 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1431,9 +1431,9 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry)
  static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
  {
         struct inode *inode = d_inode(dentry);
-       struct inode *dir = d_inode(dentry->d_parent);
+       struct inode *dir = d_inode_rcu(dentry->d_parent);
  
-       if (!nfs_verify_change_attribute(dir, verf))
+       if (!dir || !nfs_verify_change_attribute(dir, verf))
                 return;
         if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
                 nfs_set_verifier_delegated(&verf);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c

index 6dc6340e28529d8efe2a9bb99144ca297909e9d3..7d6c657e0409ddc62567554304e4a51779dd2934 100644 (file)
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4945,10 +4945,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
          */
         fl->fl_break_time = 0;
  
-       spin_lock(&fp->fi_lock);
         fp->fi_had_conflict = true;
         nfsd_break_one_deleg(dp);
-       spin_unlock(&fp->fi_lock);
         return false;
  }
  
@@ -5557,12 +5555,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
         if (status)
                 goto out_unlock;
  
+       status = -EAGAIN;
+       if (fp->fi_had_conflict)
+               goto out_unlock;
+
         spin_lock(&state_lock);
         spin_lock(&fp->fi_lock);
-       if (fp->fi_had_conflict)
-               status = -EAGAIN;
-       else
-               status = hash_delegation_locked(dp, fp);
+       status = hash_delegation_locked(dp, fp);
         spin_unlock(&fp->fi_lock);
         spin_unlock(&state_lock);
  
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c

index bec33b89a075858ebf289a95fa4c83dbf6e86103..0e3fc5ba33c73d7f22deefc1cb68ee8395a1efa4 100644 (file)
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -107,7 +107,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
         nilfs_transaction_commit(inode->i_sb);
  
   mapped:
-       folio_wait_stable(folio);
+       /*
+        * Since checksumming including data blocks is performed to determine
+        * the validity of the log to be written and used for recovery, it is
+        * necessary to wait for writeback to finish here, regardless of the
+        * stable write requirement of the backing device.
+        */
+       folio_wait_writeback(folio);
   out:
         sb_end_pagefault(inode->i_sb);
         return vmf_fs_error(ret);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c

index 0955b657938ff2ce993d92e7d8f81322ce71c2e1..a9b8d77c8c1d55b551582b826dafdcdcd047d13a 100644 (file)
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
  
  static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
                                      struct nilfs_recovery_block *rb,
-                                    struct page *page)
+                                    loff_t pos, struct page *page)
  {
         struct buffer_head *bh_org;
+       size_t from = pos & ~PAGE_MASK;
         void *kaddr;
  
         bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
                 return -EIO;
  
         kaddr = kmap_atomic(page);
-       memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+       memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
         kunmap_atomic(kaddr);
         brelse(bh_org);
         return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
                         goto failed_inode;
                 }
  
-               err = nilfs_recovery_copy_block(nilfs, rb, page);
+               err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
                 if (unlikely(err))
                         goto failed_page;
  
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c

index 2590a0860eab022ba68a18b9db8fe181faab5069..2bfb08052d399972dee9fd49583b77b95104ac83 100644 (file)
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1703,7 +1703,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
  
                 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
                                     b_assoc_buffers) {
-                       set_buffer_async_write(bh);
                         if (bh == segbuf->sb_super_root) {
                                 if (bh->b_folio != bd_folio) {
                                         folio_lock(bd_folio);
@@ -1714,6 +1713,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
                                 }
                                 break;
                         }
+                       set_buffer_async_write(bh);
                         if (bh->b_folio != fs_folio) {
                                 nilfs_begin_folio_io(fs_folio);
                                 fs_folio = bh->b_folio;
@@ -1800,7 +1800,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
  
                 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
                                     b_assoc_buffers) {
-                       clear_buffer_async_write(bh);
                         if (bh == segbuf->sb_super_root) {
                                 clear_buffer_uptodate(bh);
                                 if (bh->b_folio != bd_folio) {
@@ -1809,6 +1808,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
                                 }
                                 break;
                         }
+                       clear_buffer_async_write(bh);
                         if (bh->b_folio != fs_folio) {
                                 nilfs_end_folio_io(fs_folio, err);
                                 fs_folio = bh->b_folio;
@@ -1896,8 +1896,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
                                  BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
                                  BIT(BH_NILFS_Redirected));
  
-                       set_mask_bits(&bh->b_state, clear_bits, set_bits);
                         if (bh == segbuf->sb_super_root) {
+                               set_buffer_uptodate(bh);
+                               clear_buffer_dirty(bh);
                                 if (bh->b_folio != bd_folio) {
                                         folio_end_writeback(bd_folio);
                                         bd_folio = bh->b_folio;
@@ -1905,6 +1906,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
                                 update_sr = true;
                                 break;
                         }
+                       set_mask_bits(&bh->b_state, clear_bits, set_bits);
                         if (bh->b_folio != fs_folio) {
                                 nilfs_end_folio_io(fs_folio, 0);
                                 fs_folio = bh->b_folio;
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c

index 63f70259edc0d44d6a52c946c9a90663fe16722a..7aadf5010999455e4d16e20581e2334034451e57 100644 (file)
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -886,7 +886,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
         struct runs_tree *run = &ni->file.run;
         struct ntfs_sb_info *sbi;
         u8 cluster_bits;
-       struct ATTRIB *attr = NULL, *attr_b;
+       struct ATTRIB *attr, *attr_b;
         struct ATTR_LIST_ENTRY *le, *le_b;
         struct mft_inode *mi, *mi_b;
         CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen;
@@ -904,12 +904,8 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
                 *len = 0;
         up_read(&ni->file.run_lock);
  
-       if (*len) {
-               if (*lcn != SPARSE_LCN || !new)
-                       return 0; /* Fast normal way without allocation. */
-               else if (clen > *len)
-                       clen = *len;
-       }
+       if (*len && (*lcn != SPARSE_LCN || !new))
+               return 0; /* Fast normal way without allocation. */
  
         /* No cluster in cache or we need to allocate cluster in hole. */
         sbi = ni->mi.sbi;
@@ -918,6 +914,17 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
         ni_lock(ni);
         down_write(&ni->file.run_lock);
  
+       /* Repeat the code above (under write lock). */
+       if (!run_lookup_entry(run, vcn, lcn, len, NULL))
+               *len = 0;
+
+       if (*len) {
+               if (*lcn != SPARSE_LCN || !new)
+                       goto out; /* normal way without allocation. */
+               if (clen > *len)
+                       clen = *len;
+       }
+
         le_b = NULL;
         attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
         if (!attr_b) {
@@ -1736,8 +1743,10 @@ repack:
                         le_b = NULL;
                         attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL,
                                               0, NULL, &mi_b);
-                       if (!attr_b)
-                               return -ENOENT;
+                       if (!attr_b) {
+                               err = -ENOENT;
+                               goto out;
+                       }
  
                         attr = attr_b;
                         le = le_b;
@@ -1818,13 +1827,15 @@ ins_ext:
  ok:
         run_truncate_around(run, vcn);
  out:
-       if (new_valid > data_size)
-               new_valid = data_size;
+       if (attr_b) {
+               if (new_valid > data_size)
+                       new_valid = data_size;
  
-       valid_size = le64_to_cpu(attr_b->nres.valid_size);
-       if (new_valid != valid_size) {
-               attr_b->nres.valid_size = cpu_to_le64(valid_size);
-               mi_b->dirty = true;
+               valid_size = le64_to_cpu(attr_b->nres.valid_size);
+               if (new_valid != valid_size) {
+                       attr_b->nres.valid_size = cpu_to_le64(valid_size);
+                       mi_b->dirty = true;
+               }
         }
  
         return err;
@@ -2073,7 +2084,7 @@ next_attr:
  
         /* Update inode size. */
         ni->i_valid = valid_size;
-       ni->vfs_inode.i_size = data_size;
+       i_size_write(&ni->vfs_inode, data_size);
         inode_set_bytes(&ni->vfs_inode, total_size);
         ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
         mark_inode_dirty(&ni->vfs_inode);
@@ -2488,7 +2499,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
         mi_b->dirty = true;
  
  done:
-       ni->vfs_inode.i_size += bytes;
+       i_size_write(&ni->vfs_inode, ni->vfs_inode.i_size + bytes);
         ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
         mark_inode_dirty(&ni->vfs_inode);
  
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c

index 7c01735d1219d858b46809147fa06dbcc6cafe4c..9f4bd8d260901ca4fd4db97aea3687459e4ebc1a 100644 (file)
--- a/fs/ntfs3/attrlist.c
+++ b/fs/ntfs3/attrlist.c
@@ -29,7 +29,7 @@ static inline bool al_is_valid_le(const struct ntfs_inode *ni,
  void al_destroy(struct ntfs_inode *ni)
  {
         run_close(&ni->attr_list.run);
-       kfree(ni->attr_list.le);
+       kvfree(ni->attr_list.le);
         ni->attr_list.le = NULL;
         ni->attr_list.size = 0;
         ni->attr_list.dirty = false;
@@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
  {
         size_t off;
         u16 sz;
+       const unsigned le_min_size = le_size(0);
  
         if (!le) {
                 le = ni->attr_list.le;
         } else {
                 sz = le16_to_cpu(le->size);
-               if (sz < sizeof(struct ATTR_LIST_ENTRY)) {
+               if (sz < le_min_size) {
                         /* Impossible 'cause we should not return such le. */
                         return NULL;
                 }
@@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
  
         /* Check boundary. */
         off = PtrOffset(ni->attr_list.le, le);
-       if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) {
+       if (off + le_min_size > ni->attr_list.size) {
                 /* The regular end of list. */
                 return NULL;
         }
@@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
         sz = le16_to_cpu(le->size);
  
         /* Check le for errors. */
-       if (sz < sizeof(struct ATTR_LIST_ENTRY) ||
-           off + sz > ni->attr_list.size ||
+       if (sz < le_min_size || off + sz > ni->attr_list.size ||
             sz < le->name_off + le->name_len * sizeof(short)) {
                 return NULL;
         }
@@ -318,7 +318,7 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name,
                 memcpy(ptr, al->le, off);
                 memcpy(Add2Ptr(ptr, off + sz), le, old_size - off);
                 le = Add2Ptr(ptr, off);
-               kfree(al->le);
+               kvfree(al->le);
                 al->le = ptr;
         } else {
                 memmove(Add2Ptr(le, sz), le, old_size - off);
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c

index 63f14a0232f6a0e0672c5373748bf77b72931bf2..845f9b22deef0f42cabfb4156d4d8fe05c31fce9 100644 (file)
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -124,7 +124,7 @@ void wnd_close(struct wnd_bitmap *wnd)
  {
         struct rb_node *node, *next;
  
-       kfree(wnd->free_bits);
+       kvfree(wnd->free_bits);
         wnd->free_bits = NULL;
         run_close(&wnd->run);
  
@@ -1360,7 +1360,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
                 memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short));
                 memset(new_free + wnd->nwnd, 0,
                        (new_wnd - wnd->nwnd) * sizeof(short));
-               kfree(wnd->free_bits);
+               kvfree(wnd->free_bits);
                 wnd->free_bits = new_free;
         }
  
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c

index ec0566b322d5d0b4b36533a3a218671bb0ff7b02..5cf3d9decf646b1935517e8b564d807626e60e0f 100644 (file)
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
                 return 0;
         }
  
-       /* NTFS: symlinks are "dir + reparse" or "file + reparse" */
-       if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT)
-               dt_type = DT_LNK;
-       else
-               dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+       /*
+        * NTFS: symlinks are "dir + reparse" or "file + reparse"
+        * Unfortunately reparse attribute is used for many purposes (several dozens).
+        * It is not possible here to know is this name symlink or not.
+        * To get exactly the type of name we should to open inode (read mft).
+        * getattr for opened file (fstat) correctly returns symlink.
+        */
+       dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+
+       /*
+        * It is not reliable to detect the type of name using duplicated information
+        * stored in parent directory.
+        * The only correct way to get the type of name - read MFT record and find ATTR_STD.
+        * The code below is not good idea.
+        * It does additional locks/reads just to get the type of name.
+        * Should we use additional mount option to enable branch below?
+        */
+       if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) &&
+           ino != ni->mi.rno) {
+               struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL);
+               if (!IS_ERR_OR_NULL(inode)) {
+                       dt_type = fs_umode_to_dtype(inode->i_mode);
+                       iput(inode);
+               }
+       }
  
         return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
  }
@@ -495,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
         struct INDEX_HDR *hdr;
         const struct ATTR_FILE_NAME *fname;
         u32 e_size, off, end;
-       u64 vbo = 0;
         size_t drs = 0, fles = 0, bit = 0;
-       loff_t i_size = ni->vfs_inode.i_size;
         struct indx_node *node = NULL;
-       u8 index_bits = ni->dir.index_bits;
+       size_t max_indx = i_size_read(&ni->vfs_inode) >> ni->dir.index_bits;
  
         if (is_empty)
                 *is_empty = true;
@@ -518,8 +536,10 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
                         e = Add2Ptr(hdr, off);
                         e_size = le16_to_cpu(e->size);
                         if (e_size < sizeof(struct NTFS_DE) ||
-                           off + e_size > end)
+                           off + e_size > end) {
+                               /* Looks like corruption. */
                                 break;
+                       }
  
                         if (de_is_last(e))
                                 break;
@@ -543,7 +563,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
                                 fles += 1;
                 }
  
-               if (vbo >= i_size)
+               if (bit >= max_indx)
                         goto out;
  
                 err = indx_used_bit(&ni->dir, ni, &bit);
@@ -553,8 +573,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
                 if (bit == MINUS_ONE_T)
                         goto out;
  
-               vbo = (u64)bit << index_bits;
-               if (vbo >= i_size)
+               if (bit >= max_indx)
                         goto out;
  
                 err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits,
@@ -564,7 +583,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
  
                 hdr = &node->index->ihdr;
                 bit += 1;
-               vbo = (u64)bit << ni->dir.idx2vbn_bits;
         }
  
  out:
@@ -593,5 +611,9 @@ const struct file_operations ntfs_dir_operations = {
         .iterate_shared = ntfs_readdir,
         .fsync          = generic_file_fsync,
         .open           = ntfs_file_open,
+       .unlocked_ioctl = ntfs_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ntfs_compat_ioctl,
+#endif
  };
  // clang-format on
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c

index a5a30a24ce5dfa70d670826d1b5ac16a668d06be..5418662c80d8878afe72a8b8e8ffc43cc834b176 100644 (file)
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -48,7 +48,7 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
         return 0;
  }
  
-static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
+long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
  {
         struct inode *inode = file_inode(filp);
         struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
@@ -61,7 +61,7 @@ static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
  }
  
  #ifdef CONFIG_COMPAT
-static long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
+long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
  
  {
         return ntfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
@@ -188,6 +188,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
         u32 bh_next, bh_off, to;
         sector_t iblock;
         struct folio *folio;
+       bool dirty = false;
  
         for (; idx < idx_end; idx += 1, from = 0) {
                 page_off = (loff_t)idx << PAGE_SHIFT;
@@ -223,29 +224,27 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
                         /* Ok, it's mapped. Make sure it's up-to-date. */
                         if (folio_test_uptodate(folio))
                                 set_buffer_uptodate(bh);
-
-                       if (!buffer_uptodate(bh)) {
-                               err = bh_read(bh, 0);
-                               if (err < 0) {
-                                       folio_unlock(folio);
-                                       folio_put(folio);
-                                       goto out;
-                               }
+                       else if (bh_read(bh, 0) < 0) {
+                               err = -EIO;
+                               folio_unlock(folio);
+                               folio_put(folio);
+                               goto out;
                         }
  
                         mark_buffer_dirty(bh);
-
                 } while (bh_off = bh_next, iblock += 1,
                          head != (bh = bh->b_this_page));
  
                 folio_zero_segment(folio, from, to);
+               dirty = true;
  
                 folio_unlock(folio);
                 folio_put(folio);
                 cond_resched();
         }
  out:
-       mark_inode_dirty(inode);
+       if (dirty)
+               mark_inode_dirty(inode);
         return err;
  }
  
@@ -261,6 +260,9 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         bool rw = vma->vm_flags & VM_WRITE;
         int err;
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         if (is_encrypted(ni)) {
                 ntfs_inode_warn(inode, "mmap encrypted not supported");
                 return -EOPNOTSUPP;
@@ -499,10 +501,14 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
                 ni_lock(ni);
                 err = attr_punch_hole(ni, vbo, len, &frame_size);
                 ni_unlock(ni);
+               if (!err)
+                       goto ok;
+
                 if (err != E_NTFS_NOTALIGNED)
                         goto out;
  
                 /* Process not aligned punch. */
+               err = 0;
                 mask = frame_size - 1;
                 vbo_a = (vbo + mask) & ~mask;
                 end_a = end & ~mask;
@@ -525,6 +531,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
                         ni_lock(ni);
                         err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL);
                         ni_unlock(ni);
+                       if (err)
+                               goto out;
                 }
         } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
                 /*
@@ -564,6 +572,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
                 ni_lock(ni);
                 err = attr_insert_range(ni, vbo, len);
                 ni_unlock(ni);
+               if (err)
+                       goto out;
         } else {
                 /* Check new size. */
                 u8 cluster_bits = sbi->cluster_bits;
@@ -633,11 +643,18 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
                                             &ni->file.run, i_size, &ni->i_valid,
                                             true, NULL);
                         ni_unlock(ni);
+                       if (err)
+                               goto out;
                 } else if (new_size > i_size) {
-                       inode->i_size = new_size;
+                       i_size_write(inode, new_size);
                 }
         }
  
+ok:
+       err = file_modified(file);
+       if (err)
+               goto out;
+
  out:
         if (map_locked)
                 filemap_invalidate_unlock(mapping);
@@ -663,6 +680,9 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
         umode_t mode = inode->i_mode;
         int err;
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         err = setattr_prepare(idmap, dentry, attr);
         if (err)
                 goto out;
@@ -676,7 +696,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
                         goto out;
                 }
                 inode_dio_wait(inode);
-               oldsize = inode->i_size;
+               oldsize = i_size_read(inode);
                 newsize = attr->ia_size;
  
                 if (newsize <= oldsize)
@@ -688,7 +708,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
                         goto out;
  
                 ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
-               inode->i_size = newsize;
+               i_size_write(inode, newsize);
         }
  
         setattr_copy(idmap, inode, attr);
@@ -718,6 +738,9 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
         struct inode *inode = file->f_mapping->host;
         struct ntfs_inode *ni = ntfs_i(inode);
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         if (is_encrypted(ni)) {
                 ntfs_inode_warn(inode, "encrypted i/o not supported");
                 return -EOPNOTSUPP;
@@ -752,6 +775,9 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos,
         struct inode *inode = in->f_mapping->host;
         struct ntfs_inode *ni = ntfs_i(inode);
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         if (is_encrypted(ni)) {
                 ntfs_inode_warn(inode, "encrypted i/o not supported");
                 return -EOPNOTSUPP;
@@ -821,7 +847,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
         size_t count = iov_iter_count(from);
         loff_t pos = iocb->ki_pos;
         struct inode *inode = file_inode(file);
-       loff_t i_size = inode->i_size;
+       loff_t i_size = i_size_read(inode);
         struct address_space *mapping = inode->i_mapping;
         struct ntfs_inode *ni = ntfs_i(inode);
         u64 valid = ni->i_valid;
@@ -1028,6 +1054,8 @@ out:
         iocb->ki_pos += written;
         if (iocb->ki_pos > ni->i_valid)
                 ni->i_valid = iocb->ki_pos;
+       if (iocb->ki_pos > i_size)
+               i_size_write(inode, iocb->ki_pos);
  
         return written;
  }
@@ -1041,8 +1069,12 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
         struct address_space *mapping = file->f_mapping;
         struct inode *inode = mapping->host;
         ssize_t ret;
+       int err;
         struct ntfs_inode *ni = ntfs_i(inode);
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         if (is_encrypted(ni)) {
                 ntfs_inode_warn(inode, "encrypted i/o not supported");
                 return -EOPNOTSUPP;
@@ -1068,6 +1100,12 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
         if (ret <= 0)
                 goto out;
  
+       err = file_modified(iocb->ki_filp);
+       if (err) {
+               ret = err;
+               goto out;
+       }
+
         if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
                 /* Should never be here, see ntfs_file_open(). */
                 ret = -EOPNOTSUPP;
@@ -1097,6 +1135,9 @@ int ntfs_file_open(struct inode *inode, struct file *file)
  {
         struct ntfs_inode *ni = ntfs_i(inode);
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         if (unlikely((is_compressed(ni) || is_encrypted(ni)) &&
                      (file->f_flags & O_DIRECT))) {
                 return -EOPNOTSUPP;
@@ -1138,7 +1179,8 @@ static int ntfs_file_release(struct inode *inode, struct file *file)
                 down_write(&ni->file.run_lock);
  
                 err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
-                                   inode->i_size, &ni->i_valid, false, NULL);
+                                   i_size_read(inode), &ni->i_valid, false,
+                                   NULL);
  
                 up_write(&ni->file.run_lock);
                 ni_unlock(ni);
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c

index 3df2d9e34b9144f4b039ccc6197c9aa249b7ac64..7f27382e0ce25bcb2c660fa799cf8295a9cf486b 100644 (file)
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -778,7 +778,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
         run_deallocate(sbi, &ni->attr_list.run, true);
         run_close(&ni->attr_list.run);
         ni->attr_list.size = 0;
-       kfree(ni->attr_list.le);
+       kvfree(ni->attr_list.le);
         ni->attr_list.le = NULL;
         ni->attr_list.dirty = false;
  
@@ -927,7 +927,7 @@ int ni_create_attr_list(struct ntfs_inode *ni)
         return 0;
  
  out:
-       kfree(ni->attr_list.le);
+       kvfree(ni->attr_list.le);
         ni->attr_list.le = NULL;
         ni->attr_list.size = 0;
         return err;
@@ -2099,7 +2099,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
         gfp_t gfp_mask;
         struct page *pg;
  
-       if (vbo >= ni->vfs_inode.i_size) {
+       if (vbo >= i_size_read(&ni->vfs_inode)) {
                 SetPageUptodate(page);
                 err = 0;
                 goto out;
@@ -2173,7 +2173,7 @@ int ni_decompress_file(struct ntfs_inode *ni)
  {
         struct ntfs_sb_info *sbi = ni->mi.sbi;
         struct inode *inode = &ni->vfs_inode;
-       loff_t i_size = inode->i_size;
+       loff_t i_size = i_size_read(inode);
         struct address_space *mapping = inode->i_mapping;
         gfp_t gfp_mask = mapping_gfp_mask(mapping);
         struct page **pages = NULL;
@@ -2508,6 +2508,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
                 err = -EOPNOTSUPP;
                 goto out1;
  #else
+               loff_t i_size = i_size_read(&ni->vfs_inode);
                 u32 frame_bits = ni_ext_compress_bits(ni);
                 u64 frame64 = frame_vbo >> frame_bits;
                 u64 frames, vbo_data;
@@ -2548,7 +2549,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
                         }
                 }
  
-               frames = (ni->vfs_inode.i_size - 1) >> frame_bits;
+               frames = (i_size - 1) >> frame_bits;
  
                 err = attr_wof_frame_info(ni, attr, run, frame64, frames,
                                           frame_bits, &ondisk_size, &vbo_data);
@@ -2556,8 +2557,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
                         goto out2;
  
                 if (frame64 == frames) {
-                       unc_size = 1 + ((ni->vfs_inode.i_size - 1) &
-                                       (frame_size - 1));
+                       unc_size = 1 + ((i_size - 1) & (frame_size - 1));
                         ondisk_size = attr_size(attr) - vbo_data;
                 } else {
                         unc_size = frame_size;
@@ -3259,6 +3259,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
         if (is_bad_inode(inode) || sb_rdonly(sb))
                 return 0;
  
+       if (unlikely(ntfs3_forced_shutdown(sb)))
+               return -EIO;
+
         if (!ni_trylock(ni)) {
                 /* 'ni' is under modification, skip for now. */
                 mark_inode_dirty_sync(inode);
@@ -3288,7 +3291,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
                         modified = true;
                 }
  
-               ts = inode_get_mtime(inode);
+               ts = inode_get_ctime(inode);
                 dup.c_time = kernel2nt(&ts);
                 if (std->c_time != dup.c_time) {
                         std->c_time = dup.c_time;
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c

index 98ccb66508583138ed7f5c273b2f4450be88159b..855519713bf79074ed336ca7094cca5d5cdbc009 100644 (file)
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
  {
         const struct RESTART_AREA *ra;
         u16 cl, fl, ul;
-       u32 off, l_size, file_dat_bits, file_size_round;
+       u32 off, l_size, seq_bits;
         u16 ro = le16_to_cpu(rhdr->ra_off);
         u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
  
@@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
         /* Make sure the sequence number bits match the log file size. */
         l_size = le64_to_cpu(ra->l_size);
  
-       file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits);
-       file_size_round = 1u << (file_dat_bits + 3);
-       if (file_size_round != l_size &&
-           (file_size_round < l_size || (file_size_round / 2) > l_size)) {
-               return false;
+       seq_bits = sizeof(u64) * 8 + 3;
+       while (l_size) {
+               l_size >>= 1;
+               seq_bits -= 1;
         }
  
+       if (seq_bits != ra->seq_num_bits)
+               return false;
+
         /* The log page data offset and record header length must be quad-aligned. */
         if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) ||
             !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8))
@@ -974,6 +976,16 @@ skip_looking:
         return e;
  }
  
+struct restart_info {
+       u64 last_lsn;
+       struct RESTART_HDR *r_page;
+       u32 vbo;
+       bool chkdsk_was_run;
+       bool valid_page;
+       bool initialized;
+       bool restart;
+};
+
  #define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001)
  
  #define NTFSLOG_WRAPPED 0x00000001
@@ -987,6 +999,7 @@ struct ntfs_log {
         struct ntfs_inode *ni;
  
         u32 l_size;
+       u32 orig_file_size;
         u32 sys_page_size;
         u32 sys_page_mask;
         u32 page_size;
@@ -1040,6 +1053,8 @@ struct ntfs_log {
  
         struct CLIENT_ID client_id;
         u32 client_undo_commit;
+
+       struct restart_info rst_info, rst_info2;
  };
  
  static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn)
@@ -1105,16 +1120,6 @@ static inline bool verify_client_lsn(struct ntfs_log *log,
                lsn <= le64_to_cpu(log->ra->current_lsn) && lsn;
  }
  
-struct restart_info {
-       u64 last_lsn;
-       struct RESTART_HDR *r_page;
-       u32 vbo;
-       bool chkdsk_was_run;
-       bool valid_page;
-       bool initialized;
-       bool restart;
-};
-
  static int read_log_page(struct ntfs_log *log, u32 vbo,
                          struct RECORD_PAGE_HDR **buffer, bool *usa_error)
  {
@@ -1176,7 +1181,7 @@ out:
   * restart page header. It will stop the first time we find a
   * valid page header.
   */
-static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
+static int log_read_rst(struct ntfs_log *log, bool first,
                         struct restart_info *info)
  {
         u32 skip, vbo;
@@ -1192,7 +1197,7 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
         }
  
         /* Loop continuously until we succeed. */
-       for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) {
+       for (; vbo < log->l_size; vbo = 2 * vbo + skip, skip = 0) {
                 bool usa_error;
                 bool brst, bchk;
                 struct RESTART_AREA *ra;
@@ -1285,22 +1290,17 @@ check_result:
  /*
   * Ilog_init_pg_hdr - Init @log from restart page header.
   */
-static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
-                           u32 page_size, u16 major_ver, u16 minor_ver)
+static void log_init_pg_hdr(struct ntfs_log *log, u16 major_ver, u16 minor_ver)
  {
-       log->sys_page_size = sys_page_size;
-       log->sys_page_mask = sys_page_size - 1;
-       log->page_size = page_size;
-       log->page_mask = page_size - 1;
-       log->page_bits = blksize_bits(page_size);
+       log->sys_page_size = log->page_size;
+       log->sys_page_mask = log->page_mask;
  
         log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits;
         if (!log->clst_per_page)
                 log->clst_per_page = 1;
  
-       log->first_page = major_ver >= 2 ?
-                                 0x22 * page_size :
-                                 ((sys_page_size << 1) + (page_size << 1));
+       log->first_page = major_ver >= 2 ? 0x22 * log->page_size :
+                                          4 * log->page_size;
         log->major_ver = major_ver;
         log->minor_ver = minor_ver;
  }
@@ -1308,12 +1308,11 @@ static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
  /*
   * log_create - Init @log in cases when we don't have a restart area to use.
   */
-static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn,
+static void log_create(struct ntfs_log *log, const u64 last_lsn,
                        u32 open_log_count, bool wrapped, bool use_multi_page)
  {
-       log->l_size = l_size;
         /* All file offsets must be quadword aligned. */
-       log->file_data_bits = blksize_bits(l_size) - 3;
+       log->file_data_bits = blksize_bits(log->l_size) - 3;
         log->seq_num_mask = (8 << log->file_data_bits) - 1;
         log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits;
         log->seq_num = (last_lsn >> log->file_data_bits) + 2;
@@ -3720,10 +3719,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
         struct ntfs_sb_info *sbi = ni->mi.sbi;
         struct ntfs_log *log;
  
-       struct restart_info rst_info, rst_info2;
-       u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0;
+       u64 rec_lsn, checkpt_lsn = 0, rlsn = 0;
         struct ATTR_NAME_ENTRY *attr_names = NULL;
-       struct ATTR_NAME_ENTRY *ane;
         struct RESTART_TABLE *dptbl = NULL;
         struct RESTART_TABLE *trtbl = NULL;
         const struct RESTART_TABLE *rt;
@@ -3741,9 +3738,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
         struct TRANSACTION_ENTRY *tr;
         struct DIR_PAGE_ENTRY *dp;
         u32 i, bytes_per_attr_entry;
-       u32 l_size = ni->vfs_inode.i_size;
-       u32 orig_file_size = l_size;
-       u32 page_size, vbo, tail, off, dlen;
+       u32 vbo, tail, off, dlen;
         u32 saved_len, rec_len, transact_id;
         bool use_second_page;
         struct RESTART_AREA *ra2, *ra = NULL;
@@ -3758,52 +3753,50 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
         u16 t16;
         u32 t32;
  
-       /* Get the size of page. NOTE: To replay we can use default page. */
-#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
-       page_size = norm_file_page(PAGE_SIZE, &l_size, true);
-#else
-       page_size = norm_file_page(PAGE_SIZE, &l_size, false);
-#endif
-       if (!page_size)
-               return -EINVAL;
-
         log = kzalloc(sizeof(struct ntfs_log), GFP_NOFS);
         if (!log)
                 return -ENOMEM;
  
         log->ni = ni;
-       log->l_size = l_size;
-       log->one_page_buf = kmalloc(page_size, GFP_NOFS);
+       log->l_size = log->orig_file_size = ni->vfs_inode.i_size;
  
+       /* Get the size of page. NOTE: To replay we can use default page. */
+#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
+       log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, true);
+#else
+       log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, false);
+#endif
+       if (!log->page_size) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       log->one_page_buf = kmalloc(log->page_size, GFP_NOFS);
         if (!log->one_page_buf) {
                 err = -ENOMEM;
                 goto out;
         }
  
-       log->page_size = page_size;
-       log->page_mask = page_size - 1;
-       log->page_bits = blksize_bits(page_size);
+       log->page_mask = log->page_size - 1;
+       log->page_bits = blksize_bits(log->page_size);
  
         /* Look for a restart area on the disk. */
-       memset(&rst_info, 0, sizeof(struct restart_info));
-       err = log_read_rst(log, l_size, true, &rst_info);
+       err = log_read_rst(log, true, &log->rst_info);
         if (err)
                 goto out;
  
         /* remember 'initialized' */
-       *initialized = rst_info.initialized;
+       *initialized = log->rst_info.initialized;
  
-       if (!rst_info.restart) {
-               if (rst_info.initialized) {
+       if (!log->rst_info.restart) {
+               if (log->rst_info.initialized) {
                         /* No restart area but the file is not initialized. */
                         err = -EINVAL;
                         goto out;
                 }
  
-               log_init_pg_hdr(log, page_size, page_size, 1, 1);
-               log_create(log, l_size, 0, get_random_u32(), false, false);
-
-               log->ra = ra;
+               log_init_pg_hdr(log, 1, 1);
+               log_create(log, 0, get_random_u32(), false, false);
  
                 ra = log_create_ra(log);
                 if (!ra) {
@@ -3820,25 +3813,26 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
          * If the restart offset above wasn't zero then we won't
          * look for a second restart.
          */
-       if (rst_info.vbo)
+       if (log->rst_info.vbo)
                 goto check_restart_area;
  
-       memset(&rst_info2, 0, sizeof(struct restart_info));
-       err = log_read_rst(log, l_size, false, &rst_info2);
+       err = log_read_rst(log, false, &log->rst_info2);
         if (err)
                 goto out;
  
         /* Determine which restart area to use. */
-       if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn)
+       if (!log->rst_info2.restart ||
+           log->rst_info2.last_lsn <= log->rst_info.last_lsn)
                 goto use_first_page;
  
         use_second_page = true;
  
-       if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) {
+       if (log->rst_info.chkdsk_was_run &&
+           log->page_size != log->rst_info.vbo) {
                 struct RECORD_PAGE_HDR *sp = NULL;
                 bool usa_error;
  
-               if (!read_log_page(log, page_size, &sp, &usa_error) &&
+               if (!read_log_page(log, log->page_size, &sp, &usa_error) &&
                     sp->rhdr.sign == NTFS_CHKD_SIGNATURE) {
                         use_second_page = false;
                 }
@@ -3846,52 +3840,43 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
         }
  
         if (use_second_page) {
-               kfree(rst_info.r_page);
-               memcpy(&rst_info, &rst_info2, sizeof(struct restart_info));
-               rst_info2.r_page = NULL;
+               kfree(log->rst_info.r_page);
+               memcpy(&log->rst_info, &log->rst_info2,
+                      sizeof(struct restart_info));
+               log->rst_info2.r_page = NULL;
         }
  
  use_first_page:
-       kfree(rst_info2.r_page);
+       kfree(log->rst_info2.r_page);
  
  check_restart_area:
         /*
          * If the restart area is at offset 0, we want
          * to write the second restart area first.
          */
-       log->init_ra = !!rst_info.vbo;
+       log->init_ra = !!log->rst_info.vbo;
  
         /* If we have a valid page then grab a pointer to the restart area. */
-       ra2 = rst_info.valid_page ?
-                     Add2Ptr(rst_info.r_page,
-                             le16_to_cpu(rst_info.r_page->ra_off)) :
+       ra2 = log->rst_info.valid_page ?
+                     Add2Ptr(log->rst_info.r_page,
+                             le16_to_cpu(log->rst_info.r_page->ra_off)) :
                       NULL;
  
-       if (rst_info.chkdsk_was_run ||
+       if (log->rst_info.chkdsk_was_run ||
             (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
                 bool wrapped = false;
                 bool use_multi_page = false;
                 u32 open_log_count;
  
                 /* Do some checks based on whether we have a valid log page. */
-               if (!rst_info.valid_page) {
-                       open_log_count = get_random_u32();
-                       goto init_log_instance;
-               }
-               open_log_count = le32_to_cpu(ra2->open_log_count);
-
-               /*
-                * If the restart page size isn't changing then we want to
-                * check how much work we need to do.
-                */
-               if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size))
-                       goto init_log_instance;
+               open_log_count = log->rst_info.valid_page ?
+                                        le32_to_cpu(ra2->open_log_count) :
+                                        get_random_u32();
  
-init_log_instance:
-               log_init_pg_hdr(log, page_size, page_size, 1, 1);
+               log_init_pg_hdr(log, 1, 1);
  
-               log_create(log, l_size, rst_info.last_lsn, open_log_count,
-                          wrapped, use_multi_page);
+               log_create(log, log->rst_info.last_lsn, open_log_count, wrapped,
+                          use_multi_page);
  
                 ra = log_create_ra(log);
                 if (!ra) {
@@ -3916,28 +3901,27 @@ init_log_instance:
          * use the log file. We must use the system page size instead of the
          * default size if there is not a clean shutdown.
          */
-       t32 = le32_to_cpu(rst_info.r_page->sys_page_size);
-       if (page_size != t32) {
-               l_size = orig_file_size;
-               page_size =
-                       norm_file_page(t32, &l_size, t32 == DefaultLogPageSize);
+       t32 = le32_to_cpu(log->rst_info.r_page->sys_page_size);
+       if (log->page_size != t32) {
+               log->l_size = log->orig_file_size;
+               log->page_size = norm_file_page(t32, &log->l_size,
+                                               t32 == DefaultLogPageSize);
         }
  
-       if (page_size != t32 ||
-           page_size != le32_to_cpu(rst_info.r_page->page_size)) {
+       if (log->page_size != t32 ||
+           log->page_size != le32_to_cpu(log->rst_info.r_page->page_size)) {
                 err = -EINVAL;
                 goto out;
         }
  
         /* If the file size has shrunk then we won't mount it. */
-       if (l_size < le64_to_cpu(ra2->l_size)) {
+       if (log->l_size < le64_to_cpu(ra2->l_size)) {
                 err = -EINVAL;
                 goto out;
         }
  
-       log_init_pg_hdr(log, page_size, page_size,
-                       le16_to_cpu(rst_info.r_page->major_ver),
-                       le16_to_cpu(rst_info.r_page->minor_ver));
+       log_init_pg_hdr(log, le16_to_cpu(log->rst_info.r_page->major_ver),
+                       le16_to_cpu(log->rst_info.r_page->minor_ver));
  
         log->l_size = le64_to_cpu(ra2->l_size);
         log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits);
@@ -3945,7 +3929,7 @@ init_log_instance:
         log->seq_num_mask = (8 << log->file_data_bits) - 1;
         log->last_lsn = le64_to_cpu(ra2->current_lsn);
         log->seq_num = log->last_lsn >> log->file_data_bits;
-       log->ra_off = le16_to_cpu(rst_info.r_page->ra_off);
+       log->ra_off = le16_to_cpu(log->rst_info.r_page->ra_off);
         log->restart_size = log->sys_page_size - log->ra_off;
         log->record_header_len = le16_to_cpu(ra2->rec_hdr_len);
         log->ra_size = le16_to_cpu(ra2->ra_len);
@@ -4045,7 +4029,7 @@ find_oldest:
         log->current_avail = current_log_avail(log);
  
         /* Remember which restart area to write first. */
-       log->init_ra = rst_info.vbo;
+       log->init_ra = log->rst_info.vbo;
  
  process_log:
         /* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values. */
@@ -4105,7 +4089,7 @@ process_log:
         log->client_id.seq_num = cr->seq_num;
         log->client_id.client_idx = client;
  
-       err = read_rst_area(log, &rst, &ra_lsn);
+       err = read_rst_area(log, &rst, &checkpt_lsn);
         if (err)
                 goto out;
  
@@ -4114,9 +4098,8 @@ process_log:
  
         bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28;
  
-       checkpt_lsn = le64_to_cpu(rst->check_point_start);
-       if (!checkpt_lsn)
-               checkpt_lsn = ra_lsn;
+       if (rst->check_point_start)
+               checkpt_lsn = le64_to_cpu(rst->check_point_start);
  
         /* Allocate and Read the Transaction Table. */
         if (!rst->transact_table_len)
@@ -4330,23 +4313,20 @@ check_attr_table:
         lcb = NULL;
  
  check_attribute_names2:
-       if (!rst->attr_names_len)
-               goto trace_attribute_table;
-
-       ane = attr_names;
-       if (!oatbl)
-               goto trace_attribute_table;
-       while (ane->off) {
-               /* TODO: Clear table on exit! */
-               oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
-               t16 = le16_to_cpu(ane->name_bytes);
-               oe->name_len = t16 / sizeof(short);
-               oe->ptr = ane->name;
-               oe->is_attr_name = 2;
-               ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16);
-       }
-
-trace_attribute_table:
+       if (rst->attr_names_len && oatbl) {
+               struct ATTR_NAME_ENTRY *ane = attr_names;
+               while (ane->off) {
+                       /* TODO: Clear table on exit! */
+                       oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
+                       t16 = le16_to_cpu(ane->name_bytes);
+                       oe->name_len = t16 / sizeof(short);
+                       oe->ptr = ane->name;
+                       oe->is_attr_name = 2;
+                       ane = Add2Ptr(ane,
+                                     sizeof(struct ATTR_NAME_ENTRY) + t16);
+               }
+       }
+
         /*
          * If the checkpt_lsn is zero, then this is a freshly
          * formatted disk and we have no work to do.
@@ -5189,7 +5169,7 @@ out:
         kfree(oatbl);
         kfree(dptbl);
         kfree(attr_names);
-       kfree(rst_info.r_page);
+       kfree(log->rst_info.r_page);
  
         kfree(ra);
         kfree(log->one_page_buf);
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c

index fbfe21dbb42597cdb25a6d809f7b396e37508046..ae2ef5c11868c360f1285feffae734b2974d0d5c 100644 (file)
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -853,7 +853,8 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
         /*
          * sb can be NULL here. In this case sbi->flags should be 0 too.
          */
-       if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR))
+       if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR) ||
+           unlikely(ntfs3_forced_shutdown(sb)))
                 return;
  
         blocksize = sb->s_blocksize;
@@ -1006,6 +1007,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes)
         return cpu_to_le32(hash);
  }
  
+/*
+ * simple wrapper for sb_bread_unmovable.
+ */
+struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block)
+{
+       struct ntfs_sb_info *sbi = sb->s_fs_info;
+       struct buffer_head *bh;
+
+       if (unlikely(block >= sbi->volume.blocks)) {
+               /* prevent generic message "attempt to access beyond end of device" */
+               ntfs_err(sb, "try to read out of volume at offset 0x%llx",
+                        (u64)block << sb->s_blocksize_bits);
+               return NULL;
+       }
+
+       bh = sb_bread_unmovable(sb, block);
+       if (bh)
+               return bh;
+
+       ntfs_err(sb, "failed to read volume at offset 0x%llx",
+                (u64)block << sb->s_blocksize_bits);
+       return NULL;
+}
+
  int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer)
  {
         struct block_device *bdev = sb->s_bdev;
@@ -2128,8 +2153,8 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi,
                         if (le32_to_cpu(d_security->size) == new_sec_size &&
                             d_security->key.hash == hash_key.hash &&
                             !memcmp(d_security + 1, sd, size_sd)) {
-                               *security_id = d_security->key.sec_id;
                                 /* Such security already exists. */
+                               *security_id = d_security->key.sec_id;
                                 err = 0;
                                 goto out;
                         }
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c

index cf92b2433f7a750aeb86383eb7440c730ad7dc95..daabaad63aaf64ae65b8d67bbd40de837bfe3486 100644 (file)
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1462,7 +1462,7 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
                 goto out2;
  
         if (in->name == I30_NAME) {
-               ni->vfs_inode.i_size = data_size;
+               i_size_write(&ni->vfs_inode, data_size);
                 inode_set_bytes(&ni->vfs_inode, alloc_size);
         }
  
@@ -1544,7 +1544,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
         }
  
         if (in->name == I30_NAME)
-               ni->vfs_inode.i_size = data_size;
+               i_size_write(&ni->vfs_inode, data_size);
  
         *vbn = bit << indx->idx2vbn_bits;
  
@@ -2090,7 +2090,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni,
                 return err;
  
         if (in->name == I30_NAME)
-               ni->vfs_inode.i_size = new_data;
+               i_size_write(&ni->vfs_inode, new_data);
  
         bpb = bitmap_size(bit);
         if (bpb * 8 == nbits)
@@ -2576,7 +2576,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
                 err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
                                     &indx->alloc_run, 0, NULL, false, NULL);
                 if (in->name == I30_NAME)
-                       ni->vfs_inode.i_size = 0;
+                       i_size_write(&ni->vfs_inode, 0);
  
                 err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len,
                                      false, NULL);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c

index 5e3d713749185f116e145adea4b196fbd7be72d2..eb7a8c9fba0183f40096d673473be4dffaa7c4c8 100644 (file)
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -345,9 +345,7 @@ next_attr:
                         inode->i_size = le16_to_cpu(rp.SymbolicLinkReparseBuffer
                                                             .PrintNameLength) /
                                         sizeof(u16);
-
                         ni->i_valid = inode->i_size;
-
                         /* Clear directory bit. */
                         if (ni->ni_flags & NI_FLAG_DIR) {
                                 indx_clear(&ni->dir);
@@ -412,7 +410,6 @@ end_enum:
                 goto out;
  
         if (!is_match && name) {
-               /* Reuse rec as buffer for ascii name. */
                 err = -ENOENT;
                 goto out;
         }
@@ -427,6 +424,7 @@ end_enum:
  
         if (names != le16_to_cpu(rec->hard_links)) {
                 /* Correct minor error on the fly. Do not mark inode as dirty. */
+               ntfs_inode_warn(inode, "Correct links count -> %u.", names);
                 rec->hard_links = cpu_to_le16(names);
                 ni->mi.dirty = true;
         }
@@ -653,9 +651,10 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
                         off = vbo & (PAGE_SIZE - 1);
                         folio_set_bh(bh, folio, off);
  
-                       err = bh_read(bh, 0);
-                       if (err < 0)
+                       if (bh_read(bh, 0) < 0) {
+                               err = -EIO;
                                 goto out;
+                       }
                         folio_zero_segment(folio, off + voff, off + block_size);
                 }
         }
@@ -853,9 +852,13 @@ static int ntfs_resident_writepage(struct folio *folio,
                                    struct writeback_control *wbc, void *data)
  {
         struct address_space *mapping = data;
-       struct ntfs_inode *ni = ntfs_i(mapping->host);
+       struct inode *inode = mapping->host;
+       struct ntfs_inode *ni = ntfs_i(inode);
         int ret;
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         ni_lock(ni);
         ret = attr_data_write_resident(ni, &folio->page);
         ni_unlock(ni);
@@ -869,7 +872,12 @@ static int ntfs_resident_writepage(struct folio *folio,
  static int ntfs_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
  {
-       if (is_resident(ntfs_i(mapping->host)))
+       struct inode *inode = mapping->host;
+
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
+       if (is_resident(ntfs_i(inode)))
                 return write_cache_pages(mapping, wbc, ntfs_resident_writepage,
                                          mapping);
         return mpage_writepages(mapping, wbc, ntfs_get_block);
@@ -889,6 +897,9 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping,
         struct inode *inode = mapping->host;
         struct ntfs_inode *ni = ntfs_i(inode);
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         *pagep = NULL;
         if (is_resident(ni)) {
                 struct page *page =
@@ -974,7 +985,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
                 }
  
                 if (pos + err > inode->i_size) {
-                       inode->i_size = pos + err;
+                       i_size_write(inode, pos + err);
                         dirty = true;
                 }
  
@@ -1306,6 +1317,11 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
                 goto out1;
         }
  
+       if (unlikely(ntfs3_forced_shutdown(sb))) {
+               err = -EIO;
+               goto out2;
+       }
+
         /* Mark rw ntfs as dirty. it will be cleared at umount. */
         ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
  
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c

index ee3093be51701e78d1e02f6f30a7b5a4019831a0..cae41db0aaa7d13e1fb4e0132b79261156a39306 100644 (file)
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -181,6 +181,9 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry)
         struct ntfs_inode *ni = ntfs_i(dir);
         int err;
  
+       if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+               return -EIO;
+
         ni_lock_dir(ni);
  
         err = ntfs_unlink_inode(dir, dentry);
@@ -199,6 +202,9 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
         u32 size = strlen(symname);
         struct inode *inode;
  
+       if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+               return -EIO;
+
         inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, 0,
                                   symname, size, NULL);
  
@@ -227,6 +233,9 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry)
         struct ntfs_inode *ni = ntfs_i(dir);
         int err;
  
+       if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+               return -EIO;
+
         ni_lock_dir(ni);
  
         err = ntfs_unlink_inode(dir, dentry);
@@ -264,6 +273,9 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
                       1024);
         static_assert(PATH_MAX >= 4 * 1024);
  
+       if (unlikely(ntfs3_forced_shutdown(sb)))
+               return -EIO;
+
         if (flags & ~RENAME_NOREPLACE)
                 return -EINVAL;
  
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h

index 86aecbb01a92f282ab621a26df9897b70b65df28..9c7478150a0352d4f46574b76e91ec27ce661cdb 100644 (file)
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -523,12 +523,10 @@ struct ATTR_LIST_ENTRY {
         __le64 vcn;             // 0x08: Starting VCN of this attribute.
         struct MFT_REF ref;     // 0x10: MFT record number with attribute.
         __le16 id;              // 0x18: struct ATTRIB ID.
-       __le16 name[3];         // 0x1A: Just to align. To get real name can use bNameOffset.
+       __le16 name[];          // 0x1A: To get real name use name_off.
  
  }; // sizeof(0x20)
  
-static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20);
-
  static inline u32 le_size(u8 name_len)
  {
         return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) +
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h

index f6706143d14bced3c2bfdbac59d4df1a8cb9da5e..79356fd29a14141de34ed006517b153fd9e4872b 100644 (file)
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -61,6 +61,8 @@ enum utf16_endian;
  
  /* sbi->flags */
  #define NTFS_FLAGS_NODISCARD           0x00000001
+/* ntfs in shutdown state. */
+#define NTFS_FLAGS_SHUTDOWN_BIT                0x00000002  /* == 4*/
  /* Set when LogFile is replaying. */
  #define NTFS_FLAGS_LOG_REPLAYING       0x00000008
  /* Set when we changed first MFT's which copy must be updated in $MftMirr. */
@@ -226,7 +228,7 @@ struct ntfs_sb_info {
         u64 maxbytes; // Maximum size for normal files.
         u64 maxbytes_sparse; // Maximum size for sparse file.
  
-       u32 flags; // See NTFS_FLAGS_XXX.
+       unsigned long flags; // See NTFS_FLAGS_
  
         CLST zone_max; // Maximum MFT zone length in clusters
         CLST bad_clusters; // The count of marked bad clusters.
@@ -473,7 +475,7 @@ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn,
  int al_update(struct ntfs_inode *ni, int sync);
  static inline size_t al_aligned(size_t size)
  {
-       return (size + 1023) & ~(size_t)1023;
+       return size_add(size, 1023) & ~(size_t)1023;
  }
  
  /* Globals from bitfunc.c */
@@ -500,6 +502,8 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
  int ntfs_file_open(struct inode *inode, struct file *file);
  int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 __u64 start, __u64 len);
+long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg);
+long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg);
  extern const struct inode_operations ntfs_special_inode_operations;
  extern const struct inode_operations ntfs_file_inode_operations;
  extern const struct file_operations ntfs_file_operations;
@@ -584,6 +588,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes);
  int log_replay(struct ntfs_inode *ni, bool *initialized);
  
  /* Globals from fsntfs.c */
+struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block);
  bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes);
  int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
                        bool simple);
@@ -872,7 +877,7 @@ int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode,
  
  int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry);
  ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-extern const struct xattr_handler * const ntfs_xattr_handlers[];
+extern const struct xattr_handler *const ntfs_xattr_handlers[];
  
  int ntfs_save_wsl_perm(struct inode *inode, __le16 *ea_size);
  void ntfs_get_wsl_perm(struct inode *inode);
@@ -999,6 +1004,11 @@ static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb)
         return sb->s_fs_info;
  }
  
+static inline int ntfs3_forced_shutdown(struct super_block *sb)
+{
+       return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
+}
+
  /*
   * ntfs_up_cluster - Align up on cluster boundary.
   */
@@ -1025,19 +1035,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size)
         return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
  }
  
-static inline struct buffer_head *ntfs_bread(struct super_block *sb,
-                                            sector_t block)
-{
-       struct buffer_head *bh = sb_bread(sb, block);
-
-       if (bh)
-               return bh;
-
-       ntfs_err(sb, "failed to read volume at offset 0x%llx",
-                (u64)block << sb->s_blocksize_bits);
-       return NULL;
-}
-
  static inline struct ntfs_inode *ntfs_i(struct inode *inode)
  {
         return container_of(inode, struct ntfs_inode, vfs_inode);
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c

index 53629b1f65e995978cef9b312462447305cb578d..6aa3a9d44df1bdc90f56a947caf94c902590d983 100644 (file)
--- a/fs/ntfs3/record.c
+++ b/fs/ntfs3/record.c
@@ -279,7 +279,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
                 if (t16 > asize)
                         return NULL;
  
-               if (t16 + le32_to_cpu(attr->res.data_size) > asize)
+               if (le32_to_cpu(attr->res.data_size) > asize - t16)
                         return NULL;
  
                 t32 = sizeof(short) * attr->name_len;
@@ -535,8 +535,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi,
                 return false;
  
         if (ni && is_attr_indexed(attr)) {
-               le16_add_cpu(&ni->mi.mrec->hard_links, -1);
-               ni->mi.dirty = true;
+               u16 links = le16_to_cpu(ni->mi.mrec->hard_links);
+               struct ATTR_FILE_NAME *fname =
+                       attr->type != ATTR_NAME ?
+                               NULL :
+                               resident_data_ex(attr,
+                                                SIZEOF_ATTRIBUTE_FILENAME);
+               if (fname && fname->type == FILE_NAME_DOS) {
+                       /* Do not decrease links count deleting DOS name. */
+               } else if (!links) {
+                       /* minor error. Not critical. */
+               } else {
+                       ni->mi.mrec->hard_links = cpu_to_le16(links - 1);
+                       ni->mi.dirty = true;
+               }
         }
  
         used -= asize;
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c

index 9153dffde950c2a396291bea88e3e6d31169f568..cef5467fd92833aec6fb0bce3879826c2a627a09 100644 (file)
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -122,13 +122,12 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
  
         if (name) {
                 struct dentry *de = d_find_alias(inode);
-               const u32 name_len = ARRAY_SIZE(s_name_buf) - 1;
  
                 if (de) {
                         spin_lock(&de->d_lock);
-                       snprintf(name, name_len, " \"%s\"", de->d_name.name);
+                       snprintf(name, sizeof(s_name_buf), " \"%s\"",
+                                de->d_name.name);
                         spin_unlock(&de->d_lock);
-                       name[name_len] = 0; /* To be sure. */
                 } else {
                         name[0] = 0;
                 }
@@ -625,7 +624,7 @@ static void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
  {
         kfree(sbi->new_rec);
         kvfree(ntfs_put_shared(sbi->upcase));
-       kfree(sbi->def_table);
+       kvfree(sbi->def_table);
         kfree(sbi->compress.lznt);
  #ifdef CONFIG_NTFS3_LZX_XPRESS
         xpress_free_decompressor(sbi->compress.xpress);
@@ -714,6 +713,14 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root)
         return 0;
  }
  
+/*
+ * ntfs_shutdown - super_operations::shutdown
+ */
+static void ntfs_shutdown(struct super_block *sb)
+{
+       set_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
+}
+
  /*
   * ntfs_sync_fs - super_operations::sync_fs
   */
@@ -724,6 +731,9 @@ static int ntfs_sync_fs(struct super_block *sb, int wait)
         struct ntfs_inode *ni;
         struct inode *inode;
  
+       if (unlikely(ntfs3_forced_shutdown(sb)))
+               return -EIO;
+
         ni = sbi->security.ni;
         if (ni) {
                 inode = &ni->vfs_inode;
@@ -763,6 +773,7 @@ static const struct super_operations ntfs_sops = {
         .put_super = ntfs_put_super,
         .statfs = ntfs_statfs,
         .show_options = ntfs_show_options,
+       .shutdown = ntfs_shutdown,
         .sync_fs = ntfs_sync_fs,
         .write_inode = ntfs3_write_inode,
  };
@@ -866,6 +877,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
         u16 fn, ao;
         u8 cluster_bits;
         u32 boot_off = 0;
+       sector_t boot_block = 0;
         const char *hint = "Primary boot";
  
         /* Save original dev_size. Used with alternative boot. */
@@ -873,11 +885,11 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
  
         sbi->volume.blocks = dev_size >> PAGE_SHIFT;
  
-       bh = ntfs_bread(sb, 0);
+read_boot:
+       bh = ntfs_bread(sb, boot_block);
         if (!bh)
-               return -EIO;
+               return boot_block ? -EINVAL : -EIO;
  
-check_boot:
         err = -EINVAL;
  
         /* Corrupted image; do not read OOB */
@@ -1108,26 +1120,24 @@ check_boot:
         }
  
  out:
-       if (err == -EINVAL && !bh->b_blocknr && dev_size0 > PAGE_SHIFT) {
+       brelse(bh);
+
+       if (err == -EINVAL && !boot_block && dev_size0 > PAGE_SHIFT) {
                 u32 block_size = min_t(u32, sector_size, PAGE_SIZE);
                 u64 lbo = dev_size0 - sizeof(*boot);
  
-               /*
-                * Try alternative boot (last sector)
-                */
-               brelse(bh);
-
-               sb_set_blocksize(sb, block_size);
-               bh = ntfs_bread(sb, lbo >> blksize_bits(block_size));
-               if (!bh)
-                       return -EINVAL;
-
+               boot_block = lbo >> blksize_bits(block_size);
                 boot_off = lbo & (block_size - 1);
-               hint = "Alternative boot";
-               dev_size = dev_size0; /* restore original size. */
-               goto check_boot;
+               if (boot_block && block_size >= boot_off + sizeof(*boot)) {
+                       /*
+                        * Try alternative boot (last sector)
+                        */
+                       sb_set_blocksize(sb, block_size);
+                       hint = "Alternative boot";
+                       dev_size = dev_size0; /* restore original size. */
+                       goto read_boot;
+               }
         }
-       brelse(bh);
  
         return err;
  }
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c

index 4274b6f31cfa1c49fec865fe0e1d81ab152e040b..53e7d1fa036aa6e50a3ccd529d88584b1350cd74 100644 (file)
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -219,6 +219,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
                 if (!ea->name_len)
                         break;
  
+               if (ea->name_len > ea_size)
+                       break;
+
                 if (buffer) {
                         /* Check if we can use field ea->name */
                         if (off + ea_size > size)
@@ -744,6 +747,9 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
         int err;
         struct ntfs_inode *ni = ntfs_i(inode);
  
+       if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+               return -EIO;
+
         /* Dispatch request. */
         if (!strcmp(name, SYSTEM_DOS_ATTRIB)) {
                 /* system.dos_attrib */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c

index b8e25ca51016d9df648ca58495baa9db553330ec..8586e2f5d24390c91263ea1ee48e7c3b22199cd2 100644 (file)
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -265,20 +265,18 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
         if (IS_ERR(old_file))
                 return PTR_ERR(old_file);
  
+       /* Try to use clone_file_range to clone up within the same fs */
+       cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0);
+       if (cloned == len)
+               goto out_fput;
+
+       /* Couldn't clone, so now we try to copy the data */
         error = rw_verify_area(READ, old_file, &old_pos, len);
         if (!error)
                 error = rw_verify_area(WRITE, new_file, &new_pos, len);
         if (error)
                 goto out_fput;
  
-       /* Try to use clone_file_range to clone up within the same fs */
-       ovl_start_write(dentry);
-       cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
-       ovl_end_write(dentry);
-       if (cloned == len)
-               goto out_fput;
-       /* Couldn't clone, so now we try to copy the data */
-
         /* Check if lower fs supports seek operation */
         if (old_file->f_mode & FMODE_LSEEK)
                 skip_hole = true;
diff --git a/fs/proc/array.c b/fs/proc/array.c

index ff08a8957552add31a8fdf98e202f8380d519e50..34a47fb0c57f2570a4f7cb1f45373ddaf2afa883 100644 (file)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -477,13 +477,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
         int permitted;
         struct mm_struct *mm;
         unsigned long long start_time;
-       unsigned long cmin_flt = 0, cmaj_flt = 0;
-       unsigned long  min_flt = 0,  maj_flt = 0;
-       u64 cutime, cstime, utime, stime;
-       u64 cgtime, gtime;
+       unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
+       u64 cutime, cstime, cgtime, utime, stime, gtime;
         unsigned long rsslim = 0;
         unsigned long flags;
         int exit_code = task->exit_code;
+       struct signal_struct *sig = task->signal;
+       unsigned int seq = 1;
  
         state = *get_task_state(task);
         vsize = eip = esp = 0;
@@ -511,12 +511,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
  
         sigemptyset(&sigign);
         sigemptyset(&sigcatch);
-       cutime = cstime = utime = stime = 0;
-       cgtime = gtime = 0;
  
         if (lock_task_sighand(task, &flags)) {
-               struct signal_struct *sig = task->signal;
-
                 if (sig->tty) {
                         struct pid *pgrp = tty_get_pgrp(sig->tty);
                         tty_pgrp = pid_nr_ns(pgrp, ns);
@@ -527,28 +523,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                 num_threads = get_nr_threads(task);
                 collect_sigign_sigcatch(task, &sigign, &sigcatch);
  
-               cmin_flt = sig->cmin_flt;
-               cmaj_flt = sig->cmaj_flt;
-               cutime = sig->cutime;
-               cstime = sig->cstime;
-               cgtime = sig->cgtime;
                 rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
  
-               /* add up live thread stats at the group level */
                 if (whole) {
-                       struct task_struct *t;
-
-                       __for_each_thread(sig, t) {
-                               min_flt += t->min_flt;
-                               maj_flt += t->maj_flt;
-                               gtime += task_gtime(t);
-                       }
-
-                       min_flt += sig->min_flt;
-                       maj_flt += sig->maj_flt;
-                       thread_group_cputime_adjusted(task, &utime, &stime);
-                       gtime += sig->gtime;
-
                         if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
                                 exit_code = sig->group_exit_code;
                 }
@@ -562,10 +539,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
  
         if (permitted && (!whole || num_threads < 2))
                 wchan = !task_is_running(task);
-       if (!whole) {
+
+       do {
+               seq++; /* 2 on the 1st/lockless path, otherwise odd */
+               flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+
+               cmin_flt = sig->cmin_flt;
+               cmaj_flt = sig->cmaj_flt;
+               cutime = sig->cutime;
+               cstime = sig->cstime;
+               cgtime = sig->cgtime;
+
+               if (whole) {
+                       struct task_struct *t;
+
+                       min_flt = sig->min_flt;
+                       maj_flt = sig->maj_flt;
+                       gtime = sig->gtime;
+
+                       rcu_read_lock();
+                       __for_each_thread(sig, t) {
+                               min_flt += t->min_flt;
+                               maj_flt += t->maj_flt;
+                               gtime += task_gtime(t);
+                       }
+                       rcu_read_unlock();
+               }
+       } while (need_seqretry(&sig->stats_lock, seq));
+       done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
+       if (whole) {
+               thread_group_cputime_adjusted(task, &utime, &stime);
+       } else {
+               task_cputime_adjusted(task, &utime, &stime);
                 min_flt = task->min_flt;
                 maj_flt = task->maj_flt;
-               task_cputime_adjusted(task, &utime, &stime);
                 gtime = task_gtime(task);
         }
  
diff --git a/fs/proc/base.c b/fs/proc/base.c

index 98a031ac26484544b8b07aec1ca72f40250ba2ca..18550c071d71c733204e3a94d274ac4d47c00119 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1878,8 +1878,6 @@ void proc_pid_evict_inode(struct proc_inode *ei)
                 hlist_del_init_rcu(&ei->sibling_inodes);
                 spin_unlock(&pid->lock);
         }
-
-       put_pid(pid);
  }
  
  struct inode *proc_pid_make_inode(struct super_block *sb,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c

index b33e490e3fd9f88f569e3453d603041e665cf6bf..05350f3c2812c57562e9208da69d0e98835dadc9 100644 (file)
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -30,7 +30,6 @@
  
  static void proc_evict_inode(struct inode *inode)
  {
-       struct proc_dir_entry *de;
         struct ctl_table_header *head;
         struct proc_inode *ei = PROC_I(inode);
  
@@ -38,17 +37,8 @@ static void proc_evict_inode(struct inode *inode)
         clear_inode(inode);
  
         /* Stop tracking associated processes */
-       if (ei->pid) {
+       if (ei->pid)
                 proc_pid_evict_inode(ei);
-               ei->pid = NULL;
-       }
-
-       /* Let go of any associated proc directory entry */
-       de = ei->pde;
-       if (de) {
-               pde_put(de);
-               ei->pde = NULL;
-       }
  
         head = ei->sysctl;
         if (head) {
@@ -80,6 +70,13 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
  
  static void proc_free_inode(struct inode *inode)
  {
+       struct proc_inode *ei = PROC_I(inode);
+
+       if (ei->pid)
+               put_pid(ei->pid);
+       /* Let go of any associated proc directory entry */
+       if (ei->pde)
+               pde_put(ei->pde);
         kmem_cache_free(proc_inode_cachep, PROC_I(inode));
  }
  
diff --git a/fs/proc/root.c b/fs/proc/root.c

index b55dbc70287b492ae2e4ed43e2a3c04ee0818798..06a297a27ba3b31a5e2092fcd08d1ca9eebb5849 100644 (file)
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -271,7 +271,7 @@ static void proc_kill_sb(struct super_block *sb)
  
         kill_anon_super(sb);
         put_pid_ns(fs_info->pid_ns);
-       kfree(fs_info);
+       kfree_rcu(fs_info, rcu);
  }
  
  static struct file_system_type proc_fs_type = {
diff --git a/fs/remap_range.c b/fs/remap_range.c

index f8c1120b8311f62324324b911b0aa4aebe4ccb04..de07f978ce3ebe16bf42bf5315996fd074de5aac 100644 (file)
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -373,9 +373,9 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
  }
  EXPORT_SYMBOL(generic_remap_file_range_prep);
  
-loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
-                          struct file *file_out, loff_t pos_out,
-                          loff_t len, unsigned int remap_flags)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+                           struct file *file_out, loff_t pos_out,
+                           loff_t len, unsigned int remap_flags)
  {
         loff_t ret;
  
@@ -391,23 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
         if (!file_in->f_op->remap_file_range)
                 return -EOPNOTSUPP;
  
-       ret = file_in->f_op->remap_file_range(file_in, pos_in,
-                       file_out, pos_out, len, remap_flags);
-       if (ret < 0)
-               return ret;
-
-       fsnotify_access(file_in);
-       fsnotify_modify(file_out);
-       return ret;
-}
-EXPORT_SYMBOL(do_clone_file_range);
-
-loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-                           struct file *file_out, loff_t pos_out,
-                           loff_t len, unsigned int remap_flags)
-{
-       loff_t ret;
-
         ret = remap_verify_area(file_in, pos_in, len, false);
         if (ret)
                 return ret;
@@ -417,10 +400,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
                 return ret;
  
         file_start_write(file_out);
-       ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
-                                 remap_flags);
+       ret = file_in->f_op->remap_file_range(file_in, pos_in,
+                       file_out, pos_out, len, remap_flags);
         file_end_write(file_out);
+       if (ret < 0)
+               return ret;
  
+       fsnotify_access(file_in);
+       fsnotify_modify(file_out);
         return ret;
  }
  EXPORT_SYMBOL(vfs_clone_file_range);
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c

index 1daeb5714faad14c24c49a5efd5d118aaf04b54c..3de5047a7ff988c2049350e464771e912b12894e 100644 (file)
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -242,6 +242,7 @@ replay_again:
                 .desired_access =  FILE_READ_DATA | FILE_READ_ATTRIBUTES,
                 .disposition = FILE_OPEN,
                 .fid = pfid,
+               .replay = !!(retries),
         };
  
         rc = SMB2_open_init(tcon, server,
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c

index 2a4a4e3a8751f2ce8f0409ce79dc5024e02bb883..0c269396ae151b083b4e545e58b2300ca16e0ee3 100644 (file)
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1172,6 +1172,9 @@ const char *cifs_get_link(struct dentry *dentry, struct inode *inode,
  {
         char *target_path;
  
+       if (!dentry)
+               return ERR_PTR(-ECHILD);
+
         target_path = kmalloc(PATH_MAX, GFP_KERNEL);
         if (!target_path)
                 return ERR_PTR(-ENOMEM);
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h

index 16befff4cbb47c9ac104b052401a490398d0fac9..53c75cfb33ab9446740133e7f19da6229eeffd55 100644 (file)
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -87,7 +87,7 @@
  #define SMB_INTERFACE_POLL_INTERVAL    600
  
  /* maximum number of PDUs in one compound */
-#define MAX_COMPOUND 5
+#define MAX_COMPOUND 7
  
  /*
   * Default number of credits to keep available for SMB3.
@@ -1032,6 +1032,8 @@ struct cifs_chan {
         __u8 signkey[SMB3_SIGN_KEY_SIZE];
  };
  
+#define CIFS_SES_FLAG_SCALE_CHANNELS (0x1)
+
  /*
   * Session structure.  One of these for each uid session with a particular host
   */
@@ -1064,6 +1066,7 @@ struct cifs_ses {
         enum securityEnum sectype; /* what security flavor was specified? */
         bool sign;              /* is signing required? */
         bool domainAuto:1;
+       unsigned int flags;
         __u16 session_flags;
         __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
         __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
@@ -1375,6 +1378,7 @@ struct cifs_open_parms {
         struct cifs_fid *fid;
         umode_t mode;
         bool reconnect:1;
+       bool replay:1; /* indicates that this open is for a replay */
  };
  
  struct cifs_fid {
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c

index bfd568f8971056b2c9ffbd509e026f140549f1af..ac9595504f4b11fa066a6516f034bff8bc09d56b 100644 (file)
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -233,6 +233,12 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
         list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
                 /* check if iface is still active */
                 spin_lock(&ses->chan_lock);
+               if (cifs_ses_get_chan_index(ses, server) ==
+                   CIFS_INVAL_CHAN_INDEX) {
+                       spin_unlock(&ses->chan_lock);
+                       continue;
+               }
+
                 if (!cifs_chan_is_iface_active(ses, server)) {
                         spin_unlock(&ses->chan_lock);
                         cifs_chan_update_iface(ses, server);
@@ -3438,8 +3444,18 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
          * the user on mount
          */
         if ((cifs_sb->ctx->wsize == 0) ||
-           (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx)))
-               cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx);
+           (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) {
+               cifs_sb->ctx->wsize =
+                       round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE);
+               /*
+                * in the very unlikely event that the server sent a max write size under PAGE_SIZE,
+                * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096
+                */
+               if (cifs_sb->ctx->wsize == 0) {
+                       cifs_sb->ctx->wsize = PAGE_SIZE;
+                       cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n");
+               }
+       }
         if ((cifs_sb->ctx->rsize == 0) ||
             (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
                 cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
@@ -4228,6 +4244,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
  
         /* only send once per connect */
         spin_lock(&tcon->tc_lock);
+
+       /* if tcon is marked for needing reconnect, update state */
+       if (tcon->need_reconnect)
+               tcon->status = TID_NEED_TCON;
+
         if (tcon->status == TID_GOOD) {
                 spin_unlock(&tcon->tc_lock);
                 return 0;
diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c

index a8a1d386da6566a2dec94099ae08a80199462bae..449c59830039bc04897e5031dba2dbc9c6649bad 100644 (file)
--- a/fs/smb/client/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -565,6 +565,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
  
         /* only send once per connect */
         spin_lock(&tcon->tc_lock);
+
+       /* if tcon is marked for needing reconnect, update state */
+       if (tcon->need_reconnect)
+               tcon->status = TID_NEED_TCON;
+
         if (tcon->status == TID_GOOD) {
                 spin_unlock(&tcon->tc_lock);
                 return 0;
@@ -625,8 +630,8 @@ out:
                 spin_lock(&tcon->tc_lock);
                 if (tcon->status == TID_IN_TCON)
                         tcon->status = TID_GOOD;
-               spin_unlock(&tcon->tc_lock);
                 tcon->need_reconnect = false;
+               spin_unlock(&tcon->tc_lock);
         }
  
         return rc;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c

index b75282c204dadff986f9711d38e775834c30ddd2..f391c9b803d84f9549b50a57f860abdaf58e43b2 100644 (file)
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -175,6 +175,9 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
  
         /* only send once per connect */
         spin_lock(&tcon->tc_lock);
+       if (tcon->need_reconnect)
+               tcon->status = TID_NEED_RECON;
+
         if (tcon->status != TID_NEED_RECON) {
                 spin_unlock(&tcon->tc_lock);
                 return;
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c

index 52cbef2eeb28f6ba0013063b4bafcecc08c3a02d..4b2f5aa2ea0e1de026302b9e543e2e13429107f9 100644 (file)
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -211,7 +211,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c
  
         switch (match_token(value, cifs_secflavor_tokens, args)) {
         case Opt_sec_krb5p:
-               cifs_errorf(fc, "sec=krb5p is not supported!\n");
+               cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n");
                 return 1;
         case Opt_sec_krb5i:
                 ctx->sign = true;
@@ -1111,6 +1111,17 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
         case Opt_wsize:
                 ctx->wsize = result.uint_32;
                 ctx->got_wsize = true;
+               if (ctx->wsize % PAGE_SIZE != 0) {
+                       ctx->wsize = round_down(ctx->wsize, PAGE_SIZE);
+                       if (ctx->wsize == 0) {
+                               ctx->wsize = PAGE_SIZE;
+                               cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE);
+                       } else {
+                               cifs_dbg(VFS,
+                                        "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n",
+                                        ctx->wsize, PAGE_SIZE);
+                       }
+               }
                 break;
         case Opt_acregmax:
                 ctx->acregmax = HZ * result.uint_32;
diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c

index a6968573b775e7bdcab3df948908e9494f792027..4a517b280f2b79a2c1395a1c627b496def7392a7 100644 (file)
--- a/fs/smb/client/namespace.c
+++ b/fs/smb/client/namespace.c
@@ -168,6 +168,21 @@ static char *automount_fullpath(struct dentry *dentry, void *page)
         return s;
  }
  
+static void fs_context_set_ids(struct smb3_fs_context *ctx)
+{
+       kuid_t uid = current_fsuid();
+       kgid_t gid = current_fsgid();
+
+       if (ctx->multiuser) {
+               if (!ctx->uid_specified)
+                       ctx->linux_uid = uid;
+               if (!ctx->gid_specified)
+                       ctx->linux_gid = gid;
+       }
+       if (!ctx->cruid_specified)
+               ctx->cred_uid = uid;
+}
+
  /*
   * Create a vfsmount that we can automount
   */
@@ -205,6 +220,7 @@ static struct vfsmount *cifs_do_automount(struct path *path)
         tmp.leaf_fullpath = NULL;
         tmp.UNC = tmp.prepath = NULL;
         tmp.dfs_root_ses = NULL;
+       fs_context_set_ids(&tmp);
  
         rc = smb3_fs_context_dup(ctx, &tmp);
         if (rc) {
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c

index 3b1b01d10f7d7a2f1d12b158ded96d58203f80d0..b520eea7bfce83b2fd8e9a1d9e1685c39516840e 100644 (file)
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -307,14 +307,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
  }
  
  static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr,
-                                      SEARCH_ID_FULL_DIR_INFO *info,
+                                      const void *info,
                                        struct cifs_sb_info *cifs_sb)
  {
+       const FILE_FULL_DIRECTORY_INFO *di = info;
+
         __dir_info_to_fattr(fattr, info);
  
-       /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */
+       /* See MS-FSCC 2.4.14, 2.4.19 */
         if (fattr->cf_cifsattrs & ATTR_REPARSE)
-               fattr->cf_cifstag = le32_to_cpu(info->EaSize);
+               fattr->cf_cifstag = le32_to_cpu(di->EaSize);
         cifs_fill_common_info(fattr, cifs_sb);
  }
  
@@ -396,7 +398,7 @@ ffirst_retry:
         } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
                 cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
         } else /* not srvinos - BB fixme add check for backlevel? */ {
-               cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO;
+               cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO;
         }
  
         search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
@@ -987,10 +989,9 @@ static int cifs_filldir(char *find_entry, struct file *file,
                                        (FIND_FILE_STANDARD_INFO *)find_entry,
                                        cifs_sb);
                 break;
+       case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
         case SMB_FIND_FILE_ID_FULL_DIR_INFO:
-               cifs_fulldir_info_to_fattr(&fattr,
-                                          (SEARCH_ID_FULL_DIR_INFO *)find_entry,
-                                          cifs_sb);
+               cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb);
                 break;
         default:
                 cifs_dir_info_to_fattr(&fattr,
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c

index cde81042bebda6b8f3a454f46eb8b055af8d2f3c..8f37373fd33344bacbf4d492f6115d9396573379 100644 (file)
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -75,6 +75,10 @@ cifs_ses_get_chan_index(struct cifs_ses *ses,
  {
         unsigned int i;
  
+       /* if the channel is waiting for termination */
+       if (server && server->terminate)
+               return CIFS_INVAL_CHAN_INDEX;
+
         for (i = 0; i < ses->chan_count; i++) {
                 if (ses->chans[i].server == server)
                         return i;
@@ -84,7 +88,6 @@ cifs_ses_get_chan_index(struct cifs_ses *ses,
         if (server)
                 cifs_dbg(VFS, "unable to get chan index for server: 0x%llx",
                          server->conn_id);
-       WARN_ON(1);
         return CIFS_INVAL_CHAN_INDEX;
  }
  
@@ -269,6 +272,8 @@ int cifs_try_adding_channels(struct cifs_ses *ses)
                                          &iface->sockaddr,
                                          rc);
                                 kref_put(&iface->refcount, release_iface);
+                               /* failure to add chan should increase weight */
+                               iface->weight_fulfilled++;
                                 continue;
                         }
  
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c

index 83c898afc8354bf04c7a86ee57e4343ad3618319..4695433fcf397f529754cc9ec266cb5ac1727512 100644 (file)
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -619,7 +619,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
                 goto out;
         }
  
-       while (bytes_left >= sizeof(*p)) {
+       while (bytes_left >= (ssize_t)sizeof(*p)) {
                 memset(&tmp_iface, 0, sizeof(tmp_iface));
                 tmp_iface.speed = le64_to_cpu(p->LinkSpeed);
                 tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
@@ -1204,6 +1204,7 @@ replay_again:
                 .disposition = FILE_OPEN,
                 .create_options = cifs_create_options(cifs_sb, 0),
                 .fid = &fid,
+               .replay = !!(retries),
         };
  
         rc = SMB2_open_init(tcon, server,
@@ -1569,6 +1570,7 @@ replay_again:
                 .disposition = FILE_OPEN,
                 .create_options = cifs_create_options(cifs_sb, create_options),
                 .fid = &fid,
+               .replay = !!(retries),
         };
  
         if (qi.flags & PASSTHRU_FSCTL) {
@@ -2295,6 +2297,7 @@ replay_again:
                 .disposition = FILE_OPEN,
                 .create_options = cifs_create_options(cifs_sb, 0),
                 .fid = fid,
+               .replay = !!(retries),
         };
  
         rc = SMB2_open_init(tcon, server,
@@ -2681,6 +2684,7 @@ replay_again:
                 .disposition = FILE_OPEN,
                 .create_options = cifs_create_options(cifs_sb, 0),
                 .fid = &fid,
+               .replay = !!(retries),
         };
  
         rc = SMB2_open_init(tcon, server,
@@ -5213,7 +5217,7 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
         struct inode *new;
         struct kvec iov;
         __le16 *path;
-       char *sym;
+       char *sym, sep = CIFS_DIR_SEP(cifs_sb);
         u16 len, plen;
         int rc = 0;
  
@@ -5227,7 +5231,8 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
                 .symlink_target = sym,
         };
  
-       path = cifs_convert_path_to_utf16(symname, cifs_sb);
+       convert_delimiter(sym, sep);
+       path = cifs_convert_path_to_utf16(sym, cifs_sb);
         if (!path) {
                 rc = -ENOMEM;
                 goto out;
@@ -5250,7 +5255,10 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
         buf->PrintNameLength = cpu_to_le16(plen);
         memcpy(buf->PathBuffer, path, plen);
         buf->Flags = cpu_to_le32(*symname != '/' ? SYMLINK_FLAG_RELATIVE : 0);
+       if (*sym != sep)
+               buf->Flags = cpu_to_le32(SYMLINK_FLAG_RELATIVE);
  
+       convert_delimiter(sym, '/');
         iov.iov_base = buf;
         iov.iov_len = len;
         new = smb2_get_reparse_inode(&data, inode->i_sb, xid,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c

index 86f6f35b7f32e8498e2628350abf43daa0d97f96..608ee05491e262c5cf4555c6b51b364cdc60a03b 100644 (file)
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -178,6 +178,7 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses,
                 }
  
                 ses->chans[chan_index].server = NULL;
+               server->terminate = true;
                 spin_unlock(&ses->chan_lock);
  
                 /*
@@ -188,7 +189,6 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses,
                  */
                 cifs_put_tcp_session(server, from_reconnect);
  
-               server->terminate = true;
                 cifs_signal_cifsd_for_reconnect(server, false);
  
                 /* mark primary server as needing reconnect */
@@ -399,6 +399,15 @@ skip_sess_setup:
                 goto out;
         }
  
+       spin_lock(&ses->ses_lock);
+       if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) {
+               spin_unlock(&ses->ses_lock);
+               mutex_unlock(&ses->session_mutex);
+               goto skip_add_channels;
+       }
+       ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS;
+       spin_unlock(&ses->ses_lock);
+
         if (!rc &&
             (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
                 mutex_unlock(&ses->session_mutex);
@@ -410,7 +419,7 @@ skip_sess_setup:
                 rc = SMB3_request_interfaces(xid, tcon, false);
                 free_xid(xid);
  
-               if (rc == -EOPNOTSUPP) {
+               if (rc == -EOPNOTSUPP && ses->chan_count > 1) {
                         /*
                          * some servers like Azure SMB server do not advertise
                          * that multichannel has been disabled with server
@@ -428,17 +437,22 @@ skip_sess_setup:
                 if (ses->chan_max > ses->chan_count &&
                     ses->iface_count &&
                     !SERVER_IS_CHAN(server)) {
-                       if (ses->chan_count == 1)
+                       if (ses->chan_count == 1) {
                                 cifs_server_dbg(VFS, "supports multichannel now\n");
+                               queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+                                                (SMB_INTERFACE_POLL_INTERVAL * HZ));
+                       }
  
                         cifs_try_adding_channels(ses);
-                       queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
-                                          (SMB_INTERFACE_POLL_INTERVAL * HZ));
                 }
         } else {
                 mutex_unlock(&ses->session_mutex);
         }
+
  skip_add_channels:
+       spin_lock(&ses->ses_lock);
+       ses->flags &= ~CIFS_SES_FLAG_SCALE_CHANNELS;
+       spin_unlock(&ses->ses_lock);
  
         if (smb2_command != SMB2_INTERNAL_CMD)
                 mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
@@ -2390,8 +2404,13 @@ create_durable_v2_buf(struct cifs_open_parms *oparms)
          */
         buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout);
         buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
-       generate_random_uuid(buf->dcontext.CreateGuid);
-       memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
+
+       /* for replay, we should not overwrite the existing create guid */
+       if (!oparms->replay) {
+               generate_random_uuid(buf->dcontext.CreateGuid);
+               memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
+       } else
+               memcpy(buf->dcontext.CreateGuid, pfid->create_guid, 16);
  
         /* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */
         buf->Name[0] = 'D';
@@ -3128,6 +3147,7 @@ replay_again:
         /* reinitialize for possible replay */
         flags = 0;
         server = cifs_pick_channel(ses);
+       oparms->replay = !!(retries);
  
         cifs_dbg(FYI, "create/open\n");
         if (!ses || !server)
@@ -5192,6 +5212,9 @@ int SMB2_query_directory_init(const unsigned int xid,
         case SMB_FIND_FILE_POSIX_INFO:
                 req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO;
                 break;
+       case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+               req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION;
+               break;
         default:
                 cifs_tcon_dbg(VFS, "info level %u isn't supported\n",
                         info_level);
@@ -5261,6 +5284,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon,
                 /* note that posix payload are variable size */
                 info_buf_size = sizeof(struct smb2_posix_info);
                 break;
+       case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+               info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO);
+               break;
         default:
                 cifs_tcon_dbg(VFS, "info level %u isn't supported\n",
                          srch_inf->info_level);
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c

index e00278fcfa4fa65f063430001c5506e3a2906358..994d70193432978de213a19a0f9933bd90e63671 100644 (file)
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -435,8 +435,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
         if (!(flags & CIFS_TRANSFORM_REQ))
                 return __smb_send_rqst(server, num_rqst, rqst);
  
-       if (num_rqst > MAX_COMPOUND - 1)
-               return -ENOMEM;
+       if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1))
+               return -EIO;
  
         if (!server->ops->init_transform_rq) {
                 cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n");
diff --git a/fs/smb/server/misc.c b/fs/smb/server/misc.c

index 9e8afaa686e3aa8c12e908348aa38b34168ec367..1a5faa6f6e7bc3ddb96bdaa1ce953ba06f3bf5a2 100644 (file)
--- a/fs/smb/server/misc.c
+++ b/fs/smb/server/misc.c
@@ -261,6 +261,7 @@ out_ascii:
  
  /**
   * ksmbd_extract_sharename() - get share name from tree connect request
+ * @um: pointer to a unicode_map structure for character encoding handling
   * @treename:  buffer containing tree name and share name
   *
   * Return:      share name on success, otherwise error
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c

index ba7a72a6a4f45f6b756768c4a3a48e19d74e3683..0c97d3c860726a303081eb25927d38439bfaf4ea 100644 (file)
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -6173,8 +6173,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
                 err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
                                              offsetof(struct smb2_read_rsp, Buffer),
                                              aux_payload_buf, nbytes);
-               if (err)
+               if (err) {
+                       kvfree(aux_payload_buf);
                         goto out;
+               }
                 kvfree(rpc_resp);
         } else {
                 err = ksmbd_iov_pin_rsp(work, (void *)rsp,
@@ -6384,8 +6386,10 @@ int smb2_read(struct ksmbd_work *work)
         err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
                                      offsetof(struct smb2_read_rsp, Buffer),
                                      aux_payload_buf, nbytes);
-       if (err)
+       if (err) {
+               kvfree(aux_payload_buf);
                 goto out;
+       }
         ksmbd_fd_put(work, fp);
         return 0;
  
diff --git a/fs/super.c b/fs/super.c

index d35e852954892dadcf1df6757c8b491904d2edbb..d6efeba0d0ce7c464c6d32c8d9001007d35259b1 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -274,9 +274,10 @@ static void destroy_super_work(struct work_struct *work)
  {
         struct super_block *s = container_of(work, struct super_block,
                                                         destroy_work);
-       int i;
-
-       for (i = 0; i < SB_FREEZE_LEVELS; i++)
+       security_sb_free(s);
+       put_user_ns(s->s_user_ns);
+       kfree(s->s_subtype);
+       for (int i = 0; i < SB_FREEZE_LEVELS; i++)
                 percpu_free_rwsem(&s->s_writers.rw_sem[i]);
         kfree(s);
  }
@@ -296,9 +297,6 @@ static void destroy_unused_super(struct super_block *s)
         super_unlock_excl(s);
         list_lru_destroy(&s->s_dentry_lru);
         list_lru_destroy(&s->s_inode_lru);
-       security_sb_free(s);
-       put_user_ns(s->s_user_ns);
-       kfree(s->s_subtype);
         shrinker_free(s->s_shrink);
         /* no delays needed */
         destroy_super_work(&s->destroy_work);
@@ -409,9 +407,6 @@ static void __put_super(struct super_block *s)
                 WARN_ON(s->s_dentry_lru.node);
                 WARN_ON(s->s_inode_lru.node);
                 WARN_ON(!list_empty(&s->s_mounts));
-               security_sb_free(s);
-               put_user_ns(s->s_user_ns);
-               kfree(s->s_subtype);
                 call_rcu(&s->rcu, destroy_super_rcu);
         }
  }
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c

index 6b211522a13ec100a0af815798d36536b7239c4a..110e8a27218900756f3af6cd515d6e8cf33e9514 100644 (file)
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -62,6 +62,46 @@ enum {
  
  #define EVENTFS_MODE_MASK      (EVENTFS_SAVE_MODE - 1)
  
+/*
+ * eventfs_inode reference count management.
+ *
+ * NOTE! We count only references from dentries, in the
+ * form 'dentry->d_fsdata'. There are also references from
+ * directory inodes ('ti->private'), but the dentry reference
+ * count is always a superset of the inode reference count.
+ */
+static void release_ei(struct kref *ref)
+{
+       struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
+
+       WARN_ON_ONCE(!ei->is_freed);
+
+       kfree(ei->entry_attrs);
+       kfree_const(ei->name);
+       kfree_rcu(ei, rcu);
+}
+
+static inline void put_ei(struct eventfs_inode *ei)
+{
+       if (ei)
+               kref_put(&ei->kref, release_ei);
+}
+
+static inline void free_ei(struct eventfs_inode *ei)
+{
+       if (ei) {
+               ei->is_freed = 1;
+               put_ei(ei);
+       }
+}
+
+static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei)
+{
+       if (ei)
+               kref_get(&ei->kref);
+       return ei;
+}
+
  static struct dentry *eventfs_root_lookup(struct inode *dir,
                                           struct dentry *dentry,
                                           unsigned int flags);
@@ -156,33 +196,30 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
         return ret;
  }
  
-static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry)
+static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb)
  {
-       struct inode *inode;
+       struct inode *root;
  
         /* Only update if the "events" was on the top level */
         if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
                 return;
  
         /* Get the tracefs root inode. */
-       inode = d_inode(dentry->d_sb->s_root);
-       ei->attr.uid = inode->i_uid;
-       ei->attr.gid = inode->i_gid;
+       root = d_inode(sb->s_root);
+       ei->attr.uid = root->i_uid;
+       ei->attr.gid = root->i_gid;
  }
  
  static void set_top_events_ownership(struct inode *inode)
  {
         struct tracefs_inode *ti = get_tracefs(inode);
         struct eventfs_inode *ei = ti->private;
-       struct dentry *dentry;
  
         /* The top events directory doesn't get automatically updated */
         if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
                 return;
  
-       dentry = ei->dentry;
-
-       update_top_events_attr(ei, dentry);
+       update_top_events_attr(ei, inode->i_sb);
  
         if (!(ei->attr.mode & EVENTFS_SAVE_UID))
                 inode->i_uid = ei->attr.uid;
@@ -233,10 +270,11 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
  {
         struct eventfs_inode *ei;
  
-       mutex_lock(&eventfs_mutex);
         do {
-               /* The parent always has an ei, except for events itself */
-               ei = dentry->d_parent->d_fsdata;
+               // The parent is stable because we do not do renames
+               dentry = dentry->d_parent;
+               // ... and directories always have d_fsdata
+               ei = dentry->d_fsdata;
  
                 /*
                  * If the ei is being freed, the ownership of the children
@@ -246,12 +284,10 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
                         ei = NULL;
                         break;
                 }
-
-               dentry = ei->dentry;
+               // Walk upwards until you find the events inode
         } while (!ei->is_events);
-       mutex_unlock(&eventfs_mutex);
  
-       update_top_events_attr(ei, dentry);
+       update_top_events_attr(ei, dentry->d_sb);
  
         return ei;
  }
@@ -281,50 +317,11 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
                 inode->i_gid = attr->gid;
  }
  
-static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level)
-{
-       struct eventfs_inode *ei_child;
-
-       /* at most we have events/system/event */
-       if (WARN_ON_ONCE(level > 3))
-               return;
-
-       ei->attr.gid = gid;
-
-       if (ei->entry_attrs) {
-               for (int i = 0; i < ei->nr_entries; i++) {
-                       ei->entry_attrs[i].gid = gid;
-               }
-       }
-
-       /*
-        * Only eventfs_inode with dentries are updated, make sure
-        * all eventfs_inodes are updated. If one of the children
-        * do not have a dentry, this function must traverse it.
-        */
-       list_for_each_entry_srcu(ei_child, &ei->children, list,
-                                srcu_read_lock_held(&eventfs_srcu)) {
-               if (!ei_child->dentry)
-                       update_gid(ei_child, gid, level + 1);
-       }
-}
-
-void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
-{
-       struct eventfs_inode *ei = dentry->d_fsdata;
-       int idx;
-
-       idx = srcu_read_lock(&eventfs_srcu);
-       update_gid(ei, gid, 0);
-       srcu_read_unlock(&eventfs_srcu, idx);
-}
-
  /**
- * create_file - create a file in the tracefs filesystem
- * @name: the name of the file to create.
+ * lookup_file - look up a file in the tracefs filesystem
+ * @dentry: the dentry to look up
   * @mode: the permission that the file should have.
   * @attr: saved attributes changed by user
- * @parent: parent dentry for this file.
   * @data: something that the caller will want to get to later on.
   * @fop: struct file_operations that should be used for this file.
   *
@@ -332,30 +329,25 @@ void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
   * directory. The inode.i_private pointer will point to @data in the open()
   * call.
   */
-static struct dentry *create_file(const char *name, umode_t mode,
+static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
+                                 struct dentry *dentry,
+                                 umode_t mode,
                                   struct eventfs_attr *attr,
-                                 struct dentry *parent, void *data,
+                                 void *data,
                                   const struct file_operations *fop)
  {
         struct tracefs_inode *ti;
-       struct dentry *dentry;
         struct inode *inode;
  
         if (!(mode & S_IFMT))
                 mode |= S_IFREG;
  
         if (WARN_ON_ONCE(!S_ISREG(mode)))
-               return NULL;
-
-       WARN_ON_ONCE(!parent);
-       dentry = eventfs_start_creating(name, parent);
-
-       if (IS_ERR(dentry))
-               return dentry;
+               return ERR_PTR(-EIO);
  
         inode = tracefs_get_inode(dentry->d_sb);
         if (unlikely(!inode))
-               return eventfs_failed_creating(dentry);
+               return ERR_PTR(-ENOMEM);
  
         /* If the user updated the directory's attributes, use them */
         update_inode_attr(dentry, inode, attr, mode);
@@ -369,32 +361,31 @@ static struct dentry *create_file(const char *name, umode_t mode,
  
         ti = get_tracefs(inode);
         ti->flags |= TRACEFS_EVENT_INODE;
-       d_instantiate(dentry, inode);
-       fsnotify_create(dentry->d_parent->d_inode, dentry);
-       return eventfs_end_creating(dentry);
+
+       // Files have their parent's ei as their fsdata
+       dentry->d_fsdata = get_ei(parent_ei);
+
+       d_add(dentry, inode);
+       return NULL;
  };
  
  /**
- * create_dir - create a dir in the tracefs filesystem
+ * lookup_dir_entry - look up a dir in the tracefs filesystem
+ * @dentry: the directory to look up
   * @ei: the eventfs_inode that represents the directory to create
- * @parent: parent dentry for this file.
   *
- * This function will create a dentry for a directory represented by
+ * This function will look up a dentry for a directory represented by
   * a eventfs_inode.
   */
-static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
+static struct dentry *lookup_dir_entry(struct dentry *dentry,
+       struct eventfs_inode *pei, struct eventfs_inode *ei)
  {
         struct tracefs_inode *ti;
-       struct dentry *dentry;
         struct inode *inode;
  
-       dentry = eventfs_start_creating(ei->name, parent);
-       if (IS_ERR(dentry))
-               return dentry;
-
         inode = tracefs_get_inode(dentry->d_sb);
         if (unlikely(!inode))
-               return eventfs_failed_creating(dentry);
+               return ERR_PTR(-ENOMEM);
  
         /* If the user updated the directory's attributes, use them */
         update_inode_attr(dentry, inode, &ei->attr,
@@ -408,64 +399,46 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent
  
         ti = get_tracefs(inode);
         ti->flags |= TRACEFS_EVENT_INODE;
+       /* Only directories have ti->private set to an ei, not files */
+       ti->private = ei;
  
-       inc_nlink(inode);
-       d_instantiate(dentry, inode);
-       inc_nlink(dentry->d_parent->d_inode);
-       fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
-       return eventfs_end_creating(dentry);
+       dentry->d_fsdata = get_ei(ei);
+
+       d_add(dentry, inode);
+       return NULL;
  }
  
-static void free_ei(struct eventfs_inode *ei)
+static inline struct eventfs_inode *alloc_ei(const char *name)
  {
-       kfree_const(ei->name);
-       kfree(ei->d_children);
-       kfree(ei->entry_attrs);
-       kfree(ei);
+       struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+
+       if (!ei)
+               return NULL;
+
+       ei->name = kstrdup_const(name, GFP_KERNEL);
+       if (!ei->name) {
+               kfree(ei);
+               return NULL;
+       }
+       kref_init(&ei->kref);
+       return ei;
  }
  
  /**
- * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
- * @ti: the tracefs_inode of the dentry
+ * eventfs_d_release - dentry is going away
   * @dentry: dentry which has the reference to remove.
   *
   * Remove the association between a dentry from an eventfs_inode.
   */
-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
+void eventfs_d_release(struct dentry *dentry)
  {
-       struct eventfs_inode *ei;
-       int i;
-
-       mutex_lock(&eventfs_mutex);
-
-       ei = dentry->d_fsdata;
-       if (!ei)
-               goto out;
-
-       /* This could belong to one of the files of the ei */
-       if (ei->dentry != dentry) {
-               for (i = 0; i < ei->nr_entries; i++) {
-                       if (ei->d_children[i] == dentry)
-                               break;
-               }
-               if (WARN_ON_ONCE(i == ei->nr_entries))
-                       goto out;
-               ei->d_children[i] = NULL;
-       } else if (ei->is_freed) {
-               free_ei(ei);
-       } else {
-               ei->dentry = NULL;
-       }
-
-       dentry->d_fsdata = NULL;
- out:
-       mutex_unlock(&eventfs_mutex);
+       put_ei(dentry->d_fsdata);
  }
  
  /**
- * create_file_dentry - create a dentry for a file of an eventfs_inode
+ * lookup_file_dentry - create a dentry for a file of an eventfs_inode
   * @ei: the eventfs_inode that the file will be created under
- * @idx: the index into the d_children[] of the @ei
+ * @idx: the index into the entry_attrs[] of the @ei
   * @parent: The parent dentry of the created file.
   * @name: The name of the file to create
   * @mode: The mode of the file.
@@ -476,163 +449,17 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
   * address located at @e_dentry.
   */
  static struct dentry *
-create_file_dentry(struct eventfs_inode *ei, int idx,
-                  struct dentry *parent, const char *name, umode_t mode, void *data,
+lookup_file_dentry(struct dentry *dentry,
+                  struct eventfs_inode *ei, int idx,
+                  umode_t mode, void *data,
                    const struct file_operations *fops)
  {
         struct eventfs_attr *attr = NULL;
-       struct dentry **e_dentry = &ei->d_children[idx];
-       struct dentry *dentry;
-
-       WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
  
-       mutex_lock(&eventfs_mutex);
-       if (ei->is_freed) {
-               mutex_unlock(&eventfs_mutex);
-               return NULL;
-       }
-       /* If the e_dentry already has a dentry, use it */
-       if (*e_dentry) {
-               dget(*e_dentry);
-               mutex_unlock(&eventfs_mutex);
-               return *e_dentry;
-       }
-
-       /* ei->entry_attrs are protected by SRCU */
         if (ei->entry_attrs)
                 attr = &ei->entry_attrs[idx];
  
-       mutex_unlock(&eventfs_mutex);
-
-       dentry = create_file(name, mode, attr, parent, data, fops);
-
-       mutex_lock(&eventfs_mutex);
-
-       if (IS_ERR_OR_NULL(dentry)) {
-               /*
-                * When the mutex was released, something else could have
-                * created the dentry for this e_dentry. In which case
-                * use that one.
-                *
-                * If ei->is_freed is set, the e_dentry is currently on its
-                * way to being freed, don't return it. If e_dentry is NULL
-                * it means it was already freed.
-                */
-               if (ei->is_freed) {
-                       dentry = NULL;
-               } else {
-                       dentry = *e_dentry;
-                       dget(dentry);
-               }
-               mutex_unlock(&eventfs_mutex);
-               return dentry;
-       }
-
-       if (!*e_dentry && !ei->is_freed) {
-               *e_dentry = dentry;
-               dentry->d_fsdata = ei;
-       } else {
-               /*
-                * Should never happen unless we get here due to being freed.
-                * Otherwise it means two dentries exist with the same name.
-                */
-               WARN_ON_ONCE(!ei->is_freed);
-               dentry = NULL;
-       }
-       mutex_unlock(&eventfs_mutex);
-
-       return dentry;
-}
-
-/**
- * eventfs_post_create_dir - post create dir routine
- * @ei: eventfs_inode of recently created dir
- *
- * Map the meta-data of files within an eventfs dir to their parent dentry
- */
-static void eventfs_post_create_dir(struct eventfs_inode *ei)
-{
-       struct eventfs_inode *ei_child;
-       struct tracefs_inode *ti;
-
-       lockdep_assert_held(&eventfs_mutex);
-
-       /* srcu lock already held */
-       /* fill parent-child relation */
-       list_for_each_entry_srcu(ei_child, &ei->children, list,
-                                srcu_read_lock_held(&eventfs_srcu)) {
-               ei_child->d_parent = ei->dentry;
-       }
-
-       ti = get_tracefs(ei->dentry->d_inode);
-       ti->private = ei;
-}
-
-/**
- * create_dir_dentry - Create a directory dentry for the eventfs_inode
- * @pei: The eventfs_inode parent of ei.
- * @ei: The eventfs_inode to create the directory for
- * @parent: The dentry of the parent of this directory
- *
- * This creates and attaches a directory dentry to the eventfs_inode @ei.
- */
-static struct dentry *
-create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
-                 struct dentry *parent)
-{
-       struct dentry *dentry = NULL;
-
-       WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
-
-       mutex_lock(&eventfs_mutex);
-       if (pei->is_freed || ei->is_freed) {
-               mutex_unlock(&eventfs_mutex);
-               return NULL;
-       }
-       if (ei->dentry) {
-               /* If the eventfs_inode already has a dentry, use it */
-               dentry = ei->dentry;
-               dget(dentry);
-               mutex_unlock(&eventfs_mutex);
-               return dentry;
-       }
-       mutex_unlock(&eventfs_mutex);
-
-       dentry = create_dir(ei, parent);
-
-       mutex_lock(&eventfs_mutex);
-
-       if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
-               /*
-                * When the mutex was released, something else could have
-                * created the dentry for this e_dentry. In which case
-                * use that one.
-                *
-                * If ei->is_freed is set, the e_dentry is currently on its
-                * way to being freed.
-                */
-               dentry = ei->dentry;
-               if (dentry)
-                       dget(dentry);
-               mutex_unlock(&eventfs_mutex);
-               return dentry;
-       }
-
-       if (!ei->dentry && !ei->is_freed) {
-               ei->dentry = dentry;
-               eventfs_post_create_dir(ei);
-               dentry->d_fsdata = ei;
-       } else {
-               /*
-                * Should never happen unless we get here due to being freed.
-                * Otherwise it means two dentries exist with the same name.
-                */
-               WARN_ON_ONCE(!ei->is_freed);
-               dentry = NULL;
-       }
-       mutex_unlock(&eventfs_mutex);
-
-       return dentry;
+       return lookup_file(ei, dentry, mode, attr, data, fops);
  }
  
  /**
@@ -649,79 +476,50 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
                                           struct dentry *dentry,
                                           unsigned int flags)
  {
-       const struct file_operations *fops;
-       const struct eventfs_entry *entry;
         struct eventfs_inode *ei_child;
         struct tracefs_inode *ti;
         struct eventfs_inode *ei;
-       struct dentry *ei_dentry = NULL;
-       struct dentry *ret = NULL;
-       struct dentry *d;
         const char *name = dentry->d_name.name;
-       umode_t mode;
-       void *data;
-       int idx;
-       int i;
-       int r;
+       struct dentry *result = NULL;
  
         ti = get_tracefs(dir);
         if (!(ti->flags & TRACEFS_EVENT_INODE))
-               return NULL;
-
-       /* Grab srcu to prevent the ei from going away */
-       idx = srcu_read_lock(&eventfs_srcu);
+               return ERR_PTR(-EIO);
  
-       /*
-        * Grab the eventfs_mutex to consistent value from ti->private.
-        * This s
-        */
         mutex_lock(&eventfs_mutex);
-       ei = READ_ONCE(ti->private);
-       if (ei && !ei->is_freed)
-               ei_dentry = READ_ONCE(ei->dentry);
-       mutex_unlock(&eventfs_mutex);
  
-       if (!ei || !ei_dentry)
+       ei = ti->private;
+       if (!ei || ei->is_freed)
                 goto out;
  
-       data = ei->data;
-
-       list_for_each_entry_srcu(ei_child, &ei->children, list,
-                                srcu_read_lock_held(&eventfs_srcu)) {
+       list_for_each_entry(ei_child, &ei->children, list) {
                 if (strcmp(ei_child->name, name) != 0)
                         continue;
-               ret = simple_lookup(dir, dentry, flags);
-               if (IS_ERR(ret))
+               if (ei_child->is_freed)
                         goto out;
-               d = create_dir_dentry(ei, ei_child, ei_dentry);
-               dput(d);
+               result = lookup_dir_entry(dentry, ei, ei_child);
                 goto out;
         }
  
-       for (i = 0; i < ei->nr_entries; i++) {
-               entry = &ei->entries[i];
-               if (strcmp(name, entry->name) == 0) {
-                       void *cdata = data;
-                       mutex_lock(&eventfs_mutex);
-                       /* If ei->is_freed, then the event itself may be too */
-                       if (!ei->is_freed)
-                               r = entry->callback(name, &mode, &cdata, &fops);
-                       else
-                               r = -1;
-                       mutex_unlock(&eventfs_mutex);
-                       if (r <= 0)
-                               continue;
-                       ret = simple_lookup(dir, dentry, flags);
-                       if (IS_ERR(ret))
-                               goto out;
-                       d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
-                       dput(d);
-                       break;
-               }
+       for (int i = 0; i < ei->nr_entries; i++) {
+               void *data;
+               umode_t mode;
+               const struct file_operations *fops;
+               const struct eventfs_entry *entry = &ei->entries[i];
+
+               if (strcmp(name, entry->name) != 0)
+                       continue;
+
+               data = ei->data;
+               if (entry->callback(name, &mode, &data, &fops) <= 0)
+                       goto out;
+
+               result = lookup_file_dentry(dentry, ei, i, mode, data, fops);
+               goto out;
         }
   out:
-       srcu_read_unlock(&eventfs_srcu, idx);
-       return ret;
+       mutex_unlock(&eventfs_mutex);
+       return result;
  }
  
  /*
@@ -871,25 +669,10 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode
         if (!parent)
                 return ERR_PTR(-EINVAL);
  
-       ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+       ei = alloc_ei(name);
         if (!ei)
                 return ERR_PTR(-ENOMEM);
  
-       ei->name = kstrdup_const(name, GFP_KERNEL);
-       if (!ei->name) {
-               kfree(ei);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       if (size) {
-               ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
-               if (!ei->d_children) {
-                       kfree_const(ei->name);
-                       kfree(ei);
-                       return ERR_PTR(-ENOMEM);
-               }
-       }
-
         ei->entries = entries;
         ei->nr_entries = size;
         ei->data = data;
@@ -897,10 +680,8 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode
         INIT_LIST_HEAD(&ei->list);
  
         mutex_lock(&eventfs_mutex);
-       if (!parent->is_freed) {
+       if (!parent->is_freed)
                 list_add_tail(&ei->list, &parent->children);
-               ei->d_parent = parent->dentry;
-       }
         mutex_unlock(&eventfs_mutex);
  
         /* Was the parent freed? */
@@ -940,28 +721,20 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
         if (IS_ERR(dentry))
                 return ERR_CAST(dentry);
  
-       ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+       ei = alloc_ei(name);
         if (!ei)
-               goto fail_ei;
+               goto fail;
  
         inode = tracefs_get_inode(dentry->d_sb);
         if (unlikely(!inode))
                 goto fail;
  
-       if (size) {
-               ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
-               if (!ei->d_children)
-                       goto fail;
-       }
-
-       ei->dentry = dentry;
+       // Note: we have a ref to the dentry from tracefs_start_creating()
+       ei->events_dir = dentry;
         ei->entries = entries;
         ei->nr_entries = size;
         ei->is_events = 1;
         ei->data = data;
-       ei->name = kstrdup_const(name, GFP_KERNEL);
-       if (!ei->name)
-               goto fail;
  
         /* Save the ownership of this directory */
         uid = d_inode(dentry->d_parent)->i_uid;
@@ -992,11 +765,19 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
         inode->i_op = &eventfs_root_dir_inode_operations;
         inode->i_fop = &eventfs_file_operations;
  
-       dentry->d_fsdata = ei;
+       dentry->d_fsdata = get_ei(ei);
  
-       /* directory inodes start off with i_nlink == 2 (for "." entry) */
-       inc_nlink(inode);
+       /*
+        * Keep all eventfs directories with i_nlink == 1.
+        * Due to the dynamic nature of the dentry creations and not
+        * wanting to add a pointer to the parent eventfs_inode in the
+        * eventfs_inode structure, keeping the i_nlink in sync with the
+        * number of directories would cause too much complexity for
+        * something not worth much. Keeping directory links at 1
+        * tells userspace not to trust the link number.
+        */
         d_instantiate(dentry, inode);
+       /* The dentry of the "events" parent does keep track though */
         inc_nlink(dentry->d_parent->d_inode);
         fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
         tracefs_end_creating(dentry);
@@ -1004,72 +785,11 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
         return ei;
  
   fail:
-       kfree(ei->d_children);
-       kfree(ei);
- fail_ei:
+       free_ei(ei);
         tracefs_failed_creating(dentry);
         return ERR_PTR(-ENOMEM);
  }
  
-static LLIST_HEAD(free_list);
-
-static void eventfs_workfn(struct work_struct *work)
-{
-        struct eventfs_inode *ei, *tmp;
-        struct llist_node *llnode;
-
-       llnode = llist_del_all(&free_list);
-        llist_for_each_entry_safe(ei, tmp, llnode, llist) {
-               /* This dput() matches the dget() from unhook_dentry() */
-               for (int i = 0; i < ei->nr_entries; i++) {
-                       if (ei->d_children[i])
-                               dput(ei->d_children[i]);
-               }
-               /* This should only get here if it had a dentry */
-               if (!WARN_ON_ONCE(!ei->dentry))
-                       dput(ei->dentry);
-        }
-}
-
-static DECLARE_WORK(eventfs_work, eventfs_workfn);
-
-static void free_rcu_ei(struct rcu_head *head)
-{
-       struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
-
-       if (ei->dentry) {
-               /* Do not free the ei until all references of dentry are gone */
-               if (llist_add(&ei->llist, &free_list))
-                       queue_work(system_unbound_wq, &eventfs_work);
-               return;
-       }
-
-       /* If the ei doesn't have a dentry, neither should its children */
-       for (int i = 0; i < ei->nr_entries; i++) {
-               WARN_ON_ONCE(ei->d_children[i]);
-       }
-
-       free_ei(ei);
-}
-
-static void unhook_dentry(struct dentry *dentry)
-{
-       if (!dentry)
-               return;
-       /*
-        * Need to add a reference to the dentry that is expected by
-        * simple_recursive_removal(), which will include a dput().
-        */
-       dget(dentry);
-
-       /*
-        * Also add a reference for the dput() in eventfs_workfn().
-        * That is required as that dput() will free the ei after
-        * the SRCU grace period is over.
-        */
-       dget(dentry);
-}
-
  /**
   * eventfs_remove_rec - remove eventfs dir or file from list
   * @ei: eventfs_inode to be removed.
@@ -1082,8 +802,6 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
  {
         struct eventfs_inode *ei_child;
  
-       if (!ei)
-               return;
         /*
          * Check recursion depth. It should never be greater than 3:
          * 0 - events/
@@ -1095,28 +813,11 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
                 return;
  
         /* search for nested folders or files */
-       list_for_each_entry_srcu(ei_child, &ei->children, list,
-                                lockdep_is_held(&eventfs_mutex)) {
-               /* Children only have dentry if parent does */
-               WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
+       list_for_each_entry(ei_child, &ei->children, list)
                 eventfs_remove_rec(ei_child, level + 1);
-       }
-
-
-       ei->is_freed = 1;
  
-       for (int i = 0; i < ei->nr_entries; i++) {
-               if (ei->d_children[i]) {
-                       /* Children only have dentry if parent does */
-                       WARN_ON_ONCE(!ei->dentry);
-                       unhook_dentry(ei->d_children[i]);
-               }
-       }
-
-       unhook_dentry(ei->dentry);
-
-       list_del_rcu(&ei->list);
-       call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+       list_del(&ei->list);
+       free_ei(ei);
  }
  
  /**
@@ -1127,22 +828,12 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
   */
  void eventfs_remove_dir(struct eventfs_inode *ei)
  {
-       struct dentry *dentry;
-
         if (!ei)
                 return;
  
         mutex_lock(&eventfs_mutex);
-       dentry = ei->dentry;
         eventfs_remove_rec(ei, 0);
         mutex_unlock(&eventfs_mutex);
-
-       /*
-        * If any of the ei children has a dentry, then the ei itself
-        * must have a dentry.
-        */
-       if (dentry)
-               simple_recursive_removal(dentry, NULL);
  }
  
  /**
@@ -1155,7 +846,11 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei)
  {
         struct dentry *dentry;
  
-       dentry = ei->dentry;
+       dentry = ei->events_dir;
+       if (!dentry)
+               return;
+
+       ei->events_dir = NULL;
         eventfs_remove_dir(ei);
  
         /*
@@ -1165,5 +860,6 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei)
          * sticks around while the other ei->dentry are created
          * and destroyed dynamically.
          */
+       d_invalidate(dentry);
         dput(dentry);
  }
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c

index e1b172c0e091a8d55fcc80951fa4ed5202b1539e..d65ffad4c327ca11a98a8d2073d8e5c77ac138c3 100644 (file)
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb)
         if (!ti)
                 return NULL;
  
-       ti->flags = 0;
-
         return &ti->vfs_inode;
  }
  
@@ -379,21 +377,30 @@ static const struct super_operations tracefs_super_operations = {
         .show_options   = tracefs_show_options,
  };
  
-static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
+/*
+ * It would be cleaner if eventfs had its own dentry ops.
+ *
+ * Note that d_revalidate is called potentially under RCU,
+ * so it can't take the eventfs mutex etc. It's fine - if
+ * we open a file just as it's marked dead, things will
+ * still work just fine, and just see the old stale case.
+ */
+static void tracefs_d_release(struct dentry *dentry)
  {
-       struct tracefs_inode *ti;
+       if (dentry->d_fsdata)
+               eventfs_d_release(dentry);
+}
  
-       if (!dentry || !inode)
-               return;
+static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+       struct eventfs_inode *ei = dentry->d_fsdata;
  
-       ti = get_tracefs(inode);
-       if (ti && ti->flags & TRACEFS_EVENT_INODE)
-               eventfs_set_ei_status_free(ti, dentry);
-       iput(inode);
+       return !(ei && ei->is_freed);
  }
  
  static const struct dentry_operations tracefs_dentry_operations = {
-       .d_iput = tracefs_dentry_iput,
+       .d_revalidate = tracefs_d_revalidate,
+       .d_release = tracefs_d_release,
  };
  
  static int trace_fill_super(struct super_block *sb, void *data, int silent)
@@ -497,75 +504,6 @@ struct dentry *tracefs_end_creating(struct dentry *dentry)
         return dentry;
  }
  
-/**
- * eventfs_start_creating - start the process of creating a dentry
- * @name: Name of the file created for the dentry
- * @parent: The parent dentry where this dentry will be created
- *
- * This is a simple helper function for the dynamically created eventfs
- * files. When the directory of the eventfs files are accessed, their
- * dentries are created on the fly. This function is used to start that
- * process.
- */
-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
-{
-       struct dentry *dentry;
-       int error;
-
-       /* Must always have a parent. */
-       if (WARN_ON_ONCE(!parent))
-               return ERR_PTR(-EINVAL);
-
-       error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
-                             &tracefs_mount_count);
-       if (error)
-               return ERR_PTR(error);
-
-       if (unlikely(IS_DEADDIR(parent->d_inode)))
-               dentry = ERR_PTR(-ENOENT);
-       else
-               dentry = lookup_one_len(name, parent, strlen(name));
-
-       if (!IS_ERR(dentry) && dentry->d_inode) {
-               dput(dentry);
-               dentry = ERR_PTR(-EEXIST);
-       }
-
-       if (IS_ERR(dentry))
-               simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-
-       return dentry;
-}
-
-/**
- * eventfs_failed_creating - clean up a failed eventfs dentry creation
- * @dentry: The dentry to clean up
- *
- * If after calling eventfs_start_creating(), a failure is detected, the
- * resources created by eventfs_start_creating() needs to be cleaned up. In
- * that case, this function should be called to perform that clean up.
- */
-struct dentry *eventfs_failed_creating(struct dentry *dentry)
-{
-       dput(dentry);
-       simple_release_fs(&tracefs_mount, &tracefs_mount_count);
-       return NULL;
-}
-
-/**
- * eventfs_end_creating - Finish the process of creating a eventfs dentry
- * @dentry: The dentry that has successfully been created.
- *
- * This function is currently just a place holder to match
- * eventfs_start_creating(). In case any synchronization needs to be added,
- * this function will be used to implement that without having to modify
- * the callers of eventfs_start_creating().
- */
-struct dentry *eventfs_end_creating(struct dentry *dentry)
-{
-       return dentry;
-}
-
  /* Find the inode that this will use for default */
  static struct inode *instance_inode(struct dentry *parent, struct inode *inode)
  {
@@ -779,7 +717,11 @@ static void init_once(void *foo)
  {
         struct tracefs_inode *ti = (struct tracefs_inode *) foo;
  
+       /* inode_init_once() calls memset() on the vfs_inode portion */
         inode_init_once(&ti->vfs_inode);
+
+       /* Zero out the rest */
+       memset_after(ti, 0, vfs_inode);
  }
  
  static int __init tracefs_init(void)
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h

index 45397df9bb65bffb783329c156e03a2fdb01ebea..beb3dcd0e434207c882bcfe6ec027b6ca3e43526 100644 (file)
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -11,9 +11,10 @@ enum {
  };
  
  struct tracefs_inode {
+       struct inode            vfs_inode;
+       /* The below gets initialized with memset_after(ti, 0, vfs_inode) */
         unsigned long           flags;
         void                    *private;
-       struct inode            vfs_inode;
  };
  
  /*
@@ -31,43 +32,37 @@ struct eventfs_attr {
  /*
   * struct eventfs_inode - hold the properties of the eventfs directories.
   * @list:      link list into the parent directory
+ * @rcu:       Union with @list for freeing
+ * @children:  link list into the child eventfs_inode
   * @entries:   the array of entries representing the files in the directory
   * @name:      the name of the directory to create
- * @children:  link list into the child eventfs_inode
- * @dentry:     the dentry of the directory
- * @d_parent:   pointer to the parent's dentry
- * @d_children: The array of dentries to represent the files when created
+ * @events_dir: the dentry of the events directory
   * @entry_attrs: Saved mode and ownership of the @d_children
- * @attr:      Saved mode and ownership of eventfs_inode itself
   * @data:      The private data to pass to the callbacks
+ * @attr:      Saved mode and ownership of eventfs_inode itself
   * @is_freed:  Flag set if the eventfs is on its way to be freed
   *                Note if is_freed is set, then dentry is corrupted.
+ * @is_events: Flag set for only the top level "events" directory
   * @nr_entries: The number of items in @entries
+ * @ino:       The saved inode number
   */
  struct eventfs_inode {
-       struct list_head                list;
+       union {
+               struct list_head        list;
+               struct rcu_head         rcu;
+       };
+       struct list_head                children;
         const struct eventfs_entry      *entries;
         const char                      *name;
-       struct list_head                children;
-       struct dentry                   *dentry; /* Check is_freed to access */
-       struct dentry                   *d_parent;
-       struct dentry                   **d_children;
+       struct dentry                   *events_dir;
         struct eventfs_attr             *entry_attrs;
-       struct eventfs_attr             attr;
         void                            *data;
+       struct eventfs_attr             attr;
+       struct kref                     kref;
         unsigned int                    is_freed:1;
         unsigned int                    is_events:1;
         unsigned int                    nr_entries:30;
         unsigned int                    ino;
-       /*
-        * Union - used for deletion
-        * @llist:      for calling dput() if needed after RCU
-        * @rcu:        eventfs_inode to delete in RCU
-        */
-       union {
-               struct llist_node       llist;
-               struct rcu_head         rcu;
-       };
  };
  
  static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
@@ -79,10 +74,7 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent);
  struct dentry *tracefs_end_creating(struct dentry *dentry);
  struct dentry *tracefs_failed_creating(struct dentry *dentry);
  struct inode *tracefs_get_inode(struct super_block *sb);
-struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
-struct dentry *eventfs_failed_creating(struct dentry *dentry);
-struct dentry *eventfs_end_creating(struct dentry *dentry);
-void eventfs_update_gid(struct dentry *dentry, kgid_t gid);
-void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry);
+
+void eventfs_d_release(struct dentry *dentry);
  
  #endif /* _TRACEFS_INTERNAL_H */
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c

index 9976a00a73f99c46fc27bf6a8e93a390af7b941e..e965a48e7db96f89b782038e0aa363d526c2a42e 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -421,10 +421,10 @@ xfs_attr_complete_op(
         bool                    do_replace = args->op_flags & XFS_DA_OP_REPLACE;
  
         args->op_flags &= ~XFS_DA_OP_REPLACE;
-       if (do_replace) {
-               args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
+       args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
+       if (do_replace)
                 return replace_state;
-       }
+
         return XFS_DAS_DONE;
  }
  
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c

index 31100120b2c586bbcfb5ee3d1d413926400e215a..e31663cb7b4349e173c2b19ac33eb6b10cd59a33 100644 (file)
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1118,20 +1118,6 @@ xfs_rtbitmap_blockcount(
         return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize);
  }
  
-/*
- * Compute the maximum level number of the realtime summary file, as defined by
- * mkfs.  The historic use of highbit32 on a 64-bit quantity prohibited correct
- * use of rt volumes with more than 2^32 extents.
- */
-uint8_t
-xfs_compute_rextslog(
-       xfs_rtbxlen_t           rtextents)
-{
-       if (!rtextents)
-               return 0;
-       return xfs_highbit64(rtextents);
-}
-
  /*
   * Compute the number of rtbitmap words needed to populate every block of a
   * bitmap that is large enough to track the given number of rt extents.
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h

index 274dc7dae1faf836217bcac95a859fb9cf510d93..152a66750af554d91a0641ae9a3ed1011ffb7386 100644 (file)
--- a/fs/xfs/libxfs/xfs_rtbitmap.h
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -351,20 +351,6 @@ xfs_rtfree_extent(
  int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
                 xfs_filblks_t rtlen);
  
-uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
-
-/* Do we support an rt volume having this number of rtextents? */
-static inline bool
-xfs_validate_rtextents(
-       xfs_rtbxlen_t           rtextents)
-{
-       /* No runt rt volumes */
-       if (rtextents == 0)
-               return false;
-
-       return true;
-}
-
  xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t
                 rtextents);
  unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp,
@@ -383,8 +369,6 @@ unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp,
  # define xfs_rtsummary_read_buf(a,b)                   (-ENOSYS)
  # define xfs_rtbuf_cache_relse(a)                      (0)
  # define xfs_rtalloc_extent_is_free(m,t,s,l,i)         (-ENOSYS)
-# define xfs_compute_rextslog(rtx)                     (0)
-# define xfs_validate_rtextents(rtx)                   (false)
  static inline xfs_filblks_t
  xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
  {
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c

index 4a9e8588f4c98c3647a85682d56fe26620a59ffc..5bb6e2bd6deeed152414cbc8fae5db927f90bdd4 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1377,3 +1377,17 @@ xfs_validate_stripe_geometry(
         }
         return true;
  }
+
+/*
+ * Compute the maximum level number of the realtime summary file, as defined by
+ * mkfs.  The historic use of highbit32 on a 64-bit quantity prohibited correct
+ * use of rt volumes with more than 2^32 extents.
+ */
+uint8_t
+xfs_compute_rextslog(
+       xfs_rtbxlen_t           rtextents)
+{
+       if (!rtextents)
+               return 0;
+       return xfs_highbit64(rtextents);
+}
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h

index 19134b23c10be3824de6a7949d6ccf9ebdfa8de0..2e8e8d63d4eb2249d148b8f6d50f2a71726911f5 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -38,4 +38,6 @@ extern int    xfs_sb_get_secondary(struct xfs_mount *mp,
  extern bool    xfs_validate_stripe_geometry(struct xfs_mount *mp,
                 __s64 sunit, __s64 swidth, int sectorsize, bool silent);
  
+uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
+
  #endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h

index 20b5375f2d9c9ec466ab2cfc0a6482d20e23965b..62e02d5380ad3b47d6dc403a3b1ffba0d202ce43 100644 (file)
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -251,4 +251,16 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off);
  bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off,
                 xfs_fileoff_t len);
  
+/* Do we support an rt volume having this number of rtextents? */
+static inline bool
+xfs_validate_rtextents(
+       xfs_rtbxlen_t           rtextents)
+{
+       /* No runt rt volumes */
+       if (rtextents == 0)
+               return false;
+
+       return true;
+}
+
  #endif /* __XFS_TYPES_H__ */
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c

index 441ca99776527453a19b2e709f2482a758fe5af0..46583517377ffadd57e557897bc050428e84bd4c 100644 (file)
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -15,6 +15,7 @@
  #include "xfs_inode.h"
  #include "xfs_bmap.h"
  #include "xfs_bit.h"
+#include "xfs_sb.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/repair.h"
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c

index fabd0ed9dfa67637686768dff5d27ef0ee78674d..b1ff4f33324a7481ae9818173187e5a767348c76 100644 (file)
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -16,6 +16,7 @@
  #include "xfs_rtbitmap.h"
  #include "xfs_bit.h"
  #include "xfs_bmap.h"
+#include "xfs_sb.h"
  #include "scrub/scrub.h"
  #include "scrub/common.h"
  #include "scrub/trace.h"
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 5a2512d20bd07473a872592911ede7246b8c11b7..98401de832eeab2c0ca66db4f9d8f22041dc4e28 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -350,7 +350,6 @@ xfs_setup_dax_always(
                 return -EINVAL;
         }
  
-       xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
         return 0;
  
  disable_dax:
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c

index 6ab2318a9c8e80271a3f17d2ad852dacad2f2fa1..dba5dcb62bef5d3788c21fbffa11ebfbe4988b0c 100644 (file)
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -348,7 +348,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
         struct zonefs_inode_info *zi = ZONEFS_I(inode);
  
         if (error) {
-               zonefs_io_error(inode, true);
+               /*
+                * For Sync IOs, error recovery is called from
+                * zonefs_file_dio_write().
+                */
+               if (!is_sync_kiocb(iocb))
+                       zonefs_io_error(inode, true);
                 return error;
         }
  
@@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
                         ret = -EINVAL;
                         goto inode_unlock;
                 }
+               /*
+                * Advance the zone write pointer offset. This assumes that the
+                * IO will succeed, which is OK to do because we do not allow
+                * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO
+                * fails, the error path will correct the write pointer offset.
+                */
+               z->z_wpoffset += count;
+               zonefs_inode_account_active(inode);
                 mutex_unlock(&zi->i_truncate_mutex);
         }
  
@@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
         if (ret == -ENOTBLK)
                 ret = -EBUSY;
  
-       if (zonefs_zone_is_seq(z) &&
-           (ret > 0 || ret == -EIOCBQUEUED)) {
-               if (ret > 0)
-                       count = ret;
-
-               /*
-                * Update the zone write pointer offset assuming the write
-                * operation succeeded. If it did not, the error recovery path
-                * will correct it. Also do active seq file accounting.
-                */
-               mutex_lock(&zi->i_truncate_mutex);
-               z->z_wpoffset += count;
-               zonefs_inode_account_active(inode);
-               mutex_unlock(&zi->i_truncate_mutex);
+       /*
+        * For a failed IO or partial completion, trigger error recovery
+        * to update the zone write pointer offset to a correct value.
+        * For asynchronous IOs, zonefs_file_write_dio_end_io() may already
+        * have executed error recovery if the IO already completed when we
+        * reach here. However, we cannot know that and execute error recovery
+        * again (that will not change anything).
+        */
+       if (zonefs_zone_is_seq(z)) {
+               if (ret > 0 && ret != count)
+                       ret = -EIO;
+               if (ret < 0 && ret != -EIOCBQUEUED)
+                       zonefs_io_error(inode, true);
         }
  
  inode_unlock:
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c

index 93971742613a399d07fa2cf7e1f88cba61a91956..b6e8e7c96251d51f85c7183fa2e14352c5171ac2 100644 (file)
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode)
         z->z_mode = inode->i_mode;
  }
  
-struct zonefs_ioerr_data {
-       struct inode    *inode;
-       bool            write;
-};
-
  static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
                               void *data)
  {
-       struct zonefs_ioerr_data *err = data;
-       struct inode *inode = err->inode;
+       struct blk_zone *z = data;
+
+       *z = *zone;
+       return 0;
+}
+
+static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
+                                  bool write)
+{
         struct zonefs_zone *z = zonefs_inode_zone(inode);
         struct super_block *sb = inode->i_sb;
         struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
@@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
         data_size = zonefs_check_zone_condition(sb, z, zone);
         isize = i_size_read(inode);
         if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
-           !err->write && isize == data_size)
-               return 0;
+           !write && isize == data_size)
+               return;
  
         /*
          * At this point, we detected either a bad zone or an inconsistency
@@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
          * In all cases, warn about inode size inconsistency and handle the
          * IO error according to the zone condition and to the mount options.
          */
-       if (zonefs_zone_is_seq(z) && isize != data_size)
+       if (isize != data_size)
                 zonefs_warn(sb,
                             "inode %lu: invalid size %lld (should be %lld)\n",
                             inode->i_ino, isize, data_size);
@@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
         zonefs_i_size_write(inode, data_size);
         z->z_wpoffset = data_size;
         zonefs_inode_account_active(inode);
-
-       return 0;
  }
  
  /*
@@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write)
  {
         struct zonefs_zone *z = zonefs_inode_zone(inode);
         struct super_block *sb = inode->i_sb;
-       struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
         unsigned int noio_flag;
-       unsigned int nr_zones = 1;
-       struct zonefs_ioerr_data err = {
-               .inode = inode,
-               .write = write,
-       };
+       struct blk_zone zone;
         int ret;
  
         /*
-        * The only files that have more than one zone are conventional zone
-        * files with aggregated conventional zones, for which the inode zone
-        * size is always larger than the device zone size.
+        * Conventional zone have no write pointer and cannot become read-only
+        * or offline. So simply fake a report for a single or aggregated zone
+        * and let zonefs_handle_io_error() correct the zone inode information
+        * according to the mount options.
          */
-       if (z->z_size > bdev_zone_sectors(sb->s_bdev))
-               nr_zones = z->z_size >>
-                       (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+       if (!zonefs_zone_is_seq(z)) {
+               zone.start = z->z_sector;
+               zone.len = z->z_size >> SECTOR_SHIFT;
+               zone.wp = zone.start + zone.len;
+               zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
+               zone.cond = BLK_ZONE_COND_NOT_WP;
+               zone.capacity = zone.len;
+               goto handle_io_error;
+       }
  
         /*
          * Memory allocations in blkdev_report_zones() can trigger a memory
@@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write)
          * the GFP_NOIO context avoids both problems.
          */
         noio_flag = memalloc_noio_save();
-       ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
-                                 zonefs_io_error_cb, &err);
-       if (ret != nr_zones)
+       ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
+                                 zonefs_io_error_cb, &zone);
+       memalloc_noio_restore(noio_flag);
+
+       if (ret != 1) {
                 zonefs_err(sb, "Get inode %lu zone information failed %d\n",
                            inode->i_ino, ret);
-       memalloc_noio_restore(noio_flag);
+               zonefs_warn(sb, "remounting filesystem read-only\n");
+               sb->s_flags |= SB_RDONLY;
+               return;
+       }
+
+handle_io_error:
+       zonefs_handle_io_error(inode, &zone, write);
  }
  
  static struct kmem_cache *zonefs_inode_cachep;
diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h

index c4c423e97f069c325ba2ed41b6839adb160d95f6..4453906105ca183a8fe20be81468f5211666d01f 100644 (file)
--- a/include/drm/bridge/aux-bridge.h
+++ b/include/drm/bridge/aux-bridge.h
@@ -9,6 +9,8 @@
  
  #include <drm/drm_connector.h>
  
+struct auxiliary_device;
+
  #if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE)
  int drm_aux_bridge_register(struct device *parent);
  #else
@@ -19,10 +21,23 @@ static inline int drm_aux_bridge_register(struct device *parent)
  #endif
  
  #if IS_ENABLED(CONFIG_DRM_AUX_HPD_BRIDGE)
+struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np);
+int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev);
  struct device *drm_dp_hpd_bridge_register(struct device *parent,
                                           struct device_node *np);
  void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status);
  #else
+static inline struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent,
+                                                                   struct device_node *np)
+{
+       return NULL;
+}
+
+static inline int devm_drm_dp_hpd_bridge_add(struct auxiliary_device *adev)
+{
+       return 0;
+}
+
  static inline struct device *drm_dp_hpd_bridge_register(struct device *parent,
                                                         struct device_node *np)
  {
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h

index 4b9d8fb393a8496da369a9f899cee00d4bab252b..eb4c369a79eb31b705aa78ec28437d3de69b97b7 100644 (file)
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -90,16 +90,6 @@ void kvm_vcpu_pmu_resync_el0(void);
                         vcpu->arch.pmu.events = *kvm_get_pmu_events();  \
         } while (0)
  
-/*
- * Evaluates as true when emulating PMUv3p5, and false otherwise.
- */
-#define kvm_pmu_is_3p5(vcpu) ({                                                \
-       u64 val = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1);                \
-       u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val);        \
-                                                                       \
-       pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5;                          \
-})
-
  u8 kvm_arm_pmu_get_pmuver_limit(void);
  u64 kvm_pmu_evtyper_mask(struct kvm *kvm);
  int kvm_arm_set_default_pmu(struct kvm *kvm);
@@ -168,7 +158,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
  }
  
  #define kvm_vcpu_has_pmu(vcpu)         ({ false; })
-#define kvm_pmu_is_3p5(vcpu)           ({ false; })
  static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {}
  static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
  static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h

index 8cc38e836f540e26411a5d410842cb1a5a5c2ff2..47035946648eafd0e4e88e8f49e0143c4ef58605 100644 (file)
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -13,6 +13,7 @@
  #include <linux/spinlock.h>
  #include <linux/static_key.h>
  #include <linux/types.h>
+#include <linux/xarray.h>
  #include <kvm/iodev.h>
  #include <linux/list.h>
  #include <linux/jump_label.h>
@@ -116,7 +117,7 @@ struct irq_ops {
  
  struct vgic_irq {
         raw_spinlock_t irq_lock;        /* Protects the content of the struct */
-       struct list_head lpi_list;      /* Used to link all LPIs together */
+       struct rcu_head rcu;
         struct list_head ap_list;
  
         struct kvm_vcpu *vcpu;          /* SGIs and PPIs: The VCPU
@@ -273,10 +274,10 @@ struct vgic_dist {
          */
         u64                     propbaser;
  
-       /* Protects the lpi_list and the count value below. */
+       /* Protects the lpi_list. */
         raw_spinlock_t          lpi_list_lock;
-       struct list_head        lpi_list_head;
-       int                     lpi_list_count;
+       struct xarray           lpi_xa;
+       atomic_t                lpi_count;
  
         /* LPI translation cache */
         struct list_head        lpi_translation_cache;
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h

index ae12696ec492c67339409904bb612e9fdc372689..2ad261082bba5f6f0049fa1c642b6ff057f32b5a 100644 (file)
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -141,8 +141,6 @@ struct bdi_writeback {
         struct delayed_work dwork;      /* work item used for writeback */
         struct delayed_work bw_dwork;   /* work item used for bandwidth estimate */
  
-       unsigned long dirty_sleep;      /* last wait */
-
         struct list_head bdi_node;      /* anchored at bdi->wb_list */
  
  #ifdef CONFIG_CGROUP_WRITEBACK
@@ -179,6 +177,11 @@ struct backing_dev_info {
          * any dirty wbs, which is depended upon by bdi_has_dirty().
          */
         atomic_long_t tot_write_bandwidth;
+       /*
+        * Jiffies when last process was dirty throttled on this bdi. Used by
+        * blk-wbt.
+        */
+       unsigned long last_bdp_sleep;
  
         struct bdi_writeback wb;  /* the root writeback info for this bdi */
         struct list_head wb_list; /* list of all wbs */
diff --git a/include/linux/bits.h b/include/linux/bits.h

index 7c0cf5031abe8796fc40c7307ddfde4832f2655d..0eb24d21aac2142cc94b4489ccd786df58a06400 100644 (file)
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -4,6 +4,7 @@
  
  #include <linux/const.h>
  #include <vdso/bits.h>
+#include <uapi/linux/bits.h>
  #include <asm/bitsperlong.h>
  
  #define BIT_MASK(nr)           (UL(1) << ((nr) % BITS_PER_LONG))
@@ -30,15 +31,8 @@
  #define GENMASK_INPUT_CHECK(h, l) 0
  #endif
  
-#define __GENMASK(h, l) \
-       (((~UL(0)) - (UL(1) << (l)) + 1) & \
-        (~UL(0) >> (BITS_PER_LONG - 1 - (h))))
  #define GENMASK(h, l) \
         (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-
-#define __GENMASK_ULL(h, l) \
-       (((~ULL(0)) - (ULL(1) << (l)) + 1) & \
-        (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
  #define GENMASK_ULL(h, l) \
         (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
  
diff --git a/include/linux/bvec.h b/include/linux/bvec.h

index 555aae5448ae4ec00065e00553955b31bb44884b..bd1e361b351c5afa88ff02e7023cd79acd5454fd 100644 (file)
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -83,7 +83,7 @@ struct bvec_iter {
  
         unsigned int            bi_bvec_done;   /* number of bytes completed in
                                                    current bvec */
-} __packed;
+} __packed __aligned(4);
  
  struct bvec_iter_all {
         struct bio_vec  bv;
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h

index 2eaaabbe98cb64d3f64a698a6f527e244649ca4e..1717cc57cdacd3532e5de7d35f1c2d6eb4e1ef5b 100644 (file)
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -283,7 +283,7 @@ struct ceph_msg {
         struct kref kref;
         bool more_to_follow;
         bool needs_out_seq;
-       bool sparse_read;
+       u64 sparse_read_total;
         int front_alloc_len;
  
         struct ceph_msgpool *pool;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h

index fa018d5864e7422c522194c16ff45a8dd0db1376..f66f6aac74f6f108ffba40b62159e047a184b732 100644 (file)
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -45,6 +45,7 @@ enum ceph_sparse_read_state {
         CEPH_SPARSE_READ_HDR    = 0,
         CEPH_SPARSE_READ_EXTENTS,
         CEPH_SPARSE_READ_DATA_LEN,
+       CEPH_SPARSE_READ_DATA_PRE,
         CEPH_SPARSE_READ_DATA,
  };
  
@@ -64,7 +65,7 @@ struct ceph_sparse_read {
         u64                             sr_req_len;  /* orig request length */
         u64                             sr_pos;      /* current pos in buffer */
         int                             sr_index;    /* current extent index */
-       __le32                          sr_datalen;  /* length of actual data */
+       u32                             sr_datalen;  /* length of actual data */
         u32                             sr_count;    /* extent count in reply */
         int                             sr_ext_len;  /* length of extent array */
         struct ceph_sparse_extent       *sr_extent;  /* extent array */
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h

index aebb65bf95a7988dfe8cf9cb1b5ec0945640ced3..75bd1692d2e3791c8122cdce74ebfd67032fee14 100644 (file)
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -64,6 +64,26 @@
                 __builtin_unreachable();        \
         } while (0)
  
+/*
+ * GCC 'asm goto' with outputs miscompiles certain code sequences:
+ *
+ *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+ *
+ * Work around it via the same compiler barrier quirk that we used
+ * to use for the old 'asm goto' workaround.
+ *
+ * Also, always mark such 'asm goto' statements as volatile: all
+ * asm goto statements are supposed to be volatile as per the
+ * documentation, but some versions of gcc didn't actually do
+ * that for asms with outputs:
+ *
+ *    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619
+ */
+#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND
+#define asm_goto_output(x...) \
+       do { asm volatile goto(x); asm (""); } while (0)
+#endif
+
  #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
  #define __HAVE_BUILTIN_BSWAP32__
  #define __HAVE_BUILTIN_BSWAP64__
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h

index 6f1ca49306d2f7e7b51817fc579b82a85246736d..0caf354cb94b5ad9d802addff81a1bd1c3250139 100644 (file)
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -362,8 +362,15 @@ struct ftrace_likely_data {
  #define __member_size(p)       __builtin_object_size(p, 1)
  #endif
  
-#ifndef asm_volatile_goto
-#define asm_volatile_goto(x...) asm goto(x)
+/*
+ * Some versions of gcc do not mark 'asm goto' volatile:
+ *
+ *  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979
+ *
+ * We do it here by hand, because it doesn't hurt.
+ */
+#ifndef asm_goto_output
+#define asm_goto_output(x...) asm volatile goto(x)
  #endif
  
  #ifdef CONFIG_CC_HAS_ASM_INLINE
diff --git a/include/linux/cper.h b/include/linux/cper.h

index c1a7dc3251215a5ba0e982568a746ff5b04602d1..265b0f8fc0b3c876191ba94bbc2d1d9dd66dd848 100644 (file)
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -90,6 +90,29 @@ enum {
         GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E,   \
                   0x72, 0x2D, 0xEB, 0x41)
  
+/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CPER_SEC_CXL_GEN_MEDIA_GUID                                    \
+       GUID_INIT(0xfbcd0a77, 0xc260, 0x417f,                           \
+                 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6)
+/*
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CPER_SEC_CXL_DRAM_GUID                                         \
+       GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,                           \
+                 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24)
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CPER_SEC_CXL_MEM_MODULE_GUID                                   \
+       GUID_INIT(0xfe927475, 0xdd59, 0x4339,                           \
+                 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74)
+
  /*
   * Flags bits definitions for flags in struct cper_record_header
   * If set, the error has been recovered
diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h

index 91125eca4c8ab8ded08a5b4b687c65c69d656401..03fa6d50d46fe5886d92d3cb6cddfe29fc43af11 100644 (file)
--- a/include/linux/cxl-event.h
+++ b/include/linux/cxl-event.h
@@ -140,22 +140,4 @@ struct cxl_cper_event_rec {
         union cxl_event event;
  } __packed;
  
-typedef void (*cxl_cper_callback)(enum cxl_event_type type,
-                                 struct cxl_cper_event_rec *rec);
-
-#ifdef CONFIG_ACPI_APEI_GHES
-int cxl_cper_register_callback(cxl_cper_callback callback);
-int cxl_cper_unregister_callback(cxl_cper_callback callback);
-#else
-static inline int cxl_cper_register_callback(cxl_cper_callback callback)
-{
-       return 0;
-}
-
-static inline int cxl_cper_unregister_callback(cxl_cper_callback callback)
-{
-       return 0;
-}
-#endif
-
  #endif /* _LINUX_CXL_EVENT_H */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h

index 1666c387861f7a8fae32d7ae3acd17c950142ff5..d07cf2f1bb7db18c37333fd211b2e5b18657b254 100644 (file)
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -173,6 +173,7 @@ struct dentry_operations {
  #define DCACHE_DONTCACHE               BIT(7) /* Purge from memory on final dput() */
  
  #define DCACHE_CANT_MOUNT              BIT(8)
+#define DCACHE_GENOCIDE                        BIT(9)
  #define DCACHE_SHRINK_LIST             BIT(10)
  
  #define DCACHE_OP_WEAK_REVALIDATE      BIT(11)
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h

index 3df70d6131c8fee686ffbd8ecfc7e7c432370bac..752dbde4cec1f8073e225961a41bc91435583350 100644 (file)
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -953,7 +953,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
  
  static inline bool is_slave_direction(enum dma_transfer_direction direction)
  {
-       return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM);
+       return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) ||
+              (direction == DMA_DEV_TO_DEV);
  }
  
  static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
diff --git a/include/linux/dpll.h b/include/linux/dpll.h

index 9cf896ea1d4122f3bc7094e46a5af81b999937dc..c60591308ae80fb99aa5abb5832b9a228473a916 100644 (file)
--- a/include/linux/dpll.h
+++ b/include/linux/dpll.h
@@ -10,6 +10,8 @@
  #include <uapi/linux/dpll.h>
  #include <linux/device.h>
  #include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
  
  struct dpll_device;
  struct dpll_pin;
@@ -167,4 +169,13 @@ int dpll_device_change_ntf(struct dpll_device *dpll);
  
  int dpll_pin_change_ntf(struct dpll_pin *pin);
  
+#if !IS_ENABLED(CONFIG_DPLL)
+static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev)
+{
+       return NULL;
+}
+#else
+struct dpll_pin *netdev_dpll_pin(const struct net_device *dev);
+#endif
+
  #endif
diff --git a/include/linux/fs.h b/include/linux/fs.h

index ed5966a70495129be1d6729eed2918240db62df1..1fbc72c5f112c750b87e7d752e4d5871258ddabe 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -352,6 +352,8 @@ enum rw_hint {
   * unrelated IO (like cache flushing, new IO generation, etc).
   */
  #define IOCB_DIO_CALLER_COMP   (1 << 22)
+/* kiocb is a read or write operation submitted by fs/aio.c. */
+#define IOCB_AIO_RW            (1 << 23)
  
  /* for use in trace events */
  #define TRACE_IOCB_STRINGS \
@@ -2101,9 +2103,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
  int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
                                   struct file *file_out, loff_t pos_out,
                                   loff_t *count, unsigned int remap_flags);
-extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
-                                 struct file *file_out, loff_t pos_out,
-                                 loff_t len, unsigned int remap_flags);
  extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
                                    struct file *file_out, loff_t pos_out,
                                    loff_t len, unsigned int remap_flags);
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h

index 9a5c6c76e6533385dbb32de98abfd330c8736585..7f75c9a5187417b3b52386573a47f7c1e95e9126 100644 (file)
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -819,6 +819,24 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
         return ERR_PTR(-ENODEV);
  }
  
+static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc)
+{
+       WARN_ON(1);
+       return ERR_PTR(-ENODEV);
+}
+
+static inline int gpio_device_get_base(struct gpio_device *gdev)
+{
+       WARN_ON(1);
+       return -ENODEV;
+}
+
+static inline const char *gpio_device_get_label(struct gpio_device *gdev)
+{
+       WARN_ON(1);
+       return NULL;
+}
+
  static inline int gpiochip_lock_as_irq(struct gpio_chip *gc,
                                        unsigned int offset)
  {
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h

index 840cd254172d061ec445bdc845a7f1e8cbf20463..7118ac28d46879b615de35a6e3702208de4001e9 100644 (file)
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -77,17 +77,6 @@ enum hid_bpf_attach_flags {
  int hid_bpf_device_event(struct hid_bpf_ctx *ctx);
  int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx);
  
-/* Following functions are kfunc that we export to BPF programs */
-/* available everywhere in HID-BPF */
-__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz);
-
-/* only available in syscall */
-int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags);
-int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz,
-                      enum hid_report_type rtype, enum hid_class_request reqtype);
-struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id);
-void hid_bpf_release_context(struct hid_bpf_ctx *ctx);
-
  /*
   * Below is HID internal
   */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h

index 87e3bedf8eb00323c102787243e7dbfd045ba4e9..641c4567cfa7aee830f8ad0b52abb24bcbe353a8 100644 (file)
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -157,6 +157,7 @@ enum  hrtimer_base_type {
   * @max_hang_time:     Maximum time spent in hrtimer_interrupt
   * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
   *                      expired
+ * @online:            CPU is online from an hrtimers point of view
   * @timer_waiters:     A hrtimer_cancel() invocation waits for the timer
   *                     callback to finish.
   * @expires_next:      absolute time of the next event, is required for remote
@@ -179,7 +180,8 @@ struct hrtimer_cpu_base {
         unsigned int                    hres_active             : 1,
                                         in_hrtirq               : 1,
                                         hang_detected           : 1,
-                                       softirq_activated       : 1;
+                                       softirq_activated       : 1,
+                                       online                  : 1;
  #ifdef CONFIG_HIGH_RES_TIMERS
         unsigned int                    nr_events;
         unsigned short                  nr_retries;
diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h

index 7852f6c9a714c6fb7dc84321c4b416717b6d4666..719cf9cc6e1ac4db6abbd1171b1590716dc50dc9 100644 (file)
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -8,6 +8,8 @@
  #ifndef __AD_SIGMA_DELTA_H__
  #define __AD_SIGMA_DELTA_H__
  
+#include <linux/iio/iio.h>
+
  enum ad_sigma_delta_mode {
         AD_SD_MODE_CONTINUOUS = 0,
         AD_SD_MODE_SINGLE = 1,
@@ -99,7 +101,7 @@ struct ad_sigma_delta {
          * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp,
          * rounded to 16 bytes to take into account padding.
          */
-       uint8_t                         tx_buf[4] ____cacheline_aligned;
+       uint8_t                         tx_buf[4] __aligned(IIO_DMA_MINALIGN);
         uint8_t                         rx_buf[16] __aligned(8);
  };
  
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h

index 607c3a89a6471df963e6fc4ed09df84b53c0db0f..f9ae5cdd884f5be041246b5aebbf6467980319dc 100644 (file)
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -258,9 +258,9 @@ struct st_sensor_data {
         bool hw_irq_trigger;
         s64 hw_timestamp;
  
-       char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned;
-
         struct mutex odr_lock;
+
+       char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN);
  };
  
  #ifdef CONFIG_IIO_BUFFER
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h

index dc9ea299e0885cd55018f21b80a35155d0ca1d63..8898966bc0f08c152a8156b85151b223f76e9ffe 100644 (file)
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -11,6 +11,7 @@
  
  #include <linux/spi/spi.h>
  #include <linux/interrupt.h>
+#include <linux/iio/iio.h>
  #include <linux/iio/types.h>
  
  #define ADIS_WRITE_REG(reg) ((0x80 | (reg)))
@@ -131,7 +132,7 @@ struct adis {
         unsigned long           irq_flag;
         void                    *buffer;
  
-       u8                      tx[10] ____cacheline_aligned;
+       u8                      tx[10] __aligned(IIO_DMA_MINALIGN);
         u8                      rx[4];
  };
  
diff --git a/include/linux/iommu.h b/include/linux/iommu.h

index 1ea2a820e1eb035c9eea2ec97d9874c52bbd0b42..5e27cb3a3be99b34e705cb7c4569cfbdf2b11f82 100644 (file)
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -892,11 +892,14 @@ struct iommu_fwspec {
  struct iommu_sva {
         struct device                   *dev;
         struct iommu_domain             *domain;
+       struct list_head                handle_item;
+       refcount_t                      users;
  };
  
  struct iommu_mm_data {
         u32                     pasid;
         struct list_head        sva_domains;
+       struct list_head        sva_handles;
  };
  
  int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index 7e7fd25b09b3ebe3d81e30fb23f506a9ee5a6519..178fe4b69af2776527032acde6a47d92bf98250e 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -148,6 +148,11 @@ static inline bool kvm_is_error_hva(unsigned long addr)
  
  #endif
  
+static inline bool kvm_is_error_gpa(gpa_t gpa)
+{
+       return gpa == INVALID_GPA;
+}
+
  #define KVM_ERR_PTR_BAD_PAGE   (ERR_PTR(-ENOENT))
  
  static inline bool is_error_page(struct page *page)
@@ -238,7 +243,6 @@ struct kvm_async_pf {
         struct list_head link;
         struct list_head queue;
         struct kvm_vcpu *vcpu;
-       struct mm_struct *mm;
         gpa_t cr2_or_gpa;
         unsigned long addr;
         struct kvm_arch_async_pf arch;
@@ -1319,21 +1323,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
   *
   * @gpc:          struct gfn_to_pfn_cache object.
   * @kvm:          pointer to kvm instance.
- * @vcpu:         vCPU to be used for marking pages dirty and to be woken on
- *                invalidation.
- * @usage:        indicates if the resulting host physical PFN is used while
- *                the @vcpu is IN_GUEST_MODE (in which case invalidation of 
- *                the cache from MMU notifiers---but not for KVM memslot
- *                changes!---will also force @vcpu to exit the guest and
- *                refresh the cache); and/or if the PFN used directly
- *                by KVM (and thus needs a kernel virtual mapping).
   *
   * This sets up a gfn_to_pfn_cache by initializing locks and assigning the
   * immutable attributes.  Note, the cache must be zero-allocated (or zeroed by
   * the caller before init).
   */
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
-                 struct kvm_vcpu *vcpu, enum pfn_cache_usage usage);
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm);
  
  /**
   * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
@@ -1353,6 +1348,22 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
   */
  int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
  
+/**
+ * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA.
+ *
+ * @gpc:          struct gfn_to_pfn_cache object.
+ * @hva:          userspace virtual address to map.
+ * @len:          sanity check; the range being access must fit a single page.
+ *
+ * @return:       0 for success.
+ *                -EINVAL for a mapping which would cross a page boundary.
+ *                -EFAULT for an untranslatable guest physical address.
+ *
+ * The semantics of this function are the same as those of kvm_gpc_activate(). It
+ * merely bypasses a layer of address translation.
+ */
+int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len);
+
  /**
   * kvm_gpc_check - check validity of a gfn_to_pfn_cache.
   *
@@ -1399,6 +1410,16 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len);
   */
  void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc);
  
+static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc)
+{
+       return gpc->active && !kvm_is_error_gpa(gpc->gpa);
+}
+
+static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc)
+{
+       return gpc->active && kvm_is_error_gpa(gpc->gpa);
+}
+
  void kvm_sigset_activate(struct kvm_vcpu *vcpu);
  void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
  
@@ -1505,9 +1526,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
  bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
  bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
  int kvm_arch_post_init_vm(struct kvm *kvm);
  void kvm_arch_pre_destroy_vm(struct kvm *kvm);
-int kvm_arch_create_vm_debugfs(struct kvm *kvm);
+void kvm_arch_create_vm_debugfs(struct kvm *kvm);
  
  #ifndef __KVM_HAVE_ARCH_VM_ALLOC
  /*
@@ -1788,11 +1810,21 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
         return (hpa_t)pfn << PAGE_SHIFT;
  }
  
-static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa)
  {
         unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
  
-       return kvm_is_error_hva(hva);
+       return !kvm_is_error_hva(hva);
+}
+
+static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc)
+{
+       lockdep_assert_held(&gpc->lock);
+
+       if (!gpc->memslot)
+               return;
+
+       mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa));
  }
  
  enum kvm_stat_kind {
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h

index 9d1f7835d8c13917ad171297752e072c04bec1b3..d93f6522b2c34c2e5e33b80266f43cb2f7775bde 100644 (file)
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -49,12 +49,6 @@ typedef u64            hfn_t;
  
  typedef hfn_t kvm_pfn_t;
  
-enum pfn_cache_usage {
-       KVM_GUEST_USES_PFN = BIT(0),
-       KVM_HOST_USES_PFN  = BIT(1),
-       KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN,
-};
-
  struct gfn_to_hva_cache {
         u64 generation;
         gpa_t gpa;
@@ -69,13 +63,11 @@ struct gfn_to_pfn_cache {
         unsigned long uhva;
         struct kvm_memory_slot *memslot;
         struct kvm *kvm;
-       struct kvm_vcpu *vcpu;
         struct list_head list;
         rwlock_t lock;
         struct mutex refresh_lock;
         void *khva;
         kvm_pfn_t pfn;
-       enum pfn_cache_usage usage;
         bool active;
         bool valid;
  };
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h

index 185924c5637876a153957a3a206fe907dcc784a5..76458b6d53da7667b31fc3a80007bb1a609ec1d8 100644 (file)
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -315,9 +315,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname)
  LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname)
  LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how)
  LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb)
-LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock,
+LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock,
          sockptr_t optval, sockptr_t optlen, unsigned int len)
-LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock,
+LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock,
          struct sk_buff *skb, u32 *secid)
  LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
  LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h

index b695f9e946dabb46f08e1d1688d275bb3ff35b49..e2082240586d00b5f21af7b3f9e0ca6176b5884a 100644 (file)
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -121,6 +121,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
  int memblock_physmem_add(phys_addr_t base, phys_addr_t size);
  #endif
  void memblock_trim_memory(phys_addr_t align);
+unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
+                                    phys_addr_t base2, phys_addr_t size2);
  bool memblock_overlaps_region(struct memblock_type *type,
                               phys_addr_t base, phys_addr_t size);
  bool memblock_validate_numa_coverage(unsigned long threshold_bytes);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h

index c726f90ab752452cbe9726462ecedd72b21656f6..3fd6310b6da60fb1e13806ac4f1ca8a95e7e295a 100644 (file)
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1103,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits {
         u8         sw_r_roce_src_udp_port[0x1];
         u8         fl_rc_qp_when_roce_disabled[0x1];
         u8         fl_rc_qp_when_roce_enabled[0x1];
-       u8         reserved_at_7[0x1];
+       u8         roce_cc_general[0x1];
         u8         qp_ooo_transmit_default[0x1];
         u8         reserved_at_9[0x15];
         u8         qp_ts_format[0x2];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h

index bd53cf4be7bdcbe4ea47ab640fbe0052ffc88bef..f0e55bf3ec8b5b0dd10c3270c1659e1fbb96ac64 100644 (file)
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg {
         union {
                 struct {
                         __be16 sz;
-                       u8     start[2];
+                       union {
+                               u8     start[2];
+                               DECLARE_FLEX_ARRAY(u8, data);
+                       };
                 } inline_hdr;
                 struct {
                         __be16 type;
diff --git a/include/linux/mm.h b/include/linux/mm.h

index f5a97dec51694894a979dd1d045a6b982622e09c..59576e56c58b297edd292603a5e42e489a6d915c 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -391,6 +391,20 @@ extern unsigned int kobjsize(const void *objp);
  # define VM_UFFD_MINOR         VM_NONE
  #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
  
+/*
+ * This flag is used to connect VFIO to arch specific KVM code. It
+ * indicates that the memory under this VMA is safe for use with any
+ * non-cachable memory type inside KVM. Some VFIO devices, on some
+ * platforms, are thought to be unsafe and can cause machine crashes
+ * if KVM does not lock down the memory type.
+ */
+#ifdef CONFIG_64BIT
+#define VM_ALLOW_ANY_UNCACHED_BIT      39
+#define VM_ALLOW_ANY_UNCACHED          BIT(VM_ALLOW_ANY_UNCACHED_BIT)
+#else
+#define VM_ALLOW_ANY_UNCACHED          VM_NONE
+#endif
+
  /* Bits set in the VMA until the stack is in its final location */
  #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
  
diff --git a/include/linux/mman.h b/include/linux/mman.h

index 40d94411d49204e7276a6ad9554eb17335fd4577..dc7048824be81d628ca12f0874c1a7508da0d5c1 100644 (file)
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -156,6 +156,7 @@ calc_vm_flag_bits(unsigned long flags)
         return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
                _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
                _calc_vm_trans(flags, MAP_SYNC,       VM_SYNC      ) |
+              _calc_vm_trans(flags, MAP_STACK,      VM_NOHUGEPAGE) |
                arch_calc_vm_flag_bits(flags);
  }
  
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index 4ed33b12782151632e36aa114039cb4a0916fe06..a497f189d98818bcda37458746ebb2bded7826e4 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2013,9 +2013,9 @@ static inline int pfn_valid(unsigned long pfn)
         if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
                 return 0;
         ms = __pfn_to_section(pfn);
-       rcu_read_lock();
+       rcu_read_lock_sched();
         if (!valid_section(ms)) {
-               rcu_read_unlock();
+               rcu_read_unlock_sched();
                 return 0;
         }
         /*
@@ -2023,7 +2023,7 @@ static inline int pfn_valid(unsigned long pfn)
          * the entire section-sized span.
          */
         ret = early_section(ms) || pfn_section_valid(ms, pfn);
-       rcu_read_unlock();
+       rcu_read_unlock_sched();
  
         return ret;
  }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index 118c40258d07b787adf518e576e75545e4bae846..a9c973b92294bb110cf3cd336485972127b01b58 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2141,6 +2141,11 @@ struct net_device {
  
         /* TXRX read-mostly hotpath */
         __cacheline_group_begin(net_device_read_txrx);
+       union {
+               struct pcpu_lstats __percpu             *lstats;
+               struct pcpu_sw_netstats __percpu        *tstats;
+               struct pcpu_dstats __percpu             *dstats;
+       };
         unsigned int            flags;
         unsigned short          hard_header_len;
         netdev_features_t       features;
@@ -2395,11 +2400,6 @@ struct net_device {
         enum netdev_ml_priv_type        ml_priv_type;
  
         enum netdev_stat_type           pcpu_stat_type:8;
-       union {
-               struct pcpu_lstats __percpu             *lstats;
-               struct pcpu_sw_netstats __percpu        *tstats;
-               struct pcpu_dstats __percpu             *dstats;
-       };
  
  #if IS_ENABLED(CONFIG_GARP)
         struct garp_port __rcu  *garp_port;
@@ -2469,7 +2469,7 @@ struct net_device {
         struct devlink_port     *devlink_port;
  
  #if IS_ENABLED(CONFIG_DPLL)
-       struct dpll_pin         *dpll_pin;
+       struct dpll_pin __rcu   *dpll_pin;
  #endif
  #if IS_ENABLED(CONFIG_PAGE_POOL)
         /** @page_pools: page pools created for this netdevice */
@@ -4035,15 +4035,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
  void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin);
  void netdev_dpll_pin_clear(struct net_device *dev);
  
-static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev)
-{
-#if IS_ENABLED(CONFIG_DPLL)
-       return dev->dpll_pin;
-#else
-       return NULL;
-#endif
-}
-
  struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
  struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                                     struct netdev_queue *txq, int *ret);
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h

index 80900d9109920f686f971e431b95361a831fcc26..ce660d51549b469243357bf9187e108582bc4d95 100644 (file)
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -474,6 +474,7 @@ struct nf_ct_hook {
                               const struct sk_buff *);
         void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
         void (*set_closing)(struct nf_conntrack *nfct);
+       int (*confirm)(struct sk_buff *skb);
  };
  extern const struct nf_ct_hook __rcu *nf_ct_hook;
  
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h

index e8c350a3ade153d852bec011dbd3c72a352d319d..e9f4f845d760afafbfb6e45b220dfb6919a29779 100644 (file)
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -186,6 +186,8 @@ struct ip_set_type_variant {
         /* Return true if "b" set is the same as "a"
          * according to the create set parameters */
         bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
+       /* Cancel ongoing garbage collectors before destroying the set*/
+       void (*cancel_gc)(struct ip_set *set);
         /* Region-locking is used */
         bool region_lock;
  };
@@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type);
  
  /* A generic IP set */
  struct ip_set {
+       /* For call_cru in destroy */
+       struct rcu_head rcu;
         /* The name of the set */
         char name[IPSET_MAXNAMELEN];
         /* Lock protecting the set data */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h

index cd797e00fe359a91b44b2e012309e87d5b446a7e..92de074e63b98c03cefbb2f07d60de0b8f1fb039 100644 (file)
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -124,6 +124,7 @@ struct nfs_client {
         char                    cl_ipaddr[48];
         struct net              *cl_net;
         struct list_head        pending_cb_stateids;
+       struct rcu_head         rcu;
  };
  
  /*
@@ -265,6 +266,7 @@ struct nfs_server {
         const struct cred       *cred;
         bool                    has_sec_mnt_opts;
         struct kobject          kobj;
+       struct rcu_head         rcu;
  };
  
  /* Server capabilities */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h

index 462c21e0e417654e56edf314ebcb62d8c6f4ad16..3ef4053ea9500bc975acd5433ff387633c92eb62 100644 (file)
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -646,6 +646,7 @@ enum {
         NVME_CMD_EFFECTS_NCC            = 1 << 2,
         NVME_CMD_EFFECTS_NIC            = 1 << 3,
         NVME_CMD_EFFECTS_CCC            = 1 << 4,
+       NVME_CMD_EFFECTS_CSER_MASK      = GENMASK(15, 14),
         NVME_CMD_EFFECTS_CSE_MASK       = GENMASK(18, 16),
         NVME_CMD_EFFECTS_UUID_SEL       = 1 << 19,
         NVME_CMD_EFFECTS_SCOPE_MASK     = GENMASK(31, 20),
@@ -816,12 +817,6 @@ struct nvme_reservation_status_ext {
         struct nvme_registered_ctrl_ext regctl_eds[];
  };
  
-enum nvme_async_event_type {
-       NVME_AER_TYPE_ERROR     = 0,
-       NVME_AER_TYPE_SMART     = 1,
-       NVME_AER_TYPE_NOTICE    = 2,
-};
-
  /* I/O commands */
  
  enum nvme_opcode {
@@ -1818,7 +1813,7 @@ struct nvme_command {
         };
  };
  
-static inline bool nvme_is_fabrics(struct nvme_command *cmd)
+static inline bool nvme_is_fabrics(const struct nvme_command *cmd)
  {
         return cmd->common.opcode == nvme_fabrics_command;
  }
@@ -1837,7 +1832,7 @@ struct nvme_error_slot {
         __u8            resv2[24];
  };
  
-static inline bool nvme_is_write(struct nvme_command *cmd)
+static inline bool nvme_is_write(const struct nvme_command *cmd)
  {
         /*
          * What a mess...
diff --git a/include/linux/pci.h b/include/linux/pci.h

index add9368e6314b9d7038a651af3f8e1b9e08d7ffa..7ab0d13672dafa0faaeaf4cf02e7bab6fcc3130b 100644 (file)
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev,
                                   struct pci_saved_state **state);
  int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
  int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
+int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state);
  pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
  bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
  void pci_pme_active(struct pci_dev *dev, bool enable);
@@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
  
  void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
                   void *userdata);
+void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
+                        void *userdata);
  int pci_cfg_space_size(struct pci_dev *dev);
  unsigned char pci_bus_max_busnr(struct pci_bus *bus);
  void pci_setup_bridge(struct pci_bus *bus);
@@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; }
  static inline void pci_restore_state(struct pci_dev *dev) { }
  static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
  { return 0; }
+static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
+{ return 0; }
  static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable)
  { return 0; }
  static inline pci_power_t pci_choose_state(struct pci_dev *dev,
diff --git a/include/linux/poison.h b/include/linux/poison.h

index 27a7dad17eefb83b917569fdc6a5df7298f3859b..1f0ee2459f2aa2db997a979ff7df74b1d1bd588c 100644 (file)
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -92,4 +92,7 @@
  /********** VFS **********/
  #define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA))
  
+/********** lib/stackdepot.c **********/
+#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA))
+
  #endif
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h

index de407e7c3b55fdbd9b5d3cbe93585b1e417a3e20..0b2a8985444097f91cd0557563ad9438e3c494ea 100644 (file)
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -65,6 +65,7 @@ struct proc_fs_info {
         kgid_t pid_gid;
         enum proc_hidepid hide_pid;
         enum proc_pidonly pidonly;
+       struct rcu_head rcu;
  };
  
  static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h

index eaaef3ffec221b93cfbb9a2f4c20646b473754fa..90507d4afcd6debb80eef494c4c874c8a1732e49 100644 (file)
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs)
  #define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
  #endif
  
+#ifndef exception_ip
+#define exception_ip(x) instruction_pointer(x)
+#endif
+
  extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);
  
  extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h

index c44f4b47b945306318d8ed164c498abfe2512a10..fe41da0059700432044c1260952073914f7a26fc 100644 (file)
--- a/include/linux/seq_buf.h
+++ b/include/linux/seq_buf.h
@@ -2,7 +2,10 @@
  #ifndef _LINUX_SEQ_BUF_H
  #define _LINUX_SEQ_BUF_H
  
-#include <linux/fs.h>
+#include <linux/bug.h>
+#include <linux/minmax.h>
+#include <linux/seq_file.h>
+#include <linux/types.h>
  
  /*
   * Trace sequences are used to allow a function to call several other functions
@@ -10,7 +13,7 @@
   */
  
  /**
- * seq_buf - seq buffer structure
+ * struct seq_buf - seq buffer structure
   * @buffer:    pointer to the buffer
   * @size:      size of the buffer
   * @len:       the amount of data inside the buffer
@@ -77,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s)
  }
  
  /**
- * seq_buf_str - get %NUL-terminated C string from seq_buf
+ * seq_buf_str - get NUL-terminated C string from seq_buf
   * @s: the seq_buf handle
   *
- * This makes sure that the buffer in @s is nul terminated and
+ * This makes sure that the buffer in @s is NUL-terminated and
   * safe to read as a string.
   *
   * Note, if this is called when the buffer has overflowed, then
@@ -90,7 +93,7 @@ static inline unsigned int seq_buf_used(struct seq_buf *s)
   * After this function is called, s->buffer is safe to use
   * in string operations.
   *
- * Returns @s->buf after making sure it is terminated.
+ * Returns: @s->buf after making sure it is terminated.
   */
  static inline const char *seq_buf_str(struct seq_buf *s)
  {
@@ -110,7 +113,7 @@ static inline const char *seq_buf_str(struct seq_buf *s)
   * @s: the seq_buf handle
   * @bufp: the beginning of the buffer is stored here
   *
- * Return the number of bytes available in the buffer, or zero if
+ * Returns: the number of bytes available in the buffer, or zero if
   * there's no space.
   */
  static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
@@ -132,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
   * @num: the number of bytes to commit
   *
   * Commit @num bytes of data written to a buffer previously acquired
- * by seq_buf_get.  To signal an error condition, or that the data
+ * by seq_buf_get_buf(). To signal an error condition, or that the data
   * didn't fit in the available space, pass a negative @num value.
   */
  static inline void seq_buf_commit(struct seq_buf *s, int num)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h

index 536b2581d3e2007593323a53c050d037d6ac5dd1..55b1f3ba48ac1725f110747b8d05ce6bbfc1de0b 100644 (file)
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -748,8 +748,17 @@ struct uart_driver {
  
  void uart_write_wakeup(struct uart_port *port);
  
-#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test,      \
-               for_post)                                                     \
+/**
+ * enum UART_TX_FLAGS -- flags for uart_port_tx_flags()
+ *
+ * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer
+ */
+enum UART_TX_FLAGS {
+       UART_TX_NOSTOP = BIT(0),
+};
+
+#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done,        \
+                      for_test, for_post)                                    \
  ({                                                                           \
         struct uart_port *__port = (uport);                                   \
         struct circ_buf *xmit = &__port->state->xmit;                         \
@@ -777,7 +786,7 @@ void uart_write_wakeup(struct uart_port *port);
         if (pending < WAKEUP_CHARS) {                                         \
                 uart_write_wakeup(__port);                                    \
                                                                               \
-               if (pending == 0)                                             \
+               if (!((flags) & UART_TX_NOSTOP) && pending == 0)              \
                         __port->ops->stop_tx(__port);                         \
         }                                                                     \
                                                                               \
@@ -812,7 +821,7 @@ void uart_write_wakeup(struct uart_port *port);
   */
  #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \
         unsigned int __count = (count);                                       \
-       __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count,        \
+       __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count,     \
                         __count--);                                           \
  })
  
@@ -826,8 +835,21 @@ void uart_write_wakeup(struct uart_port *port);
   * See uart_port_tx_limited() for more details.
   */
  #define uart_port_tx(port, ch, tx_ready, put_char)                     \
-       __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({}))
+       __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({}))
+
  
+/**
+ * uart_port_tx_flags -- transmit helper for uart_port with flags
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @flags: %UART_TX_NOSTOP or similar
+ * @tx_ready: can HW accept more data function
+ * @put_char: function to write a character
+ *
+ * See uart_port_tx_limited() for more details.
+ */
+#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char)                \
+       __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({}))
  /*
   * Baud rate helpers.
   */
diff --git a/include/linux/swap.h b/include/linux/swap.h

index 4db00ddad26169060e1d42d5ca9b9723546ab81c..8d28f6091a320ef024597dfd6f84526702d5ec41 100644 (file)
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -549,6 +549,11 @@ static inline int swap_duplicate(swp_entry_t swp)
         return 0;
  }
  
+static inline int swapcache_prepare(swp_entry_t swp)
+{
+       return 0;
+}
+
  static inline void swap_free(swp_entry_t swp)
  {
  }
diff --git a/include/linux/tcp.h b/include/linux/tcp.h

index 89b290d8c8dc9f115df7a295bc8d2512698db169..a1c47a6d69b0efd7e62765fbd873c848da22aaec 100644 (file)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -221,8 +221,10 @@ struct tcp_sock {
         u32     lost_out;       /* Lost packets                 */
         u32     sacked_out;     /* SACK'd packets                       */
         u16     tcp_header_len; /* Bytes of tcp header to send          */
+       u8      scaling_ratio;  /* see tcp_win_from_space() */
         u8      chrono_type : 2,        /* current chronograph type */
                 repair      : 1,
+               tcp_usec_ts : 1, /* TSval values in usec */
                 is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
                 is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
         __cacheline_group_end(tcp_sock_read_txrx);
@@ -352,7 +354,6 @@ struct tcp_sock {
         u32     compressed_ack_rcv_nxt;
         struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
  
-       u8      scaling_ratio;  /* see tcp_win_from_space() */
         /* Information of the most recently (s)acked skb */
         struct tcp_rack {
                 u64 mstamp; /* (Re)sent time of the skb */
@@ -368,8 +369,7 @@ struct tcp_sock {
         u8      compressed_ack;
         u8      dup_ack_counter:2,
                 tlp_retrans:1,  /* TLP is a retransmission */
-               tcp_usec_ts:1, /* TSval values in usec */
-               unused:4;
+               unused:5;
         u8      thin_lto    : 1,/* Use linear timeouts for thin streams */
                 recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
                 fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h

index a771ccc038ac949f2b4a835e28d720735e17ee22..6532beb587b1978e09bc5b17dc088daf91f9f88c 100644 (file)
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -236,7 +236,6 @@ struct usb_ep {
         unsigned                max_streams:16;
         unsigned                mult:2;
         unsigned                maxburst:5;
-       unsigned                fifo_mode:1;
         u8                      address;
         const struct usb_endpoint_descriptor    *desc;
         const struct usb_ss_ep_comp_descriptor  *comp_desc;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h

index 49c4640027d8a6b93e903a6238d21e8541e31da4..afd40dce40f3d593f6fa0a11828aee9fd1582de3 100644 (file)
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -46,12 +46,6 @@ struct scm_stat {
  
  #define UNIXCB(skb)    (*(struct unix_skb_parms *)&((skb)->cb))
  
-#define unix_state_lock(s)     spin_lock(&unix_sk(s)->lock)
-#define unix_state_unlock(s)   spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
-                               spin_lock_nested(&unix_sk(s)->lock, \
-                               SINGLE_DEPTH_NESTING)
-
  /* The AF_UNIX socket */
  struct unix_sock {
         /* WARNING: sk has to be the first member */
@@ -77,6 +71,20 @@ struct unix_sock {
  #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk)
  #define unix_peer(sk) (unix_sk(sk)->peer)
  
+#define unix_state_lock(s)     spin_lock(&unix_sk(s)->lock)
+#define unix_state_unlock(s)   spin_unlock(&unix_sk(s)->lock)
+enum unix_socket_lock_class {
+       U_LOCK_NORMAL,
+       U_LOCK_SECOND,  /* for double locking, see unix_state_double_lock(). */
+       U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
+};
+
+static inline void unix_state_lock_nested(struct sock *sk,
+                                  enum unix_socket_lock_class subclass)
+{
+       spin_lock_nested(&unix_sk(sk)->lock, subclass);
+}
+
  #define peer_wait peer_wq.wait
  
  long unix_inq_len(struct sock *sk);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h

index cf79656ce09ca1f05b733bfce2393f4a044d9454..2b54fdd8ca15a8fae0f810fc5ba550a1b1a44676 100644 (file)
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2910,6 +2910,8 @@ struct cfg80211_bss_ies {
   *     own the beacon_ies, but they're just pointers to the ones from the
   *     @hidden_beacon_bss struct)
   * @proberesp_ies: the information elements from the last Probe Response frame
+ * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame,
+ *     cannot rely on it having valid data
   * @hidden_beacon_bss: in case this BSS struct represents a probe response from
   *     a BSS that hides the SSID in its beacon, this points to the BSS struct
   *     that holds the beacon data. @beacon_ies is still valid, of course, and
@@ -2950,6 +2952,8 @@ struct cfg80211_bss {
         u8 chains;
         s8 chain_signal[IEEE80211_MAX_CHAINS];
  
+       u8 proberesp_ecsa_stuck:1;
+
         u8 bssid_index;
         u8 max_bssid_indicator;
  
diff --git a/include/net/ip.h b/include/net/ip.h

index de0c69c57e3cb7485e3d8473bc0b109e4280d2f6..25cb688bdc62360292e25b0d676f135101a2118c 100644 (file)
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -767,7 +767,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
   *     Functions provided by ip_sockglue.c
   */
  
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst);
  void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
                          struct sk_buff *skb, int tlen, int offset);
  int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
diff --git a/include/net/mctp.h b/include/net/mctp.h

index da86e106c91d57b2eedfc7bb301867eb8e51c123..2bff5f47ce82f1c6f2774f49d13a647c573034d7 100644 (file)
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -249,6 +249,7 @@ struct mctp_route {
  struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
                                      mctp_eid_t daddr);
  
+/* always takes ownership of skb */
  int mctp_local_output(struct sock *sk, struct mctp_route *rt,
                       struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
  
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h

index 956c752ceb3180115eec0b607d81cafe5f038ce8..a763dd327c6ea95d6b94fda1ea2efd8f1784335f 100644 (file)
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -276,7 +276,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
  }
  
  void flow_offload_route_init(struct flow_offload *flow,
-                            const struct nf_flow_route *route);
+                            struct nf_flow_route *route);
  
  int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
  void flow_offload_refresh(struct nf_flowtable *flow_table,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h

index 4e1ea18eb5f05f763429ed2f75c0c1c9aee20d44..510244cc0f8f0e479f252598ba2aaf43b8918978 100644 (file)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -808,10 +808,16 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex
         return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS);
  }
  
-static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext,
+                                         u64 tstamp)
  {
         return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
-              time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
+              time_after_eq64(tstamp, *nft_set_ext_expiration(ext));
+}
+
+static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+{
+       return __nft_set_elem_expired(ext, get_jiffies_64());
  }
  
  static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
@@ -1351,6 +1357,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
   *     @type: stateful object numeric type
   *     @owner: module owner
   *     @maxattr: maximum netlink attribute
+ *     @family: address family for AF-specific object types
   *     @policy: netlink attribute policy
   */
  struct nft_object_type {
@@ -1360,6 +1367,7 @@ struct nft_object_type {
         struct list_head                list;
         u32                             type;
         unsigned int                    maxattr;
+       u8                              family;
         struct module                   *owner;
         const struct nla_policy         *policy;
  };
@@ -1777,6 +1785,7 @@ struct nftables_pernet {
         struct list_head        notify_list;
         struct mutex            commit_mutex;
         u64                     table_handle;
+       u64                     tstamp;
         unsigned int            base_seq;
         unsigned int            gc_seq;
         u8                      validate_state;
@@ -1789,6 +1798,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net)
         return net_generic(net, nf_tables_net_id);
  }
  
+static inline u64 nft_net_tstamp(const struct net *net)
+{
+       return nft_pernet(net)->tstamp;
+}
+
  #define __NFT_REDUCE_READONLY  1UL
  #define NFT_REDUCE_READONLY    (void *)__NFT_REDUCE_READONLY
  
diff --git a/include/net/switchdev.h b/include/net/switchdev.h

index a43062d4c734bb4e8e855fdd150b8c169a7fe172..8346b0d29542c3d5569b94b35eaa12461f78d62a 100644 (file)
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -308,6 +308,9 @@ void switchdev_deferred_process(void);
  int switchdev_port_attr_set(struct net_device *dev,
                             const struct switchdev_attr *attr,
                             struct netlink_ext_ack *extack);
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+                                       enum switchdev_notifier_type nt,
+                                       const struct switchdev_obj *obj);
  int switchdev_port_obj_add(struct net_device *dev,
                            const struct switchdev_obj *obj,
                            struct netlink_ext_ack *extack);
diff --git a/include/net/tcp.h b/include/net/tcp.h

index dd78a11810310e84ef1c1ed8c3e0e274ddd77d7f..f6eba9652d010fbc8482bfd3c99377d631686324 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2506,7 +2506,7 @@ struct tcp_ulp_ops {
         /* cleanup ulp */
         void (*release)(struct sock *sk);
         /* diagnostic */
-       int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+       int (*get_info)(struct sock *sk, struct sk_buff *skb);
         size_t (*get_info_size)(const struct sock *sk);
         /* clone ulp */
         void (*clone)(const struct request_sock *req, struct sock *newsk,
diff --git a/include/net/tls.h b/include/net/tls.h

index 962f0c501111bac34781c4419913540c162ea058..340ad43971e4711d8091a6397bb5cf3c3c4ef0fd 100644 (file)
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -97,9 +97,6 @@ struct tls_sw_context_tx {
         struct tls_rec *open_rec;
         struct list_head tx_list;
         atomic_t encrypt_pending;
-       /* protect crypto_wait with encrypt_pending */
-       spinlock_t encrypt_compl_lock;
-       int async_notify;
         u8 async_capable:1;
  
  #define BIT_TX_SCHEDULED       0
@@ -136,8 +133,6 @@ struct tls_sw_context_rx {
         struct tls_strparser strp;
  
         atomic_t decrypt_pending;
-       /* protect crypto_wait with decrypt_pending*/
-       spinlock_t decrypt_compl_lock;
         struct sk_buff_head async_hold;
         struct wait_queue_head wq;
  };
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h

index 5ec1e71a09de7698616dff799a935da15083deef..c38f4fe5e64cf4f14b668328ab0cfac76ea5d496 100644 (file)
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -100,10 +100,6 @@ struct scsi_vpd {
         unsigned char   data[];
  };
  
-enum scsi_vpd_parameters {
-       SCSI_VPD_HEADER_SIZE = 4,
-};
-
  struct scsi_device {
         struct Scsi_Host *host;
         struct request_queue *request_queue;
@@ -208,6 +204,7 @@ struct scsi_device {
         unsigned use_10_for_rw:1; /* first try 10-byte read / write */
         unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
         unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */
+       unsigned read_before_ms:1;      /* perform a READ before MODE SENSE */
         unsigned no_report_opcodes:1;   /* no REPORT SUPPORTED OPERATION CODES */
         unsigned no_write_same:1;       /* no WRITE SAME command */
         unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */
diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h

index 8c18e8b6d27d21b34962cc392dfeaffe48070847..b24716ab27504bdfa17f221a39053dd21dd961d9 100644 (file)
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -75,6 +75,7 @@
  #define CS35L56_DSP1_AHBM_WINDOW_DEBUG_0               0x25E2040
  #define CS35L56_DSP1_AHBM_WINDOW_DEBUG_1               0x25E2044
  #define CS35L56_DSP1_XMEM_UNPACKED24_0                 0x2800000
+#define CS35L56_DSP1_FW_VER                            0x2800010
  #define CS35L56_DSP1_HALO_STATE_A1                     0x2801E58
  #define CS35L56_DSP1_HALO_STATE                                0x28021E0
  #define CS35L56_DSP1_PM_CUR_STATE_A1                   0x2804000
@@ -241,7 +242,7 @@
  
  #define CS35L56_CONTROL_PORT_READY_US                  2200
  #define CS35L56_HALO_STATE_POLL_US                     1000
-#define CS35L56_HALO_STATE_TIMEOUT_US                  50000
+#define CS35L56_HALO_STATE_TIMEOUT_US                  250000
  #define CS35L56_RESET_PULSE_MIN_US                     1100
  #define CS35L56_WAKE_HOLD_TIME_US                      1000
  
@@ -272,6 +273,7 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC];
  extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC];
  
  int cs35l56_set_patch(struct cs35l56_base *cs35l56_base);
+int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base);
  int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command);
  int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base);
  int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base);
@@ -284,7 +286,10 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base);
  int cs35l56_runtime_suspend_common(struct cs35l56_base *cs35l56_base);
  int cs35l56_runtime_resume_common(struct cs35l56_base *cs35l56_base, bool is_soundwire);
  void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp);
+int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base,
+                            bool *fw_missing, unsigned int *fw_version);
  int cs35l56_hw_init(struct cs35l56_base *cs35l56_base);
+int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base);
  int cs35l56_get_bclk_freq_id(unsigned int freq);
  void cs35l56_fill_supply_names(struct regulator_bulk_data *data);
  
diff --git a/include/sound/soc-card.h b/include/sound/soc-card.h

index ecc02e955279fdfa3f10d116eeb5a2d7271cc91c..1f4c39922d825035be15ba38e7ac6d112b36c457 100644 (file)
--- a/include/sound/soc-card.h
+++ b/include/sound/soc-card.h
@@ -30,6 +30,8 @@ static inline void snd_soc_card_mutex_unlock(struct snd_soc_card *card)
  
  struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card,
                                                const char *name);
+struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card,
+                                                     const char *name);
  int snd_soc_card_jack_new(struct snd_soc_card *card, const char *id, int type,
                           struct snd_soc_jack *jack);
  int snd_soc_card_jack_new_pins(struct snd_soc_card *card, const char *id,
diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h

index b00d65417c310a42a39aec6ce927b85083ace264..9aff384941de27f925d0491312a7088ebeb6a297 100644 (file)
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -142,6 +142,7 @@ struct tasdevice_priv {
  
  void tas2781_reset(struct tasdevice_priv *tas_dev);
  int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
+       struct module *module,
         void (*cont)(const struct firmware *fw, void *context));
  struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c);
  int tasdevice_init(struct tasdevice_priv *tas_priv);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h

index 65029dfb92fbc3162c30d0a85a6805afa3ab335e..a697f4b77162dd79c45c5cdb25db63332818fcc7 100644 (file)
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -772,15 +772,14 @@ TRACE_EVENT(ext4_mb_release_group_pa,
  );
  
  TRACE_EVENT(ext4_discard_preallocations,
-       TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed),
+       TP_PROTO(struct inode *inode, unsigned int len),
  
-       TP_ARGS(inode, len, needed),
+       TP_ARGS(inode, len),
  
         TP_STRUCT__entry(
                 __field(        dev_t,          dev             )
                 __field(        ino_t,          ino             )
                 __field(        unsigned int,   len             )
-               __field(        unsigned int,   needed          )
  
         ),
  
@@ -788,13 +787,11 @@ TRACE_EVENT(ext4_discard_preallocations,
                 __entry->dev    = inode->i_sb->s_dev;
                 __entry->ino    = inode->i_ino;
                 __entry->len    = len;
-               __entry->needed = needed;
         ),
  
-       TP_printk("dev %d,%d ino %lu len: %u needed %u",
+       TP_printk("dev %d,%d ino %lu len: %u",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long) __entry->ino, __entry->len,
-                 __entry->needed)
+                 (unsigned long) __entry->ino, __entry->len)
  );
  
  TRACE_EVENT(ext4_mb_discard_preallocations,
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h

index 4c1ef7b3705c26baf79c135f235c94195635bf4b..87b8de9b6c1c440ce4a8b2fe6072b4d81cbc1cf4 100644 (file)
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -128,6 +128,7 @@
         EM(rxrpc_skb_eaten_by_unshare_nomem,    "ETN unshar-nm") \
         EM(rxrpc_skb_get_conn_secured,          "GET conn-secd") \
         EM(rxrpc_skb_get_conn_work,             "GET conn-work") \
+       EM(rxrpc_skb_get_last_nack,             "GET last-nack") \
         EM(rxrpc_skb_get_local_work,            "GET locl-work") \
         EM(rxrpc_skb_get_reject_work,           "GET rej-work ") \
         EM(rxrpc_skb_get_to_recvmsg,            "GET to-recv  ") \
@@ -141,6 +142,7 @@
         EM(rxrpc_skb_put_error_report,          "PUT error-rep") \
         EM(rxrpc_skb_put_input,                 "PUT input    ") \
         EM(rxrpc_skb_put_jumbo_subpacket,       "PUT jumbo-sub") \
+       EM(rxrpc_skb_put_last_nack,             "PUT last-nack") \
         EM(rxrpc_skb_put_purge,                 "PUT purge    ") \
         EM(rxrpc_skb_put_rotate,                "PUT rotate   ") \
         EM(rxrpc_skb_put_unknown,               "PUT unknown  ") \
@@ -1552,7 +1554,7 @@ TRACE_EVENT(rxrpc_congest,
                     memcpy(&__entry->sum, summary, sizeof(__entry->sum));
                            ),
  
-           TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
+           TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s",
                       __entry->call,
                       __entry->ack_serial,
                       __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names),
@@ -1560,9 +1562,9 @@ TRACE_EVENT(rxrpc_congest,
                       __print_symbolic(__entry->sum.mode, rxrpc_congest_modes),
                       __entry->sum.cwnd,
                       __entry->sum.ssthresh,
-                     __entry->sum.nr_acks, __entry->sum.saw_nacks,
+                     __entry->sum.nr_acks, __entry->sum.nr_retained_nacks,
                       __entry->sum.nr_new_acks,
-                     __entry->sum.nr_rot_new_acks,
+                     __entry->sum.nr_new_nacks,
                       __entry->top - __entry->hard_ack,
                       __entry->sum.cumulative_acks,
                       __entry->sum.dup_acks,
diff --git a/include/uapi/asm-generic/bitsperlong.h b/include/uapi/asm-generic/bitsperlong.h

index 352cb81947b87697960486e4af4e48313e891d3d..fadb3f857f2855db40dca88333d977e6a51b4c11 100644 (file)
--- a/include/uapi/asm-generic/bitsperlong.h
+++ b/include/uapi/asm-generic/bitsperlong.h
@@ -24,4 +24,8 @@
  #endif
  #endif
  
+#ifndef __BITS_PER_LONG_LONG
+#define __BITS_PER_LONG_LONG 64
+#endif
+
  #endif /* _UAPI__ASM_GENERIC_BITS_PER_LONG */
diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h

index 63c49318a863076b0861b945c1511d98f9ffffe3..19a13468eca5e4c13cbdf05444604a54567a67ae 100644 (file)
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -305,6 +305,7 @@ struct drm_ivpu_submit {
  
  /* drm_ivpu_bo_wait job status codes */
  #define DRM_IVPU_JOB_STATUS_SUCCESS 0
+#define DRM_IVPU_JOB_STATUS_ABORTED 256
  
  /**
   * struct drm_ivpu_bo_wait - Wait for BO to become inactive
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h

index 0bade1592f34f21690eab41de48595d7aaa24fe4..77d7ff0d5b110da4a05a4a7730d01bbd2d7c581e 100644 (file)
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -54,6 +54,20 @@ extern "C" {
   */
  #define NOUVEAU_GETPARAM_EXEC_PUSH_MAX   17
  
+/*
+ * NOUVEAU_GETPARAM_VRAM_BAR_SIZE - query bar size
+ *
+ * Query the VRAM BAR size.
+ */
+#define NOUVEAU_GETPARAM_VRAM_BAR_SIZE 18
+
+/*
+ * NOUVEAU_GETPARAM_VRAM_USED
+ *
+ * Get remaining VRAM size.
+ */
+#define NOUVEAU_GETPARAM_VRAM_USED 19
+
  struct drm_nouveau_getparam {
         __u64 param;
         __u64 value;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h

index 9fa3ae324731a6a96d47d81e18566b321f2f0bca..bb0c8a9941164228fef069433194aa2e549a0174 100644 (file)
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -831,11 +831,6 @@ struct drm_xe_vm_destroy {
   *  - %DRM_XE_VM_BIND_OP_PREFETCH
   *
   * and the @flags can be:
- *  - %DRM_XE_VM_BIND_FLAG_READONLY
- *  - %DRM_XE_VM_BIND_FLAG_ASYNC
- *  - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
- *    MAP operation immediately rather than deferring the MAP to the page
- *    fault handler.
   *  - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page
   *    tables are setup with a special bit which indicates writes are
   *    dropped and all reads return zero. In the future, the NULL flags
@@ -928,9 +923,8 @@ struct drm_xe_vm_bind_op {
         /** @op: Bind operation to perform */
         __u32 op;
  
-#define DRM_XE_VM_BIND_FLAG_READONLY   (1 << 0)
-#define DRM_XE_VM_BIND_FLAG_IMMEDIATE  (1 << 1)
  #define DRM_XE_VM_BIND_FLAG_NULL       (1 << 2)
+#define DRM_XE_VM_BIND_FLAG_DUMPABLE   (1 << 3)
         /** @flags: Bind flags */
         __u32 flags;
  
@@ -1045,20 +1039,6 @@ struct drm_xe_exec_queue_create {
  #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY               0
  #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY              0
  #define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE             1
-#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT    2
-#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE           3
-#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT           4
-#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER           5
-#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY            6
-#define   DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY       7
-/* Monitor 128KB contiguous region with 4K sub-granularity */
-#define     DRM_XE_ACC_GRANULARITY_128K                                0
-/* Monitor 2MB contiguous region with 64KB sub-granularity */
-#define     DRM_XE_ACC_GRANULARITY_2M                          1
-/* Monitor 16MB contiguous region with 512KB sub-granularity */
-#define     DRM_XE_ACC_GRANULARITY_16M                         2
-/* Monitor 64MB contiguous region with 2M sub-granularity */
-#define     DRM_XE_ACC_GRANULARITY_64M                         3
  
         /** @extensions: Pointer to the first extension struct, if any */
         __u64 extensions;
diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h

new file mode 100644 (file)

index 0000000..3c2a101
--- /dev/null
+++ b/include/uapi/linux/bits.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* bits.h: Macros for dealing with bitmasks.  */
+
+#ifndef _UAPI_LINUX_BITS_H
+#define _UAPI_LINUX_BITS_H
+
+#define __GENMASK(h, l) \
+        (((~_UL(0)) - (_UL(1) << (l)) + 1) & \
+         (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+
+#define __GENMASK_ULL(h, l) \
+        (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
+         (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif /* _UAPI_LINUX_BITS_H */
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h

index 5060963707b1ed44d9b640454e9b0656a505d761..f2e0b2d50e6b5ffadd52ced37e8a1e36999c9d97 100644 (file)
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -91,8 +91,6 @@ enum iio_modifier {
         IIO_MOD_CO2,
         IIO_MOD_VOC,
         IIO_MOD_LIGHT_UV,
-       IIO_MOD_LIGHT_UVA,
-       IIO_MOD_LIGHT_UVB,
         IIO_MOD_LIGHT_DUV,
         IIO_MOD_PM1,
         IIO_MOD_PM2P5,
@@ -107,6 +105,8 @@ enum iio_modifier {
         IIO_MOD_PITCH,
         IIO_MOD_YAW,
         IIO_MOD_ROLL,
+       IIO_MOD_LIGHT_UVA,
+       IIO_MOD_LIGHT_UVB,
  };
  
  enum iio_event_type {
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h

index c4c53a9ab9595b2a5b95e5b22cafa5bd2cd6fd3c..ff8d21f9e95b7798eaf3e00635050e1631d6697a 100644 (file)
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -145,7 +145,7 @@ struct in6_flowlabel_req {
  #define IPV6_TLV_PADN          1
  #define IPV6_TLV_ROUTERALERT   5
  #define IPV6_TLV_CALIPSO       7       /* RFC 5570 */
-#define IPV6_TLV_IOAM          49      /* TEMPORARY IANA allocation for IOAM */
+#define IPV6_TLV_IOAM          49      /* RFC 9486 */
  #define IPV6_TLV_JUMBO         194
  #define IPV6_TLV_HAO           201     /* home address option */
  
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h

index c3308536482bdb2bfb1279279325faf5430a3356..2190adbe30027cec3bd88bdf7c4366cd7c424b82 100644 (file)
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -16,6 +16,11 @@
  
  #define KVM_API_VERSION 12
  
+/*
+ * Backwards-compatible definitions.
+ */
+#define __KVM_HAVE_GUEST_DEBUG
+
  /* for KVM_SET_USER_MEMORY_REGION */
  struct kvm_userspace_memory_region {
         __u32 slot;
@@ -85,43 +90,6 @@ struct kvm_pit_config {
  
  #define KVM_PIT_SPEAKER_DUMMY     1
  
-struct kvm_s390_skeys {
-       __u64 start_gfn;
-       __u64 count;
-       __u64 skeydata_addr;
-       __u32 flags;
-       __u32 reserved[9];
-};
-
-#define KVM_S390_CMMA_PEEK (1 << 0)
-
-/**
- * kvm_s390_cmma_log - Used for CMMA migration.
- *
- * Used both for input and output.
- *
- * @start_gfn: Guest page number to start from.
- * @count: Size of the result buffer.
- * @flags: Control operation mode via KVM_S390_CMMA_* flags
- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
- *             pages are still remaining.
- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
- *        in the PGSTE.
- * @values: Pointer to the values buffer.
- *
- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
- */
-struct kvm_s390_cmma_log {
-       __u64 start_gfn;
-       __u32 count;
-       __u32 flags;
-       union {
-               __u64 remaining;
-               __u64 mask;
-       };
-       __u64 values;
-};
-
  struct kvm_hyperv_exit {
  #define KVM_EXIT_HYPERV_SYNIC          1
  #define KVM_EXIT_HYPERV_HCALL          2
@@ -315,11 +283,6 @@ struct kvm_run {
                         __u32 ipb;
                 } s390_sieic;
                 /* KVM_EXIT_S390_RESET */
-#define KVM_S390_RESET_POR       1
-#define KVM_S390_RESET_CLEAR     2
-#define KVM_S390_RESET_SUBSYSTEM 4
-#define KVM_S390_RESET_CPU_INIT  8
-#define KVM_S390_RESET_IPL       16
                 __u64 s390_reset_flags;
                 /* KVM_EXIT_S390_UCONTROL */
                 struct {
@@ -536,43 +499,6 @@ struct kvm_translation {
         __u8  pad[5];
  };
  
-/* for KVM_S390_MEM_OP */
-struct kvm_s390_mem_op {
-       /* in */
-       __u64 gaddr;            /* the guest address */
-       __u64 flags;            /* flags */
-       __u32 size;             /* amount of bytes */
-       __u32 op;               /* type of operation */
-       __u64 buf;              /* buffer in userspace */
-       union {
-               struct {
-                       __u8 ar;        /* the access register number */
-                       __u8 key;       /* access key, ignored if flag unset */
-                       __u8 pad1[6];   /* ignored */
-                       __u64 old_addr; /* ignored if cmpxchg flag unset */
-               };
-               __u32 sida_offset; /* offset into the sida */
-               __u8 reserved[32]; /* ignored */
-       };
-};
-/* types for kvm_s390_mem_op->op */
-#define KVM_S390_MEMOP_LOGICAL_READ    0
-#define KVM_S390_MEMOP_LOGICAL_WRITE   1
-#define KVM_S390_MEMOP_SIDA_READ       2
-#define KVM_S390_MEMOP_SIDA_WRITE      3
-#define KVM_S390_MEMOP_ABSOLUTE_READ   4
-#define KVM_S390_MEMOP_ABSOLUTE_WRITE  5
-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG        6
-
-/* flags for kvm_s390_mem_op->flags */
-#define KVM_S390_MEMOP_F_CHECK_ONLY            (1ULL << 0)
-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION      (1ULL << 1)
-#define KVM_S390_MEMOP_F_SKEY_PROTECTION       (1ULL << 2)
-
-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE      (1 << 0)
-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG   (1 << 1)
-
  /* for KVM_INTERRUPT */
  struct kvm_interrupt {
         /* in */
@@ -637,124 +563,6 @@ struct kvm_mp_state {
         __u32 mp_state;
  };
  
-struct kvm_s390_psw {
-       __u64 mask;
-       __u64 addr;
-};
-
-/* valid values for type in kvm_s390_interrupt */
-#define KVM_S390_SIGP_STOP             0xfffe0000u
-#define KVM_S390_PROGRAM_INT           0xfffe0001u
-#define KVM_S390_SIGP_SET_PREFIX       0xfffe0002u
-#define KVM_S390_RESTART               0xfffe0003u
-#define KVM_S390_INT_PFAULT_INIT       0xfffe0004u
-#define KVM_S390_INT_PFAULT_DONE       0xfffe0005u
-#define KVM_S390_MCHK                  0xfffe1000u
-#define KVM_S390_INT_CLOCK_COMP                0xffff1004u
-#define KVM_S390_INT_CPU_TIMER         0xffff1005u
-#define KVM_S390_INT_VIRTIO            0xffff2603u
-#define KVM_S390_INT_SERVICE           0xffff2401u
-#define KVM_S390_INT_EMERGENCY         0xffff1201u
-#define KVM_S390_INT_EXTERNAL_CALL     0xffff1202u
-/* Anything below 0xfffe0000u is taken by INT_IO */
-#define KVM_S390_INT_IO(ai,cssid,ssid,schid)   \
-       (((schid)) |                           \
-        ((ssid) << 16) |                      \
-        ((cssid) << 18) |                     \
-        ((ai) << 26))
-#define KVM_S390_INT_IO_MIN            0x00000000u
-#define KVM_S390_INT_IO_MAX            0xfffdffffu
-#define KVM_S390_INT_IO_AI_MASK                0x04000000u
-
-
-struct kvm_s390_interrupt {
-       __u32 type;
-       __u32 parm;
-       __u64 parm64;
-};
-
-struct kvm_s390_io_info {
-       __u16 subchannel_id;
-       __u16 subchannel_nr;
-       __u32 io_int_parm;
-       __u32 io_int_word;
-};
-
-struct kvm_s390_ext_info {
-       __u32 ext_params;
-       __u32 pad;
-       __u64 ext_params2;
-};
-
-struct kvm_s390_pgm_info {
-       __u64 trans_exc_code;
-       __u64 mon_code;
-       __u64 per_address;
-       __u32 data_exc_code;
-       __u16 code;
-       __u16 mon_class_nr;
-       __u8 per_code;
-       __u8 per_atmid;
-       __u8 exc_access_id;
-       __u8 per_access_id;
-       __u8 op_access_id;
-#define KVM_S390_PGM_FLAGS_ILC_VALID   0x01
-#define KVM_S390_PGM_FLAGS_ILC_0       0x02
-#define KVM_S390_PGM_FLAGS_ILC_1       0x04
-#define KVM_S390_PGM_FLAGS_ILC_MASK    0x06
-#define KVM_S390_PGM_FLAGS_NO_REWIND   0x08
-       __u8 flags;
-       __u8 pad[2];
-};
-
-struct kvm_s390_prefix_info {
-       __u32 address;
-};
-
-struct kvm_s390_extcall_info {
-       __u16 code;
-};
-
-struct kvm_s390_emerg_info {
-       __u16 code;
-};
-
-#define KVM_S390_STOP_FLAG_STORE_STATUS        0x01
-struct kvm_s390_stop_info {
-       __u32 flags;
-};
-
-struct kvm_s390_mchk_info {
-       __u64 cr14;
-       __u64 mcic;
-       __u64 failing_storage_address;
-       __u32 ext_damage_code;
-       __u32 pad;
-       __u8 fixed_logout[16];
-};
-
-struct kvm_s390_irq {
-       __u64 type;
-       union {
-               struct kvm_s390_io_info io;
-               struct kvm_s390_ext_info ext;
-               struct kvm_s390_pgm_info pgm;
-               struct kvm_s390_emerg_info emerg;
-               struct kvm_s390_extcall_info extcall;
-               struct kvm_s390_prefix_info prefix;
-               struct kvm_s390_stop_info stop;
-               struct kvm_s390_mchk_info mchk;
-               char reserved[64];
-       } u;
-};
-
-struct kvm_s390_irq_state {
-       __u64 buf;
-       __u32 flags;        /* will stay unused for compatibility reasons */
-       __u32 len;
-       __u32 reserved[4];  /* will stay unused for compatibility reasons */
-};
-
  /* for KVM_SET_GUEST_DEBUG */
  
  #define KVM_GUESTDBG_ENABLE            0x00000001
@@ -810,50 +618,6 @@ struct kvm_enable_cap {
         __u8  pad[64];
  };
  
-/* for KVM_PPC_GET_PVINFO */
-
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
-
-struct kvm_ppc_pvinfo {
-       /* out */
-       __u32 flags;
-       __u32 hcall[4];
-       __u8  pad[108];
-};
-
-/* for KVM_PPC_GET_SMMU_INFO */
-#define KVM_PPC_PAGE_SIZES_MAX_SZ      8
-
-struct kvm_ppc_one_page_size {
-       __u32 page_shift;       /* Page shift (or 0) */
-       __u32 pte_enc;          /* Encoding in the HPTE (>>12) */
-};
-
-struct kvm_ppc_one_seg_page_size {
-       __u32 page_shift;       /* Base page shift of segment (or 0) */
-       __u32 slb_enc;          /* SLB encoding for BookS */
-       struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-#define KVM_PPC_PAGE_SIZES_REAL                0x00000001
-#define KVM_PPC_1T_SEGMENTS            0x00000002
-#define KVM_PPC_NO_HASH                        0x00000004
-
-struct kvm_ppc_smmu_info {
-       __u64 flags;
-       __u32 slb_size;
-       __u16 data_keys;        /* # storage keys supported for data */
-       __u16 instr_keys;       /* # storage keys supported for instructions */
-       struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
-struct kvm_ppc_resize_hpt {
-       __u64 flags;
-       __u32 shift;
-       __u32 pad;
-};
-
  #define KVMIO 0xAE
  
  /* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -923,9 +687,7 @@ struct kvm_ppc_resize_hpt {
  /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
  #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
  #define KVM_CAP_USER_NMI 22
-#ifdef __KVM_HAVE_GUEST_DEBUG
  #define KVM_CAP_SET_GUEST_DEBUG 23
-#endif
  #ifdef __KVM_HAVE_PIT
  #define KVM_CAP_REINJECT_CONTROL 24
  #endif
@@ -1156,8 +918,6 @@ struct kvm_ppc_resize_hpt {
  #define KVM_CAP_GUEST_MEMFD 234
  #define KVM_CAP_VM_TYPES 235
  
-#ifdef KVM_CAP_IRQ_ROUTING
-
  struct kvm_irq_routing_irqchip {
         __u32 irqchip;
         __u32 pin;
@@ -1222,42 +982,6 @@ struct kvm_irq_routing {
         struct kvm_irq_routing_entry entries[];
  };
  
-#endif
-
-#ifdef KVM_CAP_MCE
-/* x86 MCE */
-struct kvm_x86_mce {
-       __u64 status;
-       __u64 addr;
-       __u64 misc;
-       __u64 mcg_status;
-       __u8 bank;
-       __u8 pad1[7];
-       __u64 pad2[3];
-};
-#endif
-
-#ifdef KVM_CAP_XEN_HVM
-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR       (1 << 0)
-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL     (1 << 1)
-#define KVM_XEN_HVM_CONFIG_SHARED_INFO         (1 << 2)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE        (1 << 7)
-
-struct kvm_xen_hvm_config {
-       __u32 flags;
-       __u32 msr;
-       __u64 blob_addr_32;
-       __u64 blob_addr_64;
-       __u8 blob_size_32;
-       __u8 blob_size_64;
-       __u8 pad2[30];
-};
-#endif
-
  #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
  /*
   * Available with KVM_CAP_IRQFD_RESAMPLE
@@ -1442,11 +1166,6 @@ struct kvm_vfio_spapr_tce {
                                          struct kvm_userspace_memory_region2)
  
  /* enable ucontrol for s390 */
-struct kvm_s390_ucas_mapping {
-       __u64 user_addr;
-       __u64 vcpu_addr;
-       __u64 length;
-};
  #define KVM_S390_UCAS_MAP        _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
  #define KVM_S390_UCAS_UNMAP      _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
  #define KVM_S390_VCPU_FAULT     _IOW(KVMIO, 0x52, unsigned long)
@@ -1641,89 +1360,6 @@ struct kvm_enc_region {
  #define KVM_S390_NORMAL_RESET  _IO(KVMIO,   0xc3)
  #define KVM_S390_CLEAR_RESET   _IO(KVMIO,   0xc4)
  
-struct kvm_s390_pv_sec_parm {
-       __u64 origin;
-       __u64 length;
-};
-
-struct kvm_s390_pv_unp {
-       __u64 addr;
-       __u64 size;
-       __u64 tweak;
-};
-
-enum pv_cmd_dmp_id {
-       KVM_PV_DUMP_INIT,
-       KVM_PV_DUMP_CONFIG_STOR_STATE,
-       KVM_PV_DUMP_COMPLETE,
-       KVM_PV_DUMP_CPU,
-};
-
-struct kvm_s390_pv_dmp {
-       __u64 subcmd;
-       __u64 buff_addr;
-       __u64 buff_len;
-       __u64 gaddr;            /* For dump storage state */
-       __u64 reserved[4];
-};
-
-enum pv_cmd_info_id {
-       KVM_PV_INFO_VM,
-       KVM_PV_INFO_DUMP,
-};
-
-struct kvm_s390_pv_info_dump {
-       __u64 dump_cpu_buffer_len;
-       __u64 dump_config_mem_buffer_per_1m;
-       __u64 dump_config_finalize_len;
-};
-
-struct kvm_s390_pv_info_vm {
-       __u64 inst_calls_list[4];
-       __u64 max_cpus;
-       __u64 max_guests;
-       __u64 max_guest_addr;
-       __u64 feature_indication;
-};
-
-struct kvm_s390_pv_info_header {
-       __u32 id;
-       __u32 len_max;
-       __u32 len_written;
-       __u32 reserved;
-};
-
-struct kvm_s390_pv_info {
-       struct kvm_s390_pv_info_header header;
-       union {
-               struct kvm_s390_pv_info_dump dump;
-               struct kvm_s390_pv_info_vm vm;
-       };
-};
-
-enum pv_cmd_id {
-       KVM_PV_ENABLE,
-       KVM_PV_DISABLE,
-       KVM_PV_SET_SEC_PARMS,
-       KVM_PV_UNPACK,
-       KVM_PV_VERIFY,
-       KVM_PV_PREP_RESET,
-       KVM_PV_UNSHARE_ALL,
-       KVM_PV_INFO,
-       KVM_PV_DUMP,
-       KVM_PV_ASYNC_CLEANUP_PREPARE,
-       KVM_PV_ASYNC_CLEANUP_PERFORM,
-};
-
-struct kvm_pv_cmd {
-       __u32 cmd;      /* Command to be executed */
-       __u16 rc;       /* Ultravisor return code */
-       __u16 rrc;      /* Ultravisor return reason code */
-       __u64 data;     /* Data or address */
-       __u32 flags;    /* flags for future extensions. Must be 0 for now */
-       __u32 reserved[3];
-};
-
  /* Available with KVM_CAP_S390_PROTECTED */
  #define KVM_S390_PV_COMMAND            _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
  
@@ -1737,58 +1373,6 @@ struct kvm_pv_cmd {
  #define KVM_XEN_HVM_GET_ATTR   _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
  #define KVM_XEN_HVM_SET_ATTR   _IOW(KVMIO,  0xc9, struct kvm_xen_hvm_attr)
  
-struct kvm_xen_hvm_attr {
-       __u16 type;
-       __u16 pad[3];
-       union {
-               __u8 long_mode;
-               __u8 vector;
-               __u8 runstate_update_flag;
-               struct {
-                       __u64 gfn;
-#define KVM_XEN_INVALID_GFN ((__u64)-1)
-               } shared_info;
-               struct {
-                       __u32 send_port;
-                       __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
-                       __u32 flags;
-#define KVM_XEN_EVTCHN_DEASSIGN                (1 << 0)
-#define KVM_XEN_EVTCHN_UPDATE          (1 << 1)
-#define KVM_XEN_EVTCHN_RESET           (1 << 2)
-                       /*
-                        * Events sent by the guest are either looped back to
-                        * the guest itself (potentially on a different port#)
-                        * or signalled via an eventfd.
-                        */
-                       union {
-                               struct {
-                                       __u32 port;
-                                       __u32 vcpu;
-                                       __u32 priority;
-                               } port;
-                               struct {
-                                       __u32 port; /* Zero for eventfd */
-                                       __s32 fd;
-                               } eventfd;
-                               __u32 padding[4];
-                       } deliver;
-               } evtchn;
-               __u32 xen_version;
-               __u64 pad[8];
-       } u;
-};
-
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_ATTR_TYPE_LONG_MODE            0x0
-#define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR                0x2
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_ATTR_TYPE_EVTCHN               0x3
-#define KVM_XEN_ATTR_TYPE_XEN_VERSION          0x4
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
-
  /* Per-vCPU Xen attributes */
  #define KVM_XEN_VCPU_GET_ATTR  _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
  #define KVM_XEN_VCPU_SET_ATTR  _IOW(KVMIO,  0xcb, struct kvm_xen_vcpu_attr)
@@ -1799,242 +1383,6 @@ struct kvm_xen_hvm_attr {
  #define KVM_GET_SREGS2             _IOR(KVMIO,  0xcc, struct kvm_sregs2)
  #define KVM_SET_SREGS2             _IOW(KVMIO,  0xcd, struct kvm_sregs2)
  
-struct kvm_xen_vcpu_attr {
-       __u16 type;
-       __u16 pad[3];
-       union {
-               __u64 gpa;
-#define KVM_XEN_INVALID_GPA ((__u64)-1)
-               __u64 pad[8];
-               struct {
-                       __u64 state;
-                       __u64 state_entry_time;
-                       __u64 time_running;
-                       __u64 time_runnable;
-                       __u64 time_blocked;
-                       __u64 time_offline;
-               } runstate;
-               __u32 vcpu_id;
-               struct {
-                       __u32 port;
-                       __u32 priority;
-                       __u64 expires_ns;
-               } timer;
-               __u8 vector;
-       } u;
-};
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR   0x2
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT        0x3
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID         0x6
-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER           0x7
-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR   0x8
-
-/* Secure Encrypted Virtualization command */
-enum sev_cmd_id {
-       /* Guest initialization commands */
-       KVM_SEV_INIT = 0,
-       KVM_SEV_ES_INIT,
-       /* Guest launch commands */
-       KVM_SEV_LAUNCH_START,
-       KVM_SEV_LAUNCH_UPDATE_DATA,
-       KVM_SEV_LAUNCH_UPDATE_VMSA,
-       KVM_SEV_LAUNCH_SECRET,
-       KVM_SEV_LAUNCH_MEASURE,
-       KVM_SEV_LAUNCH_FINISH,
-       /* Guest migration commands (outgoing) */
-       KVM_SEV_SEND_START,
-       KVM_SEV_SEND_UPDATE_DATA,
-       KVM_SEV_SEND_UPDATE_VMSA,
-       KVM_SEV_SEND_FINISH,
-       /* Guest migration commands (incoming) */
-       KVM_SEV_RECEIVE_START,
-       KVM_SEV_RECEIVE_UPDATE_DATA,
-       KVM_SEV_RECEIVE_UPDATE_VMSA,
-       KVM_SEV_RECEIVE_FINISH,
-       /* Guest status and debug commands */
-       KVM_SEV_GUEST_STATUS,
-       KVM_SEV_DBG_DECRYPT,
-       KVM_SEV_DBG_ENCRYPT,
-       /* Guest certificates commands */
-       KVM_SEV_CERT_EXPORT,
-       /* Attestation report */
-       KVM_SEV_GET_ATTESTATION_REPORT,
-       /* Guest Migration Extension */
-       KVM_SEV_SEND_CANCEL,
-
-       KVM_SEV_NR_MAX,
-};
-
-struct kvm_sev_cmd {
-       __u32 id;
-       __u64 data;
-       __u32 error;
-       __u32 sev_fd;
-};
-
-struct kvm_sev_launch_start {
-       __u32 handle;
-       __u32 policy;
-       __u64 dh_uaddr;
-       __u32 dh_len;
-       __u64 session_uaddr;
-       __u32 session_len;
-};
-
-struct kvm_sev_launch_update_data {
-       __u64 uaddr;
-       __u32 len;
-};
-
-
-struct kvm_sev_launch_secret {
-       __u64 hdr_uaddr;
-       __u32 hdr_len;
-       __u64 guest_uaddr;
-       __u32 guest_len;
-       __u64 trans_uaddr;
-       __u32 trans_len;
-};
-
-struct kvm_sev_launch_measure {
-       __u64 uaddr;
-       __u32 len;
-};
-
-struct kvm_sev_guest_status {
-       __u32 handle;
-       __u32 policy;
-       __u32 state;
-};
-
-struct kvm_sev_dbg {
-       __u64 src_uaddr;
-       __u64 dst_uaddr;
-       __u32 len;
-};
-
-struct kvm_sev_attestation_report {
-       __u8 mnonce[16];
-       __u64 uaddr;
-       __u32 len;
-};
-
-struct kvm_sev_send_start {
-       __u32 policy;
-       __u64 pdh_cert_uaddr;
-       __u32 pdh_cert_len;
-       __u64 plat_certs_uaddr;
-       __u32 plat_certs_len;
-       __u64 amd_certs_uaddr;
-       __u32 amd_certs_len;
-       __u64 session_uaddr;
-       __u32 session_len;
-};
-
-struct kvm_sev_send_update_data {
-       __u64 hdr_uaddr;
-       __u32 hdr_len;
-       __u64 guest_uaddr;
-       __u32 guest_len;
-       __u64 trans_uaddr;
-       __u32 trans_len;
-};
-
-struct kvm_sev_receive_start {
-       __u32 handle;
-       __u32 policy;
-       __u64 pdh_uaddr;
-       __u32 pdh_len;
-       __u64 session_uaddr;
-       __u32 session_len;
-};
-
-struct kvm_sev_receive_update_data {
-       __u64 hdr_uaddr;
-       __u32 hdr_len;
-       __u64 guest_uaddr;
-       __u32 guest_len;
-       __u64 trans_uaddr;
-       __u32 trans_len;
-};
-
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
-#define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
-#define KVM_DEV_ASSIGN_MASK_INTX       (1 << 2)
-
-struct kvm_assigned_pci_dev {
-       __u32 assigned_dev_id;
-       __u32 busnr;
-       __u32 devfn;
-       __u32 flags;
-       __u32 segnr;
-       union {
-               __u32 reserved[11];
-       };
-};
-
-#define KVM_DEV_IRQ_HOST_INTX    (1 << 0)
-#define KVM_DEV_IRQ_HOST_MSI     (1 << 1)
-#define KVM_DEV_IRQ_HOST_MSIX    (1 << 2)
-
-#define KVM_DEV_IRQ_GUEST_INTX   (1 << 8)
-#define KVM_DEV_IRQ_GUEST_MSI    (1 << 9)
-#define KVM_DEV_IRQ_GUEST_MSIX   (1 << 10)
-
-#define KVM_DEV_IRQ_HOST_MASK   0x00ff
-#define KVM_DEV_IRQ_GUEST_MASK   0xff00
-
-struct kvm_assigned_irq {
-       __u32 assigned_dev_id;
-       __u32 host_irq; /* ignored (legacy field) */
-       __u32 guest_irq;
-       __u32 flags;
-       union {
-               __u32 reserved[12];
-       };
-};
-
-struct kvm_assigned_msix_nr {
-       __u32 assigned_dev_id;
-       __u16 entry_nr;
-       __u16 padding;
-};
-
-#define KVM_MAX_MSIX_PER_DEV           256
-struct kvm_assigned_msix_entry {
-       __u32 assigned_dev_id;
-       __u32 gsi;
-       __u16 entry; /* The index of entry in the MSI-X table */
-       __u16 padding[3];
-};
-
-#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
-
-/* Available with KVM_CAP_ARM_USER_IRQ */
-
-/* Bits for run->s.regs.device_irq_level */
-#define KVM_ARM_DEV_EL1_VTIMER         (1 << 0)
-#define KVM_ARM_DEV_EL1_PTIMER         (1 << 1)
-#define KVM_ARM_DEV_PMU                        (1 << 2)
-
-struct kvm_hyperv_eventfd {
-       __u32 conn_id;
-       __s32 fd;
-       __u32 flags;
-       __u32 padding[3];
-};
-
-#define KVM_HYPERV_CONN_ID_MASK                0x00ffffff
-#define KVM_HYPERV_EVENTFD_DEASSIGN    (1 << 0)
-
  #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE    (1 << 0)
  #define KVM_DIRTY_LOG_INITIALLY_SET            (1 << 1)
  
@@ -2180,33 +1528,6 @@ struct kvm_stats_desc {
  /* Available with KVM_CAP_S390_ZPCI_OP */
  #define KVM_S390_ZPCI_OP         _IOW(KVMIO,  0xd1, struct kvm_s390_zpci_op)
  
-struct kvm_s390_zpci_op {
-       /* in */
-       __u32 fh;               /* target device */
-       __u8  op;               /* operation to perform */
-       __u8  pad[3];
-       union {
-               /* for KVM_S390_ZPCIOP_REG_AEN */
-               struct {
-                       __u64 ibv;      /* Guest addr of interrupt bit vector */
-                       __u64 sb;       /* Guest addr of summary bit */
-                       __u32 flags;
-                       __u32 noi;      /* Number of interrupts */
-                       __u8 isc;       /* Guest interrupt subclass */
-                       __u8 sbo;       /* Offset of guest summary bit vector */
-                       __u16 pad;
-               } reg_aen;
-               __u64 reserved[8];
-       } u;
-};
-
-/* types for kvm_s390_zpci_op->op */
-#define KVM_S390_ZPCIOP_REG_AEN                0
-#define KVM_S390_ZPCIOP_DEREG_AEN      1
-
-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
-#define KVM_S390_ZPCIOP_REGAEN_HOST    (1 << 0)
-
  /* Available with KVM_CAP_MEMORY_ATTRIBUTES */
  #define KVM_SET_MEMORY_ATTRIBUTES              _IOW(KVMIO,  0xd2, struct kvm_memory_attributes)
  
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h

index ca30232b7bc8af49a6c3dd1c03e105628aafabf9..117c6a9b845b1a6fde23a952560c0e807a5a3d90 100644 (file)
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -285,9 +285,11 @@ enum nft_rule_attributes {
  /**
   * enum nft_rule_compat_flags - nf_tables rule compat flags
   *
+ * @NFT_RULE_COMPAT_F_UNUSED: unused
   * @NFT_RULE_COMPAT_F_INV: invert the check result
   */
  enum nft_rule_compat_flags {
+       NFT_RULE_COMPAT_F_UNUSED = (1 << 0),
         NFT_RULE_COMPAT_F_INV   = (1 << 1),
         NFT_RULE_COMPAT_F_MASK  = NFT_RULE_COMPAT_F_INV,
  };
diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h

index 9086367db0435365689d1f8cbe2a9693958df401..de9b4733607e6b61b08ff7089ff90070168ff4a2 100644 (file)
--- a/include/uapi/linux/serial.h
+++ b/include/uapi/linux/serial.h
@@ -145,12 +145,13 @@ struct serial_rs485 {
  #define SER_RS485_ENABLED              _BITUL(0)
  #define SER_RS485_RTS_ON_SEND          _BITUL(1)
  #define SER_RS485_RTS_AFTER_SEND       _BITUL(2)
-#define SER_RS485_RX_DURING_TX         _BITUL(3)
-#define SER_RS485_TERMINATE_BUS                _BITUL(4)
-#define SER_RS485_ADDRB                        _BITUL(5)
-#define SER_RS485_ADDR_RECV            _BITUL(6)
-#define SER_RS485_ADDR_DEST            _BITUL(7)
-#define SER_RS485_MODE_RS422           _BITUL(8)
+/* Placeholder for bit 3: SER_RS485_RTS_BEFORE_SEND, which isn't used anymore */
+#define SER_RS485_RX_DURING_TX         _BITUL(4)
+#define SER_RS485_TERMINATE_BUS                _BITUL(5)
+#define SER_RS485_ADDRB                        _BITUL(6)
+#define SER_RS485_ADDR_RECV            _BITUL(7)
+#define SER_RS485_ADDR_DEST            _BITUL(8)
+#define SER_RS485_MODE_RS422           _BITUL(9)
  
         __u32   delay_rts_before_send;
         __u32   delay_rts_after_send;
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h

index d5b9cfbd9ceac69323d0fe487cc49ab388a2e523..628d46a0da92eb0393dd592a38e987d08dcf6db0 100644 (file)
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image {
   *                                                                           *
   *****************************************************************************/
  
-#define SNDRV_PCM_VERSION              SNDRV_PROTOCOL_VERSION(2, 0, 16)
+#define SNDRV_PCM_VERSION              SNDRV_PROTOCOL_VERSION(2, 0, 17)
  
  typedef unsigned long snd_pcm_uframes_t;
  typedef signed long snd_pcm_sframes_t;
@@ -416,7 +416,7 @@ struct snd_pcm_hw_params {
         unsigned int rmask;             /* W: requested masks */
         unsigned int cmask;             /* R: changed masks */
         unsigned int info;              /* R: Info flags for returned setup */
-       unsigned int msbits;            /* R: used most significant bits */
+       unsigned int msbits;            /* R: used most significant bits (in sample bit-width) */
         unsigned int rate_num;          /* R: rate numerator */
         unsigned int rate_den;          /* R: rate denominator */
         snd_pcm_uframes_t fifo_size;    /* R: chip FIFO size in frames */
diff --git a/include/uapi/xen/gntalloc.h b/include/uapi/xen/gntalloc.h

index 48d2790ef928c798279babb69fbd07f7d6dcc20d..3109282672f33cecc00cd3f08e9dfd22e7991637 100644 (file)
--- a/include/uapi/xen/gntalloc.h
+++ b/include/uapi/xen/gntalloc.h
@@ -31,7 +31,10 @@ struct ioctl_gntalloc_alloc_gref {
         __u64 index;
         /* The grant references of the newly created grant, one per page */
         /* Variable size, depending on count */
-       __u32 gref_ids[1];
+       union {
+               __u32 gref_ids[1];
+               __DECLARE_FLEX_ARRAY(__u32, gref_ids_flex);
+       };
  };
  
  #define GNTALLOC_FLAG_WRITABLE 1
diff --git a/init/Kconfig b/init/Kconfig

index 8d4e836e1b6b15c1846fa2f6148111e63f4b4aa9..8426d59cc634d6dc86cc9954072a425b277c24ea 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT
         # Detect buggy gcc and clang, fixed in gcc-11 clang-14.
         def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)
  
+config GCC_ASM_GOTO_OUTPUT_WORKAROUND
+       bool
+       depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT
+       # Fixed in GCC 14, 13.3, 12.4 and 11.5
+       # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+       default y if GCC_VERSION < 110500
+       default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400
+       default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300
+
  config TOOLS_SUPPORT_RELR
         def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
  
@@ -876,13 +885,13 @@ config CC_NO_ARRAY_BOUNDS
         bool
         default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS
  
-# Currently, disable -Wstringop-overflow for GCC 11, globally.
-config GCC11_NO_STRINGOP_OVERFLOW
+# Currently, disable -Wstringop-overflow for GCC globally.
+config GCC_NO_STRINGOP_OVERFLOW
         def_bool y
  
  config CC_NO_STRINGOP_OVERFLOW
         bool
-       default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_STRINGOP_OVERFLOW
+       default y if CC_IS_GCC && GCC_NO_STRINGOP_OVERFLOW
  
  config CC_STRINGOP_OVERFLOW
         bool
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h

index 04e33f25919ca78332fc3429b8c98d427b768688..d5495710c17877624c75d8fa36b71af9535336a3 100644 (file)
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -15,11 +15,17 @@
  #include <trace/events/io_uring.h>
  #endif
  
-
  enum {
         IOU_OK                  = 0,
         IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED,
  
+       /*
+        * Requeue the task_work to restart operations on this request. The
+        * actual value isn't important, should just be not an otherwise
+        * valid error code, yet less than -MAX_ERRNO and valid internally.
+        */
+       IOU_REQUEUE             = -3072,
+
         /*
          * Intended only when both IO_URING_F_MULTISHOT is passed
          * to indicate to the poll runner that multishot should be
diff --git a/io_uring/net.c b/io_uring/net.c

index 75d494dad7e2c7b22a53f50fc422d807a0559000..161622029147ca3e6c13784274ea084d05695ccc 100644 (file)
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -60,6 +60,7 @@ struct io_sr_msg {
         unsigned                        len;
         unsigned                        done_io;
         unsigned                        msg_flags;
+       unsigned                        nr_multishot_loops;
         u16                             flags;
         /* initialised and used only by !msg send variants */
         u16                             addr_len;
@@ -70,6 +71,13 @@ struct io_sr_msg {
         struct io_kiocb                 *notif;
  };
  
+/*
+ * Number of times we'll try and do receives if there's more data. If we
+ * exceed this limit, then add us to the back of the queue and retry from
+ * there. This helps fairness between flooding clients.
+ */
+#define MULTISHOT_MAX_RETRY    32
+
  static inline bool io_check_multishot(struct io_kiocb *req,
                                       unsigned int issue_flags)
  {
@@ -611,6 +619,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                 sr->msg_flags |= MSG_CMSG_COMPAT;
  #endif
         sr->done_io = 0;
+       sr->nr_multishot_loops = 0;
         return 0;
  }
  
@@ -645,23 +654,35 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
                 return true;
         }
  
-       if (!mshot_finished) {
-               if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
-                                       *ret, cflags | IORING_CQE_F_MORE)) {
-                       io_recv_prep_retry(req);
-                       /* Known not-empty or unknown state, retry */
-                       if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
-                           msg->msg_inq == -1)
+       if (mshot_finished)
+               goto finish;
+
+       /*
+        * Fill CQE for this receive and see if we should keep trying to
+        * receive from this socket.
+        */
+       if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
+                               *ret, cflags | IORING_CQE_F_MORE)) {
+               struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+               int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
+
+               io_recv_prep_retry(req);
+               /* Known not-empty or unknown state, retry */
+               if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
+                       if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
                                 return false;
-                       if (issue_flags & IO_URING_F_MULTISHOT)
-                               *ret = IOU_ISSUE_SKIP_COMPLETE;
-                       else
-                               *ret = -EAGAIN;
-                       return true;
+                       /* mshot retries exceeded, force a requeue */
+                       sr->nr_multishot_loops = 0;
+                       mshot_retry_ret = IOU_REQUEUE;
                 }
-               /* Otherwise stop multishot but use the current result. */
+               if (issue_flags & IO_URING_F_MULTISHOT)
+                       *ret = mshot_retry_ret;
+               else
+                       *ret = -EAGAIN;
+               return true;
         }
-
+       /* Otherwise stop multishot but use the current result. */
+finish:
         io_req_set_res(req, *ret, cflags);
  
         if (issue_flags & IO_URING_F_MULTISHOT)
@@ -902,6 +923,7 @@ retry_multishot:
                 if (!buf)
                         return -ENOBUFS;
                 sr->buf = buf;
+               sr->len = len;
         }
  
         ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
@@ -1350,7 +1372,7 @@ retry:
                          * has already been done
                          */
                         if (issue_flags & IO_URING_F_MULTISHOT)
-                               ret = IOU_ISSUE_SKIP_COMPLETE;
+                               return IOU_ISSUE_SKIP_COMPLETE;
                         return ret;
                 }
                 if (ret == -ERESTARTSYS)
@@ -1375,7 +1397,8 @@ retry:
                                 ret, IORING_CQE_F_MORE))
                 goto retry;
  
-       return -ECANCELED;
+       io_req_set_res(req, ret, 0);
+       return IOU_STOP_MULTISHOT;
  }
  
  int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
diff --git a/io_uring/poll.c b/io_uring/poll.c

index d59b74a99d4e4b444dcb2f86dc9d3594d838e1cf..7513afc7b702e4cbc727717fc59aa8eb758465df 100644 (file)
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -226,8 +226,29 @@ enum {
         IOU_POLL_NO_ACTION = 1,
         IOU_POLL_REMOVE_POLL_USE_RES = 2,
         IOU_POLL_REISSUE = 3,
+       IOU_POLL_REQUEUE = 4,
  };
  
+static void __io_poll_execute(struct io_kiocb *req, int mask)
+{
+       unsigned flags = 0;
+
+       io_req_set_res(req, mask, 0);
+       req->io_task_work.func = io_poll_task_func;
+
+       trace_io_uring_task_add(req, mask);
+
+       if (!(req->flags & REQ_F_POLL_NO_LAZY))
+               flags = IOU_F_TWQ_LAZY_WAKE;
+       __io_req_task_work_add(req, flags);
+}
+
+static inline void io_poll_execute(struct io_kiocb *req, int res)
+{
+       if (io_poll_get_ownership(req))
+               __io_poll_execute(req, res);
+}
+
  /*
   * All poll tw should go through this. Checks for poll events, manages
   * references, does rewait, etc.
@@ -309,6 +330,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
                         int ret = io_poll_issue(req, ts);
                         if (ret == IOU_STOP_MULTISHOT)
                                 return IOU_POLL_REMOVE_POLL_USE_RES;
+                       else if (ret == IOU_REQUEUE)
+                               return IOU_POLL_REQUEUE;
                         if (ret < 0)
                                 return ret;
                 }
@@ -331,8 +354,12 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
         int ret;
  
         ret = io_poll_check_events(req, ts);
-       if (ret == IOU_POLL_NO_ACTION)
+       if (ret == IOU_POLL_NO_ACTION) {
+               return;
+       } else if (ret == IOU_POLL_REQUEUE) {
+               __io_poll_execute(req, 0);
                 return;
+       }
         io_poll_remove_entries(req);
         io_poll_tw_hash_eject(req, ts);
  
@@ -364,26 +391,6 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
         }
  }
  
-static void __io_poll_execute(struct io_kiocb *req, int mask)
-{
-       unsigned flags = 0;
-
-       io_req_set_res(req, mask, 0);
-       req->io_task_work.func = io_poll_task_func;
-
-       trace_io_uring_task_add(req, mask);
-
-       if (!(req->flags & REQ_F_POLL_NO_LAZY))
-               flags = IOU_F_TWQ_LAZY_WAKE;
-       __io_req_task_work_add(req, flags);
-}
-
-static inline void io_poll_execute(struct io_kiocb *req, int res)
-{
-       if (io_poll_get_ownership(req))
-               __io_poll_execute(req, res);
-}
-
  static void io_poll_cancel_req(struct io_kiocb *req)
  {
         io_poll_mark_cancelled(req);
diff --git a/io_uring/poll.h b/io_uring/poll.h

index ff4d5d753387e80568ccc90734732d6cb999b39e..1dacae9e816c9269e8a1ae5bfab4d12fa9aaa9ac 100644 (file)
--- a/io_uring/poll.h
+++ b/io_uring/poll.h
@@ -24,6 +24,15 @@ struct async_poll {
         struct io_poll          *double_poll;
  };
  
+/*
+ * Must only be called inside issue_flags & IO_URING_F_MULTISHOT, or
+ * potentially other cases where we already "own" this poll request.
+ */
+static inline void io_poll_multishot_retry(struct io_kiocb *req)
+{
+       atomic_inc(&req->poll_refs);
+}
+
  int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
  int io_poll_add(struct io_kiocb *req, unsigned int issue_flags);
  
diff --git a/io_uring/rw.c b/io_uring/rw.c

index 118cc9f1cf1602a4859eb3359c8b2e64cf6db620..d5e79d9bdc717b8cb917d6e06b2cbbe6840dd762 100644 (file)
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -18,6 +18,7 @@
  #include "opdef.h"
  #include "kbuf.h"
  #include "rsrc.h"
+#include "poll.h"
  #include "rw.h"
  
  struct io_rw {
@@ -962,8 +963,15 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
                 if (io_fill_cqe_req_aux(req,
                                         issue_flags & IO_URING_F_COMPLETE_DEFER,
                                         ret, cflags | IORING_CQE_F_MORE)) {
-                       if (issue_flags & IO_URING_F_MULTISHOT)
+                       if (issue_flags & IO_URING_F_MULTISHOT) {
+                               /*
+                                * Force retry, as we might have more data to
+                                * be read and otherwise it won't get retried
+                                * until (if ever) another poll is triggered.
+                                */
+                               io_poll_multishot_retry(req);
                                 return IOU_ISSUE_SKIP_COMPLETE;
+                       }
                         return -EAGAIN;
                 }
         }
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c

index be72824f32b2cc5e3dfcb8d2bd613b86116a498c..d19cd863d294ea1b589aeae327ae6b10e7211a93 100644 (file)
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1101,6 +1101,7 @@ struct bpf_hrtimer {
         struct bpf_prog *prog;
         void __rcu *callback_fn;
         void *value;
+       struct rcu_head rcu;
  };
  
  /* the actual struct hidden inside uapi struct bpf_timer */
@@ -1332,6 +1333,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
  
         if (in_nmi())
                 return -EOPNOTSUPP;
+       rcu_read_lock();
         __bpf_spin_lock_irqsave(&timer->lock);
         t = timer->timer;
         if (!t) {
@@ -1353,6 +1355,7 @@ out:
          * if it was running.
          */
         ret = ret ?: hrtimer_cancel(&t->timer);
+       rcu_read_unlock();
         return ret;
  }
  
@@ -1407,7 +1410,7 @@ out:
          */
         if (this_cpu_read(hrtimer_running) != t)
                 hrtimer_cancel(&t->timer);
-       kfree(t);
+       kfree_rcu(t, rcu);
  }
  
  BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c

index e5c3500443c6e71f4fca7a6403dc33aa054d6fd8..ec4e97c61eefe667955e984b934f354b9162b52d 100644 (file)
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -978,6 +978,8 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it,
         BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
                                         __alignof__(struct bpf_iter_task));
  
+       kit->pos = NULL;
+
         switch (flags) {
         case BPF_TASK_ITER_ALL_THREADS:
         case BPF_TASK_ITER_ALL_PROCS:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 65f598694d550359f2b926ef26ae30d0c80c6f69..b263f093ee76133a38b0f13c5143323f1eb069ab 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5227,7 +5227,9 @@ BTF_ID(struct, prog_test_ref_kfunc)
  #ifdef CONFIG_CGROUPS
  BTF_ID(struct, cgroup)
  #endif
+#ifdef CONFIG_BPF_JIT
  BTF_ID(struct, bpf_cpumask)
+#endif
  BTF_ID(struct, task_struct)
  BTF_SET_END(rcu_protected_types)
  
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c

index 485bb0389b488d28a4efb23901b514d93b3834f6..929e98c629652a0fef1b71e6c002cca41936c4b4 100644 (file)
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -537,7 +537,7 @@ retry:
                 }
         }
  
-       ret = __replace_page(vma, vaddr, old_page, new_page);
+       ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page);
         if (new_page)
                 put_page(new_page);
  put_old:
diff --git a/kernel/exit.c b/kernel/exit.c

index 3988a02efaef06444654a415ce298d378ab925ec..dfb963d2f862ada6a2f06259c6df4923272cb218 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1127,17 +1127,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
                  * and nobody can change them.
                  *
                  * psig->stats_lock also protects us from our sub-threads
-                * which can reap other children at the same time. Until
-                * we change k_getrusage()-like users to rely on this lock
-                * we have to take ->siglock as well.
+                * which can reap other children at the same time.
                  *
                  * We use thread_group_cputime_adjusted() to get times for
                  * the thread group, which consolidates times for all threads
                  * in the group including the group leader.
                  */
                 thread_group_cputime_adjusted(p, &tgutime, &tgstime);
-               spin_lock_irq(&current->sighand->siglock);
-               write_seqlock(&psig->stats_lock);
+               write_seqlock_irq(&psig->stats_lock);
                 psig->cutime += tgutime + sig->cutime;
                 psig->cstime += tgstime + sig->cstime;
                 psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1160,8 +1157,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
                         psig->cmaxrss = maxrss;
                 task_io_accounting_add(&psig->ioac, &p->ioac);
                 task_io_accounting_add(&psig->ioac, &sig->ioac);
-               write_sequnlock(&psig->stats_lock);
-               spin_unlock_irq(&current->sighand->siglock);
+               write_sequnlock_irq(&psig->stats_lock);
         }
  
         if (wo->wo_rusage)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c

index d5a0ee40bf66c5318df14c5a49294850434e13d3..9d9095e817928658d2c6d54d5da6f4826ff7c6be 100644 (file)
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1993,7 +1993,7 @@ NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);
  unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
                                       struct llist_node **cur)
  {
-       struct kretprobe_instance *ri = NULL;
+       struct kretprobe_instance *ri;
         kprobe_opcode_t *ret;
  
         if (WARN_ON_ONCE(!cur))
@@ -2802,7 +2802,7 @@ static int show_kprobe_addr(struct seq_file *pi, void *v)
  {
         struct hlist_head *head;
         struct kprobe *p, *kp;
-       const char *sym = NULL;
+       const char *sym;
         unsigned int i = *(loff_t *) v;
         unsigned long offset = 0;
         char *modname, namebuf[KSYM_NAME_LEN];
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c

index 2ad881d07752c15f60a4c14bee21051117d5aeb2..4e715b9b278e7fd7fbea70110f5a829635a4bc01 100644 (file)
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -162,6 +162,9 @@
         | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK                     \
         | MEMBARRIER_CMD_GET_REGISTRATIONS)
  
+static DEFINE_MUTEX(membarrier_ipi_mutex);
+#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex)
+
  static void ipi_mb(void *info)
  {
         smp_mb();       /* IPIs should be serializing but paranoid. */
@@ -259,6 +262,7 @@ static int membarrier_global_expedited(void)
         if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
                 return -ENOMEM;
  
+       SERIALIZE_IPI();
         cpus_read_lock();
         rcu_read_lock();
         for_each_online_cpu(cpu) {
@@ -347,6 +351,7 @@ static int membarrier_private_expedited(int flags, int cpu_id)
         if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
                 return -ENOMEM;
  
+       SERIALIZE_IPI();
         cpus_read_lock();
  
         if (cpu_id >= 0) {
@@ -460,6 +465,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
          * between threads which are users of @mm has its membarrier state
          * updated.
          */
+       SERIALIZE_IPI();
         cpus_read_lock();
         rcu_read_lock();
         for_each_online_cpu(cpu) {
diff --git a/kernel/sys.c b/kernel/sys.c

index e219fcfa112d863eeef58381d04fd4bab16a1e32..f8e543f1e38a06dc3a4aa2f777c7e88d444e5565 100644 (file)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1785,21 +1785,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
         struct task_struct *t;
         unsigned long flags;
         u64 tgutime, tgstime, utime, stime;
-       unsigned long maxrss = 0;
+       unsigned long maxrss;
+       struct mm_struct *mm;
         struct signal_struct *sig = p->signal;
+       unsigned int seq = 0;
  
-       memset((char *)r, 0, sizeof (*r));
+retry:
+       memset(r, 0, sizeof(*r));
         utime = stime = 0;
+       maxrss = 0;
  
         if (who == RUSAGE_THREAD) {
                 task_cputime_adjusted(current, &utime, &stime);
                 accumulate_thread_rusage(p, r);
                 maxrss = sig->maxrss;
-               goto out;
+               goto out_thread;
         }
  
-       if (!lock_task_sighand(p, &flags))
-               return;
+       flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
  
         switch (who) {
         case RUSAGE_BOTH:
@@ -1819,9 +1822,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
                 fallthrough;
  
         case RUSAGE_SELF:
-               thread_group_cputime_adjusted(p, &tgutime, &tgstime);
-               utime += tgutime;
-               stime += tgstime;
                 r->ru_nvcsw += sig->nvcsw;
                 r->ru_nivcsw += sig->nivcsw;
                 r->ru_minflt += sig->min_flt;
@@ -1830,28 +1830,42 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
                 r->ru_oublock += sig->oublock;
                 if (maxrss < sig->maxrss)
                         maxrss = sig->maxrss;
+
+               rcu_read_lock();
                 __for_each_thread(sig, t)
                         accumulate_thread_rusage(t, r);
+               rcu_read_unlock();
+
                 break;
  
         default:
                 BUG();
         }
-       unlock_task_sighand(p, &flags);
  
-out:
-       r->ru_utime = ns_to_kernel_old_timeval(utime);
-       r->ru_stime = ns_to_kernel_old_timeval(stime);
+       if (need_seqretry(&sig->stats_lock, seq)) {
+               seq = 1;
+               goto retry;
+       }
+       done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
  
-       if (who != RUSAGE_CHILDREN) {
-               struct mm_struct *mm = get_task_mm(p);
+       if (who == RUSAGE_CHILDREN)
+               goto out_children;
  
-               if (mm) {
-                       setmax_mm_hiwater_rss(&maxrss, mm);
-                       mmput(mm);
-               }
+       thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+       utime += tgutime;
+       stime += tgstime;
+
+out_thread:
+       mm = get_task_mm(p);
+       if (mm) {
+               setmax_mm_hiwater_rss(&maxrss, mm);
+               mmput(mm);
         }
+
+out_children:
         r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
+       r->ru_utime = ns_to_kernel_old_timeval(utime);
+       r->ru_stime = ns_to_kernel_old_timeval(stime);
  }
  
  SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c

index 760793998cdd703a387c64a792a7b7f7dab552d5..edb0f821dceaa1720ac94fc53f4002a1e5f7bdd3 100644 (file)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1085,6 +1085,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
                            enum hrtimer_mode mode)
  {
         debug_activate(timer, mode);
+       WARN_ON_ONCE(!base->cpu_base->online);
  
         base->cpu_base->active_bases |= 1 << base->index;
  
@@ -2183,6 +2184,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
         cpu_base->softirq_next_timer = NULL;
         cpu_base->expires_next = KTIME_MAX;
         cpu_base->softirq_expires_next = KTIME_MAX;
+       cpu_base->online = 1;
         hrtimer_cpu_base_init_expiry_lock(cpu_base);
         return 0;
  }
@@ -2250,6 +2252,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
         smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
  
         raw_spin_unlock(&new_base->lock);
+       old_base->online = 0;
         raw_spin_unlock(&old_base->lock);
  
         return 0;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c

index 6cd2a4e3afb8fb6045dbf27543a67147dea20900..9ff0182458408438ddc5d7c720d866d0adc02dfd 100644 (file)
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -189,9 +189,6 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
  {
         int size;
  
-       if (num <= 0)
-               return -EINVAL;
-
         if (!fp->exit_handler) {
                 fp->rethook = NULL;
                 return 0;
@@ -199,15 +196,16 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
  
         /* Initialize rethook if needed */
         if (fp->nr_maxactive)
-               size = fp->nr_maxactive;
+               num = fp->nr_maxactive;
         else
-               size = num * num_possible_cpus() * 2;
-       if (size <= 0)
+               num *= num_possible_cpus() * 2;
+       if (num <= 0)
                 return -EINVAL;
  
+       size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size;
+
         /* Initialize rethook */
-       fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler,
-                               sizeof(struct fprobe_rethook_node), size);
+       fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num);
         if (IS_ERR(fp->rethook))
                 return PTR_ERR(fp->rethook);
  
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c

index b01ae7d36021819e6d929ce2ab1e0a5a61464309..83ba342aef31f7d919f4c24a73d2c4cea76be137 100644 (file)
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5325,7 +5325,17 @@ static LIST_HEAD(ftrace_direct_funcs);
  
  static int register_ftrace_function_nolock(struct ftrace_ops *ops);
  
+/*
+ * If there are multiple ftrace_ops, use SAVE_REGS by default, so that direct
+ * call will be jumped from ftrace_regs_caller. Only if the architecture does
+ * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it
+ * jumps from ftrace_caller for multiple ftrace_ops.
+ */
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS
  #define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS)
+#else
+#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
+#endif
  
  static int check_direct_multi(struct ftrace_ops *ops)
  {
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c

index 13aaf5e85b811b72f60b355f833a680342a41c7f..0699027b4f4c92d3a6ed7c62ee64491d7cd89046 100644 (file)
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -944,7 +944,7 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
                 full = 0;
         } else {
                 if (!cpumask_test_cpu(cpu, buffer->cpumask))
-                       return -EINVAL;
+                       return EPOLLERR;
  
                 cpu_buffer = buffer->buffers[cpu];
                 work = &cpu_buffer->irq_work;
@@ -5877,6 +5877,10 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
         if (psize <= BUF_PAGE_HDR_SIZE)
                 return -EINVAL;
  
+       /* Size of a subbuf cannot be greater than the write counter */
+       if (psize > RB_WRITE_MASK + 1)
+               return -EINVAL;
+
         old_order = buffer->subbuf_order;
         old_size = buffer->subbuf_size;
  
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c

index 2a7c6fd934e9cb391b5ddf589748c572810ad6d1..8198bfc54b58d9729ab6f6318eb9720a32d40375 100644 (file)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
  #include <linux/ctype.h>
  #include <linux/init.h>
  #include <linux/panic_notifier.h>
+#include <linux/kmemleak.h>
  #include <linux/poll.h>
  #include <linux/nmi.h>
  #include <linux/fs.h>
@@ -1532,7 +1533,7 @@ void disable_trace_on_warning(void)
  bool tracer_tracing_is_on(struct trace_array *tr)
  {
         if (tr->array_buffer.buffer)
-               return ring_buffer_record_is_on(tr->array_buffer.buffer);
+               return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
         return !tr->buffer_disabled;
  }
  
@@ -2320,7 +2321,7 @@ struct saved_cmdlines_buffer {
         unsigned *map_cmdline_to_pid;
         unsigned cmdline_num;
         int cmdline_idx;
-       char *saved_cmdlines;
+       char saved_cmdlines[];
  };
  static struct saved_cmdlines_buffer *savedcmd;
  
@@ -2334,47 +2335,60 @@ static inline void set_cmdline(int idx, const char *cmdline)
         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
  }
  
-static int allocate_cmdlines_buffer(unsigned int val,
-                                   struct saved_cmdlines_buffer *s)
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
+{
+       int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
+
+       kfree(s->map_cmdline_to_pid);
+       kmemleak_free(s);
+       free_pages((unsigned long)s, order);
+}
+
+static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
  {
+       struct saved_cmdlines_buffer *s;
+       struct page *page;
+       int orig_size, size;
+       int order;
+
+       /* Figure out how much is needed to hold the given number of cmdlines */
+       orig_size = sizeof(*s) + val * TASK_COMM_LEN;
+       order = get_order(orig_size);
+       size = 1 << (order + PAGE_SHIFT);
+       page = alloc_pages(GFP_KERNEL, order);
+       if (!page)
+               return NULL;
+
+       s = page_address(page);
+       kmemleak_alloc(s, size, 1, GFP_KERNEL);
+       memset(s, 0, sizeof(*s));
+
+       /* Round up to actual allocation */
+       val = (size - sizeof(*s)) / TASK_COMM_LEN;
+       s->cmdline_num = val;
+
         s->map_cmdline_to_pid = kmalloc_array(val,
                                               sizeof(*s->map_cmdline_to_pid),
                                               GFP_KERNEL);
-       if (!s->map_cmdline_to_pid)
-               return -ENOMEM;
-
-       s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
-       if (!s->saved_cmdlines) {
-               kfree(s->map_cmdline_to_pid);
-               return -ENOMEM;
+       if (!s->map_cmdline_to_pid) {
+               free_saved_cmdlines_buffer(s);
+               return NULL;
         }
  
         s->cmdline_idx = 0;
-       s->cmdline_num = val;
         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
                sizeof(s->map_pid_to_cmdline));
         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
                val * sizeof(*s->map_cmdline_to_pid));
  
-       return 0;
+       return s;
  }
  
  static int trace_create_savedcmd(void)
  {
-       int ret;
-
-       savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
-       if (!savedcmd)
-               return -ENOMEM;
-
-       ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
-       if (ret < 0) {
-               kfree(savedcmd);
-               savedcmd = NULL;
-               return -ENOMEM;
-       }
+       savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
  
-       return 0;
+       return savedcmd ? 0 : -ENOMEM;
  }
  
  int is_tracing_stopped(void)
@@ -6056,26 +6070,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  }
  
-static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
-{
-       kfree(s->saved_cmdlines);
-       kfree(s->map_cmdline_to_pid);
-       kfree(s);
-}
-
  static int tracing_resize_saved_cmdlines(unsigned int val)
  {
         struct saved_cmdlines_buffer *s, *savedcmd_temp;
  
-       s = kmalloc(sizeof(*s), GFP_KERNEL);
+       s = allocate_cmdlines_buffer(val);
         if (!s)
                 return -ENOMEM;
  
-       if (allocate_cmdlines_buffer(val, s) < 0) {
-               kfree(s);
-               return -ENOMEM;
-       }
-
         preempt_disable();
         arch_spin_lock(&trace_cmdline_lock);
         savedcmd_temp = savedcmd;
diff --git a/kernel/trace/trace_btf.c b/kernel/trace/trace_btf.c

index ca224d53bfdcd0df9f6609d3f30a4a6054868136..5bbdbcbbde3cd281f82f3cbc79f72f7a0c3e67ee 100644 (file)
--- a/kernel/trace/trace_btf.c
+++ b/kernel/trace/trace_btf.c
@@ -91,8 +91,8 @@ retry:
         for_each_member(i, type, member) {
                 if (!member->name_off) {
                         /* Anonymous union/struct: push it for later use */
-                       type = btf_type_skip_modifiers(btf, member->type, &tid);
-                       if (type && top < BTF_ANON_STACK_MAX) {
+                       if (btf_type_skip_modifiers(btf, member->type, &tid) &&
+                           top < BTF_ANON_STACK_MAX) {
                                 anon_stack[top].tid = tid;
                                 anon_stack[top++].offset =
                                         cur_offset + member->offset;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c

index e7af286af4f1ad9d3ac578cb3ce3c58ba3d5ce0b..c82b401a294d961ae75c48f3a164e92f3ad181b1 100644 (file)
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -441,8 +441,9 @@ static unsigned int trace_string(struct synth_trace_event *entry,
         if (is_dynamic) {
                 union trace_synth_field *data = &entry->fields[*n_u64];
  
+               len = fetch_store_strlen((unsigned long)str_val);
                 data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
-               data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);
+               data->as_dynamic.len = len;
  
                 ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);
  
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c

index 46439e3bcec4d20b45ae8202d7a68888778fa208..b33c3861fbbbf303e78f740a0fcc41caa2a77d77 100644 (file)
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1470,8 +1470,10 @@ register_snapshot_trigger(char *glob,
                           struct event_trigger_data *data,
                           struct trace_event_file *file)
  {
-       if (tracing_alloc_snapshot_instance(file->tr) != 0)
-               return 0;
+       int ret = tracing_alloc_snapshot_instance(file->tr);
+
+       if (ret < 0)
+               return ret;
  
         return register_trigger(glob, data, file);
  }
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c

index bd0d01d00fb9d52d0736524c3274f1382fee8462..a8e28f9b9271cf6545351f7d4f7ece1fbd9d8989 100644 (file)
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2444,6 +2444,9 @@ static int timerlat_fd_open(struct inode *inode, struct file *file)
         tlat = this_cpu_tmr_var();
         tlat->count = 0;
  
+       hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
+       tlat->timer.function = timerlat_irq;
+
         migrate_enable();
         return 0;
  };
@@ -2526,9 +2529,6 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
                 tlat->tracing_thread = false;
                 tlat->kthread = current;
  
-               hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
-               tlat->timer.function = timerlat_irq;
-
                 /* Annotate now to drift new period */
                 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
  
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c

index 4dc74d73fc1df5af9ee297611865705d44259663..34289f9c67076b2ab81ffc67bd5a518926e59ca6 100644 (file)
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1159,9 +1159,12 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
         if (!(ctx->flags & TPARG_FL_TEVENT) &&
             (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
              strncmp(arg, "\\\"", 2) == 0)) {
-               /* The type of $comm must be "string", and not an array. */
-               if (parg->count || (t && strcmp(t, "string")))
+               /* The type of $comm must be "string", and not an array type. */
+               if (parg->count || (t && strcmp(t, "string"))) {
+                       trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0),
+                                       NEED_STRING_TYPE);
                         goto out;
+               }
                 parg->type = find_fetch_type("string", ctx->flags);
         } else
                 parg->type = find_fetch_type(t, ctx->flags);
@@ -1169,18 +1172,6 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
                 trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), BAD_TYPE);
                 goto out;
         }
-       parg->offset = *size;
-       *size += parg->type->size * (parg->count ?: 1);
-
-       ret = -ENOMEM;
-       if (parg->count) {
-               len = strlen(parg->type->fmttype) + 6;
-               parg->fmt = kmalloc(len, GFP_KERNEL);
-               if (!parg->fmt)
-                       goto out;
-               snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
-                        parg->count);
-       }
  
         code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL);
         if (!code)
@@ -1204,6 +1195,19 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
                                 goto fail;
                 }
         }
+       parg->offset = *size;
+       *size += parg->type->size * (parg->count ?: 1);
+
+       if (parg->count) {
+               len = strlen(parg->type->fmttype) + 6;
+               parg->fmt = kmalloc(len, GFP_KERNEL);
+               if (!parg->fmt) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
+                        parg->count);
+       }
  
         ret = -EINVAL;
         /* Store operation */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h

index 850d9ecb6765a8bd372b214ee6f302e3374ffa93..c1877d0182691c20eba09e60a98d80daf2dd810a 100644 (file)
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -515,7 +515,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
         C(BAD_HYPHEN,           "Failed to parse single hyphen. Forgot '>'?"),  \
         C(NO_BTF_FIELD,         "This field is not found."),    \
         C(BAD_BTF_TID,          "Failed to get BTF type info."),\
-       C(BAD_TYPE4STR,         "This type does not fit for string."),
+       C(BAD_TYPE4STR,         "This type does not fit for string."),\
+       C(NEED_STRING_TYPE,     "$comm and immediate-string only accepts string type"),
  
  #undef C
  #define C(a, b)                TP_ERR_##a
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 76e60faed892357002868cdf9fb41c76ad4eba54..7b482a26d74196c4505d7b45017ed153c75572fd 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5786,13 +5786,9 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
         list_for_each_entry(wq, &workqueues, list) {
                 if (!(wq->flags & WQ_UNBOUND))
                         continue;
-
                 /* creating multiple pwqs breaks ordering guarantee */
-               if (!list_empty(&wq->pwqs)) {
-                       if (wq->flags & __WQ_ORDERED_EXPLICIT)
-                               continue;
-                       wq->flags &= ~__WQ_ORDERED;
-               }
+               if (wq->flags & __WQ_ORDERED)
+                       continue;
  
                 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask);
                 if (IS_ERR(ctx)) {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug

index 975a07f9f1cc08838d272f83d5f04a85ff2f5cd2..ef36b829ae1f55bcfe4c58b567ded4fc348db0af 100644 (file)
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2235,6 +2235,7 @@ config TEST_DIV64
  config TEST_IOV_ITER
         tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS
         depends on KUNIT
+       depends on MMU
         default KUNIT_ALL_TESTS
         help
           Enable this to turn on testing of the operation of the I/O iterator
diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c

index 225bb77014600f796e972a9c0f03638c23750a06..bf70850035c76f468c7c0af023454bf5bc6716e3 100644 (file)
--- a/lib/checksum_kunit.c
+++ b/lib/checksum_kunit.c
@@ -215,7 +215,7 @@ static const u32 init_sums_no_overflow[] = {
         0xffff0000, 0xfffffffb,
  };
  
-static const __sum16 expected_csum_ipv6_magic[] = {
+static const u16 expected_csum_ipv6_magic[] = {
         0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f,  0x1034, 0x8422, 0x6fc0,
         0xd2f6, 0xbeb5, 0x9d3,  0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e,
         0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d,
@@ -241,7 +241,7 @@ static const __sum16 expected_csum_ipv6_magic[] = {
         0x3845, 0x1014
  };
  
-static const __sum16 expected_fast_csum[] = {
+static const u16 expected_fast_csum[] = {
         0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e,  0xe902, 0xa5e9, 0x87a5, 0x7187,
         0x5671, 0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a,
         0xedbe, 0xabee, 0x6aac, 0xe6b,  0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a,
@@ -577,7 +577,8 @@ static void test_csum_no_carry_inputs(struct kunit *test)
  
  static void test_ip_fast_csum(struct kunit *test)
  {
-       __sum16 csum_result, expected;
+       __sum16 csum_result;
+       u16 expected;
  
         for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) {
                 for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) {
@@ -586,7 +587,7 @@ static void test_ip_fast_csum(struct kunit *test)
                                 expected_fast_csum[(len - IPv4_MIN_WORDS) *
                                                    NUM_IP_FAST_CSUM_TESTS +
                                                    index];
-                       CHECK_EQ(expected, csum_result);
+                       CHECK_EQ(to_sum16(expected), csum_result);
                 }
         }
  }
@@ -598,7 +599,7 @@ static void test_csum_ipv6_magic(struct kunit *test)
         const struct in6_addr *daddr;
         unsigned int len;
         unsigned char proto;
-       unsigned int csum;
+       __wsum csum;
  
         const int daddr_offset = sizeof(struct in6_addr);
         const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
@@ -611,10 +612,10 @@ static void test_csum_ipv6_magic(struct kunit *test)
                 saddr = (const struct in6_addr *)(random_buf + i);
                 daddr = (const struct in6_addr *)(random_buf + i +
                                                   daddr_offset);
-               len = *(unsigned int *)(random_buf + i + len_offset);
+               len = le32_to_cpu(*(__le32 *)(random_buf + i + len_offset));
                 proto = *(random_buf + i + proto_offset);
-               csum = *(unsigned int *)(random_buf + i + csum_offset);
-               CHECK_EQ(expected_csum_ipv6_magic[i],
+               csum = *(__wsum *)(random_buf + i + csum_offset);
+               CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
                          csum_ipv6_magic(saddr, daddr, len, proto, csum));
         }
  #endif /* !CONFIG_NET */
diff --git a/lib/kobject.c b/lib/kobject.c

index 59dbcbdb1c916d93dcbcbb3b97911098ef9420a7..72fa20f405f1520a63dd50d9aa37f6609306eb3e 100644 (file)
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -74,10 +74,12 @@ static int create_dir(struct kobject *kobj)
         if (error)
                 return error;
  
-       error = sysfs_create_groups(kobj, ktype->default_groups);
-       if (error) {
-               sysfs_remove_dir(kobj);
-               return error;
+       if (ktype) {
+               error = sysfs_create_groups(kobj, ktype->default_groups);
+               if (error) {
+                       sysfs_remove_dir(kobj);
+                       return error;
+               }
         }
  
         /*
@@ -589,7 +591,8 @@ static void __kobject_del(struct kobject *kobj)
         sd = kobj->sd;
         ktype = get_ktype(kobj);
  
-       sysfs_remove_groups(kobj, ktype->default_groups);
+       if (ktype)
+               sysfs_remove_groups(kobj, ktype->default_groups);
  
         /* send "remove" if the caller did not do it but sent "add" */
         if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) {
@@ -666,6 +669,10 @@ static void kobject_cleanup(struct kobject *kobj)
         pr_debug("'%s' (%p): %s, parent %p\n",
                  kobject_name(kobj), kobj, __func__, kobj->parent);
  
+       if (t && !t->release)
+               pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
+                        kobject_name(kobj), kobj);
+
         /* remove from sysfs if the caller did not do it */
         if (kobj->state_in_sysfs) {
                 pr_debug("'%s' (%p): auto cleanup kobject_del\n",
@@ -676,13 +683,10 @@ static void kobject_cleanup(struct kobject *kobj)
                 parent = NULL;
         }
  
-       if (t->release) {
+       if (t && t->release) {
                 pr_debug("'%s' (%p): calling ktype release\n",
                          kobject_name(kobj), kobj);
                 t->release(kobj);
-       } else {
-               pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
-                        kobject_name(kobj), kobj);
         }
  
         /* free name if we allocated it */
@@ -1056,7 +1060,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa
  {
         const struct kobj_ns_type_operations *ops = NULL;
  
-       if (parent && parent->ktype->child_ns_type)
+       if (parent && parent->ktype && parent->ktype->child_ns_type)
                 ops = parent->ktype->child_ns_type(parent);
  
         return ops;
diff --git a/lib/kunit/device-impl.h b/lib/kunit/device-impl.h

index 54bd558364053c2eb5437e0bf5b94da6cdfba0e1..5fcd48ff0f36a37415c67cf2223cdf7ffeaef794 100644 (file)
--- a/lib/kunit/device-impl.h
+++ b/lib/kunit/device-impl.h
@@ -13,5 +13,7 @@
  
  // For internal use only -- registers the kunit_bus.
  int kunit_bus_init(void);
+// For internal use only -- unregisters the kunit_bus.
+void kunit_bus_shutdown(void);
  
  #endif //_KUNIT_DEVICE_IMPL_H
diff --git a/lib/kunit/device.c b/lib/kunit/device.c

index f5371287b3750f0cefdc423748846b4257d8d14a..644a38a1f5b1cf0686a67e7df1a366a3d84e61a6 100644 (file)
--- a/lib/kunit/device.c
+++ b/lib/kunit/device.c
@@ -45,8 +45,8 @@ int kunit_bus_init(void)
         int error;
  
         kunit_bus_device = root_device_register("kunit");
-       if (!kunit_bus_device)
-               return -ENOMEM;
+       if (IS_ERR(kunit_bus_device))
+               return PTR_ERR(kunit_bus_device);
  
         error = bus_register(&kunit_bus_type);
         if (error)
@@ -54,6 +54,20 @@ int kunit_bus_init(void)
         return error;
  }
  
+/* Unregister the 'kunit_bus' in case the KUnit module is unloaded. */
+void kunit_bus_shutdown(void)
+{
+       /* Make sure the bus exists before we unregister it. */
+       if (IS_ERR_OR_NULL(kunit_bus_device))
+               return;
+
+       bus_unregister(&kunit_bus_type);
+
+       root_device_unregister(kunit_bus_device);
+
+       kunit_bus_device = NULL;
+}
+
  /* Release a 'fake' KUnit device. */
  static void kunit_device_release(struct device *d)
  {
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c

index 717b9599036ba0bccf1ffe846b7b051c591109f2..689fff2b2b106a597bbf9b2e473d37e193537b03 100644 (file)
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -146,6 +146,10 @@ void kunit_free_suite_set(struct kunit_suite_set suite_set)
         kfree(suite_set.start);
  }
  
+/*
+ * Filter and reallocate test suites. Must return the filtered test suites set
+ * allocated at a valid virtual address or NULL in case of error.
+ */
  struct kunit_suite_set
  kunit_filter_suites(const struct kunit_suite_set *suite_set,
                     const char *filter_glob,
diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c

index c4259d910356ba7e8f24847cd347eb5861071cb4..f7980ef236a38bdefd8e0e7b53915f6057348617 100644 (file)
--- a/lib/kunit/kunit-test.c
+++ b/lib/kunit/kunit-test.c
@@ -720,7 +720,7 @@ static void kunit_device_cleanup_test(struct kunit *test)
         long action_was_run = 0;
  
         test_device = kunit_device_register(test, "my_device");
-       KUNIT_ASSERT_NOT_NULL(test, test_device);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_device);
  
         /* Add an action to verify cleanup. */
         devm_add_action(test_device, test_dev_action, &action_was_run);
diff --git a/lib/kunit/test.c b/lib/kunit/test.c

index f95d2093a0aa3359c0cb08462ea62e76ab0f2ecf..1d1475578515c261fe74b454502f1a2a5ac3bb81 100644 (file)
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -17,6 +17,7 @@
  #include <linux/panic.h>
  #include <linux/sched/debug.h>
  #include <linux/sched.h>
+#include <linux/mm.h>
  
  #include "debugfs.h"
  #include "device-impl.h"
@@ -801,12 +802,19 @@ static void kunit_module_exit(struct module *mod)
         };
         const char *action = kunit_action();
  
+       /*
+        * Check if the start address is a valid virtual address to detect
+        * if the module load sequence has failed and the suite set has not
+        * been initialized and filtered.
+        */
+       if (!suite_set.start || !virt_addr_valid(suite_set.start))
+               return;
+
         if (!action)
                 __kunit_test_suites_exit(mod->kunit_suites,
                                          mod->num_kunit_suites);
  
-       if (suite_set.start)
-               kunit_free_suite_set(suite_set);
+       kunit_free_suite_set(suite_set);
  }
  
  static int kunit_module_notify(struct notifier_block *nb, unsigned long val,
@@ -816,12 +824,12 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val,
  
         switch (val) {
         case MODULE_STATE_LIVE:
+               kunit_module_init(mod);
                 break;
         case MODULE_STATE_GOING:
                 kunit_module_exit(mod);
                 break;
         case MODULE_STATE_COMING:
-               kunit_module_init(mod);
                 break;
         case MODULE_STATE_UNFORMED:
                 break;
@@ -920,6 +928,9 @@ static void __exit kunit_exit(void)
  #ifdef CONFIG_MODULES
         unregister_module_notifier(&kunit_mod_nb);
  #endif
+
+       kunit_bus_shutdown();
+
         kunit_debugfs_cleanup();
  }
  module_exit(kunit_exit);
diff --git a/lib/nlattr.c b/lib/nlattr.c

index ed2ab43e1b22c0156e5d361c6bfa7eb745759232..be9c576b6e2dc6d35d67d31f15014ab747f478ce 100644 (file)
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = {
         [NLA_S16]       = sizeof(s16),
         [NLA_S32]       = sizeof(s32),
         [NLA_S64]       = sizeof(s64),
+       [NLA_BE16]      = sizeof(__be16),
+       [NLA_BE32]      = sizeof(__be32),
  };
  
  static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
@@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
         [NLA_S16]       = sizeof(s16),
         [NLA_S32]       = sizeof(s32),
         [NLA_S64]       = sizeof(s64),
+       [NLA_BE16]      = sizeof(__be16),
+       [NLA_BE32]      = sizeof(__be32),
  };
  
  /*
diff --git a/lib/seq_buf.c b/lib/seq_buf.c

index 010c730ca7fca9b9aab83960a78638046043693f..f3f3436d60a9403eae5b1ef9b091b027881f14fb 100644 (file)
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -13,16 +13,26 @@
   * seq_buf_init() more than once to reset the seq_buf to start
   * from scratch.
   */
-#include <linux/uaccess.h>
-#include <linux/seq_file.h>
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/hex.h>
+#include <linux/minmax.h>
+#include <linux/printk.h>
  #include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/sprintf.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
  
  /**
   * seq_buf_can_fit - can the new data fit in the current buffer?
   * @s: the seq_buf descriptor
   * @len: The length to see if it can fit in the current buffer
   *
- * Returns true if there's enough unused space in the seq_buf buffer
+ * Returns: true if there's enough unused space in the seq_buf buffer
   * to fit the amount of new data according to @len.
   */
  static bool seq_buf_can_fit(struct seq_buf *s, size_t len)
@@ -35,7 +45,7 @@ static bool seq_buf_can_fit(struct seq_buf *s, size_t len)
   * @m: the seq_file descriptor that is the destination
   * @s: the seq_buf descriptor that is the source.
   *
- * Returns zero on success, non zero otherwise
+ * Returns: zero on success, non-zero otherwise.
   */
  int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s)
  {
@@ -50,9 +60,9 @@ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s)
   * @fmt: printf format string
   * @args: va_list of arguments from a printf() type function
   *
- * Writes a vnprintf() format into the sequencce buffer.
+ * Writes a vnprintf() format into the sequence buffer.
   *
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
   */
  int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
  {
@@ -78,7 +88,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
   *
   * Writes a printf() format into the sequence buffer.
   *
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
   */
  int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
  {
@@ -94,12 +104,12 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
  EXPORT_SYMBOL_GPL(seq_buf_printf);
  
  /**
- * seq_buf_do_printk - printk seq_buf line by line
+ * seq_buf_do_printk - printk() seq_buf line by line
   * @s: seq_buf descriptor
   * @lvl: printk level
   *
   * printk()-s a multi-line sequential buffer line by line. The function
- * makes sure that the buffer in @s is nul terminated and safe to read
+ * makes sure that the buffer in @s is NUL-terminated and safe to read
   * as a string.
   */
  void seq_buf_do_printk(struct seq_buf *s, const char *lvl)
@@ -139,7 +149,7 @@ EXPORT_SYMBOL_GPL(seq_buf_do_printk);
   * This function will take the format and the binary array and finish
   * the conversion into the ASCII string within the buffer.
   *
- * Returns zero on success, -1 on overflow.
+ * Returns: zero on success, -1 on overflow.
   */
  int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary)
  {
@@ -167,7 +177,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary)
   *
   * Copy a simple string into the sequence buffer.
   *
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
   */
  int seq_buf_puts(struct seq_buf *s, const char *str)
  {
@@ -196,7 +206,7 @@ EXPORT_SYMBOL_GPL(seq_buf_puts);
   *
   * Copy a single character into the sequence buffer.
   *
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
   */
  int seq_buf_putc(struct seq_buf *s, unsigned char c)
  {
@@ -212,7 +222,7 @@ int seq_buf_putc(struct seq_buf *s, unsigned char c)
  EXPORT_SYMBOL_GPL(seq_buf_putc);
  
  /**
- * seq_buf_putmem - write raw data into the sequenc buffer
+ * seq_buf_putmem - write raw data into the sequence buffer
   * @s: seq_buf descriptor
   * @mem: The raw memory to copy into the buffer
   * @len: The length of the raw memory to copy (in bytes)
@@ -221,7 +231,7 @@ EXPORT_SYMBOL_GPL(seq_buf_putc);
   * buffer and a strcpy() would not work. Using this function allows
   * for such cases.
   *
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
   */
  int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len)
  {
@@ -249,7 +259,7 @@ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len)
   * raw memory into the buffer it writes its ASCII representation of it
   * in hex characters.
   *
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
   */
  int seq_buf_putmem_hex(struct seq_buf *s, const void *mem,
                        unsigned int len)
@@ -297,7 +307,7 @@ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem,
   *
   * Write a path name into the sequence buffer.
   *
- * Returns the number of written bytes on success, -1 on overflow
+ * Returns: the number of written bytes on success, -1 on overflow.
   */
  int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc)
  {
@@ -332,6 +342,7 @@ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc)
   * or until it reaches the end of the content in the buffer (@s->len),
   * whichever comes first.
   *
+ * Returns:
   * On success, it returns a positive number of the number of bytes
   * it copied.
   *
@@ -382,11 +393,11 @@ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt)
   * linebuf size is maximal length for one line.
   * 32 * 3 - maximum bytes per line, each printed into 2 chars + 1 for
   *     separating space
- * 2 - spaces separating hex dump and ascii representation
- * 32 - ascii representation
+ * 2 - spaces separating hex dump and ASCII representation
+ * 32 - ASCII representation
   * 1 - terminating '\0'
   *
- * Returns zero on success, -1 on overflow
+ * Returns: zero on success, -1 on overflow.
   */
  int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type,
                      int rowsize, int groupsize,
diff --git a/lib/stackdepot.c b/lib/stackdepot.c

index a0be5d05c7f08187667c91c7d0886843df52225c..4a7055a63d9f8a8a6723563fd8a30115653eea83 100644 (file)
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -14,6 +14,7 @@
  
  #define pr_fmt(fmt) "stackdepot: " fmt
  
+#include <linux/debugfs.h>
  #include <linux/gfp.h>
  #include <linux/jhash.h>
  #include <linux/kernel.h>
@@ -21,8 +22,10 @@
  #include <linux/list.h>
  #include <linux/mm.h>
  #include <linux/mutex.h>
-#include <linux/percpu.h>
+#include <linux/poison.h>
  #include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
  #include <linux/refcount.h>
  #include <linux/slab.h>
  #include <linux/spinlock.h>
@@ -41,17 +44,7 @@
  #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
  #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
                                STACK_DEPOT_EXTRA_BITS)
-#if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32
-/*
- * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack
- * traces. As KMSAN does not support evicting stack traces from the stack
- * depot, the stack depot capacity might be reached quickly with large stack
- * records. Adjust the maximum number of stack depot pools for this case.
- */
-#define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16))
-#else
  #define DEPOT_POOLS_CAP 8192
-#endif
  #define DEPOT_MAX_POOLS \
         (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \
          (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP)
@@ -67,17 +60,30 @@ union handle_parts {
  };
  
  struct stack_record {
-       struct list_head list;          /* Links in hash table or freelist */
+       struct list_head hash_list;     /* Links in the hash table */
         u32 hash;                       /* Hash in hash table */
         u32 size;                       /* Number of stored frames */
-       union handle_parts handle;
+       union handle_parts handle;      /* Constant after initialization */
         refcount_t count;
-       unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES];    /* Frames */
+       union {
+               unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES];    /* Frames */
+               struct {
+                       /*
+                        * An important invariant of the implementation is to
+                        * only place a stack record onto the freelist iff its
+                        * refcount is zero. Because stack records with a zero
+                        * refcount are never considered as valid, it is safe to
+                        * union @entries and freelist management state below.
+                        * Conversely, as soon as an entry is off the freelist
+                        * and its refcount becomes non-zero, the below must not
+                        * be accessed until being placed back on the freelist.
+                        */
+                       struct list_head free_list;     /* Links in the freelist */
+                       unsigned long rcu_state;        /* RCU cookie */
+               };
+       };
  };
  
-#define DEPOT_STACK_RECORD_SIZE \
-       ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN)
-
  static bool stack_depot_disabled;
  static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
  static bool __stack_depot_early_init_passed __initdata;
@@ -103,17 +109,33 @@ static void *stack_pools[DEPOT_MAX_POOLS];
  static void *new_pool;
  /* Number of pools in stack_pools. */
  static int pools_num;
+/* Offset to the unused space in the currently used pool. */
+static size_t pool_offset = DEPOT_POOL_SIZE;
  /* Freelist of stack records within stack_pools. */
  static LIST_HEAD(free_stacks);
-/*
- * Stack depot tries to keep an extra pool allocated even before it runs out
- * of space in the currently used pool. This flag marks whether this extra pool
- * needs to be allocated. It has the value 0 when either an extra pool is not
- * yet allocated or if the limit on the number of pools is reached.
- */
-static bool new_pool_required = true;
-/* Lock that protects the variables above. */
-static DEFINE_RWLOCK(pool_rwlock);
+/* The lock must be held when performing pool or freelist modifications. */
+static DEFINE_RAW_SPINLOCK(pool_lock);
+
+/* Statistics counters for debugfs. */
+enum depot_counter_id {
+       DEPOT_COUNTER_REFD_ALLOCS,
+       DEPOT_COUNTER_REFD_FREES,
+       DEPOT_COUNTER_REFD_INUSE,
+       DEPOT_COUNTER_FREELIST_SIZE,
+       DEPOT_COUNTER_PERSIST_COUNT,
+       DEPOT_COUNTER_PERSIST_BYTES,
+       DEPOT_COUNTER_COUNT,
+};
+static long counters[DEPOT_COUNTER_COUNT];
+static const char *const counter_names[] = {
+       [DEPOT_COUNTER_REFD_ALLOCS]     = "refcounted_allocations",
+       [DEPOT_COUNTER_REFD_FREES]      = "refcounted_frees",
+       [DEPOT_COUNTER_REFD_INUSE]      = "refcounted_in_use",
+       [DEPOT_COUNTER_FREELIST_SIZE]   = "freelist_size",
+       [DEPOT_COUNTER_PERSIST_COUNT]   = "persistent_count",
+       [DEPOT_COUNTER_PERSIST_BYTES]   = "persistent_bytes",
+};
+static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
  
  static int __init disable_stack_depot(char *str)
  {
@@ -258,174 +280,273 @@ out_unlock:
  }
  EXPORT_SYMBOL_GPL(stack_depot_init);
  
-/* Initializes a stack depol pool. */
-static void depot_init_pool(void *pool)
+/*
+ * Initializes new stack pool, and updates the list of pools.
+ */
+static bool depot_init_pool(void **prealloc)
  {
-       int offset;
+       lockdep_assert_held(&pool_lock);
  
-       lockdep_assert_held_write(&pool_rwlock);
+       if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
+               /* Bail out if we reached the pool limit. */
+               WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
+               WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
+               WARN_ONCE(1, "Stack depot reached limit capacity");
+               return false;
+       }
  
-       WARN_ON(!list_empty(&free_stacks));
+       if (!new_pool && *prealloc) {
+               /* We have preallocated memory, use it. */
+               WRITE_ONCE(new_pool, *prealloc);
+               *prealloc = NULL;
+       }
  
-       /* Initialize handles and link stack records into the freelist. */
-       for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE;
-            offset += DEPOT_STACK_RECORD_SIZE) {
-               struct stack_record *stack = pool + offset;
+       if (!new_pool)
+               return false; /* new_pool and *prealloc are NULL */
  
-               stack->handle.pool_index = pools_num;
-               stack->handle.offset = offset >> DEPOT_STACK_ALIGN;
-               stack->handle.extra = 0;
+       /* Save reference to the pool to be used by depot_fetch_stack(). */
+       stack_pools[pools_num] = new_pool;
  
-               list_add(&stack->list, &free_stacks);
-       }
+       /*
+        * Stack depot tries to keep an extra pool allocated even before it runs
+        * out of space in the currently used pool.
+        *
+        * To indicate that a new preallocation is needed new_pool is reset to
+        * NULL; do not reset to NULL if we have reached the maximum number of
+        * pools.
+        */
+       if (pools_num < DEPOT_MAX_POOLS)
+               WRITE_ONCE(new_pool, NULL);
+       else
+               WRITE_ONCE(new_pool, STACK_DEPOT_POISON);
  
-       /* Save reference to the pool to be used by depot_fetch_stack(). */
-       stack_pools[pools_num] = pool;
-       pools_num++;
+       /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
+       WRITE_ONCE(pools_num, pools_num + 1);
+       ASSERT_EXCLUSIVE_WRITER(pools_num);
+
+       pool_offset = 0;
+
+       return true;
  }
  
  /* Keeps the preallocated memory to be used for a new stack depot pool. */
  static void depot_keep_new_pool(void **prealloc)
  {
-       lockdep_assert_held_write(&pool_rwlock);
+       lockdep_assert_held(&pool_lock);
  
         /*
          * If a new pool is already saved or the maximum number of
          * pools is reached, do not use the preallocated memory.
          */
-       if (!new_pool_required)
+       if (new_pool)
                 return;
  
-       /*
-        * Use the preallocated memory for the new pool
-        * as long as we do not exceed the maximum number of pools.
-        */
-       if (pools_num < DEPOT_MAX_POOLS) {
-               new_pool = *prealloc;
-               *prealloc = NULL;
+       WRITE_ONCE(new_pool, *prealloc);
+       *prealloc = NULL;
+}
+
+/*
+ * Try to initialize a new stack record from the current pool, a cached pool, or
+ * the current pre-allocation.
+ */
+static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
+{
+       struct stack_record *stack;
+       void *current_pool;
+       u32 pool_index;
+
+       lockdep_assert_held(&pool_lock);
+
+       if (pool_offset + size > DEPOT_POOL_SIZE) {
+               if (!depot_init_pool(prealloc))
+                       return NULL;
         }
  
-       /*
-        * At this point, either a new pool is kept or the maximum
-        * number of pools is reached. In either case, take note that
-        * keeping another pool is not required.
-        */
-       new_pool_required = false;
+       if (WARN_ON_ONCE(pools_num < 1))
+               return NULL;
+       pool_index = pools_num - 1;
+       current_pool = stack_pools[pool_index];
+       if (WARN_ON_ONCE(!current_pool))
+               return NULL;
+
+       stack = current_pool + pool_offset;
+
+       /* Pre-initialize handle once. */
+       stack->handle.pool_index = pool_index;
+       stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
+       stack->handle.extra = 0;
+       INIT_LIST_HEAD(&stack->hash_list);
+
+       pool_offset += size;
+
+       return stack;
  }
  
-/* Updates references to the current and the next stack depot pools. */
-static bool depot_update_pools(void **prealloc)
+/* Try to find next free usable entry from the freelist. */
+static struct stack_record *depot_pop_free(void)
  {
-       lockdep_assert_held_write(&pool_rwlock);
+       struct stack_record *stack;
  
-       /* Check if we still have objects in the freelist. */
-       if (!list_empty(&free_stacks))
-               goto out_keep_prealloc;
+       lockdep_assert_held(&pool_lock);
  
-       /* Check if we have a new pool saved and use it. */
-       if (new_pool) {
-               depot_init_pool(new_pool);
-               new_pool = NULL;
+       if (list_empty(&free_stacks))
+               return NULL;
  
-               /* Take note that we might need a new new_pool. */
-               if (pools_num < DEPOT_MAX_POOLS)
-                       new_pool_required = true;
+       /*
+        * We maintain the invariant that the elements in front are least
+        * recently used, and are therefore more likely to be associated with an
+        * RCU grace period in the past. Consequently it is sufficient to only
+        * check the first entry.
+        */
+       stack = list_first_entry(&free_stacks, struct stack_record, free_list);
+       if (!poll_state_synchronize_rcu(stack->rcu_state))
+               return NULL;
  
-               /* Try keeping the preallocated memory for new_pool. */
-               goto out_keep_prealloc;
-       }
+       list_del(&stack->free_list);
+       counters[DEPOT_COUNTER_FREELIST_SIZE]--;
  
-       /* Bail out if we reached the pool limit. */
-       if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
-               WARN_ONCE(1, "Stack depot reached limit capacity");
-               return false;
-       }
+       return stack;
+}
  
-       /* Check if we have preallocated memory and use it. */
-       if (*prealloc) {
-               depot_init_pool(*prealloc);
-               *prealloc = NULL;
-               return true;
-       }
+static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
+{
+       const size_t used = flex_array_size(s, entries, nr_entries);
+       const size_t unused = sizeof(s->entries) - used;
  
-       return false;
+       WARN_ON_ONCE(sizeof(s->entries) < used);
  
-out_keep_prealloc:
-       /* Keep the preallocated memory for a new pool if required. */
-       if (*prealloc)
-               depot_keep_new_pool(prealloc);
-       return true;
+       return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
  }
  
  /* Allocates a new stack in a stack depot pool. */
  static struct stack_record *
-depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
+depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
  {
-       struct stack_record *stack;
+       struct stack_record *stack = NULL;
+       size_t record_size;
  
-       lockdep_assert_held_write(&pool_rwlock);
+       lockdep_assert_held(&pool_lock);
  
-       /* Update current and new pools if required and possible. */
-       if (!depot_update_pools(prealloc))
+       /* This should already be checked by public API entry points. */
+       if (WARN_ON_ONCE(!nr_entries))
                 return NULL;
  
-       /* Check if we have a stack record to save the stack trace. */
-       if (list_empty(&free_stacks))
-               return NULL;
+       /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
+       if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
+               nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;
  
-       /* Get and unlink the first entry from the freelist. */
-       stack = list_first_entry(&free_stacks, struct stack_record, list);
-       list_del(&stack->list);
+       if (flags & STACK_DEPOT_FLAG_GET) {
+               /*
+                * Evictable entries have to allocate the max. size so they may
+                * safely be re-used by differently sized allocations.
+                */
+               record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
+               stack = depot_pop_free();
+       } else {
+               record_size = depot_stack_record_size(stack, nr_entries);
+       }
  
-       /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
-       if (size > CONFIG_STACKDEPOT_MAX_FRAMES)
-               size = CONFIG_STACKDEPOT_MAX_FRAMES;
+       if (!stack) {
+               stack = depot_pop_free_pool(prealloc, record_size);
+               if (!stack)
+                       return NULL;
+       }
  
         /* Save the stack trace. */
         stack->hash = hash;
-       stack->size = size;
-       /* stack->handle is already filled in by depot_init_pool(). */
-       refcount_set(&stack->count, 1);
-       memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
+       stack->size = nr_entries;
+       /* stack->handle is already filled in by depot_pop_free_pool(). */
+       memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));
+
+       if (flags & STACK_DEPOT_FLAG_GET) {
+               refcount_set(&stack->count, 1);
+               counters[DEPOT_COUNTER_REFD_ALLOCS]++;
+               counters[DEPOT_COUNTER_REFD_INUSE]++;
+       } else {
+               /* Warn on attempts to switch to refcounting this entry. */
+               refcount_set(&stack->count, REFCOUNT_SATURATED);
+               counters[DEPOT_COUNTER_PERSIST_COUNT]++;
+               counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
+       }
  
         /*
          * Let KMSAN know the stored stack record is initialized. This shall
          * prevent false positive reports if instrumented code accesses it.
          */
-       kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE);
+       kmsan_unpoison_memory(stack, record_size);
  
         return stack;
  }
  
  static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
  {
+       const int pools_num_cached = READ_ONCE(pools_num);
         union handle_parts parts = { .handle = handle };
         void *pool;
         size_t offset = parts.offset << DEPOT_STACK_ALIGN;
         struct stack_record *stack;
  
-       lockdep_assert_held(&pool_rwlock);
+       lockdep_assert_not_held(&pool_lock);
  
-       if (parts.pool_index > pools_num) {
+       if (parts.pool_index > pools_num_cached) {
                 WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
-                    parts.pool_index, pools_num, handle);
+                    parts.pool_index, pools_num_cached, handle);
                 return NULL;
         }
  
         pool = stack_pools[parts.pool_index];
-       if (!pool)
+       if (WARN_ON(!pool))
                 return NULL;
  
         stack = pool + offset;
+       if (WARN_ON(!refcount_read(&stack->count)))
+               return NULL;
+
         return stack;
  }
  
  /* Links stack into the freelist. */
  static void depot_free_stack(struct stack_record *stack)
  {
-       lockdep_assert_held_write(&pool_rwlock);
+       unsigned long flags;
+
+       lockdep_assert_not_held(&pool_lock);
+
+       raw_spin_lock_irqsave(&pool_lock, flags);
+       printk_deferred_enter();
  
-       list_add(&stack->list, &free_stacks);
+       /*
+        * Remove the entry from the hash list. Concurrent list traversal may
+        * still observe the entry, but since the refcount is zero, this entry
+        * will no longer be considered as valid.
+        */
+       list_del_rcu(&stack->hash_list);
+
+       /*
+        * Due to being used from constrained contexts such as the allocators,
+        * NMI, or even RCU itself, stack depot cannot rely on primitives that
+        * would sleep (such as synchronize_rcu()) or recursively call into
+        * stack depot again (such as call_rcu()).
+        *
+        * Instead, get an RCU cookie, so that we can ensure this entry isn't
+        * moved onto another list until the next grace period, and concurrent
+        * RCU list traversal remains safe.
+        */
+       stack->rcu_state = get_state_synchronize_rcu();
+
+       /*
+        * Add the entry to the freelist tail, so that older entries are
+        * considered first - their RCU cookie is more likely to no longer be
+        * associated with the current grace period.
+        */
+       list_add_tail(&stack->free_list, &free_stacks);
+
+       counters[DEPOT_COUNTER_FREELIST_SIZE]++;
+       counters[DEPOT_COUNTER_REFD_FREES]++;
+       counters[DEPOT_COUNTER_REFD_INUSE]--;
+
+       printk_deferred_exit();
+       raw_spin_unlock_irqrestore(&pool_lock, flags);
  }
  
  /* Calculates the hash for a stack. */
@@ -453,22 +574,52 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
  
  /* Finds a stack in a bucket of the hash table. */
  static inline struct stack_record *find_stack(struct list_head *bucket,
-                                            unsigned long *entries, int size,
-                                            u32 hash)
+                                             unsigned long *entries, int size,
+                                             u32 hash, depot_flags_t flags)
  {
-       struct list_head *pos;
-       struct stack_record *found;
+       struct stack_record *stack, *ret = NULL;
  
-       lockdep_assert_held(&pool_rwlock);
+       /*
+        * Stack depot may be used from instrumentation that instruments RCU or
+        * tracing itself; use variant that does not call into RCU and cannot be
+        * traced.
+        *
+        * Note: Such use cases must take care when using refcounting to evict
+        * unused entries, because the stack record free-then-reuse code paths
+        * do call into RCU.
+        */
+       rcu_read_lock_sched_notrace();
+
+       list_for_each_entry_rcu(stack, bucket, hash_list) {
+               if (stack->hash != hash || stack->size != size)
+                       continue;
+
+               /*
+                * This may race with depot_free_stack() accessing the freelist
+                * management state unioned with @entries. The refcount is zero
+                * in that case and the below refcount_inc_not_zero() will fail.
+                */
+               if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
+                       continue;
+
+               /*
+                * Try to increment refcount. If this succeeds, the stack record
+                * is valid and has not yet been freed.
+                *
+                * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior
+                * to then call stack_depot_put() later, and we can assume that
+                * a stack record is never placed back on the freelist.
+                */
+               if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
+                       continue;
  
-       list_for_each(pos, bucket) {
-               found = list_entry(pos, struct stack_record, list);
-               if (found->hash == hash &&
-                   found->size == size &&
-                   !stackdepot_memcmp(entries, found->entries, size))
-                       return found;
+               ret = stack;
+               break;
         }
-       return NULL;
+
+       rcu_read_unlock_sched_notrace();
+
+       return ret;
  }
  
  depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
@@ -482,7 +633,6 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
         struct page *page = NULL;
         void *prealloc = NULL;
         bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC;
-       bool need_alloc = false;
         unsigned long flags;
         u32 hash;
  
@@ -505,31 +655,16 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
         hash = hash_stack(entries, nr_entries);
         bucket = &stack_table[hash & stack_hash_mask];
  
-       read_lock_irqsave(&pool_rwlock, flags);
-       printk_deferred_enter();
-
-       /* Fast path: look the stack trace up without full locking. */
-       found = find_stack(bucket, entries, nr_entries, hash);
-       if (found) {
-               if (depot_flags & STACK_DEPOT_FLAG_GET)
-                       refcount_inc(&found->count);
-               printk_deferred_exit();
-               read_unlock_irqrestore(&pool_rwlock, flags);
+       /* Fast path: look the stack trace up without locking. */
+       found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
+       if (found)
                 goto exit;
-       }
-
-       /* Take note if another stack pool needs to be allocated. */
-       if (new_pool_required)
-               need_alloc = true;
-
-       printk_deferred_exit();
-       read_unlock_irqrestore(&pool_rwlock, flags);
  
         /*
          * Allocate memory for a new pool if required now:
          * we won't be able to do that under the lock.
          */
-       if (unlikely(can_alloc && need_alloc)) {
+       if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
                 /*
                  * Zero out zone modifiers, as we don't have specific zone
                  * requirements. Keep the flags related to allocation in atomic
@@ -543,31 +678,36 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
                         prealloc = page_address(page);
         }
  
-       write_lock_irqsave(&pool_rwlock, flags);
+       raw_spin_lock_irqsave(&pool_lock, flags);
         printk_deferred_enter();
  
-       found = find_stack(bucket, entries, nr_entries, hash);
+       /* Try to find again, to avoid concurrently inserting duplicates. */
+       found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
         if (!found) {
                 struct stack_record *new =
-                       depot_alloc_stack(entries, nr_entries, hash, &prealloc);
+                       depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
  
                 if (new) {
-                       list_add(&new->list, bucket);
+                       /*
+                        * This releases the stack record into the bucket and
+                        * makes it visible to readers in find_stack().
+                        */
+                       list_add_rcu(&new->hash_list, bucket);
                         found = new;
                 }
-       } else {
-               if (depot_flags & STACK_DEPOT_FLAG_GET)
-                       refcount_inc(&found->count);
+       }
+
+       if (prealloc) {
                 /*
-                * Stack depot already contains this stack trace, but let's
-                * keep the preallocated memory for future.
+                * Either stack depot already contains this stack trace, or
+                * depot_alloc_stack() did not consume the preallocated memory.
+                * Try to keep the preallocated memory for future.
                  */
-               if (prealloc)
-                       depot_keep_new_pool(&prealloc);
+               depot_keep_new_pool(&prealloc);
         }
  
         printk_deferred_exit();
-       write_unlock_irqrestore(&pool_rwlock, flags);
+       raw_spin_unlock_irqrestore(&pool_lock, flags);
  exit:
         if (prealloc) {
                 /* Stack depot didn't use this memory, free it. */
@@ -592,7 +732,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
                                unsigned long **entries)
  {
         struct stack_record *stack;
-       unsigned long flags;
  
         *entries = NULL;
         /*
@@ -604,13 +743,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
         if (!handle || stack_depot_disabled)
                 return 0;
  
-       read_lock_irqsave(&pool_rwlock, flags);
-       printk_deferred_enter();
-
         stack = depot_fetch_stack(handle);
-
-       printk_deferred_exit();
-       read_unlock_irqrestore(&pool_rwlock, flags);
+       /*
+        * Should never be NULL, otherwise this is a use-after-put (or just a
+        * corrupt handle).
+        */
+       if (WARN(!stack, "corrupt handle or use after stack_depot_put()"))
+               return 0;
  
         *entries = stack->entries;
         return stack->size;
@@ -620,29 +759,20 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
  void stack_depot_put(depot_stack_handle_t handle)
  {
         struct stack_record *stack;
-       unsigned long flags;
  
         if (!handle || stack_depot_disabled)
                 return;
  
-       write_lock_irqsave(&pool_rwlock, flags);
-       printk_deferred_enter();
-
         stack = depot_fetch_stack(handle);
-       if (WARN_ON(!stack))
-               goto out;
-
-       if (refcount_dec_and_test(&stack->count)) {
-               /* Unlink stack from the hash table. */
-               list_del(&stack->list);
+       /*
+        * Should always be able to find the stack record, otherwise this is an
+        * unbalanced put attempt (or corrupt handle).
+        */
+       if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()"))
+               return;
  
-               /* Free stack. */
+       if (refcount_dec_and_test(&stack->count))
                 depot_free_stack(stack);
-       }
-
-out:
-       printk_deferred_exit();
-       write_unlock_irqrestore(&pool_rwlock, flags);
  }
  EXPORT_SYMBOL_GPL(stack_depot_put);
  
@@ -690,3 +820,30 @@ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
         return parts.extra;
  }
  EXPORT_SYMBOL(stack_depot_get_extra_bits);
+
+static int stats_show(struct seq_file *seq, void *v)
+{
+       /*
+        * data race ok: These are just statistics counters, and approximate
+        * statistics are ok for debugging.
+        */
+       seq_printf(seq, "pools: %d\n", data_race(pools_num));
+       for (int i = 0; i < DEPOT_COUNTER_COUNT; i++)
+               seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i]));
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(stats);
+
+static int depot_debugfs_init(void)
+{
+       struct dentry *dir;
+
+       if (stack_depot_disabled)
+               return 0;
+
+       dir = debugfs_create_dir("stackdepot", NULL);
+       debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
+       return 0;
+}
+late_initcall(depot_debugfs_init);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c

index 1e3447bccdb14d126b3c108fd27ab652b5a3a94f..e039d05304dd9ca52da735962c0ef951fb448ec5 100644 (file)
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -436,7 +436,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
         INIT_LIST_HEAD(&wb->work_list);
         INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
         INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
-       wb->dirty_sleep = jiffies;
  
         err = fprop_local_init_percpu(&wb->completions, gfp);
         if (err)
@@ -921,6 +920,7 @@ int bdi_init(struct backing_dev_info *bdi)
         INIT_LIST_HEAD(&bdi->bdi_list);
         INIT_LIST_HEAD(&bdi->wb_list);
         init_waitqueue_head(&bdi->wb_waitq);
+       bdi->last_bdp_sleep = jiffies;
  
         return cgwb_bdi_init(bdi);
  }
diff --git a/mm/damon/core.c b/mm/damon/core.c

index 36f6f1d21ff069de12575a4f0d932e0dfc316c11..5b325749fc12597ddd273ae605bdb1c04a93f99e 100644 (file)
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1026,6 +1026,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
         damon_for_each_scheme(s, c) {
                 struct damos_quota *quota = &s->quota;
  
+               if (c->passed_sample_intervals != s->next_apply_sis)
+                       continue;
+
                 if (!s->wmarks.activated)
                         continue;
  
@@ -1176,10 +1179,6 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
                 if (c->passed_sample_intervals != s->next_apply_sis)
                         continue;
  
-               s->next_apply_sis +=
-                       (s->apply_interval_us ? s->apply_interval_us :
-                        c->attrs.aggr_interval) / sample_interval;
-
                 if (!s->wmarks.activated)
                         continue;
  
@@ -1195,6 +1194,14 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
                 damon_for_each_region_safe(r, next_r, t)
                         damon_do_apply_schemes(c, t, r);
         }
+
+       damon_for_each_scheme(s, c) {
+               if (c->passed_sample_intervals != s->next_apply_sis)
+                       continue;
+               s->next_apply_sis +=
+                       (s->apply_interval_us ? s->apply_interval_us :
+                        c->attrs.aggr_interval) / sample_interval;
+       }
  }
  
  /*
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c

index f2e5f9431892eb207bec1da87224282e3de27371..3de2916a65c38c372b5ed8472b7a87b34026aed7 100644 (file)
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
         return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO);
  }
  
+static void damon_lru_sort_copy_quota_status(struct damos_quota *dst,
+               struct damos_quota *src)
+{
+       dst->total_charged_sz = src->total_charged_sz;
+       dst->total_charged_ns = src->total_charged_ns;
+       dst->charged_sz = src->charged_sz;
+       dst->charged_from = src->charged_from;
+       dst->charge_target_from = src->charge_target_from;
+       dst->charge_addr_from = src->charge_addr_from;
+}
+
  static int damon_lru_sort_apply_parameters(void)
  {
-       struct damos *scheme;
+       struct damos *scheme, *hot_scheme, *cold_scheme;
+       struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL;
         unsigned int hot_thres, cold_thres;
         int err = 0;
  
@@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void)
         if (err)
                 return err;
  
+       damon_for_each_scheme(scheme, ctx) {
+               if (!old_hot_scheme) {
+                       old_hot_scheme = scheme;
+                       continue;
+               }
+               old_cold_scheme = scheme;
+       }
+
         hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) *
                 hot_thres_access_freq / 1000;
-       scheme = damon_lru_sort_new_hot_scheme(hot_thres);
-       if (!scheme)
+       hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres);
+       if (!hot_scheme)
                 return -ENOMEM;
-       damon_set_schemes(ctx, &scheme, 1);
+       if (old_hot_scheme)
+               damon_lru_sort_copy_quota_status(&hot_scheme->quota,
+                               &old_hot_scheme->quota);
  
         cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval;
-       scheme = damon_lru_sort_new_cold_scheme(cold_thres);
-       if (!scheme)
+       cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres);
+       if (!cold_scheme) {
+               damon_destroy_scheme(hot_scheme);
                 return -ENOMEM;
-       damon_add_scheme(ctx, scheme);
+       }
+       if (old_cold_scheme)
+               damon_lru_sort_copy_quota_status(&cold_scheme->quota,
+                               &old_cold_scheme->quota);
+
+       damon_set_schemes(ctx, &hot_scheme, 1);
+       damon_add_scheme(ctx, cold_scheme);
  
         return damon_set_region_biggest_system_ram_default(target,
                                         &monitor_region_start,
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c

index ab974e477d2f2850f642fbbafac48a8b3a5d136b..66e190f0374ac84b47100b8ba21fe4c32e104891 100644 (file)
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -150,9 +150,20 @@ static struct damos *damon_reclaim_new_scheme(void)
                         &damon_reclaim_wmarks);
  }
  
+static void damon_reclaim_copy_quota_status(struct damos_quota *dst,
+               struct damos_quota *src)
+{
+       dst->total_charged_sz = src->total_charged_sz;
+       dst->total_charged_ns = src->total_charged_ns;
+       dst->charged_sz = src->charged_sz;
+       dst->charged_from = src->charged_from;
+       dst->charge_target_from = src->charge_target_from;
+       dst->charge_addr_from = src->charge_addr_from;
+}
+
  static int damon_reclaim_apply_parameters(void)
  {
-       struct damos *scheme;
+       struct damos *scheme, *old_scheme;
         struct damos_filter *filter;
         int err = 0;
  
@@ -164,6 +175,11 @@ static int damon_reclaim_apply_parameters(void)
         scheme = damon_reclaim_new_scheme();
         if (!scheme)
                 return -ENOMEM;
+       if (!list_empty(&ctx->schemes)) {
+               damon_for_each_scheme(old_scheme, ctx)
+                       damon_reclaim_copy_quota_status(&scheme->quota,
+                                       &old_scheme->quota);
+       }
         if (skip_anon) {
                 filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true);
                 if (!filter) {
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c

index 8dbaac6e5c2d05dc4bc7c2b7545be26d508afd9a..ae0f0b314f3a9a5ec251021d0fb68d423fa53cd7 100644 (file)
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1905,6 +1905,10 @@ void damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
         damon_for_each_scheme(scheme, ctx) {
                 struct damon_sysfs_scheme *sysfs_scheme;
  
+               /* user could have removed the scheme sysfs dir */
+               if (i >= sysfs_schemes->nr)
+                       break;
+
                 sysfs_scheme = sysfs_schemes->schemes_arr[i];
                 damos_sysfs_set_quota_score(sysfs_scheme->quotas->goals,
                                 &scheme->quota);
@@ -2194,7 +2198,7 @@ static void damos_tried_regions_init_upd_status(
                 sysfs_regions->upd_timeout_jiffies = jiffies +
                         2 * usecs_to_jiffies(scheme->apply_interval_us ?
                                         scheme->apply_interval_us :
-                                       ctx->attrs.sample_interval);
+                                       ctx->attrs.aggr_interval);
         }
  }
  
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c

index 5662e29fe25335cf9e6227ae9fd4f22971095adb..65c19025da3dfee99ba0c1129874283bfcd5c72f 100644 (file)
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -362,6 +362,12 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
         vaddr &= HPAGE_PUD_MASK;
  
         pud = pfn_pud(args->pud_pfn, args->page_prot);
+       /*
+        * Some architectures have debug checks to make sure
+        * huge pud mapping are only found with devmap entries
+        * For now test with only devmap entries.
+        */
+       pud = pud_mkdevmap(pud);
         set_pud_at(args->mm, vaddr, args->pudp, pud);
         flush_dcache_page(page);
         pudp_set_wrprotect(args->mm, vaddr, args->pudp);
@@ -374,6 +380,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
         WARN_ON(!pud_none(pud));
  #endif /* __PAGETABLE_PMD_FOLDED */
         pud = pfn_pud(args->pud_pfn, args->page_prot);
+       pud = pud_mkdevmap(pud);
         pud = pud_wrprotect(pud);
         pud = pud_mkclean(pud);
         set_pud_at(args->mm, vaddr, args->pudp, pud);
@@ -391,6 +398,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
  #endif /* __PAGETABLE_PMD_FOLDED */
  
         pud = pfn_pud(args->pud_pfn, args->page_prot);
+       pud = pud_mkdevmap(pud);
         pud = pud_mkyoung(pud);
         set_pud_at(args->mm, vaddr, args->pudp, pud);
         flush_dcache_page(page);
diff --git a/mm/filemap.c b/mm/filemap.c

index 750e779c23db74730fa7743c2307d1b996729d62..4a30de98a8c75daec31d1d79d15a9d9514e9fd1d 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -4111,28 +4111,40 @@ static void filemap_cachestat(struct address_space *mapping,
  
         rcu_read_lock();
         xas_for_each(&xas, folio, last_index) {
+               int order;
                 unsigned long nr_pages;
                 pgoff_t folio_first_index, folio_last_index;
  
+               /*
+                * Don't deref the folio. It is not pinned, and might
+                * get freed (and reused) underneath us.
+                *
+                * We *could* pin it, but that would be expensive for
+                * what should be a fast and lightweight syscall.
+                *
+                * Instead, derive all information of interest from
+                * the rcu-protected xarray.
+                */
+
                 if (xas_retry(&xas, folio))
                         continue;
  
+               order = xa_get_order(xas.xa, xas.xa_index);
+               nr_pages = 1 << order;
+               folio_first_index = round_down(xas.xa_index, 1 << order);
+               folio_last_index = folio_first_index + nr_pages - 1;
+
+               /* Folios might straddle the range boundaries, only count covered pages */
+               if (folio_first_index < first_index)
+                       nr_pages -= first_index - folio_first_index;
+
+               if (folio_last_index > last_index)
+                       nr_pages -= folio_last_index - last_index;
+
                 if (xa_is_value(folio)) {
                         /* page is evicted */
                         void *shadow = (void *)folio;
                         bool workingset; /* not used */
-                       int order = xa_get_order(xas.xa, xas.xa_index);
-
-                       nr_pages = 1 << order;
-                       folio_first_index = round_down(xas.xa_index, 1 << order);
-                       folio_last_index = folio_first_index + nr_pages - 1;
-
-                       /* Folios might straddle the range boundaries, only count covered pages */
-                       if (folio_first_index < first_index)
-                               nr_pages -= first_index - folio_first_index;
-
-                       if (folio_last_index > last_index)
-                               nr_pages -= folio_last_index - last_index;
  
                         cs->nr_evicted += nr_pages;
  
@@ -4150,24 +4162,13 @@ static void filemap_cachestat(struct address_space *mapping,
                         goto resched;
                 }
  
-               nr_pages = folio_nr_pages(folio);
-               folio_first_index = folio_pgoff(folio);
-               folio_last_index = folio_first_index + nr_pages - 1;
-
-               /* Folios might straddle the range boundaries, only count covered pages */
-               if (folio_first_index < first_index)
-                       nr_pages -= first_index - folio_first_index;
-
-               if (folio_last_index > last_index)
-                       nr_pages -= folio_last_index - last_index;
-
                 /* page is in cache */
                 cs->nr_cache += nr_pages;
  
-               if (folio_test_dirty(folio))
+               if (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY))
                         cs->nr_dirty += nr_pages;
  
-               if (folio_test_writeback(folio))
+               if (xas_get_mark(&xas, PAGECACHE_TAG_WRITEBACK))
                         cs->nr_writeback += nr_pages;
  
  resched:
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 94ef5c02b459642f2625775bc66ca147cb2ac992..94c958f7ebb50dd925070157c0d0b2432dfc0483 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -37,6 +37,7 @@
  #include <linux/page_owner.h>
  #include <linux/sched/sysctl.h>
  #include <linux/memory-tiers.h>
+#include <linux/compat.h>
  
  #include <asm/tlb.h>
  #include <asm/pgalloc.h>
@@ -809,7 +810,10 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
  {
         loff_t off_end = off + len;
         loff_t off_align = round_up(off, size);
-       unsigned long len_pad, ret;
+       unsigned long len_pad, ret, off_sub;
+
+       if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
+               return 0;
  
         if (off_end <= off_align || (off_end - off_align) < size)
                 return 0;
@@ -835,7 +839,13 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
         if (ret == addr)
                 return addr;
  
-       ret += (off - ret) & (size - 1);
+       off_sub = (off - ret) & (size - 1);
+
+       if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown &&
+           !off_sub)
+               return ret + size;
+
+       ret += off_sub;
         return ret;
  }
  
@@ -2437,7 +2447,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                         page = pmd_page(old_pmd);
                         folio = page_folio(page);
                         if (!folio_test_dirty(folio) && pmd_dirty(old_pmd))
-                               folio_set_dirty(folio);
+                               folio_mark_dirty(folio);
                         if (!folio_test_referenced(folio) && pmd_young(old_pmd))
                                 folio_set_referenced(folio);
                         folio_remove_rmap_pmd(folio, page, vma);
@@ -3563,7 +3573,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
         }
  
         if (pmd_dirty(pmdval))
-               folio_set_dirty(folio);
+               folio_mark_dirty(folio);
         if (pmd_write(pmdval))
                 entry = make_writable_migration_entry(page_to_pfn(page));
         else if (anon_exclusive)
diff --git a/mm/kasan/common.c b/mm/kasan/common.c

index 610efae912209472d818628778f1ebce5b0e2cc1..6ca63e8dda741b5e4094f7205f0b74a163be2e43 100644 (file)
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -65,8 +65,7 @@ void kasan_save_track(struct kasan_track *track, gfp_t flags)
  {
         depot_stack_handle_t stack;
  
-       stack = kasan_save_stack(flags,
-                       STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
+       stack = kasan_save_stack(flags, STACK_DEPOT_FLAG_CAN_ALLOC);
         kasan_set_track(track, stack);
  }
  
@@ -266,10 +265,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object,
                 return true;
  
         /*
-        * If the object is not put into quarantine, it will likely be quickly
-        * reallocated. Thus, release its metadata now.
+        * Note: Keep per-object metadata to allow KASAN print stack traces for
+        * use-after-free-before-realloc bugs.
          */
-       kasan_release_object_meta(cache, object);
  
         /* Let slab put the object onto the freelist. */
         return false;
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c

index df6627f62402c01dab04e6955bf80e7fb4b4b2ae..1900f857603456ec20c1f7bb0841362624c11260 100644 (file)
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -485,16 +485,6 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
         if (alloc_meta) {
                 /* Zero out alloc meta to mark it as invalid. */
                 __memset(alloc_meta, 0, sizeof(*alloc_meta));
-
-               /*
-                * Prepare the lock for saving auxiliary stack traces.
-                * Temporarily disable KASAN bug reporting to allow instrumented
-                * raw_spin_lock_init to access aux_lock, which resides inside
-                * of a redzone.
-                */
-               kasan_disable_current();
-               raw_spin_lock_init(&alloc_meta->aux_lock);
-               kasan_enable_current();
         }
  
         /*
@@ -506,47 +496,23 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
  
  static void release_alloc_meta(struct kasan_alloc_meta *meta)
  {
-       /* Evict the stack traces from stack depot. */
-       stack_depot_put(meta->alloc_track.stack);
-       stack_depot_put(meta->aux_stack[0]);
-       stack_depot_put(meta->aux_stack[1]);
-
-       /*
-        * Zero out alloc meta to mark it as invalid but keep aux_lock
-        * initialized to avoid having to reinitialize it when another object
-        * is allocated in the same slot.
-        */
-       __memset(&meta->alloc_track, 0, sizeof(meta->alloc_track));
-       __memset(meta->aux_stack, 0, sizeof(meta->aux_stack));
+       /* Zero out alloc meta to mark it as invalid. */
+       __memset(meta, 0, sizeof(*meta));
  }
  
  static void release_free_meta(const void *object, struct kasan_free_meta *meta)
  {
+       if (!kasan_arch_is_ready())
+               return;
+
         /* Check if free meta is valid. */
         if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_SLAB_FREE_META)
                 return;
  
-       /* Evict the stack trace from the stack depot. */
-       stack_depot_put(meta->free_track.stack);
-
         /* Mark free meta as invalid. */
         *(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
  }
  
-void kasan_release_object_meta(struct kmem_cache *cache, const void *object)
-{
-       struct kasan_alloc_meta *alloc_meta;
-       struct kasan_free_meta *free_meta;
-
-       alloc_meta = kasan_get_alloc_meta(cache, object);
-       if (alloc_meta)
-               release_alloc_meta(alloc_meta);
-
-       free_meta = kasan_get_free_meta(cache, object);
-       if (free_meta)
-               release_free_meta(object, free_meta);
-}
-
  size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object)
  {
         struct kasan_cache *info = &cache->kasan_info;
@@ -571,8 +537,6 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags)
         struct kmem_cache *cache;
         struct kasan_alloc_meta *alloc_meta;
         void *object;
-       depot_stack_handle_t new_handle, old_handle;
-       unsigned long flags;
  
         if (is_kfence_address(addr) || !slab)
                 return;
@@ -583,33 +547,18 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags)
         if (!alloc_meta)
                 return;
  
-       new_handle = kasan_save_stack(0, depot_flags);
-
-       /*
-        * Temporarily disable KASAN bug reporting to allow instrumented
-        * spinlock functions to access aux_lock, which resides inside of a
-        * redzone.
-        */
-       kasan_disable_current();
-       raw_spin_lock_irqsave(&alloc_meta->aux_lock, flags);
-       old_handle = alloc_meta->aux_stack[1];
         alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
-       alloc_meta->aux_stack[0] = new_handle;
-       raw_spin_unlock_irqrestore(&alloc_meta->aux_lock, flags);
-       kasan_enable_current();
-
-       stack_depot_put(old_handle);
+       alloc_meta->aux_stack[0] = kasan_save_stack(0, depot_flags);
  }
  
  void kasan_record_aux_stack(void *addr)
  {
-       return __kasan_record_aux_stack(addr,
-                       STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
+       return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_CAN_ALLOC);
  }
  
  void kasan_record_aux_stack_noalloc(void *addr)
  {
-       return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_GET);
+       return __kasan_record_aux_stack(addr, 0);
  }
  
  void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
@@ -620,7 +569,7 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
         if (!alloc_meta)
                 return;
  
-       /* Evict previous stack traces (might exist for krealloc or mempool). */
+       /* Invalidate previous stack traces (might exist for krealloc or mempool). */
         release_alloc_meta(alloc_meta);
  
         kasan_save_track(&alloc_meta->alloc_track, flags);
@@ -634,7 +583,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object)
         if (!free_meta)
                 return;
  
-       /* Evict previous stack trace (might exist for mempool). */
+       /* Invalidate previous stack trace (might exist for mempool). */
         release_free_meta(object, free_meta);
  
         kasan_save_track(&free_meta->free_track, 0);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h

index d0f172f2b9783f1b1e73ea82ed5d3e6aaf2bec75..fb2b9ac0659a7add8f4ca95b9dcdc38b937cd216 100644 (file)
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -6,7 +6,6 @@
  #include <linux/kasan.h>
  #include <linux/kasan-tags.h>
  #include <linux/kfence.h>
-#include <linux/spinlock.h>
  #include <linux/stackdepot.h>
  
  #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
@@ -265,13 +264,6 @@ struct kasan_global {
  struct kasan_alloc_meta {
         struct kasan_track alloc_track;
         /* Free track is stored in kasan_free_meta. */
-       /*
-        * aux_lock protects aux_stack from accesses from concurrent
-        * kasan_record_aux_stack calls. It is a raw spinlock to avoid sleeping
-        * on RT kernels, as kasan_record_aux_stack_noalloc can be called from
-        * non-sleepable contexts.
-        */
-       raw_spinlock_t aux_lock;
         depot_stack_handle_t aux_stack[2];
  };
  
@@ -398,10 +390,8 @@ struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache,
  struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
                                                 const void *object);
  void kasan_init_object_meta(struct kmem_cache *cache, const void *object);
-void kasan_release_object_meta(struct kmem_cache *cache, const void *object);
  #else
  static inline void kasan_init_object_meta(struct kmem_cache *cache, const void *object) { }
-static inline void kasan_release_object_meta(struct kmem_cache *cache, const void *object) { }
  #endif
  
  depot_stack_handle_t kasan_save_stack(gfp_t flags, depot_flags_t depot_flags);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c

index 3ba02efb952aac15b4e511ad985caae0d0935bac..6958aa713c67ee7b0d2c74af676ecb82788d22cd 100644 (file)
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -145,7 +145,10 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
         void *object = qlink_to_object(qlink, cache);
         struct kasan_free_meta *free_meta = kasan_get_free_meta(cache, object);
  
-       kasan_release_object_meta(cache, object);
+       /*
+        * Note: Keep per-object metadata to allow KASAN print stack traces for
+        * use-after-free-before-realloc bugs.
+        */
  
         /*
          * If init_on_free is enabled and KASAN's free metadata is stored in
diff --git a/mm/madvise.c b/mm/madvise.c

index 912155a94ed5871c1805f33ec624c7c7c1ee28c8..cfa5e7288261189cb8242e5a0367fe6ffeebca12 100644 (file)
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -429,6 +429,7 @@ restart:
                 if (++batch_count == SWAP_CLUSTER_MAX) {
                         batch_count = 0;
                         if (need_resched()) {
+                               arch_leave_lazy_mmu_mode();
                                 pte_unmap_unlock(start_pte, ptl);
                                 cond_resched();
                                 goto restart;
diff --git a/mm/memblock.c b/mm/memblock.c

index 4dcb2ee35eca856a43694f4402dea0c1c9bf6d8a..d09136e040d3cc37b1b7b74a7cdd5d2c92eb8cb7 100644 (file)
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
  /*
   * Address comparison utilities
   */
-static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
-                                      phys_addr_t base2, phys_addr_t size2)
+unsigned long __init_memblock
+memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
+                      phys_addr_t size2)
  {
         return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
  }
@@ -2249,6 +2250,7 @@ static const char * const flagname[] = {
         [ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
         [ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
         [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
+       [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
  };
  
  static int memblock_debug_show(struct seq_file *m, void *private)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index e4c8735e7c85cf061a2ab31c9be250934c680879..61932c9215e7734e4dfc7dc6e427c3692d1c3c6f 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -621,6 +621,15 @@ static inline int memcg_events_index(enum vm_event_item idx)
  }
  
  struct memcg_vmstats_percpu {
+       /* Stats updates since the last flush */
+       unsigned int                    stats_updates;
+
+       /* Cached pointers for fast iteration in memcg_rstat_updated() */
+       struct memcg_vmstats_percpu     *parent;
+       struct memcg_vmstats            *vmstats;
+
+       /* The above should fit a single cacheline for memcg_rstat_updated() */
+
         /* Local (CPU and cgroup) page state & events */
         long                    state[MEMCG_NR_STAT];
         unsigned long           events[NR_MEMCG_EVENTS];
@@ -632,10 +641,7 @@ struct memcg_vmstats_percpu {
         /* Cgroup1: threshold notifications & softlimit tree updates */
         unsigned long           nr_page_events;
         unsigned long           targets[MEM_CGROUP_NTARGETS];
-
-       /* Stats updates since the last flush */
-       unsigned int            stats_updates;
-};
+} ____cacheline_aligned;
  
  struct memcg_vmstats {
         /* Aggregated (CPU and subtree) page state & events */
@@ -698,36 +704,35 @@ static void memcg_stats_unlock(void)
  }
  
  
-static bool memcg_should_flush_stats(struct mem_cgroup *memcg)
+static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats)
  {
-       return atomic64_read(&memcg->vmstats->stats_updates) >
+       return atomic64_read(&vmstats->stats_updates) >
                 MEMCG_CHARGE_BATCH * num_online_cpus();
  }
  
  static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
  {
+       struct memcg_vmstats_percpu *statc;
         int cpu = smp_processor_id();
-       unsigned int x;
  
         if (!val)
                 return;
  
         cgroup_rstat_updated(memcg->css.cgroup, cpu);
-
-       for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-               x = __this_cpu_add_return(memcg->vmstats_percpu->stats_updates,
-                                         abs(val));
-
-               if (x < MEMCG_CHARGE_BATCH)
+       statc = this_cpu_ptr(memcg->vmstats_percpu);
+       for (; statc; statc = statc->parent) {
+               statc->stats_updates += abs(val);
+               if (statc->stats_updates < MEMCG_CHARGE_BATCH)
                         continue;
  
                 /*
                  * If @memcg is already flush-able, increasing stats_updates is
                  * redundant. Avoid the overhead of the atomic update.
                  */
-               if (!memcg_should_flush_stats(memcg))
-                       atomic64_add(x, &memcg->vmstats->stats_updates);
-               __this_cpu_write(memcg->vmstats_percpu->stats_updates, 0);
+               if (!memcg_vmstats_needs_flush(statc->vmstats))
+                       atomic64_add(statc->stats_updates,
+                                    &statc->vmstats->stats_updates);
+               statc->stats_updates = 0;
         }
  }
  
@@ -756,7 +761,7 @@ void mem_cgroup_flush_stats(struct mem_cgroup *memcg)
         if (!memcg)
                 memcg = root_mem_cgroup;
  
-       if (memcg_should_flush_stats(memcg))
+       if (memcg_vmstats_needs_flush(memcg->vmstats))
                 do_flush_stats(memcg);
  }
  
@@ -770,7 +775,7 @@ void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg)
  static void flush_memcg_stats_dwork(struct work_struct *w)
  {
         /*
-        * Deliberately ignore memcg_should_flush_stats() here so that flushing
+        * Deliberately ignore memcg_vmstats_needs_flush() here so that flushing
          * in latency-sensitive paths is as cheap as possible.
          */
         do_flush_stats(root_mem_cgroup);
@@ -2623,8 +2628,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
  }
  
  /*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Reclaims memory over the high limit. Called directly from
+ * try_charge() (context permitting), as well as from the userland
+ * return path where reclaim is always able to block.
   */
  void mem_cgroup_handle_over_high(gfp_t gfp_mask)
  {
@@ -2643,6 +2649,17 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask)
         current->memcg_nr_pages_over_high = 0;
  
  retry_reclaim:
+       /*
+        * Bail if the task is already exiting. Unlike memory.max,
+        * memory.high enforcement isn't as strict, and there is no
+        * OOM killer involved, which means the excess could already
+        * be much bigger (and still growing) than it could for
+        * memory.max; the dying task could get stuck in fruitless
+        * reclaim for a long time, which isn't desirable.
+        */
+       if (task_is_dying())
+               goto out;
+
         /*
          * The allocating task should reclaim at least the batch size, but for
          * subsequent retries we only want to do what's necessary to prevent oom
@@ -2693,6 +2710,9 @@ retry_reclaim:
         }
  
         /*
+        * Reclaim didn't manage to push usage below the limit, slow
+        * this allocating task down.
+        *
          * If we exit early, we're guaranteed to die (since
          * schedule_timeout_killable sets TASK_KILLABLE). This means we don't
          * need to account for any ill-begotten jiffies to pay them off later.
@@ -2887,11 +2907,17 @@ done_restock:
                 }
         } while ((memcg = parent_mem_cgroup(memcg)));
  
+       /*
+        * Reclaim is set up above to be called from the userland
+        * return path. But also attempt synchronous reclaim to avoid
+        * excessive overrun while the task is still inside the
+        * kernel. If this is successful, the return path will see it
+        * when it rechecks the overage and simply bail out.
+        */
         if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
             !(current->flags & PF_MEMALLOC) &&
-           gfpflags_allow_blocking(gfp_mask)) {
+           gfpflags_allow_blocking(gfp_mask))
                 mem_cgroup_handle_over_high(gfp_mask);
-       }
         return 0;
  }
  
@@ -5456,10 +5482,11 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
         __mem_cgroup_free(memcg);
  }
  
-static struct mem_cgroup *mem_cgroup_alloc(void)
+static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
  {
+       struct memcg_vmstats_percpu *statc, *pstatc;
         struct mem_cgroup *memcg;
-       int node;
+       int node, cpu;
         int __maybe_unused i;
         long error = -ENOMEM;
  
@@ -5483,6 +5510,14 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
         if (!memcg->vmstats_percpu)
                 goto fail;
  
+       for_each_possible_cpu(cpu) {
+               if (parent)
+                       pstatc = per_cpu_ptr(parent->vmstats_percpu, cpu);
+               statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
+               statc->parent = parent ? pstatc : NULL;
+               statc->vmstats = memcg->vmstats;
+       }
+
         for_each_node(node)
                 if (alloc_mem_cgroup_per_node_info(memcg, node))
                         goto fail;
@@ -5528,7 +5563,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
         struct mem_cgroup *memcg, *old_memcg;
  
         old_memcg = set_active_memcg(parent);
-       memcg = mem_cgroup_alloc();
+       memcg = mem_cgroup_alloc(parent);
         set_active_memcg(old_memcg);
         if (IS_ERR(memcg))
                 return ERR_CAST(memcg);
@@ -7936,9 +7971,13 @@ bool mem_cgroup_swap_full(struct folio *folio)
  
  static int __init setup_swap_account(char *s)
  {
-       pr_warn_once("The swapaccount= commandline option is deprecated. "
-                    "Please report your usecase to linux-mm@kvack.org if you "
-                    "depend on this functionality.\n");
+       bool res;
+
+       if (!kstrtobool(s, &res) && !res)
+               pr_warn_once("The swapaccount=0 commandline option is deprecated "
+                            "in favor of configuring swap control via cgroupfs. "
+                            "Please report your usecase to linux-mm@kvack.org if you "
+                            "depend on this functionality.\n");
         return 1;
  }
  __setup("swapaccount=", setup_swap_account);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c

index 4f9b61f4a6682a530a202d02a6998c0b687906dd..9349948f1abfd120977706bbda23456999f057bc 100644 (file)
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -982,7 +982,7 @@ static bool has_extra_refcount(struct page_state *ps, struct page *p,
         int count = page_count(p) - 1;
  
         if (extra_pins)
-               count -= 1;
+               count -= folio_nr_pages(page_folio(p));
  
         if (count > 0) {
                 pr_err("%#lx: %s still referenced by %d users\n",
@@ -1377,6 +1377,9 @@ void ClearPageHWPoisonTakenOff(struct page *page)
   */
  static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
  {
+       if (PageSlab(page))
+               return false;
+
         /* Soft offline could migrate non-LRU movable pages */
         if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
                 return true;
diff --git a/mm/memory.c b/mm/memory.c

index 7e1f4849463aa3645a0eead97f40a90caf5e6d5f..0bfc8b007c01a3323a15a17d51c4da46a6207540 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1464,7 +1464,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                         delay_rmap = 0;
                         if (!folio_test_anon(folio)) {
                                 if (pte_dirty(ptent)) {
-                                       folio_set_dirty(folio);
+                                       folio_mark_dirty(folio);
                                         if (tlb_delay_rmap(tlb)) {
                                                 delay_rmap = 1;
                                                 force_flush = 1;
@@ -3799,6 +3799,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
         struct page *page;
         struct swap_info_struct *si = NULL;
         rmap_t rmap_flags = RMAP_NONE;
+       bool need_clear_cache = false;
         bool exclusive = false;
         swp_entry_t entry;
         pte_t pte;
@@ -3867,6 +3868,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
         if (!folio) {
                 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
                     __swap_count(entry) == 1) {
+                       /*
+                        * Prevent parallel swapin from proceeding with
+                        * the cache flag. Otherwise, another thread may
+                        * finish swapin first, free the entry, and swapout
+                        * reusing the same entry. It's undetectable as
+                        * pte_same() returns true due to entry reuse.
+                        */
+                       if (swapcache_prepare(entry)) {
+                               /* Relax a bit to prevent rapid repeated page faults */
+                               schedule_timeout_uninterruptible(1);
+                               goto out;
+                       }
+                       need_clear_cache = true;
+
                         /* skip swapcache */
                         folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
                                                 vma, vmf->address, false);
@@ -4117,6 +4132,9 @@ unlock:
         if (vmf->pte)
                 pte_unmap_unlock(vmf->pte, vmf->ptl);
  out:
+       /* Clear the swap cache pin for direct swapin after PTL unlock */
+       if (need_clear_cache)
+               swapcache_clear(si, entry);
         if (si)
                 put_swap_device(si);
         return ret;
@@ -4131,6 +4149,8 @@ out_release:
                 folio_unlock(swapcache);
                 folio_put(swapcache);
         }
+       if (need_clear_cache)
+               swapcache_clear(si, entry);
         if (si)
                 put_swap_device(si);
         return ret;
@@ -5478,7 +5498,7 @@ static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs
                 return true;
  
         if (regs && !user_mode(regs)) {
-               unsigned long ip = instruction_pointer(regs);
+               unsigned long ip = exception_ip(regs);
                 if (!search_exception_tables(ip))
                         return false;
         }
@@ -5503,7 +5523,7 @@ static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_r
  {
         mmap_read_unlock(mm);
         if (regs && !user_mode(regs)) {
-               unsigned long ip = instruction_pointer(regs);
+               unsigned long ip = exception_ip(regs);
                 if (!search_exception_tables(ip))
                         return false;
         }
diff --git a/mm/migrate.c b/mm/migrate.c

index cc9f2bcd73b492aebacab4b812a515cf7e70b92b..c27b1f8097d4a72e569ce5a06be42b93184e9db0 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2519,6 +2519,14 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio)
                         if (managed_zone(pgdat->node_zones + z))
                                 break;
                 }
+
+               /*
+                * If there are no managed zones, it should not proceed
+                * further.
+                */
+               if (z < 0)
+                       return 0;
+
                 wakeup_kswapd(pgdat->node_zones + z, 0,
                               folio_order(folio), ZONE_MOVABLE);
                 return 0;
diff --git a/mm/mmap.c b/mm/mmap.c

index b78e83d351d2864a6a339059ac734b6602eb5824..d89770eaab6b6111117783ca7ff532871c1d71a5 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1825,15 +1825,17 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                 /*
                  * mmap_region() will call shmem_zero_setup() to create a file,
                  * so use shmem's get_unmapped_area in case it can be huge.
-                * do_mmap() will clear pgoff, so match alignment.
                  */
-               pgoff = 0;
                 get_area = shmem_get_unmapped_area;
         } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
                 /* Ensures that larger anonymous mappings are THP aligned. */
                 get_area = thp_get_unmapped_area;
         }
  
+       /* Always treat pgoff as zero for anonymous memory. */
+       if (!file)
+               pgoff = 0;
+
         addr = get_area(file, addr, len, pgoff, flags);
         if (IS_ERR_VALUE(addr))
                 return addr;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c

index cd4e4ae77c40ae0497efeaa8fb391f6550e51a4b..3f255534986a2fda07e2d35187bb385f64749c5c 100644 (file)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
          */
         dtc->wb_thresh = __wb_calc_thresh(dtc);
         dtc->wb_bg_thresh = dtc->thresh ?
-               div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
+               div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
  
         /*
          * In order to avoid the stacked BDI deadlock we need
@@ -1921,7 +1921,7 @@ pause:
                         break;
                 }
                 __set_current_state(TASK_KILLABLE);
-               wb->dirty_sleep = now;
+               bdi->last_bdp_sleep = jiffies;
                 io_schedule_timeout(pause);
  
                 current->dirty_paused_when = now + pause;
diff --git a/mm/readahead.c b/mm/readahead.c

index 23620c57c1225bef9e3e1193a7163c36a916951f..2648ec4f04947b2e837377da68d7b8ae1fd48f7a 100644 (file)
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
  
         if (!folio)
                 return -ENOMEM;
-       mark = round_up(mark, 1UL << order);
+       mark = round_down(mark, 1UL << order);
         if (index == mark)
                 folio_set_readahead(folio);
         err = filemap_add_folio(ractl->mapping, folio, index, gfp);
@@ -575,7 +575,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
          * It's the expected callback index, assume sequential access.
          * Ramp up sizes, and push forward the readahead window.
          */
-       expected = round_up(ra->start + ra->size - ra->async_size,
+       expected = round_down(ra->start + ra->size - ra->async_size,
                         1UL << order);
         if (index == expected || index == (ra->start + ra->size)) {
                 ra->start += ra->size;
diff --git a/mm/swap.h b/mm/swap.h

index 758c46ca671ed110ae8e25fad48196d3feed03dc..fc2f6ade7f80b399707bcc67c44f813aea0b846d 100644 (file)
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -41,6 +41,7 @@ void __delete_from_swap_cache(struct folio *folio,
  void delete_from_swap_cache(struct folio *folio);
  void clear_shadow_from_swap_cache(int type, unsigned long begin,
                                   unsigned long end);
+void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry);
  struct folio *swap_cache_get_folio(swp_entry_t entry,
                 struct vm_area_struct *vma, unsigned long addr);
  struct folio *filemap_get_incore_folio(struct address_space *mapping,
@@ -97,6 +98,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
         return 0;
  }
  
+static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
+{
+}
+
  static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
                 struct vm_area_struct *vma, unsigned long addr)
  {
diff --git a/mm/swap_state.c b/mm/swap_state.c

index e671266ad77241f461a17cbb2e486fe48a423f69..7255c01a1e4e16d758186019f904e70a7890a5cc 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -680,9 +680,10 @@ skip:
         /* The page was likely read above, so no need for plugging here */
         folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
                                         &page_allocated, false);
-       if (unlikely(page_allocated))
+       if (unlikely(page_allocated)) {
+               zswap_folio_swapin(folio);
                 swap_read_folio(folio, false, NULL);
-       zswap_folio_swapin(folio);
+       }
         return folio;
  }
  
@@ -855,9 +856,10 @@ skip:
         /* The folio was likely read above, so no need for plugging here */
         folio = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx,
                                         &page_allocated, false);
-       if (unlikely(page_allocated))
+       if (unlikely(page_allocated)) {
+               zswap_folio_swapin(folio);
                 swap_read_folio(folio, false, NULL);
-       zswap_folio_swapin(folio);
+       }
         return folio;
  }
  
diff --git a/mm/swapfile.c b/mm/swapfile.c

index 556ff7347d5f04402b61cc5bd9d0d123a36dc1d5..746aa9da530255035b4624fefff862d416af836d 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3365,6 +3365,19 @@ int swapcache_prepare(swp_entry_t entry)
         return __swap_duplicate(entry, SWAP_HAS_CACHE);
  }
  
+void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
+{
+       struct swap_cluster_info *ci;
+       unsigned long offset = swp_offset(entry);
+       unsigned char usage;
+
+       ci = lock_cluster_or_swap_info(si, offset);
+       usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE);
+       unlock_cluster_or_swap_info(si, ci);
+       if (!usage)
+               free_swap_slot(entry);
+}
+
  struct swap_info_struct *swp_swap_info(swp_entry_t entry)
  {
         return swap_type_to_swap_info(swp_type(entry));
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c

index 20e3b0d9cf7ed0d59d86a11b2472f0e138160692..7cf7d43842590ccd99bf37795918a7054b61a8c4 100644 (file)
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
                                               unsigned long dst_start,
                                               unsigned long src_start,
                                               unsigned long len,
+                                             atomic_t *mmap_changing,
                                               uffd_flags_t flags)
  {
         struct mm_struct *dst_mm = dst_vma->vm_mm;
@@ -472,6 +473,15 @@ retry:
                                 goto out;
                         }
                         mmap_read_lock(dst_mm);
+                       /*
+                        * If memory mappings are changing because of non-cooperative
+                        * operation (e.g. mremap) running in parallel, bail out and
+                        * request the user to retry later
+                        */
+                       if (mmap_changing && atomic_read(mmap_changing)) {
+                               err = -EAGAIN;
+                               break;
+                       }
  
                         dst_vma = NULL;
                         goto retry;
@@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma,
                                     unsigned long dst_start,
                                     unsigned long src_start,
                                     unsigned long len,
+                                   atomic_t *mmap_changing,
                                     uffd_flags_t flags);
  #endif /* CONFIG_HUGETLB_PAGE */
  
@@ -622,8 +633,8 @@ retry:
          * If this is a HUGETLB vma, pass off to appropriate routine
          */
         if (is_vm_hugetlb_page(dst_vma))
-               return  mfill_atomic_hugetlb(dst_vma, dst_start,
-                                            src_start, len, flags);
+               return  mfill_atomic_hugetlb(dst_vma, dst_start, src_start,
+                                            len, mmap_changing, flags);
  
         if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
                 goto out_unlock;
@@ -891,8 +902,8 @@ static int move_present_pte(struct mm_struct *mm,
  
         double_pt_lock(dst_ptl, src_ptl);
  
-       if (!pte_same(*src_pte, orig_src_pte) ||
-           !pte_same(*dst_pte, orig_dst_pte)) {
+       if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
+           !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
                 err = -EAGAIN;
                 goto out;
         }
@@ -935,8 +946,8 @@ static int move_swap_pte(struct mm_struct *mm,
  
         double_pt_lock(dst_ptl, src_ptl);
  
-       if (!pte_same(*src_pte, orig_src_pte) ||
-           !pte_same(*dst_pte, orig_dst_pte)) {
+       if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
+           !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
                 double_pt_unlock(dst_ptl, src_ptl);
                 return -EAGAIN;
         }
@@ -1005,7 +1016,7 @@ retry:
         }
  
         spin_lock(dst_ptl);
-       orig_dst_pte = *dst_pte;
+       orig_dst_pte = ptep_get(dst_pte);
         spin_unlock(dst_ptl);
         if (!pte_none(orig_dst_pte)) {
                 err = -EEXIST;
@@ -1013,7 +1024,7 @@ retry:
         }
  
         spin_lock(src_ptl);
-       orig_src_pte = *src_pte;
+       orig_src_pte = ptep_get(src_pte);
         spin_unlock(src_ptl);
         if (pte_none(orig_src_pte)) {
                 if (!(mode & UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES))
@@ -1043,7 +1054,7 @@ retry:
                          * page isn't freed under us
                          */
                         spin_lock(src_ptl);
-                       if (!pte_same(orig_src_pte, *src_pte)) {
+                       if (!pte_same(orig_src_pte, ptep_get(src_pte))) {
                                 spin_unlock(src_ptl);
                                 err = -EAGAIN;
                                 goto out;
diff --git a/mm/zswap.c b/mm/zswap.c

index ca25b676048ea6d0b399661e9ebca137585f8dbd..db4625af65fb7f6655a057e145bbe20dd64f7ae9 100644 (file)
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -377,10 +377,9 @@ void zswap_folio_swapin(struct folio *folio)
  {
         struct lruvec *lruvec;
  
-       if (folio) {
-               lruvec = folio_lruvec(folio);
-               atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
-       }
+       VM_WARN_ON_ONCE(!folio_test_locked(folio));
+       lruvec = folio_lruvec(folio);
+       atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
  }
  
  /*********************************
@@ -536,10 +535,6 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
   */
  static void zswap_free_entry(struct zswap_entry *entry)
  {
-       if (entry->objcg) {
-               obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
-               obj_cgroup_put(entry->objcg);
-       }
         if (!entry->length)
                 atomic_dec(&zswap_same_filled_pages);
         else {
@@ -548,6 +543,10 @@ static void zswap_free_entry(struct zswap_entry *entry)
                 atomic_dec(&entry->pool->nr_stored);
                 zswap_pool_put(entry->pool);
         }
+       if (entry->objcg) {
+               obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
+               obj_cgroup_put(entry->objcg);
+       }
         zswap_entry_cache_free(entry);
         atomic_dec(&zswap_stored_pages);
         zswap_update_total_size();
@@ -895,10 +894,8 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
                  * into the warmer region. We should terminate shrinking (if we're in the dynamic
                  * shrinker context).
                  */
-               if (writeback_result == -EEXIST && encountered_page_in_swapcache) {
-                       ret = LRU_SKIP;
+               if (writeback_result == -EEXIST && encountered_page_in_swapcache)
                         *encountered_page_in_swapcache = true;
-               }
  
                 goto put_unlock;
         }
@@ -1442,6 +1439,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
         if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
                 spin_unlock(&tree->lock);
                 delete_from_swap_cache(folio);
+               folio_unlock(folio);
+               folio_put(folio);
                 return -ENOMEM;
         }
         spin_unlock(&tree->lock);
@@ -1519,7 +1518,7 @@ bool zswap_store(struct folio *folio)
         if (folio_test_large(folio))
                 return false;
  
-       if (!zswap_enabled || !tree)
+       if (!tree)
                 return false;
  
         /*
@@ -1534,6 +1533,10 @@ bool zswap_store(struct folio *folio)
                 zswap_invalidate_entry(tree, dupentry);
         }
         spin_unlock(&tree->lock);
+
+       if (!zswap_enabled)
+               return false;
+
         objcg = get_obj_cgroup_from_folio(folio);
         if (objcg && !obj_cgroup_may_zswap(objcg)) {
                 memcg = get_mem_cgroup_from_objcg(objcg);
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c

index 7b3341cef926ef37ce84c7dd09301c84c0a103c6..850d4a185f55f87f70a5e0a5112d1ab20d3eb070 100644 (file)
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -179,4 +179,5 @@ static void __exit lowpan_module_exit(void)
  module_init(lowpan_module_init);
  module_exit(lowpan_module_exit);
  
+MODULE_DESCRIPTION("IPv6 over Low-Power Wireless Personal Area Network core module");
  MODULE_LICENSE("GPL");
diff --git a/net/atm/mpc.c b/net/atm/mpc.c

index 033871e718a34f7430929f862fcbcc886d933622..324e3ab96bb393d815901bb2d221ef6e18cf25e4 100644 (file)
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1532,4 +1532,5 @@ static void __exit atm_mpoa_cleanup(void)
  module_init(atm_mpoa_init);
  module_exit(atm_mpoa_cleanup);
  
+MODULE_DESCRIPTION("Multi-Protocol Over ATM (MPOA) driver");
  MODULE_LICENSE("GPL");
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c

index d982daea832927d38474f8d46764a82f87a09659..14088c4ff2f66f9049858598f30dc0069c27fb70 100644 (file)
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -2175,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
         cancel_delayed_work_sync(&bat_priv->mcast.work);
  
         batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
+       batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1);
         batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
  
         /* safely calling outside of worker, as worker was canceled above */
@@ -2198,6 +2199,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
                                       BATADV_MCAST_WANT_NO_RTR4);
         batadv_mcast_want_rtr6_update(bat_priv, orig,
                                       BATADV_MCAST_WANT_NO_RTR6);
+       batadv_mcast_have_mc_ptype_update(bat_priv, orig,
+                                         BATADV_MCAST_HAVE_MC_PTYPE_CAPA);
  
         spin_unlock_bh(&orig->mcast_handler_lock);
  }
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c

index 65601aa52e0d8b669ac8aaec116301398a5e865b..2821a42cefdc6e0f83fa4a765bc881a67795a2b5 100644 (file)
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work)
  {
         struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);
  
+       hci_dev_hold(hdev);
         BT_DBG("%s", hdev->name);
  
         if (hdev->hw_error)
@@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work)
         else
                 bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
  
-       if (hci_dev_do_close(hdev))
-               return;
+       if (!hci_dev_do_close(hdev))
+               hci_dev_do_open(hdev);
  
-       hci_dev_do_open(hdev);
+       hci_dev_put(hdev);
  }
  
  void hci_uuids_clear(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c

index ef8c3bed73617efa01052f7c84f170bee4666eef..2a5f5a7d2412be4aef32e8bfeb69cab0f6ad4fec 100644 (file)
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5329,9 +5329,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data,
         hci_dev_lock(hdev);
  
         conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
-       if (!conn || !hci_conn_ssp_enabled(conn))
+       if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
                 goto unlock;
  
+       /* Assume remote supports SSP since it has triggered this event */
+       set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+
         hci_conn_hold(conn);
  
         if (!hci_dev_test_flag(hdev, HCI_MGMT))
@@ -6794,6 +6797,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
                 return send_conn_param_neg_reply(hdev, handle,
                                                  HCI_ERROR_UNKNOWN_CONN_ID);
  
+       if (max > hcon->le_conn_max_interval)
+               return send_conn_param_neg_reply(hdev, handle,
+                                                HCI_ERROR_INVALID_LL_PARAMS);
+
         if (hci_check_conn_params(min, max, latency, timeout))
                 return send_conn_param_neg_reply(hdev, handle,
                                                  HCI_ERROR_INVALID_LL_PARAMS);
@@ -7420,10 +7427,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
          * keep track of the bdaddr of the connection event that woke us up.
          */
         if (event == HCI_EV_CONN_REQUEST) {
-               bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+               bacpy(&hdev->wake_addr, &conn_request->bdaddr);
                 hdev->wake_addr_type = BDADDR_BREDR;
         } else if (event == HCI_EV_CONN_COMPLETE) {
-               bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+               bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
                 hdev->wake_addr_type = BDADDR_BREDR;
         } else if (event == HCI_EV_LE_META) {
                 struct hci_ev_le_meta *le_ev = (void *)skb->data;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c

index a6fc8a2a5c673d5266ceb98bef1d69b70ae19e4c..5716345a26dfb757b540137fa7616f4af49d013c 100644 (file)
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -2206,8 +2206,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
  
         /* During suspend, only wakeable devices can be in acceptlist */
         if (hdev->suspended &&
-           !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
+           !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) {
+               hci_le_del_accept_list_sync(hdev, &params->addr,
+                                           params->addr_type);
                 return 0;
+       }
  
         /* Select filter policy to accept all advertising */
         if (*num_entries >= hdev->le_accept_list_size)
@@ -5559,7 +5562,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)
  
         bt_dev_dbg(hdev, "");
  
-       if (hci_dev_test_flag(hdev, HCI_INQUIRY))
+       if (test_bit(HCI_INQUIRY, &hdev->flags))
                 return 0;
  
         hci_dev_lock(hdev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c

index 60298975d5c45620f21ca5fe161da1a9fdf55eec..656f49b299d20d9141b9579aef84acf3b81bff7e 100644 (file)
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5613,7 +5613,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
  
         memset(&rsp, 0, sizeof(rsp));
  
-       err = hci_check_conn_params(min, max, latency, to_multiplier);
+       if (max > hcon->le_conn_max_interval) {
+               BT_DBG("requested connection interval exceeds current bounds.");
+               err = -EINVAL;
+       } else {
+               err = hci_check_conn_params(min, max, latency, to_multiplier);
+       }
+
         if (err)
                 rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
         else
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c

index bb72ff6eb22f4b30864aefd2588cce982d37d153..ee3b4aad8bd8d65239efc591cf33a631690a270f 100644 (file)
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1045,6 +1045,8 @@ static void rpa_expired(struct work_struct *work)
         hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL);
  }
  
+static int set_discoverable_sync(struct hci_dev *hdev, void *data);
+
  static void discov_off(struct work_struct *work)
  {
         struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -1063,7 +1065,7 @@ static void discov_off(struct work_struct *work)
         hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
         hdev->discov_timeout = 0;
  
-       hci_update_discoverable(hdev);
+       hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL);
  
         mgmt_new_settings(hdev);
  
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c

index 053ef8f25fae47b369068adb49f1391b32fd7bc9..1d34d8497033299907d341212c2977b2b1d9b870 100644 (file)
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1941,7 +1941,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
         /* Get data directly from socket receive queue without copying it. */
         while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
                 skb_orphan(skb);
-               if (!skb_linearize(skb)) {
+               if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) {
                         s = rfcomm_recv_frame(s, skb);
                         if (!s)
                                 break;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c

index d7d021af102981255ba284d396826feb71ae20be..2d7b7324295885e7a5ee70dd63b5dffd9a9a8968 100644 (file)
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1762,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t)
  }
  #endif
  
+static void br_multicast_query_delay_expired(struct timer_list *t)
+{
+}
+
  static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
                                             struct br_ip *ip,
                                             struct sk_buff *skb)
@@ -3198,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
                                 unsigned long max_delay)
  {
         if (!timer_pending(&query->timer))
-               query->delay_time = jiffies + max_delay;
+               mod_timer(&query->delay_timer, jiffies + max_delay);
  
         mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
  }
@@ -4041,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br,
         brmctx->multicast_querier_interval = 255 * HZ;
         brmctx->multicast_membership_interval = 260 * HZ;
  
-       brmctx->ip4_other_query.delay_time = 0;
         brmctx->ip4_querier.port_ifidx = 0;
         seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
         brmctx->multicast_igmp_version = 2;
  #if IS_ENABLED(CONFIG_IPV6)
         brmctx->multicast_mld_version = 1;
-       brmctx->ip6_other_query.delay_time = 0;
         brmctx->ip6_querier.port_ifidx = 0;
         seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
  #endif
@@ -4056,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
                     br_ip4_multicast_local_router_expired, 0);
         timer_setup(&brmctx->ip4_other_query.timer,
                     br_ip4_multicast_querier_expired, 0);
+       timer_setup(&brmctx->ip4_other_query.delay_timer,
+                   br_multicast_query_delay_expired, 0);
         timer_setup(&brmctx->ip4_own_query.timer,
                     br_ip4_multicast_query_expired, 0);
  #if IS_ENABLED(CONFIG_IPV6)
@@ -4063,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
                     br_ip6_multicast_local_router_expired, 0);
         timer_setup(&brmctx->ip6_other_query.timer,
                     br_ip6_multicast_querier_expired, 0);
+       timer_setup(&brmctx->ip6_other_query.delay_timer,
+                   br_multicast_query_delay_expired, 0);
         timer_setup(&brmctx->ip6_own_query.timer,
                     br_ip6_multicast_query_expired, 0);
  #endif
@@ -4197,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
  {
         del_timer_sync(&brmctx->ip4_mc_router_timer);
         del_timer_sync(&brmctx->ip4_other_query.timer);
+       del_timer_sync(&brmctx->ip4_other_query.delay_timer);
         del_timer_sync(&brmctx->ip4_own_query.timer);
  #if IS_ENABLED(CONFIG_IPV6)
         del_timer_sync(&brmctx->ip6_mc_router_timer);
         del_timer_sync(&brmctx->ip6_other_query.timer);
+       del_timer_sync(&brmctx->ip6_other_query.delay_timer);
         del_timer_sync(&brmctx->ip6_own_query.timer);
  #endif
  }
@@ -4643,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
         max_delay = brmctx->multicast_query_response_interval;
  
         if (!timer_pending(&brmctx->ip4_other_query.timer))
-               brmctx->ip4_other_query.delay_time = jiffies + max_delay;
+               mod_timer(&brmctx->ip4_other_query.delay_timer,
+                         jiffies + max_delay);
  
         br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
  
  #if IS_ENABLED(CONFIG_IPV6)
         if (!timer_pending(&brmctx->ip6_other_query.timer))
-               brmctx->ip6_other_query.delay_time = jiffies + max_delay;
+               mod_timer(&brmctx->ip6_other_query.delay_timer,
+                         jiffies + max_delay);
  
         br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
  #endif
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c

index ed17208907578a231d283c04bd97ce48bebdffaa..35e10c5a766d550e0c5cb85cf5a0c4835b52a89d 100644 (file)
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -43,6 +43,10 @@
  #include <linux/sysctl.h>
  #endif
  
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
  static unsigned int brnf_net_id __read_mostly;
  
  struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
         return NF_STOLEN;
  }
  
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
+ * the same nf_conn entry, which will happen for multicast (broadcast)
+ * Frames on bridges.
+ *
+ * Example:
+ *      macvlan0
+ *      br0
+ *  ethX  ethY
+ *
+ * ethX (or Y) receives multicast or broadcast packet containing
+ * an IP packet, not yet in conntrack table.
+ *
+ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+ *    -> skb->_nfct now references a unconfirmed entry
+ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+ *    interface.
+ * 3. skb gets passed up the stack.
+ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+ *    and schedules a work queue to send them out on the lower devices.
+ *
+ *    The clone skb->_nfct is not a copy, it is the same entry as the
+ *    original skb.  The macvlan rx handler then returns RX_HANDLER_PASS.
+ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+ *
+ * The Macvlan broadcast worker and normal confirm path will race.
+ *
+ * This race will not happen if step 2 already confirmed a clone. In that
+ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
+ * hash table).  This works fine.
+ *
+ * But such confirmation won't happen when eb/ip/nftables rules dropped the
+ * packets before they reached the nf_confirm step in postrouting.
+ *
+ * Work around this problem by explicit confirmation of the entry at
+ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
+ * entry.
+ *
+ */
+static unsigned int br_nf_local_in(void *priv,
+                                  struct sk_buff *skb,
+                                  const struct nf_hook_state *state)
+{
+       struct nf_conntrack *nfct = skb_nfct(skb);
+       const struct nf_ct_hook *ct_hook;
+       struct nf_conn *ct;
+       int ret;
+
+       if (!nfct || skb->pkt_type == PACKET_HOST)
+               return NF_ACCEPT;
+
+       ct = container_of(nfct, struct nf_conn, ct_general);
+       if (likely(nf_ct_is_confirmed(ct)))
+               return NF_ACCEPT;
+
+       WARN_ON_ONCE(skb_shared(skb));
+       WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+
+       /* We can't call nf_confirm here, it would create a dependency
+        * on nf_conntrack module.
+        */
+       ct_hook = rcu_dereference(nf_ct_hook);
+       if (!ct_hook) {
+               skb->_nfct = 0ul;
+               nf_conntrack_put(nfct);
+               return NF_ACCEPT;
+       }
+
+       nf_bridge_pull_encap_header(skb);
+       ret = ct_hook->confirm(skb);
+       switch (ret & NF_VERDICT_MASK) {
+       case NF_STOLEN:
+               return NF_STOLEN;
+       default:
+               nf_bridge_push_encap_header(skb);
+               break;
+       }
+
+       ct = container_of(nfct, struct nf_conn, ct_general);
+       WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
+
+       return ret;
+}
+#endif
  
  /* PF_BRIDGE/FORWARD *************************************************/
  static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
                 .hooknum = NF_BR_PRE_ROUTING,
                 .priority = NF_BR_PRI_BRNF,
         },
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       {
+               .hook = br_nf_local_in,
+               .pf = NFPROTO_BRIDGE,
+               .hooknum = NF_BR_LOCAL_IN,
+               .priority = NF_BR_PRI_LAST,
+       },
+#endif
         {
                 .hook = br_nf_forward,
                 .pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h

index b0a92c344722be6bf195d571bf1a26daf759dfca..86ea5e6689b5ce49a4b71b383893d2ef5b53d110 100644 (file)
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -78,7 +78,7 @@ struct bridge_mcast_own_query {
  /* other querier */
  struct bridge_mcast_other_query {
         struct timer_list               timer;
-       unsigned long                   delay_time;
+       struct timer_list               delay_timer;
  };
  
  /* selected querier */
@@ -1159,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
                 own_querier_enabled = false;
         }
  
-       return time_is_before_jiffies(querier->delay_time) &&
+       return !timer_pending(&querier->delay_timer) &&
                (own_querier_enabled || timer_pending(&querier->timer));
  }
  
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c

index ee84e783e1dff5b67994a3ba5a4e5d8aa875eeef..7b41ee8740cbbaf6b959d9273c49ebcd4830a5c8 100644 (file)
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
  }
  
  static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+                                     struct net_device *dev,
+                                     unsigned long action,
                                       enum switchdev_obj_id id,
                                       const struct net_bridge_mdb_entry *mp,
                                       struct net_device *orig_dev)
  {
-       struct switchdev_obj_port_mdb *mdb;
+       struct switchdev_obj_port_mdb mdb = {
+               .obj = {
+                       .id = id,
+                       .orig_dev = orig_dev,
+               },
+       };
+       struct switchdev_obj_port_mdb *pmdb;
  
-       mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
-       if (!mdb)
-               return -ENOMEM;
+       br_switchdev_mdb_populate(&mdb, mp);
+
+       if (action == SWITCHDEV_PORT_OBJ_ADD &&
+           switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) {
+               /* This event is already in the deferred queue of
+                * events, so this replay must be elided, lest the
+                * driver receives duplicate events for it. This can
+                * only happen when replaying additions, since
+                * modifications are always immediately visible in
+                * br->mdb_list, whereas actual event delivery may be
+                * delayed.
+                */
+               return 0;
+       }
  
-       mdb->obj.id = id;
-       mdb->obj.orig_dev = orig_dev;
-       br_switchdev_mdb_populate(mdb, mp);
-       list_add_tail(&mdb->obj.list, mdb_list);
+       pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC);
+       if (!pmdb)
+               return -ENOMEM;
  
+       list_add_tail(&pmdb->obj.list, mdb_list);
         return 0;
  }
  
@@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
         if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
                 return 0;
  
-       /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
-        * because the write-side protection is br->multicast_lock. But we
-        * need to emulate the [ blocking ] calling context of a regular
-        * switchdev event, so since both br->multicast_lock and RCU read side
-        * critical sections are atomic, we have no choice but to pick the RCU
-        * read side lock, queue up all our events, leave the critical section
-        * and notify switchdev from blocking context.
+       if (adding)
+               action = SWITCHDEV_PORT_OBJ_ADD;
+       else
+               action = SWITCHDEV_PORT_OBJ_DEL;
+
+       /* br_switchdev_mdb_queue_one() will take care to not queue a
+        * replay of an event that is already pending in the switchdev
+        * deferred queue. In order to safely determine that, there
+        * must be no new deferred MDB notifications enqueued for the
+        * duration of the MDB scan. Therefore, grab the write-side
+        * lock to avoid racing with any concurrent IGMP/MLD snooping.
          */
-       rcu_read_lock();
+       spin_lock_bh(&br->multicast_lock);
  
-       hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+       hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
                 struct net_bridge_port_group __rcu * const *pp;
                 const struct net_bridge_port_group *p;
  
                 if (mp->host_joined) {
-                       err = br_switchdev_mdb_queue_one(&mdb_list,
+                       err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
                                                          SWITCHDEV_OBJ_ID_HOST_MDB,
                                                          mp, br_dev);
                         if (err) {
-                               rcu_read_unlock();
+                               spin_unlock_bh(&br->multicast_lock);
                                 goto out_free_mdb;
                         }
                 }
  
-               for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+               for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
                      pp = &p->next) {
                         if (p->key.port->dev != dev)
                                 continue;
  
-                       err = br_switchdev_mdb_queue_one(&mdb_list,
+                       err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
                                                          SWITCHDEV_OBJ_ID_PORT_MDB,
                                                          mp, dev);
                         if (err) {
-                               rcu_read_unlock();
+                               spin_unlock_bh(&br->multicast_lock);
                                 goto out_free_mdb;
                         }
                 }
         }
  
-       rcu_read_unlock();
-
-       if (adding)
-               action = SWITCHDEV_PORT_OBJ_ADD;
-       else
-               action = SWITCHDEV_PORT_OBJ_DEL;
+       spin_unlock_bh(&br->multicast_lock);
  
         list_for_each_entry(obj, &mdb_list, list) {
                 err = br_switchdev_mdb_replay_one(nb, dev,
@@ -786,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
         br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
  
         br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
+
+       /* Make sure that the device leaving this bridge has seen all
+        * relevant events before it is disassociated. In the normal
+        * case, when the device is directly attached to the bridge,
+        * this is covered by del_nbp(). If the association was indirect
+        * however, e.g. via a team or bond, and the device is leaving
+        * that intermediate device, then the bridge port remains in
+        * place.
+        */
+       switchdev_deferred_process();
  }
  
  /* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c

index abb090f94ed2609eeb9cd54b4e5faed1c3cb7bfe..6f877e31709bad3646ea15bf3a96999ed275bdc1 100644 (file)
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
         return nf_conntrack_in(skb, &bridge_state);
  }
  
+static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+                                   const struct nf_hook_state *state)
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
+
+       if (skb->pkt_type == PACKET_HOST)
+               return NF_ACCEPT;
+
+       /* nf_conntrack_confirm() cannot handle concurrent clones,
+        * this happens for broad/multicast frames with e.g. macvlan on top
+        * of the bridge device.
+        */
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+               return NF_ACCEPT;
+
+       /* let inet prerouting call conntrack again */
+       skb->_nfct = 0;
+       nf_ct_put(ct);
+
+       return NF_ACCEPT;
+}
+
  static void nf_ct_bridge_frag_save(struct sk_buff *skb,
                                    struct nf_bridge_frag_data *data)
  {
@@ -385,6 +409,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
                 .hooknum        = NF_BR_PRE_ROUTING,
                 .priority       = NF_IP_PRI_CONNTRACK,
         },
+       {
+               .hook           = nf_ct_bridge_in,
+               .pf             = NFPROTO_BRIDGE,
+               .hooknum        = NF_BR_LOCAL_IN,
+               .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
+       },
         {
                 .hook           = nf_ct_bridge_post,
                 .pf             = NFPROTO_BRIDGE,
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h

index 16af1a7f80f60e18b5526c973f66e98821786a78..31a93cae5111b50d54e10a061a9e235e81a1da1c 100644 (file)
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -86,7 +86,7 @@ struct j1939_priv {
         unsigned int tp_max_packet_size;
  
         /* lock for j1939_socks list */
-       spinlock_t j1939_socks_lock;
+       rwlock_t j1939_socks_lock;
         struct list_head j1939_socks;
  
         struct kref rx_kref;
@@ -301,6 +301,7 @@ struct j1939_sock {
  
         int ifindex;
         struct j1939_addr addr;
+       spinlock_t filters_lock;
         struct j1939_filter *filters;
         int nfilters;
         pgn_t pgn_rx_filter;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c

index ecff1c947d683b2f3e4eeff144f39a8f5ff5de1a..a6fb89fa62785121f9edc42308b052142fda5fdd 100644 (file)
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
                 return ERR_PTR(-ENOMEM);
  
         j1939_tp_init(priv);
-       spin_lock_init(&priv->j1939_socks_lock);
+       rwlock_init(&priv->j1939_socks_lock);
         INIT_LIST_HEAD(&priv->j1939_socks);
  
         mutex_lock(&j1939_netdev_lock);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c

index 14c43166323393541bc102f47a311c79199a2acd..305dd72c844c70f1589f14fcb668bd46b92ff6f2 100644 (file)
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
         jsk->state |= J1939_SOCK_BOUND;
         j1939_priv_get(priv);
  
-       spin_lock_bh(&priv->j1939_socks_lock);
+       write_lock_bh(&priv->j1939_socks_lock);
         list_add_tail(&jsk->list, &priv->j1939_socks);
-       spin_unlock_bh(&priv->j1939_socks_lock);
+       write_unlock_bh(&priv->j1939_socks_lock);
  }
  
  static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
  {
-       spin_lock_bh(&priv->j1939_socks_lock);
+       write_lock_bh(&priv->j1939_socks_lock);
         list_del_init(&jsk->list);
-       spin_unlock_bh(&priv->j1939_socks_lock);
+       write_unlock_bh(&priv->j1939_socks_lock);
  
         j1939_priv_put(priv);
         jsk->state &= ~J1939_SOCK_BOUND;
@@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk,
  static bool j1939_sk_match_filter(struct j1939_sock *jsk,
                                   const struct j1939_sk_buff_cb *skcb)
  {
-       const struct j1939_filter *f = jsk->filters;
-       int nfilter = jsk->nfilters;
+       const struct j1939_filter *f;
+       int nfilter;
+
+       spin_lock_bh(&jsk->filters_lock);
+
+       f = jsk->filters;
+       nfilter = jsk->nfilters;
  
         if (!nfilter)
                 /* receive all when no filters are assigned */
-               return true;
+               goto filter_match_found;
  
         for (; nfilter; ++f, --nfilter) {
                 if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
@@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk,
                         continue;
                 if ((skcb->addr.src_name & f->name_mask) != f->name)
                         continue;
-               return true;
+               goto filter_match_found;
         }
+
+       spin_unlock_bh(&jsk->filters_lock);
         return false;
+
+filter_match_found:
+       spin_unlock_bh(&jsk->filters_lock);
+       return true;
  }
  
  static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
@@ -329,13 +340,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
         struct j1939_sock *jsk;
         bool match = false;
  
-       spin_lock_bh(&priv->j1939_socks_lock);
+       read_lock_bh(&priv->j1939_socks_lock);
         list_for_each_entry(jsk, &priv->j1939_socks, list) {
                 match = j1939_sk_recv_match_one(jsk, skcb);
                 if (match)
                         break;
         }
-       spin_unlock_bh(&priv->j1939_socks_lock);
+       read_unlock_bh(&priv->j1939_socks_lock);
  
         return match;
  }
@@ -344,11 +355,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
  {
         struct j1939_sock *jsk;
  
-       spin_lock_bh(&priv->j1939_socks_lock);
+       read_lock_bh(&priv->j1939_socks_lock);
         list_for_each_entry(jsk, &priv->j1939_socks, list) {
                 j1939_sk_recv_one(jsk, skb);
         }
-       spin_unlock_bh(&priv->j1939_socks_lock);
+       read_unlock_bh(&priv->j1939_socks_lock);
  }
  
  static void j1939_sk_sock_destruct(struct sock *sk)
@@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk)
         atomic_set(&jsk->skb_pending, 0);
         spin_lock_init(&jsk->sk_session_queue_lock);
         INIT_LIST_HEAD(&jsk->sk_session_queue);
+       spin_lock_init(&jsk->filters_lock);
  
         /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
         sock_set_flag(sk, SOCK_RCU_FREE);
@@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
                 }
  
                 lock_sock(&jsk->sk);
+               spin_lock_bh(&jsk->filters_lock);
                 ofilters = jsk->filters;
                 jsk->filters = filters;
                 jsk->nfilters = count;
+               spin_unlock_bh(&jsk->filters_lock);
                 release_sock(&jsk->sk);
                 kfree(ofilters);
                 return 0;
@@ -1080,12 +1094,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
         }
  
         /* spread RX notifications to all sockets subscribed to this session */
-       spin_lock_bh(&priv->j1939_socks_lock);
+       read_lock_bh(&priv->j1939_socks_lock);
         list_for_each_entry(jsk, &priv->j1939_socks, list) {
                 if (j1939_sk_recv_match_one(jsk, &session->skcb))
                         __j1939_sk_errqueue(session, &jsk->sk, type);
         }
-       spin_unlock_bh(&priv->j1939_socks_lock);
+       read_unlock_bh(&priv->j1939_socks_lock);
  };
  
  void j1939_sk_send_loop_abort(struct sock *sk, int err)
@@ -1273,7 +1287,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
         struct j1939_sock *jsk;
         int error_code = ENETDOWN;
  
-       spin_lock_bh(&priv->j1939_socks_lock);
+       read_lock_bh(&priv->j1939_socks_lock);
         list_for_each_entry(jsk, &priv->j1939_socks, list) {
                 jsk->sk.sk_err = error_code;
                 if (!sock_flag(&jsk->sk, SOCK_DEAD))
@@ -1281,7 +1295,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
  
                 j1939_sk_queue_drop_all(priv, jsk, error_code);
         }
-       spin_unlock_bh(&priv->j1939_socks_lock);
+       read_unlock_bh(&priv->j1939_socks_lock);
  }
  
  static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c

index f9a50d7f0d204639f821835d341bb87c13a80333..0cb61c76b9b87da0746294cb371bc62defec0f81 100644 (file)
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
  static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
  {
         /* Initialize data cursor if it's not a sparse read */
-       if (!msg->sparse_read)
-               ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+       u64 len = msg->sparse_read_total ? : data_len;
+
+       ceph_msg_data_cursor_init(&msg->cursor, msg, len);
  }
  
  /*
@@ -991,7 +992,7 @@ static inline int read_partial_message_section(struct ceph_connection *con,
         return read_partial_message_chunk(con, section, sec_len, crc);
  }
  
-static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
  {
         struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
         bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
@@ -1026,7 +1027,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
         return 1;
  }
  
-static int read_sparse_msg_data(struct ceph_connection *con)
+static int read_partial_sparse_msg_data(struct ceph_connection *con)
  {
         struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
         bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
@@ -1036,31 +1037,31 @@ static int read_sparse_msg_data(struct ceph_connection *con)
         if (do_datacrc)
                 crc = con->in_data_crc;
  
-       do {
+       while (cursor->total_resid) {
                 if (con->v1.in_sr_kvec.iov_base)
                         ret = read_partial_message_chunk(con,
                                                          &con->v1.in_sr_kvec,
                                                          con->v1.in_sr_len,
                                                          &crc);
                 else if (cursor->sr_resid > 0)
-                       ret = read_sparse_msg_extent(con, &crc);
-
-               if (ret <= 0) {
-                       if (do_datacrc)
-                               con->in_data_crc = crc;
-                       return ret;
-               }
+                       ret = read_partial_sparse_msg_extent(con, &crc);
+               if (ret <= 0)
+                       break;
  
                 memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
                 ret = con->ops->sparse_read(con, cursor,
                                 (char **)&con->v1.in_sr_kvec.iov_base);
+               if (ret <= 0) {
+                       ret = ret ? ret : 1;  /* must return > 0 to indicate success */
+                       break;
+               }
                 con->v1.in_sr_len = ret;
-       } while (ret > 0);
+       }
  
         if (do_datacrc)
                 con->in_data_crc = crc;
  
-       return ret < 0 ? ret : 1;  /* must return > 0 to indicate success */
+       return ret;
  }
  
  static int read_partial_msg_data(struct ceph_connection *con)
@@ -1253,8 +1254,8 @@ static int read_partial_message(struct ceph_connection *con)
                 if (!m->num_data_items)
                         return -EIO;
  
-               if (m->sparse_read)
-                       ret = read_sparse_msg_data(con);
+               if (m->sparse_read_total)
+                       ret = read_partial_sparse_msg_data(con);
                 else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
                         ret = read_partial_msg_data_bounce(con);
                 else
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c

index f8ec60e1aba3a112aaa024c235f0117297b9bf70..a0ca5414b333df92b3aa0085a95b928cdc0609a5 100644 (file)
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con)
         struct sg_table enc_sgt = {};
         struct sg_table sgt = {};
         struct page **pages = NULL;
-       bool sparse = con->in_msg->sparse_read;
+       bool sparse = !!con->in_msg->sparse_read_total;
         int dpos = 0;
         int tail_len;
         int ret;
@@ -2060,7 +2060,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
         }
  
         if (data_len(msg)) {
-               if (msg->sparse_read)
+               if (msg->sparse_read_total)
                         con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
                 else
                         con->v2.in_state = IN_S_PREPARE_READ_DATA;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c

index 625622016f5761e36bccc3f7a239e265039ce95d..9d078b37fe0b9b085894be86db17053227de9a18 100644 (file)
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
         }
  
         m = ceph_msg_get(req->r_reply);
-       m->sparse_read = (bool)srlen;
+       m->sparse_read_total = srlen;
  
         dout("get_reply tid %lld %p\n", tid, m);
  
@@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con,
         }
  
         if (o->o_sparse_op_idx < 0) {
-               u64 srlen = sparse_data_requested(req);
-
-               dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
-                    __func__, o->o_osd, srlen);
-               ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+               dout("%s: [%d] starting new sparse read req\n",
+                    __func__, o->o_osd);
         } else {
                 u64 end;
  
@@ -5857,8 +5854,8 @@ static int osd_sparse_read(struct ceph_connection *con,
         struct ceph_osd *o = con->private;
         struct ceph_sparse_read *sr = &o->o_sparse_read;
         u32 count = sr->sr_count;
-       u64 eoff, elen;
-       int ret;
+       u64 eoff, elen, len = 0;
+       int i, ret;
  
         switch (sr->sr_state) {
         case CEPH_SPARSE_READ_HDR:
@@ -5903,8 +5900,20 @@ next_op:
                 convert_extent_map(sr);
                 ret = sizeof(sr->sr_datalen);
                 *pbuf = (char *)&sr->sr_datalen;
-               sr->sr_state = CEPH_SPARSE_READ_DATA;
+               sr->sr_state = CEPH_SPARSE_READ_DATA_PRE;
                 break;
+       case CEPH_SPARSE_READ_DATA_PRE:
+               /* Convert sr_datalen to host-endian */
+               sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
+               for (i = 0; i < count; i++)
+                       len += sr->sr_extent[i].len;
+               if (sr->sr_datalen != len) {
+                       pr_warn_ratelimited("data len %u != extent len %llu\n",
+                                           sr->sr_datalen, len);
+                       return -EREMOTEIO;
+               }
+               sr->sr_state = CEPH_SPARSE_READ_DATA;
+               fallthrough;
         case CEPH_SPARSE_READ_DATA:
                 if (sr->sr_index >= count) {
                         sr->sr_state = CEPH_SPARSE_READ_HDR;
diff --git a/net/core/datagram.c b/net/core/datagram.c

index 103d46fa0eeb34af20b2d74b79f38e7424e25155..a8b625abe242c657dca8cd0188c236553757c6b2 100644 (file)
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -751,7 +751,7 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
                            size_t len, void *from, void *priv2)
  {
         __wsum *csum = priv2;
-       __wsum next = csum_partial_copy_nocheck(from, iter_to, len);
+       __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);
  
         *csum = csum_block_add(*csum, next, progress);
         return 0;
diff --git a/net/core/dev.c b/net/core/dev.c

index cb2dab0feee0abe758479a7a001342bf6613df08..0230391c78f71e22d3c0e925ff8d3d792aa54a32 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -336,7 +336,7 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
                 return -ENOMEM;
         netdev_name_node_add(net, name_node);
         /* The node that holds dev->name acts as a head of per-device list. */
-       list_add_tail(&name_node->list, &dev->name_node->list);
+       list_add_tail_rcu(&name_node->list, &dev->name_node->list);
  
         return 0;
  }
@@ -9078,7 +9078,7 @@ static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll
  {
  #if IS_ENABLED(CONFIG_DPLL)
         rtnl_lock();
-       dev->dpll_pin = dpll_pin;
+       rcu_assign_pointer(dev->dpll_pin, dpll_pin);
         rtnl_unlock();
  #endif
  }
@@ -11652,11 +11652,12 @@ static void __init net_dev_struct_check(void)
         CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160);
  
         /* TXRX read-mostly hotpath */
+       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats);
         CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
         CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
         CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
         CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
-       CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
+       CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38);
  
         /* RX read-mostly hotpath */
         CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c

index f6f29eb03ec277a1ea17ccc220fa7624bf6db092..ae86f751efc3701a8bba8c4833e2db2b22730bd2 100644 (file)
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1020,14 +1020,17 @@ static size_t rtnl_xdp_size(void)
  static size_t rtnl_prop_list_size(const struct net_device *dev)
  {
         struct netdev_name_node *name_node;
-       size_t size;
+       unsigned int cnt = 0;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(name_node, &dev->name_node->list, list)
+               cnt++;
+       rcu_read_unlock();
  
-       if (list_empty(&dev->name_node->list))
+       if (!cnt)
                 return 0;
-       size = nla_total_size(0);
-       list_for_each_entry(name_node, &dev->name_node->list, list)
-               size += nla_total_size(ALTIFNAMSIZ);
-       return size;
+
+       return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ);
  }
  
  static size_t rtnl_proto_down_size(const struct net_device *dev)
@@ -5166,10 +5169,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
         struct net *net = sock_net(skb->sk);
         struct ifinfomsg *ifm;
         struct net_device *dev;
-       struct nlattr *br_spec, *attr = NULL;
+       struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
         int rem, err = -EOPNOTSUPP;
         u16 flags = 0;
-       bool have_flags = false;
  
         if (nlmsg_len(nlh) < sizeof(*ifm))
                 return -EINVAL;
@@ -5187,11 +5189,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
         if (br_spec) {
                 nla_for_each_nested(attr, br_spec, rem) {
-                       if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+                       if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
                                 if (nla_len(attr) < sizeof(flags))
                                         return -EINVAL;
  
-                               have_flags = true;
+                               br_flags_attr = attr;
                                 flags = nla_get_u16(attr);
                         }
  
@@ -5235,8 +5237,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
                 }
         }
  
-       if (have_flags)
-               memcpy(nla_data(attr), &flags, sizeof(flags));
+       if (br_flags_attr)
+               memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
  out:
         return err;
  }
diff --git a/net/core/skmsg.c b/net/core/skmsg.c

index 93ecfceac1bc49bd843728518215ade5ced374a5..4d75ef9d24bfa7cbffe642448f5116ac0b943ed2 100644 (file)
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
  
                 rcu_read_lock();
                 psock = sk_psock(sk);
-               if (psock)
-                       psock->saved_data_ready(sk);
+               if (psock) {
+                       read_lock_bh(&sk->sk_callback_lock);
+                       sk_psock_data_ready(sk, psock);
+                       read_unlock_bh(&sk->sk_callback_lock);
+               }
                 rcu_read_unlock();
         }
  }
diff --git a/net/core/sock.c b/net/core/sock.c

index 0a7f46c37f0cfc169e11377107c8342c229da0de..5e78798456fd81dbd34e94021531340f7ba5ab0a 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1188,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
                  */
                 WRITE_ONCE(sk->sk_txrehash, (u8)val);
                 return 0;
+       case SO_PEEK_OFF:
+               {
+               int (*set_peek_off)(struct sock *sk, int val);
+
+               set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+               if (set_peek_off)
+                       ret = set_peek_off(sk, val);
+               else
+                       ret = -EOPNOTSUPP;
+               return ret;
+               }
         }
  
         sockopt_lock_sock(sk);
@@ -1430,18 +1441,6 @@ set_sndbuf:
                 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
                 break;
  
-       case SO_PEEK_OFF:
-               {
-               int (*set_peek_off)(struct sock *sk, int val);
-
-               set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
-               if (set_peek_off)
-                       ret = set_peek_off(sk, val);
-               else
-                       ret = -EOPNOTSUPP;
-               break;
-               }
-
         case SO_NOFCS:
                 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
                 break;
diff --git a/net/devlink/core.c b/net/devlink/core.c

index 4275a2bc6d8e062052a88503b731d9599ca55d2a..7f0b093208d75b91e25cb78a73bece8ef2577831 100644 (file)
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -46,7 +46,7 @@ struct devlink_rel {
                 u32 obj_index;
                 devlink_rel_notify_cb_t *notify_cb;
                 devlink_rel_cleanup_cb_t *cleanup_cb;
-               struct work_struct notify_work;
+               struct delayed_work notify_work;
         } nested_in;
  };
  
@@ -70,7 +70,7 @@ static void __devlink_rel_put(struct devlink_rel *rel)
  static void devlink_rel_nested_in_notify_work(struct work_struct *work)
  {
         struct devlink_rel *rel = container_of(work, struct devlink_rel,
-                                              nested_in.notify_work);
+                                              nested_in.notify_work.work);
         struct devlink *devlink;
  
         devlink = devlinks_xa_get(rel->nested_in.devlink_index);
@@ -96,13 +96,13 @@ rel_put:
         return;
  
  reschedule_work:
-       schedule_work(&rel->nested_in.notify_work);
+       schedule_delayed_work(&rel->nested_in.notify_work, 1);
  }
  
  static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel)
  {
         __devlink_rel_get(rel);
-       schedule_work(&rel->nested_in.notify_work);
+       schedule_delayed_work(&rel->nested_in.notify_work, 0);
  }
  
  static struct devlink_rel *devlink_rel_alloc(void)
@@ -123,8 +123,8 @@ static struct devlink_rel *devlink_rel_alloc(void)
         }
  
         refcount_set(&rel->refcount, 1);
-       INIT_WORK(&rel->nested_in.notify_work,
-                 &devlink_rel_nested_in_notify_work);
+       INIT_DELAYED_WORK(&rel->nested_in.notify_work,
+                         &devlink_rel_nested_in_notify_work);
         return rel;
  }
  
@@ -529,14 +529,20 @@ static int __init devlink_init(void)
  {
         int err;
  
-       err = genl_register_family(&devlink_nl_family);
-       if (err)
-               goto out;
         err = register_pernet_subsys(&devlink_pernet_ops);
         if (err)
                 goto out;
+       err = genl_register_family(&devlink_nl_family);
+       if (err)
+               goto out_unreg_pernet_subsys;
         err = register_netdevice_notifier(&devlink_port_netdevice_nb);
+       if (!err)
+               return 0;
+
+       genl_unregister_family(&devlink_nl_family);
  
+out_unreg_pernet_subsys:
+       unregister_pernet_subsys(&devlink_pernet_ops);
  out:
         WARN_ON(err);
         return err;
diff --git a/net/devlink/port.c b/net/devlink/port.c

index 62e54e152ecf1fa601cb2cd755988c9ff97670af..4b2d46ccfe484f1ae2c21b5b2921a113d59e13f5 100644 (file)
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -583,7 +583,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
  
         xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
                 err = devlink_nl_port_fill(msg, devlink_port,
-                                          DEVLINK_CMD_NEW,
+                                          DEVLINK_CMD_PORT_NEW,
                                            NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq, flags,
                                            cb->extack);
@@ -674,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
                 return -EOPNOTSUPP;
         }
         if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
-               NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+               NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE],
                                     "Function does not support state setting");
                 return -EOPNOTSUPP;
         }
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c

index 16ed7bfd29e4fbf39e204278950834b9e69b9f97..34fd1d9b2db861de15f7ce68828abedbf6bc771c 100644 (file)
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -471,7 +471,10 @@ static void handshake_req_destroy_test1(struct kunit *test)
         handshake_req_cancel(sock->sk);
  
         /* Act */
-       fput(filp);
+       /* Ensure the close/release/put process has run to
+        * completion before checking the result.
+        */
+       __fput_sync(filp);
  
         /* Assert */
         KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c

index 7ceb9ac6e7309372a5931f92c9b8adcc390af5f4..9d71b66183daf94e19945d75cfb5c33df6ce346c 100644 (file)
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -308,7 +308,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
  
         skb = hsr_init_skb(master);
         if (!skb) {
-               WARN_ONCE(1, "HSR: Could not send supervision frame\n");
+               netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n");
                 return;
         }
  
@@ -355,7 +355,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
  
         skb = hsr_init_skb(master);
         if (!skb) {
-               WARN_ONCE(1, "PRP: Could not send supervision frame\n");
+               netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n");
                 return;
         }
  
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c

index 80cdc6f6b34c97601961179c4839dc68c0a6d2e1..5d68cb181695d9a9f83809142a0300b8ddad5f53 100644 (file)
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
                 return false;
  
         /* Get next tlv */
-       total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
+       total_length += hsr_sup_tag->tlv.HSR_TLV_length;
         if (!pskb_may_pull(skb, total_length))
                 return false;
         skb_pull(skb, total_length);
@@ -435,7 +435,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
                         continue;
  
                 /* Don't send frame over port where it has been sent before.
-                * Also fro SAN, this shouldn't be done.
+                * Also for SAN, this shouldn't be done.
                  */
                 if (!frame->is_from_san &&
                     hsr_register_frame_out(port, frame->node_src,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index 4e635dd3d3c8cca0aee00fa508368dc3d8965b93..a5a820ee2026691afdd5ca3255962b5116fca290 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1628,10 +1628,12 @@ EXPORT_SYMBOL(inet_current_timestamp);
  
  int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
  {
-       if (sk->sk_family == AF_INET)
+       unsigned int family = READ_ONCE(sk->sk_family);
+
+       if (family == AF_INET)
                 return ip_recv_error(sk, msg, len, addr_len);
  #if IS_ENABLED(CONFIG_IPV6)
-       if (sk->sk_family == AF_INET6)
+       if (family == AF_INET6)
                 return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
  #endif
         return -EINVAL;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c

index a2e6e1fdf82be44c15daefa2a423967ccd8999f7..64aec3dff8ec85135a8d14e5618900927eb59959 100644 (file)
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -597,5 +597,6 @@ static void __exit ah4_fini(void)
  
  module_init(ah4_init);
  module_exit(ah4_fini);
+MODULE_DESCRIPTION("IPv4 AH transformation library");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c

index 9456f5bb35e5d9e97d6c05be21561b435e2b704a..0d0d725b46ad0c56b19b6356f6d3e6be8bdcae83 100644 (file)
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
         if (neigh) {
                 if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
                         read_lock_bh(&neigh->lock);
-                       memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+                       memcpy(r->arp_ha.sa_data, neigh->ha,
+                              min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
                         r->arp_flags = arp_state_to_flags(neigh);
                         read_unlock_bh(&neigh->lock);
                         r->arp_ha.sa_family = dev->type;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c

index ca0ff15dc8fa358b81a804eda7398ecd10f00743..bc74f131fe4dfad327e71c1a8f0a4b66cdc526e5 100644 (file)
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1825,6 +1825,21 @@ done:
         return err;
  }
  
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet_base_seq(const struct net *net)
+{
+       u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
+                 net->dev_base_seq;
+
+       /* Must not return 0 (see nl_dump_check_consistent()).
+        * Chose a value far away from 0.
+        */
+       if (!res)
+               res = 0x80000000;
+       return res;
+}
+
  static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
  {
         const struct nlmsghdr *nlh = cb->nlh;
@@ -1876,8 +1891,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
                 idx = 0;
                 head = &tgt_net->dev_index_head[h];
                 rcu_read_lock();
-               cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
-                         tgt_net->dev_base_seq;
+               cb->seq = inet_base_seq(tgt_net);
                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
                         if (idx < s_idx)
                                 goto cont;
@@ -2278,8 +2292,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
                 idx = 0;
                 head = &net->dev_index_head[h];
                 rcu_read_lock();
-               cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
-                         net->dev_base_seq;
+               cb->seq = inet_base_seq(net);
                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
                         if (idx < s_idx)
                                 goto cont;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c

index 4ccfc104f13a517ec15e5b609502708c289e3b57..4dd9e50406720cfc90d280f61e4616d6a2e58d3c 100644 (file)
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1247,5 +1247,6 @@ static void __exit esp4_fini(void)
  
  module_init(esp4_init);
  module_exit(esp4_fini);
+MODULE_DESCRIPTION("IPv4 ESP transformation library");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c

index 93e9193df54461b25c61089bd5db4dd33c32dab6..308ff34002ea6b5e0620004f65ffd833087afbc1 100644 (file)
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1130,10 +1130,33 @@ ok:
         return 0;
  
  error:
+       if (sk_hashed(sk)) {
+               spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash);
+
+               sock_prot_inuse_add(net, sk->sk_prot, -1);
+
+               spin_lock(lock);
+               sk_nulls_del_node_init_rcu(sk);
+               spin_unlock(lock);
+
+               sk->sk_hash = 0;
+               inet_sk(sk)->inet_sport = 0;
+               inet_sk(sk)->inet_num = 0;
+
+               if (tw)
+                       inet_twsk_bind_unhash(tw, hinfo);
+       }
+
         spin_unlock(&head2->lock);
         if (tb_created)
                 inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
-       spin_unlock_bh(&head->lock);
+       spin_unlock(&head->lock);
+
+       if (tw)
+               inet_twsk_deschedule_put(tw);
+
+       local_bh_enable();
+
         return -ENOMEM;
  }
  
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c

index 5169c3c72cffe49cef613e69889d139db867ff74..6b9cf5a24c19ff06634f7841141b8a30639b8d17 100644 (file)
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1793,6 +1793,7 @@ static void __exit ipgre_fini(void)
  
  module_init(ipgre_init);
  module_exit(ipgre_fini);
+MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_RTNL_LINK("gre");
  MODULE_ALIAS_RTNL_LINK("gretap");
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index b06f678b03a19b806fd14764a4caad60caf02919..67d846622365e8da9c2295f76943a504d16b066f 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -972,8 +972,8 @@ static int __ip_append_data(struct sock *sk,
         unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
         int csummode = CHECKSUM_NONE;
         struct rtable *rt = (struct rtable *)cork->dst;
+       bool paged, hold_tskey, extra_uref = false;
         unsigned int wmem_alloc_delta = 0;
-       bool paged, extra_uref = false;
         u32 tskey = 0;
  
         skb = skb_peek_tail(queue);
@@ -982,10 +982,6 @@ static int __ip_append_data(struct sock *sk,
         mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
         paged = !!cork->gso_size;
  
-       if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
-           READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
-               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
  
         fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1052,6 +1048,11 @@ static int __ip_append_data(struct sock *sk,
  
         cork->length += length;
  
+       hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+                    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+       if (hold_tskey)
+               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
         /* So, what's going on in the loop below?
          *
          * We use calculated fragment length to generate chained skb,
@@ -1274,6 +1275,8 @@ error:
         cork->length -= length;
         IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+       if (hold_tskey)
+               atomic_dec(&sk->sk_tskey);
         return err;
  }
  
@@ -1287,6 +1290,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
         if (unlikely(!rt))
                 return -EFAULT;
  
+       cork->fragsize = ip_sk_use_pmtu(sk) ?
+                        dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+
+       if (!inetdev_valid_mtu(cork->fragsize))
+               return -ENETUNREACH;
+
         /*
          * setup for corking.
          */
@@ -1303,12 +1312,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
                 cork->addr = ipc->addr;
         }
  
-       cork->fragsize = ip_sk_use_pmtu(sk) ?
-                        dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
-
-       if (!inetdev_valid_mtu(cork->fragsize))
-               return -ENETUNREACH;
-
         cork->gso_size = ipc->gso_size;
  
         cork->dst = &rt->dst;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c

index 7aa9dc0e6760df6c9980252854014ab6fdd1c3f7..21d2ffa919e98b41ed325f978ae573b9f25f4d71 100644 (file)
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1363,12 +1363,13 @@ e_inval:
   * ipv4_pktinfo_prepare - transfer some info from rtable to skb
   * @sk: socket
   * @skb: buffer
+ * @drop_dst: if true, drops skb dst
   *
   * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
   * destination in skb->cb[] before dst drop.
   * This way, receiver doesn't make cache line misses to read rtable.
   */
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst)
  {
         struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
         bool prepare = inet_test_bit(PKTINFO, sk) ||
@@ -1397,7 +1398,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
                 pktinfo->ipi_ifindex = 0;
                 pktinfo->ipi_spec_dst.s_addr = 0;
         }
-       skb_dst_drop(skb);
+       if (drop_dst)
+               skb_dst_drop(skb);
  }
  
  int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c

index beeae624c412d752bd5ee5d459a88f57640445e9..1b6981de3f29514dac72161be02f3ac6e4625551 100644 (file)
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -554,6 +554,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
         return 0;
  }
  
+static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
+{
+       /* we must cap headroom to some upperlimit, else pskb_expand_head
+        * will overflow header offsets in skb_headers_offset_update().
+        */
+       static const unsigned int max_allowed = 512;
+
+       if (headroom > max_allowed)
+               headroom = max_allowed;
+
+       if (headroom > READ_ONCE(dev->needed_headroom))
+               WRITE_ONCE(dev->needed_headroom, headroom);
+}
+
  void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                        u8 proto, int tunnel_hlen)
  {
@@ -632,13 +646,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
         }
  
         headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
-       if (headroom > READ_ONCE(dev->needed_headroom))
-               WRITE_ONCE(dev->needed_headroom, headroom);
-
-       if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+       if (skb_cow_head(skb, headroom)) {
                 ip_rt_put(rt);
                 goto tx_dropped;
         }
+
+       ip_tunnel_adj_headroom(dev, headroom);
+
         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
                       df, !net_eq(tunnel->net, dev_net(dev)));
         return;
@@ -818,16 +832,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
  
         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
-       if (max_headroom > READ_ONCE(dev->needed_headroom))
-               WRITE_ONCE(dev->needed_headroom, max_headroom);
  
-       if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+       if (skb_cow_head(skb, max_headroom)) {
                 ip_rt_put(rt);
                 DEV_STATS_INC(dev, tx_dropped);
                 kfree_skb(skb);
                 return;
         }
  
+       ip_tunnel_adj_headroom(dev, max_headroom);
+
         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
                       df, !net_eq(tunnel->net, dev_net(dev)));
         return;
@@ -1298,4 +1312,5 @@ void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
  }
  EXPORT_SYMBOL_GPL(ip_tunnel_setup);
  
+MODULE_DESCRIPTION("IPv4 tunnel implementation library");
  MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c

index 586b1b3e35b805d46158531ae8e7b49122abbaa7..80ccd6661aa32f2b60a720a18deec26e9e2cc18d 100644 (file)
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
         };
         skb_reset_network_header(skb);
  
-       csum = csum_partial(icmp6h, len, 0);
+       csum = skb_checksum(skb, skb_transport_offset(skb), len, 0);
         icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
                                               IPPROTO_ICMPV6, csum);
  
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c

index 9ab9b3ebe0cd1a9e95f489d98c5a3d89c7c0edf6..d1d6bb28ed6e95c6e9c247bf1df1b27287bc8328 100644 (file)
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -721,6 +721,7 @@ static void __exit vti_fini(void)
  
  module_init(vti_init);
  module_exit(vti_fini);
+MODULE_DESCRIPTION("Virtual (secure) IP tunneling library");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_RTNL_LINK("vti");
  MODULE_ALIAS_NETDEV("ip_vti0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c

index 27b8f83c6ea200314f41a29ecfea494b9ddef2ca..03afa3871efc53b5af543e7d53283be69a02f818 100644 (file)
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -658,6 +658,7 @@ static void __exit ipip_fini(void)
  
  module_init(ipip_init);
  module_exit(ipip_fini);
+MODULE_DESCRIPTION("IP/IP protocol decoder library");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_RTNL_LINK("ipip");
  MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c

index 9d6f59531b3a0b0bc082e1f1febf4568368580b9..3622298365105d99c0277f1c1616fb5fc63cdc2d 100644 (file)
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt,
                 msg = (struct igmpmsg *)skb_network_header(skb);
                 msg->im_vif = vifi;
                 msg->im_vif_hi = vifi >> 8;
-               ipv4_pktinfo_prepare(mroute_sk, pkt);
+               ipv4_pktinfo_prepare(mroute_sk, pkt, false);
                 memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
                 /* Add our header */
                 igmp = skb_put(skb, sizeof(struct igmphdr));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c

index 27da9d7294c0b4fb9027bb7feb704063dc6302db..aea89326c69793f94bb8489cdf0c93b7524ba3fc 100644 (file)
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
  
         /* Charge it to the socket. */
  
-       ipv4_pktinfo_prepare(sk, skb);
+       ipv4_pktinfo_prepare(sk, skb, true);
         if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
                 kfree_skb_reason(skb, reason);
                 return NET_RX_DROP;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

index a1c6de385ccef91fe3c3e072ac5d2a20f0394a2b..c82dc42f57c65df112f79080ff407cd98d11ce68 100644 (file)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1786,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
  
  static bool can_map_frag(const skb_frag_t *frag)
  {
-       return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag);
+       struct page *page;
+
+       if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
+               return false;
+
+       page = skb_frag_page(frag);
+
+       if (PageCompound(page) || page->mapping)
+               return false;
+
+       return true;
  }
  
  static int find_next_mappable_frag(const skb_frag_t *frag,
@@ -4605,7 +4615,8 @@ static void __init tcp_struct_check(void)
         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out);
         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
-       CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31);
+       CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio);
+       CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32);
  
         /* RX read-mostly hotpath cache lines */
         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c

index 5048c47c79b2848a1b42032dceb4884f43cd4748..4c1f836aae38b7a75b912db9e2cdc84ccfc48e56 100644 (file)
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -294,4 +294,5 @@ static void __exit tunnel4_fini(void)
  
  module_init(tunnel4_init);
  module_exit(tunnel4_fini);
+MODULE_DESCRIPTION("IPv4 XFRM tunnel library");
  MODULE_LICENSE("GPL");
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

index 148ffb007969f57edc4be8ec1c235062ad49b503..e474b201900f9317069a31e4b507964fe11b2297 100644 (file)
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk)
  
  void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
  {
-       if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
-               bool slow = lock_sock_fast(sk);
-
-               sk_peek_offset_bwd(sk, len);
-               unlock_sock_fast(sk, slow);
-       }
+       sk_peek_offset_bwd(sk, len);
  
         if (!skb_unref(skb))
                 return;
@@ -2169,7 +2164,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
  
         udp_csum_pull_header(skb);
  
-       ipv4_pktinfo_prepare(sk, skb);
+       ipv4_pktinfo_prepare(sk, skb, true);
         return __udp_queue_rcv_skb(sk, skb);
  
  csum_error:
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c

index a87defb2b16729886d20fcec53cea939c7fea4b7..860aff5f85990252607651c173a6d84006e5afe1 100644 (file)
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -253,4 +253,5 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
  }
  EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup);
  
+MODULE_DESCRIPTION("IPv4 Foo over UDP tunnel driver");
  MODULE_LICENSE("GPL");
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c

index 8489fa10658377eb0942943e537a453d781f4520..8cb266af139311b48af474fc19eff32c6d8b5e37 100644 (file)
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -114,5 +114,6 @@ static void __exit ipip_fini(void)
  
  module_init(ipip_init);
  module_exit(ipip_fini);
+MODULE_DESCRIPTION("IPv4 XFRM tunnel driver");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c

index 733ace18806c61f487d83081dc6d39d079959f77..055230b669cf21d87738a4371543c599c3476f98 100644 (file)
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -708,6 +708,22 @@ errout:
         return err;
  }
  
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet6_base_seq(const struct net *net)
+{
+       u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
+                 net->dev_base_seq;
+
+       /* Must not return 0 (see nl_dump_check_consistent()).
+        * Chose a value far away from 0.
+        */
+       if (!res)
+               res = 0x80000000;
+       return res;
+}
+
+
  static int inet6_netconf_dump_devconf(struct sk_buff *skb,
                                       struct netlink_callback *cb)
  {
@@ -741,8 +757,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
                 idx = 0;
                 head = &net->dev_index_head[h];
                 rcu_read_lock();
-               cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
-                         net->dev_base_seq;
+               cb->seq = inet6_base_seq(net);
                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
                         if (idx < s_idx)
                                 goto cont;
@@ -5362,7 +5377,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
         }
  
         rcu_read_lock();
-       cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
+       cb->seq = inet6_base_seq(tgt_net);
         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                 idx = 0;
                 head = &tgt_net->dev_index_head[h];
@@ -5494,9 +5509,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
         }
  
         addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
-       if (!addr)
-               return -EINVAL;
-
+       if (!addr) {
+               err = -EINVAL;
+               goto errout;
+       }
         ifm = nlmsg_data(nlh);
         if (ifm->ifa_index)
                 dev = dev_get_by_index(tgt_net, ifm->ifa_index);
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c

index 507a8353a6bdb94cd5e83aad6efd877d84cfdc85..c008d21925d7f4afa31cc55deec0ccc321cdab04 100644 (file)
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
  EXPORT_SYMBOL_GPL(ipv6_stub);
  
  /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8)
+       = IN6ADDR_LOOPBACK_INIT;
  EXPORT_SYMBOL(in6addr_loopback);
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8)
+       = IN6ADDR_ANY_INIT;
  EXPORT_SYMBOL(in6addr_any);
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8)
+       = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
  EXPORT_SYMBOL(in6addr_linklocal_allnodes);
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8)
+       = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
  EXPORT_SYMBOL(in6addr_linklocal_allrouters);
-const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8)
+       = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
  EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
-const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8)
+       = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
  EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
-const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8)
+       = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
  EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
  
  static void snmp6_free_dev(struct inet6_dev *idev)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c

index 2016e90e6e1d21a49696c9933f1b77320cc71953..eb474f0987ae016b9d800e9f83d70d73171b21d2 100644 (file)
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -800,5 +800,6 @@ static void __exit ah6_fini(void)
  module_init(ah6_init);
  module_exit(ah6_fini);
  
+MODULE_DESCRIPTION("IPv6 AH transformation helpers");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c

index 2cc1a45742d823a793d95140910942fb83e7f331..6e6efe026cdcc2feab9a1f18fb784042b586f045 100644 (file)
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1301,5 +1301,6 @@ static void __exit esp6_fini(void)
  module_init(esp6_init);
  module_exit(esp6_fini);
  
+MODULE_DESCRIPTION("IPv6 ESP transformation helpers");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c

index 4952ae792450575d275f1565d2bc198e440b67f6..02e9ffb63af1971c0949ccd0c392b995efb41ccb 100644 (file)
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop,
                                 case IPV6_TLV_IOAM:
                                         if (!ipv6_hop_ioam(skb, off))
                                                 return false;
+
+                                       nh = skb_network_header(skb);
                                         break;
                                 case IPV6_TLV_JUMBO:
                                         if (!ipv6_hop_jumbo(skb, off))
@@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
                 if (!skb_valid_dst(skb))
                         ip6_route_input(skb);
  
+               /* About to mangle packet header */
+               if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len))
+                       goto drop;
+
+               /* Trace pointer may have changed */
+               trace = (struct ioam6_trace_hdr *)(skb_network_header(skb)
+                                                  + optoff + sizeof(*hdr));
+
                 ioam6_fill_trace_data(skb, ns, trace, true);
                 break;
         default:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c

index a722a43dd668581cf4efb08ee5ab8410e5adebb7..31b86fe661aa6cd94fb5d8848900406c2db110e3 100644 (file)
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1424,11 +1424,11 @@ static int __ip6_append_data(struct sock *sk,
         bool zc = false;
         u32 tskey = 0;
         struct rt6_info *rt = (struct rt6_info *)cork->dst;
+       bool paged, hold_tskey, extra_uref = false;
         struct ipv6_txoptions *opt = v6_cork->opt;
         int csummode = CHECKSUM_NONE;
         unsigned int maxnonfragsize, headersize;
         unsigned int wmem_alloc_delta = 0;
-       bool paged, extra_uref = false;
  
         skb = skb_peek_tail(queue);
         if (!skb) {
@@ -1440,10 +1440,6 @@ static int __ip6_append_data(struct sock *sk,
         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
         orig_mtu = mtu;
  
-       if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
-           READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
-               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
  
         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
@@ -1538,6 +1534,11 @@ emsgsize:
                         flags &= ~MSG_SPLICE_PAGES;
         }
  
+       hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+                    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+       if (hold_tskey)
+               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
         /*
          * Let's try using as much space as possible.
          * Use MTU if total length of the message fits into the MTU.
@@ -1794,6 +1795,8 @@ error:
         cork->length -= length;
         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+       if (hold_tskey)
+               atomic_dec(&sk->sk_tskey);
         return err;
  }
  
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c

index 46c19bd4899011d53b4feb84e25013c01ddce701..9bbabf750a21e251d4e8f9e3059c707505f5ce32 100644 (file)
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
                                                 struct sk_buff *skb),
                          bool log_ecn_err)
  {
-       const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-       int err;
+       const struct ipv6hdr *ipv6h;
+       int nh, err;
  
         if ((!(tpi->flags & TUNNEL_CSUM) &&
              (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
@@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
                         goto drop;
                 }
  
-               ipv6h = ipv6_hdr(skb);
                 skb->protocol = eth_type_trans(skb, tunnel->dev);
                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
         } else {
@@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
                 skb_reset_mac_header(skb);
         }
  
+       /* Save offset of outer header relative to skb->head,
+        * because we are going to reset the network header to the inner header
+        * and might change skb->head.
+        */
+       nh = skb_network_header(skb) - skb->head;
+
         skb_reset_network_header(skb);
+
+       if (!pskb_inet_may_pull(skb)) {
+               DEV_STATS_INC(tunnel->dev, rx_length_errors);
+               DEV_STATS_INC(tunnel->dev, rx_errors);
+               goto drop;
+       }
+
+       /* Get the outer header. */
+       ipv6h = (struct ipv6hdr *)(skb->head + nh);
+
         memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
  
         __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c

index a7bf0327b380be90bfdcc2182ed3a4296a0e814f..c99053189ea8a13be63927290576655e8da0c0fb 100644 (file)
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -182,4 +182,5 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
  }
  EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup);
  
+MODULE_DESCRIPTION("IPv6 Foo over UDP tunnel driver");
  MODULE_LICENSE("GPL");
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c

index 83d2a8be263fb7bdd0cbe820168b1aac9a4336b2..6a16a5bd0d910bca55a87c55580cbd1cae71bede 100644 (file)
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -405,6 +405,7 @@ static void __exit mip6_fini(void)
  module_init(mip6_init);
  module_exit(mip6_fini);
  
+MODULE_DESCRIPTION("IPv6 Mobility driver");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
  MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c

index 29346a6eec9ffed46b00153c4a6cb0295a327ceb..35508abd76f43d771ed7e66f29bc143af4a81977 100644 (file)
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -512,22 +512,24 @@ int __init seg6_init(void)
  {
         int err;
  
-       err = genl_register_family(&seg6_genl_family);
+       err = register_pernet_subsys(&ip6_segments_ops);
         if (err)
                 goto out;
  
-       err = register_pernet_subsys(&ip6_segments_ops);
+       err = genl_register_family(&seg6_genl_family);
         if (err)
-               goto out_unregister_genl;
+               goto out_unregister_pernet;
  
  #ifdef CONFIG_IPV6_SEG6_LWTUNNEL
         err = seg6_iptunnel_init();
         if (err)
-               goto out_unregister_pernet;
+               goto out_unregister_genl;
  
         err = seg6_local_init();
-       if (err)
-               goto out_unregister_pernet;
+       if (err) {
+               seg6_iptunnel_exit();
+               goto out_unregister_genl;
+       }
  #endif
  
  #ifdef CONFIG_IPV6_SEG6_HMAC
@@ -548,11 +550,11 @@ out_unregister_iptun:
  #endif
  #endif
  #ifdef CONFIG_IPV6_SEG6_LWTUNNEL
-out_unregister_pernet:
-       unregister_pernet_subsys(&ip6_segments_ops);
-#endif
  out_unregister_genl:
         genl_unregister_family(&seg6_genl_family);
+#endif
+out_unregister_pernet:
+       unregister_pernet_subsys(&ip6_segments_ops);
         goto out;
  }
  
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c

index cc24cefdb85c0944c03c019b1c4214302d18e2c8..5e9f625b76e36b9a61c6c2db0b4163e78dca549a 100644 (file)
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1956,6 +1956,7 @@ xfrm_tunnel_failed:
  
  module_init(sit_init);
  module_exit(sit_cleanup);
+MODULE_DESCRIPTION("IPv6-in-IPv4 tunnel SIT driver");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_RTNL_LINK("sit");
  MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c

index 00e8d8b1c9a75fa1a820ac85eba91e3e24750c01..dc4ea9b11794e800eb027855e59a56fa6197df0b 100644 (file)
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -302,4 +302,5 @@ static void __exit tunnel6_fini(void)
  
  module_init(tunnel6_init);
  module_exit(tunnel6_fini);
+MODULE_DESCRIPTION("IP-in-IPv6 tunnel driver");
  MODULE_LICENSE("GPL");
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c

index 1323f2f6928e2abf277e9ce7bd06025cd0049031..f6cb94f82cc3a2b40717a0c4406801dd26ac18c3 100644 (file)
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -401,5 +401,6 @@ static void __exit xfrm6_tunnel_fini(void)
  
  module_init(xfrm6_tunnel_init);
  module_exit(xfrm6_tunnel_fini);
+MODULE_DESCRIPTION("IPv6 XFRM tunnel driver");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c

index 6334f64f04d5f28c7e01e959d18b343d7c641336..b0b3e9c5af44fdd83b0a108bdeb0f3f6a3ffb85e 100644 (file)
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID";
  static LIST_HEAD(iucv_handler_list);
  
  /*
- * iucv_path_table: an array of iucv_path structures.
+ * iucv_path_table: array of pointers to iucv_path structures.
   */
  static struct iucv_path **iucv_path_table;
  static unsigned long iucv_max_pathid;
@@ -544,7 +544,7 @@ static int iucv_enable(void)
  
         cpus_read_lock();
         rc = -ENOMEM;
-       alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
+       alloc_size = iucv_max_pathid * sizeof(*iucv_path_table);
         iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
         if (!iucv_path_table)
                 goto out;
diff --git a/net/key/af_key.c b/net/key/af_key.c

index d68d01804dc7bcf7df78ff6968518298fe9d596a..f79fb99271ed84b8fe981a2b34a25b6abcf9d8e0 100644 (file)
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3924,5 +3924,6 @@ out_unregister_key_proto:
  
  module_init(ipsec_pfkey_init);
  module_exit(ipsec_pfkey_exit);
+MODULE_DESCRIPTION("PF_KEY socket helpers");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_NETPROTO(PF_KEY);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c

index dd3153966173db09d42de02fa3ad4d44d05620f4..7bf14cf9ffaa967483ac0ee01e3f8e835754cd57 100644 (file)
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
  
  back_from_confirm:
         lock_sock(sk);
-       ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
+       ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
         err = ip6_append_data(sk, ip_generic_getfrag, msg,
                               ulen, transhdrlen, &ipc6,
                               &fl6, (struct rt6_info *)dst,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c

index 20551cfb7da6d8dd098c906477895e26c080fe32..fde1140d899efc7ba02e6bc3998cb857ef30df14 100644 (file)
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock)
         }
         netdev_put(llc->dev, &llc->dev_tracker);
         sock_put(sk);
+       sock_orphan(sk);
+       sock->sk = NULL;
         llc_sk_free(sk);
  out:
         return 0;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c

index 489dd97f51724a86053a9c4e9269487c4c7e928b..327682995c9260c9c7498ff9b322ecf5d59c6717 100644 (file)
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -5,7 +5,7 @@
   * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
   * Copyright 2013-2015  Intel Mobile Communications GmbH
   * Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
   */
  
  #include <linux/ieee80211.h>
@@ -987,7 +987,8 @@ static int
  ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
                                      struct cfg80211_unsol_bcast_probe_resp *params,
                                      struct ieee80211_link_data *link,
-                                    struct ieee80211_bss_conf *link_conf)
+                                    struct ieee80211_bss_conf *link_conf,
+                                    u64 *changed)
  {
         struct unsol_bcast_probe_resp_data *new, *old = NULL;
  
@@ -1011,7 +1012,8 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
                 RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL);
         }
  
-       return BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+       *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+       return 0;
  }
  
  static int ieee80211_set_ftm_responder_params(
@@ -1450,10 +1452,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
  
         err = ieee80211_set_unsol_bcast_probe_resp(sdata,
                                                    &params->unsol_bcast_probe_resp,
-                                                  link, link_conf);
+                                                  link, link_conf, &changed);
         if (err < 0)
                 goto error;
-       changed |= err;
  
         err = drv_start_ap(sdata->local, sdata, link_conf);
         if (err) {
@@ -1525,10 +1526,9 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
  
         err = ieee80211_set_unsol_bcast_probe_resp(sdata,
                                                    &params->unsol_bcast_probe_resp,
-                                                  link, link_conf);
+                                                  link, link_conf, &changed);
         if (err < 0)
                 return err;
-       changed |= err;
  
         if (beacon->he_bss_color_valid &&
             beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
@@ -1869,6 +1869,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
                                               sband->band);
         }
  
+       ieee80211_sta_set_rx_nss(link_sta);
+
         return ret;
  }
  
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c

index dce5606ed66da5a31a476aec16bb55412e1e72cc..68596ef78b15ee9596f6f81e8dd2d2f82c1d56cd 100644 (file)
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -997,8 +997,8 @@ static void add_link_files(struct ieee80211_link_data *link,
         }
  }
  
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
-                                 bool mld_vif)
+static void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
+                                        bool mld_vif)
  {
         char buf[10+IFNAMSIZ];
  
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h

index b226b1aae88a5d4205c0206b351b63e6ee54c2a2..a02ec0a413f61468dded52076fbfef9a35da17b0 100644 (file)
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -11,8 +11,6 @@
  #include "ieee80211_i.h"
  
  #ifdef CONFIG_MAC80211_DEBUGFS
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
-                                 bool mld_vif);
  void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
  void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata);
  void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata,
@@ -24,9 +22,6 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link);
  void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link);
  void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link);
  #else
-static inline void ieee80211_debugfs_add_netdev(
-       struct ieee80211_sub_if_data *sdata, bool mld_vif)
-{}
  static inline void ieee80211_debugfs_remove_netdev(
         struct ieee80211_sub_if_data *sdata)
  {}
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c

index e4e7c0b38cb6efcbb65786d071f436e09c7bf322..11c4caa4748e4038a2c758e34ae8dbf762e8159e 100644 (file)
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1783,7 +1783,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
         /* need to do this after the switch so vif.type is correct */
         ieee80211_link_setup(&sdata->deflink);
  
-       ieee80211_debugfs_add_netdev(sdata, false);
+       ieee80211_debugfs_recreate_netdev(sdata, false);
  }
  
  static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c

index 073105deb42481f2792a33fa0341d509f7a95017..2022a26eb8811492ef8029de9e89dfd4bbb5f101 100644 (file)
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
   * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
   * Copyright 2013-2014  Intel Mobile Communications GmbH
   * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
   */
  
  #include <linux/delay.h>
@@ -2918,6 +2918,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
  
         /* other links will be destroyed */
         sdata->deflink.u.mgd.bss = NULL;
+       sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
  
         netif_carrier_off(sdata->dev);
  
@@ -5045,9 +5046,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
         if (!link)
                 return 0;
  
-       /* will change later if needed */
-       link->smps_mode = IEEE80211_SMPS_OFF;
-
         /*
          * If this fails (possibly due to channel context sharing
          * on incompatible channels, e.g. 80+80 and 160 sharing the
@@ -7096,6 +7094,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
         link->u.mgd.p2p_noa_index = -1;
         link->u.mgd.conn_flags = 0;
         link->conf->bssid = link->u.mgd.bssid;
+       link->smps_mode = IEEE80211_SMPS_OFF;
  
         wiphy_work_init(&link->u.mgd.request_smps_work,
                         ieee80211_request_smps_mgd_work);
@@ -7309,6 +7308,75 @@ out_err:
         return err;
  }
  
+static bool ieee80211_mgd_csa_present(struct ieee80211_sub_if_data *sdata,
+                                     const struct cfg80211_bss_ies *ies,
+                                     u8 cur_channel, bool ignore_ecsa)
+{
+       const struct element *csa_elem, *ecsa_elem;
+       struct ieee80211_channel_sw_ie *csa = NULL;
+       struct ieee80211_ext_chansw_ie *ecsa = NULL;
+
+       if (!ies)
+               return false;
+
+       csa_elem = cfg80211_find_elem(WLAN_EID_CHANNEL_SWITCH,
+                                     ies->data, ies->len);
+       if (csa_elem && csa_elem->datalen == sizeof(*csa))
+               csa = (void *)csa_elem->data;
+
+       ecsa_elem = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+                                      ies->data, ies->len);
+       if (ecsa_elem && ecsa_elem->datalen == sizeof(*ecsa))
+               ecsa = (void *)ecsa_elem->data;
+
+       if (csa && csa->count == 0)
+               csa = NULL;
+       if (csa && !csa->mode && csa->new_ch_num == cur_channel)
+               csa = NULL;
+
+       if (ecsa && ecsa->count == 0)
+               ecsa = NULL;
+       if (ecsa && !ecsa->mode && ecsa->new_ch_num == cur_channel)
+               ecsa = NULL;
+
+       if (ignore_ecsa && ecsa) {
+               sdata_info(sdata,
+                          "Ignoring ECSA in probe response - was considered stuck!\n");
+               return csa;
+       }
+
+       return csa || ecsa;
+}
+
+static bool ieee80211_mgd_csa_in_process(struct ieee80211_sub_if_data *sdata,
+                                        struct cfg80211_bss *bss)
+{
+       u8 cur_channel;
+       bool ret;
+
+       cur_channel = ieee80211_frequency_to_channel(bss->channel->center_freq);
+
+       rcu_read_lock();
+       if (ieee80211_mgd_csa_present(sdata,
+                                     rcu_dereference(bss->beacon_ies),
+                                     cur_channel, false)) {
+               ret = true;
+               goto out;
+       }
+
+       if (ieee80211_mgd_csa_present(sdata,
+                                     rcu_dereference(bss->proberesp_ies),
+                                     cur_channel, bss->proberesp_ecsa_stuck)) {
+               ret = true;
+               goto out;
+       }
+
+       ret = false;
+out:
+       rcu_read_unlock();
+       return ret;
+}
+
  /* config hooks */
  int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
                        struct cfg80211_auth_request *req)
@@ -7317,7 +7385,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
         struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
         struct ieee80211_mgd_auth_data *auth_data;
         struct ieee80211_link_data *link;
-       const struct element *csa_elem, *ecsa_elem;
         u16 auth_alg;
         int err;
         bool cont_auth;
@@ -7360,21 +7427,10 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
         if (ifmgd->assoc_data)
                 return -EBUSY;
  
-       rcu_read_lock();
-       csa_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_CHANNEL_SWITCH);
-       ecsa_elem = ieee80211_bss_get_elem(req->bss,
-                                          WLAN_EID_EXT_CHANSWITCH_ANN);
-       if ((csa_elem &&
-            csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
-            ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
-           (ecsa_elem &&
-            ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
-            ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
-               rcu_read_unlock();
+       if (ieee80211_mgd_csa_in_process(sdata, req->bss)) {
                 sdata_info(sdata, "AP is in CSA process, reject auth\n");
                 return -EINVAL;
         }
-       rcu_read_unlock();
  
         auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
                             req->ie_len, GFP_KERNEL);
@@ -7684,7 +7740,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
         struct ieee80211_local *local = sdata->local;
         struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
         struct ieee80211_mgd_assoc_data *assoc_data;
-       const struct element *ssid_elem, *csa_elem, *ecsa_elem;
+       const struct element *ssid_elem;
         struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
         ieee80211_conn_flags_t conn_flags = 0;
         struct ieee80211_link_data *link;
@@ -7707,23 +7763,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
  
         cbss = req->link_id < 0 ? req->bss : req->links[req->link_id].bss;
  
-       rcu_read_lock();
-       ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
-       if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
-               rcu_read_unlock();
+       if (ieee80211_mgd_csa_in_process(sdata, cbss)) {
+               sdata_info(sdata, "AP is in CSA process, reject assoc\n");
                 kfree(assoc_data);
                 return -EINVAL;
         }
  
-       csa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_CHANNEL_SWITCH);
-       ecsa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_EXT_CHANSWITCH_ANN);
-       if ((csa_elem &&
-            csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
-            ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
-           (ecsa_elem &&
-            ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
-            ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
-               sdata_info(sdata, "AP is in CSA process, reject assoc\n");
+       rcu_read_lock();
+       ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
+       if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
                 rcu_read_unlock();
                 kfree(assoc_data);
                 return -EINVAL;
@@ -7998,8 +8046,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
  
                 rcu_read_lock();
                 beacon_ies = rcu_dereference(req->bss->beacon_ies);
-
-               if (beacon_ies) {
+               if (!beacon_ies) {
                         /*
                          * Wait up to one beacon interval ...
                          * should this be more if we miss one?
@@ -8080,6 +8127,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
                 ieee80211_report_disconnect(sdata, frame_buf,
                                             sizeof(frame_buf), true,
                                             req->reason_code, false);
+               drv_mgd_complete_tx(sdata->local, sdata, &info);
                 return 0;
         }
  
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c

index d5ea5f5bcf3a069e1d4dc5dd2638275e58aae51f..9d33fd2377c88af8ec38b6e398d103449f3b03b8 100644 (file)
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local,
                 rcu_read_unlock();
         }
  
-       drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
+       if (sta->uploaded)
+               drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
  }
  
  int ieee80211_rate_control_register(const struct rate_control_ops *ops)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c

index 645355e5f1bc7baba435db18c0c8d8243a4649c6..f9d5842601fa9433ba0303f3b6572129b3e2f9fe 100644 (file)
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
   * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
   * Copyright 2013-2015  Intel Mobile Communications GmbH
   * Copyright 2016-2017  Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
   */
  
  #include <linux/if_arp.h>
@@ -237,14 +237,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
  }
  
  static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+                                       struct ieee80211_channel *channel,
                                         u32 scan_flags, const u8 *da)
  {
         if (!sdata)
                 return false;
-       /* accept broadcast for OCE */
-       if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
-           is_broadcast_ether_addr(da))
+
+       /* accept broadcast on 6 GHz and for OCE */
+       if (is_broadcast_ether_addr(da) &&
+           (channel->band == NL80211_BAND_6GHZ ||
+            scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP))
                 return true;
+
         if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
                 return true;
         return ether_addr_equal(da, sdata->vif.addr);
@@ -293,6 +297,12 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
                 wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
         }
  
+       channel = ieee80211_get_channel_khz(local->hw.wiphy,
+                                           ieee80211_rx_status_to_khz(rx_status));
+
+       if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+               return;
+
         if (ieee80211_is_probe_resp(mgmt->frame_control)) {
                 struct cfg80211_scan_request *scan_req;
                 struct cfg80211_sched_scan_request *sched_scan_req;
@@ -310,19 +320,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
                 /* ignore ProbeResp to foreign address or non-bcast (OCE)
                  * unless scanning with randomised address
                  */
-               if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+               if (!ieee80211_scan_accept_presp(sdata1, channel,
+                                                scan_req_flags,
                                                  mgmt->da) &&
-                   !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+                   !ieee80211_scan_accept_presp(sdata2, channel,
+                                                sched_scan_req_flags,
                                                  mgmt->da))
                         return;
         }
  
-       channel = ieee80211_get_channel_khz(local->hw.wiphy,
-                                       ieee80211_rx_status_to_khz(rx_status));
-
-       if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
-               return;
-
         bss = ieee80211_bss_info_update(local, rx_status,
                                         mgmt, skb->len,
                                         channel);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c

index 68a48abc72876c4abaa8cf4c95d7c2793dc07813..6fbb15b65902c754ea4c2487a40d4ce0ed38634a 100644 (file)
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
   * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
   * Copyright 2007      Johannes Berg <johannes@sipsolutions.net>
   * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
   *
   * Transmit and frame generation functions.
   */
@@ -3100,10 +3100,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
                         /* DA SA BSSID */
                         build.da_offs = offsetof(struct ieee80211_hdr, addr1);
                         build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+                       rcu_read_lock();
                         link = rcu_dereference(sdata->link[tdls_link_id]);
-                       if (WARN_ON_ONCE(!link))
-                               break;
-                       memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+                       if (!WARN_ON_ONCE(!link))
+                               memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+                       rcu_read_unlock();
                         build.hdr_len = 24;
                         break;
                 }
@@ -3926,6 +3927,7 @@ begin:
                         goto begin;
  
                 skb = __skb_dequeue(&tx.skbs);
+               info = IEEE80211_SKB_CB(skb);
  
                 if (!skb_queue_empty(&tx.skbs)) {
                         spin_lock_bh(&fq->lock);
@@ -3970,7 +3972,7 @@ begin:
         }
  
  encap_out:
-       IEEE80211_SKB_CB(skb)->control.vif = vif;
+       info->control.vif = vif;
  
         if (tx.sta &&
             wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c

index a05c5b971789c796658a04e0a41a3748fdce75da..3a8612309137312f88dfe6bad79ac854fe76fcc1 100644 (file)
--- a/net/mac80211/wbrf.c
+++ b/net/mac80211/wbrf.c
@@ -23,8 +23,6 @@ void ieee80211_check_wbrf_support(struct ieee80211_local *local)
                 return;
  
         local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev);
-       dev_dbg(dev, "WBRF is %s supported\n",
-               local->wbrf_supported ? "" : "not");
  }
  
  static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end)
diff --git a/net/mctp/route.c b/net/mctp/route.c

index 7a47a58aa54b446acf7451ba6bdc1b834adda327..ceee44ea09d97b025a490058403cf435e3337ef5 100644 (file)
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
         spin_unlock_irqrestore(&mns->keys_lock, flags);
  
         if (!tagbits) {
-               kfree(key);
+               mctp_key_unref(key);
                 return ERR_PTR(-EBUSY);
         }
  
@@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
                 dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
                 if (!dev) {
                         rcu_read_unlock();
-                       return rc;
+                       goto out_free;
                 }
                 rt->dev = __mctp_dev_get(dev);
                 rcu_read_unlock();
@@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
                 rt->mtu = 0;
  
         } else {
-               return -EINVAL;
+               rc = -EINVAL;
+               goto out_free;
         }
  
         spin_lock_irqsave(&rt->dev->addrs_lock, flags);
@@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
                 rc = mctp_do_fragment_route(rt, skb, mtu, tag);
         }
  
+       /* route output functions consume the skb, even on error */
+       skb = NULL;
+
  out_release:
         if (!ext_rt)
                 mctp_route_release(rt);
  
         mctp_dev_put(tmp_rt.dev);
  
+out_free:
+       kfree_skb(skb);
         return rc;
  }
  
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c

index a536586742f28c1ddd54c79e62eb56fea267a8fa..7017dd60659dc7133318c1c82e3f429bea3a5d57 100644 (file)
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -13,17 +13,22 @@
  #include <uapi/linux/mptcp.h>
  #include "protocol.h"
  
-static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
+static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
  {
         struct mptcp_subflow_context *sf;
         struct nlattr *start;
         u32 flags = 0;
+       bool slow;
         int err;
  
+       if (inet_sk_state_load(sk) == TCP_LISTEN)
+               return 0;
+
         start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
         if (!start)
                 return -EMSGSIZE;
  
+       slow = lock_sock_fast(sk);
         rcu_read_lock();
         sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
         if (!sf) {
@@ -63,17 +68,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
                         sf->map_data_len) ||
             nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
             nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
-           nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
+           nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
                 err = -EMSGSIZE;
                 goto nla_failure;
         }
  
         rcu_read_unlock();
+       unlock_sock_fast(sk, slow);
         nla_nest_end(skb, start);
         return 0;
  
  nla_failure:
         rcu_read_unlock();
+       unlock_sock_fast(sk, slow);
         nla_nest_cancel(skb, start);
         return err;
  }
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c

index 74698582a2859e4d6ea40abaf8d0f31943e0d128..ad28da655f8bcc75e4ea05d4de2e2ab073ebc2c5 100644 (file)
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
         mptcp_data_unlock(sk);
  }
  
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
-                                  const struct mptcp_options_received *mp_opt)
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+                                    const struct mptcp_options_received *mp_opt)
  {
         struct sock *sk = (struct sock *)msk;
         struct sk_buff *skb;
  
-       mptcp_data_lock(sk);
         skb = skb_peek_tail(&sk->sk_receive_queue);
         if (skb) {
                 WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
@@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
         }
  
         pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
-       mptcp_data_unlock(sk);
  }
diff --git a/net/mptcp/options.c b/net/mptcp/options.c

index d2527d189a799319c068a5b76a5816cc7a905861..63fc0758c22d45e356d4edadff991b7e88ec8659 100644 (file)
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
                 /* subflows are fully established as soon as we get any
                  * additional ack, including ADD_ADDR.
                  */
-               subflow->fully_established = 1;
-               WRITE_ONCE(msk->fully_established, true);
-               goto check_notify;
+               goto set_fully_established;
         }
  
         /* If the first established packet does not contain MP_CAPABLE + data
@@ -983,10 +981,13 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
         if (mp_opt->deny_join_id0)
                 WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
  
-set_fully_established:
         if (unlikely(!READ_ONCE(msk->pm.server_side)))
                 pr_warn_once("bogus mpc option on established client sk");
-       mptcp_subflow_fully_established(subflow, mp_opt);
+
+set_fully_established:
+       mptcp_data_lock((struct sock *)msk);
+       __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+       mptcp_data_unlock((struct sock *)msk);
  
  check_notify:
         /* if the subflow is not already linked into the conn_list, we can't
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c

index 287a60381eae6e39c68d49a65530ea5bdc8a6675..58d17d9604e78fde24795219e53e18646c53b0de 100644 (file)
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
         }
  }
  
-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
-                                 const struct mptcp_addr_info *addr)
-{
-       int i;
-
-       for (i = 0; i < nr; i++) {
-               if (addrs[i].id == addr->id)
-                       return true;
-       }
-
-       return false;
-}
-
  /* Fill all the remote addresses into the array addrs[],
   * and return the array size.
   */
@@ -440,18 +427,34 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
                 msk->pm.subflows++;
                 addrs[i++] = remote;
         } else {
+               DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+               /* Forbid creation of new subflows matching existing
+                * ones, possibly already created by incoming ADD_ADDR
+                */
+               bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+               mptcp_for_each_subflow(msk, subflow)
+                       if (READ_ONCE(subflow->local_id) == local->id)
+                               __set_bit(subflow->remote_id, unavail_id);
+
                 mptcp_for_each_subflow(msk, subflow) {
                         ssk = mptcp_subflow_tcp_sock(subflow);
                         remote_address((struct sock_common *)ssk, &addrs[i]);
-                       addrs[i].id = subflow->remote_id;
+                       addrs[i].id = READ_ONCE(subflow->remote_id);
                         if (deny_id0 && !addrs[i].id)
                                 continue;
  
+                       if (test_bit(addrs[i].id, unavail_id))
+                               continue;
+
                         if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
                                 continue;
  
-                       if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
-                           msk->pm.subflows < subflows_max) {
+                       if (msk->pm.subflows < subflows_max) {
+                               /* forbid creating multiple address towards
+                                * this id
+                                */
+                               __set_bit(addrs[i].id, unavail_id);
                                 msk->pm.subflows++;
                                 i++;
                         }
@@ -799,18 +802,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
  
                 mptcp_for_each_subflow_safe(msk, subflow, tmp) {
                         struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+                       u8 remote_id = READ_ONCE(subflow->remote_id);
                         int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
-                       u8 id = subflow->local_id;
+                       u8 id = subflow_get_local_id(subflow);
  
-                       if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+                       if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
                                 continue;
                         if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
                                 continue;
  
                         pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
                                  rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
-                                i, rm_id, subflow->local_id, subflow->remote_id,
-                                msk->mpc_endpoint_id);
+                                i, rm_id, id, remote_id, msk->mpc_endpoint_id);
                         spin_unlock_bh(&msk->pm.lock);
                         mptcp_subflow_shutdown(sk, ssk, how);
  
@@ -901,7 +904,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
  }
  
  static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
-                                            struct mptcp_pm_addr_entry *entry)
+                                            struct mptcp_pm_addr_entry *entry,
+                                            bool needs_id)
  {
         struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
         unsigned int addr_max;
@@ -949,7 +953,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
                 }
         }
  
-       if (!entry->addr.id) {
+       if (!entry->addr.id && needs_id) {
  find_next:
                 entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
                                                     MPTCP_PM_MAX_ADDR_ID + 1,
@@ -960,7 +964,7 @@ find_next:
                 }
         }
  
-       if (!entry->addr.id)
+       if (!entry->addr.id && needs_id)
                 goto out;
  
         __set_bit(entry->addr.id, pernet->id_bitmap);
@@ -1092,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
         entry->ifindex = 0;
         entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
         entry->lsk = NULL;
-       ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+       ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
         if (ret < 0)
                 kfree(entry);
  
@@ -1285,6 +1289,18 @@ next:
         return 0;
  }
  
+static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
+                                     struct genl_info *info)
+{
+       struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+       if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
+                                        mptcp_pm_address_nl_policy, info->extack) &&
+           tb[MPTCP_PM_ADDR_ATTR_ID])
+               return true;
+       return false;
+}
+
  int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
  {
         struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
@@ -1326,7 +1342,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
                         goto out_free;
                 }
         }
-       ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+       ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
+                                               !mptcp_pm_has_addr_attr_id(attr, info));
         if (ret < 0) {
                 GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
                 goto out_free;
@@ -1980,7 +1997,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
         if (WARN_ON_ONCE(!sf))
                 return -EINVAL;
  
-       if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+       if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
                 return -EMSGSIZE;
  
         if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c

index efecbe3cf41533324a5df71da39f775dd2078ca6..bc97cc30f013abdba076aa93596dd213e9353eb8 100644 (file)
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
  }
  
  static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
-                                                   struct mptcp_pm_addr_entry *entry)
+                                                   struct mptcp_pm_addr_entry *entry,
+                                                   bool needs_id)
  {
         DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
         struct mptcp_pm_addr_entry *match = NULL;
@@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
         spin_lock_bh(&msk->pm.lock);
         list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
                 addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
-               if (addr_match && entry->addr.id == 0)
+               if (addr_match && entry->addr.id == 0 && needs_id)
                         entry->addr.id = e->addr.id;
                 id_match = (e->addr.id == entry->addr.id);
                 if (addr_match && id_match) {
@@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
                 }
  
                 *e = *entry;
-               if (!e->addr.id)
+               if (!e->addr.id && needs_id)
                         e->addr.id = find_next_zero_bit(id_bitmap,
                                                         MPTCP_PM_MAX_ADDR_ID + 1,
                                                         1);
@@ -130,10 +131,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
  int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
                                     struct mptcp_addr_info *skc)
  {
-       struct mptcp_pm_addr_entry new_entry;
+       struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
         __be16 msk_sport =  ((struct inet_sock *)
                              inet_sk((struct sock *)msk))->inet_sport;
  
+       spin_lock_bh(&msk->pm.lock);
+       list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+               if (mptcp_addresses_equal(&e->addr, skc, false)) {
+                       entry = e;
+                       break;
+               }
+       }
+       spin_unlock_bh(&msk->pm.lock);
+       if (entry)
+               return entry->addr.id;
+
         memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
         new_entry.addr = *skc;
         new_entry.addr.id = 0;
@@ -142,7 +154,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
         if (new_entry.addr.port == msk_sport)
                 new_entry.addr.port = 0;
  
-       return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+       return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
  }
  
  int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
@@ -187,7 +199,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
                 goto announce_err;
         }
  
-       err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+       err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false);
         if (err < 0) {
                 GENL_SET_ERR_MSG(info, "did not match address and id");
                 goto announce_err;
@@ -222,7 +234,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
  
         lock_sock(sk);
         mptcp_for_each_subflow(msk, subflow) {
-               if (subflow->local_id == 0) {
+               if (READ_ONCE(subflow->local_id) == 0) {
                         has_id_0 = true;
                         break;
                 }
@@ -367,7 +379,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
         }
  
         local.addr = addr_l;
-       err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+       err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
         if (err < 0) {
                 GENL_SET_ERR_MSG(info, "did not match address and id");
                 goto create_err;
@@ -483,6 +495,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
                 goto destroy_err;
         }
  
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+       if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
+               ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
+               addr_l.family = AF_INET6;
+       }
+       if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
+               ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
+               addr_r.family = AF_INET6;
+       }
+#endif
         if (addr_l.family != addr_r.family) {
                 GENL_SET_ERR_MSG(info, "address families do not match");
                 err = -EINVAL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c

index 3ed4709a75096025683149b2d4af0a1d5f24141c..7833a49f6214a194a282bba92671e9cdd945ad92 100644 (file)
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -85,7 +85,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
         subflow->subflow_id = msk->subflow_id++;
  
         /* This is the first subflow, always with id 0 */
-       subflow->local_id_valid = 1;
+       WRITE_ONCE(subflow->local_id, 0);
         mptcp_sock_graft(msk->first, sk->sk_socket);
         iput(SOCK_INODE(ssock));
  
@@ -1260,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
                 mpext = mptcp_get_ext(skb);
                 if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
                         TCP_SKB_CB(skb)->eor = 1;
+                       tcp_mark_push(tcp_sk(ssk), skb);
                         goto alloc_skb;
                 }
  
@@ -1505,8 +1506,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
  
  void mptcp_check_and_set_pending(struct sock *sk)
  {
-       if (mptcp_send_head(sk))
-               mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
+       if (mptcp_send_head(sk)) {
+               mptcp_data_lock(sk);
+               mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
+               mptcp_data_unlock(sk);
+       }
  }
  
  static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
@@ -1960,6 +1964,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
         if (copied <= 0)
                 return;
  
+       if (!msk->rcvspace_init)
+               mptcp_rcv_space_init(msk, msk->first);
+
         msk->rcvq_space.copied += copied;
  
         mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
@@ -2314,9 +2321,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
         if (__mptcp_check_fallback(msk))
                 return false;
  
-       if (tcp_rtx_and_write_queues_empty(sk))
-               return false;
-
         /* the closing socket has some data untransmitted and/or unacked:
          * some data in the mptcp rtx queue has not really xmitted yet.
          * keep it simple and re-inject the whole mptcp level rtx queue
@@ -3145,7 +3149,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
         mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
         WRITE_ONCE(msk->flags, 0);
         msk->cb_flags = 0;
-       msk->push_pending = 0;
         msk->recovery = false;
         msk->can_ack = false;
         msk->fully_established = false;
@@ -3161,6 +3164,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
         msk->bytes_received = 0;
         msk->bytes_sent = 0;
         msk->bytes_retrans = 0;
+       msk->rcvspace_init = 0;
  
         WRITE_ONCE(sk->sk_shutdown, 0);
         sk_error_report(sk);
@@ -3174,8 +3178,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
  
         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
  }
+
+static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
+{
+       const struct ipv6_pinfo *np = inet6_sk(sk);
+       struct ipv6_txoptions *opt;
+       struct ipv6_pinfo *newnp;
+
+       newnp = inet6_sk(newsk);
+
+       rcu_read_lock();
+       opt = rcu_dereference(np->opt);
+       if (opt) {
+               opt = ipv6_dup_options(newsk, opt);
+               if (!opt)
+                       net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
+       }
+       RCU_INIT_POINTER(newnp->opt, opt);
+       rcu_read_unlock();
+}
  #endif
  
+static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
+{
+       struct ip_options_rcu *inet_opt, *newopt = NULL;
+       const struct inet_sock *inet = inet_sk(sk);
+       struct inet_sock *newinet;
+
+       newinet = inet_sk(newsk);
+
+       rcu_read_lock();
+       inet_opt = rcu_dereference(inet->inet_opt);
+       if (inet_opt) {
+               newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+                                     inet_opt->opt.optlen, GFP_ATOMIC);
+               if (newopt)
+                       memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+                              inet_opt->opt.optlen);
+               else
+                       net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
+       }
+       RCU_INIT_POINTER(newinet->inet_opt, newopt);
+       rcu_read_unlock();
+}
+
  struct sock *mptcp_sk_clone_init(const struct sock *sk,
                                  const struct mptcp_options_received *mp_opt,
                                  struct sock *ssk,
@@ -3183,6 +3229,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
  {
         struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
         struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+       struct mptcp_subflow_context *subflow;
         struct mptcp_sock *msk;
  
         if (!nsk)
@@ -3195,6 +3242,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
  
         __mptcp_init_sock(nsk);
  
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+       if (nsk->sk_family == AF_INET6)
+               mptcp_copy_ip6_options(nsk, sk);
+       else
+#endif
+               mptcp_copy_ip_options(nsk, sk);
+
         msk = mptcp_sk(nsk);
         msk->local_key = subflow_req->local_key;
         msk->token = subflow_req->token;
@@ -3206,7 +3260,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
         msk->write_seq = subflow_req->idsn + 1;
         msk->snd_nxt = msk->write_seq;
         msk->snd_una = msk->write_seq;
-       msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+       msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
         msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
         mptcp_init_sched(msk, mptcp_sk(sk)->sched);
  
@@ -3223,7 +3277,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
  
         /* The msk maintain a ref to each subflow in the connections list */
         WRITE_ONCE(msk->first, ssk);
-       list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+       subflow = mptcp_subflow_ctx(ssk);
+       list_add(&subflow->node, &msk->conn_list);
         sock_hold(ssk);
  
         /* new mpc subflow takes ownership of the newly
@@ -3238,6 +3293,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
         __mptcp_propagate_sndbuf(nsk, ssk);
  
         mptcp_rcv_space_init(msk, ssk);
+
+       if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+               __mptcp_subflow_fully_established(msk, subflow, mp_opt);
         bh_unlock_sock(nsk);
  
         /* note: the newly allocated socket refcount is 2 now */
@@ -3248,6 +3306,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
  {
         const struct tcp_sock *tp = tcp_sk(ssk);
  
+       msk->rcvspace_init = 1;
         msk->rcvq_space.copied = 0;
         msk->rcvq_space.rtt_us = 0;
  
@@ -3258,8 +3317,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
                                       TCP_INIT_CWND * tp->advmss);
         if (msk->rcvq_space.space == 0)
                 msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
-
-       WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
  }
  
  void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
@@ -3333,8 +3390,7 @@ static void mptcp_release_cb(struct sock *sk)
         struct mptcp_sock *msk = mptcp_sk(sk);
  
         for (;;) {
-               unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
-                                     msk->push_pending;
+               unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
                 struct list_head join_list;
  
                 if (!flags)
@@ -3350,7 +3406,6 @@ static void mptcp_release_cb(struct sock *sk)
                  *    datapath acquires the msk socket spinlock while helding
                  *    the subflow socket lock
                  */
-               msk->push_pending = 0;
                 msk->cb_flags &= ~flags;
                 spin_unlock_bh(&sk->sk_lock.slock);
  
@@ -3478,13 +3533,8 @@ void mptcp_finish_connect(struct sock *ssk)
          * accessing the field below
          */
         WRITE_ONCE(msk->local_key, subflow->local_key);
-       WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
-       WRITE_ONCE(msk->snd_nxt, msk->write_seq);
-       WRITE_ONCE(msk->snd_una, msk->write_seq);
  
         mptcp_pm_new_connection(msk, ssk, 0);
-
-       mptcp_rcv_space_init(msk, ssk);
  }
  
  void mptcp_sock_graft(struct sock *sk, struct socket *parent)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h

index 3517f2d24a226ff0be1adec800044810f1aa31c6..07f6242afc1ae09d3c17aadfe7bb104eb3cf177c 100644 (file)
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -286,7 +286,6 @@ struct mptcp_sock {
         int             rmem_released;
         unsigned long   flags;
         unsigned long   cb_flags;
-       unsigned long   push_pending;
         bool            recovery;               /* closing subflow write queue reinjected */
         bool            can_ack;
         bool            fully_established;
@@ -305,7 +304,8 @@ struct mptcp_sock {
                         nodelay:1,
                         fastopening:1,
                         in_accept_queue:1,
-                       free_first:1;
+                       free_first:1,
+                       rcvspace_init:1;
         struct work_struct work;
         struct sk_buff  *ooo_last_skb;
         struct rb_root  out_of_order_queue;
@@ -491,10 +491,9 @@ struct mptcp_subflow_context {
                 remote_key_valid : 1,        /* received the peer key from */
                 disposable : 1,     /* ctx can be free at ulp release time */
                 stale : 1,          /* unable to snd/rcv data, do not use for xmit */
-               local_id_valid : 1, /* local_id is correctly initialized */
                 valid_csum_seen : 1,        /* at least one csum validated */
                 is_mptfo : 1,       /* subflow is doing TFO */
-               __unused : 9;
+               __unused : 10;
         bool    data_avail;
         bool    scheduled;
         u32     remote_nonce;
@@ -505,7 +504,7 @@ struct mptcp_subflow_context {
                 u8      hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
                 u64     iasn;       /* initial ack sequence number, MPC subflows only */
         };
-       u8      local_id;
+       s16     local_id;           /* if negative not initialized yet */
         u8      remote_id;
         u8      reset_seen:1;
         u8      reset_transient:1;
@@ -556,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
  {
         memset(&subflow->reset, 0, sizeof(subflow->reset));
         subflow->request_mptcp = 1;
+       WRITE_ONCE(subflow->local_id, -1);
  }
  
  static inline u64
@@ -622,8 +622,9 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
  unsigned int mptcp_close_timeout(const struct sock *sk);
  int mptcp_get_pm_type(const struct net *net);
  const char *mptcp_get_scheduler(const struct net *net);
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
-                                    const struct mptcp_options_received *mp_opt);
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+                                      struct mptcp_subflow_context *subflow,
+                                      const struct mptcp_options_received *mp_opt);
  bool __mptcp_retransmit_pending_data(struct sock *sk);
  void mptcp_check_and_set_pending(struct sock *sk);
  void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -789,6 +790,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
                READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
  }
  
+static inline void mptcp_write_space(struct sock *sk)
+{
+       if (sk_stream_is_writeable(sk)) {
+               /* pairs with memory barrier in mptcp_poll */
+               smp_mb();
+               if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+                       sk_stream_write_space(sk);
+       }
+}
+
  static inline void __mptcp_sync_sndbuf(struct sock *sk)
  {
         struct mptcp_subflow_context *subflow;
@@ -807,6 +818,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
  
         /* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
         WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+       mptcp_write_space(sk);
  }
  
  /* The called held both the msk socket and the subflow socket locks,
@@ -837,16 +849,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
         local_bh_enable();
  }
  
-static inline void mptcp_write_space(struct sock *sk)
-{
-       if (sk_stream_is_writeable(sk)) {
-               /* pairs with memory barrier in mptcp_poll */
-               smp_mb();
-               if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
-                       sk_stream_write_space(sk);
-       }
-}
-
  void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
  
  #define MPTCP_TOKEN_MAX_RETRIES        4
@@ -952,8 +954,8 @@ void mptcp_event_pm_listener(const struct sock *ssk,
                              enum mptcp_event_type event);
  bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
  
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
-                                  const struct mptcp_options_received *mp_opt);
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+                                    const struct mptcp_options_received *mp_opt);
  void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
                                               struct request_sock *req);
  
@@ -1021,6 +1023,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
  int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
  int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
  
+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+       int local_id = READ_ONCE(subflow->local_id);
+
+       if (local_id < 0)
+               return 0;
+       return local_id;
+}
+
  void __init mptcp_pm_nl_init(void);
  void mptcp_pm_nl_work(struct mptcp_sock *msk);
  void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
@@ -1128,7 +1139,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
  {
         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
  
-       return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+       return (1 << sk->sk_state) &
+              (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
                is_active_ssk(subflow) &&
                !subflow->conn_finished;
  }
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c

index 0dcb721c89d193e8943aa414610fcf4284d51f38..71ba86246ff893c5bf65f77802510b52c3d68fd4 100644 (file)
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
  
  void __mptcp_sync_state(struct sock *sk, int state)
  {
+       struct mptcp_subflow_context *subflow;
         struct mptcp_sock *msk = mptcp_sk(sk);
+       struct sock *ssk = msk->first;
+
+       subflow = mptcp_subflow_ctx(ssk);
+       __mptcp_propagate_sndbuf(sk, ssk);
+       if (!msk->rcvspace_init)
+               mptcp_rcv_space_init(msk, ssk);
  
-       __mptcp_propagate_sndbuf(sk, msk->first);
         if (sk->sk_state == TCP_SYN_SENT) {
+               /* subflow->idsn is always available is TCP_SYN_SENT state,
+                * even for the FASTOPEN scenarios
+                */
+               WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+               WRITE_ONCE(msk->snd_nxt, msk->write_seq);
                 mptcp_set_state(sk, state);
                 sk->sk_state_change(sk);
         }
  }
  
-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
-{
-       struct mptcp_sock *msk = mptcp_sk(sk);
-
-       mptcp_data_lock(sk);
-       if (!sock_owned_by_user(sk)) {
-               __mptcp_sync_state(sk, ssk->sk_state);
-       } else {
-               msk->pending_state = ssk->sk_state;
-               __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
-       }
-       mptcp_data_unlock(sk);
-}
-
  static void subflow_set_remote_key(struct mptcp_sock *msk,
                                    struct mptcp_subflow_context *subflow,
                                    const struct mptcp_options_received *mp_opt)
@@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
         atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
  }
  
+static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
+                                 struct mptcp_subflow_context *subflow,
+                                 const struct mptcp_options_received *mp_opt)
+{
+       struct mptcp_sock *msk = mptcp_sk(sk);
+
+       mptcp_data_lock(sk);
+       if (mp_opt) {
+               /* Options are available only in the non fallback cases
+                * avoid updating rx path fields otherwise
+                */
+               WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+               WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+               subflow_set_remote_key(msk, subflow, mp_opt);
+       }
+
+       if (!sock_owned_by_user(sk)) {
+               __mptcp_sync_state(sk, ssk->sk_state);
+       } else {
+               msk->pending_state = ssk->sk_state;
+               __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+       }
+       mptcp_data_unlock(sk);
+}
+
  static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
  {
         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -499,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
                 if (mp_opt.deny_join_id0)
                         WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
                 subflow->mp_capable = 1;
-               subflow_set_remote_key(msk, subflow, &mp_opt);
                 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
                 mptcp_finish_connect(sk);
-               mptcp_propagate_state(parent, sk);
+               mptcp_propagate_state(parent, sk, subflow, &mp_opt);
         } else if (subflow->request_join) {
                 u8 hmac[SHA256_DIGEST_SIZE];
  
@@ -514,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
                 subflow->backup = mp_opt.backup;
                 subflow->thmac = mp_opt.thmac;
                 subflow->remote_nonce = mp_opt.nonce;
-               subflow->remote_id = mp_opt.join_id;
+               WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
                 pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
                          subflow, subflow->thmac, subflow->remote_nonce,
                          subflow->backup);
@@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
                 }
         } else if (mptcp_check_fallback(sk)) {
  fallback:
-               mptcp_rcv_space_init(msk, sk);
-               mptcp_propagate_state(parent, sk);
+               mptcp_propagate_state(parent, sk, subflow, NULL);
         }
         return;
  
@@ -557,8 +577,8 @@ do_reset:
  
  static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
  {
-       subflow->local_id = local_id;
-       subflow->local_id_valid = 1;
+       WARN_ON_ONCE(local_id < 0 || local_id > 255);
+       WRITE_ONCE(subflow->local_id, local_id);
  }
  
  static int subflow_chk_local_id(struct sock *sk)
@@ -567,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk)
         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
         int err;
  
-       if (likely(subflow->local_id_valid))
+       if (likely(subflow->local_id >= 0))
                 return 0;
  
         err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
         kfree_rcu(ctx, rcu);
  }
  
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
-                                    const struct mptcp_options_received *mp_opt)
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+                                      struct mptcp_subflow_context *subflow,
+                                      const struct mptcp_options_received *mp_opt)
  {
-       struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
         subflow_set_remote_key(msk, subflow, mp_opt);
         subflow->fully_established = 1;
         WRITE_ONCE(msk->fully_established, true);
  
         if (subflow->is_mptfo)
-               mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+               __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
  }
  
  static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -834,7 +853,6 @@ create_child:
                          * mpc option
                          */
                         if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
-                               mptcp_subflow_fully_established(ctx, &mp_opt);
                                 mptcp_pm_fully_established(owner, child);
                                 ctx->pm_notified = 1;
                         }
@@ -1549,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
         pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
                  remote_token, local_id, remote_id);
         subflow->remote_token = remote_token;
-       subflow->remote_id = remote_id;
+       WRITE_ONCE(subflow->remote_id, remote_id);
         subflow->request_join = 1;
         subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
         subflow->subflow_id = msk->subflow_id++;
@@ -1713,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
         pr_debug("subflow=%p", ctx);
  
         ctx->tcp_sock = sk;
+       WRITE_ONCE(ctx->local_id, -1);
  
         return ctx;
  }
@@ -1744,10 +1763,9 @@ static void subflow_state_change(struct sock *sk)
         msk = mptcp_sk(parent);
         if (subflow_simultaneous_connect(sk)) {
                 mptcp_do_fallback(sk);
-               mptcp_rcv_space_init(msk, sk);
                 pr_fallback(msk);
                 subflow->conn_finished = 1;
-               mptcp_propagate_state(parent, sk);
+               mptcp_propagate_state(parent, sk, subflow, NULL);
         }
  
         /* as recvmsg() does not acquire the subflow socket for ssk selection
@@ -1949,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req,
                 new_ctx->idsn = subflow_req->idsn;
  
                 /* this is the first subflow, id is always 0 */
-               new_ctx->local_id_valid = 1;
+               subflow_set_local_id(new_ctx, 0);
         } else if (subflow_req->mp_join) {
                 new_ctx->ssn_offset = subflow_req->ssn_offset;
                 new_ctx->mp_join = 1;
                 new_ctx->fully_established = 1;
                 new_ctx->remote_key_valid = 1;
                 new_ctx->backup = subflow_req->backup;
-               new_ctx->remote_id = subflow_req->remote_id;
+               WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
                 new_ctx->token = subflow_req->token;
                 new_ctx->thmac = subflow_req->thmac;
  
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h

index 21f7860e8fa1fd4f1f46c9ad278bfeb261090152..cb48a2b9cb9fd708c2f99adadfd4b21671b44a4a 100644 (file)
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -30,6 +30,7 @@
  #define mtype_del              IPSET_TOKEN(MTYPE, _del)
  #define mtype_list             IPSET_TOKEN(MTYPE, _list)
  #define mtype_gc               IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc                IPSET_TOKEN(MTYPE, _cancel_gc)
  #define mtype                  MTYPE
  
  #define get_ext(set, map, id)  ((map)->extensions + ((set)->dsize * (id)))
@@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set)
  {
         struct mtype *map = set->data;
  
-       if (SET_WITH_TIMEOUT(set))
-               del_timer_sync(&map->gc);
-
         if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
                 mtype_ext_cleanup(set);
         ip_set_free(map->members);
@@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t)
         add_timer(&map->gc);
  }
  
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+       struct mtype *map = set->data;
+
+       if (SET_WITH_TIMEOUT(set))
+               del_timer_sync(&map->gc);
+}
+
  static const struct ip_set_type_variant mtype = {
         .kadt   = mtype_kadt,
         .uadt   = mtype_uadt,
@@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = {
         .head   = mtype_head,
         .list   = mtype_list,
         .same_set = mtype_same_set,
+       .cancel_gc = mtype_cancel_gc,
  };
  
  #endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c

index 4c133e06be1de2f8972b50ac87e6b0b7bfc9ac6d..3184cc6be4c9d375fb2bda49d1bbec6623618c77 100644 (file)
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
         return ret;
  
  cleanup:
+       set->variant->cancel_gc(set);
         set->variant->destroy(set);
  put_out:
         module_put(set->type->me);
@@ -1182,6 +1183,14 @@ ip_set_destroy_set(struct ip_set *set)
         kfree(set);
  }
  
+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+       struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+       ip_set_destroy_set(set);
+}
+
  static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
                           const struct nlattr * const attr[])
  {
@@ -1193,8 +1202,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
         if (unlikely(protocol_min_failed(attr)))
                 return -IPSET_ERR_PROTOCOL;
  
-       /* Must wait for flush to be really finished in list:set */
-       rcu_barrier();
  
         /* Commands are serialized and references are
          * protected by the ip_set_ref_lock.
@@ -1206,8 +1213,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
          * counter, so if it's already zero, we can proceed
          * without holding the lock.
          */
-       read_lock_bh(&ip_set_ref_lock);
         if (!attr[IPSET_ATTR_SETNAME]) {
+               /* Must wait for flush to be really finished in list:set */
+               rcu_barrier();
+               read_lock_bh(&ip_set_ref_lock);
                 for (i = 0; i < inst->ip_set_max; i++) {
                         s = ip_set(inst, i);
                         if (s && (s->ref || s->ref_netlink)) {
@@ -1221,6 +1230,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
                         s = ip_set(inst, i);
                         if (s) {
                                 ip_set(inst, i) = NULL;
+                               /* Must cancel garbage collectors */
+                               s->variant->cancel_gc(s);
                                 ip_set_destroy_set(s);
                         }
                 }
@@ -1228,6 +1239,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
                 inst->is_destroyed = false;
         } else {
                 u32 flags = flag_exist(info->nlh);
+               u16 features = 0;
+
+               read_lock_bh(&ip_set_ref_lock);
                 s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
                                     &i);
                 if (!s) {
@@ -1238,10 +1252,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
                         ret = -IPSET_ERR_BUSY;
                         goto out;
                 }
+               features = s->type->features;
                 ip_set(inst, i) = NULL;
                 read_unlock_bh(&ip_set_ref_lock);
-
-               ip_set_destroy_set(s);
+               if (features & IPSET_TYPE_NAME) {
+                       /* Must wait for flush to be really finished  */
+                       rcu_barrier();
+               }
+               /* Must cancel garbage collectors */
+               s->variant->cancel_gc(s);
+               call_rcu(&s->rcu, ip_set_destroy_set_rcu);
         }
         return 0;
  out:
@@ -1394,9 +1414,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
         ip_set(inst, to_id) = from;
         write_unlock_bh(&ip_set_ref_lock);
  
-       /* Make sure all readers of the old set pointers are completed. */
-       synchronize_rcu();
-
         return 0;
  }
  
@@ -2362,6 +2379,7 @@ ip_set_net_exit(struct net *net)
                 set = ip_set(inst, i);
                 if (set) {
                         ip_set(inst, i) = NULL;
+                       set->variant->cancel_gc(set);
                         ip_set_destroy_set(set);
                 }
         }
@@ -2409,8 +2427,11 @@ ip_set_fini(void)
  {
         nf_unregister_sockopt(&so_set);
         nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
         unregister_pernet_subsys(&ip_set_net_ops);
+
+       /* Wait for call_rcu() in destroy */
+       rcu_barrier();
+
         pr_debug("these are the famous last words\n");
  }
  
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h

index cbf80da9a01caf0616d7d77d5be16521b6c0d47e..cf3ce72c3de645168b4698176518a02df6a6fa5a 100644 (file)
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {};
  #undef mtype_gc_do
  #undef mtype_gc
  #undef mtype_gc_init
+#undef mtype_cancel_gc
  #undef mtype_variant
  #undef mtype_data_match
  
@@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {};
  #define mtype_gc_do            IPSET_TOKEN(MTYPE, _gc_do)
  #define mtype_gc               IPSET_TOKEN(MTYPE, _gc)
  #define mtype_gc_init          IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc                IPSET_TOKEN(MTYPE, _cancel_gc)
  #define mtype_variant          IPSET_TOKEN(MTYPE, _variant)
  #define mtype_data_match       IPSET_TOKEN(MTYPE, _data_match)
  
@@ -430,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
         u32 i;
  
         for (i = 0; i < jhash_size(t->htable_bits); i++) {
-               n = __ipset_dereference(hbucket(t, i));
+               n = (__force struct hbucket *)hbucket(t, i);
                 if (!n)
                         continue;
                 if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -450,10 +452,7 @@ mtype_destroy(struct ip_set *set)
         struct htype *h = set->data;
         struct list_head *l, *lt;
  
-       if (SET_WITH_TIMEOUT(set))
-               cancel_delayed_work_sync(&h->gc.dwork);
-
-       mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+       mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
         list_for_each_safe(l, lt, &h->ad) {
                 list_del(l);
                 kfree(l);
@@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc)
         queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
  }
  
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+       struct htype *h = set->data;
+
+       if (SET_WITH_TIMEOUT(set))
+               cancel_delayed_work_sync(&h->gc.dwork);
+}
+
  static int
  mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
           struct ip_set_ext *mext, u32 flags);
@@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = {
         .uref   = mtype_uref,
         .resize = mtype_resize,
         .same_set = mtype_same_set,
+       .cancel_gc = mtype_cancel_gc,
         .region_lock = true,
  };
  
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c

index e162636525cfb4ad02de58982382a289e5bcbc45..6c3f28bc59b3259f0033cd4adc0ba5711db08c26 100644 (file)
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
         struct list_set *map = set->data;
         struct set_elem *e, *n;
  
-       if (SET_WITH_TIMEOUT(set))
-               timer_shutdown_sync(&map->gc);
-
         list_for_each_entry_safe(e, n, &map->members, list) {
                 list_del(&e->list);
                 ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
                a->extensions == b->extensions;
  }
  
+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+       struct list_set *map = set->data;
+
+       if (SET_WITH_TIMEOUT(set))
+               timer_shutdown_sync(&map->gc);
+}
+
  static const struct ip_set_type_variant set_variant = {
         .kadt   = list_set_kadt,
         .uadt   = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
         .head   = list_set_head,
         .list   = list_set_list,
         .same_set = list_set_same_set,
+       .cancel_gc = list_set_cancel_gc,
  };
  
  static void
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index 2e5f3864d353a39cfde138b725e790d7290b82c9..5b876fa7f9af9e5dfe950929b29f0fc92daf9bab 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
         .get_tuple_skb  = nf_conntrack_get_tuple_skb,
         .attach         = nf_conntrack_attach,
         .set_closing    = nf_conntrack_set_closing,
+       .confirm        = __nf_conntrack_confirm,
  };
  
  void nf_conntrack_init_end(void)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c

index 0c22a02c2035ccb9c760d71fe2e5dd9c461bf239..3b846cbdc050d324626586fb6ece00985efd874b 100644 (file)
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -876,6 +876,7 @@ struct ctnetlink_filter_u32 {
  
  struct ctnetlink_filter {
         u8 family;
+       bool zone_filter;
  
         u_int32_t orig_flags;
         u_int32_t reply_flags;
@@ -992,9 +993,12 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
         if (err)
                 goto err_filter;
  
-       err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
-       if (err < 0)
-               goto err_filter;
+       if (cda[CTA_ZONE]) {
+               err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+               if (err < 0)
+                       goto err_filter;
+               filter->zone_filter = true;
+       }
  
         if (!cda[CTA_FILTER])
                 return filter;
@@ -1148,7 +1152,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
         if (filter->family && nf_ct_l3num(ct) != filter->family)
                 goto ignore_entry;
  
-       if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID &&
+       if (filter->zone_filter &&
             !nf_ct_zone_equal_any(ct, &filter->zone))
                 goto ignore_entry;
  
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c

index c6bd533983c1ff275796b789cdb973c09f646984..4cc97f971264ed779434ab4597dd0162586b3736 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
                         pr_debug("Setting vtag %x for secondary conntrack\n",
                                  sh->vtag);
                         ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
-               } else {
+               } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) {
                 /* If it is a shutdown ack OOTB packet, we expect a return
                    shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
                         pr_debug("Setting vtag %x for new conn OOTB\n",
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c

index e573be5afde7a591e00e799aadeadcf455b31f05..ae493599a3ef03415f6c40e942cdab700acb84c6 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
                             const struct sk_buff *skb,
                             unsigned int dataoff,
                             const struct tcphdr *tcph,
-                           u32 end, u32 win)
+                           u32 end, u32 win,
+                           enum ip_conntrack_dir dir)
  {
         /* SYN-ACK in reply to a SYN
          * or SYN from reply direction in simultaneous open.
@@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
          * Both sides must send the Window Scale option
          * to enable window scaling in either direction.
          */
-       if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+       if (dir == IP_CT_DIR_REPLY &&
+           !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
               receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
                 sender->td_scale = 0;
                 receiver->td_scale = 0;
@@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
                 if (tcph->syn) {
                         tcp_init_sender(sender, receiver,
                                         skb, dataoff, tcph,
-                                       end, win);
+                                       end, win, dir);
                         if (!tcph->ack)
                                 /* Simultaneous open */
                                 return NFCT_TCP_ACCEPT;
@@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
                  */
                 tcp_init_sender(sender, receiver,
                                 skb, dataoff, tcph,
-                               end, win);
+                               end, win, dir);
  
                 if (dir == IP_CT_DIR_REPLY && !tcph->ack)
                         return NFCT_TCP_ACCEPT;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c

index 920a5a29ae1dceba6849aaad6d62701567d3ec99..a0571339239c40ded96c4a9466d53d5de2887ed5 100644 (file)
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
         return 0;
  }
  
+static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
+                                            enum flow_offload_tuple_dir dir)
+{
+       struct dst_entry *dst = route->tuple[dir].dst;
+
+       route->tuple[dir].dst = NULL;
+
+       return dst;
+}
+
  static int flow_offload_fill_route(struct flow_offload *flow,
-                                  const struct nf_flow_route *route,
+                                  struct nf_flow_route *route,
                                    enum flow_offload_tuple_dir dir)
  {
         struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
-       struct dst_entry *dst = route->tuple[dir].dst;
+       struct dst_entry *dst = nft_route_dst_fetch(route, dir);
         int i, j = 0;
  
         switch (flow_tuple->l3proto) {
@@ -122,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
                        ETH_ALEN);
                 flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
                 flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+               dst_release(dst);
                 break;
         case FLOW_OFFLOAD_XMIT_XFRM:
         case FLOW_OFFLOAD_XMIT_NEIGH:
@@ -146,7 +157,7 @@ static void nft_flow_dst_release(struct flow_offload *flow,
  }
  
  void flow_offload_route_init(struct flow_offload *flow,
-                           const struct nf_flow_route *route)
+                            struct nf_flow_route *route)
  {
         flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
         flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c

index 8cc52d2bd31be518df778bbe2cfaad6172d90dbc..e16f158388bbe568cddc1be5a0a6d16069897822 100644 (file)
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
                 return;
         }
  
-       BUG_ON(loggers[pf][type] == NULL);
-
         rcu_read_lock();
         logger = rcu_dereference(loggers[pf][type]);
-       module_put(logger->me);
+       if (!logger)
+               WARN_ON_ONCE(1);
+       else
+               module_put(logger->me);
         rcu_read_unlock();
  }
  EXPORT_SYMBOL_GPL(nf_logger_put);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c

index c3d7ecbc777ce08525bedee77d637c18682d816c..016c816d91cbc49bfbd5417c295e26667b7be179 100644 (file)
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -551,8 +551,11 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
  find_free_id:
         if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
                 off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
-       else
+       else if ((range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL) ||
+                maniptype != NF_NAT_MANIP_DST)
                 off = get_random_u16();
+       else
+               off = 0;
  
         attempts = range_size;
         if (attempts > NF_NAT_MAX_ATTEMPTS)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c

index c537104411e7d1b1c1b449b55c7a3c43fb7e2ac3..7e938c7397dda5416d16ab9312f5f9876afeca79 100644 (file)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -684,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
         return err;
  }
  
-static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
-                                  struct nft_flowtable *flowtable)
+static struct nft_trans *
+nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+                       struct nft_flowtable *flowtable)
  {
         struct nft_trans *trans;
  
         trans = nft_trans_alloc(ctx, msg_type,
                                 sizeof(struct nft_trans_flowtable));
         if (trans == NULL)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
  
         if (msg_type == NFT_MSG_NEWFLOWTABLE)
                 nft_activate_next(ctx->net, flowtable);
@@ -701,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
         nft_trans_flowtable(trans) = flowtable;
         nft_trans_commit_list_add_tail(ctx->net, trans);
  
-       return 0;
+       return trans;
  }
  
  static int nft_delflowtable(struct nft_ctx *ctx,
                             struct nft_flowtable *flowtable)
  {
-       int err;
+       struct nft_trans *trans;
  
-       err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
-       if (err < 0)
-               return err;
+       trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
  
         nft_deactivate_next(ctx->net, flowtable);
         nft_use_dec(&ctx->table->use);
  
-       return err;
+       return 0;
  }
  
  static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg)
@@ -1251,6 +1252,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
         return 0;
  
  err_register_hooks:
+       ctx->table->flags |= NFT_TABLE_F_DORMANT;
         nft_trans_destroy(trans);
         return ret;
  }
@@ -2080,7 +2082,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
         struct nft_hook *hook;
         int err;
  
-       hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
+       hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
         if (!hook) {
                 err = -ENOMEM;
                 goto err_hook_alloc;
@@ -2503,19 +2505,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
         RCU_INIT_POINTER(chain->blob_gen_0, blob);
         RCU_INIT_POINTER(chain->blob_gen_1, blob);
  
-       err = nf_tables_register_hook(net, table, chain);
-       if (err < 0)
-               goto err_destroy_chain;
-
         if (!nft_use_inc(&table->use)) {
                 err = -EMFILE;
-               goto err_use;
+               goto err_destroy_chain;
         }
  
         trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
         if (IS_ERR(trans)) {
                 err = PTR_ERR(trans);
-               goto err_unregister_hook;
+               goto err_trans;
         }
  
         nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
@@ -2523,17 +2521,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                 nft_trans_chain_policy(trans) = policy;
  
         err = nft_chain_add(table, chain);
-       if (err < 0) {
-               nft_trans_destroy(trans);
-               goto err_unregister_hook;
-       }
+       if (err < 0)
+               goto err_chain_add;
+
+       /* This must be LAST to ensure no packets are walking over this chain. */
+       err = nf_tables_register_hook(net, table, chain);
+       if (err < 0)
+               goto err_register_hook;
  
         return 0;
  
-err_unregister_hook:
+err_register_hook:
+       nft_chain_del(chain);
+err_chain_add:
+       nft_trans_destroy(trans);
+err_trans:
         nft_use_dec_restore(&table->use);
-err_use:
-       nf_tables_unregister_hook(net, table, chain);
  err_destroy_chain:
         nf_tables_chain_destroy(ctx);
  
@@ -7551,11 +7554,15 @@ nla_put_failure:
         return -1;
  }
  
-static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
  {
         const struct nft_object_type *type;
  
         list_for_each_entry(type, &nf_tables_objects, list) {
+               if (type->family != NFPROTO_UNSPEC &&
+                   type->family != family)
+                       continue;
+
                 if (objtype == type->type)
                         return type;
         }
@@ -7563,11 +7570,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
  }
  
  static const struct nft_object_type *
-nft_obj_type_get(struct net *net, u32 objtype)
+nft_obj_type_get(struct net *net, u32 objtype, u8 family)
  {
         const struct nft_object_type *type;
  
-       type = __nft_obj_type_get(objtype);
+       type = __nft_obj_type_get(objtype, family);
         if (type != NULL && try_module_get(type->owner))
                 return type;
  
@@ -7660,7 +7667,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
                 if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
                         return -EOPNOTSUPP;
  
-               type = __nft_obj_type_get(objtype);
+               type = __nft_obj_type_get(objtype, family);
                 if (WARN_ON_ONCE(!type))
                         return -ENOENT;
  
@@ -7674,7 +7681,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
         if (!nft_use_inc(&table->use))
                 return -EMFILE;
  
-       type = nft_obj_type_get(net, objtype);
+       type = nft_obj_type_get(net, objtype, family);
         if (IS_ERR(type)) {
                 err = PTR_ERR(type);
                 goto err_type;
@@ -8451,9 +8458,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
         u8 family = info->nfmsg->nfgen_family;
         const struct nf_flowtable_type *type;
         struct nft_flowtable *flowtable;
-       struct nft_hook *hook, *next;
         struct net *net = info->net;
         struct nft_table *table;
+       struct nft_trans *trans;
         struct nft_ctx ctx;
         int err;
  
@@ -8533,34 +8540,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
         err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable,
                                        extack, true);
         if (err < 0)
-               goto err4;
+               goto err_flowtable_parse_hooks;
  
         list_splice(&flowtable_hook.list, &flowtable->hook_list);
         flowtable->data.priority = flowtable_hook.priority;
         flowtable->hooknum = flowtable_hook.num;
  
+       trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto err_flowtable_trans;
+       }
+
+       /* This must be LAST to ensure no packets are walking over this flowtable. */
         err = nft_register_flowtable_net_hooks(ctx.net, table,
                                                &flowtable->hook_list,
                                                flowtable);
-       if (err < 0) {
-               nft_hooks_destroy(&flowtable->hook_list);
-               goto err4;
-       }
-
-       err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
         if (err < 0)
-               goto err5;
+               goto err_flowtable_hooks;
  
         list_add_tail_rcu(&flowtable->list, &table->flowtables);
  
         return 0;
-err5:
-       list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
-               nft_unregister_flowtable_hook(net, flowtable, hook);
-               list_del_rcu(&hook->list);
-               kfree_rcu(hook, rcu);
-       }
-err4:
+
+err_flowtable_hooks:
+       nft_trans_destroy(trans);
+err_flowtable_trans:
+       nft_hooks_destroy(&flowtable->hook_list);
+err_flowtable_parse_hooks:
         flowtable->data.type->free(&flowtable->data);
  err3:
         module_put(type->owner);
@@ -9823,6 +9830,7 @@ dead_elem:
  struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
  {
         struct nft_set_elem_catchall *catchall, *next;
+       u64 tstamp = nft_net_tstamp(gc->net);
         const struct nft_set *set = gc->set;
         struct nft_elem_priv *elem_priv;
         struct nft_set_ext *ext;
@@ -9832,7 +9840,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
         list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
                 ext = nft_set_elem_ext(set, catchall->elem);
  
-               if (!nft_set_elem_expired(ext))
+               if (!__nft_set_elem_expired(ext, tstamp))
                         continue;
  
                 gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -10618,6 +10626,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
         bool genid_ok;
  
         mutex_lock(&nft_net->commit_mutex);
+       nft_net->tstamp = get_jiffies_64();
  
         genid_ok = genid == 0 || nft_net->base_seq == genid;
         if (!genid_ok)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

index 171d1f52d3dd0da711cd63b23ec31d72fa88cdd2..5cf38fc0a366ac55c0ce11798baf7fb93c88283f 100644 (file)
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -232,18 +232,25 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
         if (verdict == NF_ACCEPT ||
             verdict == NF_REPEAT ||
             verdict == NF_STOP) {
+               unsigned int ct_verdict = verdict;
+
                 rcu_read_lock();
                 ct_hook = rcu_dereference(nf_ct_hook);
                 if (ct_hook)
-                       verdict = ct_hook->update(entry->state.net, entry->skb);
+                       ct_verdict = ct_hook->update(entry->state.net, entry->skb);
                 rcu_read_unlock();
  
-               switch (verdict & NF_VERDICT_MASK) {
+               switch (ct_verdict & NF_VERDICT_MASK) {
+               case NF_ACCEPT:
+                       /* follow userspace verdict, could be REPEAT */
+                       break;
                 case NF_STOLEN:
                         nf_queue_entry_free(entry);
                         return;
+               default:
+                       verdict = ct_verdict & NF_VERDICT_MASK;
+                       break;
                 }
-
         }
         nf_reinject(entry, verdict);
  }
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c

index f0eeda97bfcd9da2ea98d4ae69c5a1d4a6c30956..d3d11dede54507262022725a5e54a12f0def7f89 100644 (file)
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
  
  static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
         [NFTA_TARGET_NAME]      = { .type = NLA_NUL_STRING },
-       [NFTA_TARGET_REV]       = { .type = NLA_U32 },
+       [NFTA_TARGET_REV]       = NLA_POLICY_MAX(NLA_BE32, 255),
         [NFTA_TARGET_INFO]      = { .type = NLA_BINARY },
  };
  
@@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
  static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
  {
         struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+       u32 l4proto;
         u32 flags;
         int err;
  
@@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
                 return -EINVAL;
  
         flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
-       if (flags & ~NFT_RULE_COMPAT_F_MASK)
+       if (flags & NFT_RULE_COMPAT_F_UNUSED ||
+           flags & ~NFT_RULE_COMPAT_F_MASK)
                 return -EINVAL;
         if (flags & NFT_RULE_COMPAT_F_INV)
                 *inv = true;
  
-       *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+       l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+       if (l4proto > U16_MAX)
+               return -EINVAL;
+
+       *proto = l4proto;
+
         return 0;
  }
  
@@ -352,10 +359,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,
  
         if (ctx->family != NFPROTO_IPV4 &&
             ctx->family != NFPROTO_IPV6 &&
+           ctx->family != NFPROTO_INET &&
             ctx->family != NFPROTO_BRIDGE &&
             ctx->family != NFPROTO_ARP)
                 return -EOPNOTSUPP;
  
+       ret = nft_chain_validate_hooks(ctx->chain,
+                                      (1 << NF_INET_PRE_ROUTING) |
+                                      (1 << NF_INET_LOCAL_IN) |
+                                      (1 << NF_INET_FORWARD) |
+                                      (1 << NF_INET_LOCAL_OUT) |
+                                      (1 << NF_INET_POST_ROUTING));
+       if (ret)
+               return ret;
+
         if (nft_is_base_chain(ctx->chain)) {
                 const struct nft_base_chain *basechain =
                                                 nft_base_chain(ctx->chain);
@@ -419,7 +436,7 @@ static void nft_match_eval(const struct nft_expr *expr,
  
  static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
         [NFTA_MATCH_NAME]       = { .type = NLA_NUL_STRING },
-       [NFTA_MATCH_REV]        = { .type = NLA_U32 },
+       [NFTA_MATCH_REV]        = NLA_POLICY_MAX(NLA_BE32, 255),
         [NFTA_MATCH_INFO]       = { .type = NLA_BINARY },
  };
  
@@ -603,10 +620,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,
  
         if (ctx->family != NFPROTO_IPV4 &&
             ctx->family != NFPROTO_IPV6 &&
+           ctx->family != NFPROTO_INET &&
             ctx->family != NFPROTO_BRIDGE &&
             ctx->family != NFPROTO_ARP)
                 return -EOPNOTSUPP;
  
+       ret = nft_chain_validate_hooks(ctx->chain,
+                                      (1 << NF_INET_PRE_ROUTING) |
+                                      (1 << NF_INET_LOCAL_IN) |
+                                      (1 << NF_INET_FORWARD) |
+                                      (1 << NF_INET_LOCAL_OUT) |
+                                      (1 << NF_INET_POST_ROUTING));
+       if (ret)
+               return ret;
+
         if (nft_is_base_chain(ctx->chain)) {
                 const struct nft_base_chain *basechain =
                                                 nft_base_chain(ctx->chain);
@@ -724,7 +751,7 @@ out_put:
  static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
         [NFTA_COMPAT_NAME]      = { .type = NLA_NUL_STRING,
                                     .len = NFT_COMPAT_NAME_MAX-1 },
-       [NFTA_COMPAT_REV]       = { .type = NLA_U32 },
+       [NFTA_COMPAT_REV]       = NLA_POLICY_MAX(NLA_BE32, 255),
         [NFTA_COMPAT_TYPE]      = { .type = NLA_U32 },
  };
  
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c

index 86bb9d7797d9eeaea730e463c389a958e0b6ec85..bfd3e5a14dab68484469bdba71af37a460822549 100644 (file)
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
                 break;
  #endif
         case NFT_CT_ID:
+               if (tb[NFTA_CT_DIRECTION])
+                       return -EINVAL;
+
                 len = sizeof(u32);
                 break;
         default:
@@ -1250,7 +1253,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
         if (tb[NFTA_CT_EXPECT_L3PROTO])
                 priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
  
+       switch (priv->l3num) {
+       case NFPROTO_IPV4:
+       case NFPROTO_IPV6:
+               if (priv->l3num != ctx->family)
+                       return -EINVAL;
+
+               fallthrough;
+       case NFPROTO_INET:
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
         priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+       switch (priv->l4proto) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP:
+       case IPPROTO_UDPLITE:
+       case IPPROTO_DCCP:
+       case IPPROTO_SCTP:
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
         priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
         priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
         priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c

index 397351fa4d5f82d8bcec25e1d69f327dc60e0199..ab95760987010b649483bf052fbdba9fde4c9624 100644 (file)
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -361,6 +361,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
                 ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
         }
  
+       __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
         ret = flow_offload_add(flowtable, flow);
         if (ret < 0)
                 goto err_flow_add;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c

index 6c2061bfdae6c361c530088ca51aa2790d850ba4..6968a3b342367c6c0cb0df7523fdfd5864038802 100644 (file)
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -36,6 +36,7 @@ struct nft_rhash_cmp_arg {
         const struct nft_set            *set;
         const u32                       *key;
         u8                              genmask;
+       u64                             tstamp;
  };
  
  static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
@@ -62,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
                 return 1;
         if (nft_set_elem_is_dead(&he->ext))
                 return 1;
-       if (nft_set_elem_expired(&he->ext))
+       if (__nft_set_elem_expired(&he->ext, x->tstamp))
                 return 1;
         if (!nft_set_elem_active(&he->ext, x->genmask))
                 return 1;
@@ -87,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
                 .genmask = nft_genmask_cur(net),
                 .set     = set,
                 .key     = key,
+               .tstamp  = get_jiffies_64(),
         };
  
         he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -106,6 +108,7 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
                 .genmask = nft_genmask_cur(net),
                 .set     = set,
                 .key     = elem->key.val.data,
+               .tstamp  = get_jiffies_64(),
         };
  
         he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -131,6 +134,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
                 .genmask = NFT_GENMASK_ANY,
                 .set     = set,
                 .key     = key,
+               .tstamp  = get_jiffies_64(),
         };
  
         he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -175,6 +179,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
                 .genmask = nft_genmask_next(net),
                 .set     = set,
                 .key     = elem->key.val.data,
+               .tstamp  = nft_net_tstamp(net),
         };
         struct nft_rhash_elem *prev;
  
@@ -216,6 +221,7 @@ nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
                 .genmask = nft_genmask_next(net),
                 .set     = set,
                 .key     = elem->key.val.data,
+               .tstamp  = nft_net_tstamp(net),
         };
  
         rcu_read_lock();
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c

index efd523496be45f59408e8b6dcec7ff40dbcf5844..aa1d9e93a9a04859d48e417501c7f9e889187400 100644 (file)
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -342,9 +342,6 @@
  #include "nft_set_pipapo_avx2.h"
  #include "nft_set_pipapo.h"
  
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
  /**
   * pipapo_refill() - For each set bit, set bits from selected mapping table item
   * @map:       Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
                        const u32 *key, const struct nft_set_ext **ext)
  {
         struct nft_pipapo *priv = nft_set_priv(set);
+       struct nft_pipapo_scratch *scratch;
         unsigned long *res_map, *fill_map;
         u8 genmask = nft_genmask_cur(net);
         const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
  
         local_bh_disable();
  
-       map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
         m = rcu_dereference(priv->match);
  
         if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
                 goto out;
  
-       res_map  = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
-       fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+       scratch = *raw_cpu_ptr(m->scratch);
+
+       map_index = scratch->map_index;
+
+       res_map  = scratch->map + (map_index ? m->bsize_max : 0);
+       fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
  
         memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
  
@@ -460,7 +460,7 @@ next_match:
                 b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
                                   last);
                 if (b < 0) {
-                       raw_cpu_write(nft_pipapo_scratch_index, map_index);
+                       scratch->map_index = map_index;
                         local_bh_enable();
  
                         return false;
@@ -477,7 +477,7 @@ next_match:
                          * current inactive bitmap is clean and can be reused as
                          * *next* bitmap (not initial) for the next packet.
                          */
-                       raw_cpu_write(nft_pipapo_scratch_index, map_index);
+                       scratch->map_index = map_index;
                         local_bh_enable();
  
                         return true;
@@ -504,6 +504,7 @@ out:
   * @set:       nftables API set representation
   * @data:      Key data to be matched against existing elements
   * @genmask:   If set, check that element is active in given genmask
+ * @tstamp:    timestamp to check for expired elements
   *
   * This is essentially the same as the lookup function, except that it matches
   * key data against the uncommitted copy and doesn't use preallocated maps for
@@ -513,7 +514,8 @@ out:
   */
  static struct nft_pipapo_elem *pipapo_get(const struct net *net,
                                           const struct nft_set *set,
-                                         const u8 *data, u8 genmask)
+                                         const u8 *data, u8 genmask,
+                                         u64 tstamp)
  {
         struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
         struct nft_pipapo *priv = nft_set_priv(set);
@@ -566,7 +568,7 @@ next_match:
                         goto out;
  
                 if (last) {
-                       if (nft_set_elem_expired(&f->mt[b].e->ext))
+                       if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
                                 goto next_match;
                         if ((genmask &&
                              !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@@ -603,10 +605,10 @@ static struct nft_elem_priv *
  nft_pipapo_get(const struct net *net, const struct nft_set *set,
                const struct nft_set_elem *elem, unsigned int flags)
  {
-       static struct nft_pipapo_elem *e;
+       struct nft_pipapo_elem *e;
  
         e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
-                      nft_genmask_cur(net));
+                      nft_genmask_cur(net), get_jiffies_64());
         if (IS_ERR(e))
                 return ERR_CAST(e);
  
@@ -1108,6 +1110,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
                 f->mt[map[i].to + j].e = e;
  }
  
+/**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m:         Matching data
+ * @cpu:       CPU number
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+       struct nft_pipapo_scratch *s;
+       void *mem;
+
+       s = *per_cpu_ptr(m->scratch, cpu);
+       if (!s)
+               return;
+
+       mem = s;
+       mem -= s->align_off;
+       kfree(mem);
+}
+
  /**
   * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
   * @clone:     Copy of matching data with pending insertions and deletions
@@ -1121,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
         int i;
  
         for_each_possible_cpu(i) {
-               unsigned long *scratch;
+               struct nft_pipapo_scratch *scratch;
  #ifdef NFT_PIPAPO_ALIGN
-               unsigned long *scratch_aligned;
+               void *scratch_aligned;
+               u32 align_off;
  #endif
-
-               scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+               scratch = kzalloc_node(struct_size(scratch, map,
+                                                  bsize_max * 2) +
                                        NFT_PIPAPO_ALIGN_HEADROOM,
                                        GFP_KERNEL, cpu_to_node(i));
                 if (!scratch) {
@@ -1140,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
                         return -ENOMEM;
                 }
  
-               kfree(*per_cpu_ptr(clone->scratch, i));
-
-               *per_cpu_ptr(clone->scratch, i) = scratch;
+               pipapo_free_scratch(clone, i);
  
  #ifdef NFT_PIPAPO_ALIGN
-               scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
-               *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+               /* Align &scratch->map (not the struct itself): the extra
+                * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+                * above guarantee we can waste up to those bytes in order
+                * to align the map field regardless of its offset within
+                * the struct.
+                */
+               BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+               scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+               scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+               align_off = scratch_aligned - (void *)scratch;
+
+               scratch = scratch_aligned;
+               scratch->align_off = align_off;
  #endif
+               *per_cpu_ptr(clone->scratch, i) = scratch;
         }
  
         return 0;
@@ -1173,6 +1206,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
         struct nft_pipapo_match *m = priv->clone;
         u8 genmask = nft_genmask_next(net);
         struct nft_pipapo_elem *e, *dup;
+       u64 tstamp = nft_net_tstamp(net);
         struct nft_pipapo_field *f;
         const u8 *start_p, *end_p;
         int i, bsize_max, err = 0;
@@ -1182,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
         else
                 end = start;
  
-       dup = pipapo_get(net, set, start, genmask);
+       dup = pipapo_get(net, set, start, genmask, tstamp);
         if (!IS_ERR(dup)) {
                 /* Check if we already have the same exact entry */
                 const struct nft_data *dup_key, *dup_end;
@@ -1204,7 +1238,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
  
         if (PTR_ERR(dup) == -ENOENT) {
                 /* Look for partially overlapping entries */
-               dup = pipapo_get(net, set, end, nft_genmask_next(net));
+               dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
         }
  
         if (PTR_ERR(dup) != -ENOENT) {
@@ -1301,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
         if (!new->scratch)
                 goto out_scratch;
  
-#ifdef NFT_PIPAPO_ALIGN
-       new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
-       if (!new->scratch_aligned)
-               goto out_scratch;
-#endif
         for_each_possible_cpu(i)
                 *per_cpu_ptr(new->scratch, i) = NULL;
  
@@ -1357,10 +1386,7 @@ out_lt:
         }
  out_scratch_realloc:
         for_each_possible_cpu(i)
-               kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(new->scratch_aligned);
-#endif
+               pipapo_free_scratch(new, i);
  out_scratch:
         free_percpu(new->scratch);
         kfree(new);
@@ -1560,6 +1586,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
  {
         struct nft_pipapo *priv = nft_set_priv(set);
         struct net *net = read_pnet(&set->net);
+       u64 tstamp = nft_net_tstamp(net);
         int rules_f0, first_rule = 0;
         struct nft_pipapo_elem *e;
         struct nft_trans_gc *gc;
@@ -1594,7 +1621,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
                 /* synchronous gc never fails, there is no need to set on
                  * NFT_SET_ELEM_DEAD_BIT.
                  */
-               if (nft_set_elem_expired(&e->ext)) {
+               if (__nft_set_elem_expired(&e->ext, tstamp)) {
                         priv->dirty = true;
  
                         gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -1640,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
         int i;
  
         for_each_possible_cpu(i)
-               kfree(*per_cpu_ptr(m->scratch, i));
+               pipapo_free_scratch(m, i);
  
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(m->scratch_aligned);
-#endif
         free_percpu(m->scratch);
-
         pipapo_free_fields(m);
  
         kfree(m);
@@ -1769,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
  {
         struct nft_pipapo_elem *e;
  
-       e = pipapo_get(net, set, data, nft_genmask_next(net));
+       e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
         if (IS_ERR(e))
                 return NULL;
  
@@ -2132,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set,
         m->field_count = field_count;
         m->bsize_max = 0;
  
-       m->scratch = alloc_percpu(unsigned long *);
+       m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
         if (!m->scratch) {
                 err = -ENOMEM;
                 goto out_scratch;
@@ -2140,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set,
         for_each_possible_cpu(i)
                 *per_cpu_ptr(m->scratch, i) = NULL;
  
-#ifdef NFT_PIPAPO_ALIGN
-       m->scratch_aligned = alloc_percpu(unsigned long *);
-       if (!m->scratch_aligned) {
-               err = -ENOMEM;
-               goto out_free;
-       }
-       for_each_possible_cpu(i)
-               *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
         rcu_head_init(&m->rcu);
  
         nft_pipapo_for_each_field(f, i, m) {
@@ -2180,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set,
         return 0;
  
  out_free:
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(m->scratch_aligned);
-#endif
         free_percpu(m->scratch);
  out_scratch:
         kfree(m);
@@ -2236,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
  
                 nft_set_pipapo_match_destroy(ctx, set, m);
  
-#ifdef NFT_PIPAPO_ALIGN
-               free_percpu(m->scratch_aligned);
-#endif
                 for_each_possible_cpu(cpu)
-                       kfree(*per_cpu_ptr(m->scratch, cpu));
+                       pipapo_free_scratch(m, cpu);
                 free_percpu(m->scratch);
                 pipapo_free_fields(m);
                 kfree(m);
@@ -2253,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
                 if (priv->dirty)
                         nft_set_pipapo_match_destroy(ctx, set, m);
  
-#ifdef NFT_PIPAPO_ALIGN
-               free_percpu(priv->clone->scratch_aligned);
-#endif
                 for_each_possible_cpu(cpu)
-                       kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+                       pipapo_free_scratch(priv->clone, cpu);
                 free_percpu(priv->clone->scratch);
  
                 pipapo_free_fields(priv->clone);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h

index 1040223da5fa3ab7bbfd4da4d348baee3d22a0d6..3842c7341a9f40a088d78532c4b610f3a99d7d23 100644 (file)
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -130,21 +130,29 @@ struct nft_pipapo_field {
         union nft_pipapo_map_bucket *mt;
  };
  
+/**
+ * struct nft_pipapo_scratch - percpu data used for lookup and matching
+ * @map_index: Current working bitmap index, toggled between field matches
+ * @align_off: Offset to get the originally allocated address
+ * @map:       store partial matching results during lookup
+ */
+struct nft_pipapo_scratch {
+       u8 map_index;
+       u32 align_off;
+       unsigned long map[];
+};
+
  /**
   * struct nft_pipapo_match - Data used for lookup and matching
- * @field_count                Amount of fields in set
+ * @field_count:       Amount of fields in set
   * @scratch:           Preallocated per-CPU maps for partial matching results
- * @scratch_aligned:   Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
   * @bsize_max:         Maximum lookup table bucket size of all fields, in longs
- * @rcu                        Matching data is swapped on commits
+ * @rcu:               Matching data is swapped on commits
   * @f:                 Fields, with lookup and mapping tables
   */
  struct nft_pipapo_match {
         int field_count;
-#ifdef NFT_PIPAPO_ALIGN
-       unsigned long * __percpu *scratch_aligned;
-#endif
-       unsigned long * __percpu *scratch;
+       struct nft_pipapo_scratch * __percpu *scratch;
         size_t bsize_max;
         struct rcu_head rcu;
         struct nft_pipapo_field f[] __counted_by(field_count);
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c

index 52e0d026d30ad2c92f63f589727cdc0b39d7092b..a3a8ddca991894b28aa1a1cd7c84ba0380366b5f 100644 (file)
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -57,7 +57,7 @@
  
  /* Jump to label if @reg is zero */
  #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label)                       \
-       asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";"        \
+       asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
                           "je %l[" #label "]" : : : : label)
  
  /* Store 256 bits from YMM register into memory. Contrary to bucket load
@@ -71,9 +71,6 @@
  #define NFT_PIPAPO_AVX2_ZERO(reg)                                      \
         asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
  
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
-
  /**
   * nft_pipapo_avx2_prepare() - Prepare before main algorithm body
   *
@@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
                             const u32 *key, const struct nft_set_ext **ext)
  {
         struct nft_pipapo *priv = nft_set_priv(set);
-       unsigned long *res, *fill, *scratch;
+       struct nft_pipapo_scratch *scratch;
         u8 genmask = nft_genmask_cur(net);
         const u8 *rp = (const u8 *)key;
         struct nft_pipapo_match *m;
         struct nft_pipapo_field *f;
+       unsigned long *res, *fill;
         bool map_index;
         int i, ret = 0;
  
@@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
          */
         kernel_fpu_begin_mask(0);
  
-       scratch = *raw_cpu_ptr(m->scratch_aligned);
+       scratch = *raw_cpu_ptr(m->scratch);
         if (unlikely(!scratch)) {
                 kernel_fpu_end();
                 return false;
         }
-       map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
  
-       res  = scratch + (map_index ? m->bsize_max : 0);
-       fill = scratch + (map_index ? 0 : m->bsize_max);
+       map_index = scratch->map_index;
+
+       res  = scratch->map + (map_index ? m->bsize_max : 0);
+       fill = scratch->map + (map_index ? 0 : m->bsize_max);
  
         /* Starting map doesn't need to be set for this implementation */
  
@@ -1221,7 +1220,7 @@ next_match:
  
  out:
         if (i % 2)
-               raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
+               scratch->map_index = !map_index;
         kernel_fpu_end();
  
         return ret >= 0;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c

index baa3fea4fe65c8f938e665a7fb6b0e4fc0f8f9ad..9944fe479e5361dc140f75be8b90bf3c5deb40f6 100644 (file)
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -234,7 +234,7 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
  
  static const struct nft_rbtree_elem *
  nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
-                  struct nft_rbtree_elem *rbe, u8 genmask)
+                  struct nft_rbtree_elem *rbe)
  {
         struct nft_set *set = (struct nft_set *)__set;
         struct rb_node *prev = rb_prev(&rbe->node);
@@ -253,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
         while (prev) {
                 rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
                 if (nft_rbtree_interval_end(rbe_prev) &&
-                   nft_set_elem_active(&rbe_prev->ext, genmask))
+                   nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
                         break;
  
                 prev = rb_prev(prev);
@@ -313,6 +313,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
         struct nft_rbtree *priv = nft_set_priv(set);
         u8 cur_genmask = nft_genmask_cur(net);
         u8 genmask = nft_genmask_next(net);
+       u64 tstamp = nft_net_tstamp(net);
         int d;
  
         /* Descend the tree to search for an existing element greater than the
@@ -360,11 +361,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
                 /* perform garbage collection to avoid bogus overlap reports
                  * but skip new elements in this transaction.
                  */
-               if (nft_set_elem_expired(&rbe->ext) &&
+               if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
                     nft_set_elem_active(&rbe->ext, cur_genmask)) {
                         const struct nft_rbtree_elem *removed_end;
  
-                       removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+                       removed_end = nft_rbtree_gc_elem(set, priv, rbe);
                         if (IS_ERR(removed_end))
                                 return PTR_ERR(removed_end);
  
@@ -551,6 +552,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
         const struct nft_rbtree *priv = nft_set_priv(set);
         const struct rb_node *parent = priv->root.rb_node;
         u8 genmask = nft_genmask_next(net);
+       u64 tstamp = nft_net_tstamp(net);
         int d;
  
         while (parent != NULL) {
@@ -571,7 +573,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
                                    nft_rbtree_interval_end(this)) {
                                 parent = parent->rb_right;
                                 continue;
-                       } else if (nft_set_elem_expired(&rbe->ext)) {
+                       } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
                                 break;
                         } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
                                 parent = parent->rb_left;
@@ -624,9 +626,10 @@ static void nft_rbtree_gc(struct nft_set *set)
  {
         struct nft_rbtree *priv = nft_set_priv(set);
         struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+       struct net *net = read_pnet(&set->net);
+       u64 tstamp = nft_net_tstamp(net);
         struct rb_node *node, *next;
         struct nft_trans_gc *gc;
-       struct net *net;
  
         set  = nft_set_container_of(priv);
         net  = read_pnet(&set->net);
@@ -648,7 +651,7 @@ static void nft_rbtree_gc(struct nft_set *set)
                         rbe_end = rbe;
                         continue;
                 }
-               if (!nft_set_elem_expired(&rbe->ext))
+               if (!__nft_set_elem_expired(&rbe->ext, tstamp))
                         continue;
  
                 gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c

index 9f21953c7433ff942caba909a8c8673baa3e003c..f735d79d8be5778a008485e893a2be78584318fe 100644 (file)
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = {
  
  static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
         .type           = NFT_OBJECT_TUNNEL,
+       .family         = NFPROTO_NETDEV,
         .ops            = &nft_tunnel_obj_ops,
         .maxattr        = NFTA_TUNNEL_KEY_MAX,
         .policy         = nft_tunnel_key_policy,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c

index 9c962347cf859f16fc76e4d8a2fd22cdb3d142d6..ff315351269fe643073bb2984485b3a76566b1c8 100644 (file)
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group)
  static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
                                            gfp_t gfp_mask)
  {
-       unsigned int len = skb_end_offset(skb);
+       unsigned int len = skb->len;
         struct sk_buff *new;
  
         new = alloc_skb(len, gfp_mask);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c

index 97348cedb16b30d9a60cb8096a8408f6a8890e6d..cdad47b140fa4bd54ac0571457ab16ab505a3a11 100644 (file)
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev)
  {
         nfc_free_device(ndev->nfc_dev);
         nci_hci_deallocate(ndev);
+
+       /* drop partial rx data packet if present */
+       if (ndev->rx_data_reassembly)
+               kfree_skb(ndev->rx_data_reassembly);
         kfree(ndev);
  }
  EXPORT_SYMBOL(nci_free_device);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c

index 88965e2068ac655317169256486613aadf471580..ebc5728aab4eaf0bc165cbdb03e26fc852af9a3e 100644 (file)
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@ struct ovs_len_tbl {
  
  #define OVS_ATTR_NESTED -1
  #define OVS_ATTR_VARIABLE -2
+#define OVS_COPY_ACTIONS_MAX_DEPTH 16
  
  static bool actions_may_change_flow(const struct nlattr *actions)
  {
@@ -2545,13 +2546,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                   const struct sw_flow_key *key,
                                   struct sw_flow_actions **sfa,
                                   __be16 eth_type, __be16 vlan_tci,
-                                 u32 mpls_label_count, bool log);
+                                 u32 mpls_label_count, bool log,
+                                 u32 depth);
  
  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
                                     const struct sw_flow_key *key,
                                     struct sw_flow_actions **sfa,
                                     __be16 eth_type, __be16 vlan_tci,
-                                   u32 mpls_label_count, bool log, bool last)
+                                   u32 mpls_label_count, bool log, bool last,
+                                   u32 depth)
  {
         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
         const struct nlattr *probability, *actions;
@@ -2602,7 +2605,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
                 return err;
  
         err = __ovs_nla_copy_actions(net, actions, key, sfa,
-                                    eth_type, vlan_tci, mpls_label_count, log);
+                                    eth_type, vlan_tci, mpls_label_count, log,
+                                    depth + 1);
  
         if (err)
                 return err;
@@ -2617,7 +2621,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
                                      const struct sw_flow_key *key,
                                      struct sw_flow_actions **sfa,
                                      __be16 eth_type, __be16 vlan_tci,
-                                    u32 mpls_label_count, bool log)
+                                    u32 mpls_label_count, bool log,
+                                    u32 depth)
  {
         const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
         int start, action_start, err, rem;
@@ -2660,7 +2665,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
                 return action_start;
  
         err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
-                                    vlan_tci, mpls_label_count, log);
+                                    vlan_tci, mpls_label_count, log,
+                                    depth + 1);
         if (err)
                 return err;
  
@@ -2674,7 +2680,8 @@ static int validate_and_copy_clone(struct net *net,
                                    const struct sw_flow_key *key,
                                    struct sw_flow_actions **sfa,
                                    __be16 eth_type, __be16 vlan_tci,
-                                  u32 mpls_label_count, bool log, bool last)
+                                  u32 mpls_label_count, bool log, bool last,
+                                  u32 depth)
  {
         int start, err;
         u32 exec;
@@ -2694,7 +2701,8 @@ static int validate_and_copy_clone(struct net *net,
                 return err;
  
         err = __ovs_nla_copy_actions(net, attr, key, sfa,
-                                    eth_type, vlan_tci, mpls_label_count, log);
+                                    eth_type, vlan_tci, mpls_label_count, log,
+                                    depth + 1);
         if (err)
                 return err;
  
@@ -3063,7 +3071,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
                                            struct sw_flow_actions **sfa,
                                            __be16 eth_type, __be16 vlan_tci,
                                            u32 mpls_label_count,
-                                          bool log, bool last)
+                                          bool log, bool last, u32 depth)
  {
         const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
         struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
@@ -3111,7 +3119,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
                 return nested_acts_start;
  
         err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
-                                    eth_type, vlan_tci, mpls_label_count, log);
+                                    eth_type, vlan_tci, mpls_label_count, log,
+                                    depth + 1);
  
         if (err)
                 return err;
@@ -3124,7 +3133,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
                 return nested_acts_start;
  
         err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
-                                    eth_type, vlan_tci, mpls_label_count, log);
+                                    eth_type, vlan_tci, mpls_label_count, log,
+                                    depth + 1);
  
         if (err)
                 return err;
@@ -3152,12 +3162,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                   const struct sw_flow_key *key,
                                   struct sw_flow_actions **sfa,
                                   __be16 eth_type, __be16 vlan_tci,
-                                 u32 mpls_label_count, bool log)
+                                 u32 mpls_label_count, bool log,
+                                 u32 depth)
  {
         u8 mac_proto = ovs_key_mac_proto(key);
         const struct nlattr *a;
         int rem, err;
  
+       if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
+               return -EOVERFLOW;
+
         nla_for_each_nested(a, attr, rem) {
                 /* Expected argument lengths, (u32)-1 for variable length. */
                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -3355,7 +3369,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                         err = validate_and_copy_sample(net, a, key, sfa,
                                                        eth_type, vlan_tci,
                                                        mpls_label_count,
-                                                      log, last);
+                                                      log, last, depth);
                         if (err)
                                 return err;
                         skip_copy = true;
@@ -3426,7 +3440,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                         err = validate_and_copy_clone(net, a, key, sfa,
                                                       eth_type, vlan_tci,
                                                       mpls_label_count,
-                                                     log, last);
+                                                     log, last, depth);
                         if (err)
                                 return err;
                         skip_copy = true;
@@ -3440,7 +3454,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                                               eth_type,
                                                               vlan_tci,
                                                               mpls_label_count,
-                                                             log, last);
+                                                             log, last,
+                                                             depth);
                         if (err)
                                 return err;
                         skip_copy = true;
@@ -3450,7 +3465,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                 case OVS_ACTION_ATTR_DEC_TTL:
                         err = validate_and_copy_dec_ttl(net, a, key, sfa,
                                                         eth_type, vlan_tci,
-                                                       mpls_label_count, log);
+                                                       mpls_label_count, log,
+                                                       depth);
                         if (err)
                                 return err;
                         skip_copy = true;
@@ -3495,7 +3511,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
  
         (*sfa)->orig_len = nla_len(attr);
         err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
-                                    key->eth.vlan.tci, mpls_label_count, log);
+                                    key->eth.vlan.tci, mpls_label_count, log,
+                                    0);
         if (err)
                 ovs_nla_free_flow_actions(*sfa);
  
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c

index 3aa50dc7535b7761c77652d2f38826419b57c26a..976fe250b50955ec51b0c5d73f2dfa132990b60b 100644 (file)
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg)
  
         switch (cmd) {
         case SIOCINQ:
-               lock_sock(sk);
+               spin_lock_bh(&sk->sk_receive_queue.lock);
                 skb = skb_peek(&sk->sk_receive_queue);
                 *karg = skb ? skb->len : 0;
-               release_sock(sk);
+               spin_unlock_bh(&sk->sk_receive_queue.lock);
                 return 0;
  
         case SIOCPNADDRESOURCE:
diff --git a/net/phonet/pep.c b/net/phonet/pep.c

index faba31f2eff2903bee7082b295f137ff848a1e10..3dd5f52bc1b58e3f1ee4e235126438c723f1f73c 100644 (file)
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
         return 0;
  }
  
+static unsigned int pep_first_packet_length(struct sock *sk)
+{
+       struct pep_sock *pn = pep_sk(sk);
+       struct sk_buff_head *q;
+       struct sk_buff *skb;
+       unsigned int len = 0;
+       bool found = false;
+
+       if (sock_flag(sk, SOCK_URGINLINE)) {
+               q = &pn->ctrlreq_queue;
+               spin_lock_bh(&q->lock);
+               skb = skb_peek(q);
+               if (skb) {
+                       len = skb->len;
+                       found = true;
+               }
+               spin_unlock_bh(&q->lock);
+       }
+
+       if (likely(!found)) {
+               q = &sk->sk_receive_queue;
+               spin_lock_bh(&q->lock);
+               skb = skb_peek(q);
+               if (skb)
+                       len = skb->len;
+               spin_unlock_bh(&q->lock);
+       }
+
+       return len;
+}
+
  static int pep_ioctl(struct sock *sk, int cmd, int *karg)
  {
         struct pep_sock *pn = pep_sk(sk);
@@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg)
                         break;
                 }
  
-               lock_sock(sk);
-               if (sock_flag(sk, SOCK_URGINLINE) &&
-                   !skb_queue_empty(&pn->ctrlreq_queue))
-                       *karg = skb_peek(&pn->ctrlreq_queue)->len;
-               else if (!skb_queue_empty(&sk->sk_receive_queue))
-                       *karg = skb_peek(&sk->sk_receive_queue)->len;
-               else
-                       *karg = 0;
-               release_sock(sk);
+               *karg = pep_first_packet_length(sk);
                 ret = 0;
                 break;
  
diff --git a/net/rds/recv.c b/net/rds/recv.c

index c71b923764fd7cd7268953b968c5f5749a0b98a6..5627f80013f8b17d3de6284784fe3cbb02bba754 100644 (file)
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -425,6 +425,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
         struct sock *sk = rds_rs_to_sk(rs);
         int ret = 0;
         unsigned long flags;
+       struct rds_incoming *to_drop = NULL;
  
         write_lock_irqsave(&rs->rs_recv_lock, flags);
         if (!list_empty(&inc->i_item)) {
@@ -435,11 +436,14 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
                                               -be32_to_cpu(inc->i_hdr.h_len),
                                               inc->i_hdr.h_dport);
                         list_del_init(&inc->i_item);
-                       rds_inc_put(inc);
+                       to_drop = inc;
                 }
         }
         write_unlock_irqrestore(&rs->rs_recv_lock, flags);
  
+       if (to_drop)
+               rds_inc_put(to_drop);
+
         rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
         return ret;
  }
@@ -758,16 +762,21 @@ void rds_clear_recv_queue(struct rds_sock *rs)
         struct sock *sk = rds_rs_to_sk(rs);
         struct rds_incoming *inc, *tmp;
         unsigned long flags;
+       LIST_HEAD(to_drop);
  
         write_lock_irqsave(&rs->rs_recv_lock, flags);
         list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
                 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
                                       -be32_to_cpu(inc->i_hdr.h_len),
                                       inc->i_hdr.h_dport);
+               list_move(&inc->i_item, &to_drop);
+       }
+       write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+
+       list_for_each_entry_safe(inc, tmp, &to_drop, i_item) {
                 list_del_init(&inc->i_item);
                 rds_inc_put(inc);
         }
-       write_unlock_irqrestore(&rs->rs_recv_lock, flags);
  }
  
  /*
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h

index dbeb75c298573adc580568744d6781a5c6193b0d..7818aae1be8e00c1e9b15c918868ca11b40a7213 100644 (file)
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -199,11 +199,19 @@ struct rxrpc_host_header {
   */
  struct rxrpc_skb_priv {
         struct rxrpc_connection *conn;  /* Connection referred to (poke packet) */
-       u16             offset;         /* Offset of data */
-       u16             len;            /* Length of data */
-       u8              flags;
+       union {
+               struct {
+                       u16             offset;         /* Offset of data */
+                       u16             len;            /* Length of data */
+                       u8              flags;
  #define RXRPC_RX_VERIFIED      0x01
-
+               };
+               struct {
+                       rxrpc_seq_t     first_ack;      /* First packet in acks table */
+                       u8              nr_acks;        /* Number of acks+nacks */
+                       u8              nr_nacks;       /* Number of nacks */
+               };
+       };
         struct rxrpc_host_header hdr;   /* RxRPC packet header from this packet */
  };
  
@@ -510,7 +518,7 @@ struct rxrpc_connection {
         enum rxrpc_call_completion completion;  /* Completion condition */
         s32                     abort_code;     /* Abort code of connection abort */
         int                     debug_id;       /* debug ID for printks */
-       atomic_t                serial;         /* packet serial number counter */
+       rxrpc_serial_t          tx_serial;      /* Outgoing packet serial number counter */
         unsigned int            hi_serial;      /* highest serial number received */
         u32                     service_id;     /* Service ID, possibly upgraded */
         u32                     security_level; /* Security level selected */
@@ -692,11 +700,11 @@ struct rxrpc_call {
         u8                      cong_dup_acks;  /* Count of ACKs showing missing packets */
         u8                      cong_cumul_acks; /* Cumulative ACK count */
         ktime_t                 cong_tstamp;    /* Last time cwnd was changed */
+       struct sk_buff          *cong_last_nack; /* Last ACK with nacks received */
  
         /* Receive-phase ACK management (ACKs we send). */
         u8                      ackr_reason;    /* reason to ACK */
         u16                     ackr_sack_base; /* Starting slot in SACK table ring */
-       rxrpc_serial_t          ackr_serial;    /* serial of packet being ACK'd */
         rxrpc_seq_t             ackr_window;    /* Base of SACK window */
         rxrpc_seq_t             ackr_wtop;      /* Base of SACK window */
         unsigned int            ackr_nr_unacked; /* Number of unacked packets */
@@ -730,7 +738,8 @@ struct rxrpc_call {
  struct rxrpc_ack_summary {
         u16                     nr_acks;                /* Number of ACKs in packet */
         u16                     nr_new_acks;            /* Number of new ACKs in packet */
-       u16                     nr_rot_new_acks;        /* Number of rotated new ACKs */
+       u16                     nr_new_nacks;           /* Number of new nacks in packet */
+       u16                     nr_retained_nacks;      /* Number of nacks retained between ACKs */
         u8                      ack_reason;
         bool                    saw_nacks;              /* Saw NACKs in packet */
         bool                    new_low_nack;           /* T if new low NACK found */
@@ -822,6 +831,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
  
  #include <trace/events/rxrpc.h>
  
+/*
+ * Allocate the next serial number on a connection.  0 must be skipped.
+ */
+static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn)
+{
+       rxrpc_serial_t serial;
+
+       serial = conn->tx_serial;
+       if (serial == 0)
+               serial = 1;
+       conn->tx_serial = serial + 1;
+       return serial;
+}
+
  /*
   * af_rxrpc.c
   */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c

index e363f21a20141bb13c931fc0cd40c862c49e5829..0f78544d043be9327ea13cc91fbfa532d6ef4002 100644 (file)
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -43,8 +43,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
         unsigned long expiry = rxrpc_soft_ack_delay;
         unsigned long now = jiffies, ack_at;
  
-       call->ackr_serial = serial;
-
         if (rxrpc_soft_ack_delay < expiry)
                 expiry = rxrpc_soft_ack_delay;
         if (call->peer->srtt_us != 0)
@@ -114,6 +112,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
  void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
  {
         struct rxrpc_ackpacket *ack = NULL;
+       struct rxrpc_skb_priv *sp;
         struct rxrpc_txbuf *txb;
         unsigned long resend_at;
         rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
@@ -141,14 +140,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
          * explicitly NAK'd packets.
          */
         if (ack_skb) {
+               sp = rxrpc_skb(ack_skb);
                 ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
  
-               for (i = 0; i < ack->nAcks; i++) {
+               for (i = 0; i < sp->nr_acks; i++) {
                         rxrpc_seq_t seq;
  
                         if (ack->acks[i] & 1)
                                 continue;
-                       seq = ntohl(ack->firstPacket) + i;
+                       seq = sp->first_ack + i;
                         if (after(txb->seq, transmitted))
                                 break;
                         if (after(txb->seq, seq))
@@ -373,7 +373,6 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
  bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
  {
         unsigned long now, next, t;
-       rxrpc_serial_t ackr_serial;
         bool resend = false, expired = false;
         s32 abort_code;
  
@@ -423,8 +422,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
         if (time_after_eq(now, t)) {
                 trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
                 cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
-               ackr_serial = xchg(&call->ackr_serial, 0);
-               rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial,
+               rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
                                rxrpc_propose_ack_ping_for_lost_ack);
         }
  
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c

index 0943e54370ba0e71bcfa6d2238704b0b41c49ee9..9fc9a6c3f685868fe69842d5ac133a1b898674eb 100644 (file)
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -686,6 +686,7 @@ static void rxrpc_destroy_call(struct work_struct *work)
  
         del_timer_sync(&call->timer);
  
+       rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
         rxrpc_cleanup_ring(call);
         while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
                                                struct rxrpc_txbuf, call_link))) {
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c

index 95f4bc206b3dc9a571abe6fb63cc6fe05575e9c9..1f251d758cb9d8be81856187d78e1994ef179072 100644 (file)
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -95,6 +95,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
  
         _enter("%d", conn->debug_id);
  
+       if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+               if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+                                 &pkt.ack, sizeof(pkt.ack)) < 0)
+                       return;
+               if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE)
+                       return;
+       }
+
         chan = &conn->channels[channel];
  
         /* If the last call got moved on whilst we were waiting to run, just
@@ -117,7 +125,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
         iov[2].iov_base = &ack_info;
         iov[2].iov_len  = sizeof(ack_info);
  
-       serial = atomic_inc_return(&conn->serial);
+       serial = rxrpc_get_next_serial(conn);
  
         pkt.whdr.epoch          = htonl(conn->proto.epoch);
         pkt.whdr.cid            = htonl(conn->proto.cid | channel);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c

index 92495e73b8699185cf76c60aa88f62d77a29dd56..9691de00ade7522d36174bbe1ab9098c1b52b145 100644 (file)
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
         }
  
         cumulative_acks += summary->nr_new_acks;
-       cumulative_acks += summary->nr_rot_new_acks;
         if (cumulative_acks > 255)
                 cumulative_acks = 255;
  
-       summary->mode = call->cong_mode;
         summary->cwnd = call->cong_cwnd;
         summary->ssthresh = call->cong_ssthresh;
         summary->cumulative_acks = cumulative_acks;
@@ -151,6 +149,7 @@ out_no_clear_ca:
                 cwnd = RXRPC_TX_MAX_WINDOW;
         call->cong_cwnd = cwnd;
         call->cong_cumul_acks = cumulative_acks;
+       summary->mode = call->cong_mode;
         trace_rxrpc_congest(call, summary, acked_serial, change);
         if (resend)
                 rxrpc_resend(call, skb);
@@ -213,7 +212,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
         list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
                 if (before_eq(txb->seq, call->acks_hard_ack))
                         continue;
-               summary->nr_rot_new_acks++;
                 if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
                         set_bit(RXRPC_CALL_TX_LAST, &call->flags);
                         rot_last = true;
@@ -254,6 +252,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
  {
         ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
  
+       if (unlikely(call->cong_last_nack)) {
+               rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+               call->cong_last_nack = NULL;
+       }
+
         switch (__rxrpc_call_state(call)) {
         case RXRPC_CALL_CLIENT_SEND_REQUEST:
         case RXRPC_CALL_CLIENT_AWAIT_REPLY:
@@ -702,6 +705,43 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
                 wake_up(&call->waitq);
  }
  
+/*
+ * Determine how many nacks from the previous ACK have now been satisfied.
+ */
+static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
+                                             struct rxrpc_ack_summary *summary,
+                                             rxrpc_seq_t seq)
+{
+       struct sk_buff *skb = call->cong_last_nack;
+       struct rxrpc_ackpacket ack;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       unsigned int i, new_acks = 0, retained_nacks = 0;
+       rxrpc_seq_t old_seq = sp->first_ack;
+       u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack);
+
+       if (after_eq(seq, old_seq + sp->nr_acks)) {
+               summary->nr_new_acks += sp->nr_nacks;
+               summary->nr_new_acks += seq - (old_seq + sp->nr_acks);
+               summary->nr_retained_nacks = 0;
+       } else if (seq == old_seq) {
+               summary->nr_retained_nacks = sp->nr_nacks;
+       } else {
+               for (i = 0; i < sp->nr_acks; i++) {
+                       if (acks[i] == RXRPC_ACK_TYPE_NACK) {
+                               if (before(old_seq + i, seq))
+                                       new_acks++;
+                               else
+                                       retained_nacks++;
+                       }
+               }
+
+               summary->nr_new_acks += new_acks;
+               summary->nr_retained_nacks = retained_nacks;
+       }
+
+       return old_seq + sp->nr_acks;
+}
+
  /*
   * Process individual soft ACKs.
   *
@@ -711,25 +751,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
   * the timer on the basis that the peer might just not have processed them at
   * the time the ACK was sent.
   */
-static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
-                                 rxrpc_seq_t seq, int nr_acks,
-                                 struct rxrpc_ack_summary *summary)
+static void rxrpc_input_soft_acks(struct rxrpc_call *call,
+                                 struct rxrpc_ack_summary *summary,
+                                 struct sk_buff *skb,
+                                 rxrpc_seq_t seq,
+                                 rxrpc_seq_t since)
  {
-       unsigned int i;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       unsigned int i, old_nacks = 0;
+       rxrpc_seq_t lowest_nak = seq + sp->nr_acks;
+       u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
  
-       for (i = 0; i < nr_acks; i++) {
+       for (i = 0; i < sp->nr_acks; i++) {
                 if (acks[i] == RXRPC_ACK_TYPE_ACK) {
                         summary->nr_acks++;
-                       summary->nr_new_acks++;
+                       if (after_eq(seq, since))
+                               summary->nr_new_acks++;
                 } else {
-                       if (!summary->saw_nacks &&
-                           call->acks_lowest_nak != seq + i) {
-                               call->acks_lowest_nak = seq + i;
-                               summary->new_low_nack = true;
-                       }
                         summary->saw_nacks = true;
+                       if (before(seq, since)) {
+                               /* Overlap with previous ACK */
+                               old_nacks++;
+                       } else {
+                               summary->nr_new_nacks++;
+                               sp->nr_nacks++;
+                       }
+
+                       if (before(seq, lowest_nak))
+                               lowest_nak = seq;
                 }
+               seq++;
+       }
+
+       if (lowest_nak != call->acks_lowest_nak) {
+               call->acks_lowest_nak = lowest_nak;
+               summary->new_low_nack = true;
         }
+
+       /* We *can* have more nacks than we did - the peer is permitted to drop
+        * packets it has soft-acked and re-request them.  Further, it is
+        * possible for the nack distribution to change whilst the number of
+        * nacks stays the same or goes down.
+        */
+       if (old_nacks < summary->nr_retained_nacks)
+               summary->nr_new_acks += summary->nr_retained_nacks - old_nacks;
+       summary->nr_retained_nacks = old_nacks;
  }
  
  /*
@@ -773,7 +839,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
         struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
         struct rxrpc_ackinfo info;
         rxrpc_serial_t ack_serial, acked_serial;
-       rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
+       rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
         int nr_acks, offset, ioffset;
  
         _enter("");
@@ -789,6 +855,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
         prev_pkt = ntohl(ack.previousPacket);
         hard_ack = first_soft_ack - 1;
         nr_acks = ack.nAcks;
+       sp->first_ack = first_soft_ack;
+       sp->nr_acks = nr_acks;
         summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
                               ack.reason : RXRPC_ACK__INVALID);
  
@@ -858,6 +926,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
         if (nr_acks > 0)
                 skb_condense(skb);
  
+       if (call->cong_last_nack) {
+               since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack);
+               rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+               call->cong_last_nack = NULL;
+       } else {
+               summary.nr_new_acks = first_soft_ack - call->acks_first_seq;
+               call->acks_lowest_nak = first_soft_ack + nr_acks;
+               since = first_soft_ack;
+       }
+
         call->acks_latest_ts = skb->tstamp;
         call->acks_first_seq = first_soft_ack;
         call->acks_prev_seq = prev_pkt;
@@ -866,7 +944,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
         case RXRPC_ACK_PING:
                 break;
         default:
-               if (after(acked_serial, call->acks_highest_serial))
+               if (acked_serial && after(acked_serial, call->acks_highest_serial))
                         call->acks_highest_serial = acked_serial;
                 break;
         }
@@ -905,8 +983,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
         if (nr_acks > 0) {
                 if (offset > (int)skb->len - nr_acks)
                         return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
-               rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
-                                     nr_acks, &summary);
+               rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since);
+               rxrpc_get_skb(skb, rxrpc_skb_get_last_nack);
+               call->cong_last_nack = skb;
         }
  
         if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c

index a0906145e8293ca457fd0b1493ba3892f5f0729a..4a292f860ae37a41bddcd99f7e3bdc6a2c092d29 100644 (file)
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -216,7 +216,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
         iov[0].iov_len  = sizeof(txb->wire) + sizeof(txb->ack) + n;
         len = iov[0].iov_len;
  
-       serial = atomic_inc_return(&conn->serial);
+       serial = rxrpc_get_next_serial(conn);
         txb->wire.serial = htonl(serial);
         trace_rxrpc_tx_ack(call->debug_id, serial,
                            ntohl(txb->ack.firstPacket),
@@ -302,7 +302,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
         iov[0].iov_base = &pkt;
         iov[0].iov_len  = sizeof(pkt);
  
-       serial = atomic_inc_return(&conn->serial);
+       serial = rxrpc_get_next_serial(conn);
         pkt.whdr.serial = htonl(serial);
  
         iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
@@ -334,7 +334,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
         _enter("%x,{%d}", txb->seq, txb->len);
  
         /* Each transmission of a Tx packet needs a new serial number */
-       serial = atomic_inc_return(&conn->serial);
+       serial = rxrpc_get_next_serial(conn);
         txb->wire.serial = htonl(serial);
  
         if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
@@ -558,7 +558,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
  
         len = iov[0].iov_len + iov[1].iov_len;
  
-       serial = atomic_inc_return(&conn->serial);
+       serial = rxrpc_get_next_serial(conn);
         whdr.serial = htonl(serial);
  
         iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c

index 6c86cbb98d1d601edc9dad728c72f887067a376e..26dc2f26d92d8d67f82229675254d7217c2184e0 100644 (file)
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -181,7 +181,7 @@ print:
                    atomic_read(&conn->active),
                    state,
                    key_serial(conn->key),
-                  atomic_read(&conn->serial),
+                  conn->tx_serial,
                    conn->hi_serial,
                    conn->channels[0].call_id,
                    conn->channels[1].call_id,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c

index b52dedcebce0a7aafe0888f97e79bb81435749f2..6b32d61d4cdc46719d4a011987f6ea112ae59fc1 100644 (file)
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -664,7 +664,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
  
         len = iov[0].iov_len + iov[1].iov_len;
  
-       serial = atomic_inc_return(&conn->serial);
+       serial = rxrpc_get_next_serial(conn);
         whdr.serial = htonl(serial);
  
         ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
@@ -721,7 +721,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
  
         len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
  
-       serial = atomic_inc_return(&conn->serial);
+       serial = rxrpc_get_next_serial(conn);
         whdr.serial = htonl(serial);
  
         rxrpc_local_dont_fragment(conn->local, false);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c

index 12386f590b0f61f45e4ff40c9ec3605326671a2d..6faa7d00da09771ae130581604c3b14c50472966 100644 (file)
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -232,18 +232,14 @@ release_idr:
         return err;
  }
  
-static bool is_mirred_nested(void)
-{
-       return unlikely(__this_cpu_read(mirred_nest_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
  {
         int err;
  
         if (!want_ingress)
                 err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
-       else if (is_mirred_nested())
+       else if (!at_ingress)
                 err = netif_rx(skb);
         else
                 err = netif_receive_skb(skb);
@@ -270,8 +266,7 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
         if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
                 net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
                                        dev->name);
-               err = -ENODEV;
-               goto out;
+               goto err_cant_do;
         }
  
         /* we could easily avoid the clone only if called by ingress and clsact;
@@ -283,10 +278,8 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
                 tcf_mirred_can_reinsert(retval);
         if (!dont_clone) {
                 skb_to_send = skb_clone(skb, GFP_ATOMIC);
-               if (!skb_to_send) {
-                       err =  -ENOMEM;
-                       goto out;
-               }
+               if (!skb_to_send)
+                       goto err_cant_do;
         }
  
         want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
@@ -319,19 +312,20 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
  
                 skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
  
-               err = tcf_mirred_forward(want_ingress, skb_to_send);
+               err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
         } else {
-               err = tcf_mirred_forward(want_ingress, skb_to_send);
+               err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
         }
-
-       if (err) {
-out:
+       if (err)
                 tcf_action_inc_overlimit_qstats(&m->common);
-               if (is_redirect)
-                       retval = TC_ACT_SHOT;
-       }
  
         return retval;
+
+err_cant_do:
+       if (is_redirect)
+               retval = TC_ACT_SHOT;
+       tcf_action_inc_overlimit_qstats(&m->common);
+       return retval;
  }
  
  static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m,
@@ -533,8 +527,6 @@ static int mirred_device_event(struct notifier_block *unused,
                                  * net_device are already rcu protected.
                                  */
                                 RCU_INIT_POINTER(m->tcfm_dev, NULL);
-                       } else if (m->tcfm_blockid) {
-                               m->tcfm_blockid = 0;
                         }
                         spin_unlock_bh(&m->tcf_lock);
                 }
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c

index efb9d2811b73d18862f824b0b7a8b4e6b905271d..6ee7064c82fcc3bdb7596e2ad8fe33bc6456102d 100644 (file)
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2460,8 +2460,11 @@ unbind_filter:
         }
  
  errout_idr:
-       if (!fold)
+       if (!fold) {
+               spin_lock(&tp->lock);
                 idr_remove(&head->handle_idr, fnew->handle);
+               spin_unlock(&tp->lock);
+       }
         __fl_put(fnew);
  errout_tb:
         kfree(tb);
diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c

index 5ea84decec19a6b6593e1f9caa31e0e914a52e9d..5337bc46275519a062e54d093df84f3ea8f58583 100644 (file)
--- a/net/sched/em_canid.c
+++ b/net/sched/em_canid.c
@@ -222,6 +222,7 @@ static void __exit exit_em_canid(void)
         tcf_em_unregister(&em_canid_ops);
  }
  
+MODULE_DESCRIPTION("ematch classifier to match CAN IDs embedded in skb CAN frames");
  MODULE_LICENSE("GPL");
  
  module_init(init_em_canid);
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c

index f17b049ea53090d750133422f90b3ad673fd6c41..c90ad7ea26b4697cbedf25a51fc1b92771040c4e 100644 (file)
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -87,6 +87,7 @@ static void __exit exit_em_cmp(void)
         tcf_em_unregister(&em_cmp_ops);
  }
  
+MODULE_DESCRIPTION("ematch classifier for basic data types(8/16/32 bit) against skb data");
  MODULE_LICENSE("GPL");
  
  module_init(init_em_cmp);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c

index 09d8afd04a2a78ac55b0ddd1b424ddcb28b9ba83..8996c73c9779b5fa804e6f913834cf1fe4d071e6 100644 (file)
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -1006,6 +1006,7 @@ static void __exit exit_em_meta(void)
         tcf_em_unregister(&em_meta_ops);
  }
  
+MODULE_DESCRIPTION("ematch classifier for various internal kernel metadata, skb metadata and sk metadata");
  MODULE_LICENSE("GPL");
  
  module_init(init_em_meta);
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c

index a83b237cbeb06553c805dfeac3632fd69d6dc3c6..4f9f21a05d5e40aadfdc4c339b8178ad43dc2c8b 100644 (file)
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -68,6 +68,7 @@ static void __exit exit_em_nbyte(void)
         tcf_em_unregister(&em_nbyte_ops);
  }
  
+MODULE_DESCRIPTION("ematch classifier for arbitrary skb multi-bytes");
  MODULE_LICENSE("GPL");
  
  module_init(init_em_nbyte);
diff --git a/net/sched/em_text.c b/net/sched/em_text.c

index f176afb70559eb0a594a2f724765ccb0a1d3b746..420c66203b1777632500ee3d5e2d89a46b50bc4c 100644 (file)
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -147,6 +147,7 @@ static void __exit exit_em_text(void)
         tcf_em_unregister(&em_text_ops);
  }
  
+MODULE_DESCRIPTION("ematch classifier for embedded text in skbs");
  MODULE_LICENSE("GPL");
  
  module_init(init_em_text);
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c

index 71b070da043796d1872ae7aecc2348ab6a4b37f1..fdec4db5ec89d047427d62fb6ce95b3649a80f9f 100644 (file)
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -52,6 +52,7 @@ static void __exit exit_em_u32(void)
         tcf_em_unregister(&em_u32_ops);
  }
  
+MODULE_DESCRIPTION("ematch skb classifier using 32 bit chunks of data");
  MODULE_LICENSE("GPL");
  
  module_init(init_em_u32);
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c

index 7182c5a450fb5b804d19fdb1b04b75a2c34eb2d0..5c1652181805880ff9f5eaae45e0bab6b00170df 100644 (file)
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -38,6 +38,14 @@ void sctp_inq_init(struct sctp_inq *queue)
         INIT_WORK(&queue->immediate, NULL);
  }
  
+/* Properly release the chunk which is being worked on. */
+static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk)
+{
+       if (chunk->head_skb)
+               chunk->skb = chunk->head_skb;
+       sctp_chunk_free(chunk);
+}
+
  /* Release the memory associated with an SCTP inqueue.  */
  void sctp_inq_free(struct sctp_inq *queue)
  {
@@ -53,7 +61,7 @@ void sctp_inq_free(struct sctp_inq *queue)
          * free it as well.
          */
         if (queue->in_progress) {
-               sctp_chunk_free(queue->in_progress);
+               sctp_inq_chunk_free(queue->in_progress);
                 queue->in_progress = NULL;
         }
  }
@@ -130,9 +138,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
                                 goto new_skb;
                         }
  
-                       if (chunk->head_skb)
-                               chunk->skb = chunk->head_skb;
-                       sctp_chunk_free(chunk);
+                       sctp_inq_chunk_free(chunk);
                         chunk = queue->in_progress = NULL;
                 } else {
                         /* Nothing to do. Next chunk in the packet, please. */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c

index a2cb30af46cb158fcd3a1c12349a0375a1020203..0f53a5c6fd9d9c88c78f51640b179bf214e78bda 100644 (file)
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -924,6 +924,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
                 smc->clcsock->file->private_data = smc->clcsock;
                 smc->clcsock->wq.fasync_list =
                         smc->sk.sk_socket->wq.fasync_list;
+               smc->sk.sk_socket->wq.fasync_list = NULL;
  
                 /* There might be some wait entries remaining
                  * in smc sk->sk_wq and they should be woken up
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c

index 95cc95458e2d8d2c2c3578088544ee1abe0ea8a6..e4c858411207a51d043aef96a47c48ac63f5dd8a 100644 (file)
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1877,9 +1877,15 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
                            struct smcd_dev *smcismdev,
                            struct smcd_gid *peer_gid)
  {
-       return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev &&
-               smc_ism_is_virtual(smcismdev) ?
-               (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1;
+       if (lgr->peer_gid.gid != peer_gid->gid ||
+           lgr->smcd != smcismdev)
+               return false;
+
+       if (smc_ism_is_virtual(smcismdev) &&
+           lgr->peer_gid.gid_ext != peer_gid->gid_ext)
+               return false;
+
+       return true;
  }
  
  /* create a new SMC connection (and a new link group if necessary) */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c

index f60c93e5a25d69f6c918ab43a9c48a973cbf90b4..b969e505c7b77002e17936c7ee4fa6e6c79ad223 100644 (file)
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1598,10 +1598,10 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
         /* Finally, send the reply synchronously */
         if (rqstp->bc_to_initval > 0) {
                 timeout.to_initval = rqstp->bc_to_initval;
-               timeout.to_retries = rqstp->bc_to_initval;
+               timeout.to_retries = rqstp->bc_to_retries;
         } else {
                 timeout.to_initval = req->rq_xprt->timeout->to_initval;
-               timeout.to_initval = req->rq_xprt->timeout->to_retries;
+               timeout.to_retries = req->rq_xprt->timeout->to_retries;
         }
         memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
         task = rpc_run_bc_task(req, &timeout);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c

index 5b045284849e03151b172cf55248492aed2b3472..c9189a970eec317745a06c27064f504a6ff2e3d2 100644 (file)
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -19,6 +19,35 @@
  #include <linux/rtnetlink.h>
  #include <net/switchdev.h>
  
+static bool switchdev_obj_eq(const struct switchdev_obj *a,
+                            const struct switchdev_obj *b)
+{
+       const struct switchdev_obj_port_vlan *va, *vb;
+       const struct switchdev_obj_port_mdb *ma, *mb;
+
+       if (a->id != b->id || a->orig_dev != b->orig_dev)
+               return false;
+
+       switch (a->id) {
+       case SWITCHDEV_OBJ_ID_PORT_VLAN:
+               va = SWITCHDEV_OBJ_PORT_VLAN(a);
+               vb = SWITCHDEV_OBJ_PORT_VLAN(b);
+               return va->flags == vb->flags &&
+                       va->vid == vb->vid &&
+                       va->changed == vb->changed;
+       case SWITCHDEV_OBJ_ID_PORT_MDB:
+       case SWITCHDEV_OBJ_ID_HOST_MDB:
+               ma = SWITCHDEV_OBJ_PORT_MDB(a);
+               mb = SWITCHDEV_OBJ_PORT_MDB(b);
+               return ma->vid == mb->vid &&
+                       ether_addr_equal(ma->addr, mb->addr);
+       default:
+               break;
+       }
+
+       BUG();
+}
+
  static LIST_HEAD(deferred);
  static DEFINE_SPINLOCK(deferred_lock);
  
@@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev,
  }
  EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
  
+/**
+ *     switchdev_port_obj_act_is_deferred - Is object action pending?
+ *
+ *     @dev: port device
+ *     @nt: type of action; add or delete
+ *     @obj: object to test
+ *
+ *     Returns true if a deferred item is pending, which is
+ *     equivalent to the action @nt on an object @obj.
+ *
+ *     rtnl_lock must be held.
+ */
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+                                       enum switchdev_notifier_type nt,
+                                       const struct switchdev_obj *obj)
+{
+       struct switchdev_deferred_item *dfitem;
+       bool found = false;
+
+       ASSERT_RTNL();
+
+       spin_lock_bh(&deferred_lock);
+
+       list_for_each_entry(dfitem, &deferred, list) {
+               if (dfitem->dev != dev)
+                       continue;
+
+               if ((dfitem->func == switchdev_port_obj_add_deferred &&
+                    nt == SWITCHDEV_PORT_OBJ_ADD) ||
+                   (dfitem->func == switchdev_port_obj_del_deferred &&
+                    nt == SWITCHDEV_PORT_OBJ_DEL)) {
+                       if (switchdev_obj_eq((const void *)dfitem->data, obj)) {
+                               found = true;
+                               break;
+                       }
+               }
+       }
+
+       spin_unlock_bh(&deferred_lock);
+
+       return found;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
+
  static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
  static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
  
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c

index 2cde375477e381aa4a542cd4cf24db067770b466..878415c43527615801186d79a6c0c73b62bf5750 100644 (file)
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1086,6 +1086,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
  
  #ifdef CONFIG_TIPC_MEDIA_UDP
         if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+               if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+                       rtnl_unlock();
+                       NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
+                       return -EINVAL;
+               }
+
                 err = tipc_udp_nl_bearer_add(b,
                                              attrs[TIPC_NLA_BEARER_UDP_OPTS]);
                 if (err) {
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c

index 1c2c6800949dd4c2800f76326b7500f9743c4720..b4674f03d71a9fb9a5526555d7aca9b9cc5e665c 100644 (file)
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -1003,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx)
         return 0;
  }
  
-static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+static int tls_get_info(struct sock *sk, struct sk_buff *skb)
  {
         u16 version, cipher_type;
         struct tls_context *ctx;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c

index 31e8a94dfc111b7705fe19b9b4ddee3e6a317a23..211f57164cb611fd2665f682906be96aa35463ed 100644 (file)
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -52,6 +52,7 @@ struct tls_decrypt_arg {
         struct_group(inargs,
         bool zc;
         bool async;
+       bool async_done;
         u8 tail;
         );
  
@@ -63,6 +64,7 @@ struct tls_decrypt_ctx {
         u8 iv[TLS_MAX_IV_SIZE];
         u8 aad[TLS_MAX_AAD_SIZE];
         u8 tail;
+       bool free_sgout;
         struct scatterlist sg[];
  };
  
@@ -187,7 +189,6 @@ static void tls_decrypt_done(void *data, int err)
         struct aead_request *aead_req = data;
         struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
         struct scatterlist *sgout = aead_req->dst;
-       struct scatterlist *sgin = aead_req->src;
         struct tls_sw_context_rx *ctx;
         struct tls_decrypt_ctx *dctx;
         struct tls_context *tls_ctx;
@@ -196,6 +197,17 @@ static void tls_decrypt_done(void *data, int err)
         struct sock *sk;
         int aead_size;
  
+       /* If requests get too backlogged crypto API returns -EBUSY and calls
+        * ->complete(-EINPROGRESS) immediately followed by ->complete(0)
+        * to make waiting for backlog to flush with crypto_wait_req() easier.
+        * First wait converts -EBUSY -> -EINPROGRESS, and the second one
+        * -EINPROGRESS -> 0.
+        * We have a single struct crypto_async_request per direction, this
+        * scheme doesn't help us, so just ignore the first ->complete().
+        */
+       if (err == -EINPROGRESS)
+               return;
+
         aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead);
         aead_size = ALIGN(aead_size, __alignof__(*dctx));
         dctx = (void *)((u8 *)aead_req + aead_size);
@@ -213,7 +225,7 @@ static void tls_decrypt_done(void *data, int err)
         }
  
         /* Free the destination pages if skb was not decrypted inplace */
-       if (sgout != sgin) {
+       if (dctx->free_sgout) {
                 /* Skip the first S/G entry as it points to AAD */
                 for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
                         if (!sg)
@@ -224,10 +236,17 @@ static void tls_decrypt_done(void *data, int err)
  
         kfree(aead_req);
  
-       spin_lock_bh(&ctx->decrypt_compl_lock);
-       if (!atomic_dec_return(&ctx->decrypt_pending))
+       if (atomic_dec_and_test(&ctx->decrypt_pending))
                 complete(&ctx->async_wait.completion);
-       spin_unlock_bh(&ctx->decrypt_compl_lock);
+}
+
+static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx)
+{
+       if (!atomic_dec_and_test(&ctx->decrypt_pending))
+               crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+       atomic_inc(&ctx->decrypt_pending);
+
+       return ctx->async_wait.err;
  }
  
  static int tls_do_decryption(struct sock *sk,
@@ -253,20 +272,33 @@ static int tls_do_decryption(struct sock *sk,
                 aead_request_set_callback(aead_req,
                                           CRYPTO_TFM_REQ_MAY_BACKLOG,
                                           tls_decrypt_done, aead_req);
+               DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1);
                 atomic_inc(&ctx->decrypt_pending);
         } else {
+               DECLARE_CRYPTO_WAIT(wait);
+
                 aead_request_set_callback(aead_req,
                                           CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                         crypto_req_done, &ctx->async_wait);
+                                         crypto_req_done, &wait);
+               ret = crypto_aead_decrypt(aead_req);
+               if (ret == -EINPROGRESS || ret == -EBUSY)
+                       ret = crypto_wait_req(ret, &wait);
+               return ret;
         }
  
         ret = crypto_aead_decrypt(aead_req);
-       if (ret == -EINPROGRESS) {
-               if (darg->async)
-                       return 0;
+       if (ret == -EINPROGRESS)
+               return 0;
  
-               ret = crypto_wait_req(ret, &ctx->async_wait);
+       if (ret == -EBUSY) {
+               ret = tls_decrypt_async_wait(ctx);
+               darg->async_done = true;
+               /* all completions have run, we're not doing async anymore */
+               darg->async = false;
+               return ret;
         }
+
+       atomic_dec(&ctx->decrypt_pending);
         darg->async = false;
  
         return ret;
@@ -439,9 +471,10 @@ static void tls_encrypt_done(void *data, int err)
         struct tls_rec *rec = data;
         struct scatterlist *sge;
         struct sk_msg *msg_en;
-       bool ready = false;
         struct sock *sk;
-       int pending;
+
+       if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */
+               return;
  
         msg_en = &rec->msg_encrypted;
  
@@ -476,23 +509,25 @@ static void tls_encrypt_done(void *data, int err)
                 /* If received record is at head of tx_list, schedule tx */
                 first_rec = list_first_entry(&ctx->tx_list,
                                              struct tls_rec, list);
-               if (rec == first_rec)
-                       ready = true;
+               if (rec == first_rec) {
+                       /* Schedule the transmission */
+                       if (!test_and_set_bit(BIT_TX_SCHEDULED,
+                                             &ctx->tx_bitmask))
+                               schedule_delayed_work(&ctx->tx_work.work, 1);
+               }
         }
  
-       spin_lock_bh(&ctx->encrypt_compl_lock);
-       pending = atomic_dec_return(&ctx->encrypt_pending);
-
-       if (!pending && ctx->async_notify)
+       if (atomic_dec_and_test(&ctx->encrypt_pending))
                 complete(&ctx->async_wait.completion);
-       spin_unlock_bh(&ctx->encrypt_compl_lock);
+}
  
-       if (!ready)
-               return;
+static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx)
+{
+       if (!atomic_dec_and_test(&ctx->encrypt_pending))
+               crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+       atomic_inc(&ctx->encrypt_pending);
  
-       /* Schedule the transmission */
-       if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
-               schedule_delayed_work(&ctx->tx_work.work, 1);
+       return ctx->async_wait.err;
  }
  
  static int tls_do_encryption(struct sock *sk,
@@ -541,9 +576,14 @@ static int tls_do_encryption(struct sock *sk,
  
         /* Add the record in tx_list */
         list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
+       DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1);
         atomic_inc(&ctx->encrypt_pending);
  
         rc = crypto_aead_encrypt(aead_req);
+       if (rc == -EBUSY) {
+               rc = tls_encrypt_async_wait(ctx);
+               rc = rc ?: -EINPROGRESS;
+       }
         if (!rc || rc != -EINPROGRESS) {
                 atomic_dec(&ctx->encrypt_pending);
                 sge->offset -= prot->prepend_size;
@@ -984,7 +1024,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
         int num_zc = 0;
         int orig_size;
         int ret = 0;
-       int pending;
  
         if (!eor && (msg->msg_flags & MSG_EOR))
                 return -EINVAL;
@@ -1163,24 +1202,12 @@ trim_sgl:
         if (!num_async) {
                 goto send_end;
         } else if (num_zc) {
-               /* Wait for pending encryptions to get completed */
-               spin_lock_bh(&ctx->encrypt_compl_lock);
-               ctx->async_notify = true;
-
-               pending = atomic_read(&ctx->encrypt_pending);
-               spin_unlock_bh(&ctx->encrypt_compl_lock);
-               if (pending)
-                       crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
-               else
-                       reinit_completion(&ctx->async_wait.completion);
-
-               /* There can be no concurrent accesses, since we have no
-                * pending encrypt operations
-                */
-               WRITE_ONCE(ctx->async_notify, false);
+               int err;
  
-               if (ctx->async_wait.err) {
-                       ret = ctx->async_wait.err;
+               /* Wait for pending encryptions to get completed */
+               err = tls_encrypt_async_wait(ctx);
+               if (err) {
+                       ret = err;
                         copied = 0;
                 }
         }
@@ -1229,7 +1256,6 @@ void tls_sw_splice_eof(struct socket *sock)
         ssize_t copied = 0;
         bool retrying = false;
         int ret = 0;
-       int pending;
  
         if (!ctx->open_rec)
                 return;
@@ -1264,22 +1290,7 @@ retry:
         }
  
         /* Wait for pending encryptions to get completed */
-       spin_lock_bh(&ctx->encrypt_compl_lock);
-       ctx->async_notify = true;
-
-       pending = atomic_read(&ctx->encrypt_pending);
-       spin_unlock_bh(&ctx->encrypt_compl_lock);
-       if (pending)
-               crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
-       else
-               reinit_completion(&ctx->async_wait.completion);
-
-       /* There can be no concurrent accesses, since we have no pending
-        * encrypt operations
-        */
-       WRITE_ONCE(ctx->async_notify, false);
-
-       if (ctx->async_wait.err)
+       if (tls_encrypt_async_wait(ctx))
                 goto unlock;
  
         /* Transmit if any encryptions have completed */
@@ -1581,12 +1592,16 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
         } else if (out_sg) {
                 memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
         }
+       dctx->free_sgout = !!pages;
  
         /* Prepare and submit AEAD request */
         err = tls_do_decryption(sk, sgin, sgout, dctx->iv,
                                 data_len + prot->tail_size, aead_req, darg);
-       if (err)
+       if (err) {
+               if (darg->async_done)
+                       goto exit_free_skb;
                 goto exit_free_pages;
+       }
  
         darg->skb = clear_skb ?: tls_strp_msg(ctx);
         clear_skb = NULL;
@@ -1598,6 +1613,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
                 return err;
         }
  
+       if (unlikely(darg->async_done))
+               return 0;
+
         if (prot->tail_size)
                 darg->tail = dctx->tail;
  
@@ -1769,7 +1787,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
                            u8 *control,
                            size_t skip,
                            size_t len,
-                          bool is_peek)
+                          bool is_peek,
+                          bool *more)
  {
         struct sk_buff *skb = skb_peek(&ctx->rx_list);
         struct tls_msg *tlm;
@@ -1782,7 +1801,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
  
                 err = tls_record_content_type(msg, tlm, control);
                 if (err <= 0)
-                       goto out;
+                       goto more;
  
                 if (skip < rxm->full_len)
                         break;
@@ -1800,12 +1819,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
  
                 err = tls_record_content_type(msg, tlm, control);
                 if (err <= 0)
-                       goto out;
+                       goto more;
  
                 err = skb_copy_datagram_msg(skb, rxm->offset + skip,
                                             msg, chunk);
                 if (err < 0)
-                       goto out;
+                       goto more;
  
                 len = len - chunk;
                 copied = copied + chunk;
@@ -1841,6 +1860,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
  
  out:
         return copied ? : err;
+more:
+       if (more)
+               *more = true;
+       goto out;
  }
  
  static bool
@@ -1940,10 +1963,12 @@ int tls_sw_recvmsg(struct sock *sk,
         struct strp_msg *rxm;
         struct tls_msg *tlm;
         ssize_t copied = 0;
+       ssize_t peeked = 0;
         bool async = false;
         int target, err;
         bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
         bool is_peek = flags & MSG_PEEK;
+       bool rx_more = false;
         bool released = true;
         bool bpf_strp_enabled;
         bool zc_capable;
@@ -1963,12 +1988,12 @@ int tls_sw_recvmsg(struct sock *sk,
                 goto end;
  
         /* Process pending decrypted records. It must be non-zero-copy */
-       err = process_rx_list(ctx, msg, &control, 0, len, is_peek);
+       err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more);
         if (err < 0)
                 goto end;
  
         copied = err;
-       if (len <= copied)
+       if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more)
                 goto end;
  
         target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -2061,6 +2086,8 @@ put_on_rx_list:
                                 decrypted += chunk;
                                 len -= chunk;
                                 __skb_queue_tail(&ctx->rx_list, skb);
+                               if (unlikely(control != TLS_RECORD_TYPE_DATA))
+                                       break;
                                 continue;
                         }
  
@@ -2084,8 +2111,10 @@ put_on_rx_list:
                         if (err < 0)
                                 goto put_on_rx_list_err;
  
-                       if (is_peek)
+                       if (is_peek) {
+                               peeked += chunk;
                                 goto put_on_rx_list;
+                       }
  
                         if (partially_consumed) {
                                 rxm->offset += chunk;
@@ -2109,16 +2138,10 @@ put_on_rx_list:
  
  recv_end:
         if (async) {
-               int ret, pending;
+               int ret;
  
                 /* Wait for all previously submitted records to be decrypted */
-               spin_lock_bh(&ctx->decrypt_compl_lock);
-               reinit_completion(&ctx->async_wait.completion);
-               pending = atomic_read(&ctx->decrypt_pending);
-               spin_unlock_bh(&ctx->decrypt_compl_lock);
-               ret = 0;
-               if (pending)
-                       ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+               ret = tls_decrypt_async_wait(ctx);
                 __skb_queue_purge(&ctx->async_hold);
  
                 if (ret) {
@@ -2130,12 +2153,11 @@ recv_end:
  
                 /* Drain records from the rx_list & copy if required */
                 if (is_peek || is_kvec)
-                       err = process_rx_list(ctx, msg, &control, copied,
-                                             decrypted, is_peek);
+                       err = process_rx_list(ctx, msg, &control, copied + peeked,
+                                             decrypted - peeked, is_peek, NULL);
                 else
                         err = process_rx_list(ctx, msg, &control, 0,
-                                             async_copy_bytes, is_peek);
-               decrypted += max(err, 0);
+                                             async_copy_bytes, is_peek, NULL);
         }
  
         copied += decrypted;
@@ -2435,16 +2457,9 @@ void tls_sw_release_resources_tx(struct sock *sk)
         struct tls_context *tls_ctx = tls_get_ctx(sk);
         struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
         struct tls_rec *rec, *tmp;
-       int pending;
  
         /* Wait for any pending async encryptions to complete */
-       spin_lock_bh(&ctx->encrypt_compl_lock);
-       ctx->async_notify = true;
-       pending = atomic_read(&ctx->encrypt_pending);
-       spin_unlock_bh(&ctx->encrypt_compl_lock);
-
-       if (pending)
-               crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+       tls_encrypt_async_wait(ctx);
  
         tls_tx_records(sk, -1);
  
@@ -2607,7 +2622,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc
         }
  
         crypto_init_wait(&sw_ctx_tx->async_wait);
-       spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+       atomic_set(&sw_ctx_tx->encrypt_pending, 1);
         INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
         INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
         sw_ctx_tx->tx_work.sk = sk;
@@ -2628,7 +2643,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx)
         }
  
         crypto_init_wait(&sw_ctx_rx->async_wait);
-       spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+       atomic_set(&sw_ctx_rx->decrypt_pending, 1);
         init_waitqueue_head(&sw_ctx_rx->wq);
         skb_queue_head_init(&sw_ctx_rx->rx_list);
         skb_queue_head_init(&sw_ctx_rx->async_hold);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

index ac1f2bc18fc9685652c26ac3b68f19bfd82f8332..0748e7ea5210e7d597acf87fc6caf1ea2156562e 100644 (file)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
  static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                   int);
  
-static int unix_set_peek_off(struct sock *sk, int val)
-{
-       struct unix_sock *u = unix_sk(sk);
-
-       if (mutex_lock_interruptible(&u->iolock))
-               return -EINTR;
-
-       WRITE_ONCE(sk->sk_peek_off, val);
-       mutex_unlock(&u->iolock);
-
-       return 0;
-}
-
  #ifdef CONFIG_PROC_FS
  static int unix_count_nr_fds(struct sock *sk)
  {
@@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = {
         .read_skb =     unix_stream_read_skb,
         .mmap =         sock_no_mmap,
         .splice_read =  unix_stream_splice_read,
-       .set_peek_off = unix_set_peek_off,
+       .set_peek_off = sk_set_peek_off,
         .show_fdinfo =  unix_show_fdinfo,
  };
  
@@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = {
         .read_skb =     unix_read_skb,
         .recvmsg =      unix_dgram_recvmsg,
         .mmap =         sock_no_mmap,
-       .set_peek_off = unix_set_peek_off,
+       .set_peek_off = sk_set_peek_off,
         .show_fdinfo =  unix_show_fdinfo,
  };
  
@@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = {
         .sendmsg =      unix_seqpacket_sendmsg,
         .recvmsg =      unix_seqpacket_recvmsg,
         .mmap =         sock_no_mmap,
-       .set_peek_off = unix_set_peek_off,
+       .set_peek_off = sk_set_peek_off,
         .show_fdinfo =  unix_show_fdinfo,
  };
  
@@ -1344,13 +1331,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
                 unix_state_lock(sk1);
                 return;
         }
-       if (sk1 < sk2) {
-               unix_state_lock(sk1);
-               unix_state_lock_nested(sk2);
-       } else {
-               unix_state_lock(sk2);
-               unix_state_lock_nested(sk1);
-       }
+       if (sk1 > sk2)
+               swap(sk1, sk2);
+
+       unix_state_lock(sk1);
+       unix_state_lock_nested(sk2, U_LOCK_SECOND);
  }
  
  static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1576,7 @@ restart:
                 goto out_unlock;
         }
  
-       unix_state_lock_nested(sk);
+       unix_state_lock_nested(sk, U_LOCK_SECOND);
  
         if (sk->sk_state != st) {
                 unix_state_unlock(sk);
diff --git a/net/unix/diag.c b/net/unix/diag.c

index bec09a3a1d44ce56d43e16583fdf3b417cce4033..be19827eca36dbb68ec97b2e9b3c80e22b4fa4be 100644 (file)
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
                          * queue lock. With the other's queue locked it's
                          * OK to lock the state.
                          */
-                       unix_state_lock_nested(req);
+                       unix_state_lock_nested(req, U_LOCK_DIAG);
                         peer = unix_sk(req)->peer;
                         buf[i++] = (peer ? sock_i_ino(peer) : 0);
                         unix_state_unlock(req);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c

index 2405f0f9af31c0ccefe2aa404002cfab8583c090..2a81880dac7b7b464b5ae9443fa3b2863cd76471 100644 (file)
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -284,9 +284,17 @@ void unix_gc(void)
          * which are creating the cycle(s).
          */
         skb_queue_head_init(&hitlist);
-       list_for_each_entry(u, &gc_candidates, link)
+       list_for_each_entry(u, &gc_candidates, link) {
                 scan_children(&u->sk, inc_inflight, &hitlist);
  
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+               if (u->oob_skb) {
+                       kfree_skb(u->oob_skb);
+                       u->oob_skb = NULL;
+               }
+#endif
+       }
+
         /* not_cycle_list contains those sockets which do not make up a
          * cycle.  Restore these to the inflight list.
          */
diff --git a/net/wireless/core.c b/net/wireless/core.c

index 409d74c57ca0d8c8d36c2260897fce39557620ee..3fb1b637352a9d0b469206d890601031ffd4c68f 100644 (file)
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
   * Copyright 2006-2010         Johannes Berg <johannes@sipsolutions.net>
   * Copyright 2013-2014  Intel Mobile Communications GmbH
   * Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
   */
  
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1661,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
                               unsigned long delay)
  {
         if (!delay) {
+               del_timer(&dwork->timer);
                 wiphy_work_queue(wiphy, &dwork->work);
                 return;
         }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c

index b09700400d09744ee1b0c990e46806264df25e3b..bd54a928bab4120134711f54e677cb1f60c4ba7b 100644 (file)
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4197,6 +4197,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
  
                 if (ntype != NL80211_IFTYPE_MESH_POINT)
                         return -EINVAL;
+               if (otype != NL80211_IFTYPE_MESH_POINT)
+                       return -EINVAL;
                 if (netif_running(dev))
                         return -EBUSY;
  
diff --git a/net/wireless/scan.c b/net/wireless/scan.c

index 2249b1a89d1c4cee36bda840d64dda612d367c5f..389a52c29bfc728c2437037b4f0e180b974d12ba 100644 (file)
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1731,6 +1731,61 @@ static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
         }
  }
  
+static void cfg80211_check_stuck_ecsa(struct cfg80211_registered_device *rdev,
+                                     struct cfg80211_internal_bss *known,
+                                     const struct cfg80211_bss_ies *old)
+{
+       const struct ieee80211_ext_chansw_ie *ecsa;
+       const struct element *elem_new, *elem_old;
+       const struct cfg80211_bss_ies *new, *bcn;
+
+       if (known->pub.proberesp_ecsa_stuck)
+               return;
+
+       new = rcu_dereference_protected(known->pub.proberesp_ies,
+                                       lockdep_is_held(&rdev->bss_lock));
+       if (WARN_ON(!new))
+               return;
+
+       if (new->tsf - old->tsf < USEC_PER_SEC)
+               return;
+
+       elem_old = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+                                     old->data, old->len);
+       if (!elem_old)
+               return;
+
+       elem_new = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+                                     new->data, new->len);
+       if (!elem_new)
+               return;
+
+       bcn = rcu_dereference_protected(known->pub.beacon_ies,
+                                       lockdep_is_held(&rdev->bss_lock));
+       if (bcn &&
+           cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+                              bcn->data, bcn->len))
+               return;
+
+       if (elem_new->datalen != elem_old->datalen)
+               return;
+       if (elem_new->datalen < sizeof(struct ieee80211_ext_chansw_ie))
+               return;
+       if (memcmp(elem_new->data, elem_old->data, elem_new->datalen))
+               return;
+
+       ecsa = (void *)elem_new->data;
+
+       if (!ecsa->mode)
+               return;
+
+       if (ecsa->new_ch_num !=
+           ieee80211_frequency_to_channel(known->pub.channel->center_freq))
+               return;
+
+       known->pub.proberesp_ecsa_stuck = 1;
+}
+
  static bool
  cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
                           struct cfg80211_internal_bss *known,
@@ -1750,8 +1805,10 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
                 /* Override possible earlier Beacon frame IEs */
                 rcu_assign_pointer(known->pub.ies,
                                    new->pub.proberesp_ies);
-               if (old)
+               if (old) {
+                       cfg80211_check_stuck_ecsa(rdev, known, old);
                         kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+               }
         }
  
         if (rcu_access_pointer(new->pub.beacon_ies)) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c

index 1eadfac03cc41d35709c001a77759a23f7dbdc39..b78c0e095e221fd775b9e1eafa4dd15485915079 100644 (file)
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
                         memcpy(vaddr, buffer, len);
                         kunmap_local(vaddr);
  
-                       skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
+                       skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
+                       refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
                 }
  
                 if (first_frag && desc->options & XDP_TX_METADATA) {
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c

index 41533c631431493882a7fa427d393c4b6a753e74..e6da7e8495c9cfdc3e81eb408e444a39442a2c9b 100644 (file)
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -858,4 +858,5 @@ int xfrm_count_pfkey_enc_supported(void)
  }
  EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported);
  
+MODULE_DESCRIPTION("XFRM Algorithm interface");
  MODULE_LICENSE("GPL");
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c

index ad01997c3aa9dd851a3fa4ad6dd6c877eaaddd36..f037be190baeacf8a7fc4c26240dd224a39cb984 100644 (file)
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3888,5 +3888,6 @@ static void __exit xfrm_user_exit(void)
  
  module_init(xfrm_user_init);
  module_exit(xfrm_user_exit);
+MODULE_DESCRIPTION("XFRM User interface");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h

index 7048bb3594d65be6d132d4103ee801fadf087b7e..634e81d83efd9577337e37f1ce42911574b0adf9 100644 (file)
--- a/samples/bpf/asm_goto_workaround.h
+++ b/samples/bpf/asm_goto_workaround.h
@@ -4,14 +4,14 @@
  #define __ASM_GOTO_WORKAROUND_H
  
  /*
- * This will bring in asm_volatile_goto and asm_inline macro definitions
+ * This will bring in asm_goto_output and asm_inline macro definitions
   * if enabled by compiler and config options.
   */
  #include <linux/types.h>
  
-#ifdef asm_volatile_goto
-#undef asm_volatile_goto
-#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
+#ifdef asm_goto_output
+#undef asm_goto_output
+#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output")
  #endif
  
  /*
diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include

index 5a84b6443875c47013348bfed85ebefe8c6da4db..3ee8ecfb8c044c3bf65461e81af5a9e95391fa44 100644 (file)
--- a/scripts/Kconfig.include
+++ b/scripts/Kconfig.include
@@ -33,7 +33,7 @@ ld-option = $(success,$(LD) -v $(1))
  
  # $(as-instr,<instr>)
  # Return y if the assembler supports <instr>, n otherwise
-as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler-with-cpp -o /dev/null -)
+as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o /dev/null -)
  
  # check if $(CC) and $(LD) exist
  $(error-if,$(failure,command -v $(CC)),C compiler '$(CC)' not found)
diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler

index 8fcb427405a6f17f61655a6d0881c433f22e1dd6..92be0c9a13eeb51beca06abe15bfe22c6e72bfcb 100644 (file)
--- a/scripts/Makefile.compiler
+++ b/scripts/Makefile.compiler
@@ -38,7 +38,7 @@ as-option = $(call try-run,\
  # Usage: aflags-y += $(call as-instr,instr,option1,option2)
  
  as-instr = $(call try-run,\
-       printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
+       printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
  
  # __cc-option
  # Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf

index ab271b2051a2459cc83d05111ba1be0558cdc954..226ea3df3b4b4caf70a8b7cc1c7ead71def9af7c 100644 (file)
--- a/scripts/Makefile.defconf
+++ b/scripts/Makefile.defconf
@@ -9,8 +9,8 @@
  # Input config fragments without '.config' suffix
  define merge_into_defconfig
         $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
-               -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \
-               $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config)
+               -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \
+               $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config)
         +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
  endef
  
@@ -23,7 +23,7 @@ endef
  # Input config fragments without '.config' suffix
  define merge_into_defconfig_override
         $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
-               -Q -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \
-               $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config)
+               -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \
+               $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config)
         +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
  endef
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py

index 61b7dddedc461e2ece91a7b25bcf14987fc98886..0669bac5e900e134c45a025697bae3b6251c09b1 100755 (executable)
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode
  instructions to the kernel when the programs are loaded. The format for that
  string is identical to the one in use for kernel modules (Dual licenses, such
  as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
-programs that are compatible with the GNU Privacy License (GPL).
+programs that are compatible with the GNU General Public License (GNU GPL).
  
  In order to use such helpers, the eBPF program must be loaded with the correct
  license string passed (via **attr**) to the **bpf**\\ () system call, and this
diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py

index 5dea4479240bc02226828f13f3fec2dc3acf36c6..e4fb686dfaa9f0ee49fadb43c6e4404fcd5ac8f3 100755 (executable)
--- a/scripts/clang-tools/gen_compile_commands.py
+++ b/scripts/clang-tools/gen_compile_commands.py
@@ -170,7 +170,7 @@ def process_line(root_directory, command_prefix, file_path):
      # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
      # kernel version). The compile_commands.json file is not interepreted
      # by Make, so this code replaces the escaped version with '#'.
-    prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#')
+    prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#')
  
      # Return the canonical path, eliminating any symbolic links encountered in the path.
      abs_path = os.path.realpath(os.path.join(root_directory, file_path))
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in

index e810e0c27ff18d3adc16fda616888e45f6e7d7da..5cace7588e243222eff43bfc4a3ba3d232dbe0ed 100644 (file)
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -130,7 +130,11 @@ LX_CONFIG(CONFIG_X86_MCE_THRESHOLD)
  LX_CONFIG(CONFIG_X86_MCE_AMD)
  LX_CONFIG(CONFIG_X86_MCE)
  LX_CONFIG(CONFIG_X86_IO_APIC)
-LX_CONFIG(CONFIG_HAVE_KVM)
+/*
+ * CONFIG_KVM can be "m" but it affects common code too.  Use CONFIG_KVM_COMMON
+ * as a proxy for IS_ENABLED(CONFIG_KVM).
+ */
+LX_CONFIG_KVM = IS_BUILTIN(CONFIG_KVM_COMMON)
  LX_CONFIG(CONFIG_NUMA)
  LX_CONFIG(CONFIG_ARM64)
  LX_CONFIG(CONFIG_ARM64_4K_PAGES)
diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py

index ef478e273791f359edc722b28c4d35f55bc8e2ee..66ae5c7690cf1750eb0bbaf3a0942fafa3fe9868 100644 (file)
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -151,7 +151,7 @@ def x86_show_interupts(prec):
          if cnt is not None:
              text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
  
-    if constants.LX_CONFIG_HAVE_KVM:
+    if constants.LX_CONFIG_KVM:
          text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event')
          text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event')
          text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event')
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c

index 3e808528aaeab2625424b56247eed97fa107232d..e9e9fb8d86746460c893a51a2989ca4061fc8e52 100644 (file)
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -345,6 +345,8 @@ void sym_calc_value(struct symbol *sym)
  
         oldval = sym->curr;
  
+       newval.tri = no;
+
         switch (sym->type) {
         case S_INT:
                 newval.val = "0";
@@ -357,7 +359,7 @@ void sym_calc_value(struct symbol *sym)
                 break;
         case S_BOOLEAN:
         case S_TRISTATE:
-               newval = symbol_no.curr;
+               newval.val = "n";
                 break;
         default:
                 sym->curr.val = sym->name;
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh

index a432b171be826a9c4ff362c518f97024ab90a811..7862a81017477daec1f702fdf46166e381bd396d 100755 (executable)
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -135,8 +135,13 @@ gen_btf()
         ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \
                 --strip-all ${1} ${2} 2>/dev/null
         # Change e_type to ET_REL so that it can be used to link final vmlinux.
-       # Unlike GNU ld, lld does not allow an ET_EXEC input.
-       printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none
+       # GNU ld 2.35+ and lld do not allow an ET_EXEC input.
+       if is_enabled CONFIG_CPU_BIG_ENDIAN; then
+               et_rel='\0\1'
+       else
+               et_rel='\1\0'
+       fi
+       printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none
  }
  
  # Create ${2} .S file with all symbols from the ${1} object file
diff --git a/scripts/mksysmap b/scripts/mksysmap

index 9ba1c9da0a40f28a4efe22856868ac745ab68b56..57ff5656d566fbc659801bdb4fd445b7b9ef2b86 100755 (executable)
--- a/scripts/mksysmap
+++ b/scripts/mksysmap
@@ -48,17 +48,8 @@ ${NM} -n ${1} | sed >${2} -e "
  / __kvm_nvhe_\\$/d
  / __kvm_nvhe_\.L/d
  
-# arm64 lld
-/ __AArch64ADRPThunk_/d
-
-# arm lld
-/ __ARMV5PILongThunk_/d
-/ __ARMV7PILongThunk_/d
-/ __ThumbV7PILongThunk_/d
-
-# mips lld
-/ __LA25Thunk_/d
-/ __microLA25Thunk_/d
+# lld arm/aarch64/mips thunks
+/ __[[:alnum:]]*Thunk_/d
  
  # CFI type identifiers
  / __kcfi_typeid_/d
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c

index 795b21154446df9d6cd37920e4b5183e0cbddeef..267b9a0a3abcd849fe4f0bae4cddd8a287d26184 100644 (file)
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -70,9 +70,7 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...)
                 break;
         case LOG_ERROR:
                 fprintf(stderr, "ERROR: ");
-               break;
-       case LOG_FATAL:
-               fprintf(stderr, "FATAL: ");
+               error_occurred = true;
                 break;
         default: /* invalid loglevel, ignore */
                 break;
@@ -83,16 +81,8 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...)
         va_start(arglist, fmt);
         vfprintf(stderr, fmt, arglist);
         va_end(arglist);
-
-       if (loglevel == LOG_FATAL)
-               exit(1);
-       if (loglevel == LOG_ERROR)
-               error_occurred = true;
  }
  
-void __attribute__((alias("modpost_log")))
-modpost_log_noret(enum loglevel loglevel, const char *fmt, ...);
-
  static inline bool strends(const char *str, const char *postfix)
  {
         if (strlen(str) < strlen(postfix))
@@ -806,7 +796,8 @@ static void check_section(const char *modname, struct elf_info *elf,
  
  #define DATA_SECTIONS ".data", ".data.rel"
  #define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \
-               ".kprobes.text", ".cpuidle.text", ".noinstr.text"
+               ".kprobes.text", ".cpuidle.text", ".noinstr.text", \
+               ".ltext", ".ltext.*"
  #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
                 ".fixup", ".entry.text", ".exception.text", \
                 ".coldtext", ".softirqentry.text"
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h

index 835cababf1b09eb2353f8777f934dfabf3731454..ee43c795063682b440818cd3a81ba5355afba456 100644 (file)
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -194,15 +194,11 @@ void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym);
  enum loglevel {
         LOG_WARN,
         LOG_ERROR,
-       LOG_FATAL
  };
  
  void __attribute__((format(printf, 2, 3)))
  modpost_log(enum loglevel loglevel, const char *fmt, ...);
  
-void __attribute__((format(printf, 2, 3), noreturn))
-modpost_log_noret(enum loglevel loglevel, const char *fmt, ...);
-
  /*
   * warn - show the given message, then let modpost continue running, still
   *        allowing modpost to exit successfully. This should be used when
@@ -218,4 +214,4 @@ modpost_log_noret(enum loglevel loglevel, const char *fmt, ...);
   */
  #define warn(fmt, args...)     modpost_log(LOG_WARN, fmt, ##args)
  #define error(fmt, args...)    modpost_log(LOG_ERROR, fmt, ##args)
-#define fatal(fmt, args...)    modpost_log_noret(LOG_FATAL, fmt, ##args)
+#define fatal(fmt, args...)    do { error(fmt, ##args); exit(1); } while (1)
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c

index 31066bfdba04e30abffa2d4f3088760a0a3bc753..dc4878502276ce94bc7d3d8a213934a21751078a 100644 (file)
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
  
         /* Sum all files in the same dir or subdirs. */
         while ((line = get_line(&pos))) {
-               char* p = line;
+               char* p;
+
+               /* trim the leading spaces away */
+               while (isspace(*line))
+                       line++;
+               p = line;
  
                 if (strncmp(line, "source_", sizeof("source_")-1) == 0) {
                         p = strrchr(line, ' ');
diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec

index 89298983a16941a20ccbd72330af1e168652c3f4..f58726671fb37424308c678126e5edd4a22dc5f3 100644 (file)
--- a/scripts/package/kernel.spec
+++ b/scripts/package/kernel.spec
@@ -55,12 +55,12 @@ patch -p1 < %{SOURCE2}
  %{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release}
  
  %install
-mkdir -p %{buildroot}/boot
-cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE}
+mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE}
+cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz
  %{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install
  %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
-cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE}
-cp .config %{buildroot}/boot/config-%{KERNELRELEASE}
+cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE}
+cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config
  ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build
  %if %{with_devel}
  %{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}'
@@ -70,13 +70,14 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA
  rm -rf %{buildroot}
  
  %post
-if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then
-cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm
-cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm
-rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE}
-/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
-rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
+if [ -x /usr/bin/kernel-install ]; then
+       /usr/bin/kernel-install add %{KERNELRELEASE} /lib/modules/%{KERNELRELEASE}/vmlinuz
  fi
+for file in vmlinuz System.map config; do
+       if ! cmp --silent "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}"; then
+               cp "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}"
+       fi
+done
  
  %preun
  if [ -x /sbin/new-kernel-pkg ]; then
@@ -94,7 +95,6 @@ fi
  %defattr (-, root, root)
  /lib/modules/%{KERNELRELEASE}
  %exclude /lib/modules/%{KERNELRELEASE}/build
-/boot/*
  
  %files headers
  %defattr (-, root, root)
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c

index 98e1150bee9d0cbecb79c7e81cb05159f6160b04..9a3dcaafb5b1ee20c4d2d5d355d81302ead8d427 100644 (file)
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -784,7 +784,7 @@ static int apparmor_getselfattr(unsigned int attr, struct lsm_ctx __user *lx,
         int error = -ENOENT;
         struct aa_task_ctx *ctx = task_ctx(current);
         struct aa_label *label = NULL;
-       char *value;
+       char *value = NULL;
  
         switch (attr) {
         case LSM_ATTR_CURRENT:
diff --git a/security/landlock/fs.c b/security/landlock/fs.c

index fc520a06f9af107310aa81050b8ad21accc6640d..0171f7eb6ee15d384835a6cd68a2afcff1f3b653 100644 (file)
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -737,8 +737,8 @@ static int current_check_refer_path(struct dentry *const old_dentry,
         bool allow_parent1, allow_parent2;
         access_mask_t access_request_parent1, access_request_parent2;
         struct path mnt_dir;
-       layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
-               layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];
+       layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {},
+                    layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {};
  
         if (!dom)
                 return 0;
diff --git a/security/security.c b/security/security.c

index 0144a98d3712e66733462a37a47518beb2eab743..7035ee35a393020303304741092e6b016e02669c 100644 (file)
--- a/security/security.c
+++ b/security/security.c
@@ -29,6 +29,7 @@
  #include <linux/backing-dev.h>
  #include <linux/string.h>
  #include <linux/msg.h>
+#include <linux/overflow.h>
  #include <net/flow.h>
  
  /* How many LSMs were built into the kernel? */
@@ -4015,6 +4016,7 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx,
         struct security_hook_list *hp;
         struct lsm_ctx *lctx;
         int rc = LSM_RET_DEFAULT(setselfattr);
+       u64 required_len;
  
         if (flags)
                 return -EINVAL;
@@ -4027,8 +4029,9 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx,
         if (IS_ERR(lctx))
                 return PTR_ERR(lctx);
  
-       if (size < lctx->len || size < lctx->ctx_len + sizeof(*lctx) ||
-           lctx->len < lctx->ctx_len + sizeof(*lctx)) {
+       if (size < lctx->len ||
+           check_add_overflow(sizeof(*lctx), lctx->ctx_len, &required_len) ||
+           lctx->len < required_len) {
                 rc = -EINVAL;
                 goto free_out;
         }
@@ -4255,7 +4258,19 @@ EXPORT_SYMBOL(security_inode_setsecctx);
   */
  int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
  {
-       return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen);
+       struct security_hook_list *hp;
+       int rc;
+
+       /*
+        * Only one module will provide a security context.
+        */
+       hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) {
+               rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen);
+               if (rc != LSM_RET_DEFAULT(inode_getsecctx))
+                       return rc;
+       }
+
+       return LSM_RET_DEFAULT(inode_getsecctx);
  }
  EXPORT_SYMBOL(security_inode_getsecctx);
  
@@ -4612,8 +4627,20 @@ EXPORT_SYMBOL(security_sock_rcv_skb);
  int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
                                       sockptr_t optlen, unsigned int len)
  {
-       return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock,
-                            optval, optlen, len);
+       struct security_hook_list *hp;
+       int rc;
+
+       /*
+        * Only one module will provide a security context.
+        */
+       hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream,
+                            list) {
+               rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen,
+                                                      len);
+               if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream))
+                       return rc;
+       }
+       return LSM_RET_DEFAULT(socket_getpeersec_stream);
  }
  
  /**
@@ -4633,8 +4660,19 @@ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval,
  int security_socket_getpeersec_dgram(struct socket *sock,
                                      struct sk_buff *skb, u32 *secid)
  {
-       return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock,
-                            skb, secid);
+       struct security_hook_list *hp;
+       int rc;
+
+       /*
+        * Only one module will provide a security context.
+        */
+       hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram,
+                            list) {
+               rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid);
+               if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram))
+                       return rc;
+       }
+       return LSM_RET_DEFAULT(socket_getpeersec_dgram);
  }
  EXPORT_SYMBOL(security_socket_getpeersec_dgram);
  
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c

index a6bf90ace84c74bdb11330d7bb278183dfb13275..338b023a8c3edb5918d59a2bc07e23221507ff45 100644 (file)
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6559,7 +6559,7 @@ static int selinux_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx,
                                size_t *size, u32 flags)
  {
         int rc;
-       char *val;
+       char *val = NULL;
         int val_len;
  
         val_len = selinux_lsm_getattr(attr, current, &val);
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c

index 57ee70ae50f24ac771a7bd74d224c17b1ff03d25..ea3140d510ecbfee06666df588a795b9f5bfc5ce 100644 (file)
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -2649,13 +2649,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
  {
         int error = buffer_len;
         size_t avail_len = buffer_len;
-       char *cp0 = head->write_buf;
+       char *cp0;
         int idx;
  
         if (!head->write)
                 return -EINVAL;
         if (mutex_lock_interruptible(&head->io_sem))
                 return -EINTR;
+       cp0 = head->write_buf;
         head->read_user_buf_avail = 0;
         idx = tomoyo_read_lock();
         /* Read a line and dispatch it to the policy handler. */
diff --git a/sound/core/Makefile b/sound/core/Makefile

index a6b444ee283264ca60e8d5673c4378f6175f128c..f6526b33713756071c14d4da2c7e051d1ae17bf9 100644 (file)
--- a/sound/core/Makefile
+++ b/sound/core/Makefile
@@ -32,7 +32,6 @@ snd-ump-objs      := ump.o
  snd-ump-$(CONFIG_SND_UMP_LEGACY_RAWMIDI) += ump_convert.o
  snd-timer-objs    := timer.o
  snd-hrtimer-objs  := hrtimer.o
-snd-rtctimer-objs := rtctimer.o
  snd-hwdep-objs    := hwdep.o
  snd-seq-device-objs := seq_device.o
  
diff --git a/sound/core/pcm.c b/sound/core/pcm.c

index a09f0154e6a7029c72fb3f0d6d8bd36f202b72c8..d0788126cbab10a2ef8daaab9201f366f27d8c63 100644 (file)
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -211,6 +211,10 @@ static const char * const snd_pcm_format_names[] = {
         FORMAT(DSD_U32_LE),
         FORMAT(DSD_U16_BE),
         FORMAT(DSD_U32_BE),
+       FORMAT(S20_LE),
+       FORMAT(S20_BE),
+       FORMAT(U20_LE),
+       FORMAT(U20_BE),
  };
  
  /**
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c

index f5ff00f99788a80135e2832f0cdb64650e3122d7..21baf6bf7e25a048e64d37331b0109f2ad7ec354 100644 (file)
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -486,6 +486,11 @@ static int fixup_unreferenced_params(struct snd_pcm_substream *substream,
                 i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_SAMPLE_BITS);
                 if (snd_interval_single(i))
                         params->msbits = snd_interval_value(i);
+               m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT);
+               if (snd_mask_single(m)) {
+                       snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m);
+                       params->msbits = snd_pcm_format_width(format);
+               }
         }
  
         if (params->msbits) {
diff --git a/sound/core/ump.c b/sound/core/ump.c

index 3bef1944e955ff24940c1cba0d0a82f79db33d6e..fe7911498cc4325a866a87328087c54a0a6c791a 100644 (file)
--- a/sound/core/ump.c
+++ b/sound/core/ump.c
@@ -985,7 +985,7 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream)
         struct snd_ump_endpoint *ump = substream->rmidi->private_data;
         int dir = substream->stream;
         int group = ump->legacy_mapping[substream->number];
-       int err;
+       int err = 0;
  
         mutex_lock(&ump->open_mutex);
         if (ump->legacy_substreams[dir][group]) {
@@ -1009,7 +1009,7 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream)
         spin_unlock_irq(&ump->legacy_locks[dir]);
   unlock:
         mutex_unlock(&ump->open_mutex);
-       return 0;
+       return err;
  }
  
  static int snd_ump_legacy_close(struct snd_rawmidi_substream *substream)
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c

index a13c0b408aadfcc6d2f1f588cdd96a733961bf2a..7be17bca257f0ddba4799875c9250f96ba4e4b8a 100644 (file)
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -951,7 +951,7 @@ static int generate_tx_packet_descs(struct amdtp_stream *s, struct pkt_desc *des
                                 // to the reason.
                                 unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle,
                                                                 IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES);
-                               lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0);
+                               lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0);
                         }
                         if (lost) {
                                 dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n",
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig

index 21a90b3c4cc7300e0abfc301e34338170dd6b911..8e0ff70fb6101ff9b94aab0d79c4b6f7cf90498d 100644 (file)
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -156,7 +156,7 @@ config SND_HDA_SCODEC_CS35L56_I2C
         depends on I2C
         depends on ACPI || COMPILE_TEST
         depends on SND_SOC
-       select CS_DSP
+       select FW_CS_DSP
         select SND_HDA_GENERIC
         select SND_SOC_CS35L56_SHARED
         select SND_HDA_SCODEC_CS35L56
@@ -171,7 +171,7 @@ config SND_HDA_SCODEC_CS35L56_SPI
         depends on SPI_MASTER
         depends on ACPI || COMPILE_TEST
         depends on SND_SOC
-       select CS_DSP
+       select FW_CS_DSP
         select SND_HDA_GENERIC
         select SND_SOC_CS35L56_SHARED
         select SND_HDA_SCODEC_CS35L56
diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c

index 35277ce890a46fb9204cc80a55e812cb64c842d5..87dcb367e2391939de02f24c6edd1dd8a4aac8a8 100644 (file)
--- a/sound/pci/hda/cs35l41_hda_property.c
+++ b/sound/pci/hda/cs35l41_hda_property.c
@@ -76,6 +76,8 @@ static const struct cs35l41_config cs35l41_config_table[] = {
         { "10431533", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
         { "10431573", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
         { "10431663", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 },
+       { "10431683", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+       { "104316A3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
         { "104316D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
         { "104316F3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
         { "104317F3", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
@@ -89,10 +91,12 @@ static const struct cs35l41_config cs35l41_config_table[] = {
         { "10431D1F", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
         { "10431DA2", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
         { "10431E02", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+       { "10431E12", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
         { "10431EE2", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
         { "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
         { "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 },
         { "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+       { "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
         { "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
         { "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
         { "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
@@ -410,6 +414,8 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
         { "CSC3551", "10431533", generic_dsd_config },
         { "CSC3551", "10431573", generic_dsd_config },
         { "CSC3551", "10431663", generic_dsd_config },
+       { "CSC3551", "10431683", generic_dsd_config },
+       { "CSC3551", "104316A3", generic_dsd_config },
         { "CSC3551", "104316D3", generic_dsd_config },
         { "CSC3551", "104316F3", generic_dsd_config },
         { "CSC3551", "104317F3", generic_dsd_config },
@@ -423,10 +429,12 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
         { "CSC3551", "10431D1F", generic_dsd_config },
         { "CSC3551", "10431DA2", generic_dsd_config },
         { "CSC3551", "10431E02", generic_dsd_config },
+       { "CSC3551", "10431E12", generic_dsd_config },
         { "CSC3551", "10431EE2", generic_dsd_config },
         { "CSC3551", "10431F12", generic_dsd_config },
         { "CSC3551", "10431F1F", generic_dsd_config },
         { "CSC3551", "10431F62", generic_dsd_config },
+       { "CSC3551", "17AA386F", generic_dsd_config },
         { "CSC3551", "17AA38B4", generic_dsd_config },
         { "CSC3551", "17AA38B5", generic_dsd_config },
         { "CSC3551", "17AA38B6", generic_dsd_config },
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c

index b61e1de8c4bf905a6bddd8bc859da2dc9b8cd408..75a14ba54fcd1c270459b47be66ecb6aa799aa33 100644 (file)
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -30,14 +30,23 @@
    *  ASP1_RX_WL = 24 bits per sample
    *  ASP1_TX_WL = 24 bits per sample
    *  ASP1_RXn_EN 1..3 and ASP1_TXn_EN 1..4 disabled
+  *
+  * Override any Windows-specific mixer settings applied by the firmware.
    */
  static const struct reg_sequence cs35l56_hda_dai_config[] = {
         { CS35L56_ASP1_CONTROL1,        0x00000021 },
         { CS35L56_ASP1_CONTROL2,        0x20200200 },
         { CS35L56_ASP1_CONTROL3,        0x00000003 },
+       { CS35L56_ASP1_FRAME_CONTROL1,  0x03020100 },
+       { CS35L56_ASP1_FRAME_CONTROL5,  0x00020100 },
         { CS35L56_ASP1_DATA_CONTROL5,   0x00000018 },
         { CS35L56_ASP1_DATA_CONTROL1,   0x00000018 },
         { CS35L56_ASP1_ENABLES1,        0x00000000 },
+       { CS35L56_ASP1TX1_INPUT,        0x00000018 },
+       { CS35L56_ASP1TX2_INPUT,        0x00000019 },
+       { CS35L56_ASP1TX3_INPUT,        0x00000020 },
+       { CS35L56_ASP1TX4_INPUT,        0x00000028 },
+
  };
  
  static void cs35l56_hda_play(struct cs35l56_hda *cs35l56)
@@ -133,6 +142,10 @@ static int cs35l56_hda_runtime_resume(struct device *dev)
                 }
         }
  
+       ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base);
+       if (ret)
+               goto err;
+
         return 0;
  
  err:
@@ -384,7 +397,7 @@ static const struct cs_dsp_client_ops cs35l56_hda_client_ops = {
  
  static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56,
                                              const struct firmware **firmware, char **filename,
-                                            const char *dir, const char *system_name,
+                                            const char *base_name, const char *system_name,
                                              const char *amp_name,
                                              const char *filetype)
  {
@@ -392,17 +405,13 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56,
         int ret = 0;
  
         if (system_name && amp_name)
-               *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc-%s-%s.%s", dir,
-                                     cs35l56->base.secured ? "s" : "", cs35l56->base.rev,
+               *filename = kasprintf(GFP_KERNEL, "%s-%s-%s.%s", base_name,
                                       system_name, amp_name, filetype);
         else if (system_name)
-               *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc-%s.%s", dir,
-                                     cs35l56->base.secured ? "s" : "", cs35l56->base.rev,
+               *filename = kasprintf(GFP_KERNEL, "%s-%s.%s", base_name,
                                       system_name, filetype);
         else
-               *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc.%s", dir,
-                                     cs35l56->base.secured ? "s" : "", cs35l56->base.rev,
-                                     filetype);
+               *filename = kasprintf(GFP_KERNEL, "%s.%s", base_name, filetype);
  
         if (!*filename)
                 return -ENOMEM;
@@ -435,8 +444,8 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56,
         return 0;
  }
  
-static const char cirrus_dir[] = "cirrus/";
  static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56,
+                                              unsigned int preloaded_fw_ver,
                                                const struct firmware **wmfw_firmware,
                                                char **wmfw_filename,
                                                const struct firmware **coeff_firmware,
@@ -444,55 +453,73 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56,
  {
         const char *system_name = cs35l56->system_name;
         const char *amp_name = cs35l56->amp_name;
+       char base_name[37];
         int ret;
  
+       if (preloaded_fw_ver) {
+               snprintf(base_name, sizeof(base_name),
+                        "cirrus/cs35l56-%02x%s-%06x-dsp1-misc",
+                        cs35l56->base.rev,
+                        cs35l56->base.secured ? "-s" : "",
+                        preloaded_fw_ver & 0xffffff);
+       } else {
+               snprintf(base_name, sizeof(base_name),
+                        "cirrus/cs35l56-%02x%s-dsp1-misc",
+                        cs35l56->base.rev,
+                        cs35l56->base.secured ? "-s" : "");
+       }
+
         if (system_name && amp_name) {
                 if (!cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename,
-                                                      cirrus_dir, system_name, amp_name, "wmfw")) {
+                                                      base_name, system_name, amp_name, "wmfw")) {
                         cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
-                                                         cirrus_dir, system_name, amp_name, "bin");
+                                                         base_name, system_name, amp_name, "bin");
                         return;
                 }
         }
  
         if (system_name) {
                 if (!cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename,
-                                                      cirrus_dir, system_name, NULL, "wmfw")) {
+                                                      base_name, system_name, NULL, "wmfw")) {
                         if (amp_name)
                                 cs35l56_hda_request_firmware_file(cs35l56,
                                                                   coeff_firmware, coeff_filename,
-                                                                 cirrus_dir, system_name,
+                                                                 base_name, system_name,
                                                                   amp_name, "bin");
                         if (!*coeff_firmware)
                                 cs35l56_hda_request_firmware_file(cs35l56,
                                                                   coeff_firmware, coeff_filename,
-                                                                 cirrus_dir, system_name,
+                                                                 base_name, system_name,
                                                                   NULL, "bin");
                         return;
                 }
+
+               /*
+                * Check for system-specific bin files without wmfw before
+                * falling back to generic firmware
+                */
+               if (amp_name)
+                       cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
+                                                         base_name, system_name, amp_name, "bin");
+               if (!*coeff_firmware)
+                       cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
+                                                         base_name, system_name, NULL, "bin");
+
+               if (*coeff_firmware)
+                       return;
         }
  
         ret = cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename,
-                                               cirrus_dir, NULL, NULL, "wmfw");
+                                               base_name, NULL, NULL, "wmfw");
         if (!ret) {
                 cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
-                                                 cirrus_dir, NULL, NULL, "bin");
+                                                 base_name, NULL, NULL, "bin");
                 return;
         }
  
-       /* When a firmware file is not found must still search for the coeff files */
-       if (system_name) {
-               if (amp_name)
-                       cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
-                                                         cirrus_dir, system_name, amp_name, "bin");
-               if (!*coeff_firmware)
-                       cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
-                                                         cirrus_dir, system_name, NULL, "bin");
-       }
-
         if (!*coeff_firmware)
                 cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename,
-                                                 cirrus_dir, NULL, NULL, "bin");
+                                                 base_name, NULL, NULL, "bin");
  }
  
  static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmware,
@@ -526,7 +553,8 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
         const struct firmware *wmfw_firmware = NULL;
         char *coeff_filename = NULL;
         char *wmfw_filename = NULL;
-       unsigned int firmware_missing;
+       unsigned int preloaded_fw_ver;
+       bool firmware_missing;
         int ret = 0;
  
         /* Prepare for a new DSP power-up */
@@ -537,24 +565,21 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
  
         pm_runtime_get_sync(cs35l56->base.dev);
  
-       ret = regmap_read(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS, &firmware_missing);
-       if (ret) {
-               dev_err(cs35l56->base.dev, "Failed to read PROTECTION_STATUS: %d\n", ret);
+       /*
+        * The firmware can only be upgraded if it is currently running
+        * from the built-in ROM. If not, the wmfw/bin must be for the
+        * version of firmware that is running on the chip.
+        */
+       ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &preloaded_fw_ver);
+       if (ret)
                 goto err_pm_put;
-       }
  
-       firmware_missing &= CS35L56_FIRMWARE_MISSING;
+       if (firmware_missing)
+               preloaded_fw_ver = 0;
  
-       /*
-        * Firmware can only be downloaded if the CS35L56 is secured or is
-        * running from the built-in ROM. If it is secured the BIOS will have
-        * downloaded firmware, and the wmfw/bin files will only contain
-        * tunings that are safe to download with the firmware running.
-        */
-       if (cs35l56->base.secured || firmware_missing) {
-               cs35l56_hda_request_firmware_files(cs35l56, &wmfw_firmware, &wmfw_filename,
-                                                  &coeff_firmware, &coeff_filename);
-       }
+       cs35l56_hda_request_firmware_files(cs35l56, preloaded_fw_ver,
+                                          &wmfw_firmware, &wmfw_filename,
+                                          &coeff_firmware, &coeff_filename);
  
         /*
          * If the BIOS didn't patch the firmware a bin file is mandatory to
@@ -569,12 +594,12 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
         mutex_lock(&cs35l56->base.irq_lock);
  
         /*
-        * When the device is running in secure mode the firmware files can
-        * only contain insecure tunings and therefore we do not need to
-        * shutdown the firmware to apply them and can use the lower cost
-        * reinit sequence instead.
+        * If the firmware hasn't been patched it must be shutdown before
+        * doing a full patch and reset afterwards. If it is already
+        * running a patched version the firmware files only contain
+        * tunings and we can use the lower cost reinit sequence instead.
          */
-       if (!cs35l56->base.secured && (wmfw_firmware || coeff_firmware)) {
+       if (firmware_missing && (wmfw_firmware || coeff_firmware)) {
                 ret = cs35l56_firmware_shutdown(&cs35l56->base);
                 if (ret)
                         goto err;
@@ -593,7 +618,7 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
         if (coeff_filename)
                 dev_dbg(cs35l56->base.dev, "Loaded Coefficients: %s\n", coeff_filename);
  
-       if (cs35l56->base.secured) {
+       if (!firmware_missing) {
                 ret = cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_REINIT);
                 if (ret)
                         goto err_powered_up;
@@ -976,6 +1001,9 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id)
  
         regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config,
                                ARRAY_SIZE(cs35l56_hda_dai_config));
+       ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base);
+       if (ret)
+               goto err;
  
         /*
          * By default only enable one ASP1TXn, where n=amplifier index,
@@ -1035,16 +1063,6 @@ const struct dev_pm_ops cs35l56_hda_pm_ops = {
  };
  EXPORT_SYMBOL_NS_GPL(cs35l56_hda_pm_ops, SND_HDA_SCODEC_CS35L56);
  
-#if IS_ENABLED(CONFIG_SND_HDA_SCODEC_CS35L56_KUNIT_TEST)
-/* Hooks to export static function to KUnit test */
-
-int cs35l56_hda_test_hook_get_speaker_id(struct device *dev, int amp_index, int num_amps)
-{
-       return cs35l56_hda_get_speaker_id(dev, amp_index, num_amps);
-}
-EXPORT_SYMBOL_NS_GPL(cs35l56_hda_test_hook_get_speaker_id, SND_HDA_SCODEC_CS35L56);
-#endif
-
  MODULE_DESCRIPTION("CS35L56 HDA Driver");
  MODULE_IMPORT_NS(SND_HDA_CIRRUS_SCODEC);
  MODULE_IMPORT_NS(SND_HDA_CS_DSP_CONTROLS);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c

index 2276adc8447840a232eb493e5bac54af0cb35682..1b550c42db092739135e5917a74914894e254454 100644 (file)
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1729,9 +1729,11 @@ static int default_bdl_pos_adj(struct azx *chip)
         /* some exceptions: Atoms seem problematic with value 1 */
         if (chip->pci->vendor == PCI_VENDOR_ID_INTEL) {
                 switch (chip->pci->device) {
-               case 0x0f04: /* Baytrail */
-               case 0x2284: /* Braswell */
+               case PCI_DEVICE_ID_INTEL_HDA_BYT:
+               case PCI_DEVICE_ID_INTEL_HDA_BSW:
                         return 32;
+               case PCI_DEVICE_ID_INTEL_HDA_APL:
+                       return 64;
                 }
         }
  
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c

index e8819e8a98763cb5a4760768be387a4a387c384d..e8209178d87bbcc88551e6773c90c9fec7d98af2 100644 (file)
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -344,6 +344,7 @@ enum {
         CXT_FIXUP_HP_ZBOOK_MUTE_LED,
         CXT_FIXUP_HEADSET_MIC,
         CXT_FIXUP_HP_MIC_NO_PRESENCE,
+       CXT_PINCFG_SWS_JS201D,
  };
  
  /* for hda_fixup_thinkpad_acpi() */
@@ -841,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = {
         {}
  };
  
+/* SuoWoSi/South-holding JS201D with sn6140 */
+static const struct hda_pintbl cxt_pincfg_sws_js201d[] = {
+       { 0x16, 0x03211040 }, /* hp out */
+       { 0x17, 0x91170110 }, /* SPK/Class_D */
+       { 0x18, 0x95a70130 }, /* Internal mic */
+       { 0x19, 0x03a11020 }, /* Headset Mic */
+       { 0x1a, 0x40f001f0 }, /* Not used */
+       { 0x21, 0x40f001f0 }, /* Not used */
+       {}
+};
+
  static const struct hda_fixup cxt_fixups[] = {
         [CXT_PINCFG_LENOVO_X200] = {
                 .type = HDA_FIXUP_PINS,
@@ -996,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = {
                 .chained = true,
                 .chain_id = CXT_FIXUP_HEADSET_MIC,
         },
+       [CXT_PINCFG_SWS_JS201D] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = cxt_pincfg_sws_js201d,
+       },
  };
  
  static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -1069,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
         SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE),
         SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE),
         SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
+       SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D),
         SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
         SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
         SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410),
@@ -1109,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
         { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" },
         { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" },
         { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" },
+       { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" },
         {}
  };
  
diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c

index 627899959ffe8c34c76211d51824e1d1e93d33a8..e41316e2e98338a5d69245ed9e9db1b979a2fdf2 100644 (file)
--- a/sound/pci/hda/patch_cs8409.c
+++ b/sound/pci/hda/patch_cs8409.c
@@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac
                 spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1;
                 spec->scodecs[CS8409_CODEC1]->codec = codec;
                 spec->num_scodecs = 2;
+               spec->gen.suppress_vmaster = 1;
  
                 codec->patch_ops = cs8409_dolphin_patch_ops;
  
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c

index f6f16622f9cc78a1ac8ca0de8b82e915f580f7fd..62701197a019b0c4faef3d4d72cad96e841d9cbc 100644 (file)
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -439,6 +439,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
                 alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
                 fallthrough;
         case 0x10ec0215:
+       case 0x10ec0285:
+       case 0x10ec0289:
+               alc_update_coef_idx(codec, 0x36, 1<<13, 0);
+               fallthrough;
         case 0x10ec0230:
         case 0x10ec0233:
         case 0x10ec0235:
@@ -452,9 +456,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
         case 0x10ec0283:
         case 0x10ec0286:
         case 0x10ec0288:
-       case 0x10ec0285:
         case 0x10ec0298:
-       case 0x10ec0289:
         case 0x10ec0300:
                 alc_update_coef_idx(codec, 0x10, 1<<9, 0);
                 break;
@@ -3682,6 +3684,7 @@ static void alc285_hp_init(struct hda_codec *codec)
         int i, val;
         int coef38, coef0d, coef36;
  
+       alc_write_coefex_idx(codec, 0x58, 0x00, 0x1888); /* write default value */
         alc_update_coef_idx(codec, 0x4a, 1<<15, 1<<15); /* Reset HP JD */
         coef38 = alc_read_coef_idx(codec, 0x38); /* Amp control */
         coef0d = alc_read_coef_idx(codec, 0x0d); /* Digital Misc control */
@@ -7442,6 +7445,7 @@ enum {
         ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE,
         ALC287_FIXUP_YOGA7_14ITL_SPEAKERS,
         ALC298_FIXUP_LENOVO_C940_DUET7,
+       ALC287_FIXUP_LENOVO_14IRP8_DUETITL,
         ALC287_FIXUP_13S_GEN2_SPEAKERS,
         ALC256_FIXUP_SET_COEF_DEFAULTS,
         ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
@@ -7493,6 +7497,26 @@ static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec,
         __snd_hda_apply_fixup(codec, id, action, 0);
  }
  
+/* A special fixup for Lenovo Slim/Yoga Pro 9 14IRP8 and Yoga DuetITL 2021;
+ * 14IRP8 PCI SSID will mistakenly be matched with the DuetITL codec SSID,
+ * so we need to apply a different fixup in this case. The only DuetITL codec
+ * SSID reported so far is the 17aa:3802 while the 14IRP8 has the 17aa:38be
+ * and 17aa:38bf. If it weren't for the PCI SSID, the 14IRP8 models would
+ * have matched correctly by their codecs.
+ */
+static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec,
+                                             const struct hda_fixup *fix,
+                                             int action)
+{
+       int id;
+
+       if (codec->core.subsystem_id == 0x17aa3802)
+               id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* DuetITL */
+       else
+               id = ALC287_FIXUP_TAS2781_I2C; /* 14IRP8 */
+       __snd_hda_apply_fixup(codec, id, action, 0);
+}
+
  static const struct hda_fixup alc269_fixups[] = {
         [ALC269_FIXUP_GPIO2] = {
                 .type = HDA_FIXUP_FUNC,
@@ -9377,6 +9401,10 @@ static const struct hda_fixup alc269_fixups[] = {
                 .type = HDA_FIXUP_FUNC,
                 .v.func = alc298_fixup_lenovo_c940_duet7,
         },
+       [ALC287_FIXUP_LENOVO_14IRP8_DUETITL] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc287_fixup_lenovo_14irp8_duetitl,
+       },
         [ALC287_FIXUP_13S_GEN2_SPEAKERS] = {
                 .type = HDA_FIXUP_VERBS,
                 .v.verbs = (const struct hda_verb[]) {
@@ -9577,13 +9605,13 @@ static const struct hda_fixup alc269_fixups[] = {
                 .type = HDA_FIXUP_FUNC,
                 .v.func = cs35l41_fixup_i2c_two,
                 .chained = true,
-               .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+               .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
         },
         [ALC287_FIXUP_TAS2781_I2C] = {
                 .type = HDA_FIXUP_FUNC,
                 .v.func = tas2781_fixup_i2c,
                 .chained = true,
-               .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+               .chain_id = ALC285_FIXUP_THINKPAD_HEADSET_JACK,
         },
         [ALC287_FIXUP_YOGA7_14ARB7_I2C] = {
                 .type = HDA_FIXUP_FUNC,
@@ -9604,6 +9632,8 @@ static const struct hda_fixup alc269_fixups[] = {
         [ALC287_FIXUP_THINKPAD_I2S_SPK] = {
                 .type = HDA_FIXUP_FUNC,
                 .v.func = alc287_fixup_bind_dacs,
+               .chained = true,
+               .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
         },
         [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = {
                 .type = HDA_FIXUP_FUNC,
@@ -9653,6 +9683,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
         SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE),
         SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
         SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
         SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
         SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
@@ -9732,12 +9763,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
         SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2),
         SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1028, 0x0c0b, "Dell Oasis 14 RPL-P", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
+       SND_PCI_QUIRK(0x1028, 0x0c0d, "Dell Oasis", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
+       SND_PCI_QUIRK(0x1028, 0x0c0e, "Dell Oasis 16", ALC289_FIXUP_RTK_AMP_DUAL_SPK),
         SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
         SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS),
         SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
         SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
         SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
         SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
+       SND_PCI_QUIRK(0x1028, 0x0c28, "Dell Inspiron 16 Plus 7630", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS),
         SND_PCI_QUIRK(0x1028, 0x0c4d, "Dell", ALC287_FIXUP_CS35L41_I2C_4),
         SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2),
         SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2),
@@ -9852,6 +9887,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
         SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
         SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
+       SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
         SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
@@ -9893,6 +9929,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4),
         SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
         SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
@@ -9918,16 +9955,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
         SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
+       SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
         SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
         SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
         SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -9957,6 +9998,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
         SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+       SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
         SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -9994,6 +10037,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK),
         SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC),
         SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2),
         SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
         SND_PCI_QUIRK(0x1043, 0x16d3, "ASUS UX5304VA", ALC245_FIXUP_CS35L41_SPI_2),
         SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
@@ -10037,14 +10081,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
         SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2),
         SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2),
         SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
-       SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
         SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
         SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS),
         SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
-       SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
         SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401),
         SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
         SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10235,7 +10277,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
         SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC),
         SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
-       SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+       SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8 / DuetITL 2021", ALC287_FIXUP_LENOVO_14IRP8_DUETITL),
         SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
         SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7),
         SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
@@ -10251,6 +10293,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
         SND_PCI_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6),
         SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
+       SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2),
         SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C),
         SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C),
         SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C),
@@ -10322,6 +10365,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
         SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
         SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
         SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO),
+       SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME),
         SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
         SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
         SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c

index 2dd809de62e5a4ac049a49752f447b0a8b4a3be6..1bfb00102a77a4d49695efeaf7016eded961345b 100644 (file)
--- a/sound/pci/hda/tas2781_hda_i2c.c
+++ b/sound/pci/hda/tas2781_hda_i2c.c
@@ -710,7 +710,7 @@ static int tas2781_hda_bind(struct device *dev, struct device *master,
  
         strscpy(comps->name, dev_name(dev), sizeof(comps->name));
  
-       ret = tascodec_init(tas_hda->priv, codec, tasdev_fw_ready);
+       ret = tascodec_init(tas_hda->priv, codec, THIS_MODULE, tasdev_fw_ready);
         if (!ret)
                 comps->playback_hook = tas2781_hda_playback_hook;
  
diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c

index c90ec3419247797a0628bfa5207eaf3afcb8d012..504d1b8c4cbb4f104a8b8e70adf10894f211ce6c 100644 (file)
--- a/sound/soc/amd/acp/acp-mach-common.c
+++ b/sound/soc/amd/acp/acp-mach-common.c
@@ -505,6 +505,13 @@ static int acp_card_rt5682s_hw_params(struct snd_pcm_substream *substream,
  
         clk_set_rate(drvdata->wclk, srate);
         clk_set_rate(drvdata->bclk, srate * ch * format);
+       if (!drvdata->soc_mclk) {
+               ret = acp_clk_enable(drvdata, srate, ch * format);
+               if (ret < 0) {
+                       dev_err(rtd->card->dev, "Failed to enable HS clk: %d\n", ret);
+                       return ret;
+               }
+       }
  
         return 0;
  }
@@ -1464,8 +1471,13 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card)
         if (drv_data->amp_cpu_id == I2S_SP) {
                 links[i].name = "acp-amp-codec";
                 links[i].id = AMP_BE_ID;
-               links[i].cpus = sof_sp_virtual;
-               links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual);
+               if (drv_data->platform == RENOIR) {
+                       links[i].cpus = sof_sp;
+                       links[i].num_cpus = ARRAY_SIZE(sof_sp);
+               } else {
+                       links[i].cpus = sof_sp_virtual;
+                       links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual);
+               }
                 links[i].platforms = sof_component;
                 links[i].num_platforms = ARRAY_SIZE(sof_component);
                 links[i].dpcm_playback = 1;
diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c

index 2a9fd3275e42f5fa1086d10baf4a8cc4cc2b69b1..20b94814a0462147258fe94cd4219b772afa45f0 100644 (file)
--- a/sound/soc/amd/acp/acp-sof-mach.c
+++ b/sound/soc/amd/acp/acp-sof-mach.c
@@ -48,6 +48,7 @@ static struct acp_card_drvdata sof_rt5682s_rt1019_data = {
         .hs_codec_id = RT5682S,
         .amp_codec_id = RT1019,
         .dmic_codec_id = DMIC,
+       .platform = RENOIR,
         .tdm_mode = false,
  };
  
@@ -58,6 +59,7 @@ static struct acp_card_drvdata sof_rt5682s_max_data = {
         .hs_codec_id = RT5682S,
         .amp_codec_id = MAX98360A,
         .dmic_codec_id = DMIC,
+       .platform = RENOIR,
         .tdm_mode = false,
  };
  
@@ -68,6 +70,7 @@ static struct acp_card_drvdata sof_nau8825_data = {
         .hs_codec_id = NAU8825,
         .amp_codec_id = MAX98360A,
         .dmic_codec_id = DMIC,
+       .platform = REMBRANDT,
         .soc_mclk = true,
         .tdm_mode = false,
  };
@@ -79,6 +82,7 @@ static struct acp_card_drvdata sof_rt5682s_hs_rt1019_data = {
         .hs_codec_id = RT5682S,
         .amp_codec_id = RT1019,
         .dmic_codec_id = DMIC,
+       .platform = REMBRANDT,
         .soc_mclk = true,
         .tdm_mode = false,
  };
diff --git a/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c b/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c

index f85b85ea4be9c28cf6ba1f29b4c7290908bc842d..2b0aa270a3e9d75c8ebd47aaf12e6fc0b73c75b3 100644 (file)
--- a/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c
+++ b/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c
@@ -354,6 +354,14 @@ static const struct dmi_system_id acp3x_es83xx_dmi_table[] = {
                 },
                 .driver_data = (void *)(ES83XX_ENABLE_DMIC|ES83XX_48_MHZ_MCLK),
         },
+       {
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HVY-WXX9"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "M1010"),
+               },
+               .driver_data = (void *)(ES83XX_ENABLE_DMIC),
+       },
         {
                 .matches = {
                         DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"),
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c

index d83cb6e4c62aecc6e54a700e5d22f136253e42fb..abb9589b8477cd844066c2a4c683b70eea5d3ddd 100644 (file)
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -199,6 +199,20 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                         DMI_MATCH(DMI_PRODUCT_NAME, "21HY"),
                 }
         },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "21J2"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "21J0"),
+               }
+       },
         {
                 .driver_data = &acp6x_card,
                 .matches = {
@@ -234,6 +248,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                         DMI_MATCH(DMI_PRODUCT_NAME, "82UG"),
                 }
         },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "82UU"),
+               }
+       },
         {
                 .driver_data = &acp6x_card,
                 .matches = {
@@ -248,6 +269,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                         DMI_MATCH(DMI_PRODUCT_NAME, "82YM"),
                 }
         },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "83AS"),
+               }
+       },
         {
                 .driver_data = &acp6x_card,
                 .matches = {
@@ -297,6 +325,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                         DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 B7ED"),
                 }
         },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"),
+               }
+       },
         {
                 .driver_data = &acp6x_card,
                 .matches = {
diff --git a/sound/soc/amd/yc/pci-acp6x.c b/sound/soc/amd/yc/pci-acp6x.c

index 7af6a349b1d41fb60d9450a312927e4774889a34..694b8e31390248b88e4bc8aba843316244f0bdb4 100644 (file)
--- a/sound/soc/amd/yc/pci-acp6x.c
+++ b/sound/soc/amd/yc/pci-acp6x.c
@@ -162,6 +162,7 @@ static int snd_acp6x_probe(struct pci_dev *pci,
         /* Yellow Carp device check */
         switch (pci->revision) {
         case 0x60:
+       case 0x63:
         case 0x6f:
                 break;
         default:
diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c

index 44c221745c3b255c7e69a4ab092ad09581a9b1f9..2392c6effed857c32326ee032f776dcc577ed0fb 100644 (file)
--- a/sound/soc/codecs/cs35l45.c
+++ b/sound/soc/codecs/cs35l45.c
@@ -184,7 +184,7 @@ static int cs35l45_activate_ctl(struct snd_soc_component *component,
         else
                 snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s", ctl_name);
  
-       kcontrol = snd_soc_card_get_kcontrol(component->card, name);
+       kcontrol = snd_soc_card_get_kcontrol_locked(component->card, name);
         if (!kcontrol) {
                 dev_err(component->dev, "Can't find kcontrol %s\n", name);
                 return -EINVAL;
diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c

index 953ba066bab1e30dfc22ea94bead73ce2f91c0fe..cb4e83126b085228fa196aeb7e635537c39cfc24 100644 (file)
--- a/sound/soc/codecs/cs35l56-shared.c
+++ b/sound/soc/codecs/cs35l56-shared.c
@@ -5,6 +5,7 @@
  // Copyright (C) 2023 Cirrus Logic, Inc. and
  //                    Cirrus Logic International Semiconductor Ltd.
  
+#include <linux/gpio/consumer.h>
  #include <linux/regmap.h>
  #include <linux/regulator/consumer.h>
  #include <linux/types.h>
@@ -12,6 +13,15 @@
  #include "cs35l56.h"
  
  static const struct reg_sequence cs35l56_patch[] = {
+       /*
+        * Firmware can change these to non-defaults to satisfy SDCA.
+        * Ensure that they are at known defaults.
+        */
+       { CS35L56_SWIRE_DP3_CH1_INPUT,          0x00000018 },
+       { CS35L56_SWIRE_DP3_CH2_INPUT,          0x00000019 },
+       { CS35L56_SWIRE_DP3_CH3_INPUT,          0x00000029 },
+       { CS35L56_SWIRE_DP3_CH4_INPUT,          0x00000028 },
+
         /* These are not reset by a soft-reset, so patch to defaults. */
         { CS35L56_MAIN_RENDER_USER_MUTE,        0x00000000 },
         { CS35L56_MAIN_RENDER_USER_VOLUME,      0x00000000 },
@@ -34,15 +44,13 @@ static const struct reg_default cs35l56_reg_defaults[] = {
         { CS35L56_ASP1_FRAME_CONTROL5,          0x00020100 },
         { CS35L56_ASP1_DATA_CONTROL1,           0x00000018 },
         { CS35L56_ASP1_DATA_CONTROL5,           0x00000018 },
-       { CS35L56_ASP1TX1_INPUT,                0x00000018 },
-       { CS35L56_ASP1TX2_INPUT,                0x00000019 },
-       { CS35L56_ASP1TX3_INPUT,                0x00000020 },
-       { CS35L56_ASP1TX4_INPUT,                0x00000028 },
+
+       /* no defaults for ASP1TX mixer */
+
         { CS35L56_SWIRE_DP3_CH1_INPUT,          0x00000018 },
         { CS35L56_SWIRE_DP3_CH2_INPUT,          0x00000019 },
         { CS35L56_SWIRE_DP3_CH3_INPUT,          0x00000029 },
         { CS35L56_SWIRE_DP3_CH4_INPUT,          0x00000028 },
-       { CS35L56_IRQ1_CFG,                     0x00000000 },
         { CS35L56_IRQ1_MASK_1,                  0x83ffffff },
         { CS35L56_IRQ1_MASK_2,                  0xffff7fff },
         { CS35L56_IRQ1_MASK_4,                  0xe0ffffff },
@@ -195,6 +203,47 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg)
         }
  }
  
+/*
+ * The firmware boot sequence can overwrite the ASP1 config registers so that
+ * they don't match regmap's view of their values. Rewrite the values from the
+ * regmap cache into the hardware registers.
+ */
+int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base)
+{
+       struct reg_sequence asp1_regs[] = {
+               { .reg = CS35L56_ASP1_ENABLES1 },
+               { .reg = CS35L56_ASP1_CONTROL1 },
+               { .reg = CS35L56_ASP1_CONTROL2 },
+               { .reg = CS35L56_ASP1_CONTROL3 },
+               { .reg = CS35L56_ASP1_FRAME_CONTROL1 },
+               { .reg = CS35L56_ASP1_FRAME_CONTROL5 },
+               { .reg = CS35L56_ASP1_DATA_CONTROL1 },
+               { .reg = CS35L56_ASP1_DATA_CONTROL5 },
+       };
+       int i, ret;
+
+       /* Read values from regmap cache into a write sequence */
+       for (i = 0; i < ARRAY_SIZE(asp1_regs); ++i) {
+               ret = regmap_read(cs35l56_base->regmap, asp1_regs[i].reg, &asp1_regs[i].def);
+               if (ret)
+                       goto err;
+       }
+
+       /* Write the values cache-bypassed so that they will be written to silicon */
+       ret = regmap_multi_reg_write_bypassed(cs35l56_base->regmap, asp1_regs,
+                                             ARRAY_SIZE(asp1_regs));
+       if (ret)
+               goto err;
+
+       return 0;
+
+err:
+       dev_err(cs35l56_base->dev, "Failed to sync ASP1 registers: %d\n", ret);
+
+       return ret;
+}
+EXPORT_SYMBOL_NS_GPL(cs35l56_force_sync_asp1_registers_from_cache, SND_SOC_CS35L56_SHARED);
+
  int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command)
  {
         unsigned int val;
@@ -286,6 +335,7 @@ void cs35l56_wait_min_reset_pulse(void)
  EXPORT_SYMBOL_NS_GPL(cs35l56_wait_min_reset_pulse, SND_SOC_CS35L56_SHARED);
  
  static const struct reg_sequence cs35l56_system_reset_seq[] = {
+       REG_SEQ0(CS35L56_DSP1_HALO_STATE, 0),
         REG_SEQ0(CS35L56_DSP_VIRTUAL1_MBOX_1, CS35L56_MBOX_CMD_SYSTEM_RESET),
  };
  
@@ -400,17 +450,6 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base)
         unsigned int val;
         int ret;
  
-       /* Nothing to re-patch if we haven't done any patching yet. */
-       if (!cs35l56_base->fw_patched)
-               return false;
-
-       /*
-        * If we have control of RESET we will have asserted it so the firmware
-        * will need re-patching.
-        */
-       if (cs35l56_base->reset_gpio)
-               return true;
-
         /*
          * In secure mode FIRMWARE_MISSING is cleared by the BIOS loader so
          * can't be used here to test for memory retention.
@@ -590,10 +629,35 @@ void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_ds
  }
  EXPORT_SYMBOL_NS_GPL(cs35l56_init_cs_dsp, SND_SOC_CS35L56_SHARED);
  
+int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base,
+                            bool *fw_missing, unsigned int *fw_version)
+{
+       unsigned int prot_status;
+       int ret;
+
+       ret = regmap_read(cs35l56_base->regmap, CS35L56_PROTECTION_STATUS, &prot_status);
+       if (ret) {
+               dev_err(cs35l56_base->dev, "Get PROTECTION_STATUS failed: %d\n", ret);
+               return ret;
+       }
+
+       *fw_missing = !!(prot_status & CS35L56_FIRMWARE_MISSING);
+
+       ret = regmap_read(cs35l56_base->regmap, CS35L56_DSP1_FW_VER, fw_version);
+       if (ret) {
+               dev_err(cs35l56_base->dev, "Get FW VER failed: %d\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cs35l56_read_prot_status, SND_SOC_CS35L56_SHARED);
+
  int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
  {
         int ret;
-       unsigned int devid, revid, otpid, secured;
+       unsigned int devid, revid, otpid, secured, fw_ver;
+       bool fw_missing;
  
         /*
          * When the system is not using a reset_gpio ensure the device is
@@ -652,8 +716,13 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
                 return ret;
         }
  
-       dev_info(cs35l56_base->dev, "Cirrus Logic CS35L56%s Rev %02X OTP%d\n",
-                cs35l56_base->secured ? "s" : "", cs35l56_base->rev, otpid);
+       ret = cs35l56_read_prot_status(cs35l56_base, &fw_missing, &fw_ver);
+       if (ret)
+               return ret;
+
+       dev_info(cs35l56_base->dev, "Cirrus Logic CS35L56%s Rev %02X OTP%d fw:%d.%d.%d (patched=%u)\n",
+                cs35l56_base->secured ? "s" : "", cs35l56_base->rev, otpid,
+                fw_ver >> 16, (fw_ver >> 8) & 0xff, fw_ver & 0xff, !fw_missing);
  
         /* Wake source and *_BLOCKED interrupts default to unmasked, so mask them */
         regmap_write(cs35l56_base->regmap, CS35L56_IRQ1_MASK_20, 0xffffffff);
@@ -668,6 +737,41 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
  }
  EXPORT_SYMBOL_NS_GPL(cs35l56_hw_init, SND_SOC_CS35L56_SHARED);
  
+int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base)
+{
+       struct gpio_descs *descs;
+       int speaker_id;
+       int i, ret;
+
+       /* Read the speaker type qualifier from the motherboard GPIOs */
+       descs = gpiod_get_array_optional(cs35l56_base->dev, "spk-id", GPIOD_IN);
+       if (!descs) {
+               return -ENOENT;
+       } else if (IS_ERR(descs)) {
+               ret = PTR_ERR(descs);
+               return dev_err_probe(cs35l56_base->dev, ret, "Failed to get spk-id-gpios\n");
+       }
+
+       speaker_id = 0;
+       for (i = 0; i < descs->ndescs; i++) {
+               ret = gpiod_get_value_cansleep(descs->desc[i]);
+               if (ret < 0) {
+                       dev_err_probe(cs35l56_base->dev, ret, "Failed to read spk-id[%d]\n", i);
+                       goto err;
+               }
+
+               speaker_id |= (ret << i);
+       }
+
+       dev_dbg(cs35l56_base->dev, "Speaker ID = %d\n", speaker_id);
+       ret = speaker_id;
+err:
+       gpiod_put_array(descs);
+
+       return ret;
+}
+EXPORT_SYMBOL_NS_GPL(cs35l56_get_speaker_id, SND_SOC_CS35L56_SHARED);
+
  static const u32 cs35l56_bclk_valid_for_pll_freq_table[] = {
         [0x0C] = 128000,
         [0x0F] = 256000,
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c

index 45b4de3eff94ffef7ba6ef8ffcccf11747f74415..6dd0319bc843cf5d1740222e57c380881b4b5b05 100644 (file)
--- a/sound/soc/codecs/cs35l56.c
+++ b/sound/soc/codecs/cs35l56.c
@@ -5,6 +5,7 @@
  // Copyright (C) 2023 Cirrus Logic, Inc. and
  //                    Cirrus Logic International Semiconductor Ltd.
  
+#include <linux/acpi.h>
  #include <linux/completion.h>
  #include <linux/debugfs.h>
  #include <linux/delay.h>
@@ -15,6 +16,7 @@
  #include <linux/module.h>
  #include <linux/pm.h>
  #include <linux/pm_runtime.h>
+#include <linux/property.h>
  #include <linux/regmap.h>
  #include <linux/regulator/consumer.h>
  #include <linux/slab.h>
@@ -59,6 +61,131 @@ static int cs35l56_dspwait_put_volsw(struct snd_kcontrol *kcontrol,
         return snd_soc_put_volsw(kcontrol, ucontrol);
  }
  
+static const unsigned short cs35l56_asp1_mixer_regs[] = {
+       CS35L56_ASP1TX1_INPUT, CS35L56_ASP1TX2_INPUT,
+       CS35L56_ASP1TX3_INPUT, CS35L56_ASP1TX4_INPUT,
+};
+
+static const char * const cs35l56_asp1_mux_control_names[] = {
+       "ASP1 TX1 Source", "ASP1 TX2 Source", "ASP1 TX3 Source", "ASP1 TX4 Source"
+};
+
+static int cs35l56_sync_asp1_mixer_widgets_with_firmware(struct cs35l56_private *cs35l56)
+{
+       struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cs35l56->component);
+       const char *prefix = cs35l56->component->name_prefix;
+       char full_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
+       const char *name;
+       struct snd_kcontrol *kcontrol;
+       struct soc_enum *e;
+       unsigned int val[4];
+       int i, item, ret;
+
+       if (cs35l56->asp1_mixer_widgets_initialized)
+               return 0;
+
+       /*
+        * Resume so we can read the registers from silicon if the regmap
+        * cache has not yet been populated.
+        */
+       ret = pm_runtime_resume_and_get(cs35l56->base.dev);
+       if (ret < 0)
+               return ret;
+
+       /* Wait for firmware download and reboot */
+       cs35l56_wait_dsp_ready(cs35l56);
+
+       ret = regmap_bulk_read(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT,
+                              val, ARRAY_SIZE(val));
+
+       pm_runtime_mark_last_busy(cs35l56->base.dev);
+       pm_runtime_put_autosuspend(cs35l56->base.dev);
+
+       if (ret) {
+               dev_err(cs35l56->base.dev, "Failed to read ASP1 mixer regs: %d\n", ret);
+               return ret;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(cs35l56_asp1_mux_control_names); ++i) {
+               name = cs35l56_asp1_mux_control_names[i];
+
+               if (prefix) {
+                       snprintf(full_name, sizeof(full_name), "%s %s", prefix, name);
+                       name = full_name;
+               }
+
+               kcontrol = snd_soc_card_get_kcontrol_locked(dapm->card, name);
+               if (!kcontrol) {
+                       dev_warn(cs35l56->base.dev, "Could not find control %s\n", name);
+                       continue;
+               }
+
+               e = (struct soc_enum *)kcontrol->private_value;
+               item = snd_soc_enum_val_to_item(e, val[i] & CS35L56_ASP_TXn_SRC_MASK);
+               snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL);
+       }
+
+       cs35l56->asp1_mixer_widgets_initialized = true;
+
+       return 0;
+}
+
+static int cs35l56_dspwait_asp1tx_get(struct snd_kcontrol *kcontrol,
+                                     struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol);
+       struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
+       struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+       int index = e->shift_l;
+       unsigned int addr, val;
+       int ret;
+
+       ret = cs35l56_sync_asp1_mixer_widgets_with_firmware(cs35l56);
+       if (ret)
+               return ret;
+
+       addr = cs35l56_asp1_mixer_regs[index];
+       ret = regmap_read(cs35l56->base.regmap, addr, &val);
+       if (ret)
+               return ret;
+
+       val &= CS35L56_ASP_TXn_SRC_MASK;
+       ucontrol->value.enumerated.item[0] = snd_soc_enum_val_to_item(e, val);
+
+       return 0;
+}
+
+static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol,
+                                     struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol);
+       struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol);
+       struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
+       struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+       int item = ucontrol->value.enumerated.item[0];
+       int index = e->shift_l;
+       unsigned int addr, val;
+       bool changed;
+       int ret;
+
+       ret = cs35l56_sync_asp1_mixer_widgets_with_firmware(cs35l56);
+       if (ret)
+               return ret;
+
+       addr = cs35l56_asp1_mixer_regs[index];
+       val = snd_soc_enum_item_to_val(e, item);
+
+       ret = regmap_update_bits_check(cs35l56->base.regmap, addr,
+                                      CS35L56_ASP_TXn_SRC_MASK, val, &changed);
+       if (ret)
+               return ret;
+
+       if (changed)
+               snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL);
+
+       return changed;
+}
+
  static DECLARE_TLV_DB_SCALE(vol_tlv, -10000, 25, 0);
  
  static const struct snd_kcontrol_new cs35l56_controls[] = {
@@ -77,40 +204,44 @@ static const struct snd_kcontrol_new cs35l56_controls[] = {
  };
  
  static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx1_enum,
-                                 CS35L56_ASP1TX1_INPUT,
-                                 0, CS35L56_ASP_TXn_SRC_MASK,
+                                 SND_SOC_NOPM,
+                                 0, 0,
                                   cs35l56_tx_input_texts,
                                   cs35l56_tx_input_values);
  
  static const struct snd_kcontrol_new asp1_tx1_mux =
-       SOC_DAPM_ENUM("ASP1TX1 SRC", cs35l56_asp1tx1_enum);
+       SOC_DAPM_ENUM_EXT("ASP1TX1 SRC", cs35l56_asp1tx1_enum,
+                         cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put);
  
  static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx2_enum,
-                                 CS35L56_ASP1TX2_INPUT,
-                                 0, CS35L56_ASP_TXn_SRC_MASK,
+                                 SND_SOC_NOPM,
+                                 1, 0,
                                   cs35l56_tx_input_texts,
                                   cs35l56_tx_input_values);
  
  static const struct snd_kcontrol_new asp1_tx2_mux =
-       SOC_DAPM_ENUM("ASP1TX2 SRC", cs35l56_asp1tx2_enum);
+       SOC_DAPM_ENUM_EXT("ASP1TX2 SRC", cs35l56_asp1tx2_enum,
+                         cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put);
  
  static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx3_enum,
-                                 CS35L56_ASP1TX3_INPUT,
-                                 0, CS35L56_ASP_TXn_SRC_MASK,
+                                 SND_SOC_NOPM,
+                                 2, 0,
                                   cs35l56_tx_input_texts,
                                   cs35l56_tx_input_values);
  
  static const struct snd_kcontrol_new asp1_tx3_mux =
-       SOC_DAPM_ENUM("ASP1TX3 SRC", cs35l56_asp1tx3_enum);
+       SOC_DAPM_ENUM_EXT("ASP1TX3 SRC", cs35l56_asp1tx3_enum,
+                         cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put);
  
  static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx4_enum,
-                                 CS35L56_ASP1TX4_INPUT,
-                                 0, CS35L56_ASP_TXn_SRC_MASK,
+                                 SND_SOC_NOPM,
+                                 3, 0,
                                   cs35l56_tx_input_texts,
                                   cs35l56_tx_input_values);
  
  static const struct snd_kcontrol_new asp1_tx4_mux =
-       SOC_DAPM_ENUM("ASP1TX4 SRC", cs35l56_asp1tx4_enum);
+       SOC_DAPM_ENUM_EXT("ASP1TX4 SRC", cs35l56_asp1tx4_enum,
+                         cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put);
  
  static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_sdw1tx1_enum,
                                 CS35L56_SWIRE_DP3_CH1_INPUT,
@@ -148,6 +279,21 @@ static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_sdw1tx4_enum,
  static const struct snd_kcontrol_new sdw1_tx4_mux =
         SOC_DAPM_ENUM("SDW1TX4 SRC", cs35l56_sdw1tx4_enum);
  
+static int cs35l56_asp1_cfg_event(struct snd_soc_dapm_widget *w,
+                                 struct snd_kcontrol *kcontrol, int event)
+{
+       struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+       struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
+
+       switch (event) {
+       case SND_SOC_DAPM_PRE_PMU:
+               /* Override register values set by firmware boot */
+               return cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base);
+       default:
+               return 0;
+       }
+}
+
  static int cs35l56_play_event(struct snd_soc_dapm_widget *w,
                               struct snd_kcontrol *kcontrol, int event)
  {
@@ -184,6 +330,9 @@ static const struct snd_soc_dapm_widget cs35l56_dapm_widgets[] = {
         SND_SOC_DAPM_REGULATOR_SUPPLY("VDD_B", 0, 0),
         SND_SOC_DAPM_REGULATOR_SUPPLY("VDD_AMP", 0, 0),
  
+       SND_SOC_DAPM_SUPPLY("ASP1 CFG", SND_SOC_NOPM, 0, 0, cs35l56_asp1_cfg_event,
+                           SND_SOC_DAPM_PRE_PMU),
+
         SND_SOC_DAPM_SUPPLY("PLAY", SND_SOC_NOPM, 0, 0, cs35l56_play_event,
                             SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD),
  
@@ -251,6 +400,9 @@ static const struct snd_soc_dapm_route cs35l56_audio_map[] = {
         { "AMP", NULL, "VDD_B" },
         { "AMP", NULL, "VDD_AMP" },
  
+       { "ASP1 Playback", NULL, "ASP1 CFG" },
+       { "ASP1 Capture", NULL, "ASP1 CFG" },
+
         { "ASP1 Playback", NULL, "PLAY" },
         { "SDW1 Playback", NULL, "PLAY" },
  
@@ -650,7 +802,7 @@ static struct snd_soc_dai_driver cs35l56_dai[] = {
         }
  };
  
-static void cs35l56_secure_patch(struct cs35l56_private *cs35l56)
+static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56)
  {
         int ret;
  
@@ -662,19 +814,10 @@ static void cs35l56_secure_patch(struct cs35l56_private *cs35l56)
                 cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_REINIT);
  }
  
-static void cs35l56_patch(struct cs35l56_private *cs35l56)
+static void cs35l56_patch(struct cs35l56_private *cs35l56, bool firmware_missing)
  {
-       unsigned int firmware_missing;
         int ret;
  
-       ret = regmap_read(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS, &firmware_missing);
-       if (ret) {
-               dev_err(cs35l56->base.dev, "Failed to read PROTECTION_STATUS: %d\n", ret);
-               return;
-       }
-
-       firmware_missing &= CS35L56_FIRMWARE_MISSING;
-
         /*
          * Disable SoundWire interrupts to prevent race with IRQ work.
          * Setting sdw_irq_no_unmask prevents the handler re-enabling
@@ -747,23 +890,51 @@ static void cs35l56_dsp_work(struct work_struct *work)
         struct cs35l56_private *cs35l56 = container_of(work,
                                                        struct cs35l56_private,
                                                        dsp_work);
+       unsigned int firmware_version;
+       bool firmware_missing;
+       int ret;
  
         if (!cs35l56->base.init_done)
                 return;
  
         pm_runtime_get_sync(cs35l56->base.dev);
  
+       ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &firmware_version);
+       if (ret)
+               goto err;
+
+       /* Populate fw file qualifier with the revision and security state */
+       kfree(cs35l56->dsp.fwf_name);
+       if (firmware_missing) {
+               cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, "%02x-dsp1", cs35l56->base.rev);
+       } else {
+               /* Firmware files must match the running firmware version */
+               cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL,
+                                                 "%02x%s-%06x-dsp1",
+                                                 cs35l56->base.rev,
+                                                 cs35l56->base.secured ? "-s" : "",
+                                                 firmware_version);
+       }
+
+       if (!cs35l56->dsp.fwf_name)
+               goto err;
+
+       dev_dbg(cs35l56->base.dev, "DSP fwf name: '%s' system name: '%s'\n",
+               cs35l56->dsp.fwf_name, cs35l56->dsp.system_name);
+
         /*
-        * When the device is running in secure mode the firmware files can
-        * only contain insecure tunings and therefore we do not need to
-        * shutdown the firmware to apply them and can use the lower cost
-        * reinit sequence instead.
+        * The firmware cannot be patched if it is already running from
+        * patch RAM. In this case the firmware files are versioned to
+        * match the running firmware version and will only contain
+        * tunings. We do not need to shutdown the firmware to apply
+        * tunings so can use the lower cost reinit sequence instead.
          */
-       if (cs35l56->base.secured)
-               cs35l56_secure_patch(cs35l56);
+       if (!firmware_missing)
+               cs35l56_reinit_patch(cs35l56);
         else
-               cs35l56_patch(cs35l56);
+               cs35l56_patch(cs35l56, firmware_missing);
  
+err:
         pm_runtime_mark_last_busy(cs35l56->base.dev);
         pm_runtime_put_autosuspend(cs35l56->base.dev);
  }
@@ -778,10 +949,19 @@ static int cs35l56_component_probe(struct snd_soc_component *component)
  
         if (!cs35l56->dsp.system_name &&
             (snd_soc_card_get_pci_ssid(component->card, &vendor, &device) == 0)) {
-               cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev,
-                                                         GFP_KERNEL,
-                                                         "%04x%04x",
-                                                         vendor, device);
+               /* Append a speaker qualifier if there is a speaker ID */
+               if (cs35l56->speaker_id >= 0) {
+                       cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev,
+                                                                 GFP_KERNEL,
+                                                                 "%04x%04x-spkid%d",
+                                                                 vendor, device,
+                                                                 cs35l56->speaker_id);
+               } else {
+                       cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev,
+                                                                 GFP_KERNEL,
+                                                                 "%04x%04x",
+                                                                 vendor, device);
+               }
                 if (!cs35l56->dsp.system_name)
                         return -ENOMEM;
         }
@@ -799,6 +979,13 @@ static int cs35l56_component_probe(struct snd_soc_component *component)
         debugfs_create_bool("can_hibernate", 0444, debugfs_root, &cs35l56->base.can_hibernate);
         debugfs_create_bool("fw_patched", 0444, debugfs_root, &cs35l56->base.fw_patched);
  
+       /*
+        * The widgets for the ASP1TX mixer can't be initialized
+        * until the firmware has been downloaded and rebooted.
+        */
+       regcache_drop_region(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, CS35L56_ASP1TX4_INPUT);
+       cs35l56->asp1_mixer_widgets_initialized = false;
+
         queue_work(cs35l56->dsp_wq, &cs35l56->dsp_work);
  
         return 0;
@@ -809,6 +996,16 @@ static void cs35l56_component_remove(struct snd_soc_component *component)
         struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component);
  
         cancel_work_sync(&cs35l56->dsp_work);
+
+       if (cs35l56->dsp.cs_dsp.booted)
+               wm_adsp_power_down(&cs35l56->dsp);
+
+       wm_adsp2_component_remove(&cs35l56->dsp, component);
+
+       kfree(cs35l56->dsp.fwf_name);
+       cs35l56->dsp.fwf_name = NULL;
+
+       cs35l56->component = NULL;
  }
  
  static int cs35l56_set_bias_level(struct snd_soc_component *component,
@@ -1050,7 +1247,13 @@ static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56)
         if (ret < 0)
                 return 0;
  
-       cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL);
+       /* Append a speaker qualifier if there is a speaker ID */
+       if (cs35l56->speaker_id >= 0)
+               cs35l56->dsp.system_name = devm_kasprintf(dev, GFP_KERNEL, "%s-spkid%d",
+                                                         prop, cs35l56->speaker_id);
+       else
+               cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL);
+
         if (cs35l56->dsp.system_name == NULL)
                 return -ENOMEM;
  
@@ -1059,12 +1262,101 @@ static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56)
         return 0;
  }
  
+/*
+ * Some SoundWire laptops have a spk-id-gpios property but it points to
+ * the wrong ACPI Device node so can't be used to get the GPIO. Try to
+ * find the SDCA node containing the GpioIo resource and add a GPIO
+ * mapping to it.
+ */
+static const struct acpi_gpio_params cs35l56_af01_first_gpio = { 0, 0, false };
+static const struct acpi_gpio_mapping cs35l56_af01_spkid_gpios_mapping[] = {
+       { "spk-id-gpios", &cs35l56_af01_first_gpio, 1 },
+       { }
+};
+
+static void cs35l56_acpi_dev_release_driver_gpios(void *adev)
+{
+       acpi_dev_remove_driver_gpios(adev);
+}
+
+static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l56)
+{
+       struct fwnode_handle *af01_fwnode;
+       const union acpi_object *obj;
+       struct gpio_desc *desc;
+       int ret;
+
+       /* Find the SDCA node containing the GpioIo */
+       af01_fwnode = device_get_named_child_node(cs35l56->base.dev, "AF01");
+       if (!af01_fwnode) {
+               dev_dbg(cs35l56->base.dev, "No AF01 node\n");
+               return -ENOENT;
+       }
+
+       ret = acpi_dev_get_property(ACPI_COMPANION(cs35l56->base.dev),
+                                   "spk-id-gpios", ACPI_TYPE_PACKAGE, &obj);
+       if (ret) {
+               dev_dbg(cs35l56->base.dev, "Could not get spk-id-gpios package: %d\n", ret);
+               return -ENOENT;
+       }
+
+       /* The broken properties we can handle are a 4-element package (one GPIO) */
+       if (obj->package.count != 4) {
+               dev_warn(cs35l56->base.dev, "Unexpected spk-id element count %d\n",
+                        obj->package.count);
+               return -ENOENT;
+       }
+
+       /* Add a GPIO mapping if it doesn't already have one */
+       if (!fwnode_property_present(af01_fwnode, "spk-id-gpios")) {
+               struct acpi_device *adev = to_acpi_device_node(af01_fwnode);
+
+               /*
+                * Can't use devm_acpi_dev_add_driver_gpios() because the
+                * mapping isn't being added to the node pointed to by
+                * ACPI_COMPANION().
+                */
+               ret = acpi_dev_add_driver_gpios(adev, cs35l56_af01_spkid_gpios_mapping);
+               if (ret) {
+                       return dev_err_probe(cs35l56->base.dev, ret,
+                                            "Failed to add gpio mapping to AF01\n");
+               }
+
+               ret = devm_add_action_or_reset(cs35l56->base.dev,
+                                              cs35l56_acpi_dev_release_driver_gpios,
+                                              adev);
+               if (ret)
+                       return ret;
+
+               dev_dbg(cs35l56->base.dev, "Added spk-id-gpios mapping to AF01\n");
+       }
+
+       desc = fwnode_gpiod_get_index(af01_fwnode, "spk-id", 0, GPIOD_IN, NULL);
+       if (IS_ERR(desc)) {
+               ret = PTR_ERR(desc);
+               return dev_err_probe(cs35l56->base.dev, ret, "Get GPIO from AF01 failed\n");
+       }
+
+       ret = gpiod_get_value_cansleep(desc);
+       gpiod_put(desc);
+
+       if (ret < 0) {
+               dev_err_probe(cs35l56->base.dev, ret, "Error reading spk-id GPIO\n");
+               return ret;
+               }
+
+       dev_info(cs35l56->base.dev, "Got spk-id from AF01\n");
+
+       return ret;
+}
+
  int cs35l56_common_probe(struct cs35l56_private *cs35l56)
  {
         int ret;
  
         init_completion(&cs35l56->init_completion);
         mutex_init(&cs35l56->base.irq_lock);
+       cs35l56->speaker_id = -ENOENT;
  
         dev_set_drvdata(cs35l56->base.dev, cs35l56);
  
@@ -1101,6 +1393,15 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56)
                 gpiod_set_value_cansleep(cs35l56->base.reset_gpio, 1);
         }
  
+       ret = cs35l56_get_speaker_id(&cs35l56->base);
+       if (ACPI_COMPANION(cs35l56->base.dev) && cs35l56->sdw_peripheral && (ret == -ENOENT))
+               ret = cs35l56_try_get_broken_sdca_spkid_gpio(cs35l56);
+
+       if ((ret < 0) && (ret != -ENOENT))
+               goto err;
+
+       cs35l56->speaker_id = ret;
+
         ret = cs35l56_get_firmware_uid(cs35l56);
         if (ret != 0)
                 goto err;
@@ -1152,11 +1453,9 @@ int cs35l56_init(struct cs35l56_private *cs35l56)
         if (ret < 0)
                 return ret;
  
-       /* Populate the DSP information with the revision and security state */
-       cs35l56->dsp.part = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, "cs35l56%s-%02x",
-                                          cs35l56->base.secured ? "s" : "", cs35l56->base.rev);
-       if (!cs35l56->dsp.part)
-               return -ENOMEM;
+       ret = cs35l56_set_patch(&cs35l56->base);
+       if (ret)
+               return ret;
  
         if (!cs35l56->base.reset_gpio) {
                 dev_dbg(cs35l56->base.dev, "No reset gpio: using soft reset\n");
@@ -1190,10 +1489,6 @@ post_soft_reset:
         if (ret)
                 return ret;
  
-       ret = cs35l56_set_patch(&cs35l56->base);
-       if (ret)
-               return ret;
-
         /* Registers could be dirty after soft reset or SoundWire enumeration */
         regcache_sync(cs35l56->base.regmap);
  
diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h

index 8159c3e217d936c02baf88c5659a99e4f3159ddd..b000e7365e4065eaffc3b3ea2aa2714b9acccdfd 100644 (file)
--- a/sound/soc/codecs/cs35l56.h
+++ b/sound/soc/codecs/cs35l56.h
@@ -44,12 +44,14 @@ struct cs35l56_private {
         bool sdw_attached;
         struct completion init_completion;
  
+       int speaker_id;
         u32 rx_mask;
         u32 tx_mask;
         u8 asp_slot_width;
         u8 asp_slot_count;
         bool tdm_mode;
         bool sysclk_set;
+       bool asp1_mixer_widgets_initialized;
         u8 old_sdw_clock_scale;
  };
  
diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c

index 6a64681767de8122d06bee23b5a21674d930bf57..a97ccb512deba86305ff433fa5b639fdfbb8dcbe 100644 (file)
--- a/sound/soc/codecs/cs42l43.c
+++ b/sound/soc/codecs/cs42l43.c
@@ -2257,7 +2257,10 @@ static int cs42l43_codec_probe(struct platform_device *pdev)
         pm_runtime_use_autosuspend(priv->dev);
         pm_runtime_set_active(priv->dev);
         pm_runtime_get_noresume(priv->dev);
-       devm_pm_runtime_enable(priv->dev);
+
+       ret = devm_pm_runtime_enable(priv->dev);
+       if (ret)
+               goto err_pm;
  
         for (i = 0; i < ARRAY_SIZE(cs42l43_irqs); i++) {
                 ret = cs42l43_request_irq(priv, dom, cs42l43_irqs[i].name,
@@ -2333,8 +2336,47 @@ static int cs42l43_codec_runtime_resume(struct device *dev)
         return 0;
  }
  
-static DEFINE_RUNTIME_DEV_PM_OPS(cs42l43_codec_pm_ops, NULL,
-                                cs42l43_codec_runtime_resume, NULL);
+static int cs42l43_codec_suspend(struct device *dev)
+{
+       struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+       disable_irq(cs42l43->irq);
+
+       return 0;
+}
+
+static int cs42l43_codec_suspend_noirq(struct device *dev)
+{
+       struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+       enable_irq(cs42l43->irq);
+
+       return 0;
+}
+
+static int cs42l43_codec_resume(struct device *dev)
+{
+       struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+       enable_irq(cs42l43->irq);
+
+       return 0;
+}
+
+static int cs42l43_codec_resume_noirq(struct device *dev)
+{
+       struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+       disable_irq(cs42l43->irq);
+
+       return 0;
+}
+
+static const struct dev_pm_ops cs42l43_codec_pm_ops = {
+       SYSTEM_SLEEP_PM_OPS(cs42l43_codec_suspend, cs42l43_codec_resume)
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(cs42l43_codec_suspend_noirq, cs42l43_codec_resume_noirq)
+       RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL)
+};
  
  static const struct platform_device_id cs42l43_codec_id_table[] = {
         { "cs42l43-codec", },
diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c

old mode 100755 (executable)

new mode 100644 (file)

index fa890f6..cbcd02e
--- a/sound/soc/codecs/es8326.c
+++ b/sound/soc/codecs/es8326.c
@@ -45,6 +45,82 @@ struct es8326_priv {
         int jack_remove_retry;
  };
  
+static int es8326_crosstalk1_get(struct snd_kcontrol *kcontrol,
+               struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+       struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
+       unsigned int crosstalk_h, crosstalk_l;
+       unsigned int crosstalk;
+
+       regmap_read(es8326->regmap, ES8326_DAC_RAMPRATE, &crosstalk_h);
+       regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l);
+       crosstalk_h &= 0x20;
+       crosstalk_l &= 0xf0;
+       crosstalk = crosstalk_h >> 1 | crosstalk_l >> 4;
+       ucontrol->value.integer.value[0] = crosstalk;
+
+       return 0;
+}
+
+static int es8326_crosstalk1_set(struct snd_kcontrol *kcontrol,
+               struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+       struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
+       unsigned int crosstalk_h, crosstalk_l;
+       unsigned int crosstalk;
+
+       crosstalk = ucontrol->value.integer.value[0];
+       regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l);
+       crosstalk_h = (crosstalk & 0x10) << 1;
+       crosstalk_l &= 0x0f;
+       crosstalk_l |= (crosstalk & 0x0f) << 4;
+       regmap_update_bits(es8326->regmap, ES8326_DAC_RAMPRATE,
+                       0x20, crosstalk_h);
+       regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, crosstalk_l);
+
+       return 0;
+}
+
+static int es8326_crosstalk2_get(struct snd_kcontrol *kcontrol,
+               struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+       struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
+       unsigned int crosstalk_h, crosstalk_l;
+       unsigned int crosstalk;
+
+       regmap_read(es8326->regmap, ES8326_DAC_RAMPRATE, &crosstalk_h);
+       regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l);
+       crosstalk_h &= 0x10;
+       crosstalk_l &= 0x0f;
+       crosstalk = crosstalk_h  | crosstalk_l;
+       ucontrol->value.integer.value[0] = crosstalk;
+
+       return 0;
+}
+
+static int es8326_crosstalk2_set(struct snd_kcontrol *kcontrol,
+               struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+       struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component);
+       unsigned int crosstalk_h, crosstalk_l;
+       unsigned int crosstalk;
+
+       crosstalk = ucontrol->value.integer.value[0];
+       regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l);
+       crosstalk_h = crosstalk & 0x10;
+       crosstalk_l &= 0xf0;
+       crosstalk_l |= crosstalk & 0x0f;
+       regmap_update_bits(es8326->regmap, ES8326_DAC_RAMPRATE,
+                       0x10, crosstalk_h);
+       regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, crosstalk_l);
+
+       return 0;
+}
+
  static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(dac_vol_tlv, -9550, 50, 0);
  static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_vol_tlv, -9550, 50, 0);
  static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_analog_pga_tlv, 0, 300, 0);
@@ -102,6 +178,10 @@ static const struct snd_kcontrol_new es8326_snd_controls[] = {
         SOC_SINGLE_TLV("ALC Capture Target Level", ES8326_ALC_LEVEL,
                         0, 0x0f, 0, drc_target_tlv),
  
+       SOC_SINGLE_EXT("CROSSTALK1", SND_SOC_NOPM, 0, 31, 0,
+                       es8326_crosstalk1_get, es8326_crosstalk1_set),
+       SOC_SINGLE_EXT("CROSSTALK2", SND_SOC_NOPM, 0, 31, 0,
+                       es8326_crosstalk2_get, es8326_crosstalk2_set),
  };
  
  static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = {
@@ -117,12 +197,6 @@ static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = {
         SND_SOC_DAPM_AIF_OUT("I2S OUT", "I2S1 Capture", 0, SND_SOC_NOPM, 0, 0),
         SND_SOC_DAPM_AIF_IN("I2S IN", "I2S1 Playback", 0, SND_SOC_NOPM, 0, 0),
  
-       /* ADC Digital Mute */
-       SND_SOC_DAPM_PGA("ADC L1", ES8326_ADC_MUTE, 0, 1, NULL, 0),
-       SND_SOC_DAPM_PGA("ADC R1", ES8326_ADC_MUTE, 1, 1, NULL, 0),
-       SND_SOC_DAPM_PGA("ADC L2", ES8326_ADC_MUTE, 2, 1, NULL, 0),
-       SND_SOC_DAPM_PGA("ADC R2", ES8326_ADC_MUTE, 3, 1, NULL, 0),
-
         /* Analog Power Supply*/
         SND_SOC_DAPM_DAC("Right DAC", NULL, ES8326_ANA_PDN, 0, 1),
         SND_SOC_DAPM_DAC("Left DAC", NULL, ES8326_ANA_PDN, 1, 1),
@@ -142,15 +216,10 @@ static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = {
  };
  
  static const struct snd_soc_dapm_route es8326_dapm_routes[] = {
-       {"ADC L1", NULL, "MIC1"},
-       {"ADC R1", NULL, "MIC2"},
-       {"ADC L2", NULL, "MIC3"},
-       {"ADC R2", NULL, "MIC4"},
-
-       {"ADC L", NULL, "ADC L1"},
-       {"ADC R", NULL, "ADC R1"},
-       {"ADC L", NULL, "ADC L2"},
-       {"ADC R", NULL, "ADC R2"},
+       {"ADC L", NULL, "MIC1"},
+       {"ADC R", NULL, "MIC2"},
+       {"ADC L", NULL, "MIC3"},
+       {"ADC R", NULL, "MIC4"},
  
         {"I2S OUT", NULL, "ADC L"},
         {"I2S OUT", NULL, "ADC R"},
@@ -440,10 +509,16 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction)
         unsigned int offset_l, offset_r;
  
         if (mute) {
-               regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF);
-               regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE,
-                               ES8326_MUTE_MASK, ES8326_MUTE);
-               regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xf0);
+               if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+                       regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF);
+                       regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE,
+                                       ES8326_MUTE_MASK, ES8326_MUTE);
+                       regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF,
+                                       0x30, 0x00);
+               } else {
+                       regmap_update_bits(es8326->regmap,  ES8326_ADC_MUTE,
+                                       0x0F, 0x0F);
+               }
         } else {
                 if (!es8326->calibrated) {
                         regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_FORCE_CAL);
@@ -456,11 +531,22 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction)
                         regmap_write(es8326->regmap, ES8326_HPR_OFFSET_INI, offset_r);
                         es8326->calibrated = true;
                 }
-               regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1);
-               regmap_write(es8326->regmap, ES8326_HP_VOL, 0x91);
-               regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON);
-               regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE,
-                               ES8326_MUTE_MASK, ~(ES8326_MUTE));
+               if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+                       regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x01);
+                       usleep_range(1000, 5000);
+                       regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00);
+                       usleep_range(1000, 5000);
+                       regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x20);
+                       regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x30);
+                       regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1);
+                       regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON);
+                       regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE,
+                                       ES8326_MUTE_MASK, ~(ES8326_MUTE));
+               } else {
+                       msleep(300);
+                       regmap_update_bits(es8326->regmap,  ES8326_ADC_MUTE,
+                                       0x0F, 0x00);
+               }
         }
         return 0;
  }
@@ -477,23 +563,20 @@ static int es8326_set_bias_level(struct snd_soc_component *codec,
                 if (ret)
                         return ret;
  
-               regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00);
+               regmap_update_bits(es8326->regmap, ES8326_RESET, 0x02, 0x02);
+               usleep_range(5000, 10000);
                 regmap_write(es8326->regmap, ES8326_INTOUT_IO, es8326->interrupt_clk);
                 regmap_write(es8326->regmap, ES8326_SDINOUT1_IO,
                             (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT));
-               regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
                 regmap_write(es8326->regmap, ES8326_PGA_PDN, 0x40);
                 regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00);
                 regmap_update_bits(es8326->regmap,  ES8326_CLK_CTL, 0x20, 0x20);
-
-               regmap_update_bits(es8326->regmap, ES8326_RESET,
-                               ES8326_CSM_ON, ES8326_CSM_ON);
+               regmap_update_bits(es8326->regmap, ES8326_RESET, 0x02, 0x00);
                 break;
         case SND_SOC_BIAS_PREPARE:
                 break;
         case SND_SOC_BIAS_STANDBY:
                 regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b);
-               regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x00);
                 regmap_update_bits(es8326->regmap, ES8326_CLK_CTL, 0x20, 0x00);
                 regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, ES8326_IO_INPUT);
                 break;
@@ -513,7 +596,7 @@ static const struct snd_soc_dai_ops es8326_ops = {
         .set_fmt = es8326_set_dai_fmt,
         .set_sysclk = es8326_set_dai_sysclk,
         .mute_stream = es8326_mute,
-       .no_capture_mute = 1,
+       .no_capture_mute = 0,
  };
  
  static struct snd_soc_dai_driver es8326_dai = {
@@ -672,6 +755,8 @@ static void es8326_jack_detect_handler(struct work_struct *work)
                         es8326->hp = 0;
                 }
                 regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
+               regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a);
+               regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03);
                 /*
                  * Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event
                  */
@@ -695,8 +780,11 @@ static void es8326_jack_detect_handler(struct work_struct *work)
                          * Don't report jack status.
                          */
                         regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
+                       es8326_enable_micbias(es8326->component);
                         usleep_range(50000, 70000);
                         regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
+                       regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f);
+                       regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08);
                         queue_delayed_work(system_wq, &es8326->jack_detect_work,
                                         msecs_to_jiffies(400));
                         es8326->hp = 1;
@@ -736,13 +824,10 @@ exit:
  static irqreturn_t es8326_irq(int irq, void *dev_id)
  {
         struct es8326_priv *es8326 = dev_id;
-       struct snd_soc_component *comp = es8326->component;
  
         if (!es8326->jack)
                 goto out;
  
-       es8326_enable_micbias(comp);
-
         if (es8326->jack->status & SND_JACK_HEADSET)
                 queue_delayed_work(system_wq, &es8326->jack_detect_work,
                                    msecs_to_jiffies(10));
@@ -766,14 +851,14 @@ static int es8326_calibrate(struct snd_soc_component *component)
         if ((es8326->version == ES8326_VERSION_B) && (es8326->calibrated == false)) {
                 dev_dbg(component->dev, "ES8326_VERSION_B, calibrating\n");
                 regmap_write(es8326->regmap, ES8326_CLK_INV, 0xc0);
-               regmap_write(es8326->regmap, ES8326_CLK_DIV1, 0x01);
+               regmap_write(es8326->regmap, ES8326_CLK_DIV1, 0x03);
                 regmap_write(es8326->regmap, ES8326_CLK_DLL, 0x30);
                 regmap_write(es8326->regmap, ES8326_CLK_MUX, 0xed);
                 regmap_write(es8326->regmap, ES8326_CLK_DAC_SEL, 0x08);
                 regmap_write(es8326->regmap, ES8326_CLK_TRI, 0xc1);
                 regmap_write(es8326->regmap, ES8326_DAC_MUTE, 0x03);
                 regmap_write(es8326->regmap, ES8326_ANA_VSEL, 0x7f);
-               regmap_write(es8326->regmap, ES8326_VMIDLOW, 0x03);
+               regmap_write(es8326->regmap, ES8326_VMIDLOW, 0x23);
                 regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x88);
                 usleep_range(15000, 20000);
                 regmap_write(es8326->regmap, ES8326_HP_OFFSET_CAL, 0x8c);
@@ -814,13 +899,13 @@ static int es8326_resume(struct snd_soc_component *component)
         /* reset internal clock state */
         regmap_write(es8326->regmap, ES8326_RESET, 0x1f);
         regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
+       regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0);
         usleep_range(10000, 15000);
         regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9);
-       regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0x4b);
+       regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb);
         /* set headphone default type and detect pin */
         regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83);
         regmap_write(es8326->regmap, ES8326_CLK_RESAMPLE, 0x05);
-       regmap_write(es8326->regmap, ES8326_HP_MISC, 0x30);
  
         /* set internal oscillator as clock source of headpone cp */
         regmap_write(es8326->regmap, ES8326_CLK_DIV_CPC, 0x89);
@@ -828,14 +913,15 @@ static int es8326_resume(struct snd_soc_component *component)
         /* clock manager reset release */
         regmap_write(es8326->regmap, ES8326_RESET, 0x17);
         /* set headphone detection as half scan mode */
-       regmap_write(es8326->regmap, ES8326_HP_MISC, 0x30);
+       regmap_write(es8326->regmap, ES8326_HP_MISC, 0x3d);
         regmap_write(es8326->regmap, ES8326_PULLUP_CTL, 0x00);
  
         /* enable headphone driver */
+       regmap_write(es8326->regmap, ES8326_HP_VOL, 0xc4);
         regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa7);
         usleep_range(2000, 5000);
-       regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0xa3);
-       regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0xb3);
+       regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0x23);
+       regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0x33);
         regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1);
  
         regmap_write(es8326->regmap, ES8326_CLK_INV, 0x00);
@@ -844,6 +930,8 @@ static int es8326_resume(struct snd_soc_component *component)
         regmap_write(es8326->regmap, ES8326_CLK_CAL_TIME, 0x00);
         /* calibrate for B version */
         es8326_calibrate(component);
+       regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, 0xaa);
+       regmap_write(es8326->regmap, ES8326_DAC_RAMPRATE, 0x00);
         /* turn off headphone out */
         regmap_write(es8326->regmap, ES8326_HP_CAL, 0x00);
         /* set ADC and DAC in low power mode */
@@ -856,6 +944,14 @@ static int es8326_resume(struct snd_soc_component *component)
         regmap_write(es8326->regmap, ES8326_DAC_DSM, 0x08);
         regmap_write(es8326->regmap, ES8326_DAC_VPPSCALE, 0x15);
  
+       regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x80 |
+                       ((es8326->version == ES8326_VERSION_B) ?
+                       (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol) :
+                       (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol | 0x04)));
+       usleep_range(5000, 10000);
+       es8326_enable_micbias(es8326->component);
+       usleep_range(50000, 70000);
+       regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
         regmap_write(es8326->regmap, ES8326_INT_SOURCE,
                     (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
         regmap_write(es8326->regmap, ES8326_INTOUT_IO,
@@ -864,7 +960,7 @@ static int es8326_resume(struct snd_soc_component *component)
                     (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT));
         regmap_write(es8326->regmap, ES8326_SDINOUT23_IO, ES8326_IO_INPUT);
  
-       regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b);
+       regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00);
         regmap_write(es8326->regmap, ES8326_RESET, ES8326_CSM_ON);
         regmap_update_bits(es8326->regmap, ES8326_PGAGAIN, ES8326_MIC_SEL_MASK,
                            ES8326_MIC1_SEL);
@@ -872,11 +968,7 @@ static int es8326_resume(struct snd_soc_component *component)
         regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK,
                            ES8326_MUTE);
  
-       regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x80 |
-                       ((es8326->version == ES8326_VERSION_B) ?
-                       (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol) :
-                       (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol | 0x04)));
-       regmap_write(es8326->regmap, ES8326_HP_VOL, 0x11);
+       regmap_write(es8326->regmap, ES8326_ADC_MUTE, 0x0f);
  
         es8326->jack_remove_retry = 0;
         es8326->hp = 0;
diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h

index 90a08351d6acd043b42a6ebed574591a3defc6a0..4234bbb900c4530c6746fab5f75fcb0b049e4fdb 100644 (file)
--- a/sound/soc/codecs/es8326.h
+++ b/sound/soc/codecs/es8326.h
@@ -72,6 +72,7 @@
  #define ES8326_DAC_VOL         0x50
  #define ES8326_DRC_RECOVERY    0x53
  #define ES8326_DRC_WINSIZE     0x54
+#define ES8326_DAC_CROSSTALK   0x55
  #define ES8326_HPJACK_TIMER    0x56
  #define ES8326_HPDET_TYPE      0x57
  #define ES8326_INT_SOURCE      0x58
@@ -100,7 +101,7 @@
  #define ES8326_MUTE (3 << 0)
  
  /* ES8326_CLK_CTL */
-#define ES8326_CLK_ON (0x7f << 0)
+#define ES8326_CLK_ON (0x7e << 0)
  #define ES8326_CLK_OFF (0 << 0)
  
  /* ES8326_CLK_INV */
diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c

index 7e21cec3c2fb97a9be518b4316cdeafae2cf0776..6ce309980cd10e200dc62a1941b07f6f7728d3cd 100644 (file)
--- a/sound/soc/codecs/lpass-wsa-macro.c
+++ b/sound/soc/codecs/lpass-wsa-macro.c
@@ -1584,7 +1584,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
         u16 gain_reg;
         u16 reg;
         int val;
-       int offset_val = 0;
         struct wsa_macro *wsa = snd_soc_component_get_drvdata(component);
  
         if (w->shift == WSA_MACRO_COMP1) {
@@ -1623,10 +1622,8 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
                                         CDC_WSA_RX1_RX_PATH_MIX_SEC0,
                                         CDC_WSA_RX_PGA_HALF_DB_MASK,
                                         CDC_WSA_RX_PGA_HALF_DB_ENABLE);
-                       offset_val = -2;
                 }
                 val = snd_soc_component_read(component, gain_reg);
-               val += offset_val;
                 snd_soc_component_write(component, gain_reg, val);
                 wsa_macro_config_ear_spkr_gain(component, wsa,
                                                 event, gain_reg);
@@ -1654,10 +1651,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
                                         CDC_WSA_RX1_RX_PATH_MIX_SEC0,
                                         CDC_WSA_RX_PGA_HALF_DB_MASK,
                                         CDC_WSA_RX_PGA_HALF_DB_DISABLE);
-                       offset_val = 2;
-                       val = snd_soc_component_read(component, gain_reg);
-                       val += offset_val;
-                       snd_soc_component_write(component, gain_reg, val);
                 }
                 wsa_macro_config_ear_spkr_gain(component, wsa,
                                                 event, gain_reg);
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c

index 5150d6ee374810f34a881dd33d4ea57187e13522..20191a4473c2d2c7b4ae6bddf34109a403b84e3d 100644 (file)
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3317,6 +3317,7 @@ static void rt5645_jack_detect_work(struct work_struct *work)
                                     report, SND_JACK_HEADPHONE);
                 snd_soc_jack_report(rt5645->mic_jack,
                                     report, SND_JACK_MICROPHONE);
+               mutex_unlock(&rt5645->jd_mutex);
                 return;
         case 4:
                 val = snd_soc_component_read(rt5645->component, RT5645_A_JD_CTRL1) & 0x0020;
@@ -3692,6 +3693,11 @@ static const struct rt5645_platform_data jd_mode3_monospk_platform_data = {
         .mono_speaker = true,
  };
  
+static const struct rt5645_platform_data jd_mode3_inv_data = {
+       .jd_mode = 3,
+       .inv_jd1_1 = true,
+};
+
  static const struct rt5645_platform_data jd_mode3_platform_data = {
         .jd_mode = 3,
  };
@@ -3837,6 +3843,16 @@ static const struct dmi_system_id dmi_platform_data[] = {
                   DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
                   DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
                   DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"),
+                 /*
+                  * Above strings are too generic, LattePanda BIOS versions for
+                  * all 4 hw revisions are:
+                  * DF-BI-7-S70CR100-*
+                  * DF-BI-7-S70CR110-*
+                  * DF-BI-7-S70CR200-*
+                  * LP-BS-7-S70CR700-*
+                  * Do a partial match for S70CR to avoid false positive matches.
+                  */
+                 DMI_MATCH(DMI_BIOS_VERSION, "S70CR"),
                 },
                 .driver_data = (void *)&lattepanda_board_platform_data,
         },
@@ -3871,6 +3887,16 @@ static const struct dmi_system_id dmi_platform_data[] = {
                 },
                 .driver_data = (void *)&intel_braswell_platform_data,
         },
+       {
+               .ident = "Meegopad T08",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Default string"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Default string"),
+                       DMI_MATCH(DMI_BOARD_NAME, "T3 MRD"),
+                       DMI_MATCH(DMI_BOARD_VERSION, "V1.1"),
+               },
+               .driver_data = (void *)&jd_mode3_inv_data,
+       },
         { }
  };
  
diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c

index b7e56ceb1acff9f41e6a26f09e1c4395ff537553..5d0e5348b361a568475fd1fde3a299a57926b365 100644 (file)
--- a/sound/soc/codecs/tas2781-comlib.c
+++ b/sound/soc/codecs/tas2781-comlib.c
@@ -267,6 +267,7 @@ void tas2781_reset(struct tasdevice_priv *tas_dev)
  EXPORT_SYMBOL_GPL(tas2781_reset);
  
  int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
+       struct module *module,
         void (*cont)(const struct firmware *fw, void *context))
  {
         int ret = 0;
@@ -280,7 +281,7 @@ int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
                 tas_priv->dev_name, tas_priv->ndev);
         crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL);
         tas_priv->codec = codec;
-       ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT,
+       ret = request_firmware_nowait(module, FW_ACTION_UEVENT,
                 tas_priv->rca_binaryname, tas_priv->dev, GFP_KERNEL, tas_priv,
                 cont);
         if (ret)
diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c

index 32913bd1a623381ee6e8d3d72c3f8e49d60ff0f7..b5abff230e43701f0f00c7b19f895469305747d7 100644 (file)
--- a/sound/soc/codecs/tas2781-i2c.c
+++ b/sound/soc/codecs/tas2781-i2c.c
@@ -566,7 +566,7 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec)
  {
         struct tasdevice_priv *tas_priv = snd_soc_component_get_drvdata(codec);
  
-       return tascodec_init(tas_priv, codec, tasdevice_fw_ready);
+       return tascodec_init(tas_priv, codec, THIS_MODULE, tasdevice_fw_ready);
  }
  
  static void tasdevice_deinit(void *context)
diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c

index 43c648efd0d938db5e0cb470a625617b8dc1860f..deb15b95992d5cc494562a91f13adbc348e2dd31 100644 (file)
--- a/sound/soc/codecs/wcd9335.c
+++ b/sound/soc/codecs/wcd9335.c
@@ -3033,7 +3033,6 @@ static int wcd9335_codec_enable_mix_path(struct snd_soc_dapm_widget *w,
  {
         struct snd_soc_component *comp = snd_soc_dapm_to_component(w->dapm);
         u16 gain_reg;
-       int offset_val = 0;
         int val = 0;
  
         switch (w->reg) {
@@ -3073,7 +3072,6 @@ static int wcd9335_codec_enable_mix_path(struct snd_soc_dapm_widget *w,
         switch (event) {
         case SND_SOC_DAPM_POST_PMU:
                 val = snd_soc_component_read(comp, gain_reg);
-               val += offset_val;
                 snd_soc_component_write(comp, gain_reg, val);
                 break;
         case SND_SOC_DAPM_POST_PMD:
@@ -3294,7 +3292,6 @@ static int wcd9335_codec_enable_interpolator(struct snd_soc_dapm_widget *w,
         u16 gain_reg;
         u16 reg;
         int val;
-       int offset_val = 0;
  
         if (!(snd_soc_dapm_widget_name_cmp(w, "RX INT0 INTERP"))) {
                 reg = WCD9335_CDC_RX0_RX_PATH_CTL;
@@ -3337,7 +3334,6 @@ static int wcd9335_codec_enable_interpolator(struct snd_soc_dapm_widget *w,
         case SND_SOC_DAPM_POST_PMU:
                 wcd9335_config_compander(comp, w->shift, event);
                 val = snd_soc_component_read(comp, gain_reg);
-               val += offset_val;
                 snd_soc_component_write(comp, gain_reg, val);
                 break;
         case SND_SOC_DAPM_POST_PMD:
diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c

index 1b6e376f3833cbc5f59034e88f90a4ee3845632a..6813268e6a19f3048877c5ae0ee55ae227543c04 100644 (file)
--- a/sound/soc/codecs/wcd934x.c
+++ b/sound/soc/codecs/wcd934x.c
@@ -13,7 +13,6 @@
  #include <linux/of.h>
  #include <linux/platform_device.h>
  #include <linux/regmap.h>
-#include <linux/regulator/consumer.h>
  #include <linux/slab.h>
  #include <linux/slimbus.h>
  #include <sound/pcm_params.h>
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c

index faf8d3f9b3c5d929d935ad4c5c63fe45371e4edc..6021aa5a56891969b04db64ac019bafb0766c701 100644 (file)
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -210,7 +210,7 @@ struct wcd938x_priv {
  };
  
  static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
-static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
+static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0);
  static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000);
  
  struct wcd938x_mbhc_zdet_param {
@@ -3587,10 +3587,8 @@ static int wcd938x_probe(struct platform_device *pdev)
         mutex_init(&wcd938x->micb_lock);
  
         ret = wcd938x_populate_dt_data(wcd938x, dev);
-       if (ret) {
-               dev_err(dev, "%s: Fail to obtain platform data\n", __func__);
-               return -EINVAL;
-       }
+       if (ret)
+               return ret;
  
         ret = wcd938x_add_slave_components(wcd938x, dev, &match);
         if (ret)
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c

index c01e31175015cc2f354175dec019fac591a98b4b..36ea0dcdc7ab0033eb48e393d783e4f7d4df9854 100644 (file)
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -739,19 +739,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp,
                                          const char *filetype)
  {
         struct cs_dsp *cs_dsp = &dsp->cs_dsp;
+       const char *fwf;
         char *s, c;
         int ret = 0;
  
+       if (dsp->fwf_name)
+               fwf = dsp->fwf_name;
+       else
+               fwf = dsp->cs_dsp.name;
+
         if (system_name && asoc_component_prefix)
                 *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part,
-                                     dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name,
+                                     fwf, wm_adsp_fw[dsp->fw].file, system_name,
                                       asoc_component_prefix, filetype);
         else if (system_name)
                 *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part,
-                                     dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name,
+                                     fwf, wm_adsp_fw[dsp->fw].file, system_name,
                                       filetype);
         else
-               *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name,
+               *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf,
                                       wm_adsp_fw[dsp->fw].file, filetype);
  
         if (*filename == NULL)
@@ -823,6 +829,23 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
                 }
         }
  
+       /* Check system-specific bin without wmfw before falling back to generic */
+       if (dsp->wmfw_optional && system_name) {
+               if (asoc_component_prefix)
+                       wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
+                                                     cirrus_dir, system_name,
+                                                     asoc_component_prefix, "bin");
+
+               if (!*coeff_firmware)
+                       wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
+                                                     cirrus_dir, system_name,
+                                                     NULL, "bin");
+
+               if (*coeff_firmware)
+                       return 0;
+       }
+
+       /* Check legacy location */
         if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename,
                                            "", NULL, NULL, "wmfw")) {
                 wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
@@ -830,62 +853,28 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
                 return 0;
         }
  
+       /* Fall back to generic wmfw and optional matching bin */
         ret = wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename,
                                             cirrus_dir, NULL, NULL, "wmfw");
-       if (!ret) {
+       if (!ret || dsp->wmfw_optional) {
                 wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
                                               cirrus_dir, NULL, NULL, "bin");
                 return 0;
         }
  
-       if (dsp->wmfw_optional) {
-               if (system_name) {
-                       if (asoc_component_prefix)
-                               wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
-                                                             cirrus_dir, system_name,
-                                                             asoc_component_prefix, "bin");
-
-                       if (!*coeff_firmware)
-                               wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
-                                                             cirrus_dir, system_name,
-                                                             NULL, "bin");
-               }
-
-               if (!*coeff_firmware)
-                       wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
-                                                     "", NULL, NULL, "bin");
-
-               if (!*coeff_firmware)
-                       wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename,
-                                                     cirrus_dir, NULL, NULL, "bin");
-
-               return 0;
-       }
-
         adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n",
-                cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file,
-                system_name, asoc_component_prefix);
+                cirrus_dir, dsp->part,
+                dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name,
+                wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix);
  
         return -ENOENT;
  }
  
  static int wm_adsp_common_init(struct wm_adsp *dsp)
  {
-       char *p;
-
         INIT_LIST_HEAD(&dsp->compr_list);
         INIT_LIST_HEAD(&dsp->buffer_list);
  
-       if (!dsp->fwf_name) {
-               p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL);
-               if (!p)
-                       return -ENOMEM;
-
-               dsp->fwf_name = p;
-               for (; *p != 0; ++p)
-                       *p = tolower(*p);
-       }
-
         return 0;
  }
  
diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c

index cb83c569e18d6aef70b23a56198dbf5ccd5ef2d8..a2e86ef7d18f5981b4604372e4b20930695aa5c4 100644 (file)
--- a/sound/soc/codecs/wsa883x.c
+++ b/sound/soc/codecs/wsa883x.c
@@ -1098,7 +1098,11 @@ static int wsa_dev_mode_put(struct snd_kcontrol *kcontrol,
         return 1;
  }
  
-static const DECLARE_TLV_DB_SCALE(pa_gain, -300, 150, -300);
+static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(pa_gain,
+       0, 14, TLV_DB_SCALE_ITEM(-300, 0, 0),
+       15, 29, TLV_DB_SCALE_ITEM(-300, 150, 0),
+       30, 31, TLV_DB_SCALE_ITEM(1800, 0, 0),
+);
  
  static int wsa883x_get_swr_port(struct snd_kcontrol *kcontrol,
                                 struct snd_ctl_elem_value *ucontrol)
diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c

index f0fb33d719c25135722014f9763c65df3289ed7e..c46f64557a7ffd268716e852dfe4411251da08cc 100644 (file)
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -174,7 +174,9 @@ static int fsl_xcvr_activate_ctl(struct snd_soc_dai *dai, const char *name,
         struct snd_kcontrol *kctl;
         bool enabled;
  
-       kctl = snd_soc_card_get_kcontrol(card, name);
+       lockdep_assert_held(&card->snd_card->controls_rwsem);
+
+       kctl = snd_soc_card_get_kcontrol_locked(card, name);
         if (kctl == NULL)
                 return -ENOENT;
  
@@ -576,10 +578,14 @@ static int fsl_xcvr_startup(struct snd_pcm_substream *substream,
         xcvr->streams |= BIT(substream->stream);
  
         if (!xcvr->soc_data->spdif_only) {
+               struct snd_soc_card *card = dai->component->card;
+
                 /* Disable XCVR controls if there is stream started */
+               down_read(&card->snd_card->controls_rwsem);
                 fsl_xcvr_activate_ctl(dai, fsl_xcvr_mode_kctl.name, false);
                 fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name, false);
                 fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name, false);
+               up_read(&card->snd_card->controls_rwsem);
         }
  
         return 0;
@@ -598,11 +604,15 @@ static void fsl_xcvr_shutdown(struct snd_pcm_substream *substream,
         /* Enable XCVR controls if there is no stream started */
         if (!xcvr->streams) {
                 if (!xcvr->soc_data->spdif_only) {
+                       struct snd_soc_card *card = dai->component->card;
+
+                       down_read(&card->snd_card->controls_rwsem);
                         fsl_xcvr_activate_ctl(dai, fsl_xcvr_mode_kctl.name, true);
                         fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name,
                                                 (xcvr->mode == FSL_XCVR_MODE_ARC));
                         fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name,
                                                 (xcvr->mode == FSL_XCVR_MODE_EARC));
+                       up_read(&card->snd_card->controls_rwsem);
                 }
                 ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_EXT_IER0,
                                          FSL_XCVR_IRQ_EARC_ALL, 0);
diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c

index 59c3793f65df0c5573ec6e7f873ef33d31f46371..db78eb2f0108071736b6bafa5e77f874b15f3375 100644 (file)
--- a/sound/soc/intel/avs/core.c
+++ b/sound/soc/intel/avs/core.c
@@ -477,6 +477,9 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
         return 0;
  
  err_i915_init:
+       pci_free_irq(pci, 0, adev);
+       pci_free_irq(pci, 0, bus);
+       pci_free_irq_vectors(pci);
         pci_clear_master(pci);
         pci_set_drvdata(pci, NULL);
  err_acquire_irq:
diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c

index 778236d3fd2806912120ab0eb953f96a6c79c90b..48b3c67c91032c97b7da54e9d822876f0e66b994 100644 (file)
--- a/sound/soc/intel/avs/topology.c
+++ b/sound/soc/intel/avs/topology.c
@@ -857,7 +857,7 @@ assign_copier_gtw_instance(struct snd_soc_component *comp, struct avs_tplg_modcf
         }
  
         /* If topology sets value don't overwrite it */
-       if (cfg->copier.vindex.i2s.instance)
+       if (cfg->copier.vindex.val)
                 return;
  
         mach = dev_get_platdata(comp->card->dev);
diff --git a/sound/soc/intel/boards/bytcht_cx2072x.c b/sound/soc/intel/boards/bytcht_cx2072x.c

index 10a84a2c1036e9ce67751702a047795a5eabf9b7..c014d85a08b24755682f3faf97e3e60cd171dc7a 100644 (file)
--- a/sound/soc/intel/boards/bytcht_cx2072x.c
+++ b/sound/soc/intel/boards/bytcht_cx2072x.c
@@ -241,7 +241,8 @@ static int snd_byt_cht_cx2072x_probe(struct platform_device *pdev)
  
         /* fix index of codec dai */
         for (i = 0; i < ARRAY_SIZE(byt_cht_cx2072x_dais); i++) {
-               if (!strcmp(byt_cht_cx2072x_dais[i].codecs->name,
+               if (byt_cht_cx2072x_dais[i].codecs->name &&
+                   !strcmp(byt_cht_cx2072x_dais[i].codecs->name,
                             "i2c-14F10720:00")) {
                         dai_index = i;
                         break;
diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c

index 7e5eea690023dff7bbdea996c739428c28445cf9..f4ac3ddd148b83757881426a2522adacd3d966d3 100644 (file)
--- a/sound/soc/intel/boards/bytcht_da7213.c
+++ b/sound/soc/intel/boards/bytcht_da7213.c
@@ -245,7 +245,8 @@ static int bytcht_da7213_probe(struct platform_device *pdev)
  
         /* fix index of codec dai */
         for (i = 0; i < ARRAY_SIZE(dailink); i++) {
-               if (!strcmp(dailink[i].codecs->name, "i2c-DLGS7213:00")) {
+               if (dailink[i].codecs->name &&
+                   !strcmp(dailink[i].codecs->name, "i2c-DLGS7213:00")) {
                         dai_index = i;
                         break;
                 }
diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c

index 1564a88a885efa1838317f453bf3d514a529d7e8..2fcec2e02bb53b403350ee76cb124ed99882087e 100644 (file)
--- a/sound/soc/intel/boards/bytcht_es8316.c
+++ b/sound/soc/intel/boards/bytcht_es8316.c
@@ -546,7 +546,8 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
  
         /* fix index of codec dai */
         for (i = 0; i < ARRAY_SIZE(byt_cht_es8316_dais); i++) {
-               if (!strcmp(byt_cht_es8316_dais[i].codecs->name,
+               if (byt_cht_es8316_dais[i].codecs->name &&
+                   !strcmp(byt_cht_es8316_dais[i].codecs->name,
                             "i2c-ESSX8316:00")) {
                         dai_index = i;
                         break;
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c

index 42466b4b1ca45e159ea40c42809018929e0ed0dc..03be5e26ec4ab5472efc14c58c5fd5939743f3fa 100644 (file)
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -1652,7 +1652,8 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
  
         /* fix index of codec dai */
         for (i = 0; i < ARRAY_SIZE(byt_rt5640_dais); i++) {
-               if (!strcmp(byt_rt5640_dais[i].codecs->name,
+               if (byt_rt5640_dais[i].codecs->name &&
+                   !strcmp(byt_rt5640_dais[i].codecs->name,
                             "i2c-10EC5640:00")) {
                         dai_index = i;
                         break;
diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c

index f9fe8414f454ff481b7e1b84f5377bb4d8835161..80c841b000a311229c310fec3ba91264696e6025 100644 (file)
--- a/sound/soc/intel/boards/bytcr_rt5651.c
+++ b/sound/soc/intel/boards/bytcr_rt5651.c
@@ -910,7 +910,8 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
  
         /* fix index of codec dai */
         for (i = 0; i < ARRAY_SIZE(byt_rt5651_dais); i++) {
-               if (!strcmp(byt_rt5651_dais[i].codecs->name,
+               if (byt_rt5651_dais[i].codecs->name &&
+                   !strcmp(byt_rt5651_dais[i].codecs->name,
                             "i2c-10EC5651:00")) {
                         dai_index = i;
                         break;
diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c

index 6978ebde669357fc7a25abc9961aaafc278b1789..cccb5e90c0fefc6a888ac302a63ed50a9423342d 100644 (file)
--- a/sound/soc/intel/boards/bytcr_wm5102.c
+++ b/sound/soc/intel/boards/bytcr_wm5102.c
@@ -605,7 +605,8 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev)
  
         /* find index of codec dai */
         for (i = 0; i < ARRAY_SIZE(byt_wm5102_dais); i++) {
-               if (!strcmp(byt_wm5102_dais[i].codecs->name,
+               if (byt_wm5102_dais[i].codecs->name &&
+                   !strcmp(byt_wm5102_dais[i].codecs->name,
                             "wm5102-codec")) {
                         dai_index = i;
                         break;
diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c

index c952a96cde7ebe27ba6f61ed6d417d0f063d0e1e..eb41b7115d01dd38685d5a10cd393e46eb4106a4 100644 (file)
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
@@ -40,7 +40,6 @@ struct cht_acpi_card {
  struct cht_mc_private {
         struct snd_soc_jack jack;
         struct cht_acpi_card *acpi_card;
-       char codec_name[SND_ACPI_I2C_ID_LEN];
         struct clk *mclk;
  };
  
@@ -567,14 +566,14 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
         }
  
         card->dev = &pdev->dev;
-       sprintf(drv->codec_name, "i2c-%s:00", drv->acpi_card->codec_id);
  
         /* set correct codec name */
         for (i = 0; i < ARRAY_SIZE(cht_dailink); i++)
-               if (!strcmp(card->dai_link[i].codecs->name,
+               if (cht_dailink[i].codecs->name &&
+                   !strcmp(cht_dailink[i].codecs->name,
                             "i2c-10EC5645:00")) {
-                       card->dai_link[i].codecs->name = drv->codec_name;
                         dai_index = i;
+                       break;
                 }
  
         /* fixup codec name based on HID */
diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c

index 8cf0b33cc02eb5763acbb572ab0387efbe8da325..be2d1a8dbca807dd1f4af070382d2d2f169c9e27 100644 (file)
--- a/sound/soc/intel/boards/cht_bsw_rt5672.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5672.c
@@ -466,7 +466,8 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
  
         /* find index of codec dai */
         for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) {
-               if (!strcmp(cht_dailink[i].codecs->name, RT5672_I2C_DEFAULT)) {
+               if (cht_dailink[i].codecs->name &&
+                   !strcmp(cht_dailink[i].codecs->name, RT5672_I2C_DEFAULT)) {
                         dai_index = i;
                         break;
                 }
diff --git a/sound/soc/qcom/lpass-cdc-dma.c b/sound/soc/qcom/lpass-cdc-dma.c

index 48b03e60e3a3d760c9d872d9ffc3527b0aed40fe..8106c586f68a4ec456ae2be11f37b1c2c8cd806c 100644 (file)
--- a/sound/soc/qcom/lpass-cdc-dma.c
+++ b/sound/soc/qcom/lpass-cdc-dma.c
@@ -259,7 +259,7 @@ static int lpass_cdc_dma_daiops_trigger(struct snd_pcm_substream *substream,
                                     int cmd, struct snd_soc_dai *dai)
  {
         struct snd_soc_pcm_runtime *soc_runtime = snd_soc_substream_to_rtd(substream);
-       struct lpaif_dmactl *dmactl;
+       struct lpaif_dmactl *dmactl = NULL;
         int ret = 0, id;
  
         switch (cmd) {
diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c

index 052e40cb38feca032752784545a4a07332369054..00bbd291be5cea4b5f43ee06b85b55dfb8eb91c2 100644 (file)
--- a/sound/soc/qcom/qdsp6/q6apm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6apm-dai.c
@@ -123,7 +123,7 @@ static struct snd_pcm_hardware q6apm_dai_hardware_playback = {
         .fifo_size =            0,
  };
  
-static void event_handler(uint32_t opcode, uint32_t token, uint32_t *payload, void *priv)
+static void event_handler(uint32_t opcode, uint32_t token, void *payload, void *priv)
  {
         struct q6apm_dai_rtd *prtd = priv;
         struct snd_pcm_substream *substream = prtd->substream;
@@ -157,7 +157,7 @@ static void event_handler(uint32_t opcode, uint32_t token, uint32_t *payload, vo
  }
  
  static void event_handler_compr(uint32_t opcode, uint32_t token,
-                               uint32_t *payload, void *priv)
+                               void *payload, void *priv)
  {
         struct q6apm_dai_rtd *prtd = priv;
         struct snd_compr_stream *substream = prtd->cstream;
@@ -352,7 +352,7 @@ static int q6apm_dai_open(struct snd_soc_component *component,
  
         spin_lock_init(&prtd->lock);
         prtd->substream = substream;
-       prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler, prtd, graph_id);
+       prtd->graph = q6apm_graph_open(dev, event_handler, prtd, graph_id);
         if (IS_ERR(prtd->graph)) {
                 dev_err(dev, "%s: Could not allocate memory\n", __func__);
                 ret = PTR_ERR(prtd->graph);
@@ -496,7 +496,7 @@ static int q6apm_dai_compr_open(struct snd_soc_component *component,
                 return -ENOMEM;
  
         prtd->cstream = stream;
-       prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler_compr, prtd, graph_id);
+       prtd->graph = q6apm_graph_open(dev, event_handler_compr, prtd, graph_id);
         if (IS_ERR(prtd->graph)) {
                 ret = PTR_ERR(prtd->graph);
                 kfree(prtd);
diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c

index ed4bb551bfbb92c965eba25c048ce4fa12648283..b7fd503a166668d3fe41500bf52e58de129c92de 100644 (file)
--- a/sound/soc/qcom/sc8280xp.c
+++ b/sound/soc/qcom/sc8280xp.c
@@ -32,12 +32,14 @@ static int sc8280xp_snd_init(struct snd_soc_pcm_runtime *rtd)
         case WSA_CODEC_DMA_RX_0:
         case WSA_CODEC_DMA_RX_1:
                 /*
-                * set limit of 0dB on Digital Volume for Speakers,
-                * this can prevent damage of speakers to some extent without
-                * active speaker protection
+                * Set limit of -3 dB on Digital Volume and 0 dB on PA Volume
+                * to reduce the risk of speaker damage until we have active
+                * speaker protection in place.
                  */
-               snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 84);
-               snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 84);
+               snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 81);
+               snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 81);
+               snd_soc_limit_volume(card, "SpkrLeft PA Volume", 17);
+               snd_soc_limit_volume(card, "SpkrRight PA Volume", 17);
                 break;
         default:
                 break;
diff --git a/sound/soc/soc-card.c b/sound/soc/soc-card.c

index 285ab4c9c7168314ae34bead44a5229bc5d8b96b..8a2f163da6bc9e8e61fc1f196f3b89556815cafe 100644 (file)
--- a/sound/soc/soc-card.c
+++ b/sound/soc/soc-card.c
@@ -5,6 +5,9 @@
  // Copyright (C) 2019 Renesas Electronics Corp.
  // Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
  //
+
+#include <linux/lockdep.h>
+#include <linux/rwsem.h>
  #include <sound/soc.h>
  #include <sound/jack.h>
  
@@ -26,12 +29,15 @@ static inline int _soc_card_ret(struct snd_soc_card *card,
         return ret;
  }
  
-struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card,
-                                              const char *name)
+struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card,
+                                                     const char *name)
  {
         struct snd_card *card = soc_card->snd_card;
         struct snd_kcontrol *kctl;
  
+       /* must be held read or write */
+       lockdep_assert_held(&card->controls_rwsem);
+
         if (unlikely(!name))
                 return NULL;
  
@@ -40,6 +46,20 @@ struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card,
                         return kctl;
         return NULL;
  }
+EXPORT_SYMBOL_GPL(snd_soc_card_get_kcontrol_locked);
+
+struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card,
+                                              const char *name)
+{
+       struct snd_card *card = soc_card->snd_card;
+       struct snd_kcontrol *kctl;
+
+       down_read(&card->controls_rwsem);
+       kctl = snd_soc_card_get_kcontrol_locked(soc_card, name);
+       up_read(&card->controls_rwsem);
+
+       return kctl;
+}
  EXPORT_SYMBOL_GPL(snd_soc_card_get_kcontrol);
  
  static int jack_new(struct snd_soc_card *card, const char *id, int type,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c

index f8524b5bfb330652afb48091b7afab12b1a70d6e..516350533e73f8ee1084164e44068f825eb7fafe 100644 (file)
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1037,7 +1037,7 @@ component_dai_empty:
         return -EINVAL;
  }
  
-#define MAX_DEFAULT_CH_MAP_SIZE 7
+#define MAX_DEFAULT_CH_MAP_SIZE 8
  static struct snd_soc_dai_link_ch_map default_ch_map_sync[MAX_DEFAULT_CH_MAP_SIZE] = {
         { .cpu = 0, .codec = 0 },
         { .cpu = 1, .codec = 1 },
@@ -1046,6 +1046,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_sync[MAX_DEFAULT_CH_MAP_SIZ
         { .cpu = 4, .codec = 4 },
         { .cpu = 5, .codec = 5 },
         { .cpu = 6, .codec = 6 },
+       { .cpu = 7, .codec = 7 },
  };
  static struct snd_soc_dai_link_ch_map default_ch_map_1cpu[MAX_DEFAULT_CH_MAP_SIZE] = {
         { .cpu = 0, .codec = 0 },
@@ -1055,6 +1056,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_1cpu[MAX_DEFAULT_CH_MAP_SIZ
         { .cpu = 0, .codec = 4 },
         { .cpu = 0, .codec = 5 },
         { .cpu = 0, .codec = 6 },
+       { .cpu = 0, .codec = 7 },
  };
  static struct snd_soc_dai_link_ch_map default_ch_map_1codec[MAX_DEFAULT_CH_MAP_SIZE] = {
         { .cpu = 0, .codec = 0 },
@@ -1064,6 +1066,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_1codec[MAX_DEFAULT_CH_MAP_S
         { .cpu = 4, .codec = 0 },
         { .cpu = 5, .codec = 0 },
         { .cpu = 6, .codec = 0 },
+       { .cpu = 7, .codec = 0 },
  };
  static int snd_soc_compensate_channel_connection_map(struct snd_soc_card *card,
                                                      struct snd_soc_dai_link *dai_link)
diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c

index 2743f07a5e0811912722d174bd4096950e788a62..b44b1b1adb6ed9e913c857168902f00949abcb86 100644 (file)
--- a/sound/soc/sof/amd/acp-ipc.c
+++ b/sound/soc/sof/amd/acp-ipc.c
@@ -188,11 +188,13 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context)
  
         dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write);
         if (dsp_ack) {
+               spin_lock_irq(&sdev->ipc_lock);
                 /* handle immediate reply from DSP core */
                 acp_dsp_ipc_get_reply(sdev);
                 snd_sof_ipc_reply(sdev, 0);
                 /* set the done bit */
                 acp_dsp_ipc_dsp_done(sdev);
+               spin_unlock_irq(&sdev->ipc_lock);
                 ipc_irq = true;
         }
  
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c

index 32a741fcb84fffcc3988e4deabf4e99ef4863d49..07632ae6ccf5ec058565d50f1992475795c490d2 100644 (file)
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -355,21 +355,20 @@ static irqreturn_t acp_irq_thread(int irq, void *context)
         unsigned int count = ACP_HW_SEM_RETRY_COUNT;
  
         spin_lock_irq(&sdev->ipc_lock);
-       while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset)) {
-               /* Wait until acquired HW Semaphore lock or timeout */
-               count--;
-               if (!count) {
-                       dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__);
-                       spin_unlock_irq(&sdev->ipc_lock);
-                       return IRQ_NONE;
-               }
+       /* Wait until acquired HW Semaphore lock or timeout */
+       while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset) && --count)
+               ;
+       spin_unlock_irq(&sdev->ipc_lock);
+
+       if (!count) {
+               dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__);
+               return IRQ_NONE;
         }
  
         sof_ops(sdev)->irq_thread(irq, sdev);
         /* Unlock or Release HW Semaphore */
         snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset, 0x0);
  
-       spin_unlock_irq(&sdev->ipc_lock);
         return IRQ_HANDLED;
  };
  
diff --git a/sound/soc/sof/intel/pci-lnl.c b/sound/soc/sof/intel/pci-lnl.c

index 78a57eb9cbc377c0b525827a0539baea6850ca6f..b26ffe767fab553467897b4facc13931668b27fe 100644 (file)
--- a/sound/soc/sof/intel/pci-lnl.c
+++ b/sound/soc/sof/intel/pci-lnl.c
@@ -36,7 +36,7 @@ static const struct sof_dev_desc lnl_desc = {
                 [SOF_IPC_TYPE_4] = "intel/sof-ipc4/lnl",
         },
         .default_tplg_path = {
-               [SOF_IPC_TYPE_4] = "intel/sof-ace-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_4] = "sof-lnl.ri",
diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c

index 0660d4b2ac96b66da5997d1aed60ddb91341571a..a361ee9d1107f5ed7533d1f71400f95ae4ad34af 100644 (file)
--- a/sound/soc/sof/intel/pci-tgl.c
+++ b/sound/soc/sof/intel/pci-tgl.c
@@ -33,18 +33,18 @@ static const struct sof_dev_desc tgl_desc = {
         .dspless_mode_supported = true,         /* Only supported for HDaudio */
         .default_fw_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof",
-               [SOF_IPC_TYPE_4] = "intel/avs/tgl",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4/tgl",
         },
         .default_lib_path = {
-               [SOF_IPC_TYPE_4] = "intel/avs-lib/tgl",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/tgl",
         },
         .default_tplg_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof-tplg",
-               [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_3] = "sof-tgl.ri",
-               [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+               [SOF_IPC_TYPE_4] = "sof-tgl.ri",
         },
         .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
         .ops = &sof_tgl_ops,
@@ -66,18 +66,18 @@ static const struct sof_dev_desc tglh_desc = {
         .dspless_mode_supported = true,         /* Only supported for HDaudio */
         .default_fw_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof",
-               [SOF_IPC_TYPE_4] = "intel/avs/tgl-h",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4/tgl-h",
         },
         .default_lib_path = {
-               [SOF_IPC_TYPE_4] = "intel/avs-lib/tgl-h",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/tgl-h",
         },
         .default_tplg_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof-tplg",
-               [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_3] = "sof-tgl-h.ri",
-               [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+               [SOF_IPC_TYPE_4] = "sof-tgl-h.ri",
         },
         .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
         .ops = &sof_tgl_ops,
@@ -98,18 +98,18 @@ static const struct sof_dev_desc ehl_desc = {
         .dspless_mode_supported = true,         /* Only supported for HDaudio */
         .default_fw_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof",
-               [SOF_IPC_TYPE_4] = "intel/avs/ehl",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4/ehl",
         },
         .default_lib_path = {
-               [SOF_IPC_TYPE_4] = "intel/avs-lib/ehl",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/ehl",
         },
         .default_tplg_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof-tplg",
-               [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_3] = "sof-ehl.ri",
-               [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+               [SOF_IPC_TYPE_4] = "sof-ehl.ri",
         },
         .nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
         .ops = &sof_tgl_ops,
@@ -131,18 +131,18 @@ static const struct sof_dev_desc adls_desc = {
         .dspless_mode_supported = true,         /* Only supported for HDaudio */
         .default_fw_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof",
-               [SOF_IPC_TYPE_4] = "intel/avs/adl-s",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl-s",
         },
         .default_lib_path = {
-               [SOF_IPC_TYPE_4] = "intel/avs-lib/adl-s",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl-s",
         },
         .default_tplg_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof-tplg",
-               [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_3] = "sof-adl-s.ri",
-               [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+               [SOF_IPC_TYPE_4] = "sof-adl-s.ri",
         },
         .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
         .ops = &sof_tgl_ops,
@@ -164,18 +164,18 @@ static const struct sof_dev_desc adl_desc = {
         .dspless_mode_supported = true,         /* Only supported for HDaudio */
         .default_fw_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof",
-               [SOF_IPC_TYPE_4] = "intel/avs/adl",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl",
         },
         .default_lib_path = {
-               [SOF_IPC_TYPE_4] = "intel/avs-lib/adl",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl",
         },
         .default_tplg_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof-tplg",
-               [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_3] = "sof-adl.ri",
-               [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+               [SOF_IPC_TYPE_4] = "sof-adl.ri",
         },
         .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
         .ops = &sof_tgl_ops,
@@ -197,18 +197,18 @@ static const struct sof_dev_desc adl_n_desc = {
         .dspless_mode_supported = true,         /* Only supported for HDaudio */
         .default_fw_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof",
-               [SOF_IPC_TYPE_4] = "intel/avs/adl-n",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl-n",
         },
         .default_lib_path = {
-               [SOF_IPC_TYPE_4] = "intel/avs-lib/adl-n",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl-n",
         },
         .default_tplg_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof-tplg",
-               [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_3] = "sof-adl-n.ri",
-               [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+               [SOF_IPC_TYPE_4] = "sof-adl-n.ri",
         },
         .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
         .ops = &sof_tgl_ops,
@@ -230,18 +230,18 @@ static const struct sof_dev_desc rpls_desc = {
         .dspless_mode_supported = true,         /* Only supported for HDaudio */
         .default_fw_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof",
-               [SOF_IPC_TYPE_4] = "intel/avs/rpl-s",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4/rpl-s",
         },
         .default_lib_path = {
-               [SOF_IPC_TYPE_4] = "intel/avs-lib/rpl-s",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/rpl-s",
         },
         .default_tplg_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof-tplg",
-               [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_3] = "sof-rpl-s.ri",
-               [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+               [SOF_IPC_TYPE_4] = "sof-rpl-s.ri",
         },
         .nocodec_tplg_filename = "sof-rpl-nocodec.tplg",
         .ops = &sof_tgl_ops,
@@ -263,18 +263,18 @@ static const struct sof_dev_desc rpl_desc = {
         .dspless_mode_supported = true,         /* Only supported for HDaudio */
         .default_fw_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof",
-               [SOF_IPC_TYPE_4] = "intel/avs/rpl",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4/rpl",
         },
         .default_lib_path = {
-               [SOF_IPC_TYPE_4] = "intel/avs-lib/rpl",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/rpl",
         },
         .default_tplg_path = {
                 [SOF_IPC_TYPE_3] = "intel/sof-tplg",
-               [SOF_IPC_TYPE_4] = "intel/avs-tplg",
+               [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
         },
         .default_fw_filename = {
                 [SOF_IPC_TYPE_3] = "sof-rpl.ri",
-               [SOF_IPC_TYPE_4] = "dsp_basefw.bin",
+               [SOF_IPC_TYPE_4] = "sof-rpl.ri",
         },
         .nocodec_tplg_filename = "sof-rpl-nocodec.tplg",
         .ops = &sof_tgl_ops,
diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c

index a8832a1c1a2442c8af573943787e3b6768b5e36f..d47698f4be2deb6f5bb943e2de2611923c45c387 100644 (file)
--- a/sound/soc/sof/ipc3-topology.c
+++ b/sound/soc/sof/ipc3-topology.c
@@ -2360,27 +2360,16 @@ static int sof_tear_down_left_over_pipelines(struct snd_sof_dev *sdev)
         return 0;
  }
  
-/*
- * For older firmware, this function doesn't free widgets for static pipelines during suspend.
- * It only resets use_count for all widgets.
- */
-static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verify)
+static int sof_ipc3_free_widgets_in_list(struct snd_sof_dev *sdev, bool include_scheduler,
+                                        bool *dyn_widgets, bool verify)
  {
         struct sof_ipc_fw_version *v = &sdev->fw_ready.version;
         struct snd_sof_widget *swidget;
-       struct snd_sof_route *sroute;
-       bool dyn_widgets = false;
         int ret;
  
-       /*
-        * This function is called during suspend and for one-time topology verification during
-        * first boot. In both cases, there is no need to protect swidget->use_count and
-        * sroute->setup because during suspend all running streams are suspended and during
-        * topology loading the sound card unavailable to open PCMs.
-        */
         list_for_each_entry(swidget, &sdev->widget_list, list) {
                 if (swidget->dynamic_pipeline_widget) {
-                       dyn_widgets = true;
+                       *dyn_widgets = true;
                         continue;
                 }
  
@@ -2395,11 +2384,49 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif
                         continue;
                 }
  
+               if (include_scheduler && swidget->id != snd_soc_dapm_scheduler)
+                       continue;
+
+               if (!include_scheduler && swidget->id == snd_soc_dapm_scheduler)
+                       continue;
+
                 ret = sof_widget_free(sdev, swidget);
                 if (ret < 0)
                         return ret;
         }
  
+       return 0;
+}
+
+/*
+ * For older firmware, this function doesn't free widgets for static pipelines during suspend.
+ * It only resets use_count for all widgets.
+ */
+static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verify)
+{
+       struct sof_ipc_fw_version *v = &sdev->fw_ready.version;
+       struct snd_sof_widget *swidget;
+       struct snd_sof_route *sroute;
+       bool dyn_widgets = false;
+       int ret;
+
+       /*
+        * This function is called during suspend and for one-time topology verification during
+        * first boot. In both cases, there is no need to protect swidget->use_count and
+        * sroute->setup because during suspend all running streams are suspended and during
+        * topology loading the sound card unavailable to open PCMs. Do not free the scheduler
+        * widgets yet so that the secondary cores do not get powered down before all the widgets
+        * associated with the scheduler are freed.
+        */
+       ret = sof_ipc3_free_widgets_in_list(sdev, false, &dyn_widgets, verify);
+       if (ret < 0)
+               return ret;
+
+       /* free all the scheduler widgets now */
+       ret = sof_ipc3_free_widgets_in_list(sdev, true, &dyn_widgets, verify);
+       if (ret < 0)
+               return ret;
+
         /*
          * Tear down all pipelines associated with PCMs that did not get suspended
          * and unset the prepare flag so that they can be set up again during resume.
diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c

index fb40378ad0840255a9b35cb74f99c9b80d5271b9..c03dd513fbff142fee51a9b0233920024d691f1d 100644 (file)
--- a/sound/soc/sof/ipc3.c
+++ b/sound/soc/sof/ipc3.c
@@ -1067,7 +1067,7 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev)
                 return;
         }
  
-       if (hdr.size < sizeof(hdr)) {
+       if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) {
                 dev_err(sdev->dev, "The received message size is invalid\n");
                 return;
         }
diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c

index 85d3f390e4b290774687086f37b2a73473117e54..07eb5c6d4adf3246877e4881c1927a6a4f8c39ee 100644 (file)
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -413,7 +413,18 @@ skip_pause_transition:
         ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list);
         if (ret < 0) {
                 dev_err(sdev->dev, "failed to set final state %d for all pipelines\n", state);
-               goto free;
+               /*
+                * workaround: if the firmware is crashed while setting the
+                * pipelines to reset state we must ignore the error code and
+                * reset it to 0.
+                * Since the firmware is crashed we will not send IPC messages
+                * and we are going to see errors printed, but the state of the
+                * widgets will be correct for the next boot.
+                */
+               if (sdev->fw_state != SOF_FW_CRASHED || state != SOF_IPC4_PIPE_RESET)
+                       goto free;
+
+               ret = 0;
         }
  
         /* update RUNNING/RESET state for all pipelines that were just triggered */
diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c

index 702386823d17263ffa6acacb6d0bd71adb7c83d9..f41c309558579f1c3c4b1d0e7bcca1b2e64d8747 100644 (file)
--- a/sound/soc/sunxi/sun4i-spdif.c
+++ b/sound/soc/sunxi/sun4i-spdif.c
@@ -577,6 +577,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = {
                 .compatible = "allwinner,sun50i-h6-spdif",
                 .data = &sun50i_h6_spdif_quirks,
         },
+       {
+               .compatible = "allwinner,sun50i-h616-spdif",
+               /* Essentially the same as the H6, but without RX */
+               .data = &sun50i_h6_spdif_quirks,
+       },
         { /* sentinel */ }
  };
  MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match);
diff --git a/sound/usb/clock.c b/sound/usb/clock.c

index 33db334e6556674414047b1a1d660ec3e8083100..60fcb872a80b6c1f79afcec88e959df33a04a4da 100644 (file)
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -261,6 +261,8 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
         int ret, i, cur, err, pins, clock_id;
         const u8 *sources;
         int proto = fmt->protocol;
+       bool readable, writeable;
+       u32 bmControls;
  
         entity_id &= 0xff;
  
@@ -292,11 +294,27 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
                 sources = GET_VAL(selector, proto, baCSourceID);
                 cur = 0;
  
+               if (proto == UAC_VERSION_3)
+                       bmControls = le32_to_cpu(*(__le32 *)(&selector->v3.baCSourceID[0] + pins));
+               else
+                       bmControls = *(__u8 *)(&selector->v2.baCSourceID[0] + pins);
+
+               readable = uac_v2v3_control_is_readable(bmControls,
+                                                       UAC2_CX_CLOCK_SELECTOR);
+               writeable = uac_v2v3_control_is_writeable(bmControls,
+                                                         UAC2_CX_CLOCK_SELECTOR);
+
                 if (pins == 1) {
                         ret = 1;
                         goto find_source;
                 }
  
+               /* for now just warn about buggy device */
+               if (!readable)
+                       usb_audio_warn(chip,
+                               "%s(): clock selector control is not readable, id %d\n",
+                               __func__, clock_id);
+
                 /* the entity ID we are looking at is a selector.
                  * find out what it currently selects */
                 ret = uac_clock_selector_get_val(chip, clock_id);
@@ -325,17 +343,29 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
                                               visited, validate);
                 if (ret > 0) {
                         /* Skip setting clock selector again for some devices */
-                       if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR)
+                       if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR ||
+                           !writeable)
                                 return ret;
                         err = uac_clock_selector_set_val(chip, entity_id, cur);
-                       if (err < 0)
+                       if (err < 0) {
+                               if (pins == 1) {
+                                       usb_audio_dbg(chip,
+                                                     "%s(): selector returned an error, "
+                                                     "assuming a firmware bug, id %d, ret %d\n",
+                                                     __func__, clock_id, err);
+                                       return ret;
+                               }
                                 return err;
+                       }
                 }
  
                 if (!validate || ret > 0 || !chip->autoclock)
                         return ret;
  
         find_others:
+               if (!writeable)
+                       return -ENXIO;
+
                 /* The current clock source is invalid, try others. */
                 for (i = 1; i <= pins; i++) {
                         if (i == cur)
diff --git a/sound/usb/format.c b/sound/usb/format.c

index ab5fed9f55b60ec8b255448a9cbb435f9e04d96b..3b45d0ee769389aafb3e752cec5b96223c52077b 100644 (file)
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip,
                                            int clock)
  {
         struct usb_device *dev = chip->dev;
+       struct usb_host_interface *alts;
         unsigned int *table;
         unsigned int nr_rates;
         int i, err;
+       u32 bmControls;
  
         /* performing the rate verification may lead to unexpected USB bus
          * behavior afterwards by some unknown reason.  Do this only for the
@@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip,
         if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES))
                 return 0; /* don't perform the validation as default */
  
+       alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting);
+       if (!alts)
+               return 0;
+
+       if (fp->protocol == UAC_VERSION_3) {
+               struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc(
+                               alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
+               bmControls = le32_to_cpu(as->bmControls);
+       } else {
+               struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc(
+                               alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
+               bmControls = as->bmControls;
+       }
+
+       if (!uac_v2v3_control_is_readable(bmControls,
+                               UAC2_AS_VAL_ALT_SETTINGS))
+               return 0;
+
         table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL);
         if (!table)
                 return -ENOMEM;
diff --git a/sound/usb/midi.c b/sound/usb/midi.c

index 6b0993258e039b052b9196f3a2f43f720623f2ce..c1f2e5a03de969af932b3f8394332eb6f80983a3 100644 (file)
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1742,50 +1742,44 @@ static void snd_usbmidi_get_port_info(struct snd_rawmidi *rmidi, int number,
         }
  }
  
-static struct usb_midi_in_jack_descriptor *find_usb_in_jack_descriptor(
-                                       struct usb_host_interface *hostif, uint8_t jack_id)
+/* return iJack for the corresponding jackID */
+static int find_usb_ijack(struct usb_host_interface *hostif, uint8_t jack_id)
  {
         unsigned char *extra = hostif->extra;
         int extralen = hostif->extralen;
+       struct usb_descriptor_header *h;
+       struct usb_midi_out_jack_descriptor *outjd;
+       struct usb_midi_in_jack_descriptor *injd;
+       size_t sz;
  
         while (extralen > 4) {
-               struct usb_midi_in_jack_descriptor *injd =
-                               (struct usb_midi_in_jack_descriptor *)extra;
+               h = (struct usb_descriptor_header *)extra;
+               if (h->bDescriptorType != USB_DT_CS_INTERFACE)
+                       goto next;
  
+               outjd = (struct usb_midi_out_jack_descriptor *)h;
+               if (h->bLength >= sizeof(*outjd) &&
+                   outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK &&
+                   outjd->bJackID == jack_id) {
+                       sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins);
+                       if (outjd->bLength < sz)
+                               goto next;
+                       return *(extra + sz - 1);
+               }
+
+               injd = (struct usb_midi_in_jack_descriptor *)h;
                 if (injd->bLength >= sizeof(*injd) &&
-                   injd->bDescriptorType == USB_DT_CS_INTERFACE &&
                     injd->bDescriptorSubtype == UAC_MIDI_IN_JACK &&
-                               injd->bJackID == jack_id)
-                       return injd;
-               if (!extra[0])
-                       break;
-               extralen -= extra[0];
-               extra += extra[0];
-       }
-       return NULL;
-}
-
-static struct usb_midi_out_jack_descriptor *find_usb_out_jack_descriptor(
-                                       struct usb_host_interface *hostif, uint8_t jack_id)
-{
-       unsigned char *extra = hostif->extra;
-       int extralen = hostif->extralen;
+                   injd->bJackID == jack_id)
+                       return injd->iJack;
  
-       while (extralen > 4) {
-               struct usb_midi_out_jack_descriptor *outjd =
-                               (struct usb_midi_out_jack_descriptor *)extra;
-
-               if (outjd->bLength >= sizeof(*outjd) &&
-                   outjd->bDescriptorType == USB_DT_CS_INTERFACE &&
-                   outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK &&
-                               outjd->bJackID == jack_id)
-                       return outjd;
+next:
                 if (!extra[0])
                         break;
                 extralen -= extra[0];
                 extra += extra[0];
         }
-       return NULL;
+       return 0;
  }
  
  static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi,
@@ -1796,13 +1790,10 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi,
         const char *name_format;
         struct usb_interface *intf;
         struct usb_host_interface *hostif;
-       struct usb_midi_in_jack_descriptor *injd;
-       struct usb_midi_out_jack_descriptor *outjd;
         uint8_t jack_name_buf[32];
         uint8_t *default_jack_name = "MIDI";
         uint8_t *jack_name = default_jack_name;
         uint8_t iJack;
-       size_t sz;
         int res;
  
         struct snd_rawmidi_substream *substream =
@@ -1816,21 +1807,7 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi,
         intf = umidi->iface;
         if (intf && jack_id >= 0) {
                 hostif = intf->cur_altsetting;
-               iJack = 0;
-               if (stream != SNDRV_RAWMIDI_STREAM_OUTPUT) {
-                       /* in jacks connect to outs */
-                       outjd = find_usb_out_jack_descriptor(hostif, jack_id);
-                       if (outjd) {
-                               sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins);
-                               if (outjd->bLength >= sz)
-                                       iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t));
-                       }
-               } else {
-                       /* and out jacks connect to ins */
-                       injd = find_usb_in_jack_descriptor(hostif, jack_id);
-                       if (injd)
-                               iJack = injd->iJack;
-               }
+               iJack = find_usb_ijack(hostif, jack_id);
                 if (iJack != 0) {
                         res = usb_string(umidi->dev, iJack, jack_name_buf,
                           ARRAY_SIZE(jack_name_buf));
diff --git a/sound/usb/midi2.c b/sound/usb/midi2.c

index 1ec177fe284eddd7eb431d56083e82886c858550..820d3e4b672ab603b6f2cb91ba95d12b60d519f5 100644 (file)
--- a/sound/usb/midi2.c
+++ b/sound/usb/midi2.c
@@ -1085,7 +1085,7 @@ int snd_usb_midi_v2_create(struct snd_usb_audio *chip,
         }
         if ((quirk && quirk->type != QUIRK_MIDI_STANDARD_INTERFACE) ||
             iface->num_altsetting < 2) {
-               usb_audio_info(chip, "Quirk or no altest; falling back to MIDI 1.0\n");
+               usb_audio_info(chip, "Quirk or no altset; falling back to MIDI 1.0\n");
                 goto fallback_to_midi1;
         }
         hostif = &iface->altsetting[1];
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c

index 07cc6a201579aa864f6ebd113d638cf4a36153d8..09712e61c606ef21c2b39bb80b8906a70f6ed4ff 100644 (file)
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2031,10 +2031,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                    QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR),
         DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */
                    QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+       DEVICE_FLG(0x0499, 0x3108, /* Yamaha YIT-W12TX */
+                  QUIRK_FLAG_GET_SAMPLE_RATE),
         DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */
                    QUIRK_FLAG_GET_SAMPLE_RATE),
         DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */
                    QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_CTL_MSG_DELAY_5M),
+       DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */
+                  QUIRK_FLAG_IFACE_SKIP_CLOSE),
         DEVICE_FLG(0x054c, 0x0b8c, /* Sony WALKMAN NW-A45 DAC */
                    QUIRK_FLAG_SET_IFACE_FIRST),
         DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */
@@ -2073,14 +2077,22 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                    QUIRK_FLAG_GENERIC_IMPLICIT_FB),
         DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */
                    QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+       DEVICE_FLG(0x07fd, 0x000b, /* MOTU M Series 2nd hardware revision */
+                  QUIRK_FLAG_CTL_MSG_DELAY_1M),
         DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */
                    QUIRK_FLAG_IGNORE_CTL_ERROR),
         DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */
                    QUIRK_FLAG_CTL_MSG_DELAY_1M),
         DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */
                    QUIRK_FLAG_CTL_MSG_DELAY_1M),
+       DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */
+                  QUIRK_FLAG_FIXED_RATE),
+       DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */
+                  QUIRK_FLAG_FIXED_RATE),
         DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */
                    QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
+       DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */
+                  QUIRK_FLAG_GET_SAMPLE_RATE),
         DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */
                    QUIRK_FLAG_GET_SAMPLE_RATE),
         DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */
@@ -2113,6 +2125,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                    QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY),
         DEVICE_FLG(0x1901, 0x0191, /* GE B850V3 CP2114 audio interface */
                    QUIRK_FLAG_GET_SAMPLE_RATE),
+       DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */
+                  QUIRK_FLAG_GET_SAMPLE_RATE),
+       DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */
+                  QUIRK_FLAG_GET_SAMPLE_RATE),
         DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */
                    QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
         DEVICE_FLG(0x2040, 0x7201, /* Hauppauge HVR-950Q-MXL */
@@ -2155,6 +2171,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                    QUIRK_FLAG_IGNORE_CTL_ERROR),
         DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */
                    QUIRK_FLAG_GET_SAMPLE_RATE),
+       DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */
+                  QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+       DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */
+                  QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+       DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */
+                  QUIRK_FLAG_GENERIC_IMPLICIT_FB),
         DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */
                    QUIRK_FLAG_IGNORE_CTL_ERROR),
         DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */
@@ -2163,22 +2185,6 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                    QUIRK_FLAG_ALIGN_TRANSFER),
         DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */
                    QUIRK_FLAG_ALIGN_TRANSFER),
-       DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */
-                  QUIRK_FLAG_GET_SAMPLE_RATE),
-       DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */
-                  QUIRK_FLAG_GENERIC_IMPLICIT_FB),
-       DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */
-                  QUIRK_FLAG_GENERIC_IMPLICIT_FB),
-       DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */
-                  QUIRK_FLAG_GENERIC_IMPLICIT_FB),
-       DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */
-                  QUIRK_FLAG_IFACE_SKIP_CLOSE),
-       DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */
-                  QUIRK_FLAG_FIXED_RATE),
-       DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */
-                  QUIRK_FLAG_FIXED_RATE),
-       DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */
-                  QUIRK_FLAG_GET_SAMPLE_RATE),
  
         /* Vendor matches */
         VENDOR_FLG(0x045e, /* MS Lifecam */
diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c

index e2847c040f750f98a77cb0bfe4ec2a548f5eb691..b158c3cb8e5f5fce75e22306c7707935465bc57f 100644 (file)
--- a/sound/virtio/virtio_card.c
+++ b/sound/virtio/virtio_card.c
@@ -91,8 +91,6 @@ static void virtsnd_event_notify_cb(struct virtqueue *vqueue)
                         virtsnd_event_dispatch(snd, event);
                         virtsnd_event_send(vqueue, event, true, GFP_ATOMIC);
                 }
-               if (unlikely(virtqueue_is_broken(vqueue)))
-                       break;
         } while (!virtqueue_enable_cb(vqueue));
         spin_unlock_irqrestore(&queue->lock, flags);
  }
diff --git a/sound/virtio/virtio_ctl_msg.c b/sound/virtio/virtio_ctl_msg.c

index 18dc5aca2e0c5b2a1e6c0d4391b34865b797995e..9dabea01277f845726ee2a908b9288c9f0e5e918 100644 (file)
--- a/sound/virtio/virtio_ctl_msg.c
+++ b/sound/virtio/virtio_ctl_msg.c
@@ -303,8 +303,6 @@ void virtsnd_ctl_notify_cb(struct virtqueue *vqueue)
                 virtqueue_disable_cb(vqueue);
                 while ((msg = virtqueue_get_buf(vqueue, &length)))
                         virtsnd_ctl_msg_complete(msg);
-               if (unlikely(virtqueue_is_broken(vqueue)))
-                       break;
         } while (!virtqueue_enable_cb(vqueue));
         spin_unlock_irqrestore(&queue->lock, flags);
  }
diff --git a/sound/virtio/virtio_pcm_msg.c b/sound/virtio/virtio_pcm_msg.c

index 542446c4c7ba8e4da2d7dd5b701c829e45c24084..8c32efaf4c5294e6aba0adcfb8a40a22d3a0d261 100644 (file)
--- a/sound/virtio/virtio_pcm_msg.c
+++ b/sound/virtio/virtio_pcm_msg.c
@@ -358,8 +358,6 @@ static inline void virtsnd_pcm_notify_cb(struct virtio_snd_queue *queue)
                 virtqueue_disable_cb(queue->vqueue);
                 while ((msg = virtqueue_get_buf(queue->vqueue, &written_bytes)))
                         virtsnd_pcm_msg_complete(msg, written_bytes);
-               if (unlikely(virtqueue_is_broken(queue->vqueue)))
-                       break;
         } while (!virtqueue_enable_cb(queue->vqueue));
         spin_unlock_irqrestore(&queue->lock, flags);
  }
diff --git a/tools/arch/riscv/include/asm/csr.h b/tools/arch/riscv/include/asm/csr.h

new file mode 100644 (file)

index 0000000..0dfc092
--- /dev/null
+++ b/tools/arch/riscv/include/asm/csr.h
@@ -0,0 +1,541 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _ASM_RISCV_CSR_H
+#define _ASM_RISCV_CSR_H
+
+#include <linux/bits.h>
+
+/* Status register flags */
+#define SR_SIE         _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_MIE         _AC(0x00000008, UL) /* Machine Interrupt Enable */
+#define SR_SPIE                _AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_MPIE                _AC(0x00000080, UL) /* Previous Machine IE */
+#define SR_SPP         _AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_MPP         _AC(0x00001800, UL) /* Previously Machine */
+#define SR_SUM         _AC(0x00040000, UL) /* Supervisor User Memory Access */
+
+#define SR_FS          _AC(0x00006000, UL) /* Floating-point Status */
+#define SR_FS_OFF      _AC(0x00000000, UL)
+#define SR_FS_INITIAL  _AC(0x00002000, UL)
+#define SR_FS_CLEAN    _AC(0x00004000, UL)
+#define SR_FS_DIRTY    _AC(0x00006000, UL)
+
+#define SR_VS          _AC(0x00000600, UL) /* Vector Status */
+#define SR_VS_OFF      _AC(0x00000000, UL)
+#define SR_VS_INITIAL  _AC(0x00000200, UL)
+#define SR_VS_CLEAN    _AC(0x00000400, UL)
+#define SR_VS_DIRTY    _AC(0x00000600, UL)
+
+#define SR_XS          _AC(0x00018000, UL) /* Extension Status */
+#define SR_XS_OFF      _AC(0x00000000, UL)
+#define SR_XS_INITIAL  _AC(0x00008000, UL)
+#define SR_XS_CLEAN    _AC(0x00010000, UL)
+#define SR_XS_DIRTY    _AC(0x00018000, UL)
+
+#define SR_FS_VS       (SR_FS | SR_VS) /* Vector and Floating-Point Unit */
+
+#ifndef CONFIG_64BIT
+#define SR_SD          _AC(0x80000000, UL) /* FS/VS/XS dirty */
+#else
+#define SR_SD          _AC(0x8000000000000000, UL) /* FS/VS/XS dirty */
+#endif
+
+#ifdef CONFIG_64BIT
+#define SR_UXL         _AC(0x300000000, UL) /* XLEN mask for U-mode */
+#define SR_UXL_32      _AC(0x100000000, UL) /* XLEN = 32 for U-mode */
+#define SR_UXL_64      _AC(0x200000000, UL) /* XLEN = 64 for U-mode */
+#endif
+
+/* SATP flags */
+#ifndef CONFIG_64BIT
+#define SATP_PPN       _AC(0x003FFFFF, UL)
+#define SATP_MODE_32   _AC(0x80000000, UL)
+#define SATP_MODE_SHIFT        31
+#define SATP_ASID_BITS 9
+#define SATP_ASID_SHIFT        22
+#define SATP_ASID_MASK _AC(0x1FF, UL)
+#else
+#define SATP_PPN       _AC(0x00000FFFFFFFFFFF, UL)
+#define SATP_MODE_39   _AC(0x8000000000000000, UL)
+#define SATP_MODE_48   _AC(0x9000000000000000, UL)
+#define SATP_MODE_57   _AC(0xa000000000000000, UL)
+#define SATP_MODE_SHIFT        60
+#define SATP_ASID_BITS 16
+#define SATP_ASID_SHIFT        44
+#define SATP_ASID_MASK _AC(0xFFFF, UL)
+#endif
+
+/* Exception cause high bit - is an interrupt if set */
+#define CAUSE_IRQ_FLAG         (_AC(1, UL) << (__riscv_xlen - 1))
+
+/* Interrupt causes (minus the high bit) */
+#define IRQ_S_SOFT             1
+#define IRQ_VS_SOFT            2
+#define IRQ_M_SOFT             3
+#define IRQ_S_TIMER            5
+#define IRQ_VS_TIMER           6
+#define IRQ_M_TIMER            7
+#define IRQ_S_EXT              9
+#define IRQ_VS_EXT             10
+#define IRQ_M_EXT              11
+#define IRQ_S_GEXT             12
+#define IRQ_PMU_OVF            13
+#define IRQ_LOCAL_MAX          (IRQ_PMU_OVF + 1)
+#define IRQ_LOCAL_MASK         GENMASK((IRQ_LOCAL_MAX - 1), 0)
+
+/* Exception causes */
+#define EXC_INST_MISALIGNED    0
+#define EXC_INST_ACCESS                1
+#define EXC_INST_ILLEGAL       2
+#define EXC_BREAKPOINT         3
+#define EXC_LOAD_MISALIGNED    4
+#define EXC_LOAD_ACCESS                5
+#define EXC_STORE_MISALIGNED   6
+#define EXC_STORE_ACCESS       7
+#define EXC_SYSCALL            8
+#define EXC_HYPERVISOR_SYSCALL 9
+#define EXC_SUPERVISOR_SYSCALL 10
+#define EXC_INST_PAGE_FAULT    12
+#define EXC_LOAD_PAGE_FAULT    13
+#define EXC_STORE_PAGE_FAULT   15
+#define EXC_INST_GUEST_PAGE_FAULT      20
+#define EXC_LOAD_GUEST_PAGE_FAULT      21
+#define EXC_VIRTUAL_INST_FAULT         22
+#define EXC_STORE_GUEST_PAGE_FAULT     23
+
+/* PMP configuration */
+#define PMP_R                  0x01
+#define PMP_W                  0x02
+#define PMP_X                  0x04
+#define PMP_A                  0x18
+#define PMP_A_TOR              0x08
+#define PMP_A_NA4              0x10
+#define PMP_A_NAPOT            0x18
+#define PMP_L                  0x80
+
+/* HSTATUS flags */
+#ifdef CONFIG_64BIT
+#define HSTATUS_VSXL           _AC(0x300000000, UL)
+#define HSTATUS_VSXL_SHIFT     32
+#endif
+#define HSTATUS_VTSR           _AC(0x00400000, UL)
+#define HSTATUS_VTW            _AC(0x00200000, UL)
+#define HSTATUS_VTVM           _AC(0x00100000, UL)
+#define HSTATUS_VGEIN          _AC(0x0003f000, UL)
+#define HSTATUS_VGEIN_SHIFT    12
+#define HSTATUS_HU             _AC(0x00000200, UL)
+#define HSTATUS_SPVP           _AC(0x00000100, UL)
+#define HSTATUS_SPV            _AC(0x00000080, UL)
+#define HSTATUS_GVA            _AC(0x00000040, UL)
+#define HSTATUS_VSBE           _AC(0x00000020, UL)
+
+/* HGATP flags */
+#define HGATP_MODE_OFF         _AC(0, UL)
+#define HGATP_MODE_SV32X4      _AC(1, UL)
+#define HGATP_MODE_SV39X4      _AC(8, UL)
+#define HGATP_MODE_SV48X4      _AC(9, UL)
+#define HGATP_MODE_SV57X4      _AC(10, UL)
+
+#define HGATP32_MODE_SHIFT     31
+#define HGATP32_VMID_SHIFT     22
+#define HGATP32_VMID           GENMASK(28, 22)
+#define HGATP32_PPN            GENMASK(21, 0)
+
+#define HGATP64_MODE_SHIFT     60
+#define HGATP64_VMID_SHIFT     44
+#define HGATP64_VMID           GENMASK(57, 44)
+#define HGATP64_PPN            GENMASK(43, 0)
+
+#define HGATP_PAGE_SHIFT       12
+
+#ifdef CONFIG_64BIT
+#define HGATP_PPN              HGATP64_PPN
+#define HGATP_VMID_SHIFT       HGATP64_VMID_SHIFT
+#define HGATP_VMID             HGATP64_VMID
+#define HGATP_MODE_SHIFT       HGATP64_MODE_SHIFT
+#else
+#define HGATP_PPN              HGATP32_PPN
+#define HGATP_VMID_SHIFT       HGATP32_VMID_SHIFT
+#define HGATP_VMID             HGATP32_VMID
+#define HGATP_MODE_SHIFT       HGATP32_MODE_SHIFT
+#endif
+
+/* VSIP & HVIP relation */
+#define VSIP_TO_HVIP_SHIFT     (IRQ_VS_SOFT - IRQ_S_SOFT)
+#define VSIP_VALID_MASK                ((_AC(1, UL) << IRQ_S_SOFT) | \
+                                (_AC(1, UL) << IRQ_S_TIMER) | \
+                                (_AC(1, UL) << IRQ_S_EXT))
+
+/* AIA CSR bits */
+#define TOPI_IID_SHIFT         16
+#define TOPI_IID_MASK          GENMASK(11, 0)
+#define TOPI_IPRIO_MASK                GENMASK(7, 0)
+#define TOPI_IPRIO_BITS                8
+
+#define TOPEI_ID_SHIFT         16
+#define TOPEI_ID_MASK          GENMASK(10, 0)
+#define TOPEI_PRIO_MASK                GENMASK(10, 0)
+
+#define ISELECT_IPRIO0         0x30
+#define ISELECT_IPRIO15                0x3f
+#define ISELECT_MASK           GENMASK(8, 0)
+
+#define HVICTL_VTI             BIT(30)
+#define HVICTL_IID             GENMASK(27, 16)
+#define HVICTL_IID_SHIFT       16
+#define HVICTL_DPR             BIT(9)
+#define HVICTL_IPRIOM          BIT(8)
+#define HVICTL_IPRIO           GENMASK(7, 0)
+
+/* xENVCFG flags */
+#define ENVCFG_STCE                    (_AC(1, ULL) << 63)
+#define ENVCFG_PBMTE                   (_AC(1, ULL) << 62)
+#define ENVCFG_CBZE                    (_AC(1, UL) << 7)
+#define ENVCFG_CBCFE                   (_AC(1, UL) << 6)
+#define ENVCFG_CBIE_SHIFT              4
+#define ENVCFG_CBIE                    (_AC(0x3, UL) << ENVCFG_CBIE_SHIFT)
+#define ENVCFG_CBIE_ILL                        _AC(0x0, UL)
+#define ENVCFG_CBIE_FLUSH              _AC(0x1, UL)
+#define ENVCFG_CBIE_INV                        _AC(0x3, UL)
+#define ENVCFG_FIOM                    _AC(0x1, UL)
+
+/* Smstateen bits */
+#define SMSTATEEN0_AIA_IMSIC_SHIFT     58
+#define SMSTATEEN0_AIA_IMSIC           (_ULL(1) << SMSTATEEN0_AIA_IMSIC_SHIFT)
+#define SMSTATEEN0_AIA_SHIFT           59
+#define SMSTATEEN0_AIA                 (_ULL(1) << SMSTATEEN0_AIA_SHIFT)
+#define SMSTATEEN0_AIA_ISEL_SHIFT      60
+#define SMSTATEEN0_AIA_ISEL            (_ULL(1) << SMSTATEEN0_AIA_ISEL_SHIFT)
+#define SMSTATEEN0_HSENVCFG_SHIFT      62
+#define SMSTATEEN0_HSENVCFG            (_ULL(1) << SMSTATEEN0_HSENVCFG_SHIFT)
+#define SMSTATEEN0_SSTATEEN0_SHIFT     63
+#define SMSTATEEN0_SSTATEEN0           (_ULL(1) << SMSTATEEN0_SSTATEEN0_SHIFT)
+
+/* symbolic CSR names: */
+#define CSR_CYCLE              0xc00
+#define CSR_TIME               0xc01
+#define CSR_INSTRET            0xc02
+#define CSR_HPMCOUNTER3                0xc03
+#define CSR_HPMCOUNTER4                0xc04
+#define CSR_HPMCOUNTER5                0xc05
+#define CSR_HPMCOUNTER6                0xc06
+#define CSR_HPMCOUNTER7                0xc07
+#define CSR_HPMCOUNTER8                0xc08
+#define CSR_HPMCOUNTER9                0xc09
+#define CSR_HPMCOUNTER10       0xc0a
+#define CSR_HPMCOUNTER11       0xc0b
+#define CSR_HPMCOUNTER12       0xc0c
+#define CSR_HPMCOUNTER13       0xc0d
+#define CSR_HPMCOUNTER14       0xc0e
+#define CSR_HPMCOUNTER15       0xc0f
+#define CSR_HPMCOUNTER16       0xc10
+#define CSR_HPMCOUNTER17       0xc11
+#define CSR_HPMCOUNTER18       0xc12
+#define CSR_HPMCOUNTER19       0xc13
+#define CSR_HPMCOUNTER20       0xc14
+#define CSR_HPMCOUNTER21       0xc15
+#define CSR_HPMCOUNTER22       0xc16
+#define CSR_HPMCOUNTER23       0xc17
+#define CSR_HPMCOUNTER24       0xc18
+#define CSR_HPMCOUNTER25       0xc19
+#define CSR_HPMCOUNTER26       0xc1a
+#define CSR_HPMCOUNTER27       0xc1b
+#define CSR_HPMCOUNTER28       0xc1c
+#define CSR_HPMCOUNTER29       0xc1d
+#define CSR_HPMCOUNTER30       0xc1e
+#define CSR_HPMCOUNTER31       0xc1f
+#define CSR_CYCLEH             0xc80
+#define CSR_TIMEH              0xc81
+#define CSR_INSTRETH           0xc82
+#define CSR_HPMCOUNTER3H       0xc83
+#define CSR_HPMCOUNTER4H       0xc84
+#define CSR_HPMCOUNTER5H       0xc85
+#define CSR_HPMCOUNTER6H       0xc86
+#define CSR_HPMCOUNTER7H       0xc87
+#define CSR_HPMCOUNTER8H       0xc88
+#define CSR_HPMCOUNTER9H       0xc89
+#define CSR_HPMCOUNTER10H      0xc8a
+#define CSR_HPMCOUNTER11H      0xc8b
+#define CSR_HPMCOUNTER12H      0xc8c
+#define CSR_HPMCOUNTER13H      0xc8d
+#define CSR_HPMCOUNTER14H      0xc8e
+#define CSR_HPMCOUNTER15H      0xc8f
+#define CSR_HPMCOUNTER16H      0xc90
+#define CSR_HPMCOUNTER17H      0xc91
+#define CSR_HPMCOUNTER18H      0xc92
+#define CSR_HPMCOUNTER19H      0xc93
+#define CSR_HPMCOUNTER20H      0xc94
+#define CSR_HPMCOUNTER21H      0xc95
+#define CSR_HPMCOUNTER22H      0xc96
+#define CSR_HPMCOUNTER23H      0xc97
+#define CSR_HPMCOUNTER24H      0xc98
+#define CSR_HPMCOUNTER25H      0xc99
+#define CSR_HPMCOUNTER26H      0xc9a
+#define CSR_HPMCOUNTER27H      0xc9b
+#define CSR_HPMCOUNTER28H      0xc9c
+#define CSR_HPMCOUNTER29H      0xc9d
+#define CSR_HPMCOUNTER30H      0xc9e
+#define CSR_HPMCOUNTER31H      0xc9f
+
+#define CSR_SSCOUNTOVF         0xda0
+
+#define CSR_SSTATUS            0x100
+#define CSR_SIE                        0x104
+#define CSR_STVEC              0x105
+#define CSR_SCOUNTEREN         0x106
+#define CSR_SENVCFG            0x10a
+#define CSR_SSTATEEN0          0x10c
+#define CSR_SSCRATCH           0x140
+#define CSR_SEPC               0x141
+#define CSR_SCAUSE             0x142
+#define CSR_STVAL              0x143
+#define CSR_SIP                        0x144
+#define CSR_SATP               0x180
+
+#define CSR_STIMECMP           0x14D
+#define CSR_STIMECMPH          0x15D
+
+/* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */
+#define CSR_SISELECT           0x150
+#define CSR_SIREG              0x151
+
+/* Supervisor-Level Interrupts (AIA) */
+#define CSR_STOPEI             0x15c
+#define CSR_STOPI              0xdb0
+
+/* Supervisor-Level High-Half CSRs (AIA) */
+#define CSR_SIEH               0x114
+#define CSR_SIPH               0x154
+
+#define CSR_VSSTATUS           0x200
+#define CSR_VSIE               0x204
+#define CSR_VSTVEC             0x205
+#define CSR_VSSCRATCH          0x240
+#define CSR_VSEPC              0x241
+#define CSR_VSCAUSE            0x242
+#define CSR_VSTVAL             0x243
+#define CSR_VSIP               0x244
+#define CSR_VSATP              0x280
+#define CSR_VSTIMECMP          0x24D
+#define CSR_VSTIMECMPH         0x25D
+
+#define CSR_HSTATUS            0x600
+#define CSR_HEDELEG            0x602
+#define CSR_HIDELEG            0x603
+#define CSR_HIE                        0x604
+#define CSR_HTIMEDELTA         0x605
+#define CSR_HCOUNTEREN         0x606
+#define CSR_HGEIE              0x607
+#define CSR_HENVCFG            0x60a
+#define CSR_HTIMEDELTAH                0x615
+#define CSR_HENVCFGH           0x61a
+#define CSR_HTVAL              0x643
+#define CSR_HIP                        0x644
+#define CSR_HVIP               0x645
+#define CSR_HTINST             0x64a
+#define CSR_HGATP              0x680
+#define CSR_HGEIP              0xe12
+
+/* Virtual Interrupts and Interrupt Priorities (H-extension with AIA) */
+#define CSR_HVIEN              0x608
+#define CSR_HVICTL             0x609
+#define CSR_HVIPRIO1           0x646
+#define CSR_HVIPRIO2           0x647
+
+/* VS-Level Window to Indirectly Accessed Registers (H-extension with AIA) */
+#define CSR_VSISELECT          0x250
+#define CSR_VSIREG             0x251
+
+/* VS-Level Interrupts (H-extension with AIA) */
+#define CSR_VSTOPEI            0x25c
+#define CSR_VSTOPI             0xeb0
+
+/* Hypervisor and VS-Level High-Half CSRs (H-extension with AIA) */
+#define CSR_HIDELEGH           0x613
+#define CSR_HVIENH             0x618
+#define CSR_HVIPH              0x655
+#define CSR_HVIPRIO1H          0x656
+#define CSR_HVIPRIO2H          0x657
+#define CSR_VSIEH              0x214
+#define CSR_VSIPH              0x254
+
+/* Hypervisor stateen CSRs */
+#define CSR_HSTATEEN0          0x60c
+#define CSR_HSTATEEN0H         0x61c
+
+#define CSR_MSTATUS            0x300
+#define CSR_MISA               0x301
+#define CSR_MIDELEG            0x303
+#define CSR_MIE                        0x304
+#define CSR_MTVEC              0x305
+#define CSR_MENVCFG            0x30a
+#define CSR_MENVCFGH           0x31a
+#define CSR_MSCRATCH           0x340
+#define CSR_MEPC               0x341
+#define CSR_MCAUSE             0x342
+#define CSR_MTVAL              0x343
+#define CSR_MIP                        0x344
+#define CSR_PMPCFG0            0x3a0
+#define CSR_PMPADDR0           0x3b0
+#define CSR_MVENDORID          0xf11
+#define CSR_MARCHID            0xf12
+#define CSR_MIMPID             0xf13
+#define CSR_MHARTID            0xf14
+
+/* Machine-Level Window to Indirectly Accessed Registers (AIA) */
+#define CSR_MISELECT           0x350
+#define CSR_MIREG              0x351
+
+/* Machine-Level Interrupts (AIA) */
+#define CSR_MTOPEI             0x35c
+#define CSR_MTOPI              0xfb0
+
+/* Virtual Interrupts for Supervisor Level (AIA) */
+#define CSR_MVIEN              0x308
+#define CSR_MVIP               0x309
+
+/* Machine-Level High-Half CSRs (AIA) */
+#define CSR_MIDELEGH           0x313
+#define CSR_MIEH               0x314
+#define CSR_MVIENH             0x318
+#define CSR_MVIPH              0x319
+#define CSR_MIPH               0x354
+
+#define CSR_VSTART             0x8
+#define CSR_VCSR               0xf
+#define CSR_VL                 0xc20
+#define CSR_VTYPE              0xc21
+#define CSR_VLENB              0xc22
+
+#ifdef CONFIG_RISCV_M_MODE
+# define CSR_STATUS    CSR_MSTATUS
+# define CSR_IE                CSR_MIE
+# define CSR_TVEC      CSR_MTVEC
+# define CSR_SCRATCH   CSR_MSCRATCH
+# define CSR_EPC       CSR_MEPC
+# define CSR_CAUSE     CSR_MCAUSE
+# define CSR_TVAL      CSR_MTVAL
+# define CSR_IP                CSR_MIP
+
+# define CSR_IEH               CSR_MIEH
+# define CSR_ISELECT   CSR_MISELECT
+# define CSR_IREG      CSR_MIREG
+# define CSR_IPH               CSR_MIPH
+# define CSR_TOPEI     CSR_MTOPEI
+# define CSR_TOPI      CSR_MTOPI
+
+# define SR_IE         SR_MIE
+# define SR_PIE                SR_MPIE
+# define SR_PP         SR_MPP
+
+# define RV_IRQ_SOFT           IRQ_M_SOFT
+# define RV_IRQ_TIMER  IRQ_M_TIMER
+# define RV_IRQ_EXT            IRQ_M_EXT
+#else /* CONFIG_RISCV_M_MODE */
+# define CSR_STATUS    CSR_SSTATUS
+# define CSR_IE                CSR_SIE
+# define CSR_TVEC      CSR_STVEC
+# define CSR_SCRATCH   CSR_SSCRATCH
+# define CSR_EPC       CSR_SEPC
+# define CSR_CAUSE     CSR_SCAUSE
+# define CSR_TVAL      CSR_STVAL
+# define CSR_IP                CSR_SIP
+
+# define CSR_IEH               CSR_SIEH
+# define CSR_ISELECT   CSR_SISELECT
+# define CSR_IREG      CSR_SIREG
+# define CSR_IPH               CSR_SIPH
+# define CSR_TOPEI     CSR_STOPEI
+# define CSR_TOPI      CSR_STOPI
+
+# define SR_IE         SR_SIE
+# define SR_PIE                SR_SPIE
+# define SR_PP         SR_SPP
+
+# define RV_IRQ_SOFT           IRQ_S_SOFT
+# define RV_IRQ_TIMER  IRQ_S_TIMER
+# define RV_IRQ_EXT            IRQ_S_EXT
+# define RV_IRQ_PMU    IRQ_PMU_OVF
+# define SIP_LCOFIP     (_AC(0x1, UL) << IRQ_PMU_OVF)
+
+#endif /* !CONFIG_RISCV_M_MODE */
+
+/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
+#define IE_SIE         (_AC(0x1, UL) << RV_IRQ_SOFT)
+#define IE_TIE         (_AC(0x1, UL) << RV_IRQ_TIMER)
+#define IE_EIE         (_AC(0x1, UL) << RV_IRQ_EXT)
+
+#ifdef __ASSEMBLY__
+#define __ASM_STR(x)    x
+#else
+#define __ASM_STR(x)    #x
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define csr_swap(csr, val)                                     \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrrw %0, " __ASM_STR(csr) ", %1"\
+                             : "=r" (__v) : "rK" (__v)         \
+                             : "memory");                      \
+       __v;                                                    \
+})
+
+#define csr_read(csr)                                          \
+({                                                             \
+       register unsigned long __v;                             \
+       __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr)        \
+                             : "=r" (__v) :                    \
+                             : "memory");                      \
+       __v;                                                    \
+})
+
+#define csr_write(csr, val)                                    \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0"     \
+                             : : "rK" (__v)                    \
+                             : "memory");                      \
+})
+
+#define csr_read_set(csr, val)                                 \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrrs %0, " __ASM_STR(csr) ", %1"\
+                             : "=r" (__v) : "rK" (__v)         \
+                             : "memory");                      \
+       __v;                                                    \
+})
+
+#define csr_set(csr, val)                                      \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrs " __ASM_STR(csr) ", %0"     \
+                             : : "rK" (__v)                    \
+                             : "memory");                      \
+})
+
+#define csr_read_clear(csr, val)                               \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrrc %0, " __ASM_STR(csr) ", %1"\
+                             : "=r" (__v) : "rK" (__v)         \
+                             : "memory");                      \
+       __v;                                                    \
+})
+
+#define csr_clear(csr, val)                                    \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrc " __ASM_STR(csr) ", %0"     \
+                             : : "rK" (__v)                    \
+                             : "memory");                      \
+})
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_RISCV_CSR_H */
diff --git a/tools/arch/riscv/include/asm/vdso/processor.h b/tools/arch/riscv/include/asm/vdso/processor.h

new file mode 100644 (file)

index 0000000..662aca0
--- /dev/null
+++ b/tools/arch/riscv/include/asm/vdso/processor.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm-generic/barrier.h>
+
+static inline void cpu_relax(void)
+{
+#ifdef __riscv_muldiv
+       int dummy;
+       /* In lieu of a halt instruction, induce a long-latency stall. */
+       __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
+#endif
+
+#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE
+       /*
+        * Reduce instruction retirement.
+        * This assumes the PC changes.
+        */
+       __asm__ __volatile__ ("pause");
+#else
+       /* Encoding of the pause instruction */
+       __asm__ __volatile__ (".4byte 0x100000F");
+#endif
+       barrier();
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h

index f4542d2718f4f635ce8879da123764e72e9af47b..29cb275a219d7fb38fa0d16e6ba48e91c9d032b4 100644 (file)
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -198,6 +198,7 @@
  #define X86_FEATURE_CAT_L3             ( 7*32+ 4) /* Cache Allocation Technology L3 */
  #define X86_FEATURE_CAT_L2             ( 7*32+ 5) /* Cache Allocation Technology L2 */
  #define X86_FEATURE_CDP_L3             ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_TDX_HOST_PLATFORM  ( 7*32+ 7) /* Platform supports being a TDX host */
  #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
  #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
  #define X86_FEATURE_XCOMPACTED         ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
@@ -308,10 +309,14 @@
  #define X86_FEATURE_SMBA               (11*32+21) /* "" Slow Memory Bandwidth Allocation */
  #define X86_FEATURE_BMEC               (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
  #define X86_FEATURE_USER_SHSTK         (11*32+23) /* Shadow stack support for user mode applications */
-
  #define X86_FEATURE_SRSO               (11*32+24) /* "" AMD BTB untrain RETs */
  #define X86_FEATURE_SRSO_ALIAS         (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
  #define X86_FEATURE_IBPB_ON_VMEXIT     (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE    (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2               (11*32+28) /* "" CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3               (11*32+29) /* "" CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4               (11*32+30) /* "" CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1               (11*32+31) /* "" CPU based on Zen1 microarchitecture */
  
  /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
  #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
@@ -495,6 +500,7 @@
  #define X86_BUG_EIBRS_PBRSB            X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
  #define X86_BUG_SMT_RSB                        X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
  #define X86_BUG_GDS                    X86_BUG(30) /* CPU is affected by Gather Data Sampling */
+#define X86_BUG_TDX_PW_MCE             X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */
  
  /* BUG word 2 */
  #define X86_BUG_SRSO                   X86_BUG(1*32 + 0) /* AMD SRSO bug */
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h

index 3a19904c2db6935fda03c0a7c9eeaa47e62f823c..3f73ac3ed3a0709a700ae927bca01069a1910665 100644 (file)
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/arch/x86/include/asm/irq_vectors.h
@@ -84,7 +84,7 @@
  #define HYPERVISOR_CALLBACK_VECTOR     0xf3
  
  /* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
  #define POSTED_INTR_VECTOR             0xf2
  #define POSTED_INTR_WAKEUP_VECTOR      0xf1
  #define POSTED_INTR_NESTED_VECTOR      0xf0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h

index 1d51e1850ed03d46e84c71de0c451067d0baac5b..f1bd7b91b3c63735738825f15cd3c82fca7579ce 100644 (file)
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -237,6 +237,11 @@
  #define LBR_INFO_CYCLES                        0xffff
  #define LBR_INFO_BR_TYPE_OFFSET                56
  #define LBR_INFO_BR_TYPE               (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+#define LBR_INFO_BR_CNTR_OFFSET                32
+#define LBR_INFO_BR_CNTR_NUM           4
+#define LBR_INFO_BR_CNTR_BITS          2
+#define LBR_INFO_BR_CNTR_MASK          GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
+#define LBR_INFO_BR_CNTR_FULL_MASK     GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
  
  #define MSR_ARCH_LBR_CTL               0x000014ce
  #define ARCH_LBR_CTL_LBREN             BIT(0)
@@ -536,6 +541,9 @@
  #define MSR_RELOAD_PMC0                        0x000014c1
  #define MSR_RELOAD_FIXED_CTR0          0x00001309
  
+/* KeyID partitioning between MKTME and TDX */
+#define MSR_IA32_MKTME_KEYID_PARTITIONING      0x00000087
+
  /*
   * AMD64 MSRs. Not complete. See the architecture manual for a more
   * complete list.
diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h

index 11ff975242cac7cff4dfaab3a4591dc4cb82eb1d..e2ff22b379a44c584b7325249c48db9b3368c7d8 100644 (file)
--- a/tools/arch/x86/include/asm/rmwcc.h
+++ b/tools/arch/x86/include/asm/rmwcc.h
@@ -4,7 +4,7 @@
  
  #define __GEN_RMWcc(fullop, var, cc, ...)                              \
  do {                                                                   \
-       asm_volatile_goto (fullop "; j" cc " %l[cc_label]"              \
+       asm goto (fullop "; j" cc " %l[cc_label]"               \
                         : : "m" (var), ## __VA_ARGS__                   \
                         : "memory" : cc_label);                         \
         return 0;                                                       \
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h

index 1a6a1f98794967d260e2898b0dbb62f830d45664..a448d0964fc06ebd0c15cd0b550e3c2cefbf57bf 100644 (file)
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -562,4 +562,7 @@ struct kvm_pmu_event_filter {
  /* x86-specific KVM_EXIT_HYPERCALL flags. */
  #define KVM_EXIT_HYPERCALL_LONG_MODE   BIT(0)
  
+#define KVM_X86_DEFAULT_VM     0
+#define KVM_X86_SW_PROTECTED_VM        1
+
  #endif /* _ASM_X86_KVM_H */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S

index d055b82d22ccd083975a874e5a96abbeff8f496b..59cf6f9065aa84d8a4a6a92999f3d3d1f3367681 100644 (file)
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -1,11 +1,11 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /* Copyright 2002 Andi Kleen */
  
+#include <linux/export.h>
  #include <linux/linkage.h>
  #include <asm/errno.h>
  #include <asm/cpufeatures.h>
  #include <asm/alternative.h>
-#include <asm/export.h>
  
  .section .noinstr.text, "ax"
  
@@ -39,7 +39,7 @@ SYM_TYPED_FUNC_START(__memcpy)
  SYM_FUNC_END(__memcpy)
  EXPORT_SYMBOL(__memcpy)
  
-SYM_FUNC_ALIAS(memcpy, __memcpy)
+SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
  EXPORT_SYMBOL(memcpy)
  
  SYM_FUNC_START_LOCAL(memcpy_orig)
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S

index 7c59a704c4584bf7ef3e6a50f2021c31e6f15029..0199d56cb479d88ce0bc6556c092ea87ae9ceb3b 100644 (file)
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -1,10 +1,10 @@
  /* SPDX-License-Identifier: GPL-2.0 */
  /* Copyright 2002 Andi Kleen, SuSE Labs */
  
+#include <linux/export.h>
  #include <linux/linkage.h>
  #include <asm/cpufeatures.h>
  #include <asm/alternative.h>
-#include <asm/export.h>
  
  .section .noinstr.text, "ax"
  
@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
  SYM_FUNC_END(__memset)
  EXPORT_SYMBOL(__memset)
  
-SYM_FUNC_ALIAS(memset, __memset)
+SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
  EXPORT_SYMBOL(memset)
  
  SYM_FUNC_START_LOCAL(memset_orig)
diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h

index 2fd551915c2025ee7d7adc53f30e44e7b6bf01c1..cdd2fd078027afc99a21a7edb2fa097caf6e4a92 100644 (file)
--- a/tools/include/asm-generic/unaligned.h
+++ b/tools/include/asm-generic/unaligned.h
@@ -105,9 +105,9 @@ static inline u32 get_unaligned_le24(const void *p)
  
  static inline void __put_unaligned_be24(const u32 val, u8 *p)
  {
-       *p++ = val >> 16;
-       *p++ = val >> 8;
-       *p++ = val;
+       *p++ = (val >> 16) & 0xff;
+       *p++ = (val >> 8) & 0xff;
+       *p++ = val & 0xff;
  }
  
  static inline void put_unaligned_be24(const u32 val, void *p)
@@ -117,9 +117,9 @@ static inline void put_unaligned_be24(const u32 val, void *p)
  
  static inline void __put_unaligned_le24(const u32 val, u8 *p)
  {
-       *p++ = val;
-       *p++ = val >> 8;
-       *p++ = val >> 16;
+       *p++ = val & 0xff;
+       *p++ = (val >> 8) & 0xff;
+       *p++ = (val >> 16) & 0xff;
  }
  
  static inline void put_unaligned_le24(const u32 val, void *p)
@@ -129,12 +129,12 @@ static inline void put_unaligned_le24(const u32 val, void *p)
  
  static inline void __put_unaligned_be48(const u64 val, u8 *p)
  {
-       *p++ = val >> 40;
-       *p++ = val >> 32;
-       *p++ = val >> 24;
-       *p++ = val >> 16;
-       *p++ = val >> 8;
-       *p++ = val;
+       *p++ = (val >> 40) & 0xff;
+       *p++ = (val >> 32) & 0xff;
+       *p++ = (val >> 24) & 0xff;
+       *p++ = (val >> 16) & 0xff;
+       *p++ = (val >> 8) & 0xff;
+       *p++ = val & 0xff;
  }
  
  static inline void put_unaligned_be48(const u64 val, void *p)
diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h

index 1bdd834bdd57198059c91222036314403191cdbc..d09f9dc172a486875e2e62cf8550a69f24c9beed 100644 (file)
--- a/tools/include/linux/compiler_types.h
+++ b/tools/include/linux/compiler_types.h
@@ -36,8 +36,8 @@
  #include <linux/compiler-gcc.h>
  #endif
  
-#ifndef asm_volatile_goto
-#define asm_volatile_goto(x...) asm goto(x)
+#ifndef asm_goto_output
+#define asm_goto_output(x...) asm goto(x)
  #endif
  
  #endif /* __LINUX_COMPILER_TYPES_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h

index 756b013fb8324bd7a320e60cebec2ca692faa149..75f00965ab1586cd64d00928217596de5034bd25 100644 (file)
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
  #define __NR_futex_requeue 456
  __SYSCALL(__NR_futex_requeue, sys_futex_requeue)
  
+#define __NR_statmount   457
+__SYSCALL(__NR_statmount, sys_statmount)
+
+#define __NR_listmount   458
+__SYSCALL(__NR_listmount, sys_listmount)
+
+#define __NR_lsm_get_self_attr 459
+__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
+#define __NR_lsm_set_self_attr 460
+__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
+#define __NR_lsm_list_modules 461
+__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
+
  #undef __NR_syscalls
-#define __NR_syscalls 457
+#define __NR_syscalls 462
  
  /*
   * 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h

index de723566c5ae82382192923e17478209f7c94f41..16122819edfeff872b91d989d1f6267640ae1391 100644 (file)
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -713,7 +713,8 @@ struct drm_gem_open {
  /**
   * DRM_CAP_ASYNC_PAGE_FLIP
   *
- * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
+ * page-flips.
   */
  #define DRM_CAP_ASYNC_PAGE_FLIP                0x7
  /**
@@ -773,6 +774,13 @@ struct drm_gem_open {
   * :ref:`drm_sync_objects`.
   */
  #define DRM_CAP_SYNCOBJ_TIMELINE       0x14
+/**
+ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
+ * commits.
+ */
+#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15
  
  /* DRM_IOCTL_GET_CAP ioctl argument type */
  struct drm_get_cap {
@@ -842,6 +850,31 @@ struct drm_get_cap {
   */
  #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS    5
  
+/**
+ * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT
+ *
+ * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and
+ * virtualbox) have additional restrictions for cursor planes (thus
+ * making cursor planes on those drivers not truly universal,) e.g.
+ * they need cursor planes to act like one would expect from a mouse
+ * cursor and have correctly set hotspot properties.
+ * If this client cap is not set the DRM core will hide cursor plane on
+ * those virtualized drivers because not setting it implies that the
+ * client is not capable of dealing with those extra restictions.
+ * Clients which do set cursor hotspot and treat the cursor plane
+ * like a mouse cursor should set this property.
+ * The client must enable &DRM_CLIENT_CAP_ATOMIC first.
+ *
+ * Setting this property on drivers which do not special case
+ * cursor planes (i.e. non-virtualized drivers) will return
+ * EOPNOTSUPP, which can be used by userspace to gauge
+ * requirements of the hardware/drivers they're running on.
+ *
+ * This capability is always supported for atomic-capable virtualized
+ * drivers starting from kernel version 6.6.
+ */
+#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT    6
+
  /* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
  struct drm_set_client_cap {
         __u64 capability;
@@ -893,6 +926,7 @@ struct drm_syncobj_transfer {
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */
  struct drm_syncobj_wait {
         __u64 handles;
         /* absolute timeout */
@@ -901,6 +935,14 @@ struct drm_syncobj_wait {
         __u32 flags;
         __u32 first_signaled; /* only valid when not waiting all */
         __u32 pad;
+       /**
+        * @deadline_nsec - fence deadline hint
+        *
+        * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+        * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+        * set.
+        */
+       __u64 deadline_nsec;
  };
  
  struct drm_syncobj_timeline_wait {
@@ -913,6 +955,14 @@ struct drm_syncobj_timeline_wait {
         __u32 flags;
         __u32 first_signaled; /* only valid when not waiting all */
         __u32 pad;
+       /**
+        * @deadline_nsec - fence deadline hint
+        *
+        * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+        * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+        * set.
+        */
+       __u64 deadline_nsec;
  };
  
  /**
@@ -1218,6 +1268,26 @@ extern "C" {
  
  #define DRM_IOCTL_SYNCOBJ_EVENTFD      DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
  
+/**
+ * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer.
+ *
+ * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable
+ * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept
+ * alive. When the plane no longer uses the framebuffer (because the
+ * framebuffer is replaced with another one, or the plane is disabled), the
+ * framebuffer is cleaned up.
+ *
+ * This is useful to implement flicker-free transitions between two processes.
+ *
+ * Depending on the threat model, user-space may want to ensure that the
+ * framebuffer doesn't expose any sensitive user information: closed
+ * framebuffers attached to a plane can be read back by the next DRM master.
+ */
+#define DRM_IOCTL_MODE_CLOSEFB         DRM_IOWR(0xD0, struct drm_mode_closefb)
+
  /*
   * Device specific ioctls should only be in their respective headers
   * The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h

index 218edb0a96f8c043df13a5bf25f85ec754ee449a..fd4f9574d177a269b2cdbe5a36b3b30f2addbc94 100644 (file)
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -693,7 +693,7 @@ typedef struct drm_i915_irq_wait {
  #define I915_PARAM_HAS_EXEC_FENCE       44
  
  /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
- * user specified bufffers for post-mortem debugging of GPU hangs. See
+ * user-specified buffers for post-mortem debugging of GPU hangs. See
   * EXEC_OBJECT_CAPTURE.
   */
  #define I915_PARAM_HAS_EXEC_CAPTURE     45
@@ -1606,7 +1606,7 @@ struct drm_i915_gem_busy {
          * is accurate.
          *
          * The returned dword is split into two fields to indicate both
-        * the engine classess on which the object is being read, and the
+        * the engine classes on which the object is being read, and the
          * engine class on which it is currently being written (if any).
          *
          * The low word (bits 0:15) indicate if the object is being written
@@ -1815,7 +1815,7 @@ struct drm_i915_gem_madvise {
         __u32 handle;
  
         /* Advice: either the buffer will be needed again in the near future,
-        *         or wont be and could be discarded under memory pressure.
+        *         or won't be and could be discarded under memory pressure.
          */
         __u32 madv;
  
@@ -3246,7 +3246,7 @@ struct drm_i915_query_topology_info {
   *     // enough to hold our array of engines. The kernel will fill out the
   *     // item.length for us, which is the number of bytes we need.
   *     //
- *     // Alternatively a large buffer can be allocated straight away enabling
+ *     // Alternatively a large buffer can be allocated straightaway enabling
   *     // querying in one pass, in which case item.length should contain the
   *     // length of the provided buffer.
   *     err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
@@ -3256,7 +3256,7 @@ struct drm_i915_query_topology_info {
   *     // Now that we allocated the required number of bytes, we call the ioctl
   *     // again, this time with the data_ptr pointing to our newly allocated
   *     // blob, which the kernel can then populate with info on all engines.
- *     item.data_ptr = (uintptr_t)&info,
+ *     item.data_ptr = (uintptr_t)&info;
   *
   *     err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
   *     if (err) ...
@@ -3286,7 +3286,7 @@ struct drm_i915_query_topology_info {
  /**
   * struct drm_i915_engine_info
   *
- * Describes one engine and it's capabilities as known to the driver.
+ * Describes one engine and its capabilities as known to the driver.
   */
  struct drm_i915_engine_info {
         /** @engine: Engine class and instance. */
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h

index 6c80f96049bd07d1aa527c103acb07fe52bfd617..282e90aeb163c0288590995b38fe011b19e85111 100644 (file)
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -116,5 +116,8 @@
  #define AT_HANDLE_FID          AT_REMOVEDIR    /* file handle is needed to
                                         compare object identity and may not
                                         be usable to open_by_handle_at(2) */
+#if defined(__KERNEL__)
+#define AT_GETATTR_NOSEC       0x80000000
+#endif
  
  #endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h

index 211b86de35ac53f6457bbd2fae8c973ce6b3a968..c3308536482bdb2bfb1279279325faf5430a3356 100644 (file)
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -16,76 +16,6 @@
  
  #define KVM_API_VERSION 12
  
-/* *** Deprecated interfaces *** */
-
-#define KVM_TRC_SHIFT           16
-
-#define KVM_TRC_ENTRYEXIT       (1 << KVM_TRC_SHIFT)
-#define KVM_TRC_HANDLER         (1 << (KVM_TRC_SHIFT + 1))
-
-#define KVM_TRC_VMENTRY         (KVM_TRC_ENTRYEXIT + 0x01)
-#define KVM_TRC_VMEXIT          (KVM_TRC_ENTRYEXIT + 0x02)
-#define KVM_TRC_PAGE_FAULT      (KVM_TRC_HANDLER + 0x01)
-
-#define KVM_TRC_HEAD_SIZE       12
-#define KVM_TRC_CYCLE_SIZE      8
-#define KVM_TRC_EXTRA_MAX       7
-
-#define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
-#define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
-#define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
-#define KVM_TRC_IO_READ          (KVM_TRC_HANDLER + 0x05)
-#define KVM_TRC_IO_WRITE         (KVM_TRC_HANDLER + 0x06)
-#define KVM_TRC_CR_READ          (KVM_TRC_HANDLER + 0x07)
-#define KVM_TRC_CR_WRITE         (KVM_TRC_HANDLER + 0x08)
-#define KVM_TRC_DR_READ          (KVM_TRC_HANDLER + 0x09)
-#define KVM_TRC_DR_WRITE         (KVM_TRC_HANDLER + 0x0A)
-#define KVM_TRC_MSR_READ         (KVM_TRC_HANDLER + 0x0B)
-#define KVM_TRC_MSR_WRITE        (KVM_TRC_HANDLER + 0x0C)
-#define KVM_TRC_CPUID            (KVM_TRC_HANDLER + 0x0D)
-#define KVM_TRC_INTR             (KVM_TRC_HANDLER + 0x0E)
-#define KVM_TRC_NMI              (KVM_TRC_HANDLER + 0x0F)
-#define KVM_TRC_VMMCALL          (KVM_TRC_HANDLER + 0x10)
-#define KVM_TRC_HLT              (KVM_TRC_HANDLER + 0x11)
-#define KVM_TRC_CLTS             (KVM_TRC_HANDLER + 0x12)
-#define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
-#define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
-#define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
-#define KVM_TRC_GTLB_WRITE       (KVM_TRC_HANDLER + 0x16)
-#define KVM_TRC_STLB_WRITE       (KVM_TRC_HANDLER + 0x17)
-#define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
-#define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
-
-struct kvm_user_trace_setup {
-       __u32 buf_size;
-       __u32 buf_nr;
-};
-
-#define __KVM_DEPRECATED_MAIN_W_0x06 \
-       _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
-#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
-#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
-
-#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
-
-struct kvm_breakpoint {
-       __u32 enabled;
-       __u32 padding;
-       __u64 address;
-};
-
-struct kvm_debug_guest {
-       __u32 enabled;
-       __u32 pad;
-       struct kvm_breakpoint breakpoints[4];
-       __u32 singlestep;
-};
-
-#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
-
-/* *** End of deprecated interfaces *** */
-
-
  /* for KVM_SET_USER_MEMORY_REGION */
  struct kvm_userspace_memory_region {
         __u32 slot;
@@ -95,6 +25,19 @@ struct kvm_userspace_memory_region {
         __u64 userspace_addr; /* start of the userspace allocated memory */
  };
  
+/* for KVM_SET_USER_MEMORY_REGION2 */
+struct kvm_userspace_memory_region2 {
+       __u32 slot;
+       __u32 flags;
+       __u64 guest_phys_addr;
+       __u64 memory_size;
+       __u64 userspace_addr;
+       __u64 guest_memfd_offset;
+       __u32 guest_memfd;
+       __u32 pad1;
+       __u64 pad2[14];
+};
+
  /*
   * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
   * userspace, other bits are reserved for kvm internal use which are defined
@@ -102,6 +45,7 @@ struct kvm_userspace_memory_region {
   */
  #define KVM_MEM_LOG_DIRTY_PAGES        (1UL << 0)
  #define KVM_MEM_READONLY       (1UL << 1)
+#define KVM_MEM_GUEST_MEMFD    (1UL << 2)
  
  /* for KVM_IRQ_LINE */
  struct kvm_irq_level {
@@ -265,6 +209,7 @@ struct kvm_xen_exit {
  #define KVM_EXIT_RISCV_CSR        36
  #define KVM_EXIT_NOTIFY           37
  #define KVM_EXIT_LOONGARCH_IOCSR  38
+#define KVM_EXIT_MEMORY_FAULT     39
  
  /* For KVM_EXIT_INTERNAL_ERROR */
  /* Emulate instruction failed. */
@@ -518,6 +463,13 @@ struct kvm_run {
  #define KVM_NOTIFY_CONTEXT_INVALID     (1 << 0)
                         __u32 flags;
                 } notify;
+               /* KVM_EXIT_MEMORY_FAULT */
+               struct {
+#define KVM_MEMORY_EXIT_FLAG_PRIVATE   (1ULL << 3)
+                       __u64 flags;
+                       __u64 gpa;
+                       __u64 size;
+               } memory_fault;
                 /* Fix the size of the union. */
                 char padding[256];
         };
@@ -945,9 +897,6 @@ struct kvm_ppc_resize_hpt {
   */
  #define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
  #define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
-#define KVM_TRACE_ENABLE          __KVM_DEPRECATED_MAIN_W_0x06
-#define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
-#define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
  #define KVM_GET_EMULATED_CPUID   _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
  #define KVM_GET_MSR_FEATURE_INDEX_LIST    _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
  
@@ -1201,6 +1150,11 @@ struct kvm_ppc_resize_hpt {
  #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
  #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
  #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
+#define KVM_CAP_USER_MEMORY2 231
+#define KVM_CAP_MEMORY_FAULT_INFO 232
+#define KVM_CAP_MEMORY_ATTRIBUTES 233
+#define KVM_CAP_GUEST_MEMFD 234
+#define KVM_CAP_VM_TYPES 235
  
  #ifdef KVM_CAP_IRQ_ROUTING
  
@@ -1291,6 +1245,7 @@ struct kvm_x86_mce {
  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
  #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE        (1 << 7)
  
  struct kvm_xen_hvm_config {
         __u32 flags;
@@ -1483,6 +1438,8 @@ struct kvm_vfio_spapr_tce {
                                         struct kvm_userspace_memory_region)
  #define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
  #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO,  0x48, __u64)
+#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \
+                                        struct kvm_userspace_memory_region2)
  
  /* enable ucontrol for s390 */
  struct kvm_s390_ucas_mapping {
@@ -1507,20 +1464,8 @@ struct kvm_s390_ucas_mapping {
                         _IOW(KVMIO,  0x67, struct kvm_coalesced_mmio_zone)
  #define KVM_UNREGISTER_COALESCED_MMIO \
                         _IOW(KVMIO,  0x68, struct kvm_coalesced_mmio_zone)
-#define KVM_ASSIGN_PCI_DEVICE     _IOR(KVMIO,  0x69, \
-                                      struct kvm_assigned_pci_dev)
  #define KVM_SET_GSI_ROUTING       _IOW(KVMIO,  0x6a, struct kvm_irq_routing)
-/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
-#define KVM_ASSIGN_IRQ            __KVM_DEPRECATED_VM_R_0x70
-#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO,  0x70, struct kvm_assigned_irq)
  #define KVM_REINJECT_CONTROL      _IO(KVMIO,   0x71)
-#define KVM_DEASSIGN_PCI_DEVICE   _IOW(KVMIO,  0x72, \
-                                      struct kvm_assigned_pci_dev)
-#define KVM_ASSIGN_SET_MSIX_NR    _IOW(KVMIO,  0x73, \
-                                      struct kvm_assigned_msix_nr)
-#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO,  0x74, \
-                                      struct kvm_assigned_msix_entry)
-#define KVM_DEASSIGN_DEV_IRQ      _IOW(KVMIO,  0x75, struct kvm_assigned_irq)
  #define KVM_IRQFD                 _IOW(KVMIO,  0x76, struct kvm_irqfd)
  #define KVM_CREATE_PIT2                  _IOW(KVMIO,  0x77, struct kvm_pit_config)
  #define KVM_SET_BOOT_CPU_ID       _IO(KVMIO,   0x78)
@@ -1537,9 +1482,6 @@ struct kvm_s390_ucas_mapping {
  *  KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
  #define KVM_SET_TSC_KHZ           _IO(KVMIO,  0xa2)
  #define KVM_GET_TSC_KHZ           _IO(KVMIO,  0xa3)
-/* Available with KVM_CAP_PCI_2_3 */
-#define KVM_ASSIGN_SET_INTX_MASK  _IOW(KVMIO,  0xa4, \
-                                      struct kvm_assigned_pci_dev)
  /* Available with KVM_CAP_SIGNAL_MSI */
  #define KVM_SIGNAL_MSI            _IOW(KVMIO,  0xa5, struct kvm_msi)
  /* Available with KVM_CAP_PPC_GET_SMMU_INFO */
@@ -1592,8 +1534,6 @@ struct kvm_s390_ucas_mapping {
  #define KVM_SET_SREGS             _IOW(KVMIO,  0x84, struct kvm_sregs)
  #define KVM_TRANSLATE             _IOWR(KVMIO, 0x85, struct kvm_translation)
  #define KVM_INTERRUPT             _IOW(KVMIO,  0x86, struct kvm_interrupt)
-/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
-#define KVM_DEBUG_GUEST           __KVM_DEPRECATED_VCPU_W_0x87
  #define KVM_GET_MSRS              _IOWR(KVMIO, 0x88, struct kvm_msrs)
  #define KVM_SET_MSRS              _IOW(KVMIO,  0x89, struct kvm_msrs)
  #define KVM_SET_CPUID             _IOW(KVMIO,  0x8a, struct kvm_cpuid)
@@ -2267,4 +2207,24 @@ struct kvm_s390_zpci_op {
  /* flags for kvm_s390_zpci_op->u.reg_aen.flags */
  #define KVM_S390_ZPCIOP_REGAEN_HOST    (1 << 0)
  
+/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
+#define KVM_SET_MEMORY_ATTRIBUTES              _IOW(KVMIO,  0xd2, struct kvm_memory_attributes)
+
+struct kvm_memory_attributes {
+       __u64 address;
+       __u64 size;
+       __u64 attributes;
+       __u64 flags;
+};
+
+#define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
+
+#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
+
+struct kvm_create_guest_memfd {
+       __u64 size;
+       __u64 flags;
+       __u64 reserved[6];
+};
+
  #endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h

index bb242fdcfe6b29bf96e287023701dd8629042969..ad5478dbad007341f70a8816aa506216ffea89ec 100644 (file)
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -138,4 +138,74 @@ struct mount_attr {
  /* List of all mount_attr versions. */
  #define MOUNT_ATTR_SIZE_VER0   32 /* sizeof first published struct */
  
+
+/*
+ * Structure for getting mount/superblock/filesystem info with statmount(2).
+ *
+ * The interface is similar to statx(2): individual fields or groups can be
+ * selected with the @mask argument of statmount().  Kernel will set the @mask
+ * field according to the supported fields.
+ *
+ * If string fields are selected, then the caller needs to pass a buffer that
+ * has space after the fixed part of the structure.  Nul terminated strings are
+ * copied there and offsets relative to @str are stored in the relevant fields.
+ * If the buffer is too small, then EOVERFLOW is returned.  The actually used
+ * size is returned in @size.
+ */
+struct statmount {
+       __u32 size;             /* Total size, including strings */
+       __u32 __spare1;
+       __u64 mask;             /* What results were written */
+       __u32 sb_dev_major;     /* Device ID */
+       __u32 sb_dev_minor;
+       __u64 sb_magic;         /* ..._SUPER_MAGIC */
+       __u32 sb_flags;         /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
+       __u32 fs_type;          /* [str] Filesystem type */
+       __u64 mnt_id;           /* Unique ID of mount */
+       __u64 mnt_parent_id;    /* Unique ID of parent (for root == mnt_id) */
+       __u32 mnt_id_old;       /* Reused IDs used in proc/.../mountinfo */
+       __u32 mnt_parent_id_old;
+       __u64 mnt_attr;         /* MOUNT_ATTR_... */
+       __u64 mnt_propagation;  /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
+       __u64 mnt_peer_group;   /* ID of shared peer group */
+       __u64 mnt_master;       /* Mount receives propagation from this ID */
+       __u64 propagate_from;   /* Propagation from in current namespace */
+       __u32 mnt_root;         /* [str] Root of mount relative to root of fs */
+       __u32 mnt_point;        /* [str] Mountpoint relative to current root */
+       __u64 __spare2[50];
+       char str[];             /* Variable size part containing strings */
+};
+
+/*
+ * Structure for passing mount ID and miscellaneous parameters to statmount(2)
+ * and listmount(2).
+ *
+ * For statmount(2) @param represents the request mask.
+ * For listmount(2) @param represents the last listed mount id (or zero).
+ */
+struct mnt_id_req {
+       __u32 size;
+       __u32 spare;
+       __u64 mnt_id;
+       __u64 param;
+};
+
+/* List of all mnt_id_req versions. */
+#define MNT_ID_REQ_SIZE_VER0   24 /* sizeof first published struct */
+
+/*
+ * @mask bits for statmount(2)
+ */
+#define STATMOUNT_SB_BASIC             0x00000001U     /* Want/got sb_... */
+#define STATMOUNT_MNT_BASIC            0x00000002U     /* Want/got mnt_... */
+#define STATMOUNT_PROPAGATE_FROM       0x00000004U     /* Want/got propagate_from */
+#define STATMOUNT_MNT_ROOT             0x00000008U     /* Want/got mnt_root  */
+#define STATMOUNT_MNT_POINT            0x00000010U     /* Want/got mnt_point */
+#define STATMOUNT_FS_TYPE              0x00000020U     /* Want/got fs_type */
+
+/*
+ * Special @mnt_id values that can be passed to listmount
+ */
+#define LSMT_ROOT              0xffffffffffffffff      /* root mount */
+
  #endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h

index 7cab2c65d3d7fce9210d2fb6d02012233b9923cf..2f2ee82d55175d052c0214a7e29da5d6ce2738ab 100644 (file)
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -154,6 +154,7 @@ struct statx {
  #define STATX_BTIME            0x00000800U     /* Want/got stx_btime */
  #define STATX_MNT_ID           0x00001000U     /* Got stx_mnt_id */
  #define STATX_DIOALIGN         0x00002000U     /* Want/got direct I/O alignment info */
+#define STATX_MNT_ID_UNIQUE    0x00004000U     /* Want/got extended stx_mount_id */
  
  #define STATX__RESERVED                0x80000000U     /* Reserved for future struct statx expansion */
  
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c

index c82a7f41b31c571c09c3dd454a7db3ebbb8a1e60..45e49671ae87b01fac48b2877f46403cf9ee1c36 100644 (file)
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -466,6 +466,8 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version)
  
  int ynl_recv_ack(struct ynl_sock *ys, int ret)
  {
+       struct ynl_parse_arg yarg = { .ys = ys, };
+
         if (!ret) {
                 yerr(ys, YNL_ERROR_EXPECT_ACK,
                      "Expecting an ACK but nothing received");
@@ -478,7 +480,7 @@ int ynl_recv_ack(struct ynl_sock *ys, int ret)
                 return ret;
         }
         return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid,
-                         ynl_cb_null, ys);
+                         ynl_cb_null, &yarg);
  }
  
  int ynl_cb_null(const struct nlmsghdr *nlh, void *data)
@@ -521,6 +523,7 @@ ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts)
                                 ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0;
                         }
                 }
+               i++;
         }
  
         return 0;
@@ -586,7 +589,13 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name)
                 return err;
         }
  
-       return ynl_recv_ack(ys, err);
+       err = ynl_recv_ack(ys, err);
+       if (err < 0) {
+               free(ys->mcast_groups);
+               return err;
+       }
+
+       return 0;
  }
  
  struct ynl_sock *
@@ -741,11 +750,14 @@ err_free:
  
  static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data)
  {
-       return ynl_ntf_parse((struct ynl_sock *)data, nlh);
+       struct ynl_parse_arg *yarg = data;
+
+       return ynl_ntf_parse(yarg->ys, nlh);
  }
  
  int ynl_ntf_check(struct ynl_sock *ys)
  {
+       struct ynl_parse_arg yarg = { .ys = ys, };
         ssize_t len;
         int err;
  
@@ -767,7 +779,7 @@ int ynl_ntf_check(struct ynl_sock *ys)
                         return len;
  
                 err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid,
-                                 ynl_ntf_trampoline, ys,
+                                 ynl_ntf_trampoline, &yarg,
                                   ynl_cb_array, NLMSG_MIN_TYPE);
                 if (err < 0)
                         return err;
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt

index 1b90575ee3c84eb206f9291e8fd05d43c70b2f9c..3b12595193c9f49a78a490b888f22d615b92dc43 100644 (file)
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -47,6 +47,10 @@ Print PMU events and metrics limited to the specific PMU name.
  --json::
  Output in JSON format.
  
+-o::
+--output=::
+       Output file name. By default output is written to stdout.
+
  [[EVENT_MODIFIERS]]
  EVENT MODIFIERS
  ---------------
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf

index 27e7c478880fdecd10761fc07d4249bf1581d9c0..f8774a9b1377a3e98b98543a66b4f8aea6fb6837 100644 (file)
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -236,6 +236,16 @@ else
    SHELLCHECK := $(shell which shellcheck 2> /dev/null)
  endif
  
+# shellcheck is using in tools/perf/tests/Build with option -a/--check-sourced (
+# introduced in v0.4.7) and -S/--severity (introduced in v0.6.0). So make the
+# minimal shellcheck version as v0.6.0.
+ifneq ($(SHELLCHECK),)
+  ifeq ($(shell expr $(shell $(SHELLCHECK) --version | grep version: | \
+        sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 060), 1)
+    SHELLCHECK :=
+  endif
+endif
+
  export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
  export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK
  
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c

index 61c2c96cc0701b886d7c1daecd92cfa25581d1f7..e27a1b1288c29ffe96ce871bc5bab76c8a67c8b7 100644 (file)
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -30,6 +30,8 @@
   * functions.
   */
  struct print_state {
+       /** @fp: File to write output to. */
+       FILE *fp;
         /**
          * @pmu_glob: Optionally restrict PMU and metric matching to PMU or
          * debugfs subsystem name.
@@ -66,13 +68,15 @@ static void default_print_start(void *ps)
  {
         struct print_state *print_state = ps;
  
-       if (!print_state->name_only && pager_in_use())
-               printf("\nList of pre-defined events (to be used in -e or -M):\n\n");
+       if (!print_state->name_only && pager_in_use()) {
+               fprintf(print_state->fp,
+                       "\nList of pre-defined events (to be used in -e or -M):\n\n");
+       }
  }
  
  static void default_print_end(void *print_state __maybe_unused) {}
  
-static void wordwrap(const char *s, int start, int max, int corr)
+static void wordwrap(FILE *fp, const char *s, int start, int max, int corr)
  {
         int column = start;
         int n;
@@ -82,10 +86,10 @@ static void wordwrap(const char *s, int start, int max, int corr)
                 int wlen = strcspn(s, " \t\n");
  
                 if ((column + wlen >= max && column > start) || saw_newline) {
-                       printf("\n%*s", start, "");
+                       fprintf(fp, "\n%*s", start, "");
                         column = start + corr;
                 }
-               n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+               n = fprintf(fp, "%s%.*s", column > start ? " " : "", wlen, s);
                 if (n <= 0)
                         break;
                 saw_newline = s[wlen] == '\n';
@@ -104,6 +108,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
  {
         struct print_state *print_state = ps;
         int pos;
+       FILE *fp = print_state->fp;
  
         if (deprecated && !print_state->deprecated)
                 return;
@@ -119,30 +124,30 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
  
         if (print_state->name_only) {
                 if (event_alias && strlen(event_alias))
-                       printf("%s ", event_alias);
+                       fprintf(fp, "%s ", event_alias);
                 else
-                       printf("%s ", event_name);
+                       fprintf(fp, "%s ", event_name);
                 return;
         }
  
         if (strcmp(print_state->last_topic, topic ?: "")) {
                 if (topic)
-                       printf("\n%s:\n", topic);
+                       fprintf(fp, "\n%s:\n", topic);
                 zfree(&print_state->last_topic);
                 print_state->last_topic = strdup(topic ?: "");
         }
  
         if (event_alias && strlen(event_alias))
-               pos = printf("  %s OR %s", event_name, event_alias);
+               pos = fprintf(fp, "  %s OR %s", event_name, event_alias);
         else
-               pos = printf("  %s", event_name);
+               pos = fprintf(fp, "  %s", event_name);
  
         if (!topic && event_type_desc) {
                 for (; pos < 53; pos++)
-                       putchar(' ');
-               printf("[%s]\n", event_type_desc);
+                       fputc(' ', fp);
+               fprintf(fp, "[%s]\n", event_type_desc);
         } else
-               putchar('\n');
+               fputc('\n', fp);
  
         if (desc && print_state->desc) {
                 char *desc_with_unit = NULL;
@@ -155,22 +160,22 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
                                               ? "%s. Unit: %s" : "%s Unit: %s",
                                             desc, pmu_name);
                 }
-               printf("%*s", 8, "[");
-               wordwrap(desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0);
-               printf("]\n");
+               fprintf(fp, "%*s", 8, "[");
+               wordwrap(fp, desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0);
+               fprintf(fp, "]\n");
                 free(desc_with_unit);
         }
         long_desc = long_desc ?: desc;
         if (long_desc && print_state->long_desc) {
-               printf("%*s", 8, "[");
-               wordwrap(long_desc, 8, pager_get_columns(), 0);
-               printf("]\n");
+               fprintf(fp, "%*s", 8, "[");
+               wordwrap(fp, long_desc, 8, pager_get_columns(), 0);
+               fprintf(fp, "]\n");
         }
  
         if (print_state->detailed && encoding_desc) {
-               printf("%*s", 8, "");
-               wordwrap(encoding_desc, 8, pager_get_columns(), 0);
-               putchar('\n');
+               fprintf(fp, "%*s", 8, "");
+               wordwrap(fp, encoding_desc, 8, pager_get_columns(), 0);
+               fputc('\n', fp);
         }
  }
  
@@ -184,6 +189,7 @@ static void default_print_metric(void *ps,
                                 const char *unit __maybe_unused)
  {
         struct print_state *print_state = ps;
+       FILE *fp = print_state->fp;
  
         if (print_state->event_glob &&
             (!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) &&
@@ -192,27 +198,27 @@ static void default_print_metric(void *ps,
  
         if (!print_state->name_only && !print_state->last_metricgroups) {
                 if (print_state->metricgroups) {
-                       printf("\nMetric Groups:\n");
+                       fprintf(fp, "\nMetric Groups:\n");
                         if (!print_state->metrics)
-                               putchar('\n');
+                               fputc('\n', fp);
                 } else {
-                       printf("\nMetrics:\n\n");
+                       fprintf(fp, "\nMetrics:\n\n");
                 }
         }
         if (!print_state->last_metricgroups ||
             strcmp(print_state->last_metricgroups, group ?: "")) {
                 if (group && print_state->metricgroups) {
                         if (print_state->name_only)
-                               printf("%s ", group);
+                               fprintf(fp, "%s ", group);
                         else if (print_state->metrics) {
                                 const char *gdesc = describe_metricgroup(group);
  
                                 if (gdesc)
-                                       printf("\n%s: [%s]\n", group, gdesc);
+                                       fprintf(fp, "\n%s: [%s]\n", group, gdesc);
                                 else
-                                       printf("\n%s:\n", group);
+                                       fprintf(fp, "\n%s:\n", group);
                         } else
-                               printf("%s\n", group);
+                               fprintf(fp, "%s\n", group);
                 }
                 zfree(&print_state->last_metricgroups);
                 print_state->last_metricgroups = strdup(group ?: "");
@@ -223,53 +229,59 @@ static void default_print_metric(void *ps,
         if (print_state->name_only) {
                 if (print_state->metrics &&
                     !strlist__has_entry(print_state->visited_metrics, name)) {
-                       printf("%s ", name);
+                       fprintf(fp, "%s ", name);
                         strlist__add(print_state->visited_metrics, name);
                 }
                 return;
         }
-       printf("  %s\n", name);
+       fprintf(fp, "  %s\n", name);
  
         if (desc && print_state->desc) {
-               printf("%*s", 8, "[");
-               wordwrap(desc, 8, pager_get_columns(), 0);
-               printf("]\n");
+               fprintf(fp, "%*s", 8, "[");
+               wordwrap(fp, desc, 8, pager_get_columns(), 0);
+               fprintf(fp, "]\n");
         }
         if (long_desc && print_state->long_desc) {
-               printf("%*s", 8, "[");
-               wordwrap(long_desc, 8, pager_get_columns(), 0);
-               printf("]\n");
+               fprintf(fp, "%*s", 8, "[");
+               wordwrap(fp, long_desc, 8, pager_get_columns(), 0);
+               fprintf(fp, "]\n");
         }
         if (expr && print_state->detailed) {
-               printf("%*s", 8, "[");
-               wordwrap(expr, 8, pager_get_columns(), 0);
-               printf("]\n");
+               fprintf(fp, "%*s", 8, "[");
+               wordwrap(fp, expr, 8, pager_get_columns(), 0);
+               fprintf(fp, "]\n");
         }
         if (threshold && print_state->detailed) {
-               printf("%*s", 8, "[");
-               wordwrap(threshold, 8, pager_get_columns(), 0);
-               printf("]\n");
+               fprintf(fp, "%*s", 8, "[");
+               wordwrap(fp, threshold, 8, pager_get_columns(), 0);
+               fprintf(fp, "]\n");
         }
  }
  
  struct json_print_state {
+       /** @fp: File to write output to. */
+       FILE *fp;
         /** Should a separator be printed prior to the next item? */
         bool need_sep;
  };
  
-static void json_print_start(void *print_state __maybe_unused)
+static void json_print_start(void *ps)
  {
-       printf("[\n");
+       struct json_print_state *print_state = ps;
+       FILE *fp = print_state->fp;
+
+       fprintf(fp, "[\n");
  }
  
  static void json_print_end(void *ps)
  {
         struct json_print_state *print_state = ps;
+       FILE *fp = print_state->fp;
  
-       printf("%s]\n", print_state->need_sep ? "\n" : "");
+       fprintf(fp, "%s]\n", print_state->need_sep ? "\n" : "");
  }
  
-static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...)
+static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, ...)
  {
         va_list args;
  
@@ -318,7 +330,7 @@ static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...)
                 }
         }
         va_end(args);
-       fputs(buf->buf, stdout);
+       fputs(buf->buf, fp);
  }
  
  static void json_print_event(void *ps, const char *pmu_name, const char *topic,
@@ -330,60 +342,71 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic,
  {
         struct json_print_state *print_state = ps;
         bool need_sep = false;
+       FILE *fp = print_state->fp;
         struct strbuf buf;
  
         strbuf_init(&buf, 0);
-       printf("%s{\n", print_state->need_sep ? ",\n" : "");
+       fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : "");
         print_state->need_sep = true;
         if (pmu_name) {
-               fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name);
+               fix_escape_fprintf(fp, &buf, "\t\"Unit\": \"%S\"", pmu_name);
                 need_sep = true;
         }
         if (topic) {
-               fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic);
+               fix_escape_fprintf(fp, &buf, "%s\t\"Topic\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  topic);
                 need_sep = true;
         }
         if (event_name) {
-               fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? ",\n" : "",
-                                 event_name);
+               fix_escape_fprintf(fp, &buf, "%s\t\"EventName\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  event_name);
                 need_sep = true;
         }
         if (event_alias && strlen(event_alias)) {
-               fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "",
-                                 event_alias);
+               fix_escape_fprintf(fp, &buf, "%s\t\"EventAlias\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  event_alias);
                 need_sep = true;
         }
         if (scale_unit && strlen(scale_unit)) {
-               fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "",
-                                 scale_unit);
+               fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  scale_unit);
                 need_sep = true;
         }
         if (event_type_desc) {
-               fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "",
-                                 event_type_desc);
+               fix_escape_fprintf(fp, &buf, "%s\t\"EventType\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  event_type_desc);
                 need_sep = true;
         }
         if (deprecated) {
-               fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "",
-                                 deprecated ? "1" : "0");
+               fix_escape_fprintf(fp, &buf, "%s\t\"Deprecated\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  deprecated ? "1" : "0");
                 need_sep = true;
         }
         if (desc) {
-               fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "",
-                                 desc);
+               fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  desc);
                 need_sep = true;
         }
         if (long_desc) {
-               fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "",
-                                 long_desc);
+               fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  long_desc);
                 need_sep = true;
         }
         if (encoding_desc) {
-               fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "",
-                                 encoding_desc);
+               fix_escape_fprintf(fp, &buf, "%s\t\"Encoding\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  encoding_desc);
                 need_sep = true;
         }
-       printf("%s}", need_sep ? "\n" : "");
+       fprintf(fp, "%s}", need_sep ? "\n" : "");
         strbuf_release(&buf);
  }
  
@@ -394,43 +417,53 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
  {
         struct json_print_state *print_state = ps;
         bool need_sep = false;
+       FILE *fp = print_state->fp;
         struct strbuf buf;
  
         strbuf_init(&buf, 0);
-       printf("%s{\n", print_state->need_sep ? ",\n" : "");
+       fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : "");
         print_state->need_sep = true;
         if (group) {
-               fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group);
+               fix_escape_fprintf(fp, &buf, "\t\"MetricGroup\": \"%S\"", group);
                 need_sep = true;
         }
         if (name) {
-               fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name);
+               fix_escape_fprintf(fp, &buf, "%s\t\"MetricName\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  name);
                 need_sep = true;
         }
         if (expr) {
-               fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr);
+               fix_escape_fprintf(fp, &buf, "%s\t\"MetricExpr\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  expr);
                 need_sep = true;
         }
         if (threshold) {
-               fix_escape_printf(&buf, "%s\t\"MetricThreshold\": \"%S\"", need_sep ? ",\n" : "",
-                                 threshold);
+               fix_escape_fprintf(fp, &buf, "%s\t\"MetricThreshold\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  threshold);
                 need_sep = true;
         }
         if (unit) {
-               fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", unit);
+               fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  unit);
                 need_sep = true;
         }
         if (desc) {
-               fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "",
-                                 desc);
+               fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  desc);
                 need_sep = true;
         }
         if (long_desc) {
-               fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "",
-                                 long_desc);
+               fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"",
+                                  need_sep ? ",\n" : "",
+                                  long_desc);
                 need_sep = true;
         }
-       printf("%s}", need_sep ? "\n" : "");
+       fprintf(fp, "%s}", need_sep ? "\n" : "");
         strbuf_release(&buf);
  }
  
@@ -449,8 +482,12 @@ static bool default_skip_duplicate_pmus(void *ps)
  int cmd_list(int argc, const char **argv)
  {
         int i, ret = 0;
-       struct print_state default_ps = {};
-       struct print_state json_ps = {};
+       struct print_state default_ps = {
+               .fp = stdout,
+       };
+       struct print_state json_ps = {
+               .fp = stdout,
+       };
         void *ps = &default_ps;
         struct print_callbacks print_cb = {
                 .print_start = default_print_start,
@@ -461,6 +498,7 @@ int cmd_list(int argc, const char **argv)
         };
         const char *cputype = NULL;
         const char *unit_name = NULL;
+       const char *output_path = NULL;
         bool json = false;
         struct option list_options[] = {
                 OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"),
@@ -471,6 +509,7 @@ int cmd_list(int argc, const char **argv)
                             "Print longer event descriptions."),
                 OPT_BOOLEAN(0, "details", &default_ps.detailed,
                             "Print information on the perf event names and expressions used internally by events."),
+               OPT_STRING('o', "output", &output_path, "file", "output file name"),
                 OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
                             "Print deprecated events."),
                 OPT_STRING(0, "cputype", &cputype, "cpu type",
@@ -497,6 +536,11 @@ int cmd_list(int argc, const char **argv)
         argc = parse_options(argc, argv, list_options, list_usage,
                              PARSE_OPT_STOP_AT_NON_OPTION);
  
+       if (output_path) {
+               default_ps.fp = fopen(output_path, "w");
+               json_ps.fp = default_ps.fp;
+       }
+
         setup_pager();
  
         if (!default_ps.name_only)
@@ -618,5 +662,8 @@ out:
         free(default_ps.last_topic);
         free(default_ps.last_metricgroups);
         strlist__delete(default_ps.visited_metrics);
+       if (output_path)
+               fclose(default_ps.fp);
+
         return ret;
  }
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c

index 91e6828c38cc2ef4c6b4d28d842309ce4e475f8d..86c91012517267c5355d7fedebdeed42e9cfb675 100644 (file)
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -4080,8 +4080,8 @@ int cmd_record(int argc, const char **argv)
         }
  
         if (rec->switch_output.num_files) {
-               rec->switch_output.filenames = calloc(sizeof(char *),
-                                                     rec->switch_output.num_files);
+               rec->switch_output.filenames = calloc(rec->switch_output.num_files,
+                                                     sizeof(char *));
                 if (!rec->switch_output.filenames) {
                         err = -EINVAL;
                         goto out_opts;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c

index baf1ab083436e3f980157cb5d3646d6ccc59a40c..5301d1badd435906ddf152511e6935b73236c034 100644 (file)
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -357,7 +357,7 @@ static void perf_top__print_sym_table(struct perf_top *top)
  
  static void prompt_integer(int *target, const char *msg)
  {
-       char *buf = malloc(0), *p;
+       char *buf = NULL, *p;
         size_t dummy = 0;
         int tmp;
  
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json

index 35124a4ddcb2bd547d190b40cdbb2c81fd5f5841..bbfa3883e53384f563427e1b7567e679e1d1465f 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -114,7 +114,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to certain allocation restrictions.",
-        "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
          "MetricName": "tma_alloc_restriction",
          "MetricThreshold": "tma_alloc_restriction > 0.1",
@@ -124,7 +124,7 @@
      {
          "BriefDescription": "Counts the total number of issue slots  that were not consumed by the backend due to backend stalls",
          "DefaultMetricgroupName": "TopdownL1",
-        "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots",
          "MetricGroup": "Default;TopdownL1;tma_L1_group",
          "MetricName": "tma_backend_bound",
          "MetricThreshold": "tma_backend_bound > 0.1",
@@ -169,7 +169,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
-        "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
          "MetricName": "tma_branch_detect",
          "MetricThreshold": "tma_branch_detect > 0.05",
@@ -179,7 +179,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to branch mispredicts.",
-        "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / tma_info_core_slots",
          "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
          "MetricName": "tma_branch_mispredicts",
          "MetricThreshold": "tma_branch_mispredicts > 0.05",
@@ -189,7 +189,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
-        "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
          "MetricName": "tma_branch_resteer",
          "MetricThreshold": "tma_branch_resteer > 0.05",
@@ -198,7 +198,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to the microcode sequencer (MS).",
-        "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
          "MetricName": "tma_cisc",
          "MetricThreshold": "tma_cisc > 0.05",
@@ -217,7 +217,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to decode stalls.",
-        "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
          "MetricName": "tma_decode",
          "MetricThreshold": "tma_decode > 0.05",
@@ -235,7 +235,6 @@
      },
      {
          "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_dram_bound",
@@ -245,7 +244,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.",
-        "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
          "MetricName": "tma_fast_nuke",
          "MetricThreshold": "tma_fast_nuke > 0.05",
@@ -254,7 +253,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
-        "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / tma_info_core_slots",
          "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
          "MetricName": "tma_fetch_bandwidth",
          "MetricThreshold": "tma_fetch_bandwidth > 0.1",
@@ -264,7 +263,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
-        "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / tma_info_core_slots",
          "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
          "MetricName": "tma_fetch_latency",
          "MetricThreshold": "tma_fetch_latency > 0.15",
@@ -283,7 +282,7 @@
      },
      {
          "BriefDescription": "Counts the number of floating point divide operations per uop.",
-        "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@UOPS_RETIRED.FPDIV@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
          "MetricName": "tma_fpdiv_uops",
          "MetricThreshold": "tma_fpdiv_uops > 0.2",
@@ -293,7 +292,7 @@
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to frontend stalls.",
          "DefaultMetricgroupName": "TopdownL1",
-        "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / tma_info_core_slots",
          "MetricGroup": "Default;TopdownL1;tma_L1_group",
          "MetricName": "tma_frontend_bound",
          "MetricThreshold": "tma_frontend_bound > 0.2",
@@ -303,7 +302,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to instruction cache misses.",
-        "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
          "MetricName": "tma_icache_misses",
          "MetricThreshold": "tma_icache_misses > 0.05",
@@ -330,7 +329,7 @@
      },
      {
          "BriefDescription": "Instructions Per Cycle",
-        "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / tma_info_core_clks",
          "MetricName": "tma_info_core_ipc",
          "Unit": "cpu_atom"
      },
@@ -342,7 +341,7 @@
      },
      {
          "BriefDescription": "Uops Per Instruction",
-        "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
+        "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY",
          "MetricName": "tma_info_core_upi",
          "Unit": "cpu_atom"
      },
@@ -366,13 +365,13 @@
      },
      {
          "BriefDescription": "Ratio of all branches which mispredict",
-        "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.ALL_BRANCHES",
          "MetricName": "tma_info_inst_mix_branch_mispredict_ratio",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Ratio between Mispredicted branches and unknown branches",
-        "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
+        "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BACLEARS.ANY",
          "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio",
          "Unit": "cpu_atom"
      },
@@ -390,61 +389,61 @@
      },
      {
          "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES",
          "MetricName": "tma_info_inst_mix_ipbranch",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL",
          "MetricName": "tma_info_inst_mix_ipcall",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instructions per Far Branch",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)",
          "MetricName": "tma_info_inst_mix_ipfarbranch",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instructions per Load",
-        "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS",
          "MetricName": "tma_info_inst_mix_ipload",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)",
          "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN",
          "MetricName": "tma_info_inst_mix_ipmisp_cond_taken",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT",
          "MetricName": "tma_info_inst_mix_ipmisp_indirect",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instructions per retired return Branch Misprediction",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN",
          "MetricName": "tma_info_inst_mix_ipmisp_ret",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instructions per retired Branch Misprediction",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES",
          "MetricName": "tma_info_inst_mix_ipmispredict",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Instructions per Store",
-        "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
+        "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES",
          "MetricName": "tma_info_inst_mix_ipstore",
          "Unit": "cpu_atom"
      },
@@ -480,19 +479,19 @@
      },
      {
          "BriefDescription": "Cycle cost per DRAM hit",
-        "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+        "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
          "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Cycle cost per L2 hit",
-        "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_LOAD_UOPS_RETIRED.L2_HIT",
          "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit",
          "Unit": "cpu_atom"
      },
      {
          "BriefDescription": "Cycle cost per LLC hit",
-        "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_LOAD_UOPS_RETIRED.L3_HIT",
          "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit",
          "Unit": "cpu_atom"
      },
@@ -504,7 +503,7 @@
      },
      {
          "BriefDescription": "Average CPU Utilization",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+        "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
          "MetricName": "tma_info_system_cpu_utilization",
          "Unit": "cpu_atom"
      },
@@ -524,7 +523,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
-        "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
          "MetricName": "tma_itlb_misses",
          "MetricThreshold": "tma_itlb_misses > 0.05",
@@ -533,7 +532,7 @@
      },
      {
          "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.",
-        "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks",
+        "MetricExpr": "cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / tma_info_core_clks",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l1_bound",
          "MetricThreshold": "tma_l1_bound > 0.1",
@@ -542,7 +541,6 @@
      },
      {
          "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l2_bound",
@@ -552,7 +550,6 @@
      },
      {
          "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l3_bound",
@@ -571,7 +568,7 @@
      },
      {
          "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
-        "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / tma_info_core_slots",
          "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
          "MetricName": "tma_machine_clears",
          "MetricThreshold": "tma_machine_clears > 0.05",
@@ -581,7 +578,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
-        "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
          "MetricName": "tma_mem_scheduler",
          "MetricThreshold": "tma_mem_scheduler > 0.1",
@@ -590,7 +587,7 @@
      },
      {
          "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.",
-        "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)",
+        "MetricExpr": "min(tma_backend_bound, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)",
          "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
          "MetricName": "tma_memory_bound",
          "MetricThreshold": "tma_memory_bound > 0.2",
@@ -609,7 +606,7 @@
      },
      {
          "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
-        "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@UOPS_RETIRED.MS@ / tma_info_core_slots",
          "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
          "MetricName": "tma_ms_uops",
          "MetricThreshold": "tma_ms_uops > 0.05",
@@ -620,7 +617,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
-        "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
          "MetricName": "tma_non_mem_scheduler",
          "MetricThreshold": "tma_non_mem_scheduler > 0.1",
@@ -629,7 +626,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to a machine clear (slow nuke).",
-        "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
          "MetricName": "tma_nuke",
          "MetricThreshold": "tma_nuke > 0.05",
@@ -638,7 +635,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to other common frontend stalls not categorized.",
-        "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
          "MetricName": "tma_other_fb",
          "MetricThreshold": "tma_other_fb > 0.05",
@@ -647,7 +644,7 @@
      },
      {
          "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.",
-        "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks",
+        "MetricExpr": "cpu_atom@LD_HEAD.OTHER_AT_RET@ / tma_info_core_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
          "MetricName": "tma_other_l1",
          "MetricThreshold": "tma_other_l1 > 0.05",
@@ -683,7 +680,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to wrong predecodes.",
-        "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
          "MetricName": "tma_predecode",
          "MetricThreshold": "tma_predecode > 0.05",
@@ -692,7 +689,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
-        "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
          "MetricName": "tma_register",
          "MetricThreshold": "tma_register > 0.1",
@@ -701,7 +698,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
-        "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
          "MetricName": "tma_reorder_buffer",
          "MetricThreshold": "tma_reorder_buffer > 0.1",
@@ -722,7 +719,7 @@
      {
          "BriefDescription": "Counts the number of issue slots  that result in retirement slots.",
          "DefaultMetricgroupName": "TopdownL1",
-        "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / tma_info_core_slots",
          "MetricGroup": "Default;TopdownL1;tma_L1_group",
          "MetricName": "tma_retiring",
          "MetricThreshold": "tma_retiring > 0.75",
@@ -741,7 +738,7 @@
      },
      {
          "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
-        "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots",
+        "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / tma_info_core_slots",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
          "MetricName": "tma_serialization",
          "MetricThreshold": "tma_serialization > 0.1",
@@ -768,7 +765,7 @@
      },
      {
          "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.",
-        "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks",
+        "MetricExpr": "cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / tma_info_core_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
          "MetricName": "tma_stlb_hit",
          "MetricThreshold": "tma_stlb_hit > 0.05",
@@ -777,7 +774,7 @@
      },
      {
          "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.",
-        "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks",
+        "MetricExpr": "cpu_atom@LD_HEAD.PGWALK_AT_RET@ / tma_info_core_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
          "MetricName": "tma_stlb_miss",
          "MetricThreshold": "tma_stlb_miss > 0.05",
@@ -795,8 +792,7 @@
      },
      {
          "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
-        "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
+        "MetricExpr": "cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / tma_info_core_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
          "MetricName": "tma_store_fwd_blk",
          "MetricThreshold": "tma_store_fwd_blk > 0.05",
@@ -875,7 +871,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers",
-        "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches",
+        "MetricExpr": "cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks + tma_unknown_branches",
          "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group",
          "MetricName": "tma_branch_resteers",
          "MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -905,7 +901,6 @@
      },
      {
          "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
          "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
          "MetricName": "tma_contested_accesses",
@@ -927,7 +922,6 @@
      },
      {
          "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
          "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
          "MetricName": "tma_data_sharing",
@@ -948,7 +942,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles where the Divider unit was active",
-        "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@ARITH.DIV_ACTIVE@ / tma_info_thread_clks",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group",
          "MetricName": "tma_divider",
          "MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -958,7 +952,6 @@
      },
      {
          "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks",
          "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_dram_bound",
@@ -979,7 +972,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines",
-        "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / tma_info_thread_clks",
          "MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
          "MetricName": "tma_dsb_switches",
          "MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1019,7 +1012,7 @@
      },
      {
          "BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed",
-        "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@L1D_PEND_MISS.FB_FULL@ / tma_info_thread_clks",
          "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
          "MetricName": "tma_fb_full",
          "MetricThreshold": "tma_fb_full > 0.3",
@@ -1154,7 +1147,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
-        "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks",
          "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
          "MetricName": "tma_icache_misses",
          "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1164,7 +1157,6 @@
      },
      {
          "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
          "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
          "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -1173,7 +1165,7 @@
      },
      {
          "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_NTAKEN",
          "MetricGroup": "Bad;BrMispredicts",
          "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken",
          "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200",
@@ -1181,7 +1173,7 @@
      },
      {
          "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN",
          "MetricGroup": "Bad;BrMispredicts",
          "MetricName": "tma_info_bad_spec_ipmisp_cond_taken",
          "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200",
@@ -1197,7 +1189,7 @@
      },
      {
          "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RET",
          "MetricGroup": "Bad;BrMispredicts",
          "MetricName": "tma_info_bad_spec_ipmisp_ret",
          "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500",
@@ -1205,7 +1197,7 @@
      },
      {
          "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES",
          "MetricGroup": "Bad;BadSpec;BrMispredicts",
          "MetricName": "tma_info_bad_spec_ipmispredict",
          "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200",
@@ -1213,7 +1205,6 @@
      },
      {
          "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
          "MetricGroup": "Cor;SMT",
          "MetricName": "tma_info_botlnk_l0_core_bound_likely",
@@ -1222,7 +1213,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))",
          "MetricGroup": "DSBmiss;Fed;tma_issueFB",
          "MetricName": "tma_info_botlnk_l2_dsb_misses",
@@ -1232,7 +1222,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
          "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
          "MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -1242,7 +1231,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
          "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
          "MetricName": "tma_info_bottleneck_big_code",
@@ -1261,7 +1249,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
          "MetricGroup": "Fed;FetchBW;Frontend",
          "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
@@ -1270,7 +1257,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
          "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
          "MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -1280,7 +1266,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
          "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
          "MetricName": "tma_info_bottleneck_memory_data_tlbs",
@@ -1290,7 +1275,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
          "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
          "MetricName": "tma_info_bottleneck_memory_latency",
@@ -1300,7 +1284,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
          "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
          "MetricName": "tma_info_bottleneck_mispredictions",
@@ -1317,14 +1300,14 @@
      },
      {
          "BriefDescription": "Fraction of branches that are non-taken conditionals",
-        "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_NTAKEN@ / BR_INST_RETIRED.ALL_BRANCHES",
          "MetricGroup": "Bad;Branches;CodeGen;PGO",
          "MetricName": "tma_info_branches_cond_nt",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Fraction of branches that are taken conditionals",
-        "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_TAKEN@ / BR_INST_RETIRED.ALL_BRANCHES",
          "MetricGroup": "Bad;Branches;CodeGen;PGO",
          "MetricName": "tma_info_branches_cond_tk",
          "Unit": "cpu_core"
@@ -1352,7 +1335,7 @@
      },
      {
          "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
-        "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_core_core_clks",
          "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
          "MetricName": "tma_info_core_coreipc",
          "Unit": "cpu_core"
@@ -1374,14 +1357,14 @@
      },
      {
          "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)",
+        "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)",
          "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
          "MetricName": "tma_info_core_ilp",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
-        "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@",
+        "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@UOPS_ISSUED.ANY@",
          "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
          "MetricName": "tma_info_frontend_dsb_coverage",
          "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35",
@@ -1390,28 +1373,28 @@
      },
      {
          "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.",
-        "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
+        "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@",
          "MetricGroup": "DSBmiss",
          "MetricName": "tma_info_frontend_dsb_switch_cost",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Average number of Uops issued by front-end when it issued something",
-        "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
+        "MetricExpr": "cpu_core@UOPS_ISSUED.ANY@ / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
          "MetricGroup": "Fed;FetchBW",
          "MetricName": "tma_info_frontend_fetch_upc",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Average Latency for L1 instruction cache misses",
-        "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
+        "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@",
          "MetricGroup": "Fed;FetchLat;IcMiss",
          "MetricName": "tma_info_frontend_icache_miss_latency",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FRONTEND_RETIRED.ANY_DSB_MISS",
          "MetricGroup": "DSBmiss;Fed",
          "MetricName": "tma_info_frontend_ipdsb_miss_ret",
          "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50",
@@ -1440,14 +1423,14 @@
      },
      {
          "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
-        "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@",
+        "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@UOPS_ISSUED.ANY@",
          "MetricGroup": "Fed;LSD",
          "MetricName": "tma_info_frontend_lsd_coverage",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Branch instructions per taken branch.",
-        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+        "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.NEAR_TAKEN",
          "MetricGroup": "Branches;Fed;PGO",
          "MetricName": "tma_info_inst_mix_bptkbranch",
          "Unit": "cpu_core"
@@ -1462,7 +1445,7 @@
      },
      {
          "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)",
          "MetricGroup": "Flops;InsType",
          "MetricName": "tma_info_inst_mix_iparith",
          "MetricThreshold": "tma_info_inst_mix_iparith < 10",
@@ -1471,7 +1454,7 @@
      },
      {
          "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
          "MetricGroup": "Flops;FpVector;InsType",
          "MetricName": "tma_info_inst_mix_iparith_avx128",
          "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
@@ -1480,7 +1463,7 @@
      },
      {
          "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
          "MetricGroup": "Flops;FpVector;InsType",
          "MetricName": "tma_info_inst_mix_iparith_avx256",
          "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
@@ -1489,7 +1472,7 @@
      },
      {
          "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
          "MetricGroup": "Flops;FpScalar;InsType",
          "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
          "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
@@ -1498,7 +1481,7 @@
      },
      {
          "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
          "MetricGroup": "Flops;FpScalar;InsType",
          "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
          "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
@@ -1507,7 +1490,7 @@
      },
      {
          "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES",
          "MetricGroup": "Branches;Fed;InsType",
          "MetricName": "tma_info_inst_mix_ipbranch",
          "MetricThreshold": "tma_info_inst_mix_ipbranch < 8",
@@ -1515,7 +1498,7 @@
      },
      {
          "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_CALL",
          "MetricGroup": "Branches;Fed;PGO",
          "MetricName": "tma_info_inst_mix_ipcall",
          "MetricThreshold": "tma_info_inst_mix_ipcall < 200",
@@ -1523,7 +1506,7 @@
      },
      {
          "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
          "MetricGroup": "Flops;InsType",
          "MetricName": "tma_info_inst_mix_ipflop",
          "MetricThreshold": "tma_info_inst_mix_ipflop < 10",
@@ -1531,7 +1514,7 @@
      },
      {
          "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_LOADS",
          "MetricGroup": "InsType",
          "MetricName": "tma_info_inst_mix_ipload",
          "MetricThreshold": "tma_info_inst_mix_ipload < 3",
@@ -1539,7 +1522,7 @@
      },
      {
          "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_STORES",
          "MetricGroup": "InsType",
          "MetricName": "tma_info_inst_mix_ipstore",
          "MetricThreshold": "tma_info_inst_mix_ipstore < 8",
@@ -1547,7 +1530,7 @@
      },
      {
          "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@",
          "MetricGroup": "Prefetches",
          "MetricName": "tma_info_inst_mix_ipswpf",
          "MetricThreshold": "tma_info_inst_mix_ipswpf < 100",
@@ -1555,7 +1538,7 @@
      },
      {
          "BriefDescription": "Instruction per taken branch",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_TAKEN",
          "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB",
          "MetricName": "tma_info_inst_mix_iptb",
          "MetricThreshold": "tma_info_inst_mix_iptb < 13",
@@ -1655,14 +1638,14 @@
      },
      {
          "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY",
+        "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY",
          "MetricGroup": "Mem;MemoryBound;MemoryLat",
          "MetricName": "tma_info_memory_load_miss_real_latency",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES",
          "MetricGroup": "Mem;MemoryBW;MemoryBound",
          "MetricName": "tma_info_memory_mlp",
          "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
@@ -1670,28 +1653,28 @@
      },
      {
          "BriefDescription": "Average Parallel L2 cache miss data reads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
          "MetricGroup": "Memory_BW;Offcore",
          "MetricName": "tma_info_memory_oro_data_l2_mlp",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Average Latency for L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / OFFCORE_REQUESTS.DEMAND_DATA_RD",
          "MetricGroup": "Memory_Lat;Offcore",
          "MetricName": "tma_info_memory_oro_load_l2_miss_latency",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Average Parallel L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
+        "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
          "MetricGroup": "Memory_BW;Offcore",
          "MetricName": "tma_info_memory_oro_load_l2_mlp",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Average Latency for L3 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
          "MetricGroup": "Memory_Lat;Offcore",
          "MetricName": "tma_info_memory_oro_load_l3_miss_latency",
          "Unit": "cpu_core"
@@ -1755,14 +1738,14 @@
      },
      {
          "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
          "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
          "MetricName": "tma_info_pipeline_execute",
          "Unit": "cpu_core"
      },
      {
          "BriefDescription": "Instructions per a microcode Assist invocation",
-        "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
          "MetricGroup": "Pipeline;Ret;Retire",
          "MetricName": "tma_info_pipeline_ipassist",
          "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
@@ -1778,7 +1761,7 @@
      },
      {
          "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
-        "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
+        "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
          "MetricGroup": "Pipeline;Ret",
          "MetricName": "tma_info_pipeline_strings_cycles",
          "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1",
@@ -1793,7 +1776,7 @@
      },
      {
          "BriefDescription": "Average CPU Utilization",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+        "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
          "MetricGroup": "HPC;Summary",
          "MetricName": "tma_info_system_cpu_utilization",
          "Unit": "cpu_core"
@@ -1816,7 +1799,7 @@
      },
      {
          "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
-        "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u",
          "MetricGroup": "Branches;OS",
          "MetricName": "tma_info_system_ipfarbranch",
          "MetricThreshold": "tma_info_system_ipfarbranch < 1e6",
@@ -1847,6 +1830,7 @@
      },
      {
          "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
          "MetricGroup": "Mem;MemoryLat;SoC",
          "MetricName": "tma_info_system_mem_read_latency",
@@ -1855,6 +1839,7 @@
      },
      {
          "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL",
          "MetricGroup": "Mem;SoC",
          "MetricName": "tma_info_system_mem_request_latency",
@@ -1897,7 +1882,7 @@
      },
      {
          "BriefDescription": "The ratio of Executed- by Issued-Uops",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+        "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / UOPS_ISSUED.ANY",
          "MetricGroup": "Cor;Pipeline",
          "MetricName": "tma_info_thread_execute_per_issue",
          "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.",
@@ -1905,7 +1890,7 @@
      },
      {
          "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
-        "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_thread_clks",
          "MetricGroup": "Ret;Summary",
          "MetricName": "tma_info_thread_ipc",
          "Unit": "cpu_core"
@@ -1972,7 +1957,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
-        "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks",
          "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
          "MetricName": "tma_itlb_misses",
          "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -1992,7 +1977,6 @@
      },
      {
          "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks",
          "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l2_bound",
@@ -2003,7 +1987,6 @@
      },
      {
          "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
          "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l3_bound",
@@ -2024,7 +2007,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
-        "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@DECODE.LCP@ / tma_info_thread_clks",
          "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
          "MetricName": "tma_lcp",
          "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
@@ -2045,7 +2028,7 @@
      },
      {
          "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
-        "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)",
+        "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_2_3_10@ / (3 * tma_info_core_core_clks)",
          "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
          "MetricName": "tma_load_op_utilization",
          "MetricThreshold": "tma_load_op_utilization > 0.6",
@@ -2064,7 +2047,7 @@
      },
      {
          "BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk",
-        "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@ / tma_info_thread_clks",
          "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group",
          "MetricName": "tma_load_stlb_miss",
          "MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2073,7 +2056,6 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks",
          "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
          "MetricName": "tma_lock_latency",
@@ -2136,6 +2118,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
          "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
          "MetricName": "tma_memory_fence",
@@ -2145,7 +2128,6 @@
      },
      {
          "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)",
          "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
          "MetricName": "tma_memory_operations",
@@ -2155,7 +2137,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
-        "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots",
+        "MetricExpr": "cpu_core@UOPS_RETIRED.MS@ / tma_info_thread_slots",
          "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
          "MetricName": "tma_microcode_sequencer",
          "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
@@ -2225,7 +2207,6 @@
      },
      {
          "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
          "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
          "MetricName": "tma_other_light_ops",
@@ -2246,7 +2227,7 @@
      },
      {
          "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
-        "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
+        "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_0@ / tma_info_core_core_clks",
          "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
          "MetricName": "tma_port_0",
          "MetricThreshold": "tma_port_0 > 0.6",
@@ -2256,7 +2237,7 @@
      },
      {
          "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)",
-        "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks",
+        "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_1@ / tma_info_core_core_clks",
          "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
          "MetricName": "tma_port_1",
          "MetricThreshold": "tma_port_1 > 0.6",
@@ -2266,7 +2247,7 @@
      },
      {
          "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
-        "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
+        "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks",
          "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
          "MetricName": "tma_port_6",
          "MetricThreshold": "tma_port_6 > 0.6",
@@ -2296,7 +2277,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
          "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
          "MetricName": "tma_ports_utilized_1",
          "MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2306,7 +2287,8 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+        "MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
          "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
          "MetricName": "tma_ports_utilized_2",
          "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2316,7 +2298,8 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+        "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
          "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
          "MetricName": "tma_ports_utilized_3m",
          "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -2338,7 +2321,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
-        "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks",
          "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
          "MetricName": "tma_serializing_operation",
          "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
@@ -2348,7 +2331,7 @@
      },
      {
          "BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.",
-        "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)",
+        "MetricExpr": "cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)",
          "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group",
          "MetricName": "tma_shuffles",
          "MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
@@ -2357,7 +2340,8 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
-        "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
+        "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
          "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
          "MetricName": "tma_slow_pause",
          "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
@@ -2377,8 +2361,7 @@
      },
      {
          "BriefDescription": "This metric represents rate of split store accesses",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
-        "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
+        "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_STORES@ / tma_info_core_core_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
          "MetricName": "tma_split_stores",
          "MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
@@ -2398,7 +2381,7 @@
      },
      {
          "BriefDescription": "This metric estimates how often CPU was stalled  due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write",
-        "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@EXE_ACTIVITY.BOUND_ON_STORES@ / tma_info_thread_clks",
          "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_store_bound",
          "MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -2408,7 +2391,6 @@
      },
      {
          "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
          "MetricName": "tma_store_fwd_blk",
@@ -2448,7 +2430,7 @@
      },
      {
          "BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk",
-        "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks",
+        "MetricExpr": "cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@ / tma_info_core_core_clks",
          "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group",
          "MetricName": "tma_store_stlb_miss",
          "MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2467,7 +2449,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
-        "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks",
+        "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks",
          "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
          "MetricName": "tma_unknown_branches",
          "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json

index c150c14ac6ed9925888fb9afc5e15e8dabdc1d8f..a35edf7d86a97e20570cf3f053d1b314be37fd20 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -195,7 +195,6 @@
      },
      {
          "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_dram_bound",
@@ -457,7 +456,6 @@
      },
      {
          "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l2_bound",
@@ -466,7 +464,6 @@
      },
      {
          "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
          "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l3_bound",
@@ -683,7 +680,6 @@
      },
      {
          "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
          "MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json

index e31a4aac9f205e4d462b43472dbfc02c2ffd91c1..56e54babcc26f16aee88abae0716675e3ab97c83 100644 (file)
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -400,7 +400,6 @@
      },
      {
          "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
          "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
          "MetricName": "tma_contested_accesses",
@@ -421,7 +420,6 @@
      },
      {
          "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
          "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
          "MetricName": "tma_data_sharing",
@@ -449,7 +447,6 @@
      },
      {
          "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)",
          "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_dram_bound",
@@ -656,7 +653,6 @@
      },
      {
          "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
          "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
          "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -699,7 +695,6 @@
      },
      {
          "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
          "MetricGroup": "Cor;SMT",
          "MetricName": "tma_info_botlnk_l0_core_bound_likely",
@@ -707,7 +702,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))",
          "MetricGroup": "DSBmiss;Fed;tma_issueFB",
          "MetricName": "tma_info_botlnk_l2_dsb_misses",
@@ -716,7 +710,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
          "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
          "MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -725,7 +718,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
          "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
          "MetricName": "tma_info_bottleneck_big_code",
@@ -742,7 +734,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
          "MetricGroup": "Fed;FetchBW;Frontend",
          "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
@@ -750,7 +741,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
          "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
          "MetricName": "tma_info_bottleneck_memory_bandwidth",
@@ -759,7 +749,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
          "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
          "MetricName": "tma_info_bottleneck_memory_data_tlbs",
@@ -768,7 +757,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
          "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
          "MetricName": "tma_info_bottleneck_memory_latency",
@@ -777,7 +765,6 @@
      },
      {
          "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
          "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
          "MetricName": "tma_info_bottleneck_mispredictions",
@@ -1301,6 +1288,7 @@
      },
      {
          "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)",
          "MetricGroup": "Mem;MemoryLat;SoC",
          "MetricName": "tma_info_system_mem_read_latency",
@@ -1455,7 +1443,6 @@
      },
      {
          "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
          "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l2_bound",
@@ -1465,7 +1452,6 @@
      },
      {
          "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
          "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
          "MetricName": "tma_l3_bound",
@@ -1538,7 +1524,6 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
          "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
          "MetricName": "tma_lock_latency",
@@ -1596,6 +1581,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
          "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
          "MetricName": "tma_memory_fence",
@@ -1604,7 +1590,6 @@
      },
      {
          "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)",
          "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
          "MetricName": "tma_memory_operations",
@@ -1676,7 +1661,6 @@
      },
      {
          "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
-        "MetricConstraint": "NO_GROUP_EVENTS",
          "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
          "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
          "MetricName": "tma_other_light_ops",
@@ -1758,6 +1742,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
          "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
          "MetricName": "tma_ports_utilized_2",
@@ -1767,6 +1752,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
          "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
          "MetricName": "tma_ports_utilized_3m",
@@ -1822,6 +1808,7 @@
      },
      {
          "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
          "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
          "MetricName": "tma_slow_pause",
@@ -1840,7 +1827,6 @@
      },
      {
          "BriefDescription": "This metric represents rate of split store accesses",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
          "MetricName": "tma_split_stores",
@@ -1868,7 +1854,6 @@
      },
      {
          "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
-        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
          "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
          "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
          "MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh

index 4c598cfc5afa14816f128da39f86cc7f18b4c68a..e5fa8d6f9eb1fdad3a5d700da9fa788c771a31ad 100755 (executable)
--- a/tools/perf/tests/shell/daemon.sh
+++ b/tools/perf/tests/shell/daemon.sh
@@ -414,16 +414,30 @@ EOF
         # start daemon
         daemon_start ${config} test
  
-       # send 2 signals
-       perf daemon signal --config ${config} --session test
-       perf daemon signal --config ${config}
-
-       # stop daemon
-       daemon_exit ${config}
-
-       # count is 2 perf.data for signals and 1 for perf record finished
-       count=`ls ${base}/session-test/*perf.data* | wc -l`
-       if [ ${count} -ne 3 ]; then
+        # send 2 signals then exit. Do this in a loop watching the number of
+        # files to avoid races. If the loop retries more than 600 times then
+        # give up.
+       local retries=0
+       local signals=0
+       local success=0
+       while [ ${retries} -lt 600 ] && [ ${success} -eq 0 ]; do
+               local files
+               files=`ls ${base}/session-test/*perf.data* 2> /dev/null | wc -l`
+               if [ ${signals} -eq 0 ]; then
+                       perf daemon signal --config ${config} --session test
+                       signals=1
+               elif [ ${signals} -eq 1 ] && [ $files -ge 1 ]; then
+                       perf daemon signal --config ${config}
+                       signals=2
+               elif [ ${signals} -eq 2 ] && [ $files -ge 2 ]; then
+                       daemon_exit ${config}
+                       signals=3
+               elif [ ${signals} -eq 3 ] && [ $files -ge 3 ]; then
+                       success=1
+               fi
+               retries=$((${retries} +1))
+       done
+       if [ ${success} -eq 0 ]; then
                 error=1
                 echo "FAILED: perf data no generated"
         fi
diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh

index 22b004f2b23ec6bb2c7748c6b804c292c276daf6..8a868ae64560e1842019155d4f1f519e5b8eecef 100755 (executable)
--- a/tools/perf/tests/shell/list.sh
+++ b/tools/perf/tests/shell/list.sh
@@ -3,17 +3,32 @@
  # SPDX-License-Identifier: GPL-2.0
  
  set -e
-err=0
  
  shelldir=$(dirname "$0")
  # shellcheck source=lib/setup_python.sh
  . "${shelldir}"/lib/setup_python.sh
  
+list_output=$(mktemp /tmp/__perf_test.list_output.json.XXXXX)
+
+cleanup() {
+  rm -f "${list_output}"
+
+  trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+  cleanup
+  exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
  test_list_json() {
    echo "Json output test"
-  perf list -j | $PYTHON -m json.tool
+  perf list -j -o "${list_output}"
+  $PYTHON -m json.tool "${list_output}"
    echo "Json output test [Success]"
  }
  
  test_list_json
-exit $err
+cleanup
+exit 0
diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh

index 5ae7bd0031a8226ab7e1f38ed4869e9058f0cf1d..fa4d71e2e72a6146485859883541023c9ac9e772 100755 (executable)
--- a/tools/perf/tests/shell/script.sh
+++ b/tools/perf/tests/shell/script.sh
@@ -36,8 +36,7 @@ test_db()
         echo "DB test"
  
         # Check if python script is supported
-       libpython=$(perf version --build-options | grep python | grep -cv OFF)
-       if [ "${libpython}" != "1" ] ; then
+        if perf version --build-options | grep python | grep -q OFF ; then
                 echo "SKIP: python scripting is not supported"
                 err=2
                 return
@@ -54,7 +53,14 @@ def sample_table(*args):
  def call_path_table(*args):
      print(f'call_path_table({args}')
  _end_of_file_
-       perf record -g -o "${perfdatafile}" true
+       case $(uname -m)
+       in s390x)
+               cmd_flags="--call-graph dwarf -e cpu-clock";;
+       *)
+               cmd_flags="-g";;
+       esac
+
+       perf record $cmd_flags -o "${perfdatafile}" true
         perf script -i "${perfdatafile}" -s "${db_test}"
         echo "DB test [Success]"
  }
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c

index 5f5320f7c6e27d17a944e7196cfa1605c66357be..dc5943a6352d91dc67bafedfad09bdc40da1e135 100644 (file)
--- a/tools/perf/trace/beauty/statx.c
+++ b/tools/perf/trace/beauty/statx.c
@@ -67,6 +67,7 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a
         P_FLAG(BTIME);
         P_FLAG(MNT_ID);
         P_FLAG(DIOALIGN);
+       P_FLAG(MNT_ID_UNIQUE);
  
  #undef P_FLAG
  
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

index 95f25e9fb994ab2a5190c40f91e9bbe3d5f884be..55a300a0977b416e60e90819ad1a9feefcdcbc84 100644 (file)
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -103,7 +103,14 @@ struct evlist *evlist__new_default(void)
         err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
         if (err) {
                 evlist__delete(evlist);
-               evlist = NULL;
+               return NULL;
+       }
+
+       if (evlist->core.nr_entries > 1) {
+               struct evsel *evsel;
+
+               evlist__for_each_entry(evlist, evsel)
+                       evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
         }
  
         return evlist;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c

index 0888b7163b7cc25c33724f4e61099ab16a2ab60a..fa359180ebf8fc45e1248e4241543817e0660260 100644 (file)
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -491,8 +491,8 @@ static int hist_entry__init(struct hist_entry *he,
         }
  
         if (symbol_conf.res_sample) {
-               he->res_samples = calloc(sizeof(struct res_sample),
-                                       symbol_conf.res_sample);
+               he->res_samples = calloc(symbol_conf.res_sample,
+                                       sizeof(struct res_sample));
                 if (!he->res_samples)
                         goto err_srcline;
         }
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h

index 75e2248416f55f6792563a2614b211a36281222f..178b00205fe6a7b2d75f3a9d68b7bad99ccd82af 100644 (file)
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -115,6 +115,10 @@
         SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK)
  #endif
  
+#ifndef SYM_FUNC_ALIAS_MEMFUNC
+#define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS
+#endif
+
  // In the kernel sources (include/linux/cfi_types.h), this has a different
  // definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang
  // definition:
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c

index ca3e0404f18720d7a3cc2376896195f55cf1192d..966cca5a3e88cd94b78c27ce8429f820f1fa86b9 100644 (file)
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -286,7 +286,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids,
         *out_metric_events = NULL;
         ids_size = hashmap__size(ids);
  
-       metric_events = calloc(sizeof(void *), ids_size + 1);
+       metric_events = calloc(ids_size + 1, sizeof(void *));
         if (!metric_events)
                 return -ENOMEM;
  
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c

index b0fc48be623f31bcfd478a79b66b5295708a5a15..9e47712507cc265d46c7cf5d66033185006ba2f3 100644 (file)
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -66,7 +66,7 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus
  
         put_tracing_file(events_path);
         if (events_fd < 0) {
-               printf("Error: failed to open tracing events directory\n");
+               pr_err("Error: failed to open tracing events directory\n");
                 return;
         }
  
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c

index 3712186353fb94109e327195d1aee6d2177763ec..2a0289c149599927f1ee4023e530866d8da15a71 100644 (file)
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1055,11 +1055,11 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
         if (thread_nr > n)
                 thread_nr = n;
  
-       synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
+       synthesize_threads = calloc(thread_nr, sizeof(pthread_t));
         if (synthesize_threads == NULL)
                 goto free_dirent;
  
-       args = calloc(sizeof(*args), thread_nr);
+       args = calloc(thread_nr, sizeof(*args));
         if (args == NULL)
                 goto free_threads;
  
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild

index caff3834671f9dfb7d261d5b6633532f71ecd9f5..030b388800f05191e5b6492b97ebf96e9d45dcf3 100644 (file)
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -13,6 +13,7 @@ ldflags-y += --wrap=cxl_hdm_decode_init
  ldflags-y += --wrap=cxl_dvsec_rr_decode
  ldflags-y += --wrap=devm_cxl_add_rch_dport
  ldflags-y += --wrap=cxl_rcd_component_reg_phys
+ldflags-y += --wrap=cxl_endpoint_parse_cdat
  
  DRIVERS := ../../../drivers
  CXL_SRC := $(DRIVERS)/cxl
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c

index a3cdbb2be038c45e27326925d81ba43294b56c31..908e0d0839369c2e41f090bddc2e9a9b9121b4c9 100644 (file)
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -15,6 +15,8 @@
  
  static int interleave_arithmetic;
  
+#define FAKE_QTG_ID    42
+
  #define NR_CXL_HOST_BRIDGES 2
  #define NR_CXL_SINGLE_HOST 1
  #define NR_CXL_RCH 1
@@ -209,7 +211,7 @@ static struct {
                         .granularity = 4,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
-                       .qtg_id = 0,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M * 4UL,
                 },
                 .target = { 0 },
@@ -224,7 +226,7 @@ static struct {
                         .granularity = 4,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
-                       .qtg_id = 1,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M * 8UL,
                 },
                 .target = { 0, 1, },
@@ -239,7 +241,7 @@ static struct {
                         .granularity = 4,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_PMEM,
-                       .qtg_id = 2,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M * 4UL,
                 },
                 .target = { 0 },
@@ -254,7 +256,7 @@ static struct {
                         .granularity = 4,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_PMEM,
-                       .qtg_id = 3,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M * 8UL,
                 },
                 .target = { 0, 1, },
@@ -269,7 +271,7 @@ static struct {
                         .granularity = 4,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_PMEM,
-                       .qtg_id = 4,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M * 4UL,
                 },
                 .target = { 2 },
@@ -284,7 +286,7 @@ static struct {
                         .granularity = 4,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
-                       .qtg_id = 5,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M,
                 },
                 .target = { 3 },
@@ -301,7 +303,7 @@ static struct {
                         .granularity = 4,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_PMEM,
-                       .qtg_id = 0,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M * 8UL,
                 },
                 .target = { 0, },
@@ -317,7 +319,7 @@ static struct {
                         .granularity = 0,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_PMEM,
-                       .qtg_id = 1,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M * 8UL,
                 },
                 .target = { 0, 1, },
@@ -333,7 +335,7 @@ static struct {
                         .granularity = 0,
                         .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
                                         ACPI_CEDT_CFMWS_RESTRICT_PMEM,
-                       .qtg_id = 0,
+                       .qtg_id = FAKE_QTG_ID,
                         .window_size = SZ_256M * 16UL,
                 },
                 .target = { 0, 1, 0, 1, },
@@ -976,6 +978,48 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
         return 0;
  }
  
+/*
+ * Faking the cxl_dpa_perf for the memdev when appropriate.
+ */
+static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
+                          struct cxl_dpa_perf *dpa_perf)
+{
+       dpa_perf->qos_class = FAKE_QTG_ID;
+       dpa_perf->dpa_range = *range;
+       dpa_perf->coord.read_latency = 500;
+       dpa_perf->coord.write_latency = 500;
+       dpa_perf->coord.read_bandwidth = 1000;
+       dpa_perf->coord.write_bandwidth = 1000;
+}
+
+static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
+{
+       struct cxl_root *cxl_root __free(put_cxl_root) =
+               find_cxl_root(port);
+       struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+       struct range pmem_range = {
+               .start = cxlds->pmem_res.start,
+               .end = cxlds->pmem_res.end,
+       };
+       struct range ram_range = {
+               .start = cxlds->ram_res.start,
+               .end = cxlds->ram_res.end,
+       };
+
+       if (!cxl_root)
+               return;
+
+       if (range_len(&ram_range))
+               dpa_perf_setup(port, &ram_range, &mds->ram_perf);
+
+       if (range_len(&pmem_range))
+               dpa_perf_setup(port, &pmem_range, &mds->pmem_perf);
+
+       cxl_memdev_update_perf(cxlmd);
+}
+
  static struct cxl_mock_ops cxl_mock_ops = {
         .is_mock_adev = is_mock_adev,
         .is_mock_bridge = is_mock_bridge,
@@ -989,6 +1033,7 @@ static struct cxl_mock_ops cxl_mock_ops = {
         .devm_cxl_setup_hdm = mock_cxl_setup_hdm,
         .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
         .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
+       .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
         .list = LIST_HEAD_INIT(cxl_mock_ops.list),
  };
  
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c

index 1a61e68e30950ba623b52c5920a7874e3d97b9cd..6f737941dc0e164b9611e9dac91cb9e55b69e715 100644 (file)
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -285,6 +285,20 @@ resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev,
  }
  EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL);
  
+void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
+{
+       int index;
+       struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+       struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
+
+       if (ops && ops->is_mock_dev(cxlmd->dev.parent))
+               ops->cxl_endpoint_parse_cdat(port);
+       else
+               cxl_endpoint_parse_cdat(port);
+       put_cxl_mock_ops(index);
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL);
+
  MODULE_LICENSE("GPL v2");
  MODULE_IMPORT_NS(ACPI);
  MODULE_IMPORT_NS(CXL);
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h

index a94223750346c8d897197f3171091459982d29ac..d1b0271d282203b7bccac68aedef0646d1391d59 100644 (file)
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -25,6 +25,7 @@ struct cxl_mock_ops {
         int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
         int (*devm_cxl_enumerate_decoders)(
                 struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info);
+       void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
  };
  
  void register_cxl_mock_ops(struct cxl_mock_ops *ops);
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c

index bf84d4a1d9ae2c68ceeac9f25373fd9df01b6935..3c440370c1f0f2b9cc67a754da8087e447efa625 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -193,6 +193,7 @@ static void subtest_task_iters(void)
         ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
         ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt");
         ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt");
+       ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt");
         pthread_mutex_unlock(&do_nothing_mutex);
         for (int i = 0; i < thread_num; i++)
                 ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c

new file mode 100644 (file)

index 0000000..3405923
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include "test_progs.h"
+#include "read_vsyscall.skel.h"
+
+#if defined(__x86_64__)
+/* For VSYSCALL_ADDR */
+#include <asm/vsyscall.h>
+#else
+/* To prevent build failure on non-x86 arch */
+#define VSYSCALL_ADDR 0UL
+#endif
+
+struct read_ret_desc {
+       const char *name;
+       int ret;
+} all_read[] = {
+       { .name = "probe_read_kernel", .ret = -ERANGE },
+       { .name = "probe_read_kernel_str", .ret = -ERANGE },
+       { .name = "probe_read", .ret = -ERANGE },
+       { .name = "probe_read_str", .ret = -ERANGE },
+       { .name = "probe_read_user", .ret = -EFAULT },
+       { .name = "probe_read_user_str", .ret = -EFAULT },
+       { .name = "copy_from_user", .ret = -EFAULT },
+       { .name = "copy_from_user_task", .ret = -EFAULT },
+};
+
+void test_read_vsyscall(void)
+{
+       struct read_vsyscall *skel;
+       unsigned int i;
+       int err;
+
+#if !defined(__x86_64__)
+       test__skip();
+       return;
+#endif
+       skel = read_vsyscall__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
+               return;
+
+       skel->bss->target_pid = getpid();
+       err = read_vsyscall__attach(skel);
+       if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
+               goto out;
+
+       /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE,
+        * but it doesn't affect the returned error codes.
+        */
+       skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
+       usleep(1);
+
+       for (i = 0; i < ARRAY_SIZE(all_read); i++)
+               ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
+out:
+       read_vsyscall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c

index 760ad96b4be099ed74779d8895165df5d212f091..d66687f1ee6a8df52cb228010a293a3d4d102216 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -4,10 +4,29 @@
  #include "timer.skel.h"
  #include "timer_failure.skel.h"
  
+#define NUM_THR 8
+
+static void *spin_lock_thread(void *arg)
+{
+       int i, err, prog_fd = *(int *)arg;
+       LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+       for (i = 0; i < 10000; i++) {
+               err = bpf_prog_test_run_opts(prog_fd, &topts);
+               if (!ASSERT_OK(err, "test_run_opts err") ||
+                   !ASSERT_OK(topts.retval, "test_run_opts retval"))
+                       break;
+       }
+
+       pthread_exit(arg);
+}
+
  static int timer(struct timer *timer_skel)
  {
-       int err, prog_fd;
+       int i, err, prog_fd;
         LIBBPF_OPTS(bpf_test_run_opts, topts);
+       pthread_t thread_id[NUM_THR];
+       void *ret;
  
         err = timer__attach(timer_skel);
         if (!ASSERT_OK(err, "timer_attach"))
@@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel)
         /* check that code paths completed */
         ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
  
+       prog_fd = bpf_program__fd(timer_skel->progs.race);
+       for (i = 0; i < NUM_THR; i++) {
+               err = pthread_create(&thread_id[i], NULL,
+                                    &spin_lock_thread, &prog_fd);
+               if (!ASSERT_OK(err, "pthread_create"))
+                       break;
+       }
+
+       while (i) {
+               err = pthread_join(thread_id[--i], &ret);
+               if (ASSERT_OK(err, "pthread_join"))
+                       ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
+       }
+
         return 0;
  }
  
diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c

index c9b4055cd410ae6378066e28e2f41c9b23d47ab1..e4d53e40ff2086112dff757581ef37f8fdcbe272 100644 (file)
--- a/tools/testing/selftests/bpf/progs/iters_task.c
+++ b/tools/testing/selftests/bpf/progs/iters_task.c
@@ -10,7 +10,7 @@
  char _license[] SEC("license") = "GPL";
  
  pid_t target_pid;
-int procs_cnt, threads_cnt, proc_threads_cnt;
+int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt;
  
  void bpf_rcu_read_lock(void) __ksym;
  void bpf_rcu_read_unlock(void) __ksym;
@@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx)
         procs_cnt = threads_cnt = proc_threads_cnt = 0;
  
         bpf_rcu_read_lock();
+       bpf_for_each(task, pos, NULL, ~0U) {
+               /* Below instructions shouldn't be executed for invalid flags */
+               invalid_cnt++;
+       }
+
+       bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) {
+               /* Below instructions shouldn't be executed for invalid task__nullable */
+               invalid_cnt++;
+       }
+
         bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS)
                 if (pos->pid == target_pid)
                         procs_cnt++;
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c

new file mode 100644 (file)

index 0000000..986f966
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+int target_pid = 0;
+void *user_ptr = 0;
+int read_ret[8];
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int do_probe_read(void *ctx)
+{
+       char buf[8];
+
+       if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+               return 0;
+
+       read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr);
+       read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr);
+       read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr);
+       read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr);
+       read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr);
+       read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+       return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int do_copy_from_user(void *ctx)
+{
+       char buf[8];
+
+       if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+               return 0;
+
+       read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr);
+       read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
+                                             bpf_get_current_task_btf(), 0);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c

index 8b946c8188c65d10ed86de886ea9aead585de86d..f615da97df26382f4dd758015dd0aaf2cd359614 100644 (file)
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -51,7 +51,8 @@ struct {
         __uint(max_entries, 1);
         __type(key, int);
         __type(value, struct elem);
-} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps");
+} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"),
+       race_array SEC(".maps");
  
  __u64 bss_data;
  __u64 abs_data;
@@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a)
  
         return 0;
  }
+
+static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer)
+{
+       bpf_timer_start(timer, 1000000, 0);
+       return 0;
+}
+
+SEC("syscall")
+int race(void *ctx)
+{
+       struct bpf_timer *timer;
+       int err, race_key = 0;
+       struct elem init;
+
+       __builtin_memset(&init, 0, sizeof(struct elem));
+       bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY);
+
+       timer = bpf_map_lookup_elem(&race_array, &race_key);
+       if (!timer)
+               return 1;
+
+       err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
+       if (err && err != -EBUSY)
+               return 1;
+
+       bpf_timer_set_callback(timer, race_timer_callback);
+       bpf_timer_start(timer, 0, 0);
+       bpf_timer_cancel(timer);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c

index 534576f06df1cc78f63619d873f77ad0390f45e5..c59e4adb905df61494db41d99355fbac5d742bab 100644 (file)
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -12,6 +12,7 @@
  #include <syscall.h>
  #include <unistd.h>
  #include <sys/resource.h>
+#include <linux/close_range.h>
  
  #include "../kselftest_harness.h"
  #include "../clone3/clone3_selftests.h"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh

index d508486cc0bdc2c917f9386aa2aea796f12d2c1d..9a3d3c389dadda07d1e8d499fea65e307c656056 100755 (executable)
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -62,6 +62,8 @@ prio_test()
  
         # create bond
         bond_reset "${param}"
+       # set active_slave to primary eth1 specifically
+       ip -n ${s_ns} link set bond0 type bond active_slave eth1
  
         # check bonding member prio value
         ip -n ${s_ns} link set eth0 type bond_slave prio 0
diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh

index 2a268b17b61f515b5c50a1fdbe3d7ae21af00578..dbdd736a41d394c9a6e2897d971eb31e728eae34 100644 (file)
--- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
+++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
@@ -48,6 +48,17 @@ test_LAG_cleanup()
         ip link add mv0 link "$name" up address "$ucaddr" type macvlan
         # Used to test dev->mc handling
         ip address add "$addr6" dev "$name"
+
+       # Check that addresses were added as expected
+       (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 ||
+               grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null
+       check_err $? "macvlan unicast address not found on a slave"
+
+       # mcaddr is added asynchronously by addrconf_dad_work(), use busywait
+       (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 ||
+               grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null
+       check_err $? "IPv6 solicited-node multicast mac address not found on a slave"
+
         ip link set dev "$name" down
         ip link del "$name"
  
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config

index 265b6882cc21ed0c285ae9f37f9282bfb2e440d1..b5e3a3aad4bfbb5f1d77b4fd1bd4ae566f6394a1 100644 (file)
--- a/tools/testing/selftests/drivers/net/team/config
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,3 +1,5 @@
+CONFIG_DUMMY=y
+CONFIG_IPV6=y
+CONFIG_MACVLAN=y
  CONFIG_NET_TEAM=y
  CONFIG_NET_TEAM_MODE_LOADBALANCE=y
-CONFIG_MACVLAN=y
diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh

index b07af2a4c4de0b680f37d510337d38b23691a478..7fae90293a9d88278ae9b6202758dfe08b4d7899 100755 (executable)
--- a/tools/testing/selftests/dt/test_unprobed_devices.sh
+++ b/tools/testing/selftests/dt/test_unprobed_devices.sh
@@ -33,8 +33,8 @@ if [[ ! -d "${PDT}" ]]; then
  fi
  
  nodes_compatible=$(
-       for node_compat in $(find ${PDT} -name compatible); do
-               node=$(dirname "${node_compat}")
+       for node in $(find ${PDT} -type d); do
+               [ ! -f "${node}"/compatible ] && continue
                 # Check if node is available
                 if [[ -e "${node}"/status ]]; then
                         status=$(tr -d '\000' < "${node}"/status)
@@ -46,10 +46,11 @@ nodes_compatible=$(
  
  nodes_dev_bound=$(
         IFS=$'\n'
-       for uevent in $(find /sys/devices -name uevent); do
-               if [[ -d "$(dirname "${uevent}")"/driver ]]; then
-                       grep '^OF_FULLNAME=' "${uevent}" | sed -e 's|OF_FULLNAME=||'
-               fi
+       for dev_dir in $(find /sys/devices -type d); do
+               [ ! -f "${dev_dir}"/uevent ] && continue
+               [ ! -d "${dev_dir}"/driver ] && continue
+
+               grep '^OF_FULLNAME=' "${dev_dir}"/uevent | sed -e 's|OF_FULLNAME=||'
         done
         )
  
diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py

index 352fc39f3c6c160bfdcd2b3c655bfe319f00892c..b62c7dba6777f975dd9158f6788a6177307bc9e4 100644 (file)
--- a/tools/testing/selftests/hid/tests/test_wacom_generic.py
+++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py
@@ -880,8 +880,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
          does not overlap with other contacts. The value of `t` may be
          incremented over time to move the point along a linear path.
          """
-        x = 50 + 10 * contact_id + t
-        y = 100 + 100 * contact_id + t
+        x = 50 + 10 * contact_id + t * 11
+        y = 100 + 100 * contact_id + t * 11
          return test_multitouch.Touch(contact_id, x, y)
  
      def make_contacts(self, n, t=0):
@@ -902,8 +902,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest
          tracking_id = contact_ids.tracking_id
          slot_num = contact_ids.slot_num
  
-        x = 50 + 10 * contact_id + t
-        y = 100 + 100 * contact_id + t
+        x = 50 + 10 * contact_id + t * 11
+        y = 100 + 100 * contact_id + t * 11
  
          # If the data isn't supposed to be stored in any slots, there is
          # nothing we can check for in the evdev stream.
diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config

index 6c4f901d6fed3c200bbcb40a6ba7dd22c0b2e2bd..110d73917615d177d5d7a891f08d523619c404f3 100644 (file)
--- a/tools/testing/selftests/iommu/config
+++ b/tools/testing/selftests/iommu/config
@@ -1,2 +1,3 @@
-CONFIG_IOMMUFD
-CONFIG_IOMMUFD_TEST
+CONFIG_IOMMUFD=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_IOMMUFD_TEST=y
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c

index 1a881e7a21d1b26ce7ad19de1cc5ea07d3773ff9..edf1c99c9936c8549e8a2938a2ff11875197b3d4 100644 (file)
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -12,6 +12,7 @@
  static unsigned long HUGEPAGE_SIZE;
  
  #define MOCK_PAGE_SIZE (PAGE_SIZE / 2)
+#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE)
  
  static unsigned long get_huge_page_size(void)
  {
@@ -1716,10 +1717,12 @@ FIXTURE(iommufd_dirty_tracking)
  FIXTURE_VARIANT(iommufd_dirty_tracking)
  {
         unsigned long buffer_size;
+       bool hugepages;
  };
  
  FIXTURE_SETUP(iommufd_dirty_tracking)
  {
+       int mmap_flags;
         void *vrc;
         int rc;
  
@@ -1732,25 +1735,41 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
                            variant->buffer_size, rc);
         }
  
+       mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED;
+       if (variant->hugepages) {
+               /*
+                * MAP_POPULATE will cause the kernel to fail mmap if THPs are
+                * not available.
+                */
+               mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+       }
         assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
         vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
-                  MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+                  mmap_flags, -1, 0);
         assert(vrc == self->buffer);
  
         self->page_size = MOCK_PAGE_SIZE;
         self->bitmap_size =
                 variant->buffer_size / self->page_size / BITS_PER_BYTE;
  
-       /* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */
+       /* Provision with an extra (PAGE_SIZE) for the unaligned case */
         rc = posix_memalign(&self->bitmap, PAGE_SIZE,
-                           self->bitmap_size + MOCK_PAGE_SIZE);
+                           self->bitmap_size + PAGE_SIZE);
         assert(!rc);
         assert(self->bitmap);
         assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
  
         test_ioctl_ioas_alloc(&self->ioas_id);
-       test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id,
-                            &self->idev_id);
+       /* Enable 1M mock IOMMU hugepages */
+       if (variant->hugepages) {
+               test_cmd_mock_domain_flags(self->ioas_id,
+                                          MOCK_FLAGS_DEVICE_HUGE_IOVA,
+                                          &self->stdev_id, &self->hwpt_id,
+                                          &self->idev_id);
+       } else {
+               test_cmd_mock_domain(self->ioas_id, &self->stdev_id,
+                                    &self->hwpt_id, &self->idev_id);
+       }
  }
  
  FIXTURE_TEARDOWN(iommufd_dirty_tracking)
@@ -1784,12 +1803,26 @@ FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
         .buffer_size = 128UL * 1024UL * 1024UL,
  };
  
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
+{
+       /* 4K bitmap (128M IOVA range) */
+       .buffer_size = 128UL * 1024UL * 1024UL,
+       .hugepages = true,
+};
+
  FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M)
  {
         /* 8K bitmap (256M IOVA range) */
         .buffer_size = 256UL * 1024UL * 1024UL,
  };
  
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge)
+{
+       /* 8K bitmap (256M IOVA range) */
+       .buffer_size = 256UL * 1024UL * 1024UL,
+       .hugepages = true,
+};
+
  TEST_F(iommufd_dirty_tracking, enforce_dirty)
  {
         uint32_t ioas_id, stddev_id, idev_id;
@@ -1849,65 +1882,80 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability)
  
  TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
  {
-       uint32_t stddev_id;
+       uint32_t page_size = MOCK_PAGE_SIZE;
         uint32_t hwpt_id;
         uint32_t ioas_id;
  
+       if (variant->hugepages)
+               page_size = MOCK_HUGE_PAGE_SIZE;
+
         test_ioctl_ioas_alloc(&ioas_id);
         test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
                                      variant->buffer_size, MOCK_APERTURE_START);
  
         test_cmd_hwpt_alloc(self->idev_id, ioas_id,
                             IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
-       test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
  
         test_cmd_set_dirty_tracking(hwpt_id, true);
  
         test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
-                               MOCK_APERTURE_START, self->page_size,
+                               MOCK_APERTURE_START, self->page_size, page_size,
                                 self->bitmap, self->bitmap_size, 0, _metadata);
  
         /* PAGE_SIZE unaligned bitmap */
         test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
-                               MOCK_APERTURE_START, self->page_size,
+                               MOCK_APERTURE_START, self->page_size, page_size,
                                 self->bitmap + MOCK_PAGE_SIZE,
                                 self->bitmap_size, 0, _metadata);
  
-       test_ioctl_destroy(stddev_id);
+       /* u64 unaligned bitmap */
+       test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+                               MOCK_APERTURE_START, self->page_size, page_size,
+                               self->bitmap + 0xff1, self->bitmap_size, 0,
+                               _metadata);
+
         test_ioctl_destroy(hwpt_id);
  }
  
  TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
  {
-       uint32_t stddev_id;
+       uint32_t page_size = MOCK_PAGE_SIZE;
         uint32_t hwpt_id;
         uint32_t ioas_id;
  
+       if (variant->hugepages)
+               page_size = MOCK_HUGE_PAGE_SIZE;
+
         test_ioctl_ioas_alloc(&ioas_id);
         test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
                                      variant->buffer_size, MOCK_APERTURE_START);
  
         test_cmd_hwpt_alloc(self->idev_id, ioas_id,
                             IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
-       test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
  
         test_cmd_set_dirty_tracking(hwpt_id, true);
  
         test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
-                               MOCK_APERTURE_START, self->page_size,
+                               MOCK_APERTURE_START, self->page_size, page_size,
                                 self->bitmap, self->bitmap_size,
                                 IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
                                 _metadata);
  
         /* Unaligned bitmap */
         test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
-                               MOCK_APERTURE_START, self->page_size,
+                               MOCK_APERTURE_START, self->page_size, page_size,
                                 self->bitmap + MOCK_PAGE_SIZE,
                                 self->bitmap_size,
                                 IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
                                 _metadata);
  
-       test_ioctl_destroy(stddev_id);
+       /* u64 unaligned bitmap */
+       test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+                               MOCK_APERTURE_START, self->page_size, page_size,
+                               self->bitmap + 0xff1, self->bitmap_size,
+                               IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+                               _metadata);
+
         test_ioctl_destroy(hwpt_id);
  }
  
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h

index c646264aa41fdc1871c60bba6dc25841767f399b..8d2b46b2114da814f75740992c0dc4b1be14d33b 100644 (file)
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -344,16 +344,19 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
                                                   page_size, bitmap, nr))
  
  static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
-                                   __u64 iova, size_t page_size, __u64 *bitmap,
+                                   __u64 iova, size_t page_size,
+                                   size_t pte_page_size, __u64 *bitmap,
                                     __u64 bitmap_size, __u32 flags,
                                     struct __test_metadata *_metadata)
  {
-       unsigned long i, nbits = bitmap_size * BITS_PER_BYTE;
-       unsigned long nr = nbits / 2;
+       unsigned long npte = pte_page_size / page_size, pteset = 2 * npte;
+       unsigned long nbits = bitmap_size * BITS_PER_BYTE;
+       unsigned long j, i, nr = nbits / pteset ?: 1;
         __u64 out_dirty = 0;
  
         /* Mark all even bits as dirty in the mock domain */
-       for (i = 0; i < nbits; i += 2)
+       memset(bitmap, 0, bitmap_size);
+       for (i = 0; i < nbits; i += pteset)
                 set_bit(i, (unsigned long *)bitmap);
  
         test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size,
@@ -365,8 +368,12 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
         test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
                                   flags);
         /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */
-       for (i = 0; i < nbits; i++) {
-               ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap));
+       for (i = 0; i < nbits; i += pteset) {
+               for (j = 0; j < pteset; j++) {
+                       ASSERT_EQ(j < npte,
+                                 test_bit(i + j, (unsigned long *)bitmap));
+               }
+               ASSERT_EQ(!(i % pteset), test_bit(i, (unsigned long *)bitmap));
         }
  
         memset(bitmap, 0, bitmap_size);
@@ -374,19 +381,23 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
                                   flags);
  
         /* It as read already -- expect all zeroes */
-       for (i = 0; i < nbits; i++) {
-               ASSERT_EQ(!(i % 2) && (flags &
-                                      IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR),
-                         test_bit(i, (unsigned long *)bitmap));
+       for (i = 0; i < nbits; i += pteset) {
+               for (j = 0; j < pteset; j++) {
+                       ASSERT_EQ(
+                               (j < npte) &&
+                                       (flags &
+                                        IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR),
+                               test_bit(i + j, (unsigned long *)bitmap));
+               }
         }
  
         return 0;
  }
-#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap,      \
-                               bitmap_size, flags, _metadata)                 \
+#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, pte_size,\
+                               bitmap, bitmap_size, flags, _metadata)     \
         ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \
-                                             page_size, bitmap, bitmap_size,  \
-                                             flags, _metadata))
+                                             page_size, pte_size, bitmap,     \
+                                             bitmap_size, flags, _metadata))
  
  static int _test_cmd_create_access(int fd, unsigned int ioas_id,
                                    __u32 *access_id, unsigned int flags)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile

index 492e937fab00648d5dbda4e1c98bdb1840468fde..19f5710bb456804db60d42a066a8c623562768b6 100644 (file)
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -36,7 +36,9 @@ LIBKVM_x86_64 += lib/x86_64/apic.c
  LIBKVM_x86_64 += lib/x86_64/handlers.S
  LIBKVM_x86_64 += lib/x86_64/hyperv.c
  LIBKVM_x86_64 += lib/x86_64/memstress.c
+LIBKVM_x86_64 += lib/x86_64/pmu.c
  LIBKVM_x86_64 += lib/x86_64/processor.c
+LIBKVM_x86_64 += lib/x86_64/sev.c
  LIBKVM_x86_64 += lib/x86_64/svm.c
  LIBKVM_x86_64 += lib/x86_64/ucall.c
  LIBKVM_x86_64 += lib/x86_64/vmx.c
@@ -53,6 +55,7 @@ LIBKVM_s390x += lib/s390x/diag318_test_handler.c
  LIBKVM_s390x += lib/s390x/processor.c
  LIBKVM_s390x += lib/s390x/ucall.c
  
+LIBKVM_riscv += lib/riscv/handlers.S
  LIBKVM_riscv += lib/riscv/processor.c
  LIBKVM_riscv += lib/riscv/ucall.c
  
@@ -80,6 +83,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
  TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
  TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
  TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
  TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
  TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
  TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
@@ -117,6 +121,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
  TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
  TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
  TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
+TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
  TEST_GEN_PROGS_x86_64 += x86_64/amx_test
  TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
  TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
@@ -143,7 +148,6 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test
  TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
  
  TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
  TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
  TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
  TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
@@ -155,6 +159,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
  TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
  TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
  TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
+TEST_GEN_PROGS_aarch64 += arch_timer
  TEST_GEN_PROGS_aarch64 += demand_paging_test
  TEST_GEN_PROGS_aarch64 += dirty_log_test
  TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
@@ -184,6 +189,7 @@ TEST_GEN_PROGS_s390x += rseq_test
  TEST_GEN_PROGS_s390x += set_memory_region_test
  TEST_GEN_PROGS_s390x += kvm_binary_stats_test
  
+TEST_GEN_PROGS_riscv += arch_timer
  TEST_GEN_PROGS_riscv += demand_paging_test
  TEST_GEN_PROGS_riscv += dirty_log_test
  TEST_GEN_PROGS_riscv += get-reg-list
@@ -194,6 +200,7 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test
  TEST_GEN_PROGS_riscv += set_memory_region_test
  TEST_GEN_PROGS_riscv += steal_time
  
+SPLIT_TESTS += arch_timer
  SPLIT_TESTS += get-reg-list
  
  TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
@@ -217,7 +224,7 @@ else
  LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
  endif
  CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-       -Wno-gnu-variable-sized-type-not-at-end -MD -MP \
+       -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
         -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
         -fno-builtin-strnlen \
         -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
@@ -260,32 +267,36 @@ LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
  LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
  LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
  LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
-SPLIT_TESTS_TARGETS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
-SPLIT_TESTS_OBJS := $(patsubst %, $(ARCH_DIR)/%.o, $(SPLIT_TESTS))
+SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS))
  
  TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
  TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
  TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
  TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
-TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TESTS_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ))
  -include $(TEST_DEP_FILES)
  
-$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
+x := $(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+
+$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \
+$(TEST_GEN_PROGS_EXTENDED): %: %.o
         $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
  $(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
         $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
  
-$(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS)
+$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o
         $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c
+       $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
  
  EXTRA_CLEAN += $(GEN_HDRS) \
                $(LIBKVM_OBJS) \
-              $(SPLIT_TESTS_OBJS) \
+              $(SPLIT_TEST_GEN_OBJ) \
                $(TEST_DEP_FILES) \
                $(TEST_GEN_OBJ) \
                cscope.*
  
-x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
  $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
         $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
  
@@ -299,7 +310,7 @@ $(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
         $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
  
  x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-$(SPLIT_TESTS_OBJS): $(GEN_HDRS)
+$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS)
  $(TEST_GEN_PROGS): $(LIBKVM_OBJS)
  $(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
  $(TEST_GEN_OBJ): $(GEN_HDRS)
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c

index 274b8465b42a5aa1d210ced625db7ce42799e693..ddba2c2fb5deb1b2ec2ab02db352bada7e70feee 100644 (file)
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -1,64 +1,19 @@
  // SPDX-License-Identifier: GPL-2.0-only
  /*
- * arch_timer.c - Tests the aarch64 timer IRQ functionality
- *
   * The test validates both the virtual and physical timer IRQs using
- * CVAL and TVAL registers. This consitutes the four stages in the test.
- * The guest's main thread configures the timer interrupt for a stage
- * and waits for it to fire, with a timeout equal to the timer period.
- * It asserts that the timeout doesn't exceed the timer period.
- *
- * On the other hand, upon receipt of an interrupt, the guest's interrupt
- * handler validates the interrupt by checking if the architectural state
- * is in compliance with the specifications.
- *
- * The test provides command-line options to configure the timer's
- * period (-p), number of vCPUs (-n), and iterations per stage (-i).
- * To stress-test the timer stack even more, an option to migrate the
- * vCPUs across pCPUs (-m), at a particular rate, is also provided.
+ * CVAL and TVAL registers.
   *
   * Copyright (c) 2021, Google LLC.
   */
  #define _GNU_SOURCE
  
-#include <stdlib.h>
-#include <pthread.h>
-#include <linux/kvm.h>
-#include <linux/sizes.h>
-#include <linux/bitmap.h>
-#include <sys/sysinfo.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "delay.h"
  #include "arch_timer.h"
+#include "delay.h"
  #include "gic.h"
+#include "processor.h"
+#include "timer_test.h"
  #include "vgic.h"
  
-#define NR_VCPUS_DEF                   4
-#define NR_TEST_ITERS_DEF              5
-#define TIMER_TEST_PERIOD_MS_DEF       10
-#define TIMER_TEST_ERR_MARGIN_US       100
-#define TIMER_TEST_MIGRATION_FREQ_MS   2
-
-struct test_args {
-       int nr_vcpus;
-       int nr_iter;
-       int timer_period_ms;
-       int migration_freq_ms;
-       struct kvm_arm_counter_offset offset;
-};
-
-static struct test_args test_args = {
-       .nr_vcpus = NR_VCPUS_DEF,
-       .nr_iter = NR_TEST_ITERS_DEF,
-       .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
-       .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
-       .offset = { .reserved = 1 },
-};
-
-#define msecs_to_usecs(msec)           ((msec) * 1000LL)
-
  #define GICD_BASE_GPA                  0x8000000ULL
  #define GICR_BASE_GPA                  0x80A0000ULL
  
@@ -70,22 +25,8 @@ enum guest_stage {
         GUEST_STAGE_MAX,
  };
  
-/* Shared variables between host and guest */
-struct test_vcpu_shared_data {
-       int nr_iter;
-       enum guest_stage guest_stage;
-       uint64_t xcnt;
-};
-
-static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
-static pthread_t pt_vcpu_run[KVM_MAX_VCPUS];
-static struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
-
  static int vtimer_irq, ptimer_irq;
  
-static unsigned long *vcpu_done_map;
-static pthread_mutex_t vcpu_done_map_lock;
-
  static void
  guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
  {
@@ -158,9 +99,9 @@ static void guest_validate_irq(unsigned int intid,
  
         /* Basic 'timer condition met' check */
         __GUEST_ASSERT(xcnt >= cval,
-                      "xcnt = 0x%llx, cval = 0x%llx, xcnt_diff_us = 0x%llx",
+                      "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
                        xcnt, cval, xcnt_diff_us);
-       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xcnt = 0x%llx", xcnt);
+       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
  
         WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
  }
@@ -190,10 +131,14 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
  
                 /* Setup a timeout for the interrupt to arrive */
                 udelay(msecs_to_usecs(test_args.timer_period_ms) +
-                       TIMER_TEST_ERR_MARGIN_US);
+                       test_args.timer_err_margin_us);
  
                 irq_iter = READ_ONCE(shared_data->nr_iter);
-               GUEST_ASSERT_EQ(config_iter + 1, irq_iter);
+               __GUEST_ASSERT(config_iter + 1 == irq_iter,
+                               "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n"
+                               "  Guest timer interrupt was not trigged within the specified\n"
+                               "  interval, try to increase the error margin by [-e] option.\n",
+                               config_iter + 1, irq_iter);
         }
  }
  
@@ -222,137 +167,6 @@ static void guest_code(void)
         GUEST_DONE();
  }
  
-static void *test_vcpu_run(void *arg)
-{
-       unsigned int vcpu_idx = (unsigned long)arg;
-       struct ucall uc;
-       struct kvm_vcpu *vcpu = vcpus[vcpu_idx];
-       struct kvm_vm *vm = vcpu->vm;
-       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx];
-
-       vcpu_run(vcpu);
-
-       /* Currently, any exit from guest is an indication of completion */
-       pthread_mutex_lock(&vcpu_done_map_lock);
-       __set_bit(vcpu_idx, vcpu_done_map);
-       pthread_mutex_unlock(&vcpu_done_map_lock);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-       case UCALL_DONE:
-               break;
-       case UCALL_ABORT:
-               sync_global_from_guest(vm, *shared_data);
-               fprintf(stderr, "Guest assert failed,  vcpu %u; stage; %u; iter: %u\n",
-                       vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       default:
-               TEST_FAIL("Unexpected guest exit\n");
-       }
-
-       return NULL;
-}
-
-static uint32_t test_get_pcpu(void)
-{
-       uint32_t pcpu;
-       unsigned int nproc_conf;
-       cpu_set_t online_cpuset;
-
-       nproc_conf = get_nprocs_conf();
-       sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset);
-
-       /* Randomly find an available pCPU to place a vCPU on */
-       do {
-               pcpu = rand() % nproc_conf;
-       } while (!CPU_ISSET(pcpu, &online_cpuset));
-
-       return pcpu;
-}
-
-static int test_migrate_vcpu(unsigned int vcpu_idx)
-{
-       int ret;
-       cpu_set_t cpuset;
-       uint32_t new_pcpu = test_get_pcpu();
-
-       CPU_ZERO(&cpuset);
-       CPU_SET(new_pcpu, &cpuset);
-
-       pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
-
-       ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
-                                    sizeof(cpuset), &cpuset);
-
-       /* Allow the error where the vCPU thread is already finished */
-       TEST_ASSERT(ret == 0 || ret == ESRCH,
-                   "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n",
-                   vcpu_idx, new_pcpu, ret);
-
-       return ret;
-}
-
-static void *test_vcpu_migration(void *arg)
-{
-       unsigned int i, n_done;
-       bool vcpu_done;
-
-       do {
-               usleep(msecs_to_usecs(test_args.migration_freq_ms));
-
-               for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) {
-                       pthread_mutex_lock(&vcpu_done_map_lock);
-                       vcpu_done = test_bit(i, vcpu_done_map);
-                       pthread_mutex_unlock(&vcpu_done_map_lock);
-
-                       if (vcpu_done) {
-                               n_done++;
-                               continue;
-                       }
-
-                       test_migrate_vcpu(i);
-               }
-       } while (test_args.nr_vcpus != n_done);
-
-       return NULL;
-}
-
-static void test_run(struct kvm_vm *vm)
-{
-       pthread_t pt_vcpu_migration;
-       unsigned int i;
-       int ret;
-
-       pthread_mutex_init(&vcpu_done_map_lock, NULL);
-       vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
-       TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n");
-
-       for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) {
-               ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run,
-                                    (void *)(unsigned long)i);
-               TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i);
-       }
-
-       /* Spawn a thread to control the vCPU migrations */
-       if (test_args.migration_freq_ms) {
-               srand(time(NULL));
-
-               ret = pthread_create(&pt_vcpu_migration, NULL,
-                                       test_vcpu_migration, NULL);
-               TEST_ASSERT(!ret, "Failed to create the migration pthread\n");
-       }
-
-
-       for (i = 0; i < test_args.nr_vcpus; i++)
-               pthread_join(pt_vcpu_run[i], NULL);
-
-       if (test_args.migration_freq_ms)
-               pthread_join(pt_vcpu_migration, NULL);
-
-       bitmap_free(vcpu_done_map);
-}
-
  static void test_init_timer_irq(struct kvm_vm *vm)
  {
         /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
@@ -369,7 +183,7 @@ static void test_init_timer_irq(struct kvm_vm *vm)
  
  static int gic_fd;
  
-static struct kvm_vm *test_vm_create(void)
+struct kvm_vm *test_vm_create(void)
  {
         struct kvm_vm *vm;
         unsigned int i;
@@ -380,11 +194,15 @@ static struct kvm_vm *test_vm_create(void)
         vm_init_descriptor_tables(vm);
         vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
  
-       if (!test_args.offset.reserved) {
-               if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET))
-                       vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset);
-               else
-                       TEST_FAIL("no support for global offset\n");
+       if (!test_args.reserved) {
+               if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
+                       struct kvm_arm_counter_offset offset = {
+                               .counter_offset = test_args.counter_offset,
+                               .reserved = 0,
+                       };
+                       vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
+               } else
+                       TEST_FAIL("no support for global offset");
         }
  
         for (i = 0; i < nr_vcpus; i++)
@@ -400,81 +218,8 @@ static struct kvm_vm *test_vm_create(void)
         return vm;
  }
  
-static void test_vm_cleanup(struct kvm_vm *vm)
+void test_vm_cleanup(struct kvm_vm *vm)
  {
         close(gic_fd);
         kvm_vm_free(vm);
  }
-
-static void test_print_help(char *name)
-{
-       pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n",
-               name);
-       pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n",
-               NR_VCPUS_DEF, KVM_MAX_VCPUS);
-       pr_info("\t-i: Number of iterations per stage (default: %u)\n",
-               NR_TEST_ITERS_DEF);
-       pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n",
-               TIMER_TEST_PERIOD_MS_DEF);
-       pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
-               TIMER_TEST_MIGRATION_FREQ_MS);
-       pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n");
-       pr_info("\t-h: print this help screen\n");
-}
-
-static bool parse_args(int argc, char *argv[])
-{
-       int opt;
-
-       while ((opt = getopt(argc, argv, "hn:i:p:m:o:")) != -1) {
-               switch (opt) {
-               case 'n':
-                       test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
-                       if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
-                               pr_info("Max allowed vCPUs: %u\n",
-                                       KVM_MAX_VCPUS);
-                               goto err;
-                       }
-                       break;
-               case 'i':
-                       test_args.nr_iter = atoi_positive("Number of iterations", optarg);
-                       break;
-               case 'p':
-                       test_args.timer_period_ms = atoi_positive("Periodicity", optarg);
-                       break;
-               case 'm':
-                       test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
-                       break;
-               case 'o':
-                       test_args.offset.counter_offset = strtol(optarg, NULL, 0);
-                       test_args.offset.reserved = 0;
-                       break;
-               case 'h':
-               default:
-                       goto err;
-               }
-       }
-
-       return true;
-
-err:
-       test_print_help(argv[0]);
-       return false;
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-
-       if (!parse_args(argc, argv))
-               exit(KSFT_SKIP);
-
-       __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2,
-                      "At least two physical CPUs needed for vCPU migration");
-
-       vm = test_vm_create();
-       test_run(vm);
-       test_vm_cleanup(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c

index 866002917441c6bc10995a6e631ca7cac2405325..2582c49e525adf64ddc0275eec65333bd4587a14 100644 (file)
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -365,7 +365,7 @@ static void guest_wp_handler(struct ex_regs *regs)
  
  static void guest_ss_handler(struct ex_regs *regs)
  {
-       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%u'", ss_idx);
+       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
         ss_addr[ss_idx++] = regs->pc;
         regs->pstate |= SPSR_SS;
  }
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c

index 31f66ba97228babe46bf2c46640077065ded7241..9d192ce0078d6989f34d32b7e7e41326507338c2 100644 (file)
--- a/tools/testing/selftests/kvm/aarch64/hypercalls.c
+++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c
@@ -105,12 +105,12 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
                 case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
                 case TEST_STAGE_HVC_IFACE_FALSE_INFO:
                         __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
-                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
                                         res.a0, hc_info->func_id, hc_info->arg1, stage);
                         break;
                 case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
                         __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
-                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
                                         res.a0, hc_info->func_id, hc_info->arg1, stage);
                         break;
                 default:
@@ -175,18 +175,18 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
                 /* First 'read' should be an upper limit of the features supported */
                 vcpu_get_reg(vcpu, reg_info->reg, &val);
                 TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
-                       "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n",
+                       "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
                         reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
  
                 /* Test a 'write' by disabling all the features of the register map */
                 ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
                 TEST_ASSERT(ret == 0,
-                       "Failed to clear all the features of reg: 0x%lx; ret: %d\n",
+                       "Failed to clear all the features of reg: 0x%lx; ret: %d",
                         reg_info->reg, errno);
  
                 vcpu_get_reg(vcpu, reg_info->reg, &val);
                 TEST_ASSERT(val == 0,
-                       "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg);
+                       "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
  
                 /*
                  * Test enabling a feature that's not supported.
@@ -195,7 +195,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
                 if (reg_info->max_feat_bit < 63) {
                         ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
                         TEST_ASSERT(ret != 0 && errno == EINVAL,
-                       "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n",
+                       "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
                         errno, reg_info->reg);
                 }
         }
@@ -216,7 +216,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
                  */
                 vcpu_get_reg(vcpu, reg_info->reg, &val);
                 TEST_ASSERT(val == 0,
-                       "Expected all the features to be cleared for reg: 0x%lx\n",
+                       "Expected all the features to be cleared for reg: 0x%lx",
                         reg_info->reg);
  
                 /*
@@ -226,7 +226,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
                  */
                 ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
                 TEST_ASSERT(ret != 0 && errno == EBUSY,
-               "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n",
+               "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
                 errno, reg_info->reg);
         }
  }
@@ -265,7 +265,7 @@ static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
         case TEST_STAGE_HVC_IFACE_FALSE_INFO:
                 break;
         default:
-               TEST_FAIL("Unknown test stage: %d\n", prev_stage);
+               TEST_FAIL("Unknown test stage: %d", prev_stage);
         }
  }
  
@@ -294,7 +294,7 @@ static void test_run(void)
                         REPORT_GUEST_ASSERT(uc);
                         break;
                 default:
-                       TEST_FAIL("Unexpected guest exit\n");
+                       TEST_FAIL("Unexpected guest exit");
                 }
         }
  
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c

index 08a5ca5bed56a9f602c01c024b19771a5cc9e219..5972905275cfacee914b618348a20a3cef080011 100644 (file)
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -292,7 +292,7 @@ static void guest_code(struct test_desc *test)
  
  static void no_dabt_handler(struct ex_regs *regs)
  {
-       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1));
+       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
  }
  
  static void no_iabt_handler(struct ex_regs *regs)
@@ -414,10 +414,10 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm,
         if (fd != -1) {
                 ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                                 0, paging_size);
-               TEST_ASSERT(ret == 0, "fallocate failed\n");
+               TEST_ASSERT(ret == 0, "fallocate failed");
         } else {
                 ret = madvise(hva, paging_size, MADV_DONTNEED);
-               TEST_ASSERT(ret == 0, "madvise failed\n");
+               TEST_ASSERT(ret == 0, "madvise failed");
         }
  
         return true;
@@ -501,7 +501,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd)
  
  void fail_vcpu_run_no_handler(int ret)
  {
-       TEST_FAIL("Unexpected vcpu run failure\n");
+       TEST_FAIL("Unexpected vcpu run failure");
  }
  
  void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c

index bac05210b53927970eb91cbac3e985b8ba74c897..16e2338686c172c1e1ece67e405df3ab666f7ded 100644 (file)
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -32,6 +32,10 @@ struct reg_ftr_bits {
         enum ftr_type type;
         uint8_t shift;
         uint64_t mask;
+       /*
+        * For FTR_EXACT, safe_val is used as the exact safe value.
+        * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
+        */
         int64_t safe_val;
  };
  
@@ -65,13 +69,13 @@ struct test_feature_reg {
  
  static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
         S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
         REG_FTR_END,
  };
  
  static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, 0),
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
         REG_FTR_END,
  };
  
@@ -224,13 +228,13 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
  {
         uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
  
-       if (ftr_bits->type == FTR_UNSIGNED) {
+       if (ftr_bits->sign == FTR_UNSIGNED) {
                 switch (ftr_bits->type) {
                 case FTR_EXACT:
                         ftr = ftr_bits->safe_val;
                         break;
                 case FTR_LOWER_SAFE:
-                       if (ftr > 0)
+                       if (ftr > ftr_bits->safe_val)
                                 ftr--;
                         break;
                 case FTR_HIGHER_SAFE:
@@ -252,7 +256,7 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
                         ftr = ftr_bits->safe_val;
                         break;
                 case FTR_LOWER_SAFE:
-                       if (ftr > 0)
+                       if (ftr > ftr_bits->safe_val)
                                 ftr--;
                         break;
                 case FTR_HIGHER_SAFE:
@@ -276,7 +280,7 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
  {
         uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
  
-       if (ftr_bits->type == FTR_UNSIGNED) {
+       if (ftr_bits->sign == FTR_UNSIGNED) {
                 switch (ftr_bits->type) {
                 case FTR_EXACT:
                         ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c

index f4ceae9c89257d211bab149be149510898e02290..2d189f3da228cdb74b8a8976c0736abde18ce9b5 100644 (file)
--- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c
+++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
@@ -178,7 +178,7 @@ static void expect_call_denied(struct kvm_vcpu *vcpu)
         struct ucall uc;
  
         if (get_ucall(vcpu, &uc) != UCALL_SYNC)
-               TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
  
         TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
                     "Unexpected SMCCC return code: %lu", uc.args[1]);
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c

index 9d51b56913496ed38f0413183b49380aba017ba3..f2fb0e3f14bca8cb8dc18a8a9d328f0cb1299803 100644 (file)
--- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
@@ -93,22 +93,6 @@ static inline void write_sel_evtyper(int sel, unsigned long val)
         isb();
  }
  
-static inline void enable_counter(int idx)
-{
-       uint64_t v = read_sysreg(pmcntenset_el0);
-
-       write_sysreg(BIT(idx) | v, pmcntenset_el0);
-       isb();
-}
-
-static inline void disable_counter(int idx)
-{
-       uint64_t v = read_sysreg(pmcntenset_el0);
-
-       write_sysreg(BIT(idx) | v, pmcntenclr_el0);
-       isb();
-}
-
  static void pmu_disable_reset(void)
  {
         uint64_t pmcr = read_sysreg(pmcr_el0);
@@ -195,11 +179,11 @@ struct pmc_accessor pmc_accessors[] = {
                                                                                  \
         if (set_expected)                                                        \
                 __GUEST_ASSERT((_tval & mask),                                   \
-                               "tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \
+                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
                                 _tval, mask, set_expected);                      \
         else                                                                     \
                 __GUEST_ASSERT(!(_tval & mask),                                  \
-                               "tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \
+                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
                                 _tval, mask, set_expected);                      \
  }
  
@@ -286,7 +270,7 @@ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
         acc->write_typer(pmc_idx, write_data);
         read_data = acc->read_typer(pmc_idx);
         __GUEST_ASSERT(read_data == write_data,
-                      "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
                        pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
  
         /*
@@ -297,14 +281,14 @@ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
  
         /* The count value must be 0, as it is disabled and reset */
         __GUEST_ASSERT(read_data == 0,
-                      "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx",
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
                        pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
  
         write_data = read_data + pmc_idx + 0x12345;
         acc->write_cntr(pmc_idx, write_data);
         read_data = acc->read_cntr(pmc_idx);
         __GUEST_ASSERT(read_data == write_data,
-                      "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
                        pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
  }
  
@@ -379,7 +363,7 @@ static void guest_code(uint64_t expected_pmcr_n)
         int i, pmc;
  
         __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
-                       "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%lx",
+                       "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
                         expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
  
         pmcr = read_sysreg(pmcr_el0);
@@ -517,11 +501,11 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
  
         if (expect_fail)
                 TEST_ASSERT(pmcr_orig == pmcr,
-                           "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx\n",
+                           "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
                             pmcr, pmcr_n);
         else
                 TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
-                           "Failed to update PMCR.N to %lu (received: %lu)\n",
+                           "Failed to update PMCR.N to %lu (received: %lu)",
                             pmcr_n, get_pmcr_n(pmcr));
  }
  
@@ -594,12 +578,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
                  */
                 vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
                 TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+                           "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
                             KVM_ARM64_SYS_REG(set_reg_id), reg_val);
  
                 vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
                 TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+                           "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
                             KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
  
                 /*
@@ -611,12 +595,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
  
                 vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
                 TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+                           "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
                             KVM_ARM64_SYS_REG(set_reg_id), reg_val);
  
                 vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
                 TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+                           "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
                             KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
         }
  
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c

new file mode 100644 (file)

index 0000000..ae1f1a6
--- /dev/null
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the arch timer IRQ functionality
+ *
+ * The guest's main thread configures the timer interrupt and waits
+ * for it to fire, with a timeout equal to the timer period.
+ * It asserts that the timeout doesn't exceed the timer period plus
+ * a user configurable error margin(default to 100us)
+ *
+ * On the other hand, upon receipt of an interrupt, the guest's interrupt
+ * handler validates the interrupt by checking if the architectural state
+ * is in compliance with the specifications.
+ *
+ * The test provides command-line options to configure the timer's
+ * period (-p), number of vCPUs (-n), iterations per stage (-i) and timer
+ * interrupt arrival error margin (-e). To stress-test the timer stack
+ * even more, an option to migrate the vCPUs across pCPUs (-m), at a
+ * particular rate, is also provided.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <sys/sysinfo.h>
+
+#include "timer_test.h"
+
+struct test_args test_args = {
+       .nr_vcpus = NR_VCPUS_DEF,
+       .nr_iter = NR_TEST_ITERS_DEF,
+       .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
+       .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
+       .timer_err_margin_us = TIMER_TEST_ERR_MARGIN_US,
+       .reserved = 1,
+};
+
+struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
+
+static pthread_t pt_vcpu_run[KVM_MAX_VCPUS];
+static unsigned long *vcpu_done_map;
+static pthread_mutex_t vcpu_done_map_lock;
+
+static void *test_vcpu_run(void *arg)
+{
+       unsigned int vcpu_idx = (unsigned long)arg;
+       struct ucall uc;
+       struct kvm_vcpu *vcpu = vcpus[vcpu_idx];
+       struct kvm_vm *vm = vcpu->vm;
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx];
+
+       vcpu_run(vcpu);
+
+       /* Currently, any exit from guest is an indication of completion */
+       pthread_mutex_lock(&vcpu_done_map_lock);
+       __set_bit(vcpu_idx, vcpu_done_map);
+       pthread_mutex_unlock(&vcpu_done_map_lock);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               sync_global_from_guest(vm, *shared_data);
+               fprintf(stderr, "Guest assert failed,  vcpu %u; stage; %u; iter: %u\n",
+                       vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       default:
+               TEST_FAIL("Unexpected guest exit");
+       }
+
+       pr_info("PASS(vCPU-%d).\n", vcpu_idx);
+
+       return NULL;
+}
+
+static uint32_t test_get_pcpu(void)
+{
+       uint32_t pcpu;
+       unsigned int nproc_conf;
+       cpu_set_t online_cpuset;
+
+       nproc_conf = get_nprocs_conf();
+       sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset);
+
+       /* Randomly find an available pCPU to place a vCPU on */
+       do {
+               pcpu = rand() % nproc_conf;
+       } while (!CPU_ISSET(pcpu, &online_cpuset));
+
+       return pcpu;
+}
+
+static int test_migrate_vcpu(unsigned int vcpu_idx)
+{
+       int ret;
+       cpu_set_t cpuset;
+       uint32_t new_pcpu = test_get_pcpu();
+
+       CPU_ZERO(&cpuset);
+       CPU_SET(new_pcpu, &cpuset);
+
+       pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
+
+       ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
+                                    sizeof(cpuset), &cpuset);
+
+       /* Allow the error where the vCPU thread is already finished */
+       TEST_ASSERT(ret == 0 || ret == ESRCH,
+                   "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d",
+                   vcpu_idx, new_pcpu, ret);
+
+       return ret;
+}
+
+static void *test_vcpu_migration(void *arg)
+{
+       unsigned int i, n_done;
+       bool vcpu_done;
+
+       do {
+               usleep(msecs_to_usecs(test_args.migration_freq_ms));
+
+               for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) {
+                       pthread_mutex_lock(&vcpu_done_map_lock);
+                       vcpu_done = test_bit(i, vcpu_done_map);
+                       pthread_mutex_unlock(&vcpu_done_map_lock);
+
+                       if (vcpu_done) {
+                               n_done++;
+                               continue;
+                       }
+
+                       test_migrate_vcpu(i);
+               }
+       } while (test_args.nr_vcpus != n_done);
+
+       return NULL;
+}
+
+static void test_run(struct kvm_vm *vm)
+{
+       pthread_t pt_vcpu_migration;
+       unsigned int i;
+       int ret;
+
+       pthread_mutex_init(&vcpu_done_map_lock, NULL);
+       vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
+       TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap");
+
+       for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) {
+               ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run,
+                                    (void *)(unsigned long)i);
+               TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i);
+       }
+
+       /* Spawn a thread to control the vCPU migrations */
+       if (test_args.migration_freq_ms) {
+               srand(time(NULL));
+
+               ret = pthread_create(&pt_vcpu_migration, NULL,
+                                       test_vcpu_migration, NULL);
+               TEST_ASSERT(!ret, "Failed to create the migration pthread");
+       }
+
+
+       for (i = 0; i < test_args.nr_vcpus; i++)
+               pthread_join(pt_vcpu_run[i], NULL);
+
+       if (test_args.migration_freq_ms)
+               pthread_join(pt_vcpu_migration, NULL);
+
+       bitmap_free(vcpu_done_map);
+}
+
+static void test_print_help(char *name)
+{
+       pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n"
+               "\t\t    [-m migration_freq_ms] [-o counter_offset]\n"
+               "\t\t    [-e timer_err_margin_us]\n", name);
+       pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n",
+               NR_VCPUS_DEF, KVM_MAX_VCPUS);
+       pr_info("\t-i: Number of iterations per stage (default: %u)\n",
+               NR_TEST_ITERS_DEF);
+       pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n",
+               TIMER_TEST_PERIOD_MS_DEF);
+       pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
+               TIMER_TEST_MIGRATION_FREQ_MS);
+       pr_info("\t-o: Counter offset (in counter cycles, default: 0) [aarch64-only]\n");
+       pr_info("\t-e: Interrupt arrival error margin (in us) of the guest timer (default: %u)\n",
+               TIMER_TEST_ERR_MARGIN_US);
+       pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+       int opt;
+
+       while ((opt = getopt(argc, argv, "hn:i:p:m:o:e:")) != -1) {
+               switch (opt) {
+               case 'n':
+                       test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+                       if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
+                               pr_info("Max allowed vCPUs: %u\n",
+                                       KVM_MAX_VCPUS);
+                               goto err;
+                       }
+                       break;
+               case 'i':
+                       test_args.nr_iter = atoi_positive("Number of iterations", optarg);
+                       break;
+               case 'p':
+                       test_args.timer_period_ms = atoi_positive("Periodicity", optarg);
+                       break;
+               case 'm':
+                       test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
+                       break;
+               case 'e':
+                       test_args.timer_err_margin_us = atoi_non_negative("Error Margin", optarg);
+                       break;
+               case 'o':
+                       test_args.counter_offset = strtol(optarg, NULL, 0);
+                       test_args.reserved = 0;
+                       break;
+               case 'h':
+               default:
+                       goto err;
+               }
+       }
+
+       return true;
+
+err:
+       test_print_help(argv[0]);
+       return false;
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+
+       if (!parse_args(argc, argv))
+               exit(KSFT_SKIP);
+
+       __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2,
+                      "At least two physical CPUs needed for vCPU migration");
+
+       vm = test_vm_create();
+       test_run(vm);
+       test_vm_cleanup(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c

index 09c116a82a8499d7b14dc60bdcb088dd6c6914c7..bf3609f718544fb2b5d6c116d8eec4b28c29ab98 100644 (file)
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -45,10 +45,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
  
         /* Let the guest access its memory */
         ret = _vcpu_run(vcpu);
-       TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+       TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
         if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
                 TEST_ASSERT(false,
-                           "Invalid guest sync status: exit_reason=%s\n",
+                           "Invalid guest sync status: exit_reason=%s",
                             exit_reason_str(run->exit_reason));
         }
  
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c

index d374dbcf9a535dbd9efc7316e9c63c4152010e7a..504f6fe980e8fd7bf57ae105ed574fd3bf62d583 100644 (file)
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -88,9 +88,9 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
                 ret = _vcpu_run(vcpu);
                 ts_diff = timespec_elapsed(start);
  
-               TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+               TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
                 TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
-                           "Invalid guest sync status: exit_reason=%s\n",
+                           "Invalid guest sync status: exit_reason=%s",
                             exit_reason_str(run->exit_reason));
  
                 pr_debug("Got sync event from vCPU %d\n", vcpu_idx);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c

index 6cbecf4997676f327095a399a75dcfa514fca13c..eaad5b20854ccf095a4447245554f8ecd48e0506 100644 (file)
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -262,7 +262,7 @@ static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
                     "vcpu run failed: errno=%d", err);
  
         TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
-                   "Invalid guest sync status: exit_reason=%s\n",
+                   "Invalid guest sync status: exit_reason=%s",
                     exit_reason_str(run->exit_reason));
  
         vcpu_handle_sync_stop();
@@ -376,7 +376,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
  
         cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
  
-       /* Cleared pages should be the same as collected */
+       /*
+        * Cleared pages should be the same as collected, as KVM is supposed to
+        * clear only the entries that have been harvested.
+        */
         TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
                     "with collected (%u)", cleared, count);
  
@@ -410,17 +413,11 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
                 pr_info("vcpu continues now.\n");
         } else {
                 TEST_ASSERT(false, "Invalid guest sync status: "
-                           "exit_reason=%s\n",
+                           "exit_reason=%s",
                             exit_reason_str(run->exit_reason));
         }
  }
  
-static void dirty_ring_before_vcpu_join(void)
-{
-       /* Kick another round of vcpu just to make sure it will quit */
-       sem_post(&sem_vcpu_cont);
-}
-
  struct log_mode {
         const char *name;
         /* Return true if this mode is supported, otherwise false */
@@ -433,7 +430,6 @@ struct log_mode {
                                      uint32_t *ring_buf_idx);
         /* Hook to call when after each vcpu run */
         void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
-       void (*before_vcpu_join) (void);
  } log_modes[LOG_MODE_NUM] = {
         {
                 .name = "dirty-log",
@@ -452,7 +448,6 @@ struct log_mode {
                 .supported = dirty_ring_supported,
                 .create_vm_done = dirty_ring_create_vm_done,
                 .collect_dirty_pages = dirty_ring_collect_dirty_pages,
-               .before_vcpu_join = dirty_ring_before_vcpu_join,
                 .after_vcpu_run = dirty_ring_after_vcpu_run,
         },
  };
@@ -513,14 +508,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
                 mode->after_vcpu_run(vcpu, ret, err);
  }
  
-static void log_mode_before_vcpu_join(void)
-{
-       struct log_mode *mode = &log_modes[host_log_mode];
-
-       if (mode->before_vcpu_join)
-               mode->before_vcpu_join();
-}
-
  static void generate_random_array(uint64_t *guest_array, uint64_t size)
  {
         uint64_t i;
@@ -719,6 +706,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
         struct kvm_vm *vm;
         unsigned long *bmap;
         uint32_t ring_buf_idx = 0;
+       int sem_val;
  
         if (!log_mode_supported()) {
                 print_skip("Log mode '%s' not supported",
@@ -788,12 +776,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
         /* Start the iterations */
         iteration = 1;
         sync_global_to_guest(vm, iteration);
-       host_quit = false;
+       WRITE_ONCE(host_quit, false);
         host_dirty_count = 0;
         host_clear_count = 0;
         host_track_next_count = 0;
         WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
  
+       /*
+        * Ensure the previous iteration didn't leave a dangling semaphore, i.e.
+        * that the main task and vCPU worker were synchronized and completed
+        * verification of all iterations.
+        */
+       sem_getvalue(&sem_vcpu_stop, &sem_val);
+       TEST_ASSERT_EQ(sem_val, 0);
+       sem_getvalue(&sem_vcpu_cont, &sem_val);
+       TEST_ASSERT_EQ(sem_val, 0);
+
         pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
  
         while (iteration < p->iterations) {
@@ -819,15 +817,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                 assert(host_log_mode == LOG_MODE_DIRTY_RING ||
                        atomic_read(&vcpu_sync_stop_requested) == false);
                 vm_dirty_log_verify(mode, bmap);
-               sem_post(&sem_vcpu_cont);
  
-               iteration++;
+               /*
+                * Set host_quit before sem_vcpu_cont in the final iteration to
+                * ensure that the vCPU worker doesn't resume the guest.  As
+                * above, the dirty ring test may stop and wait even when not
+                * explicitly request to do so, i.e. would hang waiting for a
+                * "continue" if it's allowed to resume the guest.
+                */
+               if (++iteration == p->iterations)
+                       WRITE_ONCE(host_quit, true);
+
+               sem_post(&sem_vcpu_cont);
                 sync_global_to_guest(vm, iteration);
         }
  
-       /* Tell the vcpu thread to quit */
-       host_quit = true;
-       log_mode_before_vcpu_join();
         pthread_join(vcpu_thread, NULL);
  
         pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c

index 8274ef04301f6704528293206603efca9690b29c..91f05f78e8249124332e9a28fef53daf4e746fd1 100644 (file)
--- a/tools/testing/selftests/kvm/get-reg-list.c
+++ b/tools/testing/selftests/kvm/get-reg-list.c
@@ -152,7 +152,7 @@ static void check_supported(struct vcpu_reg_list *c)
                         continue;
  
                 __TEST_REQUIRE(kvm_has_cap(s->capability),
-                              "%s: %s not available, skipping tests\n",
+                              "%s: %s not available, skipping tests",
                                config_name(c), s->name);
         }
  }
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c

index c78a98c1a915ce9a95261206ee6b10cb258b2955..92eae206baa62ec410460fd18938b2777da58775 100644 (file)
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -167,6 +167,9 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
         TEST_ASSERT(ret != -1, "memfd fstat should succeed");
         TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size");
         TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
+
+       close(fd2);
+       close(fd1);
  }
  
  int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c

index 41230b74619023d447f48fc8555936e194ce9387..3502caa3590c6488442a015ff40e5fa1027e41e6 100644 (file)
--- a/tools/testing/selftests/kvm/guest_print_test.c
+++ b/tools/testing/selftests/kvm/guest_print_test.c
@@ -98,7 +98,7 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
         int offset = len_str - len_substr;
  
         TEST_ASSERT(len_substr <= len_str,
-                   "Expected '%s' to be a substring of '%s'\n",
+                   "Expected '%s' to be a substring of '%s'",
                     assert_msg, expected_assert_msg);
  
         TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0,
@@ -116,7 +116,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
                 vcpu_run(vcpu);
  
                 TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
-                           "Unexpected exit reason: %u (%s),\n",
+                           "Unexpected exit reason: %u (%s),",
                             run->exit_reason, exit_reason_str(run->exit_reason));
  
                 switch (get_ucall(vcpu, &uc)) {
@@ -161,11 +161,11 @@ static void test_limits(void)
         vcpu_run(vcpu);
  
         TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
-                   "Unexpected exit reason: %u (%s),\n",
+                   "Unexpected exit reason: %u (%s),",
                     run->exit_reason, exit_reason_str(run->exit_reason));
  
         TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT,
-                   "Unexpected ucall command: %lu,  Expected: %u (UCALL_ABORT)\n",
+                   "Unexpected ucall command: %lu,  Expected: %u (UCALL_ABORT)",
                     uc.cmd, UCALL_ABORT);
  
         kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c

index f5d59b9934f184163e3e1e48578508d2a5255eae..decc521fc7603b1440cbb414f94acdf042ce1c5a 100644 (file)
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -41,7 +41,7 @@ static void *run_vcpu(void *arg)
  
         vcpu_run(vcpu);
  
-       TEST_ASSERT(false, "%s: exited with reason %d: %s\n",
+       TEST_ASSERT(false, "%s: exited with reason %d: %s",
                     __func__, run->exit_reason,
                     exit_reason_str(run->exit_reason));
         pthread_exit(NULL);
@@ -55,7 +55,7 @@ static void *sleeping_thread(void *arg)
                 fd = open("/dev/null", O_RDWR);
                 close(fd);
         }
-       TEST_ASSERT(false, "%s: exited\n", __func__);
+       TEST_ASSERT(false, "%s: exited", __func__);
         pthread_exit(NULL);
  }
  
@@ -118,7 +118,7 @@ static void run_test(uint32_t run)
         for (i = 0; i < VCPU_NUM; ++i)
                 check_join(threads[i], &b);
         /* Should not be reached */
-       TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
+       TEST_ASSERT(false, "%s: [%d] child escaped the ninja", __func__, run);
  }
  
  void wait_for_child_setup(pid_t pid)
diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h

new file mode 100644 (file)

index 0000000..e43a57d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h

index cf20e44e86f2f9fb7feeccf88fdc93fecd7fbfd2..9e518b56282736caeaf549e1d504ce53f9fc64e3 100644 (file)
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -226,8 +226,4 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
                uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
                uint64_t arg6, struct arm_smccc_res *res);
  
-
-
-uint32_t guest_get_vcpuid(void);
-
  #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_test_harness.h b/tools/testing/selftests/kvm/include/kvm_test_harness.h

new file mode 100644 (file)

index 0000000..8f7c685
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_test_harness.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Macros for defining a KVM test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_TEST_HARNESS_H
+#define SELFTEST_KVM_TEST_HARNESS_H
+
+#include "kselftest_harness.h"
+
+#define KVM_ONE_VCPU_TEST_SUITE(name)                                  \
+       FIXTURE(name) {                                                 \
+               struct kvm_vcpu *vcpu;                                  \
+       };                                                              \
+                                                                       \
+       FIXTURE_SETUP(name) {                                           \
+               (void)vm_create_with_one_vcpu(&self->vcpu, NULL);       \
+       }                                                               \
+                                                                       \
+       FIXTURE_TEARDOWN(name) {                                        \
+               kvm_vm_free(self->vcpu->vm);                            \
+       }
+
+#define KVM_ONE_VCPU_TEST(suite, test, guestcode)                      \
+static void __suite##_##test(struct kvm_vcpu *vcpu);                   \
+                                                                       \
+TEST_F(suite, test)                                                    \
+{                                                                      \
+       vcpu_arch_set_entry_point(self->vcpu, guestcode);               \
+       __suite##_##test(self->vcpu);                                   \
+}                                                                      \
+static void __suite##_##test(struct kvm_vcpu *vcpu)
+
+#endif /* SELFTEST_KVM_TEST_HARNESS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h

index 9e5afc472c14268bbe629cb9c1baf6049b702457..3e0db283a46ad18dcd9a51c47aa35fa04616e8e0 100644 (file)
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -18,9 +18,11 @@
  #include <linux/types.h>
  
  #include <asm/atomic.h>
+#include <asm/kvm.h>
  
  #include <sys/ioctl.h>
  
+#include "kvm_util_arch.h"
  #include "sparsebit.h"
  
  /*
@@ -46,6 +48,7 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
  struct userspace_mem_region {
         struct kvm_userspace_memory_region2 region;
         struct sparsebit *unused_phy_pages;
+       struct sparsebit *protected_phy_pages;
         int fd;
         off_t offset;
         enum vm_mem_backing_src_type backing_src_type;
@@ -90,6 +93,7 @@ enum kvm_mem_region_type {
  struct kvm_vm {
         int mode;
         unsigned long type;
+       uint8_t subtype;
         int kvm_fd;
         int fd;
         unsigned int pgtable_levels;
@@ -111,6 +115,9 @@ struct kvm_vm {
         vm_vaddr_t idt;
         vm_vaddr_t handlers;
         uint32_t dirty_ring_size;
+       uint64_t gpa_tag_mask;
+
+       struct kvm_vm_arch arch;
  
         /* Cache of information for binary stats interface */
         int stats_fd;
@@ -191,10 +198,14 @@ enum vm_guest_mode {
  };
  
  struct vm_shape {
-       enum vm_guest_mode mode;
-       unsigned int type;
+       uint32_t type;
+       uint8_t  mode;
+       uint8_t  subtype;
+       uint16_t padding;
  };
  
+kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
+
  #define VM_TYPE_DEFAULT                        0
  
  #define VM_SHAPE(__mode)                       \
@@ -259,6 +270,10 @@ bool get_kvm_param_bool(const char *param);
  bool get_kvm_intel_param_bool(const char *param);
  bool get_kvm_amd_param_bool(const char *param);
  
+int get_kvm_param_integer(const char *param);
+int get_kvm_intel_param_integer(const char *param);
+int get_kvm_amd_param_integer(const char *param);
+
  unsigned int kvm_check_cap(long cap);
  
  static inline bool kvm_has_cap(long cap)
@@ -564,6 +579,13 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
                 uint64_t guest_paddr, uint32_t slot, uint64_t npages,
                 uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
  
+#ifndef vm_arch_has_protected_memory
+static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
+{
+       return false;
+}
+#endif
+
  void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
  void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
  void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
@@ -573,6 +595,9 @@ vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_mi
  vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
  vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
                             enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+                                vm_vaddr_t vaddr_min,
+                                enum kvm_mem_region_type type);
  vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
  vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
                                  enum kvm_mem_region_type type);
@@ -585,6 +610,12 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
  vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
  void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
  
+
+static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+       return gpa & ~vm->gpa_tag_mask;
+}
+
  void vcpu_run(struct kvm_vcpu *vcpu);
  int _vcpu_run(struct kvm_vcpu *vcpu);
  
@@ -827,10 +858,23 @@ const char *exit_reason_str(unsigned int exit_reason);
  
  vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
                              uint32_t memslot);
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
-                             vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+                               vm_paddr_t paddr_min, uint32_t memslot,
+                               bool protected);
  vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
  
+static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+                                           vm_paddr_t paddr_min, uint32_t memslot)
+{
+       /*
+        * By default, allocate memory as protected for VMs that support
+        * protected memory, as the majority of memory for such VMs is
+        * protected, i.e. using shared memory is effectively opt-in.
+        */
+       return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
+                                   vm_arch_has_protected_memory(vm));
+}
+
  /*
   * ____vm_create() does KVM_CREATE_VM and little else.  __vm_create() also
   * loads the test binary into guest memory and creates an IRQ chip (x86 only).
@@ -969,15 +1013,18 @@ static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
   * Input Args:
   *   vm - Virtual Machine
   *   vcpu_id - The id of the VCPU to add to the VM.
- *   guest_code - The vCPU's entry point
   */
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code);
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
  
  static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
                                            void *guest_code)
  {
-       return vm_arch_vcpu_add(vm, vcpu_id, guest_code);
+       struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
+
+       vcpu_arch_set_entry_point(vcpu, guest_code);
+
+       return vcpu;
  }
  
  /* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
@@ -1081,4 +1128,8 @@ void kvm_selftest_arch_init(void);
  
  void kvm_arch_vm_post_create(struct kvm_vm *vm);
  
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
+
+uint32_t guest_get_vcpuid(void);
+
  #endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h

new file mode 100644 (file)

index 0000000..225d81d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V Arch Timer(sstc) specific interface
+ *
+ * Copyright (c) 2024 Intel Corporation
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include <asm/csr.h>
+#include <asm/vdso/processor.h>
+
+static unsigned long timer_freq;
+
+#define msec_to_cycles(msec)   \
+       ((timer_freq) * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec)   \
+       ((timer_freq) * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+       ((uint64_t)(cycles) * 1000000 / (timer_freq))
+
+static inline uint64_t timer_get_cycles(void)
+{
+       return csr_read(CSR_TIME);
+}
+
+static inline void timer_set_cmp(uint64_t cval)
+{
+       csr_write(CSR_STIMECMP, cval);
+}
+
+static inline uint64_t timer_get_cmp(void)
+{
+       return csr_read(CSR_STIMECMP);
+}
+
+static inline void timer_irq_enable(void)
+{
+       csr_set(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_irq_disable(void)
+{
+       csr_clear(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_set_next_cmp_ms(uint32_t msec)
+{
+       uint64_t now_ct = timer_get_cycles();
+       uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+       timer_set_cmp(next_ct);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+       uint64_t start = timer_get_cycles();
+
+       while ((timer_get_cycles() - start) < cycles)
+               cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+       __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h

new file mode 100644 (file)

index 0000000..e43a57d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h

index a0f9efe5a2a8de6afda4d4531f1ca6bda22f4b9c..ce473fe251dde487a1775538a85c2b4166704012 100644 (file)
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -7,8 +7,9 @@
  #ifndef SELFTEST_KVM_PROCESSOR_H
  #define SELFTEST_KVM_PROCESSOR_H
  
-#include "kvm_util.h"
  #include <linux/stringify.h>
+#include <asm/csr.h>
+#include "kvm_util.h"
  
  static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
                                     uint64_t idx, uint64_t size)
@@ -47,6 +48,58 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
                                                      KVM_REG_RISCV_SBI_SINGLE,          \
                                                      idx, KVM_REG_SIZE_ULONG)
  
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
+
+struct ex_regs {
+       unsigned long ra;
+       unsigned long sp;
+       unsigned long gp;
+       unsigned long tp;
+       unsigned long t0;
+       unsigned long t1;
+       unsigned long t2;
+       unsigned long s0;
+       unsigned long s1;
+       unsigned long a0;
+       unsigned long a1;
+       unsigned long a2;
+       unsigned long a3;
+       unsigned long a4;
+       unsigned long a5;
+       unsigned long a6;
+       unsigned long a7;
+       unsigned long s2;
+       unsigned long s3;
+       unsigned long s4;
+       unsigned long s5;
+       unsigned long s6;
+       unsigned long s7;
+       unsigned long s8;
+       unsigned long s9;
+       unsigned long s10;
+       unsigned long s11;
+       unsigned long t3;
+       unsigned long t4;
+       unsigned long t5;
+       unsigned long t6;
+       unsigned long epc;
+       unsigned long status;
+       unsigned long cause;
+};
+
+#define NR_VECTORS  2
+#define NR_EXCEPTIONS  32
+#define EC_MASK  (NR_EXCEPTIONS - 1)
+
+typedef void(*exception_handler_fn)(struct ex_regs *);
+
+void vm_init_vector_tables(struct kvm_vm *vm);
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler);
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler);
+
  /* L3 index Bit[47:39] */
  #define PGTBL_L3_INDEX_MASK                    0x0000FF8000000000ULL
  #define PGTBL_L3_INDEX_SHIFT                   39
@@ -101,13 +154,6 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
  #define PGTBL_PAGE_SIZE                                PGTBL_L0_BLOCK_SIZE
  #define PGTBL_PAGE_SIZE_SHIFT                  PGTBL_L0_BLOCK_SHIFT
  
-#define SATP_PPN                               _AC(0x00000FFFFFFFFFFF, UL)
-#define SATP_MODE_39                           _AC(0x8000000000000000, UL)
-#define SATP_MODE_48                           _AC(0x9000000000000000, UL)
-#define SATP_ASID_BITS                         16
-#define SATP_ASID_SHIFT                                44
-#define SATP_ASID_MASK                         _AC(0xFFFF, UL)
-
  /* SBI return error codes */
  #define SBI_SUCCESS                            0
  #define SBI_ERR_FAILURE                                -1
@@ -147,4 +193,14 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
  
  bool guest_sbi_probe_extension(int extid, long *out_val);
  
+static inline void local_irq_enable(void)
+{
+       csr_set(CSR_SSTATUS, SR_SIE);
+}
+
+static inline void local_irq_disable(void)
+{
+       csr_clear(CSR_SSTATUS, SR_SIE);
+}
+
  #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h

new file mode 100644 (file)

index 0000000..e43a57d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h

index 12a9a4b9ceadc20d6421b9eafce2dc02e1bb002a..bc760761e1a320fff163a83e620874ed6be2daed 100644 (file)
--- a/tools/testing/selftests/kvm/include/sparsebit.h
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -30,26 +30,26 @@ typedef uint64_t sparsebit_num_t;
  
  struct sparsebit *sparsebit_alloc(void);
  void sparsebit_free(struct sparsebit **sbitp);
-void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+void sparsebit_copy(struct sparsebit *dstp, const struct sparsebit *src);
  
-bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
-bool sparsebit_is_set_num(struct sparsebit *sbit,
+bool sparsebit_is_set(const struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(const struct sparsebit *sbit,
                           sparsebit_idx_t idx, sparsebit_num_t num);
-bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
-bool sparsebit_is_clear_num(struct sparsebit *sbit,
+bool sparsebit_is_clear(const struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(const struct sparsebit *sbit,
                             sparsebit_idx_t idx, sparsebit_num_t num);
-sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
-bool sparsebit_any_set(struct sparsebit *sbit);
-bool sparsebit_any_clear(struct sparsebit *sbit);
-bool sparsebit_all_set(struct sparsebit *sbit);
-bool sparsebit_all_clear(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *sbit);
+bool sparsebit_any_set(const struct sparsebit *sbit);
+bool sparsebit_any_clear(const struct sparsebit *sbit);
+bool sparsebit_all_set(const struct sparsebit *sbit);
+bool sparsebit_all_clear(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *sbit,
                                        sparsebit_idx_t start, sparsebit_num_t num);
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *sbit,
                                          sparsebit_idx_t start, sparsebit_num_t num);
  
  void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
@@ -62,9 +62,29 @@ void sparsebit_clear_num(struct sparsebit *sbitp,
                          sparsebit_idx_t start, sparsebit_num_t num);
  void sparsebit_clear_all(struct sparsebit *sbitp);
  
-void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+void sparsebit_dump(FILE *stream, const struct sparsebit *sbit,
                     unsigned int indent);
-void sparsebit_validate_internal(struct sparsebit *sbit);
+void sparsebit_validate_internal(const struct sparsebit *sbit);
+
+/*
+ * Iterate over an inclusive ranges within sparsebit @s. In each iteration,
+ * @range_begin and @range_end will take the beginning and end of the set
+ * range, which are of type sparsebit_idx_t.
+ *
+ * For example, if the range [3, 7] (inclusive) is set, within the
+ * iteration,@range_begin will take the value 3 and @range_end will take
+ * the value 7.
+ *
+ * Ensure that there is at least one bit set before using this macro with
+ * sparsebit_any_set(), because sparsebit_first_set() will abort if none
+ * are set.
+ */
+#define sparsebit_for_each_set_range(s, range_begin, range_end)         \
+       for (range_begin = sparsebit_first_set(s),                      \
+            range_end = sparsebit_next_clear(s, range_begin) - 1;      \
+            range_begin && range_end;                                  \
+            range_begin = sparsebit_next_set(s, range_end),            \
+            range_end = sparsebit_next_clear(s, range_begin) - 1)
  
  #ifdef __cplusplus
  }
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h

index 71a41fa924b7d09cb1a3aaf9bcc779d7d3311110..8a6e30612c86269575bdf7fee6449a52098d374f 100644 (file)
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -20,6 +20,8 @@
  #include <sys/mman.h>
  #include "kselftest.h"
  
+#define msecs_to_usecs(msec)    ((msec) * 1000ULL)
+
  static inline int _no_printf(const char *format, ...) { return 0; }
  
  #ifdef DEBUG
@@ -195,4 +197,6 @@ __printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...);
  
  char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
  
+char *sys_get_cur_clocksource(void);
+
  #endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h

new file mode 100644 (file)

index 0000000..9b6edaa
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/timer_test.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * timer test specific header
+ *
+ * Copyright (C) 2018, Google LLC
+ */
+
+#ifndef SELFTEST_KVM_TIMER_TEST_H
+#define SELFTEST_KVM_TIMER_TEST_H
+
+#include "kvm_util.h"
+
+#define NR_VCPUS_DEF            4
+#define NR_TEST_ITERS_DEF       5
+#define TIMER_TEST_PERIOD_MS_DEF    10
+#define TIMER_TEST_ERR_MARGIN_US    100
+#define TIMER_TEST_MIGRATION_FREQ_MS    2
+
+/* Timer test cmdline parameters */
+struct test_args {
+       uint32_t nr_vcpus;
+       uint32_t nr_iter;
+       uint32_t timer_period_ms;
+       uint32_t migration_freq_ms;
+       uint32_t timer_err_margin_us;
+       /* Members of struct kvm_arm_counter_offset */
+       uint64_t counter_offset;
+       uint64_t reserved;
+};
+
+/* Shared variables between host and guest */
+struct test_vcpu_shared_data {
+       uint32_t nr_iter;
+       int guest_stage;
+       uint64_t xcnt;
+};
+
+extern struct test_args test_args;
+extern struct kvm_vcpu *vcpus[];
+extern struct test_vcpu_shared_data vcpu_shared_data[];
+
+struct kvm_vm *test_vm_create(void);
+void test_vm_cleanup(struct kvm_vm *vm);
+
+#endif /* SELFTEST_KVM_TIMER_TEST_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h

new file mode 100644 (file)

index 0000000..9f17251
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct kvm_vm_arch {
+       uint64_t c_bit;
+       uint64_t s_bit;
+       int sev_fd;
+       bool is_pt_protected;
+};
+
+static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
+{
+       return arch->c_bit || arch->s_bit;
+}
+
+#define vm_arch_has_protected_memory(vm) \
+       __vm_arch_has_protected_memory(&(vm)->arch)
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86_64/pmu.h

new file mode 100644 (file)

index 0000000..3c10c4d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/pmu.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include <stdint.h>
+
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS                        300
+
+/*
+ * Encode an eventsel+umask pair into event-select MSR format.  Note, this is
+ * technically AMD's format, as Intel's format only supports 8 bits for the
+ * event selector, i.e. doesn't use bits 24:16 for the selector.  But, OR-ing
+ * in '0' is a nop and won't clobber the CMASK.
+ */
+#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) |     \
+                                   ((eventsel) & 0xff) |               \
+                                   ((umask) & 0xff) << 8)
+
+/*
+ * These are technically Intel's definitions, but except for CMASK (see above),
+ * AMD's layout is compatible with Intel's.
+ */
+#define ARCH_PERFMON_EVENTSEL_EVENT            GENMASK_ULL(7, 0)
+#define ARCH_PERFMON_EVENTSEL_UMASK            GENMASK_ULL(15, 8)
+#define ARCH_PERFMON_EVENTSEL_USR              BIT_ULL(16)
+#define ARCH_PERFMON_EVENTSEL_OS               BIT_ULL(17)
+#define ARCH_PERFMON_EVENTSEL_EDGE             BIT_ULL(18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL      BIT_ULL(19)
+#define ARCH_PERFMON_EVENTSEL_INT              BIT_ULL(20)
+#define ARCH_PERFMON_EVENTSEL_ANY              BIT_ULL(21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE           BIT_ULL(22)
+#define ARCH_PERFMON_EVENTSEL_INV              BIT_ULL(23)
+#define ARCH_PERFMON_EVENTSEL_CMASK            GENMASK_ULL(31, 24)
+
+/* RDPMC control flags, Intel only. */
+#define INTEL_RDPMC_METRICS                    BIT_ULL(29)
+#define INTEL_RDPMC_FIXED                      BIT_ULL(30)
+#define INTEL_RDPMC_FAST                       BIT_ULL(31)
+
+/* Fixed PMC controls, Intel only. */
+#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx)     BIT_ULL((32 + (_idx)))
+
+#define FIXED_PMC_KERNEL                       BIT_ULL(0)
+#define FIXED_PMC_USER                         BIT_ULL(1)
+#define FIXED_PMC_ANYTHREAD                    BIT_ULL(2)
+#define FIXED_PMC_ENABLE_PMI                   BIT_ULL(3)
+#define FIXED_PMC_NR_BITS                      4
+#define FIXED_PMC_CTRL(_idx, _val)             ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
+
+#define PMU_CAP_FW_WRITES                      BIT_ULL(13)
+#define PMU_CAP_LBR_FMT                                0x3f
+
+#define        INTEL_ARCH_CPU_CYCLES                   RAW_EVENT(0x3c, 0x00)
+#define        INTEL_ARCH_INSTRUCTIONS_RETIRED         RAW_EVENT(0xc0, 0x00)
+#define        INTEL_ARCH_REFERENCE_CYCLES             RAW_EVENT(0x3c, 0x01)
+#define        INTEL_ARCH_LLC_REFERENCES               RAW_EVENT(0x2e, 0x4f)
+#define        INTEL_ARCH_LLC_MISSES                   RAW_EVENT(0x2e, 0x41)
+#define        INTEL_ARCH_BRANCHES_RETIRED             RAW_EVENT(0xc4, 0x00)
+#define        INTEL_ARCH_BRANCHES_MISPREDICTED        RAW_EVENT(0xc5, 0x00)
+#define        INTEL_ARCH_TOPDOWN_SLOTS                RAW_EVENT(0xa4, 0x01)
+
+#define        AMD_ZEN_CORE_CYCLES                     RAW_EVENT(0x76, 0x00)
+#define        AMD_ZEN_INSTRUCTIONS_RETIRED            RAW_EVENT(0xc0, 0x00)
+#define        AMD_ZEN_BRANCHES_RETIRED                RAW_EVENT(0xc2, 0x00)
+#define        AMD_ZEN_BRANCHES_MISPREDICTED           RAW_EVENT(0xc3, 0x00)
+
+/*
+ * Note!  The order and thus the index of the architectural events matters as
+ * support for each event is enumerated via CPUID using the index of the event.
+ */
+enum intel_pmu_architectural_events {
+       INTEL_ARCH_CPU_CYCLES_INDEX,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
+       INTEL_ARCH_REFERENCE_CYCLES_INDEX,
+       INTEL_ARCH_LLC_REFERENCES_INDEX,
+       INTEL_ARCH_LLC_MISSES_INDEX,
+       INTEL_ARCH_BRANCHES_RETIRED_INDEX,
+       INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
+       INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+       NR_INTEL_ARCH_EVENTS,
+};
+
+enum amd_pmu_zen_events {
+       AMD_ZEN_CORE_CYCLES_INDEX,
+       AMD_ZEN_INSTRUCTIONS_INDEX,
+       AMD_ZEN_BRANCHES_INDEX,
+       AMD_ZEN_BRANCH_MISSES_INDEX,
+       NR_AMD_ZEN_EVENTS,
+};
+
+extern const uint64_t intel_pmu_arch_events[];
+extern const uint64_t amd_pmu_zen_events[];
+
+#endif /* SELFTEST_KVM_PMU_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h

index a84863503fcb46cda532840f3be4512cf35061c3..3bd03b088dda605348c7f85fc8d190ef63cf9e5e 100644 (file)
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -23,6 +23,15 @@
  extern bool host_cpu_is_intel;
  extern bool host_cpu_is_amd;
  
+enum vm_guest_x86_subtype {
+       VM_SUBTYPE_NONE = 0,
+       VM_SUBTYPE_SEV,
+       VM_SUBTYPE_SEV_ES,
+};
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+
  #define NMI_VECTOR             0x02
  
  #define X86_EFLAGS_FIXED        (1u << 1)
@@ -273,6 +282,7 @@ struct kvm_x86_cpu_property {
  #define X86_PROPERTY_MAX_EXT_LEAF              KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
  #define X86_PROPERTY_MAX_PHY_ADDR              KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
  #define X86_PROPERTY_MAX_VIRT_ADDR             KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_SEV_C_BIT                 KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
  #define X86_PROPERTY_PHYS_ADDR_REDUCTION       KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
  
  #define X86_PROPERTY_MAX_CENTAUR_LEAF          KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
@@ -282,24 +292,41 @@ struct kvm_x86_cpu_property {
   * that indicates the feature is _not_ supported, and a property that states
   * the length of the bit mask of unsupported features.  A feature is supported
   * if the size of the bit mask is larger than the "unavailable" bit, and said
- * bit is not set.
+ * bit is not set.  Fixed counters also bizarre enumeration, but inverted from
+ * arch events for general purpose counters.  Fixed counters are supported if a
+ * feature flag is set **OR** the total number of fixed counters is greater
+ * than index of the counter.
   *
- * Wrap the "unavailable" feature to simplify checking whether or not a given
- * architectural event is supported.
+ * Wrap the events for general purpose and fixed counters to simplify checking
+ * whether or not a given architectural event is supported.
   */
  struct kvm_x86_pmu_feature {
-       struct kvm_x86_cpu_feature anti_feature;
+       struct kvm_x86_cpu_feature f;
  };
-#define        KVM_X86_PMU_FEATURE(name, __bit)                                        \
-({                                                                             \
-       struct kvm_x86_pmu_feature feature = {                                  \
-               .anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit),        \
-       };                                                                      \
-                                                                               \
-       feature;                                                                \
+#define        KVM_X86_PMU_FEATURE(__reg, __bit)                               \
+({                                                                     \
+       struct kvm_x86_pmu_feature feature = {                          \
+               .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit),         \
+       };                                                              \
+                                                                       \
+       kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX ||         \
+                         KVM_CPUID_##__reg == KVM_CPUID_ECX);          \
+       feature;                                                        \
  })
  
-#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED   KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5)
+#define X86_PMU_FEATURE_CPU_CYCLES                     KVM_X86_PMU_FEATURE(EBX, 0)
+#define X86_PMU_FEATURE_INSNS_RETIRED                  KVM_X86_PMU_FEATURE(EBX, 1)
+#define X86_PMU_FEATURE_REFERENCE_CYCLES               KVM_X86_PMU_FEATURE(EBX, 2)
+#define X86_PMU_FEATURE_LLC_REFERENCES                 KVM_X86_PMU_FEATURE(EBX, 3)
+#define X86_PMU_FEATURE_LLC_MISSES                     KVM_X86_PMU_FEATURE(EBX, 4)
+#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED           KVM_X86_PMU_FEATURE(EBX, 5)
+#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED          KVM_X86_PMU_FEATURE(EBX, 6)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS                  KVM_X86_PMU_FEATURE(EBX, 7)
+
+#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED            KVM_X86_PMU_FEATURE(ECX, 0)
+#define X86_PMU_FEATURE_CPU_CYCLES_FIXED               KVM_X86_PMU_FEATURE(ECX, 1)
+#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED     KVM_X86_PMU_FEATURE(ECX, 2)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED            KVM_X86_PMU_FEATURE(ECX, 3)
  
  static inline unsigned int x86_family(unsigned int eax)
  {
@@ -698,10 +725,16 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
  
  static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
  {
-       uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+       uint32_t nr_bits;
  
-       return nr_bits > feature.anti_feature.bit &&
-              !this_cpu_has(feature.anti_feature);
+       if (feature.f.reg == KVM_CPUID_EBX) {
+               nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+               return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
+       }
+
+       GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
+       nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       return nr_bits > feature.f.bit || this_cpu_has(feature.f);
  }
  
  static __always_inline uint64_t this_cpu_supported_xcr0(void)
@@ -917,10 +950,16 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
  
  static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
  {
-       uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+       uint32_t nr_bits;
+
+       if (feature.f.reg == KVM_CPUID_EBX) {
+               nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+               return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
+       }
  
-       return nr_bits > feature.anti_feature.bit &&
-              !kvm_cpu_has(feature.anti_feature);
+       TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
+       nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
  }
  
  static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
@@ -995,7 +1034,9 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
         vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
  }
  
-void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+                            struct kvm_x86_cpu_property property,
+                            uint32_t value);
  
  void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
  void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
@@ -1059,6 +1100,7 @@ do {                                                                                      \
  } while (0)
  
  void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+void kvm_init_vm_address_properties(struct kvm_vm *vm);
  bool vm_is_unrestricted_guest(struct kvm_vm *vm);
  
  struct ex_regs {
@@ -1120,16 +1162,19 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
   * r9  = exception vector (non-zero)
   * r10 = error code
   */
-#define KVM_ASM_SAFE(insn)                                     \
+#define __KVM_ASM_SAFE(insn, fep)                              \
         "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"   \
         "lea 1f(%%rip), %%r10\n\t"                              \
         "lea 2f(%%rip), %%r11\n\t"                              \
-       "1: " insn "\n\t"                                       \
+       fep "1: " insn "\n\t"                                   \
         "xor %%r9, %%r9\n\t"                                    \
         "2:\n\t"                                                \
         "mov  %%r9b, %[vector]\n\t"                             \
         "mov  %%r10, %[error_code]\n\t"
  
+#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
+#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
+
  #define KVM_ASM_SAFE_OUTPUTS(v, ec)    [vector] "=qm"(v), [error_code] "=rm"(ec)
  #define KVM_ASM_SAFE_CLOBBERS  "r9", "r10", "r11"
  
@@ -1156,21 +1201,58 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
         vector;                                                         \
  })
  
-static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val)
-{
-       uint64_t error_code;
-       uint8_t vector;
-       uint32_t a, d;
+#define kvm_asm_safe_fep(insn, inputs...)                              \
+({                                                                     \
+       uint64_t ign_error_code;                                        \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE(insn)                                 \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
  
-       asm volatile(KVM_ASM_SAFE("rdmsr")
-                    : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code)
-                    : "c"(msr)
-                    : KVM_ASM_SAFE_CLOBBERS);
+#define kvm_asm_safe_ec_fep(insn, error_code, inputs...)               \
+({                                                                     \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE_FEP(insn)                             \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
  
-       *val = (uint64_t)a | ((uint64_t)d << 32);
-       return vector;
+#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                   \
+static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val)  \
+{                                                                      \
+       uint64_t error_code;                                            \
+       uint8_t vector;                                                 \
+       uint32_t a, d;                                                  \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE##_FEP(#insn)                          \
+                    : "=a"(a), "=d"(d),                                \
+                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : "c"(idx)                                         \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+                                                                       \
+       *val = (uint64_t)a | ((uint64_t)d << 32);                       \
+       return vector;                                                  \
  }
  
+/*
+ * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
+ * use ECX as in input index, and EDX:EAX as a 64-bit output.
+ */
+#define BUILD_READ_U64_SAFE_HELPERS(insn)                              \
+       BUILD_READ_U64_SAFE_HELPER(insn, , )                            \
+       BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                    \
+
+BUILD_READ_U64_SAFE_HELPERS(rdmsr)
+BUILD_READ_U64_SAFE_HELPERS(rdpmc)
+BUILD_READ_U64_SAFE_HELPERS(xgetbv)
+
  static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
  {
         return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
@@ -1186,6 +1268,16 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
  
  bool kvm_is_tdp_enabled(void);
  
+static inline bool kvm_is_pmu_enabled(void)
+{
+       return get_kvm_param_bool("enable_pmu");
+}
+
+static inline bool kvm_is_forced_emulation_enabled(void)
+{
+       return !!get_kvm_param_integer("force_emulation_prefix");
+}
+
  uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
                                     int *level);
  uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
@@ -1271,4 +1363,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
  #define PFERR_GUEST_PAGE_MASK  BIT_ULL(PFERR_GUEST_PAGE_BIT)
  #define PFERR_IMPLICIT_ACCESS  BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
  
+bool sys_clocksource_is_based_on_tsc(void);
+
  #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h

new file mode 100644 (file)

index 0000000..8a1bf88
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/sev.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers used for SEV guests
+ *
+ */
+#ifndef SELFTEST_KVM_SEV_H
+#define SELFTEST_KVM_SEV_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "linux/psp-sev.h"
+
+#include "kvm_util.h"
+#include "svm_util.h"
+#include "processor.h"
+
+enum sev_guest_state {
+       SEV_GUEST_STATE_UNINITIALIZED = 0,
+       SEV_GUEST_STATE_LAUNCH_UPDATE,
+       SEV_GUEST_STATE_LAUNCH_SECRET,
+       SEV_GUEST_STATE_RUNNING,
+};
+
+#define SEV_POLICY_NO_DBG      (1UL << 0)
+#define SEV_POLICY_ES          (1UL << 2)
+
+#define GHCB_MSR_TERM_REQ      0x100
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
+void sev_vm_launch_finish(struct kvm_vm *vm);
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+                                          struct kvm_vcpu **cpu);
+
+kvm_static_assert(SEV_RET_SUCCESS == 0);
+
+/*
+ * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
+ * instead of a proper struct.  The size of the parameter is embedded in the
+ * ioctl number, i.e. is ABI and thus immutable.  Hack around the mess by
+ * creating an overlay to pass in an "unsigned long" without a cast (casting
+ * will make the compiler unhappy due to dereferencing an aliased pointer).
+ */
+#define __vm_sev_ioctl(vm, cmd, arg)                                   \
+({                                                                     \
+       int r;                                                          \
+                                                                       \
+       union {                                                         \
+               struct kvm_sev_cmd c;                                   \
+               unsigned long raw;                                      \
+       } sev_cmd = { .c = {                                            \
+               .id = (cmd),                                            \
+               .data = (uint64_t)(arg),                                \
+               .sev_fd = (vm)->arch.sev_fd,                            \
+       } };                                                            \
+                                                                       \
+       r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw);        \
+       r ?: sev_cmd.c.error;                                           \
+})
+
+#define vm_sev_ioctl(vm, cmd, arg)                                     \
+({                                                                     \
+       int ret = __vm_sev_ioctl(vm, cmd, arg);                         \
+                                                                       \
+       __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm);               \
+})
+
+static inline void sev_vm_init(struct kvm_vm *vm)
+{
+       vm->arch.sev_fd = open_sev_dev_path_or_exit();
+
+       vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+}
+
+
+static inline void sev_es_vm_init(struct kvm_vm *vm)
+{
+       vm->arch.sev_fd = open_sev_dev_path_or_exit();
+
+       vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+}
+
+static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
+                                                struct userspace_mem_region *region)
+{
+       struct kvm_enc_region range = {
+               .addr = region->region.userspace_addr,
+               .size = region->region.memory_size,
+       };
+
+       vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
+}
+
+static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+                                         uint64_t size)
+{
+       struct kvm_sev_launch_update_data update_data = {
+               .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
+               .len = size,
+       };
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
+}
+
+#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c

index 31b3cb24b9a75cdf0cc9e71f1f0cf820ec20edae..b9e23265e4b3833a4fa07acca1461fb35cd8297f 100644 (file)
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -65,7 +65,7 @@ int main(int argc, char *argv[])
  
                         int r = setrlimit(RLIMIT_NOFILE, &rl);
                         __TEST_REQUIRE(r >= 0,
-                                      "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
+                                      "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
                                        old_rlim_max, nr_fds_wanted);
                 } else {
                         TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c

index e37dc9c21888f4bc4ed06bf3bacff89e28c68b5d..e0ba97ac1c5611a386981caf679b12910b600d8b 100644 (file)
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -204,9 +204,9 @@ static void *vcpu_worker(void *data)
                 ret = _vcpu_run(vcpu);
                 ts_diff = timespec_elapsed(start);
  
-               TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+               TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
                 TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
-                           "Invalid guest sync status: exit_reason=%s\n",
+                           "Invalid guest sync status: exit_reason=%s",
                             exit_reason_str(vcpu->run->exit_reason));
  
                 pr_debug("Got sync event from vCPU %d\n", vcpu->id);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c

index 41c776b642c0cd0be722e4bad1e0e9cc1f0cff80..a9eb17295be42f36b564113d4a0b01a3e4782d12 100644 (file)
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -365,8 +365,13 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
                 indent, "", pstate, pc);
  }
  
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 struct kvm_vcpu_init *init, void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                          struct kvm_vcpu_init *init)
  {
         size_t stack_size;
         uint64_t stack_vaddr;
@@ -381,15 +386,22 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
         aarch64_vcpu_setup(vcpu, init);
  
         vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+       return vcpu;
+}
+
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                 struct kvm_vcpu_init *init, void *guest_code)
+{
+       struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
+
+       vcpu_arch_set_entry_point(vcpu, guest_code);
  
         return vcpu;
  }
  
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
  {
-       return aarch64_vcpu_add(vm, vcpu_id, NULL, guest_code);
+       return __aarch64_vcpu_add(vm, vcpu_id, NULL);
  }
  
  void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
@@ -398,7 +410,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
         int i;
  
         TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
-                   "  num: %u\n", num);
+                   "  num: %u", num);
  
         va_start(ap, num);
  
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c

index b5f28d21a947704456b3e8cc9a0e86825d386c2f..184378d593e9a8e45ca7d46e6116ca692d710abd 100644 (file)
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -38,7 +38,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
         struct list_head *iter;
         unsigned int nr_gic_pages, nr_vcpus_created = 0;
  
-       TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty\n");
+       TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
  
         /*
          * Make sure that the caller is infact calling this
@@ -47,7 +47,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
         list_for_each(iter, &vm->vcpus)
                 nr_vcpus_created++;
         TEST_ASSERT(nr_vcpus == nr_vcpus_created,
-                       "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)\n",
+                       "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
                         nr_vcpus, nr_vcpus_created);
  
         /* Distributor setup */
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c

index 266f3876e10aff98955b6301d4e0ddaa3236e26e..f34d926d9735913f5ba826a8bee07aa0b8169d43 100644 (file)
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -184,7 +184,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
                                 "Seek to program segment offset failed,\n"
                                 "  program header idx: %u errno: %i\n"
                                 "  offset_rv: 0x%jx\n"
-                               "  expected: 0x%jx\n",
+                               "  expected: 0x%jx",
                                 n1, errno, (intmax_t) offset_rv,
                                 (intmax_t) phdr.p_offset);
                         test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c

index e066d584c65611b4da45b0312734c3fab7b3dcd6..b2262b5fad9e79a509f9c686e675baf2b95b00a0 100644 (file)
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -27,7 +27,8 @@ int open_path_or_exit(const char *path, int flags)
         int fd;
  
         fd = open(path, flags);
-       __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);
+       __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno));
+       TEST_ASSERT(fd >= 0, "Failed to open '%s'", path);
  
         return fd;
  }
@@ -51,13 +52,13 @@ int open_kvm_dev_path_or_exit(void)
         return _open_kvm_dev_path_or_exit(O_RDONLY);
  }
  
-static bool get_module_param_bool(const char *module_name, const char *param)
+static ssize_t get_module_param(const char *module_name, const char *param,
+                               void *buffer, size_t buffer_size)
  {
         const int path_size = 128;
         char path[path_size];
-       char value;
-       ssize_t r;
-       int fd;
+       ssize_t bytes_read;
+       int fd, r;
  
         r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
                      module_name, param);
@@ -66,11 +67,46 @@ static bool get_module_param_bool(const char *module_name, const char *param)
  
         fd = open_path_or_exit(path, O_RDONLY);
  
-       r = read(fd, &value, 1);
-       TEST_ASSERT(r == 1, "read(%s) failed", path);
+       bytes_read = read(fd, buffer, buffer_size);
+       TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
+                   path, bytes_read, buffer_size);
  
         r = close(fd);
         TEST_ASSERT(!r, "close(%s) failed", path);
+       return bytes_read;
+}
+
+static int get_module_param_integer(const char *module_name, const char *param)
+{
+       /*
+        * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
+        * NUL char, and 1 byte because the kernel sucks and inserts a newline
+        * at the end.
+        */
+       char value[16 + 1 + 1];
+       ssize_t r;
+
+       memset(value, '\0', sizeof(value));
+
+       r = get_module_param(module_name, param, value, sizeof(value));
+       TEST_ASSERT(value[r - 1] == '\n',
+                   "Expected trailing newline, got char '%c'", value[r - 1]);
+
+       /*
+        * Squash the newline, otherwise atoi_paranoid() will complain about
+        * trailing non-NUL characters in the string.
+        */
+       value[r - 1] = '\0';
+       return atoi_paranoid(value);
+}
+
+static bool get_module_param_bool(const char *module_name, const char *param)
+{
+       char value;
+       ssize_t r;
+
+       r = get_module_param(module_name, param, &value, sizeof(value));
+       TEST_ASSERT_EQ(r, 1);
  
         if (value == 'Y')
                 return true;
@@ -95,6 +131,21 @@ bool get_kvm_amd_param_bool(const char *param)
         return get_module_param_bool("kvm_amd", param);
  }
  
+int get_kvm_param_integer(const char *param)
+{
+       return get_module_param_integer("kvm", param);
+}
+
+int get_kvm_intel_param_integer(const char *param)
+{
+       return get_module_param_integer("kvm_intel", param);
+}
+
+int get_kvm_amd_param_integer(const char *param)
+{
+       return get_module_param_integer("kvm_amd", param);
+}
+
  /*
   * Capability
   *
@@ -225,6 +276,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
  
         vm->mode = shape.mode;
         vm->type = shape.type;
+       vm->subtype = shape.subtype;
  
         vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
         vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
@@ -265,6 +317,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
         case VM_MODE_PXXV48_4K:
  #ifdef __x86_64__
                 kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+               kvm_init_vm_address_properties(vm);
                 /*
                  * Ignore KVM support for 5-level paging (vm->va_bits == 57),
                  * it doesn't take effect unless a CR4.LA57 is set, which it
@@ -320,7 +373,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
         uint64_t nr_pages;
  
         TEST_ASSERT(nr_runnable_vcpus,
-                   "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");
+                   "Use vm_create_barebones() for VMs that _never_ have vCPUs");
  
         TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
                     "nr_vcpus = %d too large for host, max-vcpus = %d",
@@ -491,7 +544,7 @@ void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
         CPU_ZERO(&mask);
         CPU_SET(pcpu, &mask);
         r = sched_setaffinity(0, sizeof(mask), &mask);
-       TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
+       TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
  }
  
  static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
@@ -499,7 +552,7 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
         uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);
  
         TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
-                   "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
+                   "Not allowed to run on pCPU '%d', check cgroups?", pcpu);
         return pcpu;
  }
  
@@ -529,7 +582,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
         int i, r;
  
         cpu_list = strdup(pcpus_string);
-       TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");
+       TEST_ASSERT(cpu_list, "strdup() allocation failed.");
  
         r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
         TEST_ASSERT(!r, "sched_getaffinity() failed");
@@ -538,7 +591,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
  
         /* 1. Get all pcpus for vcpus. */
         for (i = 0; i < nr_vcpus; i++) {
-               TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
+               TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i);
                 vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
                 cpu = strtok(NULL, delim);
         }
@@ -665,6 +718,7 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
         vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
  
         sparsebit_free(&region->unused_phy_pages);
+       sparsebit_free(&region->protected_phy_pages);
         ret = munmap(region->mmap_start, region->mmap_size);
         TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
         if (region->fd >= 0) {
@@ -1046,6 +1100,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
         }
  
         region->unused_phy_pages = sparsebit_alloc();
+       if (vm_arch_has_protected_memory(vm))
+               region->protected_phy_pages = sparsebit_alloc();
         sparsebit_set_num(region->unused_phy_pages,
                 guest_paddr >> vm->page_shift, npages);
         region->region.slot = slot;
@@ -1057,7 +1113,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
         TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
                 "  rc: %i errno: %i\n"
                 "  slot: %u flags: 0x%x\n"
-               "  guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n",
+               "  guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
                 ret, errno, slot, flags,
                 guest_paddr, (uint64_t) region->region.memory_size,
                 region->region.guest_memfd);
@@ -1222,7 +1278,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
                 len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);
  
                 ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
-               TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx\n",
+               TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
                             punch_hole ? "punch hole" : "allocate", gpa, len,
                             region->region.guest_memfd, mode, fd_offset);
         }
@@ -1265,7 +1321,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
         struct kvm_vcpu *vcpu;
  
         /* Confirm a vcpu with the specified id doesn't already exist. */
-       TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);
+       TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id);
  
         /* Allocate and initialize new vcpu structure. */
         vcpu = calloc(1, sizeof(*vcpu));
@@ -1376,15 +1432,17 @@ va_found:
         return pgidx_start * vm->page_size;
  }
  
-vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-                           enum kvm_mem_region_type type)
+static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
+                                    vm_vaddr_t vaddr_min,
+                                    enum kvm_mem_region_type type,
+                                    bool protected)
  {
         uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
  
         virt_pgd_alloc(vm);
-       vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
-                                             KVM_UTIL_MIN_PFN * vm->page_size,
-                                             vm->memslots[type]);
+       vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages,
+                                               KVM_UTIL_MIN_PFN * vm->page_size,
+                                               vm->memslots[type], protected);
  
         /*
          * Find an unused range of virtual page addresses of at least
@@ -1404,6 +1462,20 @@ vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
         return vaddr_start;
  }
  
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+                           enum kvm_mem_region_type type)
+{
+       return ____vm_vaddr_alloc(vm, sz, vaddr_min, type,
+                                 vm_arch_has_protected_memory(vm));
+}
+
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+                                vm_vaddr_t vaddr_min,
+                                enum kvm_mem_region_type type)
+{
+       return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false);
+}
+
  /*
   * VM Virtual Address Allocate
   *
@@ -1526,6 +1598,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
  {
         struct userspace_mem_region *region;
  
+       gpa = vm_untag_gpa(vm, gpa);
+
         region = userspace_mem_region_find(vm, gpa, gpa);
         if (!region) {
                 TEST_FAIL("No vm physical memory at 0x%lx", gpa);
@@ -1872,6 +1946,10 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
                         region->host_mem);
                 fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
                 sparsebit_dump(stream, region->unused_phy_pages, 0);
+               if (region->protected_phy_pages) {
+                       fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, "");
+                       sparsebit_dump(stream, region->protected_phy_pages, 0);
+               }
         }
         fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
         sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
@@ -1973,6 +2051,7 @@ const char *exit_reason_str(unsigned int exit_reason)
   *   num - number of pages
   *   paddr_min - Physical address minimum
   *   memslot - Memory region to allocate page from
+ *   protected - True if the pages will be used as protected/private memory
   *
   * Output Args: None
   *
@@ -1984,8 +2063,9 @@ const char *exit_reason_str(unsigned int exit_reason)
   * and their base address is returned. A TEST_ASSERT failure occurs if
   * not enough pages are available at or above paddr_min.
   */
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
-                             vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+                               vm_paddr_t paddr_min, uint32_t memslot,
+                               bool protected)
  {
         struct userspace_mem_region *region;
         sparsebit_idx_t pg, base;
@@ -1998,8 +2078,10 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
                 paddr_min, vm->page_size);
  
         region = memslot2region(vm, memslot);
-       base = pg = paddr_min >> vm->page_shift;
+       TEST_ASSERT(!protected || region->protected_phy_pages,
+                   "Region doesn't support protected memory");
  
+       base = pg = paddr_min >> vm->page_shift;
         do {
                 for (; pg < base + num; ++pg) {
                         if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
@@ -2018,8 +2100,11 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
                 abort();
         }
  
-       for (pg = base; pg < base + num; ++pg)
+       for (pg = base; pg < base + num; ++pg) {
                 sparsebit_clear(region->unused_phy_pages, pg);
+               if (protected)
+                       sparsebit_set(region->protected_phy_pages, pg);
+       }
  
         return base * vm->page_size;
  }
@@ -2223,3 +2308,18 @@ void __attribute((constructor)) kvm_selftest_init(void)
  
         kvm_selftest_arch_init();
  }
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
+{
+       sparsebit_idx_t pg = 0;
+       struct userspace_mem_region *region;
+
+       if (!vm_arch_has_protected_memory(vm))
+               return false;
+
+       region = userspace_mem_region_find(vm, paddr, paddr);
+       TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr);
+
+       pg = paddr >> vm->page_shift;
+       return sparsebit_is_set(region->protected_phy_pages, pg);
+}
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c

index d05487e5a371df1d17c96d4fbec1b1f6b2e60c0f..cf2c739713080f3f55e1383fb5d0a9e65f5d1dc7 100644 (file)
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -192,7 +192,7 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
         TEST_ASSERT(guest_num_pages < region_end_gfn,
                     "Requested more guest memory than address space allows.\n"
                     "    guest pages: %" PRIx64 " max gfn: %" PRIx64
-                   " nr_vcpus: %d wss: %" PRIx64 "]\n",
+                   " nr_vcpus: %d wss: %" PRIx64 "]",
                     guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes);
  
         args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S

new file mode 100644 (file)

index 0000000..aa0abd3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Intel Corporation
+ */
+
+#ifndef __ASSEMBLY__
+#define __ASSEMBLY__
+#endif
+
+#include <asm/csr.h>
+
+.macro save_context
+       addi  sp, sp, (-8*34)
+       sd    x1, 0(sp)
+       sd    x2, 8(sp)
+       sd    x3, 16(sp)
+       sd    x4, 24(sp)
+       sd    x5, 32(sp)
+       sd    x6, 40(sp)
+       sd    x7, 48(sp)
+       sd    x8, 56(sp)
+       sd    x9, 64(sp)
+       sd    x10, 72(sp)
+       sd    x11, 80(sp)
+       sd    x12, 88(sp)
+       sd    x13, 96(sp)
+       sd    x14, 104(sp)
+       sd    x15, 112(sp)
+       sd    x16, 120(sp)
+       sd    x17, 128(sp)
+       sd    x18, 136(sp)
+       sd    x19, 144(sp)
+       sd    x20, 152(sp)
+       sd    x21, 160(sp)
+       sd    x22, 168(sp)
+       sd    x23, 176(sp)
+       sd    x24, 184(sp)
+       sd    x25, 192(sp)
+       sd    x26, 200(sp)
+       sd    x27, 208(sp)
+       sd    x28, 216(sp)
+       sd    x29, 224(sp)
+       sd    x30, 232(sp)
+       sd    x31, 240(sp)
+       csrr  s0, CSR_SEPC
+       csrr  s1, CSR_SSTATUS
+       csrr  s2, CSR_SCAUSE
+       sd    s0, 248(sp)
+       sd    s1, 256(sp)
+       sd    s2, 264(sp)
+.endm
+
+.macro restore_context
+       ld    s2, 264(sp)
+       ld    s1, 256(sp)
+       ld    s0, 248(sp)
+       csrw  CSR_SCAUSE, s2
+       csrw  CSR_SSTATUS, s1
+       csrw  CSR_SEPC, s0
+       ld    x31, 240(sp)
+       ld    x30, 232(sp)
+       ld    x29, 224(sp)
+       ld    x28, 216(sp)
+       ld    x27, 208(sp)
+       ld    x26, 200(sp)
+       ld    x25, 192(sp)
+       ld    x24, 184(sp)
+       ld    x23, 176(sp)
+       ld    x22, 168(sp)
+       ld    x21, 160(sp)
+       ld    x20, 152(sp)
+       ld    x19, 144(sp)
+       ld    x18, 136(sp)
+       ld    x17, 128(sp)
+       ld    x16, 120(sp)
+       ld    x15, 112(sp)
+       ld    x14, 104(sp)
+       ld    x13, 96(sp)
+       ld    x12, 88(sp)
+       ld    x11, 80(sp)
+       ld    x10, 72(sp)
+       ld    x9, 64(sp)
+       ld    x8, 56(sp)
+       ld    x7, 48(sp)
+       ld    x6, 40(sp)
+       ld    x5, 32(sp)
+       ld    x4, 24(sp)
+       ld    x3, 16(sp)
+       ld    x2, 8(sp)
+       ld    x1, 0(sp)
+       addi  sp, sp, (8*34)
+.endm
+
+.balign 4
+.global exception_vectors
+exception_vectors:
+       save_context
+       move  a0, sp
+       call  route_exception
+       restore_context
+       sret
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c

index 7ca736fb4194046072bf69b3210f0fefd8ce0834..e8211f5d68637ea7d0b9916e7931a23c0573dbe0 100644 (file)
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -13,6 +13,18 @@
  
  #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN    0xac0000
  
+static vm_vaddr_t exception_handlers;
+
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
+{
+       unsigned long value = 0;
+       int ret;
+
+       ret = __vcpu_get_reg(vcpu, ext, &value);
+
+       return !ret && !!value;
+}
+
  static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
  {
         return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -277,8 +289,12 @@ static void __aligned(16) guest_unexp_trap(void)
                   0, 0, 0, 0, 0, 0);
  }
  
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
  {
         int r;
         size_t stack_size;
@@ -312,7 +328,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
  
         /* Setup stack pointer and program counter of guest */
         vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
-       vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
+
+       /* Setup sscratch for guest_get_vcpuid() */
+       vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id);
  
         /* Setup default exception vector of guest */
         vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
@@ -327,7 +345,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
         int i;
  
         TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
-                   "  num: %u\n", num);
+                   "  num: %u", num);
  
         va_start(ap, num);
  
@@ -364,8 +382,80 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
         va_end(ap);
  }
  
+void kvm_exit_unexpected_exception(int vector, int ec)
+{
+       ucall(UCALL_UNHANDLED, 2, vector, ec);
+}
+
  void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
  {
+       struct ucall uc;
+
+       if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
+               TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+                       uc.args[0], uc.args[1]);
+       }
+}
+
+struct handlers {
+       exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
+};
+
+void route_exception(struct ex_regs *regs)
+{
+       struct handlers *handlers = (struct handlers *)exception_handlers;
+       int vector = 0, ec;
+
+       ec = regs->cause & ~CAUSE_IRQ_FLAG;
+       if (ec >= NR_EXCEPTIONS)
+               goto unexpected_exception;
+
+       /* Use the same handler for all the interrupts */
+       if (regs->cause & CAUSE_IRQ_FLAG) {
+               vector = 1;
+               ec = 0;
+       }
+
+       if (handlers && handlers->exception_handlers[vector][ec])
+               return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+       return kvm_exit_unexpected_exception(vector, ec);
+}
+
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu)
+{
+       extern char exception_vectors;
+
+       vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors);
+}
+
+void vm_init_vector_tables(struct kvm_vm *vm)
+{
+       vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+                                  vm->page_size, MEM_REGION_DATA);
+
+       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler)
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       assert(vector < NR_EXCEPTIONS);
+       handlers->exception_handlers[0][vector] = handler;
+}
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler)
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       handlers->exception_handlers[1][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+       return csr_read(CSR_SSCRATCH);
  }
  
  struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c

index 15945121daf17dc46bf38cf8f2a24d23b8f073f3..4ad4492eea1d96f88a544ab510300ed43e2934f1 100644 (file)
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -155,15 +155,18 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
         virt_dump_region(stream, vm, indent, vm->pgd);
  }
  
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu->run->psw_addr = (uintptr_t)guest_code;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
  {
         size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
         uint64_t stack_vaddr;
         struct kvm_regs regs;
         struct kvm_sregs sregs;
         struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
  
         TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
                     vm->page_size);
@@ -184,9 +187,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
         sregs.crs[1] = vm->pgd | 0xf;           /* Primary region table */
         vcpu_sregs_set(vcpu, &sregs);
  
-       run = vcpu->run;
-       run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
-       run->psw_addr = (uintptr_t)guest_code;
+       vcpu->run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
  
         return vcpu;
  }
@@ -198,7 +199,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
         int i;
  
         TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
-                   "  num: %u\n",
+                   "  num: %u",
                     num);
  
         va_start(ap, num);
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c

index 88cb6b84e6f31009e8fc61e2ae0b69b9728e0ef1..cfed9d26cc71b06a9c4e52c89a52b079944d3a49 100644 (file)
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -202,7 +202,7 @@ static sparsebit_num_t node_num_set(struct node *nodep)
  /* Returns a pointer to the node that describes the
   * lowest bit index.
   */
-static struct node *node_first(struct sparsebit *s)
+static struct node *node_first(const struct sparsebit *s)
  {
         struct node *nodep;
  
@@ -216,7 +216,7 @@ static struct node *node_first(struct sparsebit *s)
   * lowest bit index > the index of the node pointed to by np.
   * Returns NULL if no node with a higher index exists.
   */
-static struct node *node_next(struct sparsebit *s, struct node *np)
+static struct node *node_next(const struct sparsebit *s, struct node *np)
  {
         struct node *nodep = np;
  
@@ -244,7 +244,7 @@ static struct node *node_next(struct sparsebit *s, struct node *np)
   * highest index < the index of the node pointed to by np.
   * Returns NULL if no node with a lower index exists.
   */
-static struct node *node_prev(struct sparsebit *s, struct node *np)
+static struct node *node_prev(const struct sparsebit *s, struct node *np)
  {
         struct node *nodep = np;
  
@@ -273,7 +273,7 @@ static struct node *node_prev(struct sparsebit *s, struct node *np)
   * subtree and duplicates the bit settings to the newly allocated nodes.
   * Returns the newly allocated copy of subtree.
   */
-static struct node *node_copy_subtree(struct node *subtree)
+static struct node *node_copy_subtree(const struct node *subtree)
  {
         struct node *root;
  
@@ -307,7 +307,7 @@ static struct node *node_copy_subtree(struct node *subtree)
   * index is within the bits described by the mask bits or the number of
   * contiguous bits set after the mask.  Returns NULL if there is no such node.
   */
-static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+static struct node *node_find(const struct sparsebit *s, sparsebit_idx_t idx)
  {
         struct node *nodep;
  
@@ -393,7 +393,7 @@ static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
  }
  
  /* Returns whether all the bits in the sparsebit array are set.  */
-bool sparsebit_all_set(struct sparsebit *s)
+bool sparsebit_all_set(const struct sparsebit *s)
  {
         /*
          * If any nodes there must be at least one bit set.  Only case
@@ -775,7 +775,7 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
  /* Returns whether the bit at the index given by idx, within the
   * sparsebit array is set or not.
   */
-bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+bool sparsebit_is_set(const struct sparsebit *s, sparsebit_idx_t idx)
  {
         struct node *nodep;
  
@@ -921,7 +921,7 @@ static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
   * used by test cases after they detect an unexpected condition, as a means
   * to capture diagnostic information.
   */
-static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+static void sparsebit_dump_internal(FILE *stream, const struct sparsebit *s,
         unsigned int indent)
  {
         /* Dump the contents of s */
@@ -969,7 +969,7 @@ void sparsebit_free(struct sparsebit **sbitp)
   * sparsebit_alloc().  It can though already have bits set, which
   * if different from src will be cleared.
   */
-void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+void sparsebit_copy(struct sparsebit *d, const struct sparsebit *s)
  {
         /* First clear any bits already set in the destination */
         sparsebit_clear_all(d);
@@ -981,7 +981,7 @@ void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
  }
  
  /* Returns whether num consecutive bits starting at idx are all set.  */
-bool sparsebit_is_set_num(struct sparsebit *s,
+bool sparsebit_is_set_num(const struct sparsebit *s,
         sparsebit_idx_t idx, sparsebit_num_t num)
  {
         sparsebit_idx_t next_cleared;
@@ -1005,14 +1005,14 @@ bool sparsebit_is_set_num(struct sparsebit *s,
  }
  
  /* Returns whether the bit at the index given by idx.  */
-bool sparsebit_is_clear(struct sparsebit *s,
+bool sparsebit_is_clear(const struct sparsebit *s,
         sparsebit_idx_t idx)
  {
         return !sparsebit_is_set(s, idx);
  }
  
  /* Returns whether num consecutive bits starting at idx are all cleared.  */
-bool sparsebit_is_clear_num(struct sparsebit *s,
+bool sparsebit_is_clear_num(const struct sparsebit *s,
         sparsebit_idx_t idx, sparsebit_num_t num)
  {
         sparsebit_idx_t next_set;
@@ -1041,13 +1041,13 @@ bool sparsebit_is_clear_num(struct sparsebit *s,
   * value.  Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
   * to determine if the sparsebit array has any bits set.
   */
-sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *s)
  {
         return s->num_set;
  }
  
  /* Returns whether any bit is set in the sparsebit array.  */
-bool sparsebit_any_set(struct sparsebit *s)
+bool sparsebit_any_set(const struct sparsebit *s)
  {
         /*
          * Nodes only describe set bits.  If any nodes then there
@@ -1070,20 +1070,20 @@ bool sparsebit_any_set(struct sparsebit *s)
  }
  
  /* Returns whether all the bits in the sparsebit array are cleared.  */
-bool sparsebit_all_clear(struct sparsebit *s)
+bool sparsebit_all_clear(const struct sparsebit *s)
  {
         return !sparsebit_any_set(s);
  }
  
  /* Returns whether all the bits in the sparsebit array are set.  */
-bool sparsebit_any_clear(struct sparsebit *s)
+bool sparsebit_any_clear(const struct sparsebit *s)
  {
         return !sparsebit_all_set(s);
  }
  
  /* Returns the index of the first set bit.  Abort if no bits are set.
   */
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *s)
  {
         struct node *nodep;
  
@@ -1097,7 +1097,7 @@ sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
  /* Returns the index of the first cleared bit.  Abort if
   * no bits are cleared.
   */
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *s)
  {
         struct node *nodep1, *nodep2;
  
@@ -1151,7 +1151,7 @@ sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
  /* Returns index of next bit set within s after the index given by prev.
   * Returns 0 if there are no bits after prev that are set.
   */
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *s,
         sparsebit_idx_t prev)
  {
         sparsebit_idx_t lowest_possible = prev + 1;
@@ -1244,7 +1244,7 @@ sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
  /* Returns index of next bit cleared within s after the index given by prev.
   * Returns 0 if there are no bits after prev that are cleared.
   */
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *s,
         sparsebit_idx_t prev)
  {
         sparsebit_idx_t lowest_possible = prev + 1;
@@ -1300,7 +1300,7 @@ sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
   * and returns the index of the first sequence of num consecutively set
   * bits.  Returns a value of 0 of no such sequence exists.
   */
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *s,
         sparsebit_idx_t start, sparsebit_num_t num)
  {
         sparsebit_idx_t idx;
@@ -1335,7 +1335,7 @@ sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
   * and returns the index of the first sequence of num consecutively cleared
   * bits.  Returns a value of 0 of no such sequence exists.
   */
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *s,
         sparsebit_idx_t start, sparsebit_num_t num)
  {
         sparsebit_idx_t idx;
@@ -1583,7 +1583,7 @@ static size_t display_range(FILE *stream, sparsebit_idx_t low,
   * contiguous bits.  This is done because '-' is used to specify command-line
   * options, and sometimes ranges are specified as command-line arguments.
   */
-void sparsebit_dump(FILE *stream, struct sparsebit *s,
+void sparsebit_dump(FILE *stream, const struct sparsebit *s,
         unsigned int indent)
  {
         size_t current_line_len = 0;
@@ -1681,7 +1681,7 @@ void sparsebit_dump(FILE *stream, struct sparsebit *s,
   * s.  On error, diagnostic information is printed to stderr and
   * abort is called.
   */
-void sparsebit_validate_internal(struct sparsebit *s)
+void sparsebit_validate_internal(const struct sparsebit *s)
  {
         bool error_detected = false;
         struct node *nodep, *prev = NULL;
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c

index 5d7f28b02d73bab79891b808bb46c095c52bd505..5a8f8becb12984ff52a16f01281b5d396ce318be 100644 (file)
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -392,3 +392,28 @@ char *strdup_printf(const char *fmt, ...)
  
         return str;
  }
+
+#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
+
+char *sys_get_cur_clocksource(void)
+{
+       char *clk_name;
+       struct stat st;
+       FILE *fp;
+
+       fp = fopen(CLOCKSOURCE_PATH, "r");
+       TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno);
+
+       TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d",
+                   errno);
+
+       clk_name = malloc(st.st_size);
+       TEST_ASSERT(clk_name, "failed to allocate buffer to read file");
+
+       TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d",
+                   ferror(fp));
+
+       fclose(fp);
+
+       return clk_name;
+}
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c

index 816a3fa109bfb6b608eadeda9f0827687ebbe1b5..f5af65a41c296e77f1502f32a8813fcf77dbdf20 100644 (file)
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -29,7 +29,8 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
         vm_vaddr_t vaddr;
         int i;
  
-       vaddr = __vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, MEM_REGION_DATA);
+       vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR,
+                                     MEM_REGION_DATA);
         hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
         memset(hdr, 0, sizeof(*hdr));
  
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c

index 271f6389158122a973fa597d68ee147f7169374c..f4eef6eb2dc2cc2bdefc90cbe5ac845be3a18b53 100644 (file)
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -69,7 +69,7 @@ static void *uffd_handler_thread_fn(void *arg)
                 if (pollfd[1].revents & POLLIN) {
                         r = read(pollfd[1].fd, &tmp_chr, 1);
                         TEST_ASSERT(r == 1,
-                                   "Error reading pipefd in UFFD thread\n");
+                                   "Error reading pipefd in UFFD thread");
                         break;
                 }
  
diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c

new file mode 100644 (file)

index 0000000..f31f042
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/pmu.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#include <stdint.h>
+
+#include <linux/kernel.h>
+
+#include "kvm_util.h"
+#include "pmu.h"
+
+const uint64_t intel_pmu_arch_events[] = {
+       INTEL_ARCH_CPU_CYCLES,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED,
+       INTEL_ARCH_REFERENCE_CYCLES,
+       INTEL_ARCH_LLC_REFERENCES,
+       INTEL_ARCH_LLC_MISSES,
+       INTEL_ARCH_BRANCHES_RETIRED,
+       INTEL_ARCH_BRANCHES_MISPREDICTED,
+       INTEL_ARCH_TOPDOWN_SLOTS,
+};
+kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
+
+const uint64_t amd_pmu_zen_events[] = {
+       AMD_ZEN_CORE_CYCLES,
+       AMD_ZEN_INSTRUCTIONS_RETIRED,
+       AMD_ZEN_BRANCHES_RETIRED,
+       AMD_ZEN_BRANCHES_MISPREDICTED,
+};
+kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c

index d8288374078e4b3ce888bed569c7e59192d43e7c..74a4c736c9ae1e9849c22199af984dc75872d9ef 100644 (file)
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -9,6 +9,7 @@
  #include "test_util.h"
  #include "kvm_util.h"
  #include "processor.h"
+#include "sev.h"
  
  #ifndef NUM_INTERRUPTS
  #define NUM_INTERRUPTS 256
@@ -157,6 +158,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
  {
         uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
  
+       paddr = vm_untag_gpa(vm, paddr);
+
         if (!(*pte & PTE_PRESENT_MASK)) {
                 *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
                 if (current_level == target_level)
@@ -170,10 +173,10 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
                  * this level.
                  */
                 TEST_ASSERT(current_level != target_level,
-                           "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
+                           "Cannot create hugepage at level: %u, vaddr: 0x%lx",
                             current_level, vaddr);
                 TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
-                           "Cannot create page table at level: %u, vaddr: 0x%lx\n",
+                           "Cannot create page table at level: %u, vaddr: 0x%lx",
                             current_level, vaddr);
         }
         return pte;
@@ -200,6 +203,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
                     "Physical address beyond maximum supported,\n"
                     "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
                     paddr, vm->max_gfn, vm->page_size);
+       TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
+                   "Unexpected bits in paddr: %lx", paddr);
  
         /*
          * Allocate upper level page tables, if not already present.  Return
@@ -220,8 +225,17 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
         /* Fill in page table entry. */
         pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
         TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
-                   "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
+                   "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
         *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+
+       /*
+        * Neither SEV nor TDX supports shared page tables, so only the final
+        * leaf PTE needs manually set the C/S-bit.
+        */
+       if (vm_is_gpa_protected(vm, paddr))
+               *pte |= vm->arch.c_bit;
+       else
+               *pte |= vm->arch.s_bit;
  }
  
  void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -253,7 +267,7 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
         if (*pte & PTE_LARGE_MASK) {
                 TEST_ASSERT(*level == PG_LEVEL_NONE ||
                             *level == current_level,
-                           "Unexpected hugepage at level %d\n", current_level);
+                           "Unexpected hugepage at level %d", current_level);
                 *level = current_level;
         }
  
@@ -265,6 +279,9 @@ uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
  {
         uint64_t *pml4e, *pdpe, *pde;
  
+       TEST_ASSERT(!vm->arch.is_pt_protected,
+                   "Walking page tables of protected guests is impossible");
+
         TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
                     "Invalid PG_LEVEL_* '%d'", *level);
  
@@ -496,7 +513,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
          * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
          * address bits to be zero.
          */
-       return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level));
+       return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
  }
  
  static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
@@ -560,10 +577,23 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
         vm_create_irqchip(vm);
         sync_global_to_guest(vm, host_cpu_is_intel);
         sync_global_to_guest(vm, host_cpu_is_amd);
+
+       if (vm->subtype == VM_SUBTYPE_SEV)
+               sev_vm_init(vm);
+       else if (vm->subtype == VM_SUBTYPE_SEV_ES)
+               sev_es_vm_init(vm);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       struct kvm_regs regs;
+
+       vcpu_regs_get(vcpu, &regs);
+       regs.rip = (unsigned long) guest_code;
+       vcpu_regs_set(vcpu, &regs);
  }
  
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
  {
         struct kvm_mp_state mp_state;
         struct kvm_regs regs;
@@ -597,7 +627,6 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
         vcpu_regs_get(vcpu, &regs);
         regs.rflags = regs.rflags | 0x2;
         regs.rsp = stack_vaddr;
-       regs.rip = (unsigned long) guest_code;
         vcpu_regs_set(vcpu, &regs);
  
         /* Setup the MP state */
@@ -752,12 +781,21 @@ void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
         vcpu_set_cpuid(vcpu);
  }
  
-void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr)
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+                            struct kvm_x86_cpu_property property,
+                            uint32_t value)
  {
-       struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+       struct kvm_cpuid_entry2 *entry;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
+
+       (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
+       (&entry->eax)[property.reg] |= value << property.lo_bit;
  
-       entry->eax = (entry->eax & ~0xff) | maxphyaddr;
         vcpu_set_cpuid(vcpu);
+
+       /* Sanity check that @value doesn't exceed the bounds in any way. */
+       TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
  }
  
  void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
@@ -825,7 +863,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
         struct kvm_regs regs;
  
         TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
-                   "  num: %u\n",
+                   "  num: %u",
                     num);
  
         va_start(ap, num);
@@ -1041,6 +1079,14 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
         }
  }
  
+void kvm_init_vm_address_properties(struct kvm_vm *vm)
+{
+       if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) {
+               vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
+               vm->gpa_tag_mask = vm->arch.c_bit;
+       }
+}
+
  static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
                           int dpl, unsigned short selector)
  {
@@ -1299,3 +1345,14 @@ void kvm_selftest_arch_init(void)
         host_cpu_is_intel = this_cpu_is_intel();
         host_cpu_is_amd = this_cpu_is_amd();
  }
+
+bool sys_clocksource_is_based_on_tsc(void)
+{
+       char *clk_name = sys_get_cur_clocksource();
+       bool ret = !strcmp(clk_name, "tsc\n") ||
+                  !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
+
+       free(clk_name);
+
+       return ret;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c

new file mode 100644 (file)

index 0000000..e248d33
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "sev.h"
+
+/*
+ * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
+ * -1 would then cause an underflow back to 2**64 - 1. This is expected and
+ * correct.
+ *
+ * If the last range in the sparsebit is [x, y] and we try to iterate,
+ * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
+ * and find the first range, but that's correct because the condition
+ * expression would cause us to quit the loop.
+ */
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+{
+       const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
+       const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+       const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+       sparsebit_idx_t i, j;
+
+       if (!sparsebit_any_set(protected_phy_pages))
+               return;
+
+       sev_register_encrypted_memory(vm, region);
+
+       sparsebit_for_each_set_range(protected_phy_pages, i, j) {
+               const uint64_t size = (j - i + 1) * vm->page_size;
+               const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+
+               sev_launch_update_data(vm, gpa_base + offset, size);
+       }
+}
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+{
+       struct kvm_sev_launch_start launch_start = {
+               .policy = policy,
+       };
+       struct userspace_mem_region *region;
+       struct kvm_sev_guest_status status;
+       int ctr;
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+
+       TEST_ASSERT_EQ(status.policy, policy);
+       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
+
+       hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+               encrypt_region(vm, region);
+
+       if (policy & SEV_POLICY_ES)
+               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+       vm->arch.is_pt_protected = true;
+}
+
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+{
+       struct kvm_sev_launch_measure launch_measure;
+       struct kvm_sev_guest_status guest_status;
+
+       launch_measure.len = 256;
+       launch_measure.uaddr = (__u64)measurement;
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
+       TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
+}
+
+void sev_vm_launch_finish(struct kvm_vm *vm)
+{
+       struct kvm_sev_guest_status status;
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+       TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
+                   status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
+                   "Unexpected guest state: %d", status.state);
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+                                          struct kvm_vcpu **cpu)
+{
+       struct vm_shape shape = {
+               .type = VM_TYPE_DEFAULT,
+               .mode = VM_MODE_DEFAULT,
+               .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES :
+                                                   VM_SUBTYPE_SEV,
+       };
+       struct kvm_vm *vm;
+       struct kvm_vcpu *cpus[1];
+       uint8_t measurement[512];
+
+       vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
+       *cpu = cpus[0];
+
+       sev_vm_launch(vm, policy);
+
+       /* TODO: Validate the measurement is as expected. */
+       sev_vm_launch_measure(vm, measurement);
+
+       sev_vm_launch_finish(vm);
+
+       return vm;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c

index 59d97531c9b17f7b0c1893d1aa5a63575e3fdb5c..089b8925b6b22d9929bee551435ebdca0fa24764 100644 (file)
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -54,7 +54,7 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
         /* KVM should return supported EVMCS version range */
         TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
                     (evmcs_ver & 0xff) > 0,
-                   "Incorrect EVMCS version range: %x:%x\n",
+                   "Incorrect EVMCS version range: %x:%x",
                     evmcs_ver & 0xff, evmcs_ver >> 8);
  
         return evmcs_ver;
@@ -387,10 +387,10 @@ static void nested_create_pte(struct kvm_vm *vm,
                  * this level.
                  */
                 TEST_ASSERT(current_level != target_level,
-                           "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
+                           "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
                             current_level, nested_paddr);
                 TEST_ASSERT(!pte->page_size,
-                           "Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
+                           "Cannot create page table at level: %u, nested_paddr: 0x%lx",
                             current_level, nested_paddr);
         }
  }
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c

index 9855c41ca811fa69a77f41f212ddc6086d47467c..1563619666123fed1da83a2786d309c51cde25d3 100644 (file)
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -45,7 +45,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
         /* Let the guest access its memory until a stop signal is received */
         while (!READ_ONCE(memstress_args.stop_vcpus)) {
                 ret = _vcpu_run(vcpu);
-               TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+               TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
  
                 if (get_ucall(vcpu, NULL) == UCALL_SYNC)
                         continue;
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c

index 8698d1ab60d00f72399571b62b49314f3a8c401f..579a64f97333b8d0ebd4533fdfdee1a172ef731a 100644 (file)
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -175,11 +175,11 @@ static void wait_for_vcpu(void)
         struct timespec ts;
  
         TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
-                   "clock_gettime() failed: %d\n", errno);
+                   "clock_gettime() failed: %d", errno);
  
         ts.tv_sec += 2;
         TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
-                   "sem_timedwait() failed: %d\n", errno);
+                   "sem_timedwait() failed: %d", errno);
  }
  
  static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
@@ -336,7 +336,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
  
                 gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
                 TEST_ASSERT(gpa == guest_addr,
-                           "vm_phy_pages_alloc() failed\n");
+                           "vm_phy_pages_alloc() failed");
  
                 data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
                 memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c

new file mode 100644 (file)

index 0000000..e22848f
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the riscv64 sstc timer IRQ functionality
+ *
+ * The test validates the sstc timer IRQs using vstimecmp registers.
+ * It's ported from the aarch64 arch_timer test.
+ *
+ * Copyright (c) 2024, Intel Corporation.
+ */
+
+#define _GNU_SOURCE
+
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+
+static int timer_irq = IRQ_S_TIMER;
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+       uint64_t xcnt, xcnt_diff_us, cmp;
+       unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
+       uint32_t cpu = guest_get_vcpuid();
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+       timer_irq_disable();
+
+       xcnt = timer_get_cycles();
+       cmp = timer_get_cmp();
+       xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+       /* Make sure we are dealing with the correct timer IRQ */
+       GUEST_ASSERT_EQ(intid, timer_irq);
+
+       __GUEST_ASSERT(xcnt >= cmp,
+                       "xcnt = 0x%"PRIx64", cmp = 0x%"PRIx64", xcnt_diff_us = 0x%" PRIx64,
+                       xcnt, cmp, xcnt_diff_us);
+
+       WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_run(struct test_vcpu_shared_data *shared_data)
+{
+       uint32_t irq_iter, config_iter;
+
+       shared_data->nr_iter = 0;
+       shared_data->guest_stage = 0;
+
+       for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+               /* Setup the next interrupt */
+               timer_set_next_cmp_ms(test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cycles();
+               timer_irq_enable();
+
+               /* Setup a timeout for the interrupt to arrive */
+               udelay(msecs_to_usecs(test_args.timer_period_ms) +
+                       test_args.timer_err_margin_us);
+
+               irq_iter = READ_ONCE(shared_data->nr_iter);
+               __GUEST_ASSERT(config_iter + 1 == irq_iter,
+                               "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+                               "  Guest timer interrupt was not trigged within the specified\n"
+                               "  interval, try to increase the error margin by [-e] option.\n",
+                               config_iter + 1, irq_iter);
+       }
+}
+
+static void guest_code(void)
+{
+       uint32_t cpu = guest_get_vcpuid();
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+       timer_irq_disable();
+       local_irq_enable();
+
+       guest_run(shared_data);
+
+       GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+       struct kvm_vm *vm;
+       int nr_vcpus = test_args.nr_vcpus;
+
+       vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+       __TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)),
+                                  "SSTC not available, skipping test\n");
+
+       vm_init_vector_tables(vm);
+       vm_install_interrupt_handler(vm, guest_irq_handler);
+
+       for (int i = 0; i < nr_vcpus; i++)
+               vcpu_init_vector_tables(vcpus[i]);
+
+       /* Initialize guest timer frequency. */
+       vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq);
+       sync_global_to_guest(vm, timer_freq);
+       pr_debug("timer_freq: %lu\n", timer_freq);
+
+       /* Make all the test's cmdline args visible to the guest */
+       sync_global_to_guest(vm, test_args);
+
+       return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c

index 6652108816db462160230a17c4da32fa078526dc..b882b7b9b78506b04d60a2e6870e036272584f87 100644 (file)
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -47,17 +47,46 @@ bool filter_reg(__u64 reg)
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKB:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIFENCEI:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
         case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKR:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKG:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNED:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHA:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHB:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSED:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSH:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKT:
         /*
          * Like ISA_EXT registers, SBI_EXT registers are only visible when the
          * host supports them and disabling them does not affect the visibility
@@ -96,15 +125,6 @@ bool check_reject_set(int err)
         return err == EINVAL;
  }
  
-static bool vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext_id)
-{
-       int ret;
-       unsigned long value;
-
-       ret = __vcpu_get_reg(vcpu, ext_id, &value);
-       return (ret) ? false : !!value;
-}
-
  void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
  {
         unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
@@ -149,8 +169,8 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
                 __vcpu_set_reg(vcpu, feature, 1);
  
                 /* Double check whether the desired extension was enabled */
-               __TEST_REQUIRE(vcpu_has_ext(vcpu, feature),
-                              "%s not available, skipping tests\n", s->name);
+               __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
+                              "%s not available, skipping tests", s->name);
         }
  }
  
@@ -392,17 +412,46 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
                 KVM_ISA_EXT_ARR(SVINVAL),
                 KVM_ISA_EXT_ARR(SVNAPOT),
                 KVM_ISA_EXT_ARR(SVPBMT),
+               KVM_ISA_EXT_ARR(ZACAS),
                 KVM_ISA_EXT_ARR(ZBA),
                 KVM_ISA_EXT_ARR(ZBB),
+               KVM_ISA_EXT_ARR(ZBC),
+               KVM_ISA_EXT_ARR(ZBKB),
+               KVM_ISA_EXT_ARR(ZBKC),
+               KVM_ISA_EXT_ARR(ZBKX),
                 KVM_ISA_EXT_ARR(ZBS),
+               KVM_ISA_EXT_ARR(ZFA),
+               KVM_ISA_EXT_ARR(ZFH),
+               KVM_ISA_EXT_ARR(ZFHMIN),
                 KVM_ISA_EXT_ARR(ZICBOM),
                 KVM_ISA_EXT_ARR(ZICBOZ),
                 KVM_ISA_EXT_ARR(ZICNTR),
                 KVM_ISA_EXT_ARR(ZICOND),
                 KVM_ISA_EXT_ARR(ZICSR),
                 KVM_ISA_EXT_ARR(ZIFENCEI),
+               KVM_ISA_EXT_ARR(ZIHINTNTL),
                 KVM_ISA_EXT_ARR(ZIHINTPAUSE),
                 KVM_ISA_EXT_ARR(ZIHPM),
+               KVM_ISA_EXT_ARR(ZKND),
+               KVM_ISA_EXT_ARR(ZKNE),
+               KVM_ISA_EXT_ARR(ZKNH),
+               KVM_ISA_EXT_ARR(ZKR),
+               KVM_ISA_EXT_ARR(ZKSED),
+               KVM_ISA_EXT_ARR(ZKSH),
+               KVM_ISA_EXT_ARR(ZKT),
+               KVM_ISA_EXT_ARR(ZTSO),
+               KVM_ISA_EXT_ARR(ZVBB),
+               KVM_ISA_EXT_ARR(ZVBC),
+               KVM_ISA_EXT_ARR(ZVFH),
+               KVM_ISA_EXT_ARR(ZVFHMIN),
+               KVM_ISA_EXT_ARR(ZVKB),
+               KVM_ISA_EXT_ARR(ZVKG),
+               KVM_ISA_EXT_ARR(ZVKNED),
+               KVM_ISA_EXT_ARR(ZVKNHA),
+               KVM_ISA_EXT_ARR(ZVKNHB),
+               KVM_ISA_EXT_ARR(ZVKSED),
+               KVM_ISA_EXT_ARR(ZVKSH),
+               KVM_ISA_EXT_ARR(ZVKT),
         };
  
         if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name))
@@ -886,17 +935,46 @@ KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
  KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
  KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
  KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
+KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
  KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
  KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
  KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
  KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
  KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
  KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
  KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND);
  KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR);
  KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI);
+KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL);
  KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE);
  KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE);
+KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT);
+KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkg, ZVKG);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkned, ZVKNED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvknha, ZVKNHA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvknhb, ZVKNHB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvksed, ZVKSED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvksh, ZVKSH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkt, ZVKT);
  
  struct vcpu_reg_list *vcpu_configs[] = {
         &config_sbi_base,
@@ -912,16 +990,45 @@ struct vcpu_reg_list *vcpu_configs[] = {
         &config_svinval,
         &config_svnapot,
         &config_svpbmt,
+       &config_zacas,
         &config_zba,
         &config_zbb,
+       &config_zbc,
+       &config_zbkb,
+       &config_zbkc,
+       &config_zbkx,
         &config_zbs,
+       &config_zfa,
+       &config_zfh,
+       &config_zfhmin,
         &config_zicbom,
         &config_zicboz,
         &config_zicntr,
         &config_zicond,
         &config_zicsr,
         &config_zifencei,
+       &config_zihintntl,
         &config_zihintpause,
         &config_zihpm,
+       &config_zknd,
+       &config_zkne,
+       &config_zknh,
+       &config_zkr,
+       &config_zksed,
+       &config_zksh,
+       &config_zkt,
+       &config_ztso,
+       &config_zvbb,
+       &config_zvbc,
+       &config_zvfh,
+       &config_zvfhmin,
+       &config_zvkb,
+       &config_zvkg,
+       &config_zvkned,
+       &config_zvknha,
+       &config_zvknhb,
+       &config_zvksed,
+       &config_zvksh,
+       &config_zvkt,
  };
  int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c

index f74e76d03b7e306667221ee9c5411bda2af417d5..28f97fb520441476c374dffc0bcf24bf6553a021 100644 (file)
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -245,7 +245,7 @@ int main(int argc, char *argv[])
                 } while (snapshot != atomic_read(&seq_cnt));
  
                 TEST_ASSERT(rseq_cpu == cpu,
-                           "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
+                           "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
         }
  
         /*
@@ -256,7 +256,7 @@ int main(int argc, char *argv[])
          * migrations given the 1us+ delay in the migration task.
          */
         TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
-                   "Only performed %d KVM_RUNs, task stalled too much?\n", i);
+                   "Only performed %d KVM_RUNs, task stalled too much?", i);
  
         pthread_join(migration_thread, NULL);
  
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c

index e41e2cb8ffa9797c470fb061a34fb106b687a47d..357943f2bea87fff66384bcf381cf9100a5a3a6b 100644 (file)
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -78,7 +78,7 @@ static void assert_noirq(struct kvm_vcpu *vcpu)
          * (notably, the emergency call interrupt we have injected) should
          * be cleared by the resets, so this should be 0.
          */
-       TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno);
+       TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
         TEST_ASSERT(!irqs, "IRQ pending");
  }
  
@@ -199,7 +199,7 @@ static void inject_irq(struct kvm_vcpu *vcpu)
         irq->type = KVM_S390_INT_EMERGENCY;
         irq->u.emerg.code = vcpu->id;
         irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
-       TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno);
+       TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
  }
  
  static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c

index 636a70ddac1ea36151cb57a0dfd74ddcca33de14..43fb25ddc3eca3a83ba14c1905972243c4bda793 100644 (file)
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -39,13 +39,13 @@ static void guest_code(void)
  #define REG_COMPARE(reg) \
         TEST_ASSERT(left->reg == right->reg, \
                     "Register " #reg \
-                   " values did not match: 0x%llx, 0x%llx\n", \
+                   " values did not match: 0x%llx, 0x%llx", \
                     left->reg, right->reg)
  
  #define REG_COMPARE32(reg) \
         TEST_ASSERT(left->reg == right->reg, \
                     "Register " #reg \
-                   " values did not match: 0x%x, 0x%x\n", \
+                   " values did not match: 0x%x, 0x%x", \
                     left->reg, right->reg)
  
  
@@ -82,14 +82,14 @@ void test_read_invalid(struct kvm_vcpu *vcpu)
         run->kvm_valid_regs = INVALID_SYNC_FIELD;
         rv = _vcpu_run(vcpu);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
                     rv);
         run->kvm_valid_regs = 0;
  
         run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
         rv = _vcpu_run(vcpu);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
                     rv);
         run->kvm_valid_regs = 0;
  }
@@ -103,14 +103,14 @@ void test_set_invalid(struct kvm_vcpu *vcpu)
         run->kvm_dirty_regs = INVALID_SYNC_FIELD;
         rv = _vcpu_run(vcpu);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
                     rv);
         run->kvm_dirty_regs = 0;
  
         run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
         rv = _vcpu_run(vcpu);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
                     rv);
         run->kvm_dirty_regs = 0;
  }
@@ -125,12 +125,12 @@ void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
         /* Request and verify all valid register sets. */
         run->kvm_valid_regs = TEST_SYNC_FIELDS;
         rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
         TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
                     (run->s390_sieic.ipa >> 8) == 0x83 &&
                     (run->s390_sieic.ipb >> 16) == 0x501,
-                   "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n",
+                   "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
                     run->s390_sieic.icptcode, run->s390_sieic.ipa,
                     run->s390_sieic.ipb);
  
@@ -161,7 +161,7 @@ void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
         }
  
         rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
         TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
                     "r11 sync regs value incorrect 0x%llx.",
@@ -193,7 +193,7 @@ void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
         run->s.regs.gprs[11] = 0xDEADBEEF;
         run->s.regs.diag318 = 0x4B1D;
         rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
         TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
                     "r11 sync regs value incorrect 0x%llx.",
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c

index 075b80dbe2370d2ff472685f4b02b4e1243d7123..06b43ed23580b67c060aeaadea11b06641a629c3 100644 (file)
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -98,11 +98,11 @@ static void wait_for_vcpu(void)
         struct timespec ts;
  
         TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
-                   "clock_gettime() failed: %d\n", errno);
+                   "clock_gettime() failed: %d", errno);
  
         ts.tv_sec += 2;
         TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
-                   "sem_timedwait() failed: %d\n", errno);
+                   "sem_timedwait() failed: %d", errno);
  
         /* Wait for the vCPU thread to reenter the guest. */
         usleep(100000);
@@ -302,7 +302,7 @@ static void test_delete_memory_region(void)
         if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
                 TEST_ASSERT(regs.rip >= final_rip_start &&
                             regs.rip < final_rip_end,
-                           "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+                           "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx",
                             final_rip_start, final_rip_end, regs.rip);
  
         kvm_vm_free(vm);
@@ -367,11 +367,21 @@ static void test_invalid_memory_region_flags(void)
         }
  
         if (supported_flags & KVM_MEM_GUEST_MEMFD) {
+               int guest_memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0);
+
                 r = __vm_set_user_memory_region2(vm, 0,
                                                  KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD,
-                                                0, MEM_REGION_SIZE, NULL, 0, 0);
+                                                0, MEM_REGION_SIZE, NULL, guest_memfd, 0);
                 TEST_ASSERT(r && errno == EINVAL,
                             "KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported");
+
+               r = __vm_set_user_memory_region2(vm, 0,
+                                                KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD,
+                                                0, MEM_REGION_SIZE, NULL, guest_memfd, 0);
+               TEST_ASSERT(r && errno == EINVAL,
+                           "KVM_SET_USER_MEMORY_REGION2 should have failed, read-only GUEST_MEMFD memslots are unsupported");
+
+               close(guest_memfd);
         }
  }
  
diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c

index 7f5b330b6a1b182f7a5890d3f7b67b7d2535accc..513d421a9bff85a96e619f187310769de7e48490 100644 (file)
--- a/tools/testing/selftests/kvm/system_counter_offset_test.c
+++ b/tools/testing/selftests/kvm/system_counter_offset_test.c
@@ -108,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu)
                         handle_abort(&uc);
                         return;
                 default:
-                       TEST_ASSERT(0, "unhandled ucall %ld\n",
+                       TEST_ASSERT(0, "unhandled ucall %ld",
                                     get_ucall(vcpu, &uc));
                 }
         }
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c

index 11329e5ff945eb1c383c7237ebc349712734dd50..eae521f050e09fd6f69e896c03cf381ac05c4d71 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -221,7 +221,7 @@ int main(int argc, char *argv[])
         vm_vaddr_t amx_cfg, tiledata, xstate;
         struct ucall uc;
         u32 amx_offset;
-       int stage, ret;
+       int ret;
  
         /*
          * Note, all off-by-default features must be enabled before anything
@@ -263,7 +263,7 @@ int main(int argc, char *argv[])
         memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
         vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
  
-       for (stage = 1; ; stage++) {
+       for (;;) {
                 vcpu_run(vcpu);
                 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
  
@@ -296,7 +296,7 @@ int main(int argc, char *argv[])
                                 void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
                                 /* Only check TMM0 register, 1 tile */
                                 ret = memcmp(amx_start, tiles_data, TILE_SIZE);
-                               TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret);
+                               TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
                                 kvm_x86_state_cleanup(state);
                                 break;
                         case 9:
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c

index 3b34d8156d1c97a879d497a12eefe4f110628973..8c579ce714e9a7ce3982123b089856c3b5963d43 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -84,7 +84,7 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
  
                 TEST_ASSERT(e1->function == e2->function &&
                             e1->index == e2->index && e1->flags == e2->flags,
-                           "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n",
+                           "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x",
                             i, e1->function, e1->index, e1->flags,
                             e2->function, e2->index, e2->flags);
  
@@ -170,7 +170,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu)
  
         vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
         TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
-                   "KVM didn't update nent on success, wanted %u, got %u\n",
+                   "KVM didn't update nent on success, wanted %u, got %u",
                     vcpu->cpuid->nent, cpuid->nent);
  
         for (i = 0; i < vcpu->cpuid->nent; i++) {
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c

index 634c6bfcd5720717e0d4cc93e9cdf92ae96fb1d0..ee3b384b991c8be2957bdcf56fa52aa6a4a00a26 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -92,7 +92,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         uint64_t host_num_pages;
         uint64_t pages_per_slot;
         int i;
-       uint64_t total_4k_pages;
         struct kvm_page_stats stats_populated;
         struct kvm_page_stats stats_dirty_logging_enabled;
         struct kvm_page_stats stats_dirty_pass[ITERATIONS];
@@ -107,6 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
         host_num_pages = vm_num_host_pages(mode, guest_num_pages);
         pages_per_slot = host_num_pages / SLOTS;
+       TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+       TEST_ASSERT(!(host_num_pages % 512),
+                   "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
  
         bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
  
@@ -165,10 +167,8 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         memstress_free_bitmaps(bitmaps, SLOTS);
         memstress_destroy_vm(vm);
  
-       /* Make assertions about the page counts. */
-       total_4k_pages = stats_populated.pages_4k;
-       total_4k_pages += stats_populated.pages_2m * 512;
-       total_4k_pages += stats_populated.pages_1g * 512 * 512;
+       TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+                       stats_populated.pages_1g * 512 * 512), host_num_pages);
  
         /*
          * Check that all huge pages were split. Since large pages can only
@@ -180,19 +180,22 @@ static void run_test(enum vm_guest_mode mode, void *unused)
          */
         if (dirty_log_manual_caps) {
                 TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
-               TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+               TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
+                           host_num_pages, stats_clear_pass[0].pages_4k);
                 TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
         } else {
                 TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
-               TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+               TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
+                           host_num_pages, stats_dirty_logging_enabled.pages_4k);
         }
  
         /*
          * Once dirty logging is disabled and the vCPUs have touched all their
-        * memory again, the page counts should be the same as they were
+        * memory again, the hugepage counts should be the same as they were
          * right after initial population of memory.
          */
-       TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
         TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
         TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
  }
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c

index 0f728f05ea82f7dc8051ed812cfde79639ae2385..f3c2239228b10e3ba7cbfe9f8406cfbbe5f58825 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -9,6 +9,7 @@
  #include <linux/stringify.h>
  #include <stdint.h>
  
+#include "kvm_test_harness.h"
  #include "apic.h"
  #include "test_util.h"
  #include "kvm_util.h"
@@ -83,6 +84,8 @@ static void guest_main(void)
         GUEST_DONE();
  }
  
+KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
+
  static void enter_guest(struct kvm_vcpu *vcpu)
  {
         struct kvm_run *run = vcpu->run;
@@ -103,14 +106,11 @@ static void enter_guest(struct kvm_vcpu *vcpu)
         }
  }
  
-static void test_fix_hypercall(bool disable_quirk)
+static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+       struct kvm_vm *vm = vcpu->vm;
  
-       vm_init_descriptor_tables(vcpu->vm);
+       vm_init_descriptor_tables(vm);
         vcpu_init_descriptor_tables(vcpu);
         vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
  
@@ -126,10 +126,19 @@ static void test_fix_hypercall(bool disable_quirk)
         enter_guest(vcpu);
  }
  
-int main(void)
+KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
+{
+       test_fix_hypercall(vcpu, false);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
+{
+       test_fix_hypercall(vcpu, true);
+}
+
+int main(int argc, char *argv[])
  {
         TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
  
-       test_fix_hypercall(false);
-       test_fix_hypercall(true);
+       return test_harness_run(argc, argv);
  }
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h

index 0a1573d52882b7b127307a829b0d1dc4d1af6560..37b1a9f5286447a1bb4a8c8f7a69c807ac0d6ced 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h
+++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
@@ -41,7 +41,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
  
         insn_bytes = run->emulation_failure.insn_bytes;
         TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
-                   "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n",
+                   "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
                     insn_bytes[0], insn_bytes[1]);
  
         vcpu_regs_get(vcpu, &regs);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c

index f5e1e98f04f9ef0a00f3d80ba8a0bd94f90feffd..e058bc676cd6930d54593093579fff15e1930d91 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -212,6 +212,7 @@ int main(void)
         int stage;
  
         TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
  
         vm = vm_create_with_one_vcpu(&vcpu, guest_main);
  
@@ -220,7 +221,7 @@ int main(void)
         tsc_page_gva = vm_vaddr_alloc_page(vm);
         memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
         TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
-               "TSC page has to be page aligned\n");
+               "TSC page has to be page aligned");
         vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
  
         host_check_tsc_msr_rdtsc(vcpu);
@@ -237,7 +238,7 @@ int main(void)
                         break;
                 case UCALL_DONE:
                         /* Keep in sync with guest_main() */
-                       TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n",
+                       TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
                                     stage);
                         goto out;
                 default:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c

index 4f4193fc74ffa29454c193ed741603c5fef1004b..b923a285e96f9492108ac17c21a070a4dd4ffa61 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -454,7 +454,7 @@ static void guest_test_msrs_access(void)
                 case 44:
                         /* MSR is not available when CPUID feature bit is unset */
                         if (!has_invtsc)
-                               continue;
+                               goto next_stage;
                         msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
                         msr->write = false;
                         msr->fault_expected = true;
@@ -462,7 +462,7 @@ static void guest_test_msrs_access(void)
                 case 45:
                         /* MSR is vailable when CPUID feature bit is set */
                         if (!has_invtsc)
-                               continue;
+                               goto next_stage;
                         vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
                         msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
                         msr->write = false;
@@ -471,7 +471,7 @@ static void guest_test_msrs_access(void)
                 case 46:
                         /* Writing bits other than 0 is forbidden */
                         if (!has_invtsc)
-                               continue;
+                               goto next_stage;
                         msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
                         msr->write = true;
                         msr->write_val = 0xdeadbeef;
@@ -480,7 +480,7 @@ static void guest_test_msrs_access(void)
                 case 47:
                         /* Setting bit 0 enables the feature */
                         if (!has_invtsc)
-                               continue;
+                               goto next_stage;
                         msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
                         msr->write = true;
                         msr->write_val = 1;
@@ -513,6 +513,7 @@ static void guest_test_msrs_access(void)
                         return;
                 }
  
+next_stage:
                 stage++;
                 kvm_vm_free(vm);
         }
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c

index 65e5f4c05068a8fff78caf386f76ef107ac407f2..f1617762c22fecaff954824c3ca09cebec1eb78d 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
@@ -289,7 +289,7 @@ int main(int argc, char *argv[])
                 switch (get_ucall(vcpu[0], &uc)) {
                 case UCALL_SYNC:
                         TEST_ASSERT(uc.args[1] == stage,
-                                   "Unexpected stage: %ld (%d expected)\n",
+                                   "Unexpected stage: %ld (%d expected)",
                                     uc.args[1], stage);
                         break;
                 case UCALL_DONE:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c

index c4443f71f8dd01f6aafde337c618d414c61c1ce3..05b56095cf76f6b8857f7f83bccdf68b3787d759 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
@@ -658,7 +658,7 @@ int main(int argc, char *argv[])
                 switch (get_ucall(vcpu[0], &uc)) {
                 case UCALL_SYNC:
                         TEST_ASSERT(uc.args[1] == stage,
-                                   "Unexpected stage: %ld (%d expected)\n",
+                                   "Unexpected stage: %ld (%d expected)",
                                     uc.args[1], stage);
                         break;
                 case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c

index 1778704360a6634ee7df3dfacd8f63fd3d236cab..5bc12222d87af696dc2f35e8874c4eb3f58a4f7e 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
@@ -92,7 +92,7 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
                                 break;
                 } while (errno == EINTR);
  
-               TEST_ASSERT(!r, "clock_gettime() failed: %d\n", r);
+               TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
  
                 data.realtime = ts.tv_sec * NSEC_PER_SEC;
                 data.realtime += ts.tv_nsec;
@@ -127,47 +127,11 @@ static void enter_guest(struct kvm_vcpu *vcpu)
                         handle_abort(&uc);
                         return;
                 default:
-                       TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd);
+                       TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
                 }
         }
  }
  
-#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
-
-static void check_clocksource(void)
-{
-       char *clk_name;
-       struct stat st;
-       FILE *fp;
-
-       fp = fopen(CLOCKSOURCE_PATH, "r");
-       if (!fp) {
-               pr_info("failed to open clocksource file: %d; assuming TSC.\n",
-                       errno);
-               return;
-       }
-
-       if (fstat(fileno(fp), &st)) {
-               pr_info("failed to stat clocksource file: %d; assuming TSC.\n",
-                       errno);
-               goto out;
-       }
-
-       clk_name = malloc(st.st_size);
-       TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n");
-
-       if (!fgets(clk_name, st.st_size, fp)) {
-               pr_info("failed to read clocksource file: %d; assuming TSC.\n",
-                       ferror(fp));
-               goto out;
-       }
-
-       TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size),
-                   "clocksource not supported: %s", clk_name);
-out:
-       fclose(fp);
-}
-
  int main(void)
  {
         struct kvm_vcpu *vcpu;
@@ -179,7 +143,7 @@ int main(void)
         flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
         TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
  
-       check_clocksource();
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
  
         vm = vm_create_with_one_vcpu(&vcpu, guest_main);
  
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c

index 83e25bccc139decff79249e99a336ef2cb8cc820..17bbb96fc4dfcbc25e2ed62e5010d2d71a9d1746 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -257,9 +257,9 @@ int main(int argc, char **argv)
         TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
  
         __TEST_REQUIRE(token == MAGIC_TOKEN,
-                      "This test must be run with the magic token %d.\n"
-                      "This is done by nx_huge_pages_test.sh, which\n"
-                      "also handles environment setup for the test.", MAGIC_TOKEN);
+                      "This test must be run with the magic token via '-t %d'.\n"
+                      "Running via nx_huge_pages_test.sh, which also handles "
+                      "environment setup, is strongly recommended.", MAGIC_TOKEN);
  
         run_test(reclaim_period_ms, false, reboot_permissions);
         run_test(reclaim_period_ms, true, reboot_permissions);
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c

index c9a07963d68aaedcc6b45d4e00e932e5981645b0..87011965dc41664d46bb285fa5dd408876fe20c2 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -44,7 +44,7 @@ static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
  
         get_ucall(vcpu, &uc);
         TEST_ASSERT(uc.cmd == UCALL_SYNC,
-                       "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
+                       "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
         TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
                 MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
                 "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c

new file mode 100644 (file)

index 0000000..29609b5
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <x86intrin.h>
+
+#include "pmu.h"
+#include "processor.h"
+
+/* Number of LOOP instructions for the guest measurement payload. */
+#define NUM_BRANCHES           10
+/*
+ * Number of "extra" instructions that will be counted, i.e. the number of
+ * instructions that are needed to set up the loop and then disabled the
+ * counter.  1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
+ */
+#define NUM_EXTRA_INSNS                7
+#define NUM_INSNS_RETIRED      (NUM_BRANCHES + NUM_EXTRA_INSNS)
+
+static uint8_t kvm_pmu_version;
+static bool kvm_has_perf_caps;
+static bool is_forced_emulation_enabled;
+
+static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+                                                 void *guest_code,
+                                                 uint8_t pmu_version,
+                                                 uint64_t perf_capabilities)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(*vcpu);
+
+       sync_global_to_guest(vm, kvm_pmu_version);
+       sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+       /*
+        * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
+        * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
+        */
+       if (kvm_has_perf_caps)
+               vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+       vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
+       return vm;
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       do {
+               vcpu_run(vcpu);
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_PRINTF:
+                       pr_info("%s", uc.buffer);
+                       break;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+               }
+       } while (uc.cmd != UCALL_DONE);
+}
+
+static uint8_t guest_get_pmu_version(void)
+{
+       /*
+        * Return the effective PMU version, i.e. the minimum between what KVM
+        * supports and what is enumerated to the guest.  The host deliberately
+        * advertises a PMU version to the guest beyond what is actually
+        * supported by KVM to verify KVM doesn't freak out and do something
+        * bizarre with an architecturally valid, but unsupported, version.
+        */
+       return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+}
+
+/*
+ * If an architectural event is supported and guaranteed to generate at least
+ * one "hit, assert that its count is non-zero.  If an event isn't supported or
+ * the test can't guarantee the associated action will occur, then all bets are
+ * off regarding the count, i.e. no checks can be done.
+ *
+ * Sanity check that in all cases, the event doesn't count when it's disabled,
+ * and that KVM correctly emulates the write of an arbitrary value.
+ */
+static void guest_assert_event_count(uint8_t idx,
+                                    struct kvm_x86_pmu_feature event,
+                                    uint32_t pmc, uint32_t pmc_msr)
+{
+       uint64_t count;
+
+       count = _rdpmc(pmc);
+       if (!this_pmu_has(event))
+               goto sanity_checks;
+
+       switch (idx) {
+       case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
+               GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+               break;
+       case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
+               GUEST_ASSERT_EQ(count, NUM_BRANCHES);
+               break;
+       case INTEL_ARCH_LLC_REFERENCES_INDEX:
+       case INTEL_ARCH_LLC_MISSES_INDEX:
+               if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+                   !this_cpu_has(X86_FEATURE_CLFLUSH))
+                       break;
+               fallthrough;
+       case INTEL_ARCH_CPU_CYCLES_INDEX:
+       case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+               GUEST_ASSERT_NE(count, 0);
+               break;
+       case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+               GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+               break;
+       default:
+               break;
+       }
+
+sanity_checks:
+       __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+       GUEST_ASSERT_EQ(_rdpmc(pmc), count);
+
+       wrmsr(pmc_msr, 0xdead);
+       GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
+}
+
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence.  Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * start of the loop to force LLC references and misses, i.e. to allow testing
+ * that those events actually count.
+ *
+ * If forced emulation is enabled (and specified), force emulation on a subset
+ * of the measured code to verify that KVM correctly emulates instructions and
+ * branches retired events in conjunction with hardware also counting said
+ * events.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)                                \
+do {                                                                           \
+       __asm__ __volatile__("wrmsr\n\t"                                        \
+                            clflush "\n\t"                                     \
+                            "mfence\n\t"                                       \
+                            "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \
+                            FEP "loop .\n\t"                                   \
+                            FEP "mov %%edi, %%ecx\n\t"                         \
+                            FEP "xor %%eax, %%eax\n\t"                         \
+                            FEP "xor %%edx, %%edx\n\t"                         \
+                            "wrmsr\n\t"                                        \
+                            :: "a"((uint32_t)_value), "d"(_value >> 32),       \
+                               "c"(_msr), "D"(_msr)                            \
+       );                                                                      \
+} while (0)
+
+#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do {                                                                           \
+       wrmsr(pmc_msr, 0);                                                      \
+                                                                               \
+       if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))                               \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP);   \
+       else if (this_cpu_has(X86_FEATURE_CLFLUSH))                             \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP);      \
+       else                                                                    \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);             \
+                                                                               \
+       guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);                 \
+} while (0)
+
+static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
+                                   uint32_t pmc, uint32_t pmc_msr,
+                                   uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+{
+       GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+
+       if (is_forced_emulation_enabled)
+               GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
+}
+
+#define X86_PMU_FEATURE_NULL                                           \
+({                                                                     \
+       struct kvm_x86_pmu_feature feature = {};                        \
+                                                                       \
+       feature;                                                        \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+       return !(*(u64 *)&event);
+}
+
+static void guest_test_arch_event(uint8_t idx)
+{
+       const struct {
+               struct kvm_x86_pmu_feature gp_event;
+               struct kvm_x86_pmu_feature fixed_event;
+       } intel_event_to_feature[] = {
+               [INTEL_ARCH_CPU_CYCLES_INDEX]            = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+               [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+               /*
+                * Note, the fixed counter for reference cycles is NOT the same
+                * as the general purpose architectural event.  The fixed counter
+                * explicitly counts at the same frequency as the TSC, whereas
+                * the GP event counts at a fixed, but uarch specific, frequency.
+                * Bundle them here for simplicity.
+                */
+               [INTEL_ARCH_REFERENCE_CYCLES_INDEX]      = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+               [INTEL_ARCH_LLC_REFERENCES_INDEX]        = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_LLC_MISSES_INDEX]            = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_BRANCHES_RETIRED_INDEX]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_TOPDOWN_SLOTS_INDEX]         = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+       };
+
+       uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+       uint32_t pmu_version = guest_get_pmu_version();
+       /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
+       bool guest_has_perf_global_ctrl = pmu_version >= 2;
+       struct kvm_x86_pmu_feature gp_event, fixed_event;
+       uint32_t base_pmc_msr;
+       unsigned int i;
+
+       /* The host side shouldn't invoke this without a guest PMU. */
+       GUEST_ASSERT(pmu_version);
+
+       if (this_cpu_has(X86_FEATURE_PDCM) &&
+           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+               base_pmc_msr = MSR_IA32_PMC0;
+       else
+               base_pmc_msr = MSR_IA32_PERFCTR0;
+
+       gp_event = intel_event_to_feature[idx].gp_event;
+       GUEST_ASSERT_EQ(idx, gp_event.f.bit);
+
+       GUEST_ASSERT(nr_gp_counters);
+
+       for (i = 0; i < nr_gp_counters; i++) {
+               uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+                                   ARCH_PERFMON_EVENTSEL_ENABLE |
+                                   intel_pmu_arch_events[idx];
+
+               wrmsr(MSR_P6_EVNTSEL0 + i, 0);
+               if (guest_has_perf_global_ctrl)
+                       wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
+
+               __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+                                       MSR_P6_EVNTSEL0 + i, eventsel);
+       }
+
+       if (!guest_has_perf_global_ctrl)
+               return;
+
+       fixed_event = intel_event_to_feature[idx].fixed_event;
+       if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+               return;
+
+       i = fixed_event.f.bit;
+
+       wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+
+       __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+                               MSR_CORE_PERF_FIXED_CTR0 + i,
+                               MSR_CORE_PERF_GLOBAL_CTRL,
+                               FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+}
+
+static void guest_test_arch_events(void)
+{
+       uint8_t i;
+
+       for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
+               guest_test_arch_event(i);
+
+       GUEST_DONE();
+}
+
+static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
+                            uint8_t length, uint8_t unavailable_mask)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /* Testing arch events requires a vPMU (there are no negative tests). */
+       if (!pmu_version)
+               return;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
+                               length);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
+                               unavailable_mask);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
+ * that aren't defined counter MSRs *probably* don't exist, but there's no
+ * guarantee that currently undefined MSR indices won't be used for something
+ * other than PMCs in the future.
+ */
+#define MAX_NR_GP_COUNTERS     8
+#define MAX_NR_FIXED_COUNTERS  3
+
+#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)              \
+__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,                      \
+              "Expected %s on " #insn "(0x%x), got vector %u",                 \
+              expect_gp ? "#GP" : "no fault", msr, vector)                     \
+
+#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)                       \
+       __GUEST_ASSERT(val == expected_val,                                     \
+                      "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",    \
+                      msr, expected_val, val);
+
+static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
+                            uint64_t expected_val)
+{
+       uint8_t vector;
+       uint64_t val;
+
+       vector = rdpmc_safe(rdpmc_idx, &val);
+       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+       if (expect_success)
+               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+
+       if (!is_forced_emulation_enabled)
+               return;
+
+       vector = rdpmc_safe_fep(rdpmc_idx, &val);
+       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+       if (expect_success)
+               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+}
+
+static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
+                                uint8_t nr_counters, uint32_t or_mask)
+{
+       const bool pmu_has_fast_mode = !guest_get_pmu_version();
+       uint8_t i;
+
+       for (i = 0; i < nr_possible_counters; i++) {
+               /*
+                * TODO: Test a value that validates full-width writes and the
+                * width of the counters.
+                */
+               const uint64_t test_val = 0xffff;
+               const uint32_t msr = base_msr + i;
+
+               /*
+                * Fixed counters are supported if the counter is less than the
+                * number of enumerated contiguous counters *or* the counter is
+                * explicitly enumerated in the supported counters mask.
+                */
+               const bool expect_success = i < nr_counters || (or_mask & BIT(i));
+
+               /*
+                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
+                * unsupported, i.e. doesn't #GP and reads back '0'.
+                */
+               const uint64_t expected_val = expect_success ? test_val : 0;
+               const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
+                                      msr != MSR_P6_PERFCTR1;
+               uint32_t rdpmc_idx;
+               uint8_t vector;
+               uint64_t val;
+
+               vector = wrmsr_safe(msr, test_val);
+               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+
+               vector = rdmsr_safe(msr, &val);
+               GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
+
+               /* On #GP, the result of RDMSR is undefined. */
+               if (!expect_gp)
+                       GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
+
+               /*
+                * Redo the read tests with RDPMC, which has different indexing
+                * semantics and additional capabilities.
+                */
+               rdpmc_idx = i;
+               if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
+                       rdpmc_idx |= INTEL_RDPMC_FIXED;
+
+               guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
+
+               /*
+                * KVM doesn't support non-architectural PMUs, i.e. it should
+                * impossible to have fast mode RDPMC.  Verify that attempting
+                * to use fast RDPMC always #GPs.
+                */
+               GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
+               rdpmc_idx |= INTEL_RDPMC_FAST;
+               guest_test_rdpmc(rdpmc_idx, false, -1ull);
+
+               vector = wrmsr_safe(msr, 0);
+               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+       }
+}
+
+static void guest_test_gp_counters(void)
+{
+       uint8_t nr_gp_counters = 0;
+       uint32_t base_msr;
+
+       if (guest_get_pmu_version())
+               nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+
+       if (this_cpu_has(X86_FEATURE_PDCM) &&
+           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+               base_msr = MSR_IA32_PMC0;
+       else
+               base_msr = MSR_IA32_PERFCTR0;
+
+       guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
+       GUEST_DONE();
+}
+
+static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+                            uint8_t nr_gp_counters)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
+                               nr_gp_counters);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void guest_test_fixed_counters(void)
+{
+       uint64_t supported_bitmask = 0;
+       uint8_t nr_fixed_counters = 0;
+       uint8_t i;
+
+       /* Fixed counters require Architectural vPMU Version 2+. */
+       if (guest_get_pmu_version() >= 2)
+               nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
+       /*
+        * The supported bitmask for fixed counters was introduced in PMU
+        * version 5.
+        */
+       if (guest_get_pmu_version() >= 5)
+               supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
+
+       guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
+                            nr_fixed_counters, supported_bitmask);
+
+       for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
+               uint8_t vector;
+               uint64_t val;
+
+               if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
+                       vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
+                                           FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+                       __GUEST_ASSERT(vector == GP_VECTOR,
+                                      "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
+
+                       vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
+                                           FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+                       __GUEST_ASSERT(vector == GP_VECTOR,
+                                      "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
+                       continue;
+               }
+
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
+
+               GUEST_ASSERT_NE(val, 0);
+       }
+       GUEST_DONE();
+}
+
+static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+                               uint8_t nr_fixed_counters,
+                               uint32_t supported_bitmask)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
+                               supported_bitmask);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
+                               nr_fixed_counters);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void test_intel_counters(void)
+{
+       uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+       uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+       unsigned int i;
+       uint8_t v, j;
+       uint32_t k;
+
+       const uint64_t perf_caps[] = {
+               0,
+               PMU_CAP_FW_WRITES,
+       };
+
+       /*
+        * Test up to PMU v5, which is the current maximum version defined by
+        * Intel, i.e. is the last version that is guaranteed to be backwards
+        * compatible with KVM's existing behavior.
+        */
+       uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+
+       /*
+        * Detect the existence of events that aren't supported by selftests.
+        * This will (obviously) fail any time the kernel adds support for a
+        * new event, but it's worth paying that price to keep the test fresh.
+        */
+       TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+                   "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
+                   nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+
+       /*
+        * Force iterating over known arch events regardless of whether or not
+        * KVM/hardware supports a given event.
+        */
+       nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+
+       for (v = 0; v <= max_pmu_version; v++) {
+               for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
+                       if (!kvm_has_perf_caps && perf_caps[i])
+                               continue;
+
+                       pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       /*
+                        * To keep the total runtime reasonable, test every
+                        * possible non-zero, non-reserved bitmap combination
+                        * only with the native PMU version and the full bit
+                        * vector length.
+                        */
+                       if (v == pmu_version) {
+                               for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
+                                       test_arch_events(v, perf_caps[i], nr_arch_events, k);
+                       }
+                       /*
+                        * Test single bits for all PMU version and lengths up
+                        * the number of events +1 (to verify KVM doesn't do
+                        * weird things if the guest length is greater than the
+                        * host length).  Explicitly test a mask of '0' and all
+                        * ones i.e. all events being available and unavailable.
+                        */
+                       for (j = 0; j <= nr_arch_events + 1; j++) {
+                               test_arch_events(v, perf_caps[i], j, 0);
+                               test_arch_events(v, perf_caps[i], j, 0xff);
+
+                               for (k = 0; k < nr_arch_events; k++)
+                                       test_arch_events(v, perf_caps[i], j, BIT(k));
+                       }
+
+                       pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       for (j = 0; j <= nr_gp_counters; j++)
+                               test_gp_counters(v, perf_caps[i], j);
+
+                       pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       for (j = 0; j <= nr_fixed_counters; j++) {
+                               for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
+                                       test_fixed_counters(v, perf_caps[i], j, k);
+                       }
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_is_pmu_enabled());
+
+       TEST_REQUIRE(host_cpu_is_intel);
+       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+       kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+       kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
+       is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+       test_intel_counters();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c

index 283cc55597a4fe28c02197ea18c269b9294a16b2..3c85d1ae989366fd7983c41cf7b02d0a0ba4f8f2 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -11,72 +11,18 @@
   */
  
  #define _GNU_SOURCE /* for program_invocation_short_name */
-#include "test_util.h"
+
  #include "kvm_util.h"
+#include "pmu.h"
  #include "processor.h"
-
-/*
- * In lieu of copying perf_event.h into tools...
- */
-#define ARCH_PERFMON_EVENTSEL_OS                       (1ULL << 17)
-#define ARCH_PERFMON_EVENTSEL_ENABLE                   (1ULL << 22)
-
-/* End of stuff taken from perf_event.h. */
-
-/* Oddly, this isn't in perf_event.h. */
-#define ARCH_PERFMON_BRANCHES_RETIRED          5
+#include "test_util.h"
  
  #define NUM_BRANCHES 42
-#define INTEL_PMC_IDX_FIXED            32
-
-/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
-#define MAX_FILTER_EVENTS              300
  #define MAX_TEST_EVENTS                10
  
  #define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
  #define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
-#define PMU_EVENT_FILTER_INVALID_NEVENTS               (MAX_FILTER_EVENTS + 1)
-
-/*
- * This is how the event selector and unit mask are stored in an AMD
- * core performance event-select register. Intel's format is similar,
- * but the event selector is only 8 bits.
- */
-#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
-                             (umask & 0xff) << 8)
-
-/*
- * "Branch instructions retired", from the Intel SDM, volume 3,
- * "Pre-defined Architectural Performance Events."
- */
-
-#define INTEL_BR_RETIRED EVENT(0xc4, 0)
-
-/*
- * "Retired branch instructions", from Processor Programming Reference
- * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
- * Preliminary Processor Programming Reference (PPR) for AMD Family
- * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
- * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2.
- */
-
-#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
-
-
-/*
- * "Retired instructions", from Processor Programming Reference
- * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
- * Preliminary Processor Programming Reference (PPR) for AMD Family
- * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
- * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2.
- *                      --- and ---
- * "Instructions retired", from the Intel SDM, volume 3,
- * "Pre-defined Architectural Performance Events."
- */
-
-#define INST_RETIRED EVENT(0xc0, 0)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS               (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
  
  struct __kvm_pmu_event_filter {
         __u32 action;
@@ -84,26 +30,28 @@ struct __kvm_pmu_event_filter {
         __u32 fixed_counter_bitmap;
         __u32 flags;
         __u32 pad[4];
-       __u64 events[MAX_FILTER_EVENTS];
+       __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
  };
  
  /*
- * This event list comprises Intel's eight architectural events plus
- * AMD's "retired branch instructions" for Zen[123] (and possibly
- * other AMD CPUs).
+ * This event list comprises Intel's known architectural events, plus AMD's
+ * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs).
+ * Note, AMD and Intel use the same encoding for instructions retired.
   */
+kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
+
  static const struct __kvm_pmu_event_filter base_event_filter = {
         .nevents = ARRAY_SIZE(base_event_filter.events),
         .events = {
-               EVENT(0x3c, 0),
-               INST_RETIRED,
-               EVENT(0x3c, 1),
-               EVENT(0x2e, 0x4f),
-               EVENT(0x2e, 0x41),
-               EVENT(0xc4, 0),
-               EVENT(0xc5, 0),
-               EVENT(0xa4, 1),
-               AMD_ZEN_BR_RETIRED,
+               INTEL_ARCH_CPU_CYCLES,
+               INTEL_ARCH_INSTRUCTIONS_RETIRED,
+               INTEL_ARCH_REFERENCE_CYCLES,
+               INTEL_ARCH_LLC_REFERENCES,
+               INTEL_ARCH_LLC_MISSES,
+               INTEL_ARCH_BRANCHES_RETIRED,
+               INTEL_ARCH_BRANCHES_MISPREDICTED,
+               INTEL_ARCH_TOPDOWN_SLOTS,
+               AMD_ZEN_BRANCHES_RETIRED,
         },
  };
  
@@ -165,9 +113,9 @@ static void intel_guest_code(void)
         for (;;) {
                 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
                 wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
+                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
                 wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
+                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
                 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
  
                 run_and_measure_loop(MSR_IA32_PMC0);
@@ -189,9 +137,9 @@ static void amd_guest_code(void)
         for (;;) {
                 wrmsr(MSR_K7_EVNTSEL0, 0);
                 wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
+                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
                 wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
+                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
  
                 run_and_measure_loop(MSR_K7_PERFCTR0);
                 GUEST_SYNC(0);
@@ -312,7 +260,7 @@ static void test_amd_deny_list(struct kvm_vcpu *vcpu)
                 .action = KVM_PMU_EVENT_DENY,
                 .nevents = 1,
                 .events = {
-                       EVENT(0x1C2, 0),
+                       RAW_EVENT(0x1C2, 0),
                 },
         };
  
@@ -347,9 +295,9 @@ static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
  
         f.action = KVM_PMU_EVENT_DENY;
  
-       remove_event(&f, INST_RETIRED);
-       remove_event(&f, INTEL_BR_RETIRED);
-       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
         test_with_filter(vcpu, &f);
  
         ASSERT_PMC_COUNTING_INSTRUCTIONS();
@@ -361,9 +309,9 @@ static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
  
         f.action = KVM_PMU_EVENT_ALLOW;
  
-       remove_event(&f, INST_RETIRED);
-       remove_event(&f, INTEL_BR_RETIRED);
-       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
         test_with_filter(vcpu, &f);
  
         ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
@@ -452,9 +400,9 @@ static bool use_amd_pmu(void)
   *  - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
   */
  #define MEM_INST_RETIRED               0xD0
-#define MEM_INST_RETIRED_LOAD          EVENT(MEM_INST_RETIRED, 0x81)
-#define MEM_INST_RETIRED_STORE         EVENT(MEM_INST_RETIRED, 0x82)
-#define MEM_INST_RETIRED_LOAD_STORE    EVENT(MEM_INST_RETIRED, 0x83)
+#define MEM_INST_RETIRED_LOAD          RAW_EVENT(MEM_INST_RETIRED, 0x81)
+#define MEM_INST_RETIRED_STORE         RAW_EVENT(MEM_INST_RETIRED, 0x82)
+#define MEM_INST_RETIRED_LOAD_STORE    RAW_EVENT(MEM_INST_RETIRED, 0x83)
  
  static bool supports_event_mem_inst_retired(void)
  {
@@ -486,9 +434,9 @@ static bool supports_event_mem_inst_retired(void)
   * B1 Processors Volume 1 of 2.
   */
  #define LS_DISPATCH            0x29
-#define LS_DISPATCH_LOAD       EVENT(LS_DISPATCH, BIT(0))
-#define LS_DISPATCH_STORE      EVENT(LS_DISPATCH, BIT(1))
-#define LS_DISPATCH_LOAD_STORE EVENT(LS_DISPATCH, BIT(2))
+#define LS_DISPATCH_LOAD       RAW_EVENT(LS_DISPATCH, BIT(0))
+#define LS_DISPATCH_STORE      RAW_EVENT(LS_DISPATCH, BIT(1))
+#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
  
  #define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
         KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
@@ -729,14 +677,14 @@ static void add_dummy_events(uint64_t *events, int nevents)
  
  static void test_masked_events(struct kvm_vcpu *vcpu)
  {
-       int nevents = MAX_FILTER_EVENTS - MAX_TEST_EVENTS;
-       uint64_t events[MAX_FILTER_EVENTS];
+       int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
+       uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
  
         /* Run the test cases against a sparse PMU event filter. */
         run_masked_events_tests(vcpu, events, 0);
  
         /* Run the test cases against a dense PMU event filter. */
-       add_dummy_events(events, MAX_FILTER_EVENTS);
+       add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
         run_masked_events_tests(vcpu, events, nevents);
  }
  
@@ -809,20 +757,19 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
         TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
  }
  
-static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx)
+static void intel_run_fixed_counter_guest_code(uint8_t idx)
  {
         for (;;) {
                 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
  
                 /* Only OS_EN bit is enabled for fixed counter[idx]. */
-               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL,
-                     BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx));
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
                 __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
                 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
  
-               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx));
+               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
         }
  }
  
@@ -866,7 +813,7 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
          * userspace doesn't set any pmu filter.
          */
         count = run_vcpu_to_sync(vcpu);
-       TEST_ASSERT(count, "Unexpected count value: %ld\n", count);
+       TEST_ASSERT(count, "Unexpected count value: %ld", count);
  
         for (i = 0; i < BIT(nr_fixed_counters); i++) {
                 bitmap = BIT(i);
@@ -920,7 +867,7 @@ int main(int argc, char *argv[])
         struct kvm_vcpu *vcpu, *vcpu2 = NULL;
         struct kvm_vm *vm;
  
-       TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
+       TEST_REQUIRE(kvm_is_pmu_enabled());
         TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
         TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
  
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c

index 65ad38b6be1f1ad45afa085f5dbfb6402d581bb4..e0f642d2a3c4b562d186d4551583b6f5699d661b 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
@@ -434,6 +434,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
  
         r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
         TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+       close(memfd);
  }
  
  static void usage(const char *cmd)
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c

index c7ef97561038e8bc297324387160990f170f53a9..0a6dfba3905b68c03cebaf1e821c7019c4a68cf1 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -10,11 +10,9 @@
  #include "test_util.h"
  #include "kvm_util.h"
  #include "processor.h"
-#include "svm_util.h"
+#include "sev.h"
  #include "kselftest.h"
  
-#define SEV_POLICY_ES 0b100
-
  #define NR_MIGRATE_TEST_VCPUS 4
  #define NR_MIGRATE_TEST_VMS 3
  #define NR_LOCK_TESTING_THREADS 3
@@ -22,46 +20,24 @@
  
  bool have_sev_es;
  
-static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error)
-{
-       struct kvm_sev_cmd cmd = {
-               .id = cmd_id,
-               .data = (uint64_t)data,
-               .sev_fd = open_sev_dev_path_or_exit(),
-       };
-       int ret;
-
-       ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
-       *fw_error = cmd.error;
-       return ret;
-}
-
-static void sev_ioctl(int vm_fd, int cmd_id, void *data)
-{
-       int ret;
-       __u32 fw_error;
-
-       ret = __sev_ioctl(vm_fd, cmd_id, data, &fw_error);
-       TEST_ASSERT(ret == 0 && fw_error == SEV_RET_SUCCESS,
-                   "%d failed: return code: %d, errno: %d, fw error: %d",
-                   cmd_id, ret, errno, fw_error);
-}
-
  static struct kvm_vm *sev_vm_create(bool es)
  {
         struct kvm_vm *vm;
-       struct kvm_sev_launch_start start = { 0 };
         int i;
  
         vm = vm_create_barebones();
-       sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL);
+       if (!es)
+               sev_vm_init(vm);
+       else
+               sev_es_vm_init(vm);
+
         for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
                 __vm_vcpu_add(vm, i);
+
+       sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
+
         if (es)
-               start.policy |= SEV_POLICY_ES;
-       sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start);
-       if (es)
-               sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
         return vm;
  }
  
@@ -91,7 +67,7 @@ static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
         int ret;
  
         ret = __sev_migrate_from(dst, src);
-       TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
+       TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
  }
  
  static void test_sev_migrate_from(bool es)
@@ -113,7 +89,7 @@ static void test_sev_migrate_from(bool es)
         /* Migrate the guest back to the original VM. */
         ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
         TEST_ASSERT(ret == -1 && errno == EIO,
-                   "VM that was migrated from should be dead. ret %d, errno: %d\n", ret,
+                   "VM that was migrated from should be dead. ret %d, errno: %d", ret,
                     errno);
  
         kvm_vm_free(src_vm);
@@ -172,7 +148,7 @@ static void test_sev_migrate_parameters(void)
         vm_no_sev = aux_vm_create(true);
         ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
         TEST_ASSERT(ret == -1 && errno == EINVAL,
-                   "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
+                   "Migrations require SEV enabled. ret %d, errno: %d", ret,
                     errno);
  
         if (!have_sev_es)
@@ -181,31 +157,31 @@ static void test_sev_migrate_parameters(void)
         sev_vm = sev_vm_create(/* es= */ false);
         sev_es_vm = sev_vm_create(/* es= */ true);
         sev_es_vm_no_vmsa = vm_create_barebones();
-       sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
+       sev_es_vm_init(sev_es_vm_no_vmsa);
         __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
  
         ret = __sev_migrate_from(sev_vm, sev_es_vm);
         TEST_ASSERT(
                 ret == -1 && errno == EINVAL,
-               "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
+               "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d",
                 ret, errno);
  
         ret = __sev_migrate_from(sev_es_vm, sev_vm);
         TEST_ASSERT(
                 ret == -1 && errno == EINVAL,
-               "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
+               "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d",
                 ret, errno);
  
         ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
         TEST_ASSERT(
                 ret == -1 && errno == EINVAL,
-               "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
+               "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d",
                 ret, errno);
  
         ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
         TEST_ASSERT(
                 ret == -1 && errno == EINVAL,
-               "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
+               "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
                 ret, errno);
  
         kvm_vm_free(sev_vm);
@@ -227,16 +203,16 @@ static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
         int ret;
  
         ret = __sev_mirror_create(dst, src);
-       TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
+       TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
  }
  
-static void verify_mirror_allowed_cmds(int vm_fd)
+static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
  {
         struct kvm_sev_guest_status status;
+       int cmd_id;
  
-       for (int cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+       for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
                 int ret;
-               __u32 fw_error;
  
                 /*
                  * These commands are allowed for mirror VMs, all others are
@@ -256,14 +232,14 @@ static void verify_mirror_allowed_cmds(int vm_fd)
                  * These commands should be disallowed before the data
                  * parameter is examined so NULL is OK here.
                  */
-               ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error);
+               ret = __vm_sev_ioctl(vm, cmd_id, NULL);
                 TEST_ASSERT(
                         ret == -1 && errno == EINVAL,
-                       "Should not be able call command: %d. ret: %d, errno: %d\n",
+                       "Should not be able call command: %d. ret: %d, errno: %d",
                         cmd_id, ret, errno);
         }
  
-       sev_ioctl(vm_fd, KVM_SEV_GUEST_STATUS, &status);
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
  }
  
  static void test_sev_mirror(bool es)
@@ -281,9 +257,9 @@ static void test_sev_mirror(bool es)
                 __vm_vcpu_add(dst_vm, i);
  
         if (es)
-               sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+               vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
  
-       verify_mirror_allowed_cmds(dst_vm->fd);
+       verify_mirror_allowed_cmds(dst_vm);
  
         kvm_vm_free(src_vm);
         kvm_vm_free(dst_vm);
@@ -301,18 +277,18 @@ static void test_sev_mirror_parameters(void)
         ret = __sev_mirror_create(sev_vm, sev_vm);
         TEST_ASSERT(
                 ret == -1 && errno == EINVAL,
-               "Should not be able copy context to self. ret: %d, errno: %d\n",
+               "Should not be able copy context to self. ret: %d, errno: %d",
                 ret, errno);
  
         ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
         TEST_ASSERT(ret == -1 && errno == EINVAL,
-                   "Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
+                   "Copy context requires SEV enabled. ret %d, errno: %d", ret,
                     errno);
  
         ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
         TEST_ASSERT(
                 ret == -1 && errno == EINVAL,
-               "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n",
+               "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
                 ret, errno);
  
         if (!have_sev_es)
@@ -322,13 +298,13 @@ static void test_sev_mirror_parameters(void)
         ret = __sev_mirror_create(sev_vm, sev_es_vm);
         TEST_ASSERT(
                 ret == -1 && errno == EINVAL,
-               "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n",
+               "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d",
                 ret, errno);
  
         ret = __sev_mirror_create(sev_es_vm, sev_vm);
         TEST_ASSERT(
                 ret == -1 && errno == EINVAL,
-               "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n",
+               "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d",
                 ret, errno);
  
         kvm_vm_free(sev_es_vm);
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c

new file mode 100644 (file)

index 0000000..026779f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "linux/psp-sev.h"
+#include "sev.h"
+
+
+static void guest_sev_es_code(void)
+{
+       /* TODO: Check CPUID after GHCB-based hypercall support is added. */
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+
+       /*
+        * TODO: Add GHCB and ucall support for SEV-ES guests.  For now, simply
+        * force "termination" to signal "done" via the GHCB MSR protocol.
+        */
+       wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+       __asm__ __volatile__("rep; vmmcall");
+}
+
+static void guest_sev_code(void)
+{
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+
+       GUEST_DONE();
+}
+
+static void test_sev(void *guest_code, uint64_t policy)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu);
+
+       for (;;) {
+               vcpu_run(vcpu);
+
+               if (policy & SEV_POLICY_ES) {
+                       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+                                   "Wanted SYSTEM_EVENT, got %s",
+                                   exit_reason_str(vcpu->run->exit_reason));
+                       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+                       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+                       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+                       break;
+               }
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       continue;
+               case UCALL_DONE:
+                       return;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_FAIL("Unexpected exit: %s",
+                                 exit_reason_str(vcpu->run->exit_reason));
+               }
+       }
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+       test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
+       test_sev(guest_sev_code, 0);
+
+       if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
+               test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
+               test_sev(guest_sev_es_code, SEV_POLICY_ES);
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c

index 06edf00a97d61dc3ca265926c2043d839daddeeb..416207c38a17ea416506a3f0babea8682b9862f5 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -63,7 +63,7 @@ int main(int argc, char *argv[])
         vm_init_descriptor_tables(vm);
         vcpu_init_descriptor_tables(vcpu);
  
-       vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
  
         rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
         TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
@@ -74,7 +74,7 @@ int main(int argc, char *argv[])
                                     MEM_REGION_SIZE / PAGE_SIZE, 0);
         gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
                                  MEM_REGION_GPA, MEM_REGION_SLOT);
-       TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+       TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
         virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
         hva = addr_gpa2hva(vm, MEM_REGION_GPA);
         memset(hva, 0, PAGE_SIZE);
@@ -102,7 +102,7 @@ int main(int argc, char *argv[])
         case UCALL_DONE:
                 break;
         default:
-               TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd);
+               TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
         }
  
         kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c

index 00965ba33f730c2a443773dc89e41d465c39beeb..adb5593daf483ec6b84d18dda8f5ebf8823a6827 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -17,6 +17,7 @@
  #include <sys/ioctl.h>
  #include <pthread.h>
  
+#include "kvm_test_harness.h"
  #include "test_util.h"
  #include "kvm_util.h"
  #include "processor.h"
@@ -41,12 +42,14 @@ void guest_code(void)
                      : "rax", "rbx");
  }
  
+KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
+
  static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
  {
  #define REG_COMPARE(reg) \
         TEST_ASSERT(left->reg == right->reg, \
                     "Register " #reg \
-                   " values did not match: 0x%llx, 0x%llx\n", \
+                   " values did not match: 0x%llx, 0x%llx", \
                     left->reg, right->reg)
         REG_COMPARE(rax);
         REG_COMPARE(rbx);
@@ -152,18 +155,15 @@ static noinline void *race_sregs_cr4(void *arg)
         return NULL;
  }
  
-static void race_sync_regs(void *racer)
+static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
  {
         const time_t TIMEOUT = 2; /* seconds, roughly */
         struct kvm_x86_state *state;
         struct kvm_translation tr;
-       struct kvm_vcpu *vcpu;
         struct kvm_run *run;
-       struct kvm_vm *vm;
         pthread_t thread;
         time_t t;
  
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
         run = vcpu->run;
  
         run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
@@ -205,61 +205,61 @@ static void race_sync_regs(void *racer)
         TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
  
         kvm_x86_state_cleanup(state);
-       kvm_vm_free(vm);
  }
  
-int main(int argc, char *argv[])
+KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_run *run;
-       struct kvm_regs regs;
-       struct kvm_sregs sregs;
-       struct kvm_vcpu_events events;
-       int rv, cap;
-
-       cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
-       TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
-       TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       run = vcpu->run;
+       struct kvm_run *run = vcpu->run;
+       int rv;
  
         /* Request reading invalid register set from VCPU. */
         run->kvm_valid_regs = INVALID_SYNC_FIELD;
         rv = _vcpu_run(vcpu);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
                     rv);
         run->kvm_valid_regs = 0;
  
         run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
         rv = _vcpu_run(vcpu);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
                     rv);
         run->kvm_valid_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
  
         /* Request setting invalid register set into VCPU. */
         run->kvm_dirty_regs = INVALID_SYNC_FIELD;
         rv = _vcpu_run(vcpu);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
                     rv);
         run->kvm_dirty_regs = 0;
  
         run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
         rv = _vcpu_run(vcpu);
         TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
                     rv);
         run->kvm_dirty_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vcpu_events events;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
  
         /* Request and verify all valid register sets. */
         /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
         run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
  
         vcpu_regs_get(vcpu, &regs);
@@ -270,6 +270,19 @@ int main(int argc, char *argv[])
  
         vcpu_events_get(vcpu, &events);
         compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vcpu_events events;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
  
         /* Set and verify various register values. */
         run->s.regs.regs.rbx = 0xBAD1DEA;
@@ -278,7 +291,7 @@ int main(int argc, char *argv[])
  
         run->kvm_valid_regs = TEST_SYNC_FIELDS;
         run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
         TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
                     "rbx sync regs value incorrect 0x%llx.",
@@ -295,6 +308,11 @@ int main(int argc, char *argv[])
  
         vcpu_events_get(vcpu, &events);
         compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
  
         /* Clear kvm_dirty_regs bits, verify new s.regs values are
          * overwritten with existing guest values.
@@ -302,11 +320,22 @@ int main(int argc, char *argv[])
         run->kvm_valid_regs = TEST_SYNC_FIELDS;
         run->kvm_dirty_regs = 0;
         run->s.regs.regs.rbx = 0xDEADBEEF;
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
         TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
                     "rbx sync regs value incorrect 0x%llx.",
                     run->s.regs.regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
  
         /* Clear kvm_valid_regs bits and kvm_dirty_bits.
          * Verify s.regs values are not overwritten with existing guest values
@@ -315,9 +344,10 @@ int main(int argc, char *argv[])
         run->kvm_valid_regs = 0;
         run->kvm_dirty_regs = 0;
         run->s.regs.regs.rbx = 0xAAAA;
+       vcpu_regs_get(vcpu, &regs);
         regs.rbx = 0xBAC0;
         vcpu_regs_set(vcpu, &regs);
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
         TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
                     "rbx sync regs value incorrect 0x%llx.",
@@ -326,6 +356,17 @@ int main(int argc, char *argv[])
         TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
                     "rbx guest value incorrect 0x%llx.",
                     regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
  
         /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
          * with existing guest values but that guest values are overwritten
@@ -334,7 +375,7 @@ int main(int argc, char *argv[])
         run->kvm_valid_regs = 0;
         run->kvm_dirty_regs = TEST_SYNC_FIELDS;
         run->s.regs.regs.rbx = 0xBBBB;
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
         TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
                     "rbx sync regs value incorrect 0x%llx.",
@@ -343,12 +384,30 @@ int main(int argc, char *argv[])
         TEST_ASSERT(regs.rbx == 0xBBBB + 1,
                     "rbx guest value incorrect 0x%llx.",
                     regs.rbx);
+}
  
-       kvm_vm_free(vm);
+KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
+{
+       race_sync_regs(vcpu, race_sregs_cr4);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
+{
+       race_sync_regs(vcpu, race_events_exc);
+}
  
-       race_sync_regs(race_sregs_cr4);
-       race_sync_regs(race_events_exc);
-       race_sync_regs(race_events_inj_pen);
+KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
+{
+       race_sync_regs(vcpu, race_events_inj_pen);
+}
+
+int main(int argc, char *argv[])
+{
+       int cap;
+
+       cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+       TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+       TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
  
-       return 0;
+       return test_harness_run(argc, argv);
  }
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c

index 0ed32ec903d03548ce11fa5bcc42eba329808506..dcbb3c29fb8e9f82b9dce0b222111f8a8eb4776a 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -143,7 +143,7 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
  
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
         TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC\n");
+                   "Expect UCALL_SYNC");
         TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
         printf("vCPU received GP in guest.\n");
  }
@@ -188,7 +188,7 @@ static void *run_ucna_injection(void *arg)
  
         TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
         TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC\n");
+                   "Expect UCALL_SYNC");
         TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
  
         printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
@@ -198,7 +198,7 @@ static void *run_ucna_injection(void *arg)
  
         TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
         TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC\n");
+                   "Expect UCALL_SYNC");
         TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
  
         printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
@@ -208,7 +208,7 @@ static void *run_ucna_injection(void *arg)
  
         TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
         if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
-               TEST_ASSERT(false, "vCPU assertion failure: %s.\n",
+               TEST_ASSERT(false, "vCPU assertion failure: %s.",
                             (const char *)uc.args[0]);
         }
  
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c

index 255c50b0dc32675dfef64e65e1a0c6b164d0ca39..9481cbcf284f69b662a2f9f1a5fb3ff304ab0605 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -71,7 +71,7 @@ int main(int argc, char *argv[])
                         break;
  
                 TEST_ASSERT(run->io.port == 0x80,
-                           "Expected I/O at port 0x80, got port 0x%x\n", run->io.port);
+                           "Expected I/O at port 0x80, got port 0x%x", run->io.port);
  
                 /*
                  * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c

index 3533dc2fbfeeb136b217eb79e819e2e374e2b0cd..f4f61a2d2464c1911efcf2dd9b02b1ff90af9e41 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -8,14 +8,12 @@
  #define _GNU_SOURCE /* for program_invocation_short_name */
  #include <sys/ioctl.h>
  
+#include "kvm_test_harness.h"
  #include "test_util.h"
  #include "kvm_util.h"
  #include "vmx.h"
  
-/* Forced emulation prefix, used to invoke the emulator unconditionally. */
-#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
-#define KVM_FEP_LENGTH 5
-static int fep_available = 1;
+static bool fep_available;
  
  #define MSR_NON_EXISTENT 0x474f4f00
  
@@ -260,13 +258,6 @@ static void guest_code_filter_allow(void)
         GUEST_ASSERT(data == 2);
         GUEST_ASSERT(guest_exception_count == 0);
  
-       /*
-        * Test to see if the instruction emulator is available (ie: the module
-        * parameter 'kvm.force_emulation_prefix=1' is set).  This instruction
-        * will #UD if it isn't available.
-        */
-       __asm__ __volatile__(KVM_FEP "nop");
-
         if (fep_available) {
                 /* Let userspace know we aren't done. */
                 GUEST_SYNC(0);
@@ -388,12 +379,6 @@ static void guest_fep_gp_handler(struct ex_regs *regs)
                            &em_wrmsr_start, &em_wrmsr_end);
  }
  
-static void guest_ud_handler(struct ex_regs *regs)
-{
-       fep_available = 0;
-       regs->rip += KVM_FEP_LENGTH;
-}
-
  static void check_for_guest_assert(struct kvm_vcpu *vcpu)
  {
         struct ucall uc;
@@ -527,13 +512,15 @@ static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
         process_ucall_done(vcpu);
  }
  
-static void test_msr_filter_allow(void)
+KVM_ONE_VCPU_TEST_SUITE(user_msr);
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
+       struct kvm_vm *vm = vcpu->vm;
+       uint64_t cmd;
         int rc;
  
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_allow);
+       sync_global_to_guest(vm, fep_available);
  
         rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
         TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
@@ -561,11 +548,11 @@ static void test_msr_filter_allow(void)
         run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
         run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
  
-       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
         vcpu_run(vcpu);
-       vm_install_exception_handler(vm, UD_VECTOR, NULL);
+       cmd = process_ucall(vcpu);
  
-       if (process_ucall(vcpu) != UCALL_DONE) {
+       if (fep_available) {
+               TEST_ASSERT_EQ(cmd, UCALL_SYNC);
                 vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
  
                 /* Process emulated rdmsr and wrmsr instructions. */
@@ -583,10 +570,9 @@ static void test_msr_filter_allow(void)
                 /* Confirm the guest completed without issues. */
                 run_guest_then_process_ucall_done(vcpu);
         } else {
+               TEST_ASSERT_EQ(cmd, UCALL_DONE);
                 printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
         }
-
-       kvm_vm_free(vm);
  }
  
  static int handle_ucall(struct kvm_vcpu *vcpu)
@@ -646,16 +632,12 @@ static void handle_wrmsr(struct kvm_run *run)
         }
  }
  
-static void test_msr_filter_deny(void)
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_run *run;
+       struct kvm_vm *vm = vcpu->vm;
+       struct kvm_run *run = vcpu->run;
         int rc;
  
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_deny);
-       run = vcpu->run;
-
         rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
         TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
         vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
@@ -689,18 +671,13 @@ static void test_msr_filter_deny(void)
  done:
         TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
         TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-
-       kvm_vm_free(vm);
  }
  
-static void test_msr_permission_bitmap(void)
+KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
+       struct kvm_vm *vm = vcpu->vm;
         int rc;
  
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code_permission_bitmap);
-
         rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
         TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
         vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
@@ -715,8 +692,6 @@ static void test_msr_permission_bitmap(void)
         vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
         run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
         run_guest_then_process_ucall_done(vcpu);
-
-       kvm_vm_free(vm);
  }
  
  #define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask)       \
@@ -786,31 +761,20 @@ static void run_msr_filter_flag_test(struct kvm_vm *vm)
  }
  
  /* Test that attempts to write to the unused bits in a flag fails. */
-static void test_user_exit_msr_flags(void)
+KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+       struct kvm_vm *vm = vcpu->vm;
  
         /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
         run_user_space_msr_flag_test(vm);
  
         /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
         run_msr_filter_flag_test(vm);
-
-       kvm_vm_free(vm);
  }
  
  int main(int argc, char *argv[])
  {
-       test_msr_filter_allow();
-
-       test_msr_filter_deny();
-
-       test_msr_permission_bitmap();
+       fep_available = kvm_is_forced_emulation_enabled();
  
-       test_user_exit_msr_flags();
-
-       return 0;
+       return test_harness_run(argc, argv);
  }
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c

index 2bed5fb3a0d6e51aa63f9732d77dac330378d2c8..a81a24761aac072a0359305826712a917ed06bba 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -99,7 +99,7 @@ int main(int argc, char *argv[])
                         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
                         TEST_ASSERT(run->internal.suberror ==
                                     KVM_INTERNAL_ERROR_EMULATION,
-                                   "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n",
+                                   "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
                                     run->internal.suberror);
                         break;
                 }
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c

index e4ad5fef52ffc5b08ec8d3f445b518b52229e672..7f6f5f23fb9b67fcb186a0e9c9ad00aaced6d2d3 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -128,17 +128,17 @@ int main(int argc, char *argv[])
                          */
                         kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
                         if (uc.args[1]) {
-                               TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
-                               TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
+                               TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
+                               TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
                         } else {
-                               TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
-                               TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
+                               TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
+                               TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
                         }
  
-                       TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");
-                       TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
-                       TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
-                       TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
+                       TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
+                       TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+                       TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
+                       TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
                         break;
                 case UCALL_DONE:
                         done = true;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c

index a9b827c69f32c5f96548d040c72d979166041f19..fad3634fd9eb62e34ded1c3f59b1d38a0b61d03a 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -28,7 +28,7 @@ static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
  
         TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
         TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
-                   "Expected emulation failure, got %d\n",
+                   "Expected emulation failure, got %d",
                     run->emulation_failure.suberror);
  }
  
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c

index e710b6e7fb384aac124ad0c7f646a872b347ad51..1759fa5cb3f29c337a09a5d15569ff028067e2db 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -116,23 +116,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
         GUEST_DONE();
  }
  
-static bool system_has_stable_tsc(void)
-{
-       bool tsc_is_stable;
-       FILE *fp;
-       char buf[4];
-
-       fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
-       if (fp == NULL)
-               return false;
-
-       tsc_is_stable = fgets(buf, sizeof(buf), fp) &&
-                       !strncmp(buf, "tsc", sizeof(buf));
-
-       fclose(fp);
-       return tsc_is_stable;
-}
-
  int main(int argc, char *argv[])
  {
         struct kvm_vcpu *vcpu;
@@ -148,7 +131,7 @@ int main(int argc, char *argv[])
  
         TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
         TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
-       TEST_REQUIRE(system_has_stable_tsc());
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
  
         /*
          * We set L1's scale factor to be a random number from 2 to 10.
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c

index 2a8d4ac2f0204780498dd42b6343deb5fbb04c9b..ea0cb3cae0f759be3072b3facb92291edb8c2d55 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -15,10 +15,11 @@
  
  #include <linux/bitmap.h>
  
+#include "kvm_test_harness.h"
  #include "kvm_util.h"
  #include "vmx.h"
  
-union perf_capabilities {
+static union perf_capabilities {
         struct {
                 u64     lbr_format:6;
                 u64     pebs_trap:1;
@@ -32,7 +33,7 @@ union perf_capabilities {
                 u64     anythread_deprecated:1;
         };
         u64     capabilities;
-};
+} host_cap;
  
  /*
   * The LBR format and most PEBS features are immutable, all other features are
@@ -73,19 +74,19 @@ static void guest_code(uint64_t current_val)
         GUEST_DONE();
  }
  
+KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
+
  /*
   * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
   * written, that the guest always sees the userspace controlled value, and that
   * PERF_CAPABILITIES is immutable after KVM_RUN.
   */
-static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
         struct ucall uc;
         int r, i;
  
-       vm_init_descriptor_tables(vm);
+       vm_init_descriptor_tables(vcpu->vm);
         vcpu_init_descriptor_tables(vcpu);
  
         vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
@@ -117,31 +118,21 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
                 TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail",
                             host_cap.capabilities ^ BIT_ULL(i));
         }
-
-       kvm_vm_free(vm);
  }
  
  /*
   * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
   * enabled, as well as '0' (to disable all features).
   */
-static void test_basic_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
         vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
         vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
-       kvm_vm_free(vm);
  }
  
-static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
  {
         const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
         int bit;
  
         for_each_set_bit(bit, &fungible_caps, 64) {
@@ -150,8 +141,6 @@ static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
                              host_cap.capabilities & ~BIT_ULL(bit));
         }
         vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
-       kvm_vm_free(vm);
  }
  
  /*
@@ -160,14 +149,11 @@ static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
   * separately as they are multi-bit values, e.g. toggling or setting a single
   * bit can generate a false positive without dedicated safeguards.
   */
-static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
  {
         const uint64_t reserved_caps = (~host_cap.capabilities |
                                         immutable_caps.capabilities) &
                                        ~format_caps.capabilities;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
         union perf_capabilities val = host_cap;
         int r, bit;
  
@@ -201,8 +187,6 @@ static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
                 TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
                             val.pebs_format, host_cap.pebs_format);
         }
-
-       kvm_vm_free(vm);
  }
  
  /*
@@ -211,17 +195,13 @@ static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
   * LBR_TOS as those bits are writable across all uarch implementations (arch
   * LBRs will need to poke a different MSR).
   */
-static void test_lbr_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
  {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
         int r;
  
         if (!host_cap.lbr_format)
                 return;
  
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
         vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
         vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
  
@@ -229,15 +209,11 @@ static void test_lbr_perf_capabilities(union perf_capabilities host_cap)
  
         r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
         TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
-
-       kvm_vm_free(vm);
  }
  
  int main(int argc, char *argv[])
  {
-       union perf_capabilities host_cap;
-
-       TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
+       TEST_REQUIRE(kvm_is_pmu_enabled());
         TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
  
         TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
@@ -248,9 +224,5 @@ int main(int argc, char *argv[])
         TEST_ASSERT(host_cap.full_width_write,
                     "Full-width writes should always be supported");
  
-       test_basic_perf_capabilities(host_cap);
-       test_fungible_perf_capabilities(host_cap);
-       test_immutable_perf_capabilities(host_cap);
-       test_guest_wrmsr_perf_capabilities(host_cap);
-       test_lbr_perf_capabilities(host_cap);
+       return test_harness_run(argc, argv);
  }
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c

index 67ac2a3292efd4e5a4ff24073af25849ce375dd9..725c206ba0b92bc9d073dcbc7d8403cb7d2f0bd1 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -216,7 +216,7 @@ static void *vcpu_thread(void *arg)
                             "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
                             "Halter TPR=%#x PPR=%#x LVR=%#x\n"
                             "Migrations attempted: %lu\n"
-                           "Migrations completed: %lu\n",
+                           "Migrations completed: %lu",
                             vcpu->id, (const char *)uc.args[0],
                             params->data->ipis_sent, params->data->hlt_count,
                             params->data->wake_count,
@@ -288,7 +288,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
         }
  
         TEST_ASSERT(nodes > 1,
-                   "Did not find at least 2 numa nodes. Can't do migration\n");
+                   "Did not find at least 2 numa nodes. Can't do migration");
  
         fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
  
@@ -347,7 +347,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
                                     wake_count != data->wake_count,
                                     "IPI, HLT and wake count have not increased "
                                     "in the last %lu seconds. "
-                                   "HLTer is likely hung.\n", interval_secs);
+                                   "HLTer is likely hung.", interval_secs);
  
                         ipis_sent = data->ipis_sent;
                         hlt_count = data->hlt_count;
@@ -381,7 +381,7 @@ void get_cmdline_args(int argc, char *argv[], int *run_secs,
                                     "-m adds calls to migrate_pages while vCPUs are running."
                                     " Default is no migrations.\n"
                                     "-d <delay microseconds> - delay between migrate_pages() calls."
-                                   " Default is %d microseconds.\n",
+                                   " Default is %d microseconds.",
                                     DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
                 }
         }
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c

index dc6217440db3ae193fd9bfbcfbaadea223e57c03..25a0b0db5c3c9dfac6819de37e8c7d0a541935fb 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
@@ -116,7 +116,7 @@ int main(int argc, char *argv[])
                 vcpu_run(vcpu);
  
                 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                           "Unexpected exit reason: %u (%s),\n",
+                           "Unexpected exit reason: %u (%s),",
                             run->exit_reason,
                             exit_reason_str(run->exit_reason));
  
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c

index 9ec9ab60b63ee27d2deb6f90d9b38a71e2deb142..d2ea0435f4f76332fb702d3e84d19b536a8ea4ee 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -62,6 +62,7 @@ enum {
         TEST_POLL_TIMEOUT,
         TEST_POLL_MASKED,
         TEST_POLL_WAKE,
+       SET_VCPU_INFO,
         TEST_TIMER_PAST,
         TEST_LOCKING_SEND_RACE,
         TEST_LOCKING_POLL_RACE,
@@ -321,6 +322,10 @@ static void guest_code(void)
  
         GUEST_SYNC(TEST_POLL_WAKE);
  
+       /* Set the vcpu_info to point at exactly the place it already is to
+        * make sure the attribute is functional. */
+       GUEST_SYNC(SET_VCPU_INFO);
+
         /* A timer wake an *unmasked* port which should wake us with an
          * actual interrupt, while we're polling on a different port. */
         ports[0]++;
@@ -389,6 +394,7 @@ static int cmp_timespec(struct timespec *a, struct timespec *b)
                 return 0;
  }
  
+static struct shared_info *shinfo;
  static struct vcpu_info *vinfo;
  static struct kvm_vcpu *vcpu;
  
@@ -404,20 +410,38 @@ static void *juggle_shinfo_state(void *arg)
  {
         struct kvm_vm *vm = (struct kvm_vm *)arg;
  
-       struct kvm_xen_hvm_attr cache_activate = {
+       struct kvm_xen_hvm_attr cache_activate_gfn = {
                 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
                 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
         };
  
-       struct kvm_xen_hvm_attr cache_deactivate = {
+       struct kvm_xen_hvm_attr cache_deactivate_gfn = {
                 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
                 .u.shared_info.gfn = KVM_XEN_INVALID_GFN
         };
  
+       struct kvm_xen_hvm_attr cache_activate_hva = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+               .u.shared_info.hva = (unsigned long)shinfo
+       };
+
+       struct kvm_xen_hvm_attr cache_deactivate_hva = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+               .u.shared_info.hva = 0
+       };
+
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+
         for (;;) {
-               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate);
-               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate);
+               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
                 pthread_testcancel();
+               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
+
+               if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
+                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
+                       pthread_testcancel();
+                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
+               }
         }
  
         return NULL;
@@ -442,6 +466,7 @@ int main(int argc, char *argv[])
         bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
         bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
         bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+       bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
  
         clock_gettime(CLOCK_REALTIME, &min_ts);
  
@@ -452,7 +477,7 @@ int main(int argc, char *argv[])
                                     SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
         virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
  
-       struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+       shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
  
         int zero_fd = open("/dev/zero", O_RDONLY);
         TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
@@ -488,10 +513,16 @@ int main(int argc, char *argv[])
                             "Failed to read back RUNSTATE_UPDATE_FLAG attr");
         }
  
-       struct kvm_xen_hvm_attr ha = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
-               .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
-       };
+       struct kvm_xen_hvm_attr ha = {};
+
+       if (has_shinfo_hva) {
+               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
+               ha.u.shared_info.hva = (unsigned long)shinfo;
+       } else {
+               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+               ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
+       }
+
         vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
  
         /*
@@ -862,6 +893,16 @@ int main(int argc, char *argv[])
                                 alarm(1);
                                 break;
  
+                       case SET_VCPU_INFO:
+                               if (has_shinfo_hva) {
+                                       struct kvm_xen_vcpu_attr vih = {
+                                               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
+                                               .u.hva = (unsigned long)vinfo
+                                       };
+                                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
+                               }
+                               break;
+
                         case TEST_TIMER_PAST:
                                 TEST_ASSERT(!evtchn_irq_expected,
                                             "Expected event channel IRQ but it didn't happen");
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c

index e0ddf47362e773fbba85169b6c517ac505a3077b..167c97abff1b816dd9792b19f71c58847f433f78 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -29,7 +29,7 @@ int main(int argc, char *argv[])
  
         xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
         TEST_ASSERT(xss_val == 0,
-                   "MSR_IA32_XSS should be initialized to zero\n");
+                   "MSR_IA32_XSS should be initialized to zero");
  
         vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
  
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h

index 5b79758cae627593c68b9fd465451efcf7b75f9f..e64bbdf0e86eac8bf1751ee287508aca1a7ed27c 100644 (file)
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -9,6 +9,7 @@
  
  #include <errno.h>
  #include <linux/landlock.h>
+#include <linux/securebits.h>
  #include <sys/capability.h>
  #include <sys/socket.h>
  #include <sys/syscall.h>
@@ -115,11 +116,16 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
                 /* clang-format off */
                 CAP_DAC_OVERRIDE,
                 CAP_MKNOD,
+               CAP_NET_ADMIN,
+               CAP_NET_BIND_SERVICE,
                 CAP_SYS_ADMIN,
                 CAP_SYS_CHROOT,
-               CAP_NET_BIND_SERVICE,
                 /* clang-format on */
         };
+       const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
+
+       if ((cap_get_secbits() & noroot) != noroot)
+               EXPECT_EQ(0, cap_set_secbits(noroot));
  
         cap_p = cap_get_proc();
         EXPECT_NE(NULL, cap_p)
@@ -137,6 +143,8 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
                         TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
                 }
         }
+
+       /* Automatically resets ambient capabilities. */
         EXPECT_NE(-1, cap_set_proc(cap_p))
         {
                 TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
@@ -145,6 +153,9 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
         {
                 TH_LOG("Failed to cap_free: %s", strerror(errno));
         }
+
+       /* Quickly checks that ambient capabilities are cleared. */
+       EXPECT_NE(-1, cap_get_ambient(caps[0]));
  }
  
  /* We cannot put such helpers in a library because of kselftest_harness.h . */
@@ -158,8 +169,9 @@ static void __maybe_unused drop_caps(struct __test_metadata *const _metadata)
         _init_caps(_metadata, true);
  }
  
-static void _effective_cap(struct __test_metadata *const _metadata,
-                          const cap_value_t caps, const cap_flag_value_t value)
+static void _change_cap(struct __test_metadata *const _metadata,
+                       const cap_flag_t flag, const cap_value_t cap,
+                       const cap_flag_value_t value)
  {
         cap_t cap_p;
  
@@ -168,7 +180,7 @@ static void _effective_cap(struct __test_metadata *const _metadata,
         {
                 TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
         }
-       EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value))
+       EXPECT_NE(-1, cap_set_flag(cap_p, flag, 1, &cap, value))
         {
                 TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
         }
@@ -183,15 +195,35 @@ static void _effective_cap(struct __test_metadata *const _metadata,
  }
  
  static void __maybe_unused set_cap(struct __test_metadata *const _metadata,
-                                  const cap_value_t caps)
+                                  const cap_value_t cap)
  {
-       _effective_cap(_metadata, caps, CAP_SET);
+       _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_SET);
  }
  
  static void __maybe_unused clear_cap(struct __test_metadata *const _metadata,
-                                    const cap_value_t caps)
+                                    const cap_value_t cap)
+{
+       _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_CLEAR);
+}
+
+static void __maybe_unused
+set_ambient_cap(struct __test_metadata *const _metadata, const cap_value_t cap)
+{
+       _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_SET);
+
+       EXPECT_NE(-1, cap_set_ambient(cap, CAP_SET))
+       {
+               TH_LOG("Failed to set ambient capability %d: %s", cap,
+                      strerror(errno));
+       }
+}
+
+static void __maybe_unused clear_ambient_cap(
+       struct __test_metadata *const _metadata, const cap_value_t cap)
  {
-       _effective_cap(_metadata, caps, CAP_CLEAR);
+       EXPECT_EQ(1, cap_get_ambient(cap));
+       _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_CLEAR);
+       EXPECT_EQ(0, cap_get_ambient(cap));
  }
  
  /* Receives an FD from a UNIX socket. Returns the received FD, or -errno. */
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c

index 50818904397c577e6953b7fd66bbfe894827b120..2d6d9b43d958cfb7c247e2cfa1fdbdf7a48c4c08 100644 (file)
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -241,9 +241,11 @@ struct mnt_opt {
         const char *const data;
  };
  
-const struct mnt_opt mnt_tmp = {
+#define MNT_TMP_DATA "size=4m,mode=700"
+
+static const struct mnt_opt mnt_tmp = {
         .type = "tmpfs",
-       .data = "size=4m,mode=700",
+       .data = MNT_TMP_DATA,
  };
  
  static int mount_opt(const struct mnt_opt *const mnt, const char *const target)
@@ -4632,7 +4634,10 @@ FIXTURE_VARIANT(layout3_fs)
  /* clang-format off */
  FIXTURE_VARIANT_ADD(layout3_fs, tmpfs) {
         /* clang-format on */
-       .mnt = mnt_tmp,
+       .mnt = {
+               .type = "tmpfs",
+               .data = MNT_TMP_DATA,
+       },
         .file_path = file1_s1d1,
  };
  
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c

index ea5f727dd25778df7def21365eae073981ee08fe..936cfc879f1d2c419195338a8af04c095fe770f8 100644 (file)
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -17,6 +17,7 @@
  #include <string.h>
  #include <sys/prctl.h>
  #include <sys/socket.h>
+#include <sys/syscall.h>
  #include <sys/un.h>
  
  #include "common.h"
@@ -54,6 +55,11 @@ struct service_fixture {
         };
  };
  
+static pid_t sys_gettid(void)
+{
+       return syscall(__NR_gettid);
+}
+
  static int set_service(struct service_fixture *const srv,
                        const struct protocol_variant prot,
                        const unsigned short index)
@@ -88,7 +94,7 @@ static int set_service(struct service_fixture *const srv,
         case AF_UNIX:
                 srv->unix_addr.sun_family = prot.domain;
                 sprintf(srv->unix_addr.sun_path,
-                       "_selftests-landlock-net-tid%d-index%d", gettid(),
+                       "_selftests-landlock-net-tid%d-index%d", sys_gettid(),
                         index);
                 srv->unix_addr_len = SUN_LEN(&srv->unix_addr);
                 srv->unix_addr.sun_path[0] = '\0';
@@ -101,8 +107,11 @@ static void setup_loopback(struct __test_metadata *const _metadata)
  {
         set_cap(_metadata, CAP_SYS_ADMIN);
         ASSERT_EQ(0, unshare(CLONE_NEWNET));
-       ASSERT_EQ(0, system("ip link set dev lo up"));
         clear_cap(_metadata, CAP_SYS_ADMIN);
+
+       set_ambient_cap(_metadata, CAP_NET_ADMIN);
+       ASSERT_EQ(0, system("ip link set dev lo up"));
+       clear_ambient_cap(_metadata, CAP_NET_ADMIN);
  }
  
  static bool is_restricted(const struct protocol_variant *const prot,
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh

index c8416c54b4637b1380810f0c9f71bc99e1710b8e..b1fd7362c2feec339228036dace5d28fe1b1719b 100644 (file)
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -42,17 +42,6 @@ function die() {
         exit 1
  }
  
-# save existing dmesg so we can detect new content
-function save_dmesg() {
-       SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX)
-       dmesg > "$SAVED_DMESG"
-}
-
-# cleanup temporary dmesg file from save_dmesg()
-function cleanup_dmesg_file() {
-       rm -f "$SAVED_DMESG"
-}
-
  function push_config() {
         DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \
                         awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}')
@@ -99,7 +88,6 @@ function set_ftrace_enabled() {
  
  function cleanup() {
         pop_config
-       cleanup_dmesg_file
  }
  
  # setup_config - save the current config and set a script exit trap that
@@ -280,7 +268,15 @@ function set_pre_patch_ret {
  function start_test {
         local test="$1"
  
-       save_dmesg
+       # Dump something unique into the dmesg log, then stash the entry
+       # in LAST_DMESG.  The check_result() function will use it to
+       # find new kernel messages since the test started.
+       local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)"
+       log "$last_dmesg_msg"
+       loop_until 'dmesg | grep -q "$last_dmesg_msg"' ||
+               die "buffer busy? can't find canary dmesg message: $last_dmesg_msg"
+       LAST_DMESG=$(dmesg | grep "$last_dmesg_msg")
+
         echo -n "TEST: $test ... "
         log "===== TEST: $test ====="
  }
@@ -291,23 +287,24 @@ function check_result {
         local expect="$*"
         local result
  
-       # Note: when comparing dmesg output, the kernel log timestamps
-       # help differentiate repeated testing runs.  Remove them with a
-       # post-comparison sed filter.
-
-       result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \
+       # Test results include any new dmesg entry since LAST_DMESG, then:
+       # - include lines matching keywords
+       # - exclude lines matching keywords
+       # - filter out dmesg timestamp prefixes
+       result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \
                  grep -e 'livepatch:' -e 'test_klp' | \
                  grep -v '\(tainting\|taints\) kernel' | \
                  sed 's/^\[[ 0-9.]*\] //')
  
         if [[ "$expect" == "$result" ]] ; then
                 echo "ok"
+       elif [[ "$result" == "" ]] ; then
+               echo -e "not ok\n\nbuffer overrun? can't find canary dmesg entry: $LAST_DMESG\n"
+               die "livepatch kselftest(s) failed"
         else
                 echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n"
                 die "livepatch kselftest(s) failed"
         fi
-
-       cleanup_dmesg_file
  }
  
  # check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh

index 0899019a7fcb4b04bcedca44227f2c2dd5a83597..e14bdd4455f2d2798077b8a701790bcee0732e90 100755 (executable)
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
  # SPDX-License-Identifier: GPL-2.0
  
  # Kselftest framework requirement - SKIP code is 4.
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c

index 380b691d3eb9fbe9c1070937d9561b732343aec0..b748c48908d9d4af9ba31fe7d2443329c13c3dc2 100644 (file)
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot,
         if (map_ptr_orig == MAP_FAILED)
                 err(2, "initial mmap");
  
-       if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE))
+       if (madvise(map_ptr, len, MADV_HUGEPAGE))
                 err(2, "MADV_HUGEPAGE");
  
         pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c

index 193281560b61be23d3b55857030ea07b4ad3f95d..86e8f2048a409028b28ece3f755d06f535726c47 100644 (file)
--- a/tools/testing/selftests/mm/map_hugetlb.c
+++ b/tools/testing/selftests/mm/map_hugetlb.c
@@ -15,6 +15,7 @@
  #include <unistd.h>
  #include <sys/mman.h>
  #include <fcntl.h>
+#include "vm_util.h"
  
  #define LENGTH (256UL*1024*1024)
  #define PROTECTION (PROT_READ | PROT_WRITE)
@@ -58,10 +59,16 @@ int main(int argc, char **argv)
  {
         void *addr;
         int ret;
+       size_t hugepage_size;
         size_t length = LENGTH;
         int flags = FLAGS;
         int shift = 0;
  
+       hugepage_size = default_huge_page_size();
+       /* munmap with fail if the length is not page aligned */
+       if (hugepage_size > length)
+               length = hugepage_size;
+
         if (argc > 1)
                 length = atol(argv[1]) << 20;
         if (argc > 2) {
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c

index 1d4c1589c3055d3bb22eebe2c02fa7b015e4a665..2f8b991f78cb4cade90dc05f502a647a955fb582 100644 (file)
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -360,7 +360,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
                               char pattern_seed)
  {
         void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
-       unsigned long long i;
+       int d;
+       unsigned long long t;
         struct timespec t_start = {0, 0}, t_end = {0, 0};
         long long  start_ns, end_ns, align_mask, ret, offset;
         unsigned long long threshold;
@@ -378,8 +379,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
  
         /* Set byte pattern for source block. */
         srand(pattern_seed);
-       for (i = 0; i < threshold; i++)
-               memset((char *) src_addr + i, (char) rand(), 1);
+       for (t = 0; t < threshold; t++)
+               memset((char *) src_addr + t, (char) rand(), 1);
  
         /* Mask to zero out lower bits of address for alignment */
         align_mask = ~(c.dest_alignment - 1);
@@ -420,8 +421,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
  
                 /* Set byte pattern for the dest preamble block. */
                 srand(pattern_seed);
-               for (i = 0; i < c.dest_preamble_size; i++)
-                       memset((char *) dest_preamble_addr + i, (char) rand(), 1);
+               for (d = 0; d < c.dest_preamble_size; d++)
+                       memset((char *) dest_preamble_addr + d, (char) rand(), 1);
         }
  
         clock_gettime(CLOCK_MONOTONIC, &t_start);
@@ -437,14 +438,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
  
         /* Verify byte pattern after remapping */
         srand(pattern_seed);
-       for (i = 0; i < threshold; i++) {
+       for (t = 0; t < threshold; t++) {
                 char c = (char) rand();
  
-               if (((char *) dest_addr)[i] != c) {
+               if (((char *) dest_addr)[t] != c) {
                         ksft_print_msg("Data after remap doesn't match at offset %llu\n",
-                                      i);
+                                      t);
                         ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
-                                       ((char *) dest_addr)[i] & 0xff);
+                                       ((char *) dest_addr)[t] & 0xff);
                         ret = -1;
                         goto clean_up_dest;
                 }
@@ -453,14 +454,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
         /* Verify the dest preamble byte pattern after remapping */
         if (c.dest_preamble_size) {
                 srand(pattern_seed);
-               for (i = 0; i < c.dest_preamble_size; i++) {
+               for (d = 0; d < c.dest_preamble_size; d++) {
                         char c = (char) rand();
  
-                       if (((char *) dest_preamble_addr)[i] != c) {
+                       if (((char *) dest_preamble_addr)[d] != c) {
                                 ksft_print_msg("Preamble data after remap doesn't match at offset %d\n",
-                                              i);
+                                              d);
                                 ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
-                                              ((char *) dest_preamble_addr)[i] & 0xff);
+                                              ((char *) dest_preamble_addr)[d] & 0xff);
                                 ret = -1;
                                 goto clean_up_dest;
                         }
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c

index cce90a10515ad2fe78fe68147d26732d717c3bb6..2b9f8cc52639d1942238b41a1ad55edc6bd406ed 100644 (file)
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1517,6 +1517,12 @@ int main(int argc, char *argv[])
                                 continue;
  
                         uffd_test_start("%s on %s", test->name, mem_type->name);
+                       if ((mem_type->mem_flag == MEM_HUGETLB ||
+                           mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
+                           (default_huge_page_size() == 0)) {
+                               uffd_test_skip("huge page size is 0, feature missing?");
+                               continue;
+                       }
                         if (!uffd_feature_supported(test)) {
                                 uffd_test_skip("feature missing");
                                 continue;
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh

index 45cae7cab27e12705c59cc56f6fdf5e675805f92..a0a75f3029043727b96bdb59728ed80d4d5cd9c0 100755 (executable)
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -29,9 +29,15 @@ check_supported_x86_64()
         # See man 1 gzip under '-f'.
         local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
  
+       local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;}
+               else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
+
         if [[ "${pg_table_levels}" -lt 5 ]]; then
                 echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
                 exit $ksft_skip
+       elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
+               echo "$0: CPU does not have the necessary la57 flag to support page table level 5"
+               exit $ksft_skip
         fi
  }
  
diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh

index 70a02301f4c276ba6313c3baa1ab3b5058a68b0c..3d2d2eb9d6fff077cca24fd82a2a4990c34706d1 100755 (executable)
--- a/tools/testing/selftests/mm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
  # SPDX-License-Identifier: GPL-2.0
  
  set -e
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile

index 50818075e566e1abf1f2f9e587951e5abed238fc..211753756bdee87daf7ebb1af06dbb4c1f6ee383 100644 (file)
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -53,8 +53,7 @@ TEST_PROGS += bind_bhash.sh
  TEST_PROGS += ip_local_port_range.sh
  TEST_PROGS += rps_default_mask.sh
  TEST_PROGS += big_tcp.sh
-TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
-TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh
+TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
  TEST_GEN_FILES =  socket nettest
  TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
  TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
@@ -84,6 +83,7 @@ TEST_PROGS += sctp_vrf.sh
  TEST_GEN_FILES += sctp_hello
  TEST_GEN_FILES += csum
  TEST_GEN_FILES += nat6to4.o
+TEST_GEN_FILES += xdp_dummy.o
  TEST_GEN_FILES += ip_local_port_range
  TEST_GEN_FILES += bind_wildcard
  TEST_PROGS += test_vxlan_mdb.sh
@@ -95,6 +95,7 @@ TEST_PROGS += fq_band_pktlimit.sh
  TEST_PROGS += vlan_hw_filter.sh
  
  TEST_FILES := settings
+TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
  
  include ../lib.mk
  
@@ -104,7 +105,7 @@ $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
  $(OUTPUT)/bind_bhash: LDLIBS += -lpthread
  $(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
  
-# Rules to generate bpf obj nat6to4.o
+# Rules to generate bpf objs
  CLANG ?= clang
  SCRATCH_DIR := $(OUTPUT)/tools
  BUILD_DIR := $(SCRATCH_DIR)/build
@@ -139,7 +140,7 @@ endif
  
  CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
  
-$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS)
+$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
         $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
  
  $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)                    \
diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh

index cde9a91c479716e178c569cac18bedc9698357b6..2db9d15cd45feafc007669ae358372249c74151a 100755 (executable)
--- a/tools/testing/selftests/net/big_tcp.sh
+++ b/tools/testing/selftests/net/big_tcp.sh
@@ -122,7 +122,9 @@ do_netperf() {
         local netns=$1
  
         [ "$NF" = "6" ] && serip=$SERVER_IP6
-       ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null
+
+       # use large write to be sure to generate big tcp packets
+       ip net exec $netns netperf -$NF -t TCP_STREAM -l 1 -H $serip -- -m 262144 2>&1 >/dev/null
  }
  
  do_test() {
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh

index f30bd57d5e38744de09a86cab47ed4999ba46e25..8bc23fb4c82b71c88c6a0f292e8735b831a5d03f 100755 (executable)
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -89,7 +89,7 @@ for ovr in setsock cmsg both diff; do
         check_result $? 0 "TCLASS $prot $ovr - pass"
  
         while [ -d /proc/$BG ]; do
-           $NSEXE ./cmsg_sender -6 -p u $TGT6 1234
+           $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
         done
  
         tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
@@ -126,7 +126,7 @@ for ovr in setsock cmsg both diff; do
         check_result $? 0 "HOPLIMIT $prot $ovr - pass"
  
         while [ -d /proc/$BG ]; do
-           $NSEXE ./cmsg_sender -6 -p u $TGT6 1234
+           $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
         done
  
         tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config

index 19ff7505166096483d79709676c03eb8a9135fc5..5e4390cac17eda2c96d35c5cdcdf6899610e4367 100644 (file)
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -19,17 +19,27 @@ CONFIG_BRIDGE_VLAN_FILTERING=y
  CONFIG_BRIDGE=y
  CONFIG_CRYPTO_CHACHA20POLY1305=m
  CONFIG_VLAN_8021Q=y
+CONFIG_GENEVE=m
  CONFIG_IFB=y
  CONFIG_INET_DIAG=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
  CONFIG_IP_GRE=m
  CONFIG_NETFILTER=y
  CONFIG_NETFILTER_ADVANCED=y
  CONFIG_NF_CONNTRACK=m
+CONFIG_IPV6_SIT=y
+CONFIG_IP_DCCP=m
  CONFIG_NF_NAT=m
  CONFIG_IP6_NF_IPTABLES=m
  CONFIG_IP_NF_IPTABLES=m
  CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_RAW=m
  CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_TTL=m
  CONFIG_IPV6_GRE=m
  CONFIG_IPV6_SEG6_LWTUNNEL=y
  CONFIG_L2TP_ETH=m
@@ -45,16 +55,26 @@ CONFIG_NF_TABLES=m
  CONFIG_NF_TABLES_IPV6=y
  CONFIG_NF_TABLES_IPV4=y
  CONFIG_NFT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_CT=m
  CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_PEDIT=m
  CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_MATCHALL=m
  CONFIG_NET_CLS_U32=m
  CONFIG_NET_IPGRE_DEMUX=m
  CONFIG_NET_IPGRE=m
+CONFIG_NET_IPIP=y
  CONFIG_NET_SCH_FQ_CODEL=m
  CONFIG_NET_SCH_HTB=m
  CONFIG_NET_SCH_FQ=m
  CONFIG_NET_SCH_ETF=m
  CONFIG_NET_SCH_NETEM=y
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NF_FLOW_TABLE=m
  CONFIG_PSAMPLE=m
  CONFIG_TCP_MD5SIG=y
  CONFIG_TEST_BLACKHOLE_DEV=m
@@ -63,7 +83,6 @@ CONFIG_TLS=m
  CONFIG_TRACEPOINTS=y
  CONFIG_NET_DROP_MONITOR=m
  CONFIG_NETDEVSIM=m
-CONFIG_NET_FOU=m
  CONFIG_MPLS_ROUTING=m
  CONFIG_MPLS_IPTUNNEL=m
  CONFIG_NET_SCH_INGRESS=m
@@ -80,3 +99,4 @@ CONFIG_IP_SCTP=m
  CONFIG_NETFILTER_XT_MATCH_POLICY=m
  CONFIG_CRYPTO_ARIA=y
  CONFIG_XFRM_INTERFACE=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile

index 452693514be4b06842dbe32088c5495c2c933f0b..4de92632f48360c0002900260af9b35d34186f4b 100644 (file)
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -112,7 +112,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
         vxlan_symmetric_ipv6.sh \
         vxlan_symmetric.sh
  
-TEST_PROGS_EXTENDED := devlink_lib.sh \
+TEST_FILES := devlink_lib.sh \
         ethtool_lib.sh \
         fib_offload_lib.sh \
         forwarding.config.sample \
diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh

index 9af9f6964808baee53e69c51ad1adb4128fc700a..c62331b2e006069e8812dedf797968c24726493d 100755 (executable)
--- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -327,10 +327,10 @@ locked_port_mab_redirect()
         RET=0
         check_port_mab_support || return 0
  
-       bridge link set dev $swp1 learning on locked on mab on
         tc qdisc add dev $swp1 clsact
         tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \
                 action mirred egress redirect dev $swp2
+       bridge link set dev $swp1 learning on locked on mab on
  
         ping_do $h1 192.0.2.2
         check_err $? "Ping did not work with redirection"
@@ -349,8 +349,8 @@ locked_port_mab_redirect()
         check_err $? "Locked entry not created after deleting filter"
  
         bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master
-       tc qdisc del dev $swp1 clsact
         bridge link set dev $swp1 learning off locked off mab off
+       tc qdisc del dev $swp1 clsact
  
         log_test "Locked port MAB redirect"
  }
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh

index 61348f71728cd54537f49e17b192e08514323b4c..d9d587454d207931a539f59be15cbc63d471888f 100755 (executable)
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -329,7 +329,7 @@ __cfg_test_port_ip_star_g()
  
         bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
         check_err $? "(*, G) \"permanent\" entry has a pending group timer"
-       bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+       bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
         check_err $? "\"permanent\" source entry has a pending source timer"
  
         bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -346,7 +346,7 @@ __cfg_test_port_ip_star_g()
  
         bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
         check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer"
-       bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+       bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
         check_err $? "\"blocked\" source entry has a pending source timer"
  
         bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -363,7 +363,7 @@ __cfg_test_port_ip_star_g()
  
         bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
         check_err $? "(*, G) INCLUDE entry has a pending group timer"
-       bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+       bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
         check_fail $? "Source entry does not have a pending source timer"
  
         bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -1252,14 +1252,17 @@ fwd_test()
         echo
         log_info "# Forwarding tests"
  
+       # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+       # bridge will start forwarding according to its MDB soon after a
+       # multicast querier is enabled.
+       ip link set dev br0 type bridge mcast_query_response_interval 100
+
         # Forwarding according to MDB entries only takes place when the bridge
         # detects that there is a valid querier in the network. Set the bridge
         # as the querier and assign it a valid IPv6 link-local address to be
         # used as the source address for MLD queries.
         ip -6 address add fe80::1/64 nodad dev br0
         ip link set dev br0 type bridge mcast_querier 1
-       # Wait the default Query Response Interval (10 seconds) for the bridge
-       # to determine that there are no other queriers in the network.
         sleep 10
  
         fwd_test_host
@@ -1267,6 +1270,7 @@ fwd_test()
  
         ip link set dev br0 type bridge mcast_querier 0
         ip -6 address del fe80::1/64 dev br0
+       ip link set dev br0 type bridge mcast_query_response_interval 1000
  }
  
  ctrl_igmpv3_is_in_test()
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh

index b0f5e55d2d0b2584aefacc135ffe6b2d2cab34fc..58962963650227bcc942354a052d8bf2bd95aa13 100755 (executable)
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -235,9 +235,6 @@ mirred_egress_to_ingress_tcp_test()
         check_err $? "didn't mirred redirect ICMP"
         tc_check_packets "dev $h1 ingress" 102 10
         check_err $? "didn't drop mirred ICMP"
-       local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
-       test ${overlimits} = 10
-       check_err $? "wrong overlimits, expected 10 got ${overlimits}"
  
         tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
         tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh

index 20a7cb7222b8baa4062a769f2f0d27daf2b00cfd..c2420bb72c128119f005de590e36952dc5960f36 100755 (executable)
--- a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -209,14 +209,17 @@ test_l2_miss_multicast()
         # both registered and unregistered multicast traffic.
         bridge link set dev $swp2 mcast_router 2
  
+       # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+       # bridge will start forwarding according to its MDB soon after a
+       # multicast querier is enabled.
+       ip link set dev br1 type bridge mcast_query_response_interval 100
+
         # Forwarding according to MDB entries only takes place when the bridge
         # detects that there is a valid querier in the network. Set the bridge
         # as the querier and assign it a valid IPv6 link-local address to be
         # used as the source address for MLD queries.
         ip link set dev br1 type bridge mcast_querier 1
         ip -6 address add fe80::1/64 nodad dev br1
-       # Wait the default Query Response Interval (10 seconds) for the bridge
-       # to determine that there are no other queriers in the network.
         sleep 10
  
         test_l2_miss_multicast_ipv4
@@ -224,6 +227,7 @@ test_l2_miss_multicast()
  
         ip -6 address del fe80::1/64 dev br1
         ip link set dev br1 type bridge mcast_querier 0
+       ip link set dev br1 type bridge mcast_query_response_interval 1000
         bridge link set dev $swp2 mcast_router 1
  }
  
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh

index 19352f106c1dff1b316ef9991e32c9d78142ac45..02c21ff4ca81fddc89ca697fe3d3f04a5dc792c8 100755 (executable)
--- a/tools/testing/selftests/net/gro.sh
+++ b/tools/testing/selftests/net/gro.sh
@@ -31,6 +31,11 @@ run_test() {
        1>>log.txt
      wait "${server_pid}"
      exit_code=$?
+    if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
+          ${exit_code} -ne 0 ]]; then
+        echo "Ignoring errors due to slow environment" 1>&2
+        exit_code=0
+    fi
      if [[ "${exit_code}" -eq 0 ]]; then
          break;
      fi
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh

index fe59ca3e5596bfe3abfbb477dad2d8bcbb608a56..12491850ae985a779b069662ccba312a3dc1964e 100755 (executable)
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -367,14 +367,12 @@ run_test()
    local desc=$2
    local node_src=$3
    local node_dst=$4
-  local ip6_src=$5
-  local ip6_dst=$6
-  local if_dst=$7
-  local trace_type=$8
-  local ioam_ns=$9
-
-  ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
-         $trace_type $ioam_ns &
+  local ip6_dst=$5
+  local trace_type=$6
+  local ioam_ns=$7
+  local type=$8
+
+  ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type &
    local spid=$!
    sleep 0.1
  
@@ -489,7 +487,7 @@ out_undef_ns()
           trace prealloc type 0x800000 ns 0 size 4 dev veth0
  
    run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
-         db01::2 db01::1 veth0 0x800000 0
+         db01::1 0x800000 0 $1
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
  }
@@ -509,7 +507,7 @@ out_no_room()
           trace prealloc type 0xc00000 ns 123 size 4 dev veth0
  
    run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
-         db01::2 db01::1 veth0 0xc00000 123
+         db01::1 0xc00000 123 $1
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
  }
@@ -543,14 +541,14 @@ out_bits()
        if [ $cmd_res != 0 ]
        then
          npassed=$((npassed+1))
-        log_test_passed "$descr"
+        log_test_passed "$descr ($1 mode)"
        else
          nfailed=$((nfailed+1))
-        log_test_failed "$descr"
+        log_test_failed "$descr ($1 mode)"
        fi
      else
         run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \
-           $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123
+           $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
      fi
    done
  
@@ -574,7 +572,7 @@ out_full_supp_trace()
           trace prealloc type 0xfff002 ns 123 size 100 dev veth0
  
    run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
-         db01::2 db01::1 veth0 0xfff002 123
+         db01::1 0xfff002 123 $1
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
  }
@@ -604,7 +602,7 @@ in_undef_ns()
           trace prealloc type 0x800000 ns 0 size 4 dev veth0
  
    run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
-         db01::2 db01::1 veth0 0x800000 0
+         db01::1 0x800000 0 $1
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
  }
@@ -624,7 +622,7 @@ in_no_room()
           trace prealloc type 0xc00000 ns 123 size 4 dev veth0
  
    run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
-         db01::2 db01::1 veth0 0xc00000 123
+         db01::1 0xc00000 123 $1
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
  }
@@ -651,7 +649,7 @@ in_bits()
             dev veth0
  
      run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \
-           $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123
+           $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
    done
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
@@ -679,7 +677,7 @@ in_oflag()
           trace prealloc type 0xc00000 ns 123 size 4 dev veth0
  
    run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
-         db01::2 db01::1 veth0 0xc00000 123
+         db01::1 0xc00000 123 $1
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
  
@@ -703,7 +701,7 @@ in_full_supp_trace()
           trace prealloc type 0xfff002 ns 123 size 80 dev veth0
  
    run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
-         db01::2 db01::1 veth0 0xfff002 123
+         db01::1 0xfff002 123 $1
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
  }
@@ -731,7 +729,7 @@ fwd_full_supp_trace()
           trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0
  
    run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \
-         db01::2 db02::2 veth0 0xfff002 123
+         db02::2 0xfff002 123 $1
  
    [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down
  }
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c

index d9d1d41901267439aac832166e46410c85f44111..895e5bb5044bb126dc9894cf35a68d7cf1c79ec7 100644 (file)
--- a/tools/testing/selftests/net/ioam6_parser.c
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -8,7 +8,6 @@
  #include <errno.h>
  #include <limits.h>
  #include <linux/const.h>
-#include <linux/if_ether.h>
  #include <linux/ioam6.h>
  #include <linux/ipv6.h>
  #include <stdlib.h>
@@ -512,14 +511,6 @@ static int str2id(const char *tname)
         return -1;
  }
  
-static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
-{
-       return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
-               (a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
-               (a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
-               (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
-}
-
  static int get_u32(__u32 *val, const char *arg, int base)
  {
         unsigned long res;
@@ -603,70 +594,80 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
  
  int main(int argc, char **argv)
  {
-       int fd, size, hoplen, tid, ret = 1;
-       struct in6_addr src, dst;
+       int fd, size, hoplen, tid, ret = 1, on = 1;
         struct ioam6_hdr *opt;
-       struct ipv6hdr *ip6h;
-       __u8 buffer[400], *p;
-       __u16 ioam_ns;
+       struct cmsghdr *cmsg;
+       struct msghdr msg;
+       struct iovec iov;
+       __u8 buffer[512];
         __u32 tr_type;
+       __u16 ioam_ns;
+       __u8 *ptr;
  
-       if (argc != 7)
+       if (argc != 5)
                 goto out;
  
-       tid = str2id(argv[2]);
+       tid = str2id(argv[1]);
         if (tid < 0 || !func[tid])
                 goto out;
  
-       if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
-           inet_pton(AF_INET6, argv[4], &dst) != 1)
+       if (get_u32(&tr_type, argv[2], 16) ||
+           get_u16(&ioam_ns, argv[3], 0))
                 goto out;
  
-       if (get_u32(&tr_type, argv[5], 16) ||
-           get_u16(&ioam_ns, argv[6], 0))
+       fd = socket(PF_INET6, SOCK_RAW,
+                   !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6);
+       if (fd < 0)
                 goto out;
  
-       fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
-       if (!fd)
-               goto out;
+       setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS,  &on, sizeof(on));
  
-       if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
-                      argv[1], strlen(argv[1])))
+       iov.iov_len = 1;
+       iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer)));
+       if (!iov.iov_base)
                 goto close;
-
  recv:
-       size = recv(fd, buffer, sizeof(buffer), 0);
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+       msg.msg_control = buffer;
+       msg.msg_controllen = CMSG_SPACE(sizeof(buffer));
+
+       size = recvmsg(fd, &msg, 0);
         if (size <= 0)
                 goto close;
  
-       ip6h = (struct ipv6hdr *)buffer;
+       for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+               if (cmsg->cmsg_level != IPPROTO_IPV6 ||
+                   cmsg->cmsg_type != IPV6_HOPOPTS ||
+                   cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr))
+                       continue;
  
-       if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
-           !ipv6_addr_equal(&ip6h->daddr, &dst))
-               goto recv;
+               ptr = (__u8 *)CMSG_DATA(cmsg);
  
-       if (ip6h->nexthdr != IPPROTO_HOPOPTS)
-               goto close;
+               hoplen = (ptr[1] + 1) << 3;
+               ptr += sizeof(struct ipv6_hopopt_hdr);
  
-       p = buffer + sizeof(*ip6h);
-       hoplen = (p[1] + 1) << 3;
-       p += sizeof(struct ipv6_hopopt_hdr);
+               while (hoplen > 0) {
+                       opt = (struct ioam6_hdr *)ptr;
  
-       while (hoplen > 0) {
-               opt = (struct ioam6_hdr *)p;
+                       if (opt->opt_type == IPV6_TLV_IOAM &&
+                           opt->type == IOAM6_TYPE_PREALLOC) {
+                               ptr += sizeof(*opt);
+                               ret = func[tid](tid,
+                                               (struct ioam6_trace_hdr *)ptr,
+                                               tr_type, ioam_ns);
+                               goto close;
+                       }
  
-               if (opt->opt_type == IPV6_TLV_IOAM &&
-                   opt->type == IOAM6_TYPE_PREALLOC) {
-                       p += sizeof(*opt);
-                       ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
-                                          tr_type, ioam_ns);
-                       break;
+                       ptr += opt->opt_len + 2;
+                       hoplen -= opt->opt_len + 2;
                 }
-
-               p += opt->opt_len + 2;
-               hoplen -= opt->opt_len + 2;
         }
+
+       goto recv;
  close:
+       free(iov.iov_base);
         close(fd);
  out:
         return ret;
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c

index 0f217a1cc837de22de278d15f7cfb6bbdb372c50..6ebd58869a637227319524c0bdc69fe2495a37c9 100644 (file)
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -16,6 +16,10 @@
  #define IP_LOCAL_PORT_RANGE 51
  #endif
  
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
  static __u32 pack_port_range(__u16 lo, __u16 hi)
  {
         return (hi << 16) | (lo << 0);
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh

index dca549443801135cf8db35f9545885a6ab772504..f9fe182dfbd44e9de0f0caa27b409281c0584081 100644 (file)
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -4,6 +4,9 @@
  ##############################################################################
  # Defines
  
+WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
+BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
  # Kselftest framework requirement - SKIP code is 4.
  ksft_skip=4
  # namespace list created by setup_ns
@@ -48,7 +51,7 @@ cleanup_ns()
  
         for ns in "$@"; do
                 ip netns delete "${ns}" &> /dev/null
-               if ! busywait 2 ip netns list \| grep -vq "^$ns$" &> /dev/null; then
+               if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
                         echo "Warn: Failed to remove namespace $ns"
                         ret=1
                 fi
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config

index e317c2e44dae840149fad7fe14a3a41d699b063e..4f80014cae4940a3f56ebb313349baa8540c0a0a 100644 (file)
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -22,8 +22,11 @@ CONFIG_NFT_TPROXY=m
  CONFIG_NFT_SOCKET=m
  CONFIG_IP_ADVANCED_ROUTER=y
  CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_MANGLE=m
  CONFIG_IP_NF_TARGET_REJECT=m
  CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IP6_NF_FILTER=m
  CONFIG_NET_ACT_CSUM=m
  CONFIG_NET_ACT_PEDIT=m
  CONFIG_NET_CLS_ACT=y
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh

index 04fcb8a077c995c768d222171c045caf2ab3c3b3..f300f4e1eb59f89b1d3096f7a19821dc4be92402 100755 (executable)
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -20,7 +20,7 @@ flush_pids()
  
         ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
  
-       for _ in $(seq 10); do
+       for _ in $(seq $((timeout_poll * 10))); do
                 [ -z "$(ip netns pids "${ns}")" ] && break
                 sleep 0.1
         done
@@ -62,8 +62,8 @@ __chk_nr()
         nr=$(eval $command)
  
         printf "%-50s" "$msg"
-       if [ $nr != $expected ]; then
-               if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then
+       if [ "$nr" != "$expected" ]; then
+               if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then
                         echo "[ skip ] Feature probably not supported"
                         mptcp_lib_result_skip "${msg}"
                 else
@@ -91,6 +91,15 @@ chk_msk_nr()
         __chk_msk_nr "grep -c token:" "$@"
  }
  
+chk_listener_nr()
+{
+       local expected=$1
+       local msg="$2"
+
+       __chk_nr "ss -inmlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0
+       __chk_nr "ss -inmlHtON $ns | wc -l" "$expected" "$msg - subflows"
+}
+
  wait_msk_nr()
  {
         local condition="grep -c token:"
@@ -166,9 +175,13 @@ chk_msk_listen()
  chk_msk_inuse()
  {
         local expected=$1
-       local msg="$2"
+       local msg="....chk ${2:-${expected}} msk in use"
         local listen_nr
  
+       if [ "${expected}" -eq 0 ]; then
+               msg+=" after flush"
+       fi
+
         listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN)
         expected=$((expected + listen_nr))
  
@@ -179,16 +192,21 @@ chk_msk_inuse()
                 sleep 0.1
         done
  
-       __chk_nr get_msk_inuse $expected "$msg" 0
+       __chk_nr get_msk_inuse $expected "${msg}" 0
  }
  
  # $1: cestab nr
  chk_msk_cestab()
  {
-       local cestab=$1
+       local expected=$1
+       local msg="....chk ${2:-${expected}} cestab"
+
+       if [ "${expected}" -eq 0 ]; then
+               msg+=" after flush"
+       fi
  
         __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \
-                "${cestab}" "....chk ${cestab} cestab" ""
+                "${expected}" "${msg}" ""
  }
  
  wait_connected()
@@ -227,12 +245,12 @@ wait_connected $ns 10000
  chk_msk_nr 2 "after MPC handshake "
  chk_msk_remote_key_nr 2 "....chk remote_key"
  chk_msk_fallback_nr 0 "....chk no fallback"
-chk_msk_inuse 2 "....chk 2 msk in use"
+chk_msk_inuse 2
  chk_msk_cestab 2
  flush_pids
  
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
-chk_msk_cestab 0
+chk_msk_inuse 0 "2->0"
+chk_msk_cestab 0 "2->0"
  
  echo "a" | \
         timeout ${timeout_test} \
@@ -247,12 +265,12 @@ echo "b" | \
                                 127.0.0.1 >/dev/null &
  wait_connected $ns 10001
  chk_msk_fallback_nr 1 "check fallback"
-chk_msk_inuse 1 "....chk 1 msk in use"
+chk_msk_inuse 1
  chk_msk_cestab 1
  flush_pids
  
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
-chk_msk_cestab 0
+chk_msk_inuse 0 "1->0"
+chk_msk_cestab 0 "1->0"
  
  NR_CLIENTS=100
  for I in `seq 1 $NR_CLIENTS`; do
@@ -273,12 +291,31 @@ for I in `seq 1 $NR_CLIENTS`; do
  done
  
  wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
-chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use"
-chk_msk_cestab $((NR_CLIENTS*2))
+chk_msk_inuse $((NR_CLIENTS*2)) "many"
+chk_msk_cestab $((NR_CLIENTS*2)) "many"
  flush_pids
  
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
-chk_msk_cestab 0
+chk_msk_inuse 0 "many->0"
+chk_msk_cestab 0 "many->0"
+
+chk_listener_nr 0 "no listener sockets"
+NR_SERVERS=100
+for I in $(seq 1 $NR_SERVERS); do
+       ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \
+               -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 &
+done
+
+for I in $(seq 1 $NR_SERVERS); do
+       mptcp_lib_wait_local_port_listen $ns $((I + 20001))
+done
+
+chk_listener_nr $NR_SERVERS "many listener sockets"
+
+# graceful termination
+for I in $(seq 1 $NR_SERVERS); do
+       echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 &
+done
+flush_pids
  
  mptcp_lib_result_print_all_tap
  exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh

index 3a5b630261910b1cfdd001e05c97c07335b61827..e4581b0dfb967723e36b1847c512f02f4bc87a45 100755 (executable)
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -161,6 +161,11 @@ check_tools()
                 exit $ksft_skip
         fi
  
+       if ! ss -h | grep -q MPTCP; then
+               echo "SKIP: ss tool does not support MPTCP"
+               exit $ksft_skip
+       fi
+
         # Use the legacy version if available to support old kernel versions
         if iptables-legacy -V &> /dev/null; then
                 iptables="iptables-legacy"
@@ -643,13 +648,6 @@ kill_events_pids()
         mptcp_lib_kill_wait $evts_ns2_pid
  }
  
-kill_tests_wait()
-{
-       #shellcheck disable=SC2046
-       kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1)
-       wait
-}
-
  pm_nl_set_limits()
  {
         local ns=$1
@@ -3340,16 +3338,17 @@ userspace_pm_rm_sf()
  {
         local evts=$evts_ns1
         local t=${3:-1}
-       local ip=4
+       local ip
         local tk da dp sp
         local cnt
  
         [ "$1" == "$ns2" ] && evts=$evts_ns2
-       if mptcp_lib_is_v6 $2; then ip=6; fi
+       [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4
+       [ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6
         tk=$(mptcp_lib_evts_get_info token "$evts")
-       da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t)
-       dp=$(mptcp_lib_evts_get_info dport "$evts" $t)
-       sp=$(mptcp_lib_evts_get_info sport "$evts" $t)
+       da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2)
+       dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2)
+       sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2)
  
         cnt=$(rm_sf_count ${1})
         ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \
@@ -3436,24 +3435,27 @@ userspace_tests()
         if reset_with_events "userspace pm add & remove address" &&
            continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
                 set_userspace_pm $ns1
-               pm_nl_set_limits $ns2 1 1
+               pm_nl_set_limits $ns2 2 2
                 speed=5 \
                         run_tests $ns1 $ns2 10.0.1.1 &
                 local tests_pid=$!
                 wait_mpj $ns1
                 userspace_pm_add_addr $ns1 10.0.2.1 10
-               chk_join_nr 1 1 1
-               chk_add_nr 1 1
-               chk_mptcp_info subflows 1 subflows 1
-               chk_subflows_total 2 2
-               chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+               userspace_pm_add_addr $ns1 10.0.3.1 20
+               chk_join_nr 2 2 2
+               chk_add_nr 2 2
+               chk_mptcp_info subflows 2 subflows 2
+               chk_subflows_total 3 3
+               chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
                 userspace_pm_rm_addr $ns1 10
                 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED
-               chk_rm_nr 1 1 invert
+               userspace_pm_rm_addr $ns1 20
+               userspace_pm_rm_sf $ns1 10.0.3.1 $SUB_ESTABLISHED
+               chk_rm_nr 2 2 invert
                 chk_mptcp_info subflows 0 subflows 0
                 chk_subflows_total 1 1
                 kill_events_pids
-               wait $tests_pid
+               mptcp_lib_kill_wait $tests_pid
         fi
  
         # userspace pm create destroy subflow
@@ -3475,7 +3477,7 @@ userspace_tests()
                 chk_mptcp_info subflows 0 subflows 0
                 chk_subflows_total 1 1
                 kill_events_pids
-               wait $tests_pid
+               mptcp_lib_kill_wait $tests_pid
         fi
  
         # userspace pm create id 0 subflow
@@ -3494,7 +3496,7 @@ userspace_tests()
                 chk_mptcp_info subflows 1 subflows 1
                 chk_subflows_total 2 2
                 kill_events_pids
-               wait $tests_pid
+               mptcp_lib_kill_wait $tests_pid
         fi
  
         # userspace pm remove initial subflow
@@ -3518,7 +3520,7 @@ userspace_tests()
                 chk_mptcp_info subflows 1 subflows 1
                 chk_subflows_total 1 1
                 kill_events_pids
-               wait $tests_pid
+               mptcp_lib_kill_wait $tests_pid
         fi
  
         # userspace pm send RM_ADDR for ID 0
@@ -3544,7 +3546,7 @@ userspace_tests()
                 chk_mptcp_info subflows 1 subflows 1
                 chk_subflows_total 1 1
                 kill_events_pids
-               wait $tests_pid
+               mptcp_lib_kill_wait $tests_pid
         fi
  }
  
@@ -3558,7 +3560,8 @@ endpoint_tests()
                 pm_nl_set_limits $ns2 2 2
                 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
                 speed=slow \
-                       run_tests $ns1 $ns2 10.0.1.1 2>/dev/null &
+                       run_tests $ns1 $ns2 10.0.1.1 &
+               local tests_pid=$!
  
                 wait_mpj $ns1
                 pm_nl_check_endpoint "creation" \
@@ -3573,7 +3576,7 @@ endpoint_tests()
                 pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
                 pm_nl_check_endpoint "modif is allowed" \
                         $ns2 10.0.2.2 id 1 flags signal
-               kill_tests_wait
+               mptcp_lib_kill_wait $tests_pid
         fi
  
         if reset "delete and re-add" &&
@@ -3582,7 +3585,8 @@ endpoint_tests()
                 pm_nl_set_limits $ns2 1 1
                 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
                 test_linkfail=4 speed=20 \
-                       run_tests $ns1 $ns2 10.0.1.1 2>/dev/null &
+                       run_tests $ns1 $ns2 10.0.1.1 &
+               local tests_pid=$!
  
                 wait_mpj $ns2
                 chk_subflow_nr "before delete" 2
@@ -3597,7 +3601,7 @@ endpoint_tests()
                 wait_mpj $ns2
                 chk_subflow_nr "after re-add" 2
                 chk_mptcp_info subflows 1 subflows 1
-               kill_tests_wait
+               mptcp_lib_kill_wait $tests_pid
         fi
  }
  
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh

index 022262a2cfe0ee59976d398f665c8057dfaea0d7..3777d66fc56d36a4770b164fd781af298cd4eb70 100644 (file)
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -6,7 +6,7 @@ readonly KSFT_FAIL=1
  readonly KSFT_SKIP=4
  
  # shellcheck disable=SC2155 # declare and assign separately
-readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g')
+readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
  
  MPTCP_LIB_SUBTESTS=()
  
@@ -213,9 +213,9 @@ mptcp_lib_get_info_value() {
         grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
  }
  
-# $1: info name ; $2: evts_ns ; $3: event type
+# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
  mptcp_lib_evts_get_info() {
-       mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}"
+       grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
  }
  
  # $1: PID
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh

index 8f4ff123a7eb92646845a5dea4caf28483057085..71899a3ffa7a9d7831c61f08b7f3b9c20aaed58e 100755 (executable)
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -183,7 +183,7 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
  subflow 10.0.1.1" "          (nobackup)"
  
  # fullmesh support has been added later
-ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null
  if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
     mptcp_lib_expect_all_features; then
         check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
@@ -194,6 +194,12 @@ subflow 10.0.1.1" "          (nofullmesh)"
         ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
         check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
  subflow,backup,fullmesh 10.0.1.1" "          (backup,fullmesh)"
+else
+       for st in fullmesh nofullmesh backup,fullmesh; do
+               st="          (${st})"
+               printf "%-50s%s\n" "${st}" "[SKIP]"
+               mptcp_lib_result_skip "${st}"
+       done
  fi
  
  mptcp_lib_result_print_all_tap
diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings

index 79b65bdf05db6586726cc76d3313f12368d21dc5..abc5648b59abde537dca90791404691050c759e2 100644 (file)
--- a/tools/testing/selftests/net/mptcp/settings
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -1 +1 @@
-timeout=1200
+timeout=1800
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh

index ae8ad5d6fb9dac680573b4207a67781e18773c09..8f9ddb3ad4fe83501f54a1ac5e62047108eea910 100755 (executable)
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -250,7 +250,8 @@ run_test()
                 [ $bail -eq 0 ] || exit $ret
         fi
  
-       printf "%-60s" "$msg - reverse direction"
+       msg+=" - reverse direction"
+       printf "%-60s" "${msg}"
         do_transfer $large $small $time
         lret=$?
         mptcp_lib_result_code "${lret}" "${msg}"
@@ -284,12 +285,12 @@ done
  
  setup
  run_test 10 10 0 0 "balanced bwidth"
-run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
  
  # we still need some additional infrastructure to pass the following test-cases
-run_test 30 10 0 0 "unbalanced bwidth"
-run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
-run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+run_test 10 3 0 0 "unbalanced bwidth"
+run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
+run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
  
  mptcp_lib_result_print_all_tap
  exit $ret
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh

index 6167837f48e17ef8ba0d41ba541f73da765f945e..1b94a75604fee98788ba5792b384ca4870bdafbb 100755 (executable)
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -75,7 +75,7 @@ print_test()
  {
         test_name="${1}"
  
-       _printf "%-63s" "${test_name}"
+       _printf "%-68s" "${test_name}"
  }
  
  print_results()
@@ -542,7 +542,7 @@ verify_subflow_events()
         local remid
         local info
  
-       info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})"
+       info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})"
  
         if [ "$e_type" = "$SUB_ESTABLISHED" ]
         then
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh

old mode 100755 (executable)

new mode 100644 (file)

index 4fe0bef..6596fe0
--- a/tools/testing/selftests/net/net_helper.sh
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -8,13 +8,16 @@ wait_local_port_listen()
         local listener_ns="${1}"
         local port="${2}"
         local protocol="${3}"
-       local port_hex
+       local pattern
         local i
  
-       port_hex="$(printf "%04X" "${port}")"
+       pattern=":$(printf "%04X" "${port}") "
+
+       # for tcp protocol additionally check the socket state
+       [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
         for i in $(seq 10); do
-               if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \
-                  grep -q "${port_hex}"; then
+               if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+                  /proc/net/"${protocol}"* | grep -q "${pattern}"; then
                         break
                 fi
                 sleep 0.1
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh

index f8499d4c87f3f763e774619666e00ce6a17d333b..36e40256ab92a696de62339dd7c7342df3468372 100755 (executable)
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -502,7 +502,20 @@ test_netlink_checks () {
             wc -l) == 2 ] || \
               return 1
  
+       info "Checking clone depth"
         ERR_MSG="Flow actions may not be safe on all matching packets"
+       PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+       ovs_add_flow "test_netlink_checks" nv0 \
+               'in_port(1),eth(),eth_type(0x800),ipv4()' \
+               'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \
+               >/dev/null 2>&1 && return 1
+       POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+       if [ "$PRE_TEST" == "$POST_TEST" ]; then
+               info "failed - clone depth too large"
+               return 1
+       fi
+
         PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
         ovs_add_flow "test_netlink_checks" nv0 \
                 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py

index b97e621face958838f1ad5cf2d12dfc5875db4f4..5e0e539a323d55a44802495ca356661ef8de783a 100644 (file)
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -299,7 +299,7 @@ class ovsactions(nla):
          ("OVS_ACTION_ATTR_PUSH_NSH", "none"),
          ("OVS_ACTION_ATTR_POP_NSH", "flag"),
          ("OVS_ACTION_ATTR_METER", "none"),
-        ("OVS_ACTION_ATTR_CLONE", "none"),
+        ("OVS_ACTION_ATTR_CLONE", "recursive"),
          ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
          ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
          ("OVS_ACTION_ATTR_DEC_TTL", "none"),
@@ -465,29 +465,42 @@ class ovsactions(nla):
                      print_str += "pop_mpls"
              else:
                  datum = self.get_attr(field[0])
-                print_str += datum.dpstr(more)
+                if field[0] == "OVS_ACTION_ATTR_CLONE":
+                    print_str += "clone("
+                    print_str += datum.dpstr(more)
+                    print_str += ")"
+                else:
+                    print_str += datum.dpstr(more)
  
          return print_str
  
      def parse(self, actstr):
+        totallen = len(actstr)
          while len(actstr) != 0:
              parsed = False
+            parencount = 0
              if actstr.startswith("drop"):
                  # If no reason is provided, the implicit drop is used (i.e no
                  # action). If some reason is given, an explicit action is used.
-                actstr, reason = parse_extract_field(
-                    actstr,
-                    "drop(",
-                    "([0-9]+)",
-                    lambda x: int(x, 0),
-                    False,
-                    None,
-                )
+                reason = None
+                if actstr.startswith("drop("):
+                    parencount += 1
+
+                    actstr, reason = parse_extract_field(
+                        actstr,
+                        "drop(",
+                        "([0-9]+)",
+                        lambda x: int(x, 0),
+                        False,
+                        None,
+                    )
+
                  if reason is not None:
                      self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason])
                      parsed = True
                  else:
-                    return
+                    actstr = actstr[len("drop"): ]
+                    return (totallen - len(actstr))
  
              elif parse_starts_block(actstr, "^(\d+)", False, True):
                  actstr, output = parse_extract_field(
@@ -504,6 +517,7 @@ class ovsactions(nla):
                      False,
                      0,
                  )
+                parencount += 1
                  self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid])
                  parsed = True
  
@@ -516,12 +530,22 @@ class ovsactions(nla):
  
              for flat_act in parse_flat_map:
                  if parse_starts_block(actstr, flat_act[0], False):
-                    actstr += len(flat_act[0])
+                    actstr = actstr[len(flat_act[0]):]
                      self["attrs"].append([flat_act[1]])
                      actstr = actstr[strspn(actstr, ", ") :]
                      parsed = True
  
-            if parse_starts_block(actstr, "ct(", False):
+            if parse_starts_block(actstr, "clone(", False):
+                parencount += 1
+                subacts = ovsactions()
+                actstr = actstr[len("clone("):]
+                parsedLen = subacts.parse(actstr)
+                lst = []
+                self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts))
+                actstr = actstr[parsedLen:]
+                parsed = True
+            elif parse_starts_block(actstr, "ct(", False):
+                parencount += 1
                  actstr = actstr[len("ct(") :]
                  ctact = ovsactions.ctact()
  
@@ -553,6 +577,7 @@ class ovsactions(nla):
                          natact = ovsactions.ctact.natattr()
  
                          if actstr.startswith("("):
+                            parencount += 1
                              t = None
                              actstr = actstr[1:]
                              if actstr.startswith("src"):
@@ -607,15 +632,29 @@ class ovsactions(nla):
                                      actstr = actstr[strspn(actstr, ", ") :]
  
                          ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact])
-                        actstr = actstr[strspn(actstr, ",) ") :]
+                        actstr = actstr[strspn(actstr, ", ") :]
  
                  self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact])
                  parsed = True
  
-            actstr = actstr[strspn(actstr, "), ") :]
+            actstr = actstr[strspn(actstr, ", ") :]
+            while parencount > 0:
+                parencount -= 1
+                actstr = actstr[strspn(actstr, " "):]
+                if len(actstr) and actstr[0] != ")":
+                    raise ValueError("Action str: '%s' unbalanced" % actstr)
+                actstr = actstr[1:]
+
+            if len(actstr) and actstr[0] == ")":
+                return (totallen - len(actstr))
+
+            actstr = actstr[strspn(actstr, ", ") :]
+
              if not parsed:
                  raise ValueError("Action str: '%s' not supported" % actstr)
  
+        return (totallen - len(actstr))
+
  
  class ovskey(nla):
      nla_flags = NLA_F_NESTED
@@ -2111,6 +2150,8 @@ def main(argv):
      ovsflow = OvsFlow()
      ndb = NDB()
  
+    sys.setrecursionlimit(100000)
+
      if hasattr(args, "showdp"):
          found = False
          for iface in ndb.interfaces:
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh

index f10879788f61ba4f4c01d6cb60a929048ec56540..cfc84958025a61e7ee24a3675a0969e0e3c7cd52 100755 (executable)
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -199,6 +199,7 @@
  #      Same as above but with IPv6
  
  source lib.sh
+source net_helper.sh
  
  PAUSE_ON_FAIL=no
  VERBOSE=0
@@ -707,23 +708,23 @@ setup_xfrm6() {
  }
  
  setup_xfrm4udp() {
-       setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0"
-       setup_nettest_xfrm 4 4500
+       setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+               setup_nettest_xfrm 4 4500
  }
  
  setup_xfrm6udp() {
-       setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0"
-       setup_nettest_xfrm 6 4500
+       setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+               setup_nettest_xfrm 6 4500
  }
  
  setup_xfrm4udprouted() {
-       setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0"
-       setup_nettest_xfrm 4 4500
+       setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \
+               setup_nettest_xfrm 4 4500
  }
  
  setup_xfrm6udprouted() {
-       setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0"
-       setup_nettest_xfrm 6 4500
+       setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \
+               setup_nettest_xfrm 6 4500
  }
  
  setup_routing_old() {
@@ -1335,12 +1336,14 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
                 else
                         TCPDST="TCP:[${dst}]:50000"
                 fi
-               ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+               ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile &
+               local socat_pid=$!
  
-               sleep 1
+               wait_local_port_listen ${NS_B} 50000 tcp
  
-               dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+               dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
  
+               wait ${socat_pid}
                 size=$(du -sb $tmpoutfile)
                 size=${size%%/tmp/*}
  
@@ -1954,6 +1957,13 @@ check_command() {
         return 0
  }
  
+check_running() {
+       pid=${1}
+       cmd=${2}
+
+       [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ]
+}
+
  test_cleanup_vxlanX_exception() {
         outer="${1}"
         encap="vxlan"
@@ -1984,11 +1994,12 @@ test_cleanup_vxlanX_exception() {
  
         ${ns_a} ip link del dev veth_A-R1 &
         iplink_pid=$!
-       sleep 1
-       if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
-               err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
-               return 1
-       fi
+       for i in $(seq 1 20); do
+               check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0
+               sleep 0.1
+       done
+       err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
+       return 1
  }
  
  test_cleanup_ipv6_exception() {
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh

index 4667d74579d135eb74d701d79baa3036c88f2635..874a2952aa8ee16b1841bdd7e2930e54a1c99303 100755 (executable)
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -440,7 +440,6 @@ kci_test_encap_vxlan()
         local ret=0
         vxlan="test-vxlan0"
         vlan="test-vlan0"
-       testns="$1"
         run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
                 dev "$devdummy" dstport 4789
         if [ $? -ne 0 ]; then
@@ -485,7 +484,6 @@ kci_test_encap_fou()
  {
         local ret=0
         name="test-fou"
-       testns="$1"
         run_cmd_grep 'Usage: ip fou' ip fou help
         if [ $? -ne 0 ];then
                 end_test "SKIP: fou: iproute2 too old"
@@ -526,8 +524,8 @@ kci_test_encap()
         run_cmd ip -netns "$testns" link set lo up
         run_cmd ip -netns "$testns" link add name "$devdummy" type dummy
         run_cmd ip -netns "$testns" link set "$devdummy" up
-       run_cmd kci_test_encap_vxlan "$testns"
-       run_cmd kci_test_encap_fou "$testns"
+       run_cmd kci_test_encap_vxlan
+       run_cmd kci_test_encap_fou
  
         ip netns del "$testns"
         return $ret
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh

old mode 100755 (executable)

new mode 100644 (file)
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh

index a9a1759e035ca875ac22036fa52984b32ada76f8..1f78a87f6f37eaab8dc41850e1a906f9aef6315f 100644 (file)
--- a/tools/testing/selftests/net/setup_veth.sh
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -11,7 +11,7 @@ setup_veth_ns() {
         local -r ns_mac="$4"
  
         [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
-       echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+       echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
         ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
         ip -netns "${ns_name}" link set dev "${ns_dev}" up
  
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh

index 3f06f4d286a988f15ccbc006f8015b24a07be3f0..5e861ad32a42e11b680236b4a016ed952caf5914 100755 (executable)
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -5,6 +5,7 @@
  
  set -e
  
+readonly ksft_skip=4
  readonly DEV="veth0"
  readonly BIN="./so_txtime"
  
@@ -46,7 +47,7 @@ ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
  ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
  ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
  
-do_test() {
+run_test() {
         local readonly IP="$1"
         local readonly CLOCK="$2"
         local readonly TXARGS="$3"
@@ -64,12 +65,25 @@ do_test() {
         fi
  
         local readonly START="$(date +%s%N --date="+ 0.1 seconds")"
+
         ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r &
         ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}"
         wait "$!"
  }
  
+do_test() {
+       run_test $@
+       [ $? -ne 0 ] && ret=1
+}
+
+do_fail_test() {
+       run_test $@
+       [ $? -eq 0 ] && ret=1
+}
+
  ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq
+set +e
+ret=0
  do_test 4 mono a,-1 a,-1
  do_test 6 mono a,0 a,0
  do_test 6 mono a,10 a,10
@@ -77,13 +91,20 @@ do_test 4 mono a,10,b,20 a,10,b,20
  do_test 6 mono a,20,b,10 b,20,a,20
  
  if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then
-       ! do_test 4 tai a,-1 a,-1
-       ! do_test 6 tai a,0 a,0
+       do_fail_test 4 tai a,-1 a,-1
+       do_fail_test 6 tai a,0 a,0
         do_test 6 tai a,10 a,10
         do_test 4 tai a,10,b,20 a,10,b,20
         do_test 6 tai a,20,b,10 b,10,a,20
  else
         echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+       [ $ret -eq 0 ] && ret=$ksft_skip
  fi
  
-echo OK. All tests passed
+if [ $ret -eq 0 ]; then
+       echo OK. All tests passed
+elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then
+       echo "Ignoring errors due to slow environment" 1>&2
+       ret=0
+fi
+exit $ret
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config

new file mode 100644 (file)

index 0000000..d3277a9
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -0,0 +1,10 @@
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_RMD160=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_TCP_AO=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c

index c48b4970ca17e07220813192fadbc553cdc89250..24e62120b7924d3a1555a7e42097f9e55338b4db 100644 (file)
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -417,9 +417,9 @@ struct test_key {
                 matches_vrf             : 1,
                 is_current              : 1,
                 is_rnext                : 1,
-               used_on_handshake       : 1,
-               used_after_accept       : 1,
-               used_on_client          : 1;
+               used_on_server_tx       : 1,
+               used_on_client_tx       : 1,
+               skip_counters_checks    : 1;
  };
  
  struct key_collection {
@@ -609,16 +609,14 @@ static int key_collection_socket(bool server, unsigned int port)
                                 addr = &this_ip_dest;
                         sndid = key->client_keyid;
                         rcvid = key->server_keyid;
-                       set_current = key->is_current;
-                       set_rnext = key->is_rnext;
+                       key->used_on_client_tx = set_current = key->is_current;
+                       key->used_on_server_tx = set_rnext = key->is_rnext;
                 }
  
                 if (test_add_key_cr(sk, key->password, key->len,
                                     *addr, vrf, sndid, rcvid, key->maclen,
                                     key->alg, set_current, set_rnext))
                         test_key_error("setsockopt(TCP_AO_ADD_KEY)", key);
-               if (set_current || set_rnext)
-                       key->used_on_handshake = 1;
  #ifdef DEBUG
                 test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}",
                            server ? "server" : "client", i, collection.nr_keys,
@@ -640,22 +638,22 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server
         for (i = 0; i < collection.nr_keys; i++) {
                 struct test_key *key = &collection.keys[i];
                 uint8_t sndid, rcvid;
-               bool was_used;
+               bool rx_cnt_expected;
  
+               if (key->skip_counters_checks)
+                       continue;
                 if (server) {
                         sndid = key->server_keyid;
                         rcvid = key->client_keyid;
-                       if (is_listen_sk)
-                               was_used = key->used_on_handshake;
-                       else
-                               was_used = key->used_after_accept;
+                       rx_cnt_expected = key->used_on_client_tx;
                 } else {
                         sndid = key->client_keyid;
                         rcvid = key->server_keyid;
-                       was_used = key->used_on_client;
+                       rx_cnt_expected = key->used_on_server_tx;
                 }
  
-               test_tcp_ao_key_counters_cmp(tst_name, a, b, was_used,
+               test_tcp_ao_key_counters_cmp(tst_name, a, b,
+                                            rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
                                              sndid, rcvid);
         }
         test_tcp_ao_counters_free(a);
@@ -843,7 +841,7 @@ static void end_server(const char *tst_name, int sk,
         synchronize_threads(); /* 4: verified => closed */
         close(sk);
  
-       verify_counters(tst_name, true, false, begin, &end);
+       verify_counters(tst_name, false, true, begin, &end);
         synchronize_threads(); /* 5: counters */
  }
  
@@ -916,9 +914,8 @@ static int run_client(const char *tst_name, unsigned int port,
                 current_index = nr_keys - 1;
         if (rnext_index < 0)
                 rnext_index = nr_keys - 1;
-       collection.keys[current_index].used_on_handshake = 1;
-       collection.keys[rnext_index].used_after_accept = 1;
-       collection.keys[rnext_index].used_on_client = 1;
+       collection.keys[current_index].used_on_client_tx = 1;
+       collection.keys[rnext_index].used_on_server_tx = 1;
  
         synchronize_threads(); /* 3: accepted => send data */
         if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
@@ -1059,7 +1056,16 @@ static void check_current_back(const char *tst_name, unsigned int port,
                 test_error("Can't change the current key");
         if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
                 test_fail("verify failed");
-       collection.keys[rotate_to_index].used_after_accept = 1;
+       /* There is a race here: between setting the current_key with
+        * setsockopt(TCP_AO_INFO) and starting to send some data - there
+        * might have been a segment received with the desired
+        * RNext_key set. In turn that would mean that the first outgoing
+        * segment will have the desired current_key (flipped back).
+        * Which is what the user/test wants. As it's racy, skip checking
+        * the counters, yet check what are the resulting current/rnext
+        * keys on both sides.
+        */
+       collection.keys[rotate_to_index].skip_counters_checks = 1;
  
         end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
  }
@@ -1089,7 +1095,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
                 }
                 verify_current_rnext(tst_name, sk, -1,
                                      collection.keys[i].server_keyid);
-               collection.keys[i].used_on_client = 1;
+               collection.keys[i].used_on_server_tx = 1;
                 synchronize_threads(); /* verify current/rnext */
         }
         end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c

index c75d82885a2e1aa40f463bbdc65999c05c6a063d..15aeb0963058fdf645451206b3015dd707aa0c13 100644 (file)
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -62,7 +62,9 @@ int test_wait_fd(int sk, time_t sec, bool write)
                 return -ETIMEDOUT;
         }
  
-       if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen) || ret)
+       if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen))
+               return -errno;
+       if (ret)
                 return -ret;
         return 0;
  }
@@ -584,9 +586,11 @@ int test_client_verify(int sk, const size_t msg_len, const size_t nr,
  {
         size_t buf_sz = msg_len * nr;
         char *buf = alloca(buf_sz);
+       ssize_t ret;
  
         randomize_buffer(buf, buf_sz);
-       if (test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec) != buf_sz)
-               return -1;
-       return 0;
+       ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+       if (ret < 0)
+               return (int)ret;
+       return ret != buf_sz ? -1 : 0;
  }
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c

index ac06009a7f5f65ddf0095aa6d7044e98abf032cf..7df8b8700e39e96292f8eafdf105ee0314a65497 100644 (file)
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -1,10 +1,33 @@
  // SPDX-License-Identifier: GPL-2.0
-/* Author: Dmitry Safonov <dima@arista.com> */
+/*
+ * The test checks that both active and passive reset have correct TCP-AO
+ * signature. An "active" reset (abort) here is procured from closing
+ * listen() socket with non-accepted connections in the queue:
+ * inet_csk_listen_stop() => inet_child_forget() =>
+ *                        => tcp_disconnect() => tcp_send_active_reset()
+ *
+ * The passive reset is quite hard to get on established TCP connections.
+ * It could be procured from non-established states, but the synchronization
+ * part from userspace in order to reliably get RST seems uneasy.
+ * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state.
+ *
+ * It's important to test both passive and active RST as they go through
+ * different code-paths:
+ * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb()
+ * - tcp_v*_send_reset() create their reply skbs and send them with
+ *   ip_send_unicast_reply()
+ *
+ * In both cases TCP-AO signatures have to be correct, which is verified by
+ * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters.
+ *
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
  #include <inttypes.h>
  #include "../../../../include/linux/kernel.h"
  #include "aolib.h"
  
  const size_t quota = 1000;
+const size_t packet_sz = 100;
  /*
   * Backlog == 0 means 1 connection in queue, see:
   * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...")
@@ -59,26 +82,6 @@ static void close_forced(int sk)
         close(sk);
  }
  
-static int test_wait_for_exception(int sk, time_t sec)
-{
-       struct timeval tv = { .tv_sec = sec };
-       struct timeval *ptv = NULL;
-       fd_set efds;
-       int ret;
-
-       FD_ZERO(&efds);
-       FD_SET(sk, &efds);
-
-       if (sec)
-               ptv = &tv;
-
-       errno = 0;
-       ret = select(sk + 1, NULL, NULL, &efds, ptv);
-       if (ret < 0)
-               return -errno;
-       return ret ? sk : 0;
-}
-
  static void test_server_active_rst(unsigned int port)
  {
         struct tcp_ao_counters cnt1, cnt2;
@@ -155,17 +158,16 @@ static void test_server_passive_rst(unsigned int port)
                         test_fail("server returned %zd", bytes);
         }
  
-       synchronize_threads(); /* 3: chekpoint/restore the connection */
+       synchronize_threads(); /* 3: checkpoint the client */
+       synchronize_threads(); /* 4: close the server, creating twsk */
         if (test_get_tcp_ao_counters(sk, &ao2))
                 test_error("test_get_tcp_ao_counters()");
-
-       synchronize_threads(); /* 4: terminate server + send more on client */
-       bytes = test_server_run(sk, quota, TEST_RETRANSMIT_SEC);
         close(sk);
+
+       synchronize_threads(); /* 5: restore the socket, send more data */
         test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
  
-       synchronize_threads(); /* 5: verified => closed */
-       close(sk);
+       synchronize_threads(); /* 6: server exits */
  }
  
  static void *server_fn(void *arg)
@@ -284,7 +286,7 @@ static void test_client_active_rst(unsigned int port)
                 test_error("test_wait_fds(): %d", err);
  
         synchronize_threads(); /* 3: close listen socket */
-       if (test_client_verify(sk[0], 100, quota / 100, TEST_TIMEOUT_SEC))
+       if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
                 test_fail("Failed to send data on connected socket");
         else
                 test_ok("Verified established tcp connection");
@@ -323,7 +325,6 @@ static void test_client_passive_rst(unsigned int port)
         struct tcp_sock_state img;
         sockaddr_af saddr;
         int sk, err;
-       socklen_t slen = sizeof(err);
  
         sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
         if (sk < 0)
@@ -337,18 +338,51 @@ static void test_client_passive_rst(unsigned int port)
                 test_error("failed to connect()");
  
         synchronize_threads(); /* 2: accepted => send data */
-       if (test_client_verify(sk, 100, quota / 100, TEST_TIMEOUT_SEC))
+       if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
                 test_fail("Failed to send data on connected socket");
         else
                 test_ok("Verified established tcp connection");
  
-       synchronize_threads(); /* 3: chekpoint/restore the connection */
+       synchronize_threads(); /* 3: checkpoint the client */
         test_enable_repair(sk);
         test_sock_checkpoint(sk, &img, &saddr);
         test_ao_checkpoint(sk, &ao_img);
-       test_kill_sk(sk);
+       test_disable_repair(sk);
  
-       img.out.seq += quota;
+       synchronize_threads(); /* 4: close the server, creating twsk */
+
+       /*
+        * The "corruption" in SEQ has to be small enough to fit into TCP
+        * window, see tcp_timewait_state_process() for out-of-window
+        * segments.
+        */
+       img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */
+
+       /*
+        * FIXME: This is kind-of ugly and dirty, but it works.
+        *
+        * At this moment, the server has close'ed(sk).
+        * The passive RST that is being targeted here is new data after
+        * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST
+        *
+        * What is needed here is:
+        * (1) wait for FIN from the server
+        * (2) make sure that the ACK from the client went out
+        * (3) make sure that the ACK was received and processed by the server
+        *
+        * Otherwise, the data that will be sent from "repaired" socket
+        * post SEQ corruption may get to the server before it's in
+        * TCP_FIN_WAIT2.
+        *
+        * (1) is easy with select()/poll()
+        * (2) is possible by polling tcpi_state from TCP_INFO
+        * (3) is quite complex: as server's socket was already closed,
+        *     probably the way to do it would be tcp-diag.
+        */
+       sleep(TEST_RETRANSMIT_SEC);
+
+       synchronize_threads(); /* 5: restore the socket, send more data */
+       test_kill_sk(sk);
  
         sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
         if (sk < 0)
@@ -366,25 +400,33 @@ static void test_client_passive_rst(unsigned int port)
         test_disable_repair(sk);
         test_sock_state_free(&img);
  
-       synchronize_threads(); /* 4: terminate server + send more on client */
-       if (test_client_verify(sk, 100, quota / 100, 2 * TEST_TIMEOUT_SEC))
-               test_ok("client connection broken post-seq-adjust");
-       else
-               test_fail("client connection still works post-seq-adjust");
-
-       test_wait_for_exception(sk, TEST_TIMEOUT_SEC);
-
-       if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &err, &slen))
-               test_error("getsockopt()");
-       if (err != ECONNRESET && err != EPIPE)
-               test_fail("client connection was not reset: %d", err);
+       /*
+        * This is how "passive reset" is acquired in this test from TCP_TW_RST:
+        *
+        * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249,
+        *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100
+        * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249,
+        *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0
+        * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251,
+        *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0
+        * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251,
+        *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100
+        * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
+        *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
+        */
+       err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+       /* Make sure that the connection was reset, not timeouted */
+       if (err && err == -ECONNRESET)
+               test_ok("client sock was passively reset post-seq-adjust");
+       else if (err)
+               test_fail("client sock was not reset post-seq-adjust: %d", err);
         else
-               test_ok("client connection was reset");
+               test_fail("client sock is yet connected post-seq-adjust");
  
         if (test_get_tcp_ao_counters(sk, &ao2))
                 test_error("test_get_tcp_ao_counters()");
  
-       synchronize_threads(); /* 5: verified => closed */
+       synchronize_threads(); /* 6: server exits */
         close(sk);
         test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
  }
@@ -410,6 +452,6 @@ static void *client_fn(void *arg)
  
  int main(int argc, char *argv[])
  {
-       test_init(15, server_fn, client_fn);
+       test_init(14, server_fn, client_fn);
         return 0;
  }
diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings

new file mode 100644 (file)

index 0000000..6091b45
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c

index c5b568cd7d901ce19d26cc0228dc7089581bb7f1..6b59a652159f7754417471c066a06bd4eb511a41 100644 (file)
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -110,9 +110,9 @@ static void try_accept(const char *tst_name, unsigned int port,
                 test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
  
  out:
-       synchronize_threads(); /* close() */
+       synchronize_threads(); /* test_kill_sk() */
         if (sk > 0)
-               close(sk);
+               test_kill_sk(sk);
  }
  
  static void server_add_routes(void)
@@ -302,10 +302,10 @@ static void try_connect(const char *tst_name, unsigned int port,
                 test_ok("%s: connected", tst_name);
  
  out:
-       synchronize_threads(); /* close() */
+       synchronize_threads(); /* test_kill_sk() */
         /* _test_connect_socket() cleans up on failure */
         if (ret > 0)
-               close(sk);
+               test_kill_sk(sk);
  }
  
  #define PREINSTALL_MD5_FIRST   BIT(0)
@@ -486,10 +486,10 @@ static void try_to_add(const char *tst_name, unsigned int port,
         }
  
  out:
-       synchronize_threads(); /* close() */
+       synchronize_threads(); /* test_kill_sk() */
         /* _test_connect_socket() cleans up on failure */
         if (ret > 0)
-               close(sk);
+               test_kill_sk(sk);
  }
  
  static void client_add_ip(union tcp_addr *client, const char *ip)
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh

index 70a7d87ba2d21cecf6d76f7d184e0902e7b6d3e9..1b3f89e2b86e6aac2f9d631bb9bb22265c3f1734 100755 (executable)
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -124,6 +124,16 @@ tc_check_packets()
         [[ $pkts == $count ]]
  }
  
+bridge_link_check()
+{
+       local ns=$1; shift
+       local dev=$1; shift
+       local state=$1; shift
+
+       bridge -n $ns -d -j link show dev $dev | \
+               jq -e ".[][\"state\"] == \"$state\"" &> /dev/null
+}
+
  ################################################################################
  # Setup
  
@@ -259,6 +269,7 @@ backup_port()
         log_test $? 0 "No forwarding out of vx0"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
         log_test $? 0 "swp1 carrier off"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -268,6 +279,7 @@ backup_port()
         log_test $? 0 "No forwarding out of vx0"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
         log_test $? 0 "swp1 carrier on"
  
         # Configure vx0 as the backup port of swp1 and check that packets are
@@ -284,6 +296,7 @@ backup_port()
         log_test $? 0 "No forwarding out of vx0"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
         log_test $? 0 "swp1 carrier off"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -293,6 +306,7 @@ backup_port()
         log_test $? 0 "Forwarding out of vx0"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
         log_test $? 0 "swp1 carrier on"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -314,6 +328,7 @@ backup_port()
         log_test $? 0 "No forwarding out of vx0"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
         log_test $? 0 "swp1 carrier off"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -369,6 +384,7 @@ backup_nhid()
         log_test $? 0 "No forwarding out of vx0"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
         log_test $? 0 "swp1 carrier off"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -382,6 +398,7 @@ backup_nhid()
         log_test $? 0 "Forwarding using VXLAN FDB entry"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
         log_test $? 0 "swp1 carrier on"
  
         # Configure nexthop ID 10 as the backup nexthop ID of swp1 and check
@@ -398,6 +415,7 @@ backup_nhid()
         log_test $? 0 "No forwarding out of vx0"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
         log_test $? 0 "swp1 carrier off"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -411,6 +429,7 @@ backup_nhid()
         log_test $? 0 "No forwarding using VXLAN FDB entry"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
         log_test $? 0 "swp1 carrier on"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -441,6 +460,7 @@ backup_nhid()
         log_test $? 0 "No forwarding using VXLAN FDB entry"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
         log_test $? 0 "swp1 carrier off"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -497,6 +517,7 @@ backup_nhid_invalid()
         log_test $? 0 "Valid nexthop as backup nexthop"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
         log_test $? 0 "swp1 carrier off"
  
         run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
@@ -604,7 +625,9 @@ backup_nhid_ping()
         run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10"
  
         run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
         run_cmd "ip -n $sw2 link set dev swp1 carrier off"
+       busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw2 swp1 disabled
  
         run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
         log_test $? 0 "Ping with backup nexthop ID"
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c

index 7799e042a9719cda33ea7d004d2ae4a2ec608a4f..b95c249f81c254dae9160b42ec595b3d2daf6679 100644 (file)
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -1002,12 +1002,12 @@ TEST_F(tls, recv_partial)
  
         memset(recv_mem, 0, sizeof(recv_mem));
         EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
-       EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first),
-                      MSG_WAITALL), -1);
+       EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first),
+                      MSG_WAITALL), strlen(test_str_first));
         EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
         memset(recv_mem, 0, sizeof(recv_mem));
-       EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second),
-                      MSG_WAITALL), -1);
+       EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second),
+                      MSG_WAITALL), strlen(test_str_second));
         EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
                   0);
  }
@@ -1485,6 +1485,51 @@ TEST_F(tls, control_msg)
         EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
  }
  
+TEST_F(tls, control_msg_nomerge)
+{
+       char *rec1 = "1111";
+       char *rec2 = "2222";
+       int send_len = 5;
+       char buf[15];
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len);
+       EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+
+       EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+       EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+       EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+       EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+       EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+       EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+       EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+       EXPECT_EQ(memcmp(buf, rec2, send_len), 0);
+}
+
+TEST_F(tls, data_control_data)
+{
+       char *rec1 = "1111";
+       char *rec2 = "2222";
+       char *rec3 = "3333";
+       int send_len = 5;
+       char buf[15];
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len);
+       EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+       EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len);
+
+       EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+       EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+}
+
  TEST_F(tls, shutdown)
  {
         char const *test_str = "test_read";
@@ -1874,13 +1919,13 @@ TEST_F(tls_err, poll_partial_rec_async)
                 /* Child should sleep in poll(), never get a wake */
                 pfd.fd = self->cfd2;
                 pfd.events = POLLIN;
-               EXPECT_EQ(poll(&pfd, 1, 5), 0);
+               EXPECT_EQ(poll(&pfd, 1, 20), 0);
  
                 EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */
  
                 pfd.fd = self->cfd2;
                 pfd.events = POLLIN;
-               EXPECT_EQ(poll(&pfd, 1, 5), 1);
+               EXPECT_EQ(poll(&pfd, 1, 20), 1);
  
                 exit(!_metadata->passed);
         }
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh

index af5dc57c8ce935907fd93279077c0d326205415e..8802604148dda1c2565fdb0d5b0aaabb0cad1427 100755 (executable)
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -7,7 +7,7 @@ source net_helper.sh
  
  readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
  
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.o"
  
  # set global exit status, but never reset nonzero one.
  check_err()
@@ -197,7 +197,7 @@ run_all() {
  }
  
  if [ ! -f ${BPF_FILE} ]; then
-       echo "Missing ${BPF_FILE}. Build bpf selftest first"
+       echo "Missing ${BPF_FILE}. Run 'make' first"
         exit -1
  fi
  
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh

index cb664679b4342992a16694a182c7d0b3a7e9d80b..7080eae5312b2f9fa13c41868337fd4433fb0de6 100755 (executable)
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -7,7 +7,7 @@ source net_helper.sh
  
  readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
  
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.o"
  
  cleanup() {
         local -r jobs="$(jobs -p)"
@@ -84,7 +84,7 @@ run_all() {
  }
  
  if [ ! -f ${BPF_FILE} ]; then
-       echo "Missing ${BPF_FILE}. Build bpf selftest first"
+       echo "Missing ${BPF_FILE}. Run 'make' first"
         exit -1
  fi
  
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh

index dd47fa96f6b3e5ea1cf1f750a4fd55d7a0c4592b..e1ff645bd3d1c7b0b8ba177ee73ce595a91f3808 100755 (executable)
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -7,7 +7,7 @@ source net_helper.sh
  
  readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
  
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.o"
  
  cleanup() {
         local -r jobs="$(jobs -p)"
@@ -85,12 +85,12 @@ run_all() {
  }
  
  if [ ! -f ${BPF_FILE} ]; then
-       echo "Missing ${BPF_FILE}. Build bpf selftest first"
+       echo "Missing ${BPF_FILE}. Run 'make' first"
         exit -1
  fi
  
  if [ ! -f nat6to4.o ]; then
-       echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first"
+       echo "Missing nat6to4 helper. Run 'make' first"
         exit -1
  fi
  
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh

index c079565add39224eb99e011f941b6f0a11c1648c..9cd5e885e91f74b01007cf14bbdb9808aa04c632 100755 (executable)
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,7 +1,9 @@
  #!/bin/bash
  # SPDX-License-Identifier: GPL-2.0
  
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+source net_helper.sh
+
+BPF_FILE="xdp_dummy.o"
  readonly BASE="ns-$(mktemp -u XXXXXX)"
  readonly SRC=2
  readonly DST=1
@@ -37,6 +39,10 @@ create_ns() {
         for ns in $NS_SRC $NS_DST; do
                 ip netns add $ns
                 ip -n $ns link set dev lo up
+
+               # disable route solicitations to decrease 'noise' traffic
+               ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0
+               ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0
         done
  
         ip link add name veth$SRC type veth peer name veth$DST
@@ -78,6 +84,12 @@ create_vxlan_pair() {
                 create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
                 ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
         done
+
+       # preload neighbur cache, do avoid some noisy traffic
+       local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST  |jq -r '.[]["address"]')
+       local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC  |jq -r '.[]["address"]')
+       ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC
+       ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST
  }
  
  is_ipv6() {
@@ -117,9 +129,9 @@ run_test() {
         # not enable GRO
         ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
         ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
-       ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args &
+       ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args &
         local spid=$!
-       sleep 0.1
+       wait_local_port_listen "$NS_DST" 8000 udp
         ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
         local retc=$?
         wait $spid
@@ -166,9 +178,9 @@ run_bench() {
         # bind the sender and the receiver to different CPUs to try
         # get reproducible results
         ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
-       ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10  &
+       ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100  &
         local spid=$!
-       sleep 0.1
+       wait_local_port_listen "$NS_DST" 8000 udp
         ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
         local retc=$?
         wait $spid
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c

index f35a924d4a3030780447f2cc137f6ff373ed693c..1cbadd267c963c0c067308d3fb16493625e8f1b7 100644 (file)
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -375,7 +375,7 @@ static void do_recv(void)
                         do_flush_udp(fd);
  
                 tnow = gettimeofday_ms();
-               if (tnow > treport) {
+               if (!cfg_expected_pkt_nr && tnow > treport) {
                         if (packets)
                                 fprintf(stderr,
                                         "%s rx: %6lu MB/s %8lu calls/s\n",
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh

index 2d073595c620210254bc372bc428b05121e9b26b..5ae85def07395b50c07600f4a31b7ff69578bb9f 100755 (executable)
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
  #!/bin/sh
  # SPDX-License-Identifier: GPL-2.0
  
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.o"
  readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
  readonly BASE=`basename $STATS`
  readonly SRC=2
@@ -218,7 +218,7 @@ while getopts "hs:" option; do
  done
  
  if [ ! -f ${BPF_FILE} ]; then
-       echo "Missing ${BPF_FILE}. Build bpf selftest first"
+       echo "Missing ${BPF_FILE}. Run 'make' first"
         exit 1
  fi
  
@@ -246,6 +246,20 @@ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
  chk_gro "        - aggregation with TSO off" 1
  cleanup
  
+create_ns
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp
+chk_gro_flag "gro vs xdp while down - gro flag on" $DST on
+ip -n $NS_DST link set dev veth$DST down
+chk_gro_flag "                      - after down" $DST on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag "                      - after xdp off" $DST off
+ip -n $NS_DST link set dev veth$DST up
+chk_gro_flag "                      - after up" $DST off
+ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp
+chk_gro_flag "                      - after peer xdp" $DST off
+cleanup
+
  create_ns
  chk_channels "default channels" $DST 1 1
  
diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c

new file mode 100644 (file)

index 0000000..d988b2e
--- /dev/null
+++ b/tools/testing/selftests/net/xdp_dummy.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile

index db27153eb4a02c1db3f0f9dc55445558fbb5d5ea..936c3085bb8373ea74036a6870cb67f5b103f0ae 100644 (file)
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -7,7 +7,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
         nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
         ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
         conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
-       conntrack_sctp_collision.sh xt_string.sh
+       conntrack_sctp_collision.sh xt_string.sh \
+       bridge_netfilter.sh
  
  HOSTPKG_CONFIG := pkg-config
  
diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh

new file mode 100644 (file)

index 0000000..659b3ab
--- /dev/null
+++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bridge netfilter + conntrack, a combination that doesn't really work,
+# with multicast/broadcast packets racing for hash table insertion.
+
+#           eth0    br0     eth0
+# setup is: ns1 <->,ns0 <-> ns3
+#           ns2 <-'    `'-> ns4
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+ns3="ns3-$sfx"
+ns4="ns4-$sfx"
+
+ebtables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ebtables"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+for i in $(seq 0 4); do
+  eval ip netns add \$ns$i
+done
+
+cleanup() {
+  for i in $(seq 0 4); do eval ip netns del \$ns$i;done
+}
+
+trap cleanup EXIT
+
+do_ping()
+{
+       fromns="$1"
+       dstip="$2"
+
+       ip netns exec $fromns ping -c 1 -q $dstip > /dev/null
+       if [ $? -ne 0 ]; then
+               echo "ERROR: ping from $fromns to $dstip"
+               ip netns exec ${ns0} nft list ruleset
+               ret=1
+       fi
+}
+
+bcast_ping()
+{
+       fromns="$1"
+       dstip="$2"
+
+       for i in $(seq 1 1000); do
+               ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1
+               if [ $? -ne 0 ]; then
+                       echo "ERROR: ping -b from $fromns to $dstip"
+                       ip netns exec ${ns0} nft list ruleset
+                       fi
+       done
+}
+
+ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1}
+if [ $? -ne 0 ]; then
+       echo "SKIP: Can't create veth device"
+       exit $ksft_skip
+fi
+
+ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2
+ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3
+ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4
+
+ip -net ${ns0} link set lo up
+
+for i in $(seq 1 4); do
+  ip -net ${ns0} link set veth$i up
+done
+
+ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1
+if [ $? -ne 0 ]; then
+       echo "SKIP: Can't create bridge br0"
+       exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+  ip -net ${ns0} link set veth$i master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private
+ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan0 up
+ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa
+ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net ${ns0} link set macvlan4 master br0
+
+ip -net ${ns0} link set br0 up
+ip -net ${ns0} addr add 10.0.0.1/24 dev br0
+ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1
+ret=$?
+if [ $ret -ne 0 ] ; then
+       echo "SKIP: bridge netfilter not available"
+       ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec ${ns0} nft -f - <<EOF
+table ip filter {
+       chain input {
+               type filter hook input priority 1; policy accept
+               iifname br0 counter
+               ct state new accept
+       }
+}
+
+table bridge filter {
+       chain forward {
+               type filter hook forward priority 0; policy accept
+               meta pkttype broadcast ip protocol icmp counter drop
+       }
+}
+EOF
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+  eval ip -net \$ns$i link set lo up
+  eval ip -net \$ns$i link set eth0 up
+done
+for i in $(seq 1 2); do
+  eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0
+done
+
+ip -net ${ns3} addr add 10.23.0.13/24 dev eth0
+ip -net ${ns4} addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping ${ns1} 10.0.0.12
+do_ping ${ns3} 10.23.0.1
+do_ping ${ns4} 10.23.0.1
+
+if [ $ret -eq 0 ];then
+       echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0"
+fi
+
+bcast_ping ${ns1} 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping ${ns3} 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping ${ns4} 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+
+if [ $t -eq 0 ];then
+       echo PASS: kernel not tainted
+else
+       echo ERROR: kernel is tainted
+       ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c

index f18c6db13bbff402202f6bd4796b4581803e2d73..b11ea8ee67194604de4a7dcbda7539dbffe7b7a1 100644 (file)
--- a/tools/testing/selftests/netfilter/conntrack_dump_flush.c
+++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
@@ -13,7 +13,7 @@
  #include "../kselftest_harness.h"
  
  #define TEST_ZONE_ID 123
-#define CTA_FILTER_F_CTA_TUPLE_ZONE (1 << 2)
+#define NF_CT_DEFAULT_ZONE_ID 0
  
  static int reply_counter;
  
@@ -336,6 +336,9 @@ FIXTURE_SETUP(conntrack_dump_flush)
         ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
                                          TEST_ZONE_ID + 2);
         EXPECT_EQ(ret, 0);
+       ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
+                                        NF_CT_DEFAULT_ZONE_ID);
+       EXPECT_EQ(ret, 0);
  
         src = (struct in6_addr) {{
                 .__u6_addr32 = {
@@ -395,6 +398,26 @@ FIXTURE_SETUP(conntrack_dump_flush)
                                          TEST_ZONE_ID + 2);
         EXPECT_EQ(ret, 0);
  
+       src = (struct in6_addr) {{
+               .__u6_addr32 = {
+                       0xb80d0120,
+                       0x00000000,
+                       0x00000000,
+                       0x07000000
+               }
+       }};
+       dst = (struct in6_addr) {{
+               .__u6_addr32 = {
+                       0xb80d0120,
+                       0x00000000,
+                       0x00000000,
+                       0x08000000
+               }
+       }};
+       ret = conntrack_data_generate_v6(self->sock, src, dst,
+                                        NF_CT_DEFAULT_ZONE_ID);
+       EXPECT_EQ(ret, 0);
+
         ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
         EXPECT_GE(ret, 2);
         if (ret > 2)
@@ -425,6 +448,24 @@ TEST_F(conntrack_dump_flush, test_flush_by_zone)
         EXPECT_EQ(ret, 2);
         ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
         EXPECT_EQ(ret, 2);
+       ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+       EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
+{
+       int ret;
+
+       ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+       EXPECT_EQ(ret, 0);
+       ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+       EXPECT_EQ(ret, 2);
+       ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+       EXPECT_EQ(ret, 2);
+       ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+       EXPECT_EQ(ret, 2);
+       ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+       EXPECT_EQ(ret, 0);
  }
  
  TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c

index 7b1addd504209fadb59f908a843b23a9d0218f3f..8a64f63e37ce215e4aeff2675b7114704075ae48 100644 (file)
--- a/tools/testing/selftests/powerpc/math/fpu_signal.c
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -18,6 +18,7 @@
  #include <pthread.h>
  
  #include "utils.h"
+#include "fpu.h"
  
  /* Number of times each thread should receive the signal */
  #define ITERATIONS 10
@@ -27,9 +28,7 @@
   */
  #define THREAD_FACTOR 8
  
-__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
-                    1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
-                    2.1};
+__thread double darray[32];
  
  bool bad_context;
  int threads_starting;
@@ -43,9 +42,9 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context)
         ucontext_t *uc = context;
         mcontext_t *mc = &uc->uc_mcontext;
  
-       /* Only the non volatiles were loaded up */
-       for (i = 14; i < 32; i++) {
-               if (mc->fp_regs[i] != darray[i - 14]) {
+       // Don't check f30/f31, they're used as scratches in check_all_fprs()
+       for (i = 0; i < 30; i++) {
+               if (mc->fp_regs[i] != darray[i]) {
                         bad_context = true;
                         break;
                 }
@@ -54,7 +53,6 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context)
  
  void *signal_fpu_c(void *p)
  {
-       int i;
         long rc;
         struct sigaction act;
         act.sa_sigaction = signal_fpu_sig;
@@ -64,9 +62,7 @@ void *signal_fpu_c(void *p)
                 return p;
  
         srand(pthread_self());
-       for (i = 0; i < 21; i++)
-               darray[i] = rand();
-
+       randomise_darray(darray, ARRAY_SIZE(darray));
         rc = preempt_fpu(darray, &threads_starting, &running);
  
         return (void *) rc;
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c

index 98cbb9109ee6e8e1e6047c1640c8cb0f4a2b5625..505294da1b9fb5e7bd07aac4a119164900c8f2e6 100644 (file)
--- a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -263,10 +263,10 @@ static int papr_vpd_system_loc_code(void)
         off_t size;
         int fd;
  
-       SKIP_IF_MSG(get_system_loc_code(&lc),
-                   "Cannot determine system location code");
         SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
                     DEVPATH " not present");
+       SKIP_IF_MSG(get_system_loc_code(&lc),
+                   "Cannot determine system location code");
  
         FAIL_IF(devfd < 0);
  
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c

index 88754296196870a5d0ef3afb52373c8d40cbc598..2348d2c20d0a1aaf3a05a1c7005983f442708b3c 100644 (file)
--- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -24,6 +24,11 @@ bool rseq_validate_cpu_id(void)
  {
         return rseq_mm_cid_available();
  }
+static
+bool rseq_use_cpu_index(void)
+{
+       return false;   /* Use mm_cid */
+}
  #else
  # define RSEQ_PERCPU   RSEQ_PERCPU_CPU_ID
  static
@@ -36,6 +41,11 @@ bool rseq_validate_cpu_id(void)
  {
         return rseq_current_cpu_raw() >= 0;
  }
+static
+bool rseq_use_cpu_index(void)
+{
+       return true;    /* Use cpu_id as index. */
+}
  #endif
  
  struct percpu_lock_entry {
@@ -274,7 +284,7 @@ void test_percpu_list(void)
         /* Generate list entries for every usable cpu. */
         sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
         for (i = 0; i < CPU_SETSIZE; i++) {
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                         continue;
                 for (j = 1; j <= 100; j++) {
                         struct percpu_list_node *node;
@@ -299,7 +309,7 @@ void test_percpu_list(void)
         for (i = 0; i < CPU_SETSIZE; i++) {
                 struct percpu_list_node *node;
  
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                         continue;
  
                 while ((node = __percpu_list_pop(&list, i))) {
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c

index 20403d58345cd523186b9423750ea7ad669cdd96..2f37961240caa7cc43f142fac32fd7f9c9c211d4 100644 (file)
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -288,6 +288,11 @@ bool rseq_validate_cpu_id(void)
  {
         return rseq_mm_cid_available();
  }
+static
+bool rseq_use_cpu_index(void)
+{
+       return false;   /* Use mm_cid */
+}
  # ifdef TEST_MEMBARRIER
  /*
   * Membarrier does not currently support targeting a mm_cid, so
@@ -312,6 +317,11 @@ bool rseq_validate_cpu_id(void)
  {
         return rseq_current_cpu_raw() >= 0;
  }
+static
+bool rseq_use_cpu_index(void)
+{
+       return true;    /* Use cpu_id as index. */
+}
  # ifdef TEST_MEMBARRIER
  static
  int rseq_membarrier_expedited(int cpu)
@@ -715,7 +725,7 @@ void test_percpu_list(void)
         /* Generate list entries for every usable cpu. */
         sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
         for (i = 0; i < CPU_SETSIZE; i++) {
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                         continue;
                 for (j = 1; j <= 100; j++) {
                         struct percpu_list_node *node;
@@ -752,7 +762,7 @@ void test_percpu_list(void)
         for (i = 0; i < CPU_SETSIZE; i++) {
                 struct percpu_list_node *node;
  
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                         continue;
  
                 while ((node = __percpu_list_pop(&list, i))) {
@@ -902,7 +912,7 @@ void test_percpu_buffer(void)
         /* Generate list entries for every usable cpu. */
         sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
         for (i = 0; i < CPU_SETSIZE; i++) {
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                         continue;
                 /* Worse-case is every item in same CPU. */
                 buffer.c[i].array =
@@ -952,7 +962,7 @@ void test_percpu_buffer(void)
         for (i = 0; i < CPU_SETSIZE; i++) {
                 struct percpu_buffer_node *node;
  
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                         continue;
  
                 while ((node = __percpu_buffer_pop(&buffer, i))) {
@@ -1113,7 +1123,7 @@ void test_percpu_memcpy_buffer(void)
         /* Generate list entries for every usable cpu. */
         sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
         for (i = 0; i < CPU_SETSIZE; i++) {
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                         continue;
                 /* Worse-case is every item in same CPU. */
                 buffer.c[i].array =
@@ -1160,7 +1170,7 @@ void test_percpu_memcpy_buffer(void)
         for (i = 0; i < CPU_SETSIZE; i++) {
                 struct percpu_memcpy_buffer_node item;
  
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                         continue;
  
                 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c

index 5b5c9d558dee07bc1f7afd7df280e1189858451e..97b86980b768f4fa09da58f16d71ba42f42d2c8d 100644 (file)
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -38,10 +38,10 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples)
         i *= 1000000000ULL;
         i += finish.tv_nsec - start.tv_nsec;
  
-       printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n",
-               finish.tv_sec, finish.tv_nsec,
-               start.tv_sec, start.tv_nsec,
-               i, (double)i / 1000000000.0);
+       ksft_print_msg("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n",
+                      finish.tv_sec, finish.tv_nsec,
+                      start.tv_sec, start.tv_nsec,
+                      i, (double)i / 1000000000.0);
  
         return i;
  }
@@ -53,7 +53,7 @@ unsigned long long calibrate(void)
         pid_t pid, ret;
         int seconds = 15;
  
-       printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);
+       ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);
  
         samples = 0;
         pid = getpid();
@@ -98,24 +98,36 @@ bool le(int i_one, int i_two)
  }
  
  long compare(const char *name_one, const char *name_eval, const char *name_two,
-            unsigned long long one, bool (*eval)(int, int), unsigned long long two)
+            unsigned long long one, bool (*eval)(int, int), unsigned long long two,
+            bool skip)
  {
         bool good;
  
-       printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two,
-              (long long)one, name_eval, (long long)two);
+       if (skip) {
+               ksft_test_result_skip("%s %s %s\n", name_one, name_eval,
+                                     name_two);
+               return 0;
+       }
+
+       ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two,
+                      (long long)one, name_eval, (long long)two);
         if (one > INT_MAX) {
-               printf("Miscalculation! Measurement went negative: %lld\n", (long long)one);
-               return 1;
+               ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one);
+               good = false;
+               goto out;
         }
         if (two > INT_MAX) {
-               printf("Miscalculation! Measurement went negative: %lld\n", (long long)two);
-               return 1;
+               ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two);
+               good = false;
+               goto out;
         }
  
         good = eval(one, two);
         printf("%s\n", good ? "✔️" : "❌");
  
+out:
+       ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two);
+
         return good ? 0 : 1;
  }
  
@@ -142,15 +154,22 @@ int main(int argc, char *argv[])
         unsigned long long samples, calc;
         unsigned long long native, filter1, filter2, bitmap1, bitmap2;
         unsigned long long entry, per_filter1, per_filter2;
+       bool skip = false;
  
         setbuf(stdout, NULL);
  
-       printf("Running on:\n");
+       ksft_print_header();
+       ksft_set_plan(7);
+
+       ksft_print_msg("Running on:\n");
+       ksft_print_msg("");
         system("uname -a");
  
-       printf("Current BPF sysctl settings:\n");
+       ksft_print_msg("Current BPF sysctl settings:\n");
         /* Avoid using "sysctl" which may not be installed. */
+       ksft_print_msg("");
         system("grep -H . /proc/sys/net/core/bpf_jit_enable");
+       ksft_print_msg("");
         system("grep -H . /proc/sys/net/core/bpf_jit_harden");
  
         if (argc > 1)
@@ -158,11 +177,11 @@ int main(int argc, char *argv[])
         else
                 samples = calibrate();
  
-       printf("Benchmarking %llu syscalls...\n", samples);
+       ksft_print_msg("Benchmarking %llu syscalls...\n", samples);
  
         /* Native call */
         native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-       printf("getpid native: %llu ns\n", native);
+       ksft_print_msg("getpid native: %llu ns\n", native);
  
         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
         assert(ret == 0);
@@ -172,35 +191,37 @@ int main(int argc, char *argv[])
         assert(ret == 0);
  
         bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-       printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1);
+       ksft_print_msg("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1);
  
         /* Second filter resulting in a bitmap */
         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
         assert(ret == 0);
  
         bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-       printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2);
+       ksft_print_msg("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2);
  
         /* Third filter, can no longer be converted to bitmap */
         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
         assert(ret == 0);
  
         filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-       printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1);
+       ksft_print_msg("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1);
  
         /* Fourth filter, can not be converted to bitmap because of filter 3 */
         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
         assert(ret == 0);
  
         filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-       printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2);
+       ksft_print_msg("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2);
  
         /* Estimations */
  #define ESTIMATE(fmt, var, what)       do {                    \
                 var = (what);                                   \
-               printf("Estimated " fmt ": %llu ns\n", var);    \
-               if (var > INT_MAX)                              \
-                       goto more_samples;                      \
+               ksft_print_msg("Estimated " fmt ": %llu ns\n", var);    \
+               if (var > INT_MAX) {                            \
+                       skip = true;                            \
+                       ret |= 1;                               \
+               }                                               \
         } while (0)
  
         ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc,
@@ -218,31 +239,34 @@ int main(int argc, char *argv[])
         ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2,
                  (filter2 - native - entry) / 4);
  
-       printf("Expectations:\n");
-       ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1);
-       bits = compare("native", "≤", "1 filter", native, le, filter1);
+       ksft_print_msg("Expectations:\n");
+       ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1,
+                      skip);
+       bits = compare("native", "≤", "1 filter", native, le, filter1,
+                      skip);
         if (bits)
-               goto more_samples;
+               skip = true;
  
         ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)",
-                       per_filter1, approx, per_filter2);
+                      per_filter1, approx, per_filter2, skip);
  
         bits = compare("1 bitmapped", "≈", "2 bitmapped",
-                       bitmap1 - native, approx, bitmap2 - native);
+                      bitmap1 - native, approx, bitmap2 - native, skip);
         if (bits) {
-               printf("Skipping constant action bitmap expectations: they appear unsupported.\n");
-               goto out;
+               ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n");
+               skip = true;
         }
  
-       ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native);
-       ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native);
+       ret |= compare("entry", "≈", "1 bitmapped", entry, approx,
+                      bitmap1 - native, skip);
+       ret |= compare("entry", "≈", "2 bitmapped", entry, approx,
+                      bitmap2 - native, skip);
         ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total",
-                       entry + (per_filter1 * 4) + native, approx, filter2);
-       if (ret == 0)
-               goto out;
+                      entry + (per_filter1 * 4) + native, approx, filter2,
+                      skip);
  
-more_samples:
-       printf("Saw unexpected benchmark result. Try running again with more samples?\n");
-out:
-       return 0;
+       if (ret)
+               ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n");
+
+       ksft_finished();
  }
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile

index 2456a399eb9ae1ce2a3c90c10ce1403dd23f62d1..afd18c678ff5a584a92e13ae8ee30bcba90f21ca 100644 (file)
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -28,10 +28,15 @@ FOPTS       :=      -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
                 -fasynchronous-unwind-tables -fstack-clash-protection
  WOPTS  :=      -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized
  
+ifeq ($(CC),clang)
+  FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS))
+  WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS))
+endif
+
  TRACEFS_HEADERS        := $$($(PKG_CONFIG) --cflags libtracefs)
  
  CFLAGS :=      -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS)
-LDFLAGS        :=      -ggdb $(EXTRA_LDFLAGS)
+LDFLAGS        :=      -flto=auto -ggdb $(EXTRA_LDFLAGS)
  LIBS   :=      $$($(PKG_CONFIG) --libs libtracefs)
  
  SRC    :=      $(wildcard src/*.c)
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c

index 8f81fa007364890dd4303b047c484786a56390f2..01870d50942a19a242f6444c7b11f019b460ec4e 100644 (file)
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -135,8 +135,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
         if (params->output_divisor)
                 duration = duration / params->output_divisor;
  
-       if (data->bucket_size)
-               bucket = duration / data->bucket_size;
+       bucket = duration / data->bucket_size;
  
         total_duration = duration * count;
  
@@ -480,7 +479,11 @@ static void osnoise_hist_usage(char *usage)
  
         for (i = 0; msg[i]; i++)
                 fprintf(stderr, "%s\n", msg[i]);
-       exit(1);
+
+       if (usage)
+               exit(EXIT_FAILURE);
+
+       exit(EXIT_SUCCESS);
  }
  
  /*
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c

index f7c959be8677799788eda3e577246cd3d2ec444a..457360db07673191fbc034c9fdb3193b91c4499c 100644 (file)
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -331,7 +331,11 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
  
         for (i = 0; msg[i]; i++)
                 fprintf(stderr, "%s\n", msg[i]);
-       exit(1);
+
+       if (usage)
+               exit(EXIT_FAILURE);
+
+       exit(EXIT_SUCCESS);
  }
  
  /*
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c

index 47d3d8b53cb2177fe7db4c39a21e630aca22fa23..dbf154082f958c146bed6537dc527f83e57993d4 100644 (file)
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -178,8 +178,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu,
         if (params->output_divisor)
                 latency = latency / params->output_divisor;
  
-       if (data->bucket_size)
-               bucket = latency / data->bucket_size;
+       bucket = latency / data->bucket_size;
  
         if (!context) {
                 hist = data->hist[cpu].irq;
@@ -546,7 +545,11 @@ static void timerlat_hist_usage(char *usage)
  
         for (i = 0; msg[i]; i++)
                 fprintf(stderr, "%s\n", msg[i]);
-       exit(1);
+
+       if (usage)
+               exit(EXIT_FAILURE);
+
+       exit(EXIT_SUCCESS);
  }
  
  /*
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c

index 1640f121baca50d99b94621309522d3fb824bc33..3e9af2c3868880197dc3075b74d94a15bea07d38 100644 (file)
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -375,7 +375,11 @@ static void timerlat_top_usage(char *usage)
  
         for (i = 0; msg[i]; i++)
                 fprintf(stderr, "%s\n", msg[i]);
-       exit(1);
+
+       if (usage)
+               exit(EXIT_FAILURE);
+
+       exit(EXIT_SUCCESS);
  }
  
  /*
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c

index c769d7b3842c0967e85f7dc1d8c6c705edd1f2dd..9ac71a66840c1bec2e944f3a9db0f427f3c7edfb 100644 (file)
--- a/tools/tracing/rtla/src/utils.c
+++ b/tools/tracing/rtla/src/utils.c
@@ -238,12 +238,6 @@ static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
         return syscall(__NR_sched_setattr, pid, attr, flags);
  }
  
-static inline int sched_getattr(pid_t pid, struct sched_attr *attr,
-                               unsigned int size, unsigned int flags)
-{
-       return syscall(__NR_sched_getattr, pid, attr, size, flags);
-}
-
  int __set_sched_attr(int pid, struct sched_attr *attr)
  {
         int flags = 0;
@@ -479,13 +473,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param)
                 if (prio == INVALID_VAL)
                         return -1;
  
-               if (prio < sched_get_priority_min(SCHED_OTHER))
+               if (prio < MIN_NICE)
                         return -1;
-               if (prio > sched_get_priority_max(SCHED_OTHER))
+               if (prio > MAX_NICE)
                         return -1;
  
                 sched_param->sched_policy   = SCHED_OTHER;
-               sched_param->sched_priority = prio;
+               sched_param->sched_nice = prio;
                 break;
         default:
                 return -1;
@@ -536,7 +530,7 @@ int set_cpu_dma_latency(int32_t latency)
   */
  static const int find_mount(const char *fs, char *mp, int sizeof_mp)
  {
-       char mount_point[MAX_PATH];
+       char mount_point[MAX_PATH+1];
         char type[100];
         int found = 0;
         FILE *fp;
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h

index 04ed1e650495a357daabfe40653c1eab5e89b005..d44513e6c66a01a5fc75f472dcfb9ebbfbd57bfb 100644 (file)
--- a/tools/tracing/rtla/src/utils.h
+++ b/tools/tracing/rtla/src/utils.h
@@ -9,6 +9,8 @@
   */
  #define BUFF_U64_STR_SIZE      24
  #define MAX_PATH               1024
+#define MAX_NICE               20
+#define MIN_NICE               -19
  
  #define container_of(ptr, type, member)({                      \
         const typeof(((type *)0)->member) *__mptr = (ptr);      \
diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile

index 3d0f3888a58c66816fca24b5a9d7ab2106f0e992..485f8aeddbe033f227faf32139630d2a616cbd66 100644 (file)
--- a/tools/verification/rv/Makefile
+++ b/tools/verification/rv/Makefile
@@ -28,10 +28,15 @@ FOPTS       :=      -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
                 -fasynchronous-unwind-tables -fstack-clash-protection
  WOPTS  :=      -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized
  
+ifeq ($(CC),clang)
+  FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS))
+  WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS))
+endif
+
  TRACEFS_HEADERS        := $$($(PKG_CONFIG) --cflags libtracefs)
  
  CFLAGS :=      -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -I include
-LDFLAGS        :=      -ggdb $(EXTRA_LDFLAGS)
+LDFLAGS        :=      -flto=auto -ggdb $(EXTRA_LDFLAGS)
  LIBS   :=      $$($(PKG_CONFIG) --libs libtracefs)
  
  SRC    :=      $(wildcard src/*.c)
diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c

index ad28582bcf2b1ca6b6c9ba9e5d09b0bb5fbe63c0..f04479ecc96c0b75af1afb2e7855cf1cf2491970 100644 (file)
--- a/tools/verification/rv/src/in_kernel.c
+++ b/tools/verification/rv/src/in_kernel.c
@@ -210,9 +210,9 @@ static char *ikm_read_reactor(char *monitor_name)
  static char *ikm_get_current_reactor(char *monitor_name)
  {
         char *reactors = ikm_read_reactor(monitor_name);
+       char *curr_reactor = NULL;
         char *start;
         char *end;
-       char *curr_reactor;
  
         if (!reactors)
                 return NULL;
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig

index 184dab4ee871c6d2f54e1b095fab43c5f9f97e9a..29b73eedfe741a43b231a8ca91fc5ef58add7d0b 100644 (file)
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -1,9 +1,6 @@
  # SPDX-License-Identifier: GPL-2.0
  # KVM common configuration items and defaults
  
-config HAVE_KVM
-       bool
-
  config KVM_COMMON
         bool
         select EVENTFD
@@ -55,6 +52,9 @@ config KVM_ASYNC_PF_SYNC
  config HAVE_KVM_MSI
         bool
  
+config HAVE_KVM_READONLY_MEM
+       bool
+
  config HAVE_KVM_CPU_RELAX_INTERCEPT
         bool
  
@@ -73,6 +73,7 @@ config KVM_COMPAT
  
  config HAVE_KVM_IRQ_BYPASS
         bool
+       select IRQ_BYPASS_MANAGER
  
  config HAVE_KVM_VCPU_ASYNC_IOCTL
         bool
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c

index e033c79d528e0040e88fdd02f6eec3c6d6c00213..99a63bad0306c5699c8282b175f203bdc7bfba0a 100644 (file)
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
  {
         struct kvm_async_pf *apf =
                 container_of(work, struct kvm_async_pf, work);
-       struct mm_struct *mm = apf->mm;
         struct kvm_vcpu *vcpu = apf->vcpu;
+       struct mm_struct *mm = vcpu->kvm->mm;
         unsigned long addr = apf->addr;
         gpa_t cr2_or_gpa = apf->cr2_or_gpa;
         int locked = 1;
@@ -56,15 +56,24 @@ static void async_pf_execute(struct work_struct *work)
         might_sleep();
  
         /*
-        * This work is run asynchronously to the task which owns
-        * mm and might be done in another context, so we must
-        * access remotely.
+        * Attempt to pin the VM's host address space, and simply skip gup() if
+        * acquiring a pin fail, i.e. if the process is exiting.  Note, KVM
+        * holds a reference to its associated mm_struct until the very end of
+        * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
+        * work item is fully processed.
          */
-       mmap_read_lock(mm);
-       get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
-       if (locked)
-               mmap_read_unlock(mm);
+       if (mmget_not_zero(mm)) {
+               mmap_read_lock(mm);
+               get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
+               if (locked)
+                       mmap_read_unlock(mm);
+               mmput(mm);
+       }
  
+       /*
+        * Notify and kick the vCPU even if faulting in the page failed, e.g.
+        * so that the vCPU can retry the fault synchronously.
+        */
         if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
                 kvm_arch_async_page_present(vcpu, apf);
  
@@ -74,20 +83,39 @@ static void async_pf_execute(struct work_struct *work)
         apf->vcpu = NULL;
         spin_unlock(&vcpu->async_pf.lock);
  
-       if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
-               kvm_arch_async_page_present_queued(vcpu);
-
         /*
-        * apf may be freed by kvm_check_async_pf_completion() after
-        * this point
+        * The apf struct may be freed by kvm_check_async_pf_completion() as
+        * soon as the lock is dropped.  Nullify it to prevent improper usage.
          */
+       apf = NULL;
+
+       if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+               kvm_arch_async_page_present_queued(vcpu);
  
         trace_kvm_async_pf_completed(addr, cr2_or_gpa);
  
         __kvm_vcpu_wake_up(vcpu);
+}
  
-       mmput(mm);
-       kvm_put_kvm(vcpu->kvm);
+static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
+{
+       /*
+        * The async #PF is "done", but KVM must wait for the work item itself,
+        * i.e. async_pf_execute(), to run to completion.  If KVM is a module,
+        * KVM must ensure *no* code owned by the KVM (the module) can be run
+        * after the last call to module_put().  Note, flushing the work item
+        * is always required when the item is taken off the completion queue.
+        * E.g. even if the vCPU handles the item in the "normal" path, the VM
+        * could be terminated before async_pf_execute() completes.
+        *
+        * Wake all events skip the queue and go straight done, i.e. don't
+        * need to be flushed (but sanity check that the work wasn't queued).
+        */
+       if (work->wakeup_all)
+               WARN_ON_ONCE(work->work.func);
+       else
+               flush_work(&work->work);
+       kmem_cache_free(async_pf_cache, work);
  }
  
  void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
@@ -112,11 +140,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
  #ifdef CONFIG_KVM_ASYNC_PF_SYNC
                 flush_work(&work->work);
  #else
-               if (cancel_work_sync(&work->work)) {
-                       mmput(work->mm);
-                       kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
+               if (cancel_work_sync(&work->work))
                         kmem_cache_free(async_pf_cache, work);
-               }
  #endif
                 spin_lock(&vcpu->async_pf.lock);
         }
@@ -126,7 +151,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                         list_first_entry(&vcpu->async_pf.done,
                                          typeof(*work), link);
                 list_del(&work->link);
-               kmem_cache_free(async_pf_cache, work);
+
+               spin_unlock(&vcpu->async_pf.lock);
+               kvm_flush_and_free_async_pf_work(work);
+               spin_lock(&vcpu->async_pf.lock);
         }
         spin_unlock(&vcpu->async_pf.lock);
  
@@ -151,7 +179,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
  
                 list_del(&work->queue);
                 vcpu->async_pf.queued--;
-               kmem_cache_free(async_pf_cache, work);
+               kvm_flush_and_free_async_pf_work(work);
         }
  }
  
@@ -184,9 +212,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
         work->cr2_or_gpa = cr2_or_gpa;
         work->addr = hva;
         work->arch = *arch;
-       work->mm = current->mm;
-       mmget(work->mm);
-       kvm_get_kvm(work->vcpu->kvm);
  
         INIT_WORK(&work->work, async_pf_execute);
  
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 10bfc88a69f72b6a0e310cca043fb04882e24eb1..fb49c2a602002ed30a5f426203fa0e30be2436b0 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -421,7 +421,7 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
                 if (WARN_ON_ONCE(!capacity))
                         return -EIO;
  
-               mc->objects = kvmalloc_array(sizeof(void *), capacity, gfp);
+               mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
                 if (!mc->objects)
                         return -ENOMEM;
  
@@ -890,7 +890,9 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
  
         /* Pairs with the increment in range_start(). */
         spin_lock(&kvm->mn_invalidate_lock);
-       wake = (--kvm->mn_active_invalidate_count == 0);
+       if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count))
+               --kvm->mn_active_invalidate_count;
+       wake = !kvm->mn_active_invalidate_count;
         spin_unlock(&kvm->mn_invalidate_lock);
  
         /*
@@ -1150,10 +1152,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
                                     &stat_fops_per_vm);
         }
  
-       ret = kvm_arch_create_vm_debugfs(kvm);
-       if (ret)
-               goto out_err;
-
+       kvm_arch_create_vm_debugfs(kvm);
         return 0;
  out_err:
         kvm_destroy_vm_debugfs(kvm);
@@ -1183,9 +1182,8 @@ void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
   * Cleanup should be automatic done in kvm_destroy_vm_debugfs() recursively, so
   * a per-arch destroy interface is not needed.
   */
-int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
+void __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
  {
-       return 0;
  }
  
  static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
@@ -1614,8 +1612,14 @@ static int check_memory_region_flags(struct kvm *kvm,
         if (mem->flags & KVM_MEM_GUEST_MEMFD)
                 valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
  
-#ifdef __KVM_HAVE_READONLY_MEM
-       valid_flags |= KVM_MEM_READONLY;
+#ifdef CONFIG_HAVE_KVM_READONLY_MEM
+       /*
+        * GUEST_MEMFD is incompatible with read-only memslots, as writes to
+        * read-only memslots have emulated MMIO, not page fault, semantics,
+        * and KVM doesn't allow emulated MMIO for private memory.
+        */
+       if (!(mem->flags & KVM_MEM_GUEST_MEMFD))
+               valid_flags |= KVM_MEM_READONLY;
  #endif
  
         if (mem->flags & ~valid_flags)
@@ -4042,6 +4046,18 @@ static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
         return false;
  }
  
+/*
+ * By default, simply query the target vCPU's current mode when checking if a
+ * vCPU was preempted in kernel mode.  All architectures except x86 (or more
+ * specifical, except VMX) allow querying whether or not a vCPU is in kernel
+ * mode even if the vCPU is NOT loaded, i.e. using kvm_arch_vcpu_in_kernel()
+ * directly for cross-vCPU checks is functionally correct and accurate.
+ */
+bool __weak kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
+       return kvm_arch_vcpu_in_kernel(vcpu);
+}
+
  bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
  {
         return false;
@@ -4078,9 +4094,16 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
                                 continue;
                         if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
                                 continue;
+
+                       /*
+                        * Treat the target vCPU as being in-kernel if it has a
+                        * pending interrupt, as the vCPU trying to yield may
+                        * be spinning waiting on IPI delivery, i.e. the target
+                        * vCPU is in-kernel for the purposes of directed yield.
+                        */
                         if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
                             !kvm_arch_dy_has_pending_interrupt(vcpu) &&
-                           !kvm_arch_vcpu_in_kernel(vcpu))
+                           !kvm_arch_vcpu_preempted_in_kernel(vcpu))
                                 continue;
                         if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                 continue;
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c

index 2d6aba67783078180bb89e77cfa6ef61239d497d..4e07112a24c2f6d02f67ee22a7f7eaa15c5f098b 100644 (file)
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -25,55 +25,36 @@
  void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
                                        unsigned long end, bool may_block)
  {
-       DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
         struct gfn_to_pfn_cache *gpc;
-       bool evict_vcpus = false;
  
         spin_lock(&kvm->gpc_lock);
         list_for_each_entry(gpc, &kvm->gpc_list, list) {
-               write_lock_irq(&gpc->lock);
+               read_lock_irq(&gpc->lock);
  
                 /* Only a single page so no need to care about length */
                 if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
                     gpc->uhva >= start && gpc->uhva < end) {
-                       gpc->valid = false;
+                       read_unlock_irq(&gpc->lock);
  
                         /*
-                        * If a guest vCPU could be using the physical address,
-                        * it needs to be forced out of guest mode.
+                        * There is a small window here where the cache could
+                        * be modified, and invalidation would no longer be
+                        * necessary. Hence check again whether invalidation
+                        * is still necessary once the write lock has been
+                        * acquired.
                          */
-                       if (gpc->usage & KVM_GUEST_USES_PFN) {
-                               if (!evict_vcpus) {
-                                       evict_vcpus = true;
-                                       bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
-                               }
-                               __set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
-                       }
-               }
-               write_unlock_irq(&gpc->lock);
-       }
-       spin_unlock(&kvm->gpc_lock);
-
-       if (evict_vcpus) {
-               /*
-                * KVM needs to ensure the vCPU is fully out of guest context
-                * before allowing the invalidation to continue.
-                */
-               unsigned int req = KVM_REQ_OUTSIDE_GUEST_MODE;
-               bool called;
  
-               /*
-                * If the OOM reaper is active, then all vCPUs should have
-                * been stopped already, so perform the request without
-                * KVM_REQUEST_WAIT and be sad if any needed to be IPI'd.
-                */
-               if (!may_block)
-                       req &= ~KVM_REQUEST_WAIT;
-
-               called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap);
+                       write_lock_irq(&gpc->lock);
+                       if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
+                           gpc->uhva >= start && gpc->uhva < end)
+                               gpc->valid = false;
+                       write_unlock_irq(&gpc->lock);
+                       continue;
+               }
  
-               WARN_ON_ONCE(called && !may_block);
+               read_unlock_irq(&gpc->lock);
         }
+       spin_unlock(&kvm->gpc_lock);
  }
  
  bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
@@ -83,10 +64,17 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
         if (!gpc->active)
                 return false;
  
-       if ((gpc->gpa & ~PAGE_MASK) + len > PAGE_SIZE)
+       /*
+        * If the page was cached from a memslot, make sure the memslots have
+        * not been re-configured.
+        */
+       if (!kvm_is_error_gpa(gpc->gpa) && gpc->generation != slots->generation)
+               return false;
+
+       if (kvm_is_error_hva(gpc->uhva))
                 return false;
  
-       if (gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva))
+       if (offset_in_page(gpc->uhva) + len > PAGE_SIZE)
                 return false;
  
         if (!gpc->valid)
@@ -94,19 +82,33 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
  
         return true;
  }
-EXPORT_SYMBOL_GPL(kvm_gpc_check);
  
-static void gpc_unmap_khva(kvm_pfn_t pfn, void *khva)
+static void *gpc_map(kvm_pfn_t pfn)
  {
-       /* Unmap the old pfn/page if it was mapped before. */
-       if (!is_error_noslot_pfn(pfn) && khva) {
-               if (pfn_valid(pfn))
-                       kunmap(pfn_to_page(pfn));
+       if (pfn_valid(pfn))
+               return kmap(pfn_to_page(pfn));
+
  #ifdef CONFIG_HAS_IOMEM
-               else
-                       memunmap(khva);
+       return memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
+#else
+       return NULL;
  #endif
+}
+
+static void gpc_unmap(kvm_pfn_t pfn, void *khva)
+{
+       /* Unmap the old pfn/page if it was mapped before. */
+       if (is_error_noslot_pfn(pfn) || !khva)
+               return;
+
+       if (pfn_valid(pfn)) {
+               kunmap(pfn_to_page(pfn));
+               return;
         }
+
+#ifdef CONFIG_HAS_IOMEM
+       memunmap(khva);
+#endif
  }
  
  static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
@@ -140,7 +142,7 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
  static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
  {
         /* Note, the new page offset may be different than the old! */
-       void *old_khva = gpc->khva - offset_in_page(gpc->khva);
+       void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
         kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT;
         void *new_khva = NULL;
         unsigned long mmu_seq;
@@ -175,7 +177,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                          * the existing mapping and didn't create a new one.
                          */
                         if (new_khva != old_khva)
-                               gpc_unmap_khva(new_pfn, new_khva);
+                               gpc_unmap(new_pfn, new_khva);
  
                         kvm_release_pfn_clean(new_pfn);
  
@@ -192,20 +194,14 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                  * pfn.  Note, kmap() and memremap() can both sleep, so this
                  * too must be done outside of gpc->lock!
                  */
-               if (gpc->usage & KVM_HOST_USES_PFN) {
-                       if (new_pfn == gpc->pfn) {
-                               new_khva = old_khva;
-                       } else if (pfn_valid(new_pfn)) {
-                               new_khva = kmap(pfn_to_page(new_pfn));
-#ifdef CONFIG_HAS_IOMEM
-                       } else {
-                               new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
-#endif
-                       }
-                       if (!new_khva) {
-                               kvm_release_pfn_clean(new_pfn);
-                               goto out_error;
-                       }
+               if (new_pfn == gpc->pfn)
+                       new_khva = old_khva;
+               else
+                       new_khva = gpc_map(new_pfn);
+
+               if (!new_khva) {
+                       kvm_release_pfn_clean(new_pfn);
+                       goto out_error;
                 }
  
                 write_lock_irq(&gpc->lock);
@@ -219,7 +215,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
  
         gpc->valid = true;
         gpc->pfn = new_pfn;
-       gpc->khva = new_khva + (gpc->gpa & ~PAGE_MASK);
+       gpc->khva = new_khva + offset_in_page(gpc->uhva);
  
         /*
          * Put the reference to the _new_ pfn.  The pfn is now tracked by the
@@ -236,30 +232,31 @@ out_error:
         return -EFAULT;
  }
  
-static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
+static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
                              unsigned long len)
  {
-       struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
-       unsigned long page_offset = gpa & ~PAGE_MASK;
+       unsigned long page_offset;
         bool unmap_old = false;
         unsigned long old_uhva;
         kvm_pfn_t old_pfn;
+       bool hva_change = false;
         void *old_khva;
         int ret;
  
+       /* Either gpa or uhva must be valid, but not both */
+       if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva)))
+               return -EINVAL;
+
         /*
-        * If must fit within a single page. The 'len' argument is
-        * only to enforce that.
+        * The cached acces must fit within a single page. The 'len' argument
+        * exists only to enforce that.
          */
+       page_offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
+                                             offset_in_page(gpa);
         if (page_offset + len > PAGE_SIZE)
                 return -EINVAL;
  
-       /*
-        * If another task is refreshing the cache, wait for it to complete.
-        * There is no guarantee that concurrent refreshes will see the same
-        * gpa, memslots generation, etc..., so they must be fully serialized.
-        */
-       mutex_lock(&gpc->refresh_lock);
+       lockdep_assert_held(&gpc->refresh_lock);
  
         write_lock_irq(&gpc->lock);
  
@@ -269,30 +266,52 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
         }
  
         old_pfn = gpc->pfn;
-       old_khva = gpc->khva - offset_in_page(gpc->khva);
-       old_uhva = gpc->uhva;
-
-       /* If the userspace HVA is invalid, refresh that first */
-       if (gpc->gpa != gpa || gpc->generation != slots->generation ||
-           kvm_is_error_hva(gpc->uhva)) {
-               gfn_t gfn = gpa_to_gfn(gpa);
-
-               gpc->gpa = gpa;
-               gpc->generation = slots->generation;
-               gpc->memslot = __gfn_to_memslot(slots, gfn);
-               gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
-
-               if (kvm_is_error_hva(gpc->uhva)) {
-                       ret = -EFAULT;
-                       goto out;
+       old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
+       old_uhva = PAGE_ALIGN_DOWN(gpc->uhva);
+
+       if (kvm_is_error_gpa(gpa)) {
+               gpc->gpa = INVALID_GPA;
+               gpc->memslot = NULL;
+               gpc->uhva = PAGE_ALIGN_DOWN(uhva);
+
+               if (gpc->uhva != old_uhva)
+                       hva_change = true;
+       } else {
+               struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
+
+               if (gpc->gpa != gpa || gpc->generation != slots->generation ||
+                   kvm_is_error_hva(gpc->uhva)) {
+                       gfn_t gfn = gpa_to_gfn(gpa);
+
+                       gpc->gpa = gpa;
+                       gpc->generation = slots->generation;
+                       gpc->memslot = __gfn_to_memslot(slots, gfn);
+                       gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
+
+                       if (kvm_is_error_hva(gpc->uhva)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
+
+                       /*
+                        * Even if the GPA and/or the memslot generation changed, the
+                        * HVA may still be the same.
+                        */
+                       if (gpc->uhva != old_uhva)
+                               hva_change = true;
+               } else {
+                       gpc->uhva = old_uhva;
                 }
         }
  
+       /* Note: the offset must be correct before calling hva_to_pfn_retry() */
+       gpc->uhva += page_offset;
+
         /*
          * If the userspace HVA changed or the PFN was already invalid,
          * drop the lock and do the HVA to PFN lookup again.
          */
-       if (!gpc->valid || old_uhva != gpc->uhva) {
+       if (!gpc->valid || hva_change) {
                 ret = hva_to_pfn_retry(gpc);
         } else {
                 /*
@@ -323,41 +342,47 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
  out_unlock:
         write_unlock_irq(&gpc->lock);
  
-       mutex_unlock(&gpc->refresh_lock);
-
         if (unmap_old)
-               gpc_unmap_khva(old_pfn, old_khva);
+               gpc_unmap(old_pfn, old_khva);
  
         return ret;
  }
  
  int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
  {
-       return __kvm_gpc_refresh(gpc, gpc->gpa, len);
+       unsigned long uhva;
+
+       guard(mutex)(&gpc->refresh_lock);
+
+       /*
+        * If the GPA is valid then ignore the HVA, as a cache can be GPA-based
+        * or HVA-based, not both.  For GPA-based caches, the HVA will be
+        * recomputed during refresh if necessary.
+        */
+       uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD;
+
+       return __kvm_gpc_refresh(gpc, gpc->gpa, uhva, len);
  }
-EXPORT_SYMBOL_GPL(kvm_gpc_refresh);
  
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
-                 struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
  {
-       WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
-       WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu);
-
         rwlock_init(&gpc->lock);
         mutex_init(&gpc->refresh_lock);
  
         gpc->kvm = kvm;
-       gpc->vcpu = vcpu;
-       gpc->usage = usage;
         gpc->pfn = KVM_PFN_ERR_FAULT;
+       gpc->gpa = INVALID_GPA;
         gpc->uhva = KVM_HVA_ERR_BAD;
+       gpc->active = gpc->valid = false;
  }
-EXPORT_SYMBOL_GPL(kvm_gpc_init);
  
-int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
+static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
+                             unsigned long len)
  {
         struct kvm *kvm = gpc->kvm;
  
+       guard(mutex)(&gpc->refresh_lock);
+
         if (!gpc->active) {
                 if (KVM_BUG_ON(gpc->valid, kvm))
                         return -EIO;
@@ -375,9 +400,18 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
                 gpc->active = true;
                 write_unlock_irq(&gpc->lock);
         }
-       return __kvm_gpc_refresh(gpc, gpa, len);
+       return __kvm_gpc_refresh(gpc, gpa, uhva, len);
+}
+
+int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
+{
+       return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len);
+}
+
+int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long uhva, unsigned long len)
+{
+       return __kvm_gpc_activate(gpc, INVALID_GPA, uhva, len);
  }
-EXPORT_SYMBOL_GPL(kvm_gpc_activate);
  
  void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
  {
@@ -385,6 +419,8 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
         kvm_pfn_t old_pfn;
         void *old_khva;
  
+       guard(mutex)(&gpc->refresh_lock);
+
         if (gpc->active) {
                 /*
                  * Deactivate the cache before removing it from the list, KVM
@@ -412,7 +448,6 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
                 list_del(&gpc->list);
                 spin_unlock(&kvm->gpc_lock);
  
-               gpc_unmap_khva(old_pfn, old_khva);
+               gpc_unmap(old_pfn, old_khva);
         }
  }
-EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);
author	Paolo Bonzini <pbonzini@redhat.com>
	Thu, 14 Mar 2024 18:47:56 +0000 (14:47 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Thu, 14 Mar 2024 18:47:56 +0000 (14:47 -0400)