596 files changed, 9894 insertions, 19698 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 184193bcb262..caf36105a1c7 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1857,8 +1857,10 @@ following two functions. wbc_init_bio(@wbc, @bio) Should be called for each bio carrying writeback data and - associates the bio with the inode's owner cgroup. Can be - called anytime between bio allocation and submission. + associates the bio with the inode's owner cgroup and the + corresponding request queue. This must be called after + a queue (device) has been associated with the bio and + before submission. wbc_account_io(@wbc, @page, @bytes) Should be called for each data segment being written out. @@ -1877,7 +1879,7 @@ the configuration, the bio may be executed at a lower priority and if the writeback session is holding shared resources, e.g. a journal entry, may lead to priority inversion. There is no one easy solution for the problem. Filesystems can try to work around specific problem -cases by skipping wbc_init_bio() or using bio_associate_blkcg() +cases by skipping wbc_init_bio() or using bio_associate_create_blkg() directly. diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index d6aff2c5e9e2..ea819ae024dd 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -78,11 +78,11 @@ HWCAP_EVTSTRM HWCAP_AES - Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0001. + Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001. HWCAP_PMULL - Functionality implied by ID_AA64ISAR1_EL1.AES == 0b0010. + Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0010. HWCAP_SHA1 @@ -153,7 +153,7 @@ HWCAP_ASIMDDP HWCAP_SHA512 - Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0002. + Functionality implied by ID_AA64ISAR0_EL1.SHA2 == 0b0010. HWCAP_SVE @@ -173,8 +173,12 @@ HWCAP_USCAT HWCAP_ILRCPC - Functionality implied by ID_AA64ISR1_EL1.LRCPC == 0b0002. + Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010. HWCAP_FLAGM Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001. + +HWCAP_SSBS + + Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. diff --git a/Documentation/arm64/hugetlbpage.txt b/Documentation/arm64/hugetlbpage.txt new file mode 100644 index 000000000000..cfae87dc653b --- /dev/null +++ b/Documentation/arm64/hugetlbpage.txt @@ -0,0 +1,38 @@ +HugeTLBpage on ARM64 +==================== + +Hugepage relies on making efficient use of TLBs to improve performance of +address translations. The benefit depends on both - + + - the size of hugepages + - size of entries supported by the TLBs + +The ARM64 port supports two flavours of hugepages. + +1) Block mappings at the pud/pmd level +-------------------------------------- + +These are regular hugepages where a pmd or a pud page table entry points to a +block of memory. Regardless of the supported size of entries in TLB, block +mappings reduce the depth of page table walk needed to translate hugepage +addresses. + +2) Using the Contiguous bit +--------------------------- + +The architecture provides a contiguous bit in the translation table entries +(D4.5.3, ARM DDI 0487C.a) that hints to the MMU to indicate that it is one of a +contiguous set of entries that can be cached in a single TLB entry. + +The contiguous bit is used in Linux to increase the mapping size at the pmd and +pte (last) level. The number of supported contiguous entries varies by page size +and level of the page table. 
+ + +The following hugepage sizes are supported - + + CONT PTE PMD CONT PMD PUD + -------- --- -------- --- + 4K: 64K 2M 32M 1G + 16K: 2M 32M 1G + 64K: 2M 512M 16G diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 3b2f2dd82225..76ccded8b74c 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -56,6 +56,7 @@ stable kernels. | ARM | Cortex-A72 | #853709 | N/A | | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | | ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 | +| ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/Documentation/blockdev/README.DAC960 b/Documentation/blockdev/README.DAC960 deleted file mode 100644 index bd85fb9dc6e5..000000000000 --- a/Documentation/blockdev/README.DAC960 +++ /dev/null @@ -1,756 +0,0 @@ - Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers - - Version 2.2.11 for Linux 2.2.19 - Version 2.4.11 for Linux 2.4.12 - - PRODUCTION RELEASE - - 11 October 2001 - - Leonard N. Zubkoff - Dandelion Digital - lnz@dandelion.com - - Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com> - - - INTRODUCTION - -Mylex, Inc. designs and manufactures a variety of high performance PCI RAID -controllers. Mylex Corporation is located at 34551 Ardenwood Blvd., Fremont, -California 94555, USA and can be reached at 510.796.6100 or on the World Wide -Web at http://www.mylex.com. Mylex Technical Support can be reached by -electronic mail at mylexsup@us.ibm.com, by voice at 510.608.2400, or by FAX at -510.745.7715. Contact information for offices in Europe and Japan is available -on their Web site. - -The latest information on Linux support for DAC960 PCI RAID Controllers, as -well as the most recent release of this driver, will always be available from -my Linux Home Page at URL "http://www.dandelion.com/Linux/". The Linux DAC960 -driver supports all current Mylex PCI RAID controllers including the new -eXtremeRAID 2000/3000 and AcceleRAID 352/170/160 models which have an entirely -new firmware interface from the older eXtremeRAID 1100, AcceleRAID 150/200/250, -and DAC960PJ/PG/PU/PD/PL. See below for a complete controller list as well as -minimum firmware version requirements. For simplicity, in most places this -documentation refers to DAC960 generically rather than explicitly listing all -the supported models. - -Driver bug reports should be sent via electronic mail to "lnz@dandelion.com". -Please include with the bug report the complete configuration messages reported -by the driver at startup, along with any subsequent system messages relevant to -the controller's operation, and a detailed description of your system's -hardware configuration. Driver bugs are actually quite rare; if you encounter -problems with disks being marked offline, for example, please contact Mylex -Technical Support as the problem is related to the hardware configuration -rather than the Linux driver. - -Please consult the RAID controller documentation for detailed information -regarding installation and configuration of the controllers. This document -primarily provides information specific to the Linux support. - - - DRIVER FEATURES - -The DAC960 RAID controllers are supported solely as high performance RAID -controllers, not as interfaces to arbitrary SCSI devices. 
The Linux DAC960 -driver operates at the block device level, the same level as the SCSI and IDE -drivers. Unlike other RAID controllers currently supported on Linux, the -DAC960 driver is not dependent on the SCSI subsystem, and hence avoids all the -complexity and unnecessary code that would be associated with an implementation -as a SCSI driver. The DAC960 driver is designed for as high a performance as -possible with no compromises or extra code for compatibility with lower -performance devices. The DAC960 driver includes extensive error logging and -online configuration management capabilities. Except for initial configuration -of the controller and adding new disk drives, most everything can be handled -from Linux while the system is operational. - -The DAC960 driver is architected to support up to 8 controllers per system. -Each DAC960 parallel SCSI controller can support up to 15 disk drives per -channel, for a maximum of 60 drives on a four channel controller; the fibre -channel eXtremeRAID 3000 controller supports up to 125 disk drives per loop for -a total of 250 drives. The drives installed on a controller are divided into -one or more "Drive Groups", and then each Drive Group is subdivided further -into 1 to 32 "Logical Drives". Each Logical Drive has a specific RAID Level -and caching policy associated with it, and it appears to Linux as a single -block device. Logical Drives are further subdivided into up to 7 partitions -through the normal Linux and PC disk partitioning schemes. Logical Drives are -also known as "System Drives", and Drive Groups are also called "Packs". Both -terms are in use in the Mylex documentation; I have chosen to standardize on -the more generic "Logical Drive" and "Drive Group". - -DAC960 RAID disk devices are named in the style of the obsolete Device File -System (DEVFS). The device corresponding to Logical Drive D on Controller C -is referred to as /dev/rd/cCdD, and the partitions are called /dev/rd/cCdDp1 -through /dev/rd/cCdDp7. For example, partition 3 of Logical Drive 5 on -Controller 2 is referred to as /dev/rd/c2d5p3. Note that unlike with SCSI -disks the device names will not change in the event of a disk drive failure. -The DAC960 driver is assigned major numbers 48 - 55 with one major number per -controller. The 8 bits of minor number are divided into 5 bits for the Logical -Drive and 3 bits for the partition. - - - SUPPORTED DAC960/AcceleRAID/eXtremeRAID PCI RAID CONTROLLERS - -The following list comprises the supported DAC960, AcceleRAID, and eXtremeRAID -PCI RAID Controllers as of the date of this document. It is recommended that -anyone purchasing a Mylex PCI RAID Controller not in the following table -contact the author beforehand to verify that it is or will be supported. 
- -eXtremeRAID 3000 - 1 Wide Ultra-2/LVD SCSI channel - 2 External Fibre FC-AL channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -eXtremeRAID 2000 - 4 Wide Ultra-160 LVD SCSI channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -AcceleRAID 352 - 2 Wide Ultra-160 LVD SCSI channels - 100MHz Intel i960RN RISC Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -AcceleRAID 170 - 1 Wide Ultra-160 LVD SCSI channel - 100MHz Intel i960RM RISC Processor - 16MB/32MB/64MB ECC SDRAM Memory - -AcceleRAID 160 (AcceleRAID 170LP) - 1 Wide Ultra-160 LVD SCSI channel - 100MHz Intel i960RS RISC Processor - Built in 16M ECC SDRAM Memory - PCI Low Profile Form Factor - fit for 2U height - -eXtremeRAID 1100 (DAC1164P) - 3 Wide Ultra-2/LVD SCSI channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 16MB/32MB/64MB Parity SDRAM Memory with Battery Backup - -AcceleRAID 250 (DAC960PTL1) - Uses onboard Symbios SCSI chips on certain motherboards - Also includes one onboard Wide Ultra-2/LVD SCSI Channel - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -AcceleRAID 200 (DAC960PTL0) - Uses onboard Symbios SCSI chips on certain motherboards - Includes no onboard SCSI Channels - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -AcceleRAID 150 (DAC960PRL) - Uses onboard Symbios SCSI chips on certain motherboards - Also includes one onboard Wide Ultra-2/LVD SCSI Channel - 33MHz Intel i960RP RISC Processor - 4MB Parity EDO Memory - -DAC960PJ 1/2/3 Wide Ultra SCSI-3 Channels - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -DAC960PG 1/2/3 Wide Ultra SCSI-3 Channels - 33MHz Intel i960RP RISC Processor - 4MB/8MB ECC EDO Memory - -DAC960PU 1/2/3 Wide Ultra SCSI-3 Channels - Intel i960CF RISC Processor - 4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960PD 1/2/3 Wide Fast SCSI-2 Channels - Intel i960CF RISC Processor - 4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960PL 1/2/3 Wide Fast SCSI-2 Channels - Intel i960 RISC Processor - 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960P 1/2/3 Wide Fast SCSI-2 Channels - Intel i960 RISC Processor - 2MB/4MB/8MB/16MB/32MB DRAM Memory - -For the eXtremeRAID 2000/3000 and AcceleRAID 352/170/160, firmware version -6.00-01 or above is required. - -For the eXtremeRAID 1100, firmware version 5.06-0-52 or above is required. - -For the AcceleRAID 250, 200, and 150, firmware version 4.06-0-57 or above is -required. - -For the DAC960PJ and DAC960PG, firmware version 4.06-0-00 or above is required. - -For the DAC960PU, DAC960PD, DAC960PL, and DAC960P, either firmware version -3.51-0-04 or above is required (for dual Flash ROM controllers), or firmware -version 2.73-0-00 or above is required (for single Flash ROM controllers) - -Please note that not all SCSI disk drives are suitable for use with DAC960 -controllers, and only particular firmware versions of any given model may -actually function correctly. Similarly, not all motherboards have a BIOS that -properly initializes the AcceleRAID 250, AcceleRAID 200, AcceleRAID 150, -DAC960PJ, and DAC960PG because the Intel i960RD/RP is a multi-function device. -If in doubt, contact Mylex RAID Technical Support (mylexsup@us.ibm.com) to -verify compatibility. 
Mylex makes available a hard disk compatibility list at -http://www.mylex.com/support/hdcomp/hd-lists.html. - - - DRIVER INSTALLATION - -This distribution was prepared for Linux kernel version 2.2.19 or 2.4.12. - -To install the DAC960 RAID driver, you may use the following commands, -replacing "/usr/src" with wherever you keep your Linux kernel source tree: - - cd /usr/src - tar -xvzf DAC960-2.2.11.tar.gz (or DAC960-2.4.11.tar.gz) - mv README.DAC960 linux/Documentation - mv DAC960.[ch] linux/drivers/block - patch -p0 < DAC960.patch (if DAC960.patch is included) - cd linux - make config - make bzImage (or zImage) - -Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your -standard kernel, run lilo if appropriate, and reboot. - -To create the necessary devices in /dev, the "make_rd" script included in -"DAC960-Utilities.tar.gz" from http://www.dandelion.com/Linux/ may be used. -LILO 21 and FDISK v2.9 include DAC960 support; also included in this archive -are patches to LILO 20 and FDISK v2.8 that add DAC960 support, along with -statically linked executables of LILO and FDISK. This modified version of LILO -will allow booting from a DAC960 controller and/or mounting the root file -system from a DAC960. - -Red Hat Linux 6.0 and SuSE Linux 6.1 include support for Mylex PCI RAID -controllers. Installing directly onto a DAC960 may be problematic from other -Linux distributions until their installation utilities are updated. - - - INSTALLATION NOTES - -Before installing Linux or adding DAC960 logical drives to an existing Linux -system, the controller must first be configured to provide one or more logical -drives using the BIOS Configuration Utility or DACCF. Please note that since -there are only at most 6 usable partitions on each logical drive, systems -requiring more partitions should subdivide a drive group into multiple logical -drives, each of which can have up to 6 usable partitions. Also, note that with -large disk arrays it is advisable to enable the 8GB BIOS Geometry (255/63) -rather than accepting the default 2GB BIOS Geometry (128/32); failing to so do -will cause the logical drive geometry to have more than 65535 cylinders which -will make it impossible for FDISK to be used properly. The 8GB BIOS Geometry -can be enabled by configuring the DAC960 BIOS, which is accessible via Alt-M -during the BIOS initialization sequence. - -For maximum performance and the most efficient E2FSCK performance, it is -recommended that EXT2 file systems be built with a 4KB block size and 16 block -stride to match the DAC960 controller's 64KB default stripe size. The command -"mke2fs -b 4096 -R stride=16 <device>" is appropriate. Unless there will be a -large number of small files on the file systems, it is also beneficial to add -the "-i 16384" option to increase the bytes per inode parameter thereby -reducing the file system metadata. Finally, on systems that will only be run -with Linux 2.2 or later kernels it is beneficial to enable sparse superblocks -with the "-s 1" option. - - - DAC960 ANNOUNCEMENTS MAILING LIST - -The DAC960 Announcements Mailing List provides a forum for informing Linux -users of new driver releases and other announcements regarding Linux support -for DAC960 PCI RAID Controllers. To join the mailing list, send a message to -"dac960-announce-request@dandelion.com" with the line "subscribe" in the -message body. 
- - - CONTROLLER CONFIGURATION AND STATUS MONITORING - -The DAC960 RAID controllers running firmware 4.06 or above include a Background -Initialization facility so that system downtime is minimized both for initial -installation and subsequent configuration of additional storage. The BIOS -Configuration Utility (accessible via Alt-R during the BIOS initialization -sequence) is used to quickly configure the controller, and then the logical -drives that have been created are available for immediate use even while they -are still being initialized by the controller. The primary need for online -configuration and status monitoring is then to avoid system downtime when disk -drives fail and must be replaced. Mylex's online monitoring and configuration -utilities are being ported to Linux and will become available at some point in -the future. Note that with a SAF-TE (SCSI Accessed Fault-Tolerant Enclosure) -enclosure, the controller is able to rebuild failed drives automatically as -soon as a drive replacement is made available. - -The primary interfaces for controller configuration and status monitoring are -special files created in the /proc/rd/... hierarchy along with the normal -system console logging mechanism. Whenever the system is operating, the DAC960 -driver queries each controller for status information every 10 seconds, and -checks for additional conditions every 60 seconds. The initial status of each -controller is always available for controller N in /proc/rd/cN/initial_status, -and the current status as of the last status monitoring query is available in -/proc/rd/cN/current_status. In addition, status changes are also logged by the -driver to the system console and will appear in the log files maintained by -syslog. The progress of asynchronous rebuild or consistency check operations -is also available in /proc/rd/cN/current_status, and progress messages are -logged to the system console at most every 60 seconds. - -Starting with the 2.2.3/2.0.3 versions of the driver, the status information -available in /proc/rd/cN/initial_status and /proc/rd/cN/current_status has been -augmented to include the vendor, model, revision, and serial number (if -available) for each physical device found connected to the controller: - -***** DAC960 RAID Driver Version 2.2.3 of 19 August 1999 ***** -Copyright 1998-1999 by Leonard N. 
Zubkoff <lnz@dandelion.com> -Configuring Mylex DAC960PRL PCI RAID Controller - Firmware Version: 4.07-0-07, Channels: 1, Memory Size: 16MB - PCI Bus: 1, Device: 4, Function: 1, I/O Address: Unassigned - PCI Address: 0xFE300000 mapped at 0xA0800000, IRQ Channel: 21 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - SAF-TE Enclosure Management Enabled - Physical Devices: - 0:0 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68016775HA - Disk Status: Online, 17928192 blocks - 0:1 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68004E53HA - Disk Status: Online, 17928192 blocks - 0:2 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 13013935HA - Disk Status: Online, 17928192 blocks - 0:3 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 13016897HA - Disk Status: Online, 17928192 blocks - 0:4 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68019905HA - Disk Status: Online, 17928192 blocks - 0:5 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68012753HA - Disk Status: Online, 17928192 blocks - 0:6 Vendor: ESG-SHV Model: SCA HSBP M6 Revision: 0.61 - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 89640960 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -To simplify the monitoring process for custom software, the special file -/proc/rd/status returns "OK" when all DAC960 controllers in the system are -operating normally and no failures have occurred, or "ALERT" if any logical -drives are offline or critical or any non-standby physical drives are dead. - -Configuration commands for controller N are available via the special file -/proc/rd/cN/user_command. A human readable command can be written to this -special file to initiate a configuration operation, and the results of the -operation can then be read back from the special file in addition to being -logged to the system console. The shell command sequence - - echo "<configuration-command>" > /proc/rd/c0/user_command - cat /proc/rd/c0/user_command - -is typically used to execute configuration commands. The configuration -commands are: - - flush-cache - - The "flush-cache" command flushes the controller's cache. The system - automatically flushes the cache at shutdown or if the driver module is - unloaded, so this command is only needed to be certain a write back cache - is flushed to disk before the system is powered off by a command to a UPS. - Note that the flush-cache command also stops an asynchronous rebuild or - consistency check, so it should not be used except when the system is being - halted. - - kill <channel>:<target-id> - - The "kill" command marks the physical drive <channel>:<target-id> as DEAD. - This command is provided primarily for testing, and should not be used - during normal system operation. - - make-online <channel>:<target-id> - - The "make-online" command changes the physical drive <channel>:<target-id> - from status DEAD to status ONLINE. In cases where multiple physical drives - have been killed simultaneously, this command may be used to bring all but - one of them back online, after which a rebuild to the final drive is - necessary. - - Warning: make-online should only be used on a dead physical drive that is - an active part of a drive group, never on a standby drive. 
The command - should never be used on a dead drive that is part of a critical logical - drive; rebuild should be used if only a single drive is dead. - - make-standby <channel>:<target-id> - - The "make-standby" command changes physical drive <channel>:<target-id> - from status DEAD to status STANDBY. It should only be used in cases where - a dead drive was replaced after an automatic rebuild was performed onto a - standby drive. It cannot be used to add a standby drive to the controller - configuration if one was not created initially; the BIOS Configuration - Utility must be used for that currently. - - rebuild <channel>:<target-id> - - The "rebuild" command initiates an asynchronous rebuild onto physical drive - <channel>:<target-id>. It should only be used when a dead drive has been - replaced. - - check-consistency <logical-drive-number> - - The "check-consistency" command initiates an asynchronous consistency check - of <logical-drive-number> with automatic restoration. It can be used - whenever it is desired to verify the consistency of the redundancy - information. - - cancel-rebuild - cancel-consistency-check - - The "cancel-rebuild" and "cancel-consistency-check" commands cancel any - rebuild or consistency check operations previously initiated. - - - EXAMPLE I - DRIVE FAILURE WITHOUT A STANDBY DRIVE - -The following annotated logs demonstrate the controller configuration and and -online status monitoring capabilities of the Linux DAC960 Driver. The test -configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a -DAC960PJ controller. The physical drives are configured into a single drive -group without a standby drive, and the drive group has been configured into two -logical drives, one RAID-5 and one RAID-6. Note that these logs are from an -earlier version of the driver and the messages have changed somewhat with newer -releases, but the functionality remains similar. First, here is the current -status of the RAID configuration: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com> -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -The above messages indicate that everything is healthy, and /proc/rd/status -returns "OK" indicating that there are no problems with any DAC960 controller -in the system. For demonstration purposes, while I/O is active Physical Drive -1:1 is now disconnected, simulating a drive failure. 
The failure is noted by -the driver within 10 seconds of the controller's having detected it, and the -driver logs the following console status messages indicating that Logical -Drives 0 and 1 are now CRITICAL as a result of Physical Drive 1:1 being DEAD: - -DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:1 killed because of timeout on SCSI command -DAC960#0: Physical Drive 1:1 is now DEAD -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL - -The Sense Keys logged here are just Check Condition / Unit Attention conditions -arising from a SCSI bus reset that is forced by the controller during its error -recovery procedures. Concurrently with the above, the driver status available -from /proc/rd also reflects the drive failure. The status message in -/proc/rd/status has changed from "OK" to "ALERT": - -gwynedd:/u/lnz# cat /proc/rd/status -ALERT - -and /proc/rd/c0/current_status has been updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Dead, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -Since there are no standby drives configured, the system can continue to access -the logical drives in a performance degraded mode until the failed drive is -replaced and a rebuild operation completed to restore the redundancy of the -logical drives. Once Physical Drive 1:1 is replaced with a properly -functioning drive, or if the physical drive was killed without having failed -(e.g., due to electrical problems on the SCSI bus), the user can instruct the -controller to initiate a rebuild operation onto the newly replaced drive: - -gwynedd:/u/lnz# echo "rebuild 1:1" > /proc/rd/c0/user_command -gwynedd:/u/lnz# cat /proc/rd/c0/user_command -Rebuild of Physical Drive 1:1 Initiated - -The echo command instructs the controller to initiate an asynchronous rebuild -operation onto Physical Drive 1:1, and the status message that results from the -operation is then available for reading from /proc/rd/c0/user_command, as well -as being logged to the console by the driver. - -Within 10 seconds of this command the driver logs the initiation of the -asynchronous rebuild operation: - -DAC960#0: Rebuild of Physical Drive 1:1 Initiated -DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01 -DAC960#0: Physical Drive 1:1 is now WRITE-ONLY -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 1% completed - -and /proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... 
- Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Write-Only, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 6% completed - -As the rebuild progresses, the current status in /proc/rd/c0/current_status is -updated every 10 seconds: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Write-Only, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 15% completed - -and every minute a progress message is logged to the console by the driver: - -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 32% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 63% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 94% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 94% completed - -Finally, the rebuild completes successfully. The driver logs the status of the -logical and physical drives and the rebuild completion: - -DAC960#0: Rebuild Completed Successfully -DAC960#0: Physical Drive 1:1 is now ONLINE -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE - -/proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru - Rebuild Completed Successfully - -and /proc/rd/status indicates that everything is healthy once again: - -gwynedd:/u/lnz# cat /proc/rd/status -OK - - - EXAMPLE II - DRIVE FAILURE WITH A STANDBY DRIVE - -The following annotated logs demonstrate the controller configuration and and -online status monitoring capabilities of the Linux DAC960 Driver. The test -configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a -DAC960PJ controller. The physical drives are configured into a single drive -group with a standby drive, and the drive group has been configured into two -logical drives, one RAID-5 and one RAID-6. Note that these logs are from an -earlier version of the driver and the messages have changed somewhat with newer -releases, but the functionality remains similar. First, here is the current -status of the RAID configuration: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. 
Zubkoff <lnz@dandelion.com> -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Standby, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -The above messages indicate that everything is healthy, and /proc/rd/status -returns "OK" indicating that there are no problems with any DAC960 controller -in the system. For demonstration purposes, while I/O is active Physical Drive -1:2 is now disconnected, simulating a drive failure. The failure is noted by -the driver within 10 seconds of the controller's having detected it, and the -driver logs the following console status messages: - -DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:2 killed because of timeout on SCSI command -DAC960#0: Physical Drive 1:2 is now DEAD -DAC960#0: Physical Drive 1:2 killed because it was removed -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL - -Since a standby drive is configured, the controller automatically begins -rebuilding onto the standby drive: - -DAC960#0: Physical Drive 1:3 is now WRITE-ONLY -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed - -Concurrently with the above, the driver status available from /proc/rd also -reflects the drive failure and automatic rebuild. The status message in -/proc/rd/status has changed from "OK" to "ALERT": - -gwynedd:/u/lnz# cat /proc/rd/status -ALERT - -and /proc/rd/c0/current_status has been updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Write-Only, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed - -As the rebuild progresses, the current status in /proc/rd/c0/current_status is -updated every 10 seconds: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... 
- Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Write-Only, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed - -and every minute a progress message is logged on the console by the driver: - -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 76% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 66% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 84% completed - -Finally, the rebuild completes successfully. The driver logs the status of the -logical and physical drives and the rebuild completion: - -DAC960#0: Rebuild Completed Successfully -DAC960#0: Physical Drive 1:3 is now ONLINE -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE - -/proc/rd/c0/current_status is updated: - -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com> -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - Rebuild Completed Successfully - -and /proc/rd/status indicates that everything is healthy once again: - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -Note that the absence of a viable standby drive does not create an "ALERT" -status. Once dead Physical Drive 1:2 has been replaced, the controller must be -told that this has occurred and that the newly replaced drive should become the -new standby drive: - -gwynedd:/u/lnz# echo "make-standby 1:2" > /proc/rd/c0/user_command -gwynedd:/u/lnz# cat /proc/rd/c0/user_command -Make Standby of Physical Drive 1:2 Succeeded - -The echo command instructs the controller to make Physical Drive 1:2 into a -standby drive, and the status message that results from the operation is then -available for reading from /proc/rd/c0/user_command, as well as being logged to -the console by the driver. Within 60 seconds of this command the driver logs: - -DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01 -DAC960#0: Physical Drive 1:2 is now STANDBY -DAC960#0: Make Standby of Physical Drive 1:2 Succeeded - -and /proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... 
- Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Standby, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - Rebuild Completed Successfully diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 875b2b56b87f..3c1b5ab54bc0 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -190,7 +190,7 @@ whitespace: notify_free Depending on device usage scenario it may account a) the number of pages freed because of swap slot free notifications or b) the number of pages freed because of - REQ_DISCARD requests sent by bio. The former ones are + REQ_OP_DISCARD requests sent by bio. The former ones are sent to a swap block device when a swap slot is freed, which implies that this disk is being used as a swap disk. The latter ones are sent by filesystem mounted with diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst index d351e880a2f6..a2738050c4f0 100644 --- a/Documentation/core-api/idr.rst +++ b/Documentation/core-api/idr.rst @@ -1,4 +1,4 @@ -.. SPDX-License-Identifier: CC-BY-SA-4.0 +.. SPDX-License-Identifier: GPL-2.0+ ============= ID Allocation diff --git a/Documentation/device-mapper/log-writes.txt b/Documentation/device-mapper/log-writes.txt index f4ebcbaf50f3..b638d124be6a 100644 --- a/Documentation/device-mapper/log-writes.txt +++ b/Documentation/device-mapper/log-writes.txt @@ -38,7 +38,7 @@ inconsistent file system. Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as they complete as those requests will obviously bypass the device cache. -Any REQ_DISCARD requests are treated like WRITE requests. Otherwise we would +Any REQ_OP_DISCARD requests are treated like WRITE requests. Otherwise we would have all the DISCARD requests, and then the WRITE requests and then the FLUSH request. Consider the following example: diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index 5d5bd456d9d9..e30fd106df4f 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -10,6 +10,7 @@ PHYs. Required properties: - compatible : compatible string, one of: - "allwinner,sun4i-a10-ahci" + - "allwinner,sun8i-r40-ahci" - "brcm,iproc-ahci" - "hisilicon,hisi-ahci" - "cavium,octeon-7130-ahci" @@ -31,8 +32,10 @@ Optional properties: - clocks : a list of phandle + clock specifier pairs - resets : a list of phandle + reset specifier pairs - target-supply : regulator for SATA target power +- phy-supply : regulator for PHY power - phys : reference to the SATA PHY node - phy-names : must be "sata-phy" +- ahci-supply : regulator for AHCI controller - ports-implemented : Mask that indicates which ports that the HBA supports are available for software to use. Useful if PORTS_IMPL is not programmed by the BIOS, which is true with @@ -42,12 +45,13 @@ Required properties when using sub-nodes: - #address-cells : number of cells to encode an address - #size-cells : number of cells representing the size of an address +For allwinner,sun8i-r40-ahci, the reset propertie must be present. 
Sub-nodes required properties: - reg : the port number And at least one of the following properties: - phys : reference to the SATA PHY node -- target-supply : regulator for SATA target power +- target-supply : regulator for SATA target power Examples: sata@ffe08000 { diff --git a/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt index 0a5b3b47f217..7713a413c6a7 100644 --- a/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt +++ b/Documentation/devicetree/bindings/ata/brcm,sata-brcm.txt @@ -9,6 +9,7 @@ Required properties: "brcm,bcm7445-ahci" "brcm,bcm-nsp-ahci" "brcm,sata3-ahci" + "brcm,bcm63138-ahci" - reg : register mappings for AHCI and SATA_TOP_CTRL - reg-names : "ahci" and "top-ctrl" - interrupts : interrupt mapping for SATA IRQ diff --git a/Documentation/process/code-of-conduct-interpretation.rst b/Documentation/process/code-of-conduct-interpretation.rst new file mode 100644 index 000000000000..e899f14a4ba2 --- /dev/null +++ b/Documentation/process/code-of-conduct-interpretation.rst @@ -0,0 +1,156 @@ +.. _code_of_conduct_interpretation: + +Linux Kernel Contributor Covenant Code of Conduct Interpretation +================================================================ + +The :ref:`code_of_conduct` is a general document meant to +provide a set of rules for almost any open source community. Every +open-source community is unique and the Linux kernel is no exception. +Because of this, this document describes how we in the Linux kernel +community will interpret it. We also do not expect this interpretation +to be static over time, and will adjust it as needed. + +The Linux kernel development effort is a very personal process compared +to "traditional" ways of developing software. Your contributions and +ideas behind them will be carefully reviewed, often resulting in +critique and criticism. The review will almost always require +improvements before the material can be included in the +kernel. Know that this happens because everyone involved wants to see +the best possible solution for the overall success of Linux. This +development process has been proven to create the most robust operating +system kernel ever, and we do not want to do anything to cause the +quality of submission and eventual result to ever decrease. + +Maintainers +----------- + +The Code of Conduct uses the term "maintainers" numerous times. In the +kernel community, a "maintainer" is anyone who is responsible for a +subsystem, driver, or file, and is listed in the MAINTAINERS file in the +kernel source tree. + +Responsibilities +---------------- + +The Code of Conduct mentions rights and responsibilities for +maintainers, and this needs some further clarifications. + +First and foremost, it is a reasonable expectation to have maintainers +lead by example. + +That being said, our community is vast and broad, and there is no new +requirement for maintainers to unilaterally handle how other people +behave in the parts of the community where they are active. That +responsibility is upon all of us, and ultimately the Code of Conduct +documents final escalation paths in case of unresolved concerns +regarding conduct issues. + +Maintainers should be willing to help when problems occur, and work with +others in the community when needed. Do not be afraid to reach out to +the Technical Advisory Board (TAB) or other maintainers if you're +uncertain how to handle situations that come up. 
It will not be +considered a violation report unless you want it to be. If you are +uncertain about approaching the TAB or any other maintainers, please +reach out to our conflict mediator, Mishi Choudhary <mishi@linux.com>. + +In the end, "be kind to each other" is really what the end goal is for +everybody. We know everyone is human and we all fail at times, but the +primary goal for all of us should be to work toward amicable resolutions +of problems. Enforcement of the code of conduct will only be a last +resort option. + +Our goal of creating a robust and technically advanced operating system +and the technical complexity involved naturally require expertise and +decision-making. + +The required expertise varies depending on the area of contribution. It +is determined mainly by context and technical complexity and only +secondary by the expectations of contributors and maintainers. + +Both the expertise expectations and decision-making are subject to +discussion, but at the very end there is a basic necessity to be able to +make decisions in order to make progress. This prerogative is in the +hands of maintainers and project's leadership and is expected to be used +in good faith. + +As a consequence, setting expertise expectations, making decisions and +rejecting unsuitable contributions are not viewed as a violation of the +Code of Conduct. + +While maintainers are in general welcoming to newcomers, their capacity +of helping contributors overcome the entry hurdles is limited, so they +have to set priorities. This, also, is not to be seen as a violation of +the Code of Conduct. The kernel community is aware of that and provides +entry level programs in various forms like kernelnewbies.org. + +Scope +----- + +The Linux kernel community primarily interacts on a set of public email +lists distributed around a number of different servers controlled by a +number of different companies or individuals. All of these lists are +defined in the MAINTAINERS file in the kernel source tree. Any emails +sent to those mailing lists are considered covered by the Code of +Conduct. + +Developers who use the kernel.org bugzilla, and other subsystem bugzilla +or bug tracking tools should follow the guidelines of the Code of +Conduct. The Linux kernel community does not have an "official" project +email address, or "official" social media address. Any activity +performed using a kernel.org email account must follow the Code of +Conduct as published for kernel.org, just as any individual using a +corporate email account must follow the specific rules of that +corporation. + +The Code of Conduct does not prohibit continuing to include names, email +addresses, and associated comments in mailing list messages, kernel +change log messages, or code comments. + +Interaction in other forums is covered by whatever rules apply to said +forums and is in general not covered by the Code of Conduct. Exceptions +may be considered for extreme circumstances. + +Contributions submitted for the kernel should use appropriate language. +Content that already exists predating the Code of Conduct will not be +addressed now as a violation. Inappropriate language can be seen as a +bug, though; such bugs will be fixed more quickly if any interested +parties submit patches to that effect. Expressions that are currently +part of the user/kernel API, or reflect terminology used in published +standards or specifications, are not considered bugs. 
+ +Enforcement +----------- + +The address listed in the Code of Conduct goes to the Code of Conduct +Committee. The exact members receiving these emails at any given time +are listed at https://kernel.org/code-of-conduct.html. Members can not +access reports made before they joined or after they have left the +committee. + +The initial Code of Conduct Committee consists of volunteer members of +the TAB, as well as a professional mediator acting as a neutral third +party. The first task of the committee is to establish documented +processes, which will be made public. + +Any member of the committee, including the mediator, can be contacted +directly if a reporter does not wish to include the full committee in a +complaint or concern. + +The Code of Conduct Committee reviews the cases according to the +processes (see above) and consults with the TAB as needed and +appropriate, for instance to request and receive information about the +kernel community. + +Any decisions by the committee will be brought to the TAB, for +implementation of enforcement with the relevant maintainers if needed. +A decision by the Code of Conduct Committee can be overturned by the TAB +by a two-thirds vote. + +At quarterly intervals, the Code of Conduct Committee and TAB will +provide a report summarizing the anonymised reports that the Code of +Conduct committee has received and their status, as well details of any +overridden decisions including complete and identifiable voting details. + +We expect to establish a different process for Code of Conduct Committee +staffing beyond the bootstrap period. This document will be updated +with that information when this occurs. diff --git a/Documentation/process/code-of-conduct.rst b/Documentation/process/code-of-conduct.rst index ab7c24b5478c..be50294aebd5 100644 --- a/Documentation/process/code-of-conduct.rst +++ b/Documentation/process/code-of-conduct.rst @@ -1,3 +1,5 @@ +.. _code_of_conduct: + Contributor Covenant Code of Conduct ++++++++++++++++++++++++++++++++++++ @@ -63,19 +65,22 @@ Enforcement =========== Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the Technical Advisory Board (TAB) at -<tab@lists.linux-foundation.org>. All complaints will be reviewed and -investigated and will result in a response that is deemed necessary and -appropriate to the circumstances. The TAB is obligated to maintain -confidentiality with regard to the reporter of an incident. Further details of -specific enforcement policies may be posted separately. - -Maintainers who do not follow or enforce the Code of Conduct in good faith may -face temporary or permanent repercussions as determined by other members of the -project’s leadership. +reported by contacting the Code of Conduct Committee at +<conduct@kernel.org>. All complaints will be reviewed and investigated +and will result in a response that is deemed necessary and appropriate +to the circumstances. The Code of Conduct Committee is obligated to +maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted +separately. Attribution =========== This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +Interpretation +============== + +See the :ref:`code_of_conduct_interpretation` document for how the Linux +kernel community will be interpreting this document. 
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 9ae3e317bddf..42691e2880eb 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -21,6 +21,7 @@ Below are the essential guides that every developer should read. howto code-of-conduct + code-of-conduct-interpretation development-process submitting-patches coding-style diff --git a/LICENSES/other/CC-BY-SA-4.0 b/LICENSES/other/CC-BY-SA-4.0 deleted file mode 100644 index f9158e831e79..000000000000 --- a/LICENSES/other/CC-BY-SA-4.0 +++ /dev/null @@ -1,397 +0,0 @@ -Valid-License-Identifier: CC-BY-SA-4.0 -SPDX-URL: https://spdx.org/licenses/CC-BY-SA-4.0 -Usage-Guide: - To use the Creative Commons Attribution Share Alike 4.0 International - license put the following SPDX tag/value pair into a comment according to - the placement guidelines in the licensing rules documentation: - SPDX-License-Identifier: CC-BY-SA-4.0 -License-Text: - -Creative Commons Attribution-ShareAlike 4.0 International - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of Creative -Commons public licenses does not create a lawyer-client or other -relationship. Creative Commons makes its licenses and related information -available on an "as-is" basis. Creative Commons gives no warranties -regarding its licenses, any material licensed under their terms and -conditions, or any related information. Creative Commons disclaims all -liability for damages resulting from their use to the fullest extent -possible. - -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share original -works of authorship and other material subject to copyright and certain -other rights specified in the public license below. The following -considerations are for informational purposes only, are not exhaustive, and -do not form part of our licenses. - -Considerations for licensors: Our public licenses are intended for use by -those authorized to give the public permission to use material in ways -otherwise restricted by copyright and certain other rights. Our licenses -are irrevocable. Licensors should read and understand the terms and -conditions of the license they choose before applying it. Licensors should -also secure all rights necessary before applying our licenses so that the -public can reuse the material as expected. Licensors should clearly mark -any material not subject to the license. This includes other CC-licensed -material, or material used under an exception or limitation to -copyright. More considerations for licensors : -wiki.creativecommons.org/Considerations_for_licensors - -Considerations for the public: By using one of our public licenses, a -licensor grants the public permission to use the licensed material under -specified terms and conditions. If the licensor's permission is not -necessary for any reason - for example, because of any applicable exception -or limitation to copyright - then that use is not regulated by the -license. Our licenses grant only permissions under copyright and certain -other rights that a licensor has authority to grant. Use of the licensed -material may still be restricted for other reasons, including because -others have copyright or other rights in the material. A licensor may make -special requests, such as asking that all changes be marked or described. 
- -Although not required by our licenses, you are encouraged to respect those -requests where reasonable. More considerations for the public : -wiki.creativecommons.org/Considerations_for_licensees - -Creative Commons Attribution-ShareAlike 4.0 International Public License - -By exercising the Licensed Rights (defined below), You accept and agree to -be bound by the terms and conditions of this Creative Commons -Attribution-ShareAlike 4.0 International Public License ("Public -License"). To the extent this Public License may be interpreted as a -contract, You are granted the Licensed Rights in consideration of Your -acceptance of these terms and conditions, and the Licensor grants You such -rights in consideration of benefits the Licensor receives from making the -Licensed Material available under these terms and conditions. - -Section 1 - Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material and - in which the Licensed Material is translated, altered, arranged, - transformed, or otherwise modified in a manner requiring permission - under the Copyright and Similar Rights held by the Licensor. For - purposes of this Public License, where the Licensed Material is a - musical work, performance, or sound recording, Adapted Material is - always produced where the Licensed Material is synched in timed - relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright and - Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. BY-SA Compatible License means a license listed at - creativecommons.org/compatiblelicenses, approved by Creative Commons - as essentially the equivalent of this Public License. - - d. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - e. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright Treaty - adopted on December 20, 1996, and/or similar international - agreements. - - f. Exceptions and Limitations means fair use, fair dealing, and/or any - other exception or limitation to Copyright and Similar Rights that - applies to Your use of the Licensed Material. - - g. License Elements means the license attributes listed in the name of - a Creative Commons Public License. The License Elements of this - Public License are Attribution and ShareAlike. - - h. Licensed Material means the artistic or literary work, database, or - other material to which the Licensor applied this Public License. - - i. Licensed Rights means the rights granted to You subject to the terms - and conditions of this Public License, which are limited to all - Copyright and Similar Rights that apply to Your use of the Licensed - Material and that the Licensor has authority to license. - - j. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - k. 
Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such as - reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the public - may access the material from a place and at a time individually - chosen by them. - - l. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially equivalent - rights anywhere in the world. m. You means the individual or entity - exercising the Licensed Rights under this Public License. Your has a - corresponding meaning. - -Section 2 - Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, the - Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - A. reproduce and Share the Licensed Material, in whole or in part; and - - B. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with its - terms and conditions. - - 3. Term. The term of this Public License is specified in Section 6(a). - - 4. Media and formats; technical modifications allowed. The Licensor - authorizes You to exercise the Licensed Rights in all media and - formats whether now known or hereafter created, and to make - technical modifications necessary to do so. The Licensor waives - and/or agrees not to assert any right or authority to forbid You - from making technical modifications necessary to exercise the - Licensed Rights, including technical modifications necessary to - circumvent Effective Technological Measures. For purposes of - this Public License, simply making modifications authorized by - this Section 2(a)(4) never produces Adapted Material. - - 5. Downstream recipients. - - A. Offer from the Licensor - Licensed Material. Every recipient - of the Licensed Material automatically receives an offer - from the Licensor to exercise the Licensed Rights under the - terms and conditions of this Public License. - - B. Additional offer from the Licensor - Adapted Material. Every - recipient of Adapted Material from You automatically - receives an offer from the Licensor to exercise the Licensed - Rights in the Adapted Material under the conditions of the - Adapter's License You apply. - - C. No downstream restrictions. You may not offer or impose any - additional or different terms or conditions on, or apply any - Effective Technological Measures to, the Licensed Material - if doing so restricts exercise of the Licensed Rights by any - recipient of the Licensed Material. - - 6. No endorsement. Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You are, - or that Your use of the Licensed Material is, connected with, or - sponsored, endorsed, or granted official status by, the Licensor - or others designated to receive attribution as provided in - Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. 
Moral rights, such as the right of integrity, are not licensed - under this Public License, nor are publicity, privacy, and/or - other similar personality rights; however, to the extent - possible, the Licensor waives and/or agrees not to assert any - such rights held by the Licensor to the limited extent necessary - to allow You to exercise the Licensed Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this Public - License. - - 3. To the extent possible, the Licensor waives any right to collect - royalties from You for the exercise of the Licensed Rights, - whether directly or through a collecting society under any - voluntary or waivable statutory or compulsory licensing - scheme. In all other cases the Licensor expressly reserves any - right to collect such royalties. - -Section 3 - License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified form), - You must: - - A. retain the following if it is supplied by the Licensor with - the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by the - Licensor (including by pseudonym if designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of warranties; - - v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; - - B. indicate if You modified the Licensed Material and retain an - indication of any previous modifications; and - - C. indicate the Licensed Material is licensed under this Public - License, and include the text of, or the URI or hyperlink to, - this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. b. ShareAlike.In addition to the - conditions in Section 3(a), if You Share Adapted Material You - produce, the following conditions also apply. - - 1. The Adapter's License You apply must be a Creative Commons - license with the same License Elements, this version or - later, or a BY-SA Compatible License. - - 2. You must include the text of, or the URI or hyperlink to, the - Adapter's License You apply. You may satisfy this condition - in any reasonable manner based on the medium, means, and - context in which You Share Adapted Material. - - 3. You may not offer or impose any additional or different terms - or conditions on, or apply any Effective Technological - Measures to, Adapted Material that restrict exercise of the - rights granted under the Adapter's License You apply. - -Section 4 - Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that apply to -Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right to - extract, reuse, reproduce, and Share all or a substantial portion of - the contents of the database; - - b. 
if You include all or a substantial portion of the database contents - in a database in which You have Sui Generis Database Rights, then - the database in which You have Sui Generis Database Rights (but not - its individual contents) is Adapted Material, including for purposes - of Section 3(b); and - - c. You must comply with the conditions in Section 3(a) if You Share all - or a substantial portion of the contents of the database. - - For the avoidance of doubt, this Section 4 supplements and does not - replace Your obligations under this Public License where the Licensed - Rights include other Copyright and Similar Rights. - -Section 5 - Disclaimer of Warranties and Limitation of Liability. - - a. Unless otherwise separately undertaken by the Licensor, to the - extent possible, the Licensor offers the Licensed Material as-is and - as-available, and makes no representations or warranties of any kind - concerning the Licensed Material, whether express, implied, - statutory, or other. This includes, without limitation, warranties - of title, merchantability, fitness for a particular purpose, - non-infringement, absence of latent or other defects, accuracy, or - the presence or absence of errors, whether or not known or - discoverable. Where disclaimers of warranties are not allowed in - full or in part, this disclaimer may not apply to You. - - b. To the extent possible, in no event will the Licensor be liable to - You on any legal theory (including, without limitation, negligence) - or otherwise for any direct, special, indirect, incidental, - consequential, punitive, exemplary, or other losses, costs, - expenses, or damages arising out of this Public License or use of - the Licensed Material, even if the Licensor has been advised of the - possibility of such losses, costs, expenses, or damages. Where a - limitation of liability is not allowed in full or in part, this - limitation may not apply to You. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent possible, - most closely approximates an absolute disclaimer and waiver of all - liability. - -Section 6 - Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided it - is cured within 30 days of Your discovery of the violation; or - - 2. upon express reinstatement by the Licensor. - - c. For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations of - this Public License. - - d. For the avoidance of doubt, the Licensor may also offer the Licensed - Material under separate terms or conditions or stop distributing the - Licensed Material at any time; however, doing so will not terminate - this Public License. - - e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. - -Section 7 - Other Terms and Conditions. - - a. The Licensor shall not be bound by any additional or different terms - or conditions communicated by You unless expressly agreed. - - b. 
Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - -Section 8 - Interpretation. - - a. For the avoidance of doubt, this Public License does not, and shall - not be interpreted to, reduce, limit, restrict, or impose conditions - on any use of the Licensed Material that could lawfully be made - without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted as - a limitation upon, or waiver of, any privileges and immunities that - apply to the Licensor or You, including from the legal processes of - any jurisdiction or authority. - -Creative Commons is not a party to its public licenses. Notwithstanding, -Creative Commons may elect to apply one of its public licenses to material -it publishes and in those instances will be considered the "Licensor." The -text of the Creative Commons public licenses is dedicated to the public -domain under the CC0 Public Domain Dedication. Except for the limited -purpose of indicating that material is shared under a Creative Commons -public license or as otherwise permitted by the Creative Commons policies -published at creativecommons.org/policies, Creative Commons does not -authorize the use of the trademark "Creative Commons" or any other -trademark or logo of Creative Commons without its prior written consent -including, without limitation, in connection with any unauthorized -modifications to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For the -avoidance of doubt, this paragraph does not form part of the public -licenses. - -Creative Commons may be contacted at creativecommons.org. 
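For reference, the Usage-Guide block at the top of the removed license text describes how a source file opts into this license: a single SPDX tag/value comment placed per the licensing-rules documentation. Purely as an illustration (comment style varies by file type), such a tag looks like:

	/* SPDX-License-Identifier: CC-BY-SA-4.0 */

With the file removed from LICENSES/other/, this identifier is no longer carried in the tree's license list.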
diff --git a/MAINTAINERS b/MAINTAINERS index 20a64eb40b55..c742c517f95c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3006,6 +3006,14 @@ S: Supported F: drivers/gpio/gpio-brcmstb.c F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt +BROADCOM BRCMSTB I2C DRIVER +M: Kamal Dasu <kdasu.kdev@gmail.com> +L: linux-i2c@vger.kernel.org +L: bcm-kernel-feedback-list@broadcom.com +S: Supported +F: drivers/i2c/busses/i2c-brcmstb.c +F: Documentation/devicetree/bindings/i2c/i2c-brcmstb.txt + BROADCOM BRCMSTB USB2 and USB3 PHY DRIVER M: Al Cooper <alcooperx@gmail.com> L: linux-kernel@vger.kernel.org @@ -3673,6 +3681,12 @@ S: Maintained F: Documentation/devicetree/bindings/media/coda.txt F: drivers/media/platform/coda/ +CODE OF CONDUCT +M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +S: Supported +F: Documentation/process/code-of-conduct.rst +F: Documentation/process/code-of-conduct-interpretation.rst + COMMON CLK FRAMEWORK M: Michael Turquette <mturquette@baylibre.com> M: Stephen Boyd <sboyd@kernel.org> @@ -9651,7 +9665,8 @@ MIPS/LOONGSON2 ARCHITECTURE M: Jiaxun Yang <jiaxun.yang@flygoat.com> L: linux-mips@linux-mips.org S: Maintained -F: arch/mips/loongson64/*{2e/2f}* +F: arch/mips/loongson64/fuloong-2e/ +F: arch/mips/loongson64/lemote-2f/ F: arch/mips/include/asm/mach-loongson64/ F: drivers/*/*loongson2* F: drivers/*/*/*loongson2* @@ -9691,6 +9706,19 @@ S: Maintained F: arch/arm/boot/dts/mmp* F: arch/arm/mach-mmp/ +MMU GATHER AND TLB INVALIDATION +M: Will Deacon <will.deacon@arm.com> +M: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> +M: Andrew Morton <akpm@linux-foundation.org> +M: Nick Piggin <npiggin@gmail.com> +M: Peter Zijlstra <peterz@infradead.org> +L: linux-arch@vger.kernel.org +L: linux-mm@kvack.org +S: Maintained +F: arch/*/include/asm/tlb.h +F: include/asm-generic/tlb.h +F: mm/mmu_gather.c + MN88472 MEDIA DRIVER M: Antti Palosaari <crope@iki.fi> L: linux-media@vger.kernel.org @@ -9858,7 +9886,7 @@ M: Peter Rosin <peda@axentia.se> S: Maintained F: Documentation/ABI/testing/sysfs-class-mux* F: Documentation/devicetree/bindings/mux/ -F: include/linux/dt-bindings/mux/ +F: include/dt-bindings/mux/ F: include/linux/mux/ F: drivers/mux/ @@ -10115,7 +10143,6 @@ L: netdev@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git S: Maintained -F: net/core/flow.c F: net/xfrm/ F: net/key/ F: net/ipv4/xfrm* @@ -13074,7 +13101,7 @@ SELINUX SECURITY MODULE M: Paul Moore <paul@paul-moore.com> M: Stephen Smalley <sds@tycho.nsa.gov> M: Eric Paris <eparis@parisplace.org> -L: selinux@tycho.nsa.gov (moderated for non-subscribers) +L: selinux@vger.kernel.org W: https://selinuxproject.org W: https://github.com/SELinuxProject T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git @@ -13501,8 +13528,8 @@ L: linux-arm-kernel@lists.infradead.org S: Maintained F: Documentation/devicetree/bindings/arm/firmware/sdei.txt F: drivers/firmware/arm_sdei.c -F: include/linux/sdei.h -F: include/uapi/linux/sdei.h +F: include/linux/arm_sdei.h +F: include/uapi/linux/arm_sdei.h SOFTWARE RAID (Multiple Disks) SUPPORT M: Shaohua Li <shli@kernel.org> @@ -2,8 +2,8 @@ VERSION = 4 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc7 -NAME = Merciless Moray +EXTRAVERSION = +NAME = "People's Front" # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -483,13 +483,15 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) CLANG_TARGET := --target=$(notdir 
$(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) +CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) +GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif -KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) +KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) +KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) $(CLANG_PREFIX) KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) endif diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b4441b0764d7..e98c6b8e6186 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -9,6 +9,7 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_DMA_COHERENT_TO_PFN select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE @@ -17,8 +18,7 @@ config ARC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select DMA_NONCOHERENT_OPS - select DMA_NONCOHERENT_MMAP + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT @@ -149,7 +149,7 @@ config ARC_CPU_770 Support for ARC770 core introduced with Rel 4.10 (Summer 2011) This core has a bunch of cool new features: -MMU-v3: Variable Page Sz (4k, 8k, 16k), bigger J-TLB (128x4) - Shared Address Spaces (for sharing TLB entires in MMU) + Shared Address Spaces (for sharing TLB entries in MMU) -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 99cce77ab98f..644815c0516e 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,33 +6,11 @@ # published by the Free Software Foundation. 
# -ifeq ($(CROSS_COMPILE),) -ifndef CONFIG_CPU_BIG_ENDIAN -CROSS_COMPILE := arc-linux- -else -CROSS_COMPILE := arceb-linux- -endif -endif - KBUILD_DEFCONFIG := nsim_700_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 -cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs - -is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0) - -ifdef CONFIG_ISA_ARCOMPACT -ifeq ($(is_700), 0) - $(error Toolchain not configured for ARCompact builds) -endif -endif - -ifdef CONFIG_ISA_ARCV2 -ifeq ($(is_700), 1) - $(error Toolchain not configured for ARCv2 builds) -endif -endif +cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file @@ -79,7 +57,7 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB -LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) +LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 4674541eba3f..8ce6e7235915 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -241,6 +241,26 @@ int copy_thread(unsigned long clone_flags, task_thread_info(current)->thr_ptr; } + + /* + * setup usermode thread pointer #1: + * when child is picked by scheduler, __switch_to() uses @c_callee to + * populate usermode callee regs: this works (despite being in a kernel + * function) since special return path for child @ret_from_fork() + * ensures those regs are not clobbered all the way to RTIE to usermode + */ + c_callee->r25 = task_thread_info(p)->thr_ptr; + +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * setup usermode thread pointer #2: + * however for this special use of r25 in kernel, __switch_to() sets + * r25 for kernel needs and only in the final return path is usermode + * r25 setup, from pt_regs->user_r25. So set that up as well + */ + c_regs->user_r25 = c_callee->r25; +#endif + return 0; } diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index c75d5c3470e3..db203ff69ccf 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -84,29 +84,10 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, __free_pages(page, get_order(size)); } -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) { - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long pfn = __phys_to_pfn(dma_addr); - unsigned long off = vma->vm_pgoff; - int ret = -ENXIO; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off < count && user_count <= (count - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); - } - - return ret; + return __phys_to_pfn(dma_addr); } /* @@ -167,7 +148,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, } /* - * Plug in coherent or noncoherent dma ops + * Plug in direct dma map ops. 
*/ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) @@ -175,13 +156,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, /* * IOC hardware snoops all DMA traffic keeping the caches consistent * with memory - eliding need for any explicit cache maintenance of - * DMA buffers - so we can use dma_direct cache ops. + * DMA buffers. */ - if (is_isa_arcv2() && ioc_enable && coherent) { - set_dma_ops(dev, &dma_direct_ops); - dev_info(dev, "use dma_direct_ops cache ops\n"); - } else { - set_dma_ops(dev, &dma_noncoherent_ops); - dev_info(dev, "use dma_noncoherent_ops cache ops\n"); - } + if (is_isa_arcv2() && ioc_enable && coherent) + dev->dma_coherent = true; + + dev_info(dev, "use %sncoherent DMA ops\n", + dev->dma_coherent ? "" : "non"); } diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 7423d462d1e4..50dde84b72ed 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -123,6 +123,17 @@ }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 8436f6ade57d..965b7c846ecb 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -100,8 +100,10 @@ static inline unsigned long dma_max_pfn(struct device *dev) extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); +#ifdef CONFIG_MMU #define arch_teardown_dma_ops arch_teardown_dma_ops extern void arch_teardown_dma_ops(struct device *dev); +#endif /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 2cfbc531f63b..6b51826ab3d1 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -28,7 +28,6 @@ #include <asm/byteorder.h> #include <asm/memory.h> #include <asm-generic/pci_iomap.h> -#include <xen/xen.h> /* * ISA I/O bus memory addresses are 1:1 with the physical address. @@ -459,20 +458,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #include <asm-generic/io.h> -/* - * can the hardware map this into one segment or not, given no other - * constraints. 
- */ -#define BIOVEC_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) - -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) - #ifdef CONFIG_MMU #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 3ab8b3781bfe..2d43dca29c72 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -161,6 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif +#define VTTBR_CNP_BIT _AC(1, UL) #define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 265ea9cf7df7..847f01fa429d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -355,6 +355,11 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) (addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return false; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index ae5fdff18406..8247bc15addc 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -49,6 +49,8 @@ #define ARM_DISCARD \ *(.ARM.exidx.exit.text) \ *(.ARM.extab.exit.text) \ + *(.ARM.exidx.text.exit) \ + *(.ARM.extab.text.exit) \ ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) \ ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) \ ARM_EXIT_DISCARD(EXIT_TEXT) \ diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 450c7a4fbc8a..cb094e55dc5f 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = { /* ICC_SGI1R */ { CRm64(12), Op1( 0), is64, access_gic_sgi}, - /* ICC_ASGI1R */ - { CRm64(12), Op1( 1), is64, access_gic_sgi}, - /* ICC_SGI0R */ - { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* VBAR: swapped by interrupt.S. 
*/ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, + /* ICC_ASGI1R */ + { CRm64(12), Op1( 1), is64, access_gic_sgi}, + /* ICC_SGI0R */ + { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* ICC_SRE */ { CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre }, diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index f448a0663b10..712416ecd8e6 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -47,7 +47,8 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, */ if (attrs & DMA_ATTR_NON_CONSISTENT) - return dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + return dma_direct_alloc_pages(dev, size, dma_handle, gfp, + attrs); ret = dma_alloc_from_global_coherent(size, dma_handle); @@ -70,7 +71,7 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, unsigned long attrs) { if (attrs & DMA_ATTR_NON_CONSISTENT) { - dma_direct_free(dev, size, cpu_addr, dma_addr, attrs); + dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); } else { int ret = dma_release_from_global_coherent(get_order(size), cpu_addr); @@ -90,7 +91,7 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) return ret; - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } @@ -237,7 +238,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, set_dma_ops(dev, dma_ops); } - -void arch_teardown_dma_ops(struct device *dev) -{ -} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b1a0e95c751..a8ae30fab508 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) + select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS select EDAC_SUPPORT @@ -142,6 +143,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -479,6 +481,19 @@ config ARM64_ERRATUM_1024718 If unsure, say Y. +config ARM64_ERRATUM_1188873 + bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" + default y + select ARM_ARCH_TIMER_OOL_WORKAROUND + help + This option adds work arounds for ARM Cortex-A76 erratum 1188873 + + Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause + register corruption when accessing the timer registers from + AArch32 userspace. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -769,9 +784,6 @@ source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config ARCH_HAS_HOLES_MEMORYMODEL - def_bool y if SPARSEMEM - config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_VMEMMAP_ENABLE @@ -786,7 +798,7 @@ config ARCH_FLATMEM_ENABLE def_bool !NUMA config HAVE_ARCH_PFN_VALID - def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + def_bool y config HW_PERF_EVENTS def_bool y @@ -1132,6 +1144,20 @@ config ARM64_RAS_EXTN and access the new registers if the system supports the extension. Platform RAS features may additionally depend on firmware support. 
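For context on the ARM64_ERRATUM_1188873 option added above: errata like this are normally enabled at runtime by matching the affected MIDR range in the cpu_errata capability table. A minimal sketch of what such an entry could look like, assuming the usual ERRATA_MIDR_RANGE() helper together with the ARM64_WORKAROUND_1188873 capability and MIDR_CORTEX_A76 definition added later in this diff (an illustration, not the exact hunk from the patch):

	#ifdef CONFIG_ARM64_ERRATUM_1188873
		{
			/* Cortex-A76 r0p0 to r2p0 are affected */
			.desc = "ARM erratum 1188873",
			.capability = ARM64_WORKAROUND_1188873,
			ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
		},
	#endif

The Kconfig text above only controls whether this detection and the workaround code are built; affected cores are still identified per-CPU at boot.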
+config ARM64_CNP + bool "Enable support for Common Not Private (CNP) translations" + default y + depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN + help + Common Not Private (CNP) allows translation table entries to + be shared between different PEs in the same inner shareable + domain, so the hardware can use this fact to optimise the + caching of such entries in the TLB. + + Selecting this option allows the CNP feature to be detected + at runtime, and does not affect PEs that do not implement + this feature. + endmenu config ARM64_SVE diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0bcc98dbba56..6142402c2eb4 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -286,12 +286,11 @@ alternative_endif ldr \rd, [\rn, #MM_CONTEXT_ID] .endm /* - * read_ctr - read CTR_EL0. If the system has mismatched - * cache line sizes, provide the system wide safe value - * from arm64_ftr_reg_ctrel0.sys_val + * read_ctr - read CTR_EL0. If the system has mismatched register fields, + * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ .macro read_ctr, reg -alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE +alternative_if_not ARM64_MISMATCHED_CACHE_TYPE mrs \reg, ctr_el0 // read CTR nop alternative_else diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5ee5bca8c24b..13dd42c3ad4e 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -40,6 +40,15 @@ #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define CLIDR_LOUU_SHIFT 27 +#define CLIDR_LOC_SHIFT 24 +#define CLIDR_LOUIS_SHIFT 21 + +#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7) +#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7) +#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7) + /* * Memory returned by kmalloc() may be used for DMA, so we must make * sure that all such allocations are cache aligned. Otherwise, @@ -84,6 +93,37 @@ static inline int cache_line_size(void) return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +/* + * Read the effective value of CTR_EL0. + * + * According to ARM ARM for ARMv8-A (ARM DDI 0487C.a), + * section D10.2.33 "CTR_EL0, Cache Type Register" : + * + * CTR_EL0.IDC reports the data cache clean requirements for + * instruction to data coherence. + * + * 0 - dcache clean to PoU is required unless : + * (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0) + * 1 - dcache clean to PoU is not required for i-to-d coherence. + * + * This routine provides the CTR_EL0 with the IDC field updated to the + * effective state. 
+ */ +static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) +{ + u32 ctr = read_cpuid_cachetype(); + + if (!(ctr & BIT(CTR_IDC_SHIFT))) { + u64 clidr = read_sysreg(clidr_el1); + + if (CLIDR_LOC(clidr) == 0 || + (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) + ctr |= BIT(CTR_IDC_SHIFT); + } + + return ctr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 1a037b94eba1..cee28a05ee98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) } #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) +#define COMPAT_MINSIGSTKSZ 2048 static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h deleted file mode 100644 index ee35fd0f2236..000000000000 --- a/arch/arm64/include/asm/compiler.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Based on arch/arm/include/asm/compiler.h - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_COMPILER_H -#define __ASM_COMPILER_H - -/* - * This is used to ensure the compiler did actually allocate the register we - * asked it for some inline assembly sequences. Apparently we can't trust the - * compiler from one version to another so a bit of paranoia won't hurt. This - * string is meant to be concatenated with the inline asm string and will - * cause compilation to stop on mismatch. (for details, see gcc PR 15089) - */ -#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" - -#endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ae1f70450fb2..6e2d254c09eb 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -33,7 +33,7 @@ #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HARDEN_EL2_VECTORS 14 -#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 +#define ARM64_HAS_CNP 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 @@ -51,7 +51,10 @@ #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 +#define ARM64_HAS_CRC32 33 +#define ARM64_SSBS 34 +#define ARM64_WORKAROUND_1188873 35 -#define ARM64_NCAPS 33 +#define ARM64_NCAPS 36 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1717ba1db35d..6db48d90ad63 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -262,7 +262,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on system-wide value of a * feature. 
It is safe for a late CPU to have this feature even though - * the system hasn't enabled it, although the featuer will not be used + * the system hasn't enabled it, although the feature will not be used * by Linux in this case. If the system has enabled this feature already, * then every late CPU must have it. */ @@ -508,6 +508,12 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } +static inline bool system_supports_cnp(void) +{ + return IS_ENABLED(CONFIG_ARM64_CNP) && + cpus_have_const_cap(ARM64_HAS_CNP); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 @@ -530,6 +536,7 @@ void arm64_set_ssbd_mitigation(bool state); static inline void arm64_set_ssbd_mitigation(bool state) {} #endif +extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ea690b3562af..12f93e4d2452 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_A75 0xD0A #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 +#define ARM_CPU_PART_CORTEX_A76 0xD0B #define APM_CPU_PART_POTENZA 0x000 @@ -110,6 +111,7 @@ #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 22e4c83de5a5..8d91f2233135 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -36,11 +36,8 @@ static inline unsigned long local_daif_save(void) { unsigned long flags; - asm volatile( - "mrs %0, daif // local_daif_save\n" - : "=r" (flags) - : - : "memory"); + flags = arch_local_save_flags(); + local_daif_mask(); return flags; @@ -60,11 +57,9 @@ static inline void local_daif_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) trace_hardirqs_on(); - asm volatile( - "msr daif, %0 // local_daif_restore" - : - : "r" (flags) - : "memory"); + + arch_local_irq_restore(flags); + if (arch_irqs_disabled_flags(flags)) trace_hardirqs_off(); } diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ce70c3ffb993..676de2ec1762 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -137,6 +137,8 @@ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_TI (UL(1) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) @@ -148,6 +150,9 @@ #define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) /* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) /* BRK instruction trap from AArch64 state */ #define ESR_ELx_VAL_BRK64(imm) \ @@ 
-187,6 +192,8 @@ #define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) /* * User space cache operations have the following sysreg encoding * in System instructions. @@ -206,6 +213,18 @@ #define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. + * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) #define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) #define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ @@ -249,6 +268,64 @@ #define ESR_ELx_FP_EXC_TFV (UL(1) << 23) +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + #ifndef 
__ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 35b2e50f17fb..9f8b915af3a7 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -31,8 +31,6 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <xen/xen.h> - /* * Generic IO read/write. These perform native-endian accesses. */ @@ -205,12 +203,5 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) - #endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a780f6714b44..850e2122d53f 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -97,7 +97,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index aa45df752a16..b476bc46f0ab 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -175,6 +175,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) +#define VTTBR_CNP_BIT (UL(1)) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 6106a85ae0be..21247870def7 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -335,7 +335,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); - return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + return ESR_ELx_SYS64_ISS_RT(esr); } static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3d6d7336f871..2842bf149029 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -387,6 +387,8 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +void __kvm_enable_ssbs(void); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -407,6 +409,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. 
+ */ + if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } } static inline bool kvm_arch_check_sve_has_vhe(void) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d6fff7de5539..64337afbf124 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -517,5 +517,10 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) +static inline bool kvm_cpu_has_cnp(void) +{ + return system_supports_cnp(); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index dd320df0d026..7689c7aa1d77 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -95,5 +95,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, extern void *fixmap_remap_fdt(phys_addr_t dt_phys); extern void mark_linear_text_alias_ro(void); +#define INIT_MM_CONTEXT(name) \ + .pgd = init_pg_dir, + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 39ec0b8a689e..1e58bf58c22b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -147,12 +147,25 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) extern ttbr_replace_func idmap_cpu_replace_ttbr1; ttbr_replace_func *replace_phys; - phys_addr_t pgd_phys = virt_to_phys(pgdp); + /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ + phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + + if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { + /* + * cpu_replace_ttbr1() is used when there's a boot CPU + * up (i.e. cpufeature framework is not up yet) and + * latter only when we enable CNP via cpufeature's + * enable() callback. + * Also we rely on the cpu_hwcap bit being set before + * calling the enable() function. + */ + ttbr1 |= TTBR_CNP_BIT; + } replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); cpu_install_idmap(); - replace_phys(pgd_phys); + replace_phys(ttbr1); cpu_uninstall_idmap(); } diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 60d02c81a3a2..c88a3cb117a1 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -37,9 +37,7 @@ extern void clear_page(void *to); typedef struct page *pgtable_t; -#ifdef CONFIG_HAVE_ARCH_PFN_VALID extern int pfn_valid(unsigned long); -#endif #include <asm/memory.h> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fd208eac9f2a..1d7d8da2ef9b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -211,6 +211,8 @@ #define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) +#define TTBR_CNP_BIT (UL(1) << 0) + /* * TCR flags. 
*/ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1bdeca8918a6..50b1ef8584c0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -360,6 +360,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) @@ -428,10 +429,33 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PUD_TYPE_TABLE) #endif +extern pgd_t init_pg_dir[PTRS_PER_PGD]; +extern pgd_t init_pg_end[]; +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; + +extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); + +static inline bool in_swapper_pgdir(void *addr) +{ + return ((unsigned long)addr & PAGE_MASK) == + ((unsigned long)swapper_pg_dir & PAGE_MASK); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { + set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } +#endif /* __PAGETABLE_PMD_FOLDED */ + WRITE_ONCE(*pmdp, pmd); - dsb(ishst); + + if (pmd_valid(pmd)) + dsb(ishst); } static inline void pmd_clear(pmd_t *pmdp) @@ -477,11 +501,21 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { + set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } +#endif /* __PAGETABLE_PUD_FOLDED */ + WRITE_ONCE(*pudp, pud); - dsb(ishst); + + if (pud_valid(pud)) + dsb(ishst); } static inline void pud_clear(pud_t *pudp) @@ -532,6 +566,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { + if (in_swapper_pgdir(pgdp)) { + set_swapper_pgd(pgdp, pgd); + return; + } + WRITE_ONCE(*pgdp, pgd); dsb(ishst); } @@ -712,11 +751,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern pgd_t swapper_pg_end[]; -extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 79657ad91397..2bf6691371c2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -174,6 +174,10 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; + + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_SSBS_BIT; + regs->sp = sp; } @@ -190,6 +194,9 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif + if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) + regs->pstate |= PSR_AA32_SSBS_BIT; + regs->compat_sp = sp; } #endif @@ -244,10 +251,6 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(const struct 
arm64_cpu_capabilities *__unused); -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 177b851ca6d9..6bc43889d11e 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -50,6 +50,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 #define PSR_AA32_V_BIT 0x10000000 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c1470931b897..0c909c4a932f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,7 +20,6 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H -#include <asm/compiler.h> #include <linux/stringify.h> /* @@ -84,13 +83,26 @@ #endif /* CONFIG_BROKEN_GAS_INST */ -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift + +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) @@ -419,6 +431,7 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. 
*/ +#define SCTLR_ELx_DSSBS (1UL << 44) #define SCTLR_ELx_EE (1 << 25) #define SCTLR_ELx_IESB (1 << 21) #define SCTLR_ELx_WXN (1 << 19) @@ -439,7 +452,7 @@ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE @@ -453,7 +466,7 @@ #define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) #define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ - ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) #if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL2 set/clear bits" @@ -477,7 +490,7 @@ (1 << 29)) #define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ (1 << 27) | (1 << 30) | (1 << 31) | \ - (0xffffffffUL << 32)) + (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) @@ -489,12 +502,12 @@ #define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ - SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) #define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_EL1_RES0) + SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) #if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff #error "Inconsistent SCTLR_EL1 set/clear bits" @@ -544,6 +557,13 @@ #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +/* id_aa64pfr1 */ +#define ID_AA64PFR1_SSBS_SHIFT 4 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index a3233167be60..106fdc951b6e 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,16 +22,10 @@ #include <linux/pagemap.h> #include <linux/swap.h> -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry) static inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } -#else -#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ static void tlb_flush(struct mmu_gather *tlb); @@ -40,36 +34,35 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); + bool last_level = !tlb->freed_tables; + unsigned long stride = tlb_get_unmap_size(tlb); /* - * The ASID allocator will either invalidate the ASID or mark - * it as used. + * If we're tearing down the address space then we only care about + * invalidating the walk-cache, since the ASID allocator won't + * reallocate our ASID without invalidating the entire TLB. */ - if (tlb->fullmm) + if (tlb->fullmm) { + if (!last_level) + flush_tlb_mm(tlb->mm); return; + } - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. 
- */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); - tlb_remove_entry(tlb, pte); + tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pmdp)); + tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -77,8 +70,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); - tlb_remove_entry(tlb, virt_to_page(pudp)); + tlb_remove_table(tlb, virt_to_page(pudp)); } #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a4a1901140ee..c3c0387aee18 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -70,43 +70,73 @@ }) /* - * TLB Management - * ============== + * TLB Invalidation + * ================ * - * The TLB specific code is expected to perform whatever tests it needs - * to determine if it should invalidate the TLB for each call. Start - * addresses are inclusive and end addresses are exclusive; it is safe to - * round these addresses down. + * This header file implements the low-level TLB invalidation routines + * (sometimes referred to as "flushing" in the kernel) for arm64. * - * flush_tlb_all() + * Every invalidation operation uses the following template: + * + * DSB ISHST // Ensure prior page-table updates have completed + * TLBI ... // Invalidate the TLB + * DSB ISH // Ensure the TLB invalidation has completed + * if (invalidated kernel mappings) + * ISB // Discard any instructions fetched from the old mapping + * + * + * The following functions form part of the "core" TLB invalidation API, + * as documented in Documentation/core-api/cachetlb.rst: * - * Invalidate the entire TLB. + * flush_tlb_all() + * Invalidate the entire TLB (kernel + user) on all CPUs * * flush_tlb_mm(mm) + * Invalidate an entire user address space on all CPUs. + * The 'mm' argument identifies the ASID to invalidate. + * + * flush_tlb_range(vma, start, end) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * Note that this operation also invalidates any walk-cache + * entries associated with translations for the specified address + * range. + * + * flush_tlb_kernel_range(start, end) + * Same as flush_tlb_range(..., start, end), but applies to + * kernel mappings rather than a particular user address space. + * Whilst not explicitly documented, this function is used when + * unmapping pages from vmalloc/io space. + * + * flush_tlb_page(vma, addr) + * Invalidate a single user mapping for address 'addr' in the + * address space corresponding to 'vma->mm'. Note that this + * operation only invalidates a single, last-level page-table + * entry and therefore does not affect any walk-caches. * - * Invalidate all TLB entries in a particular address space. 
- * - mm - mm_struct describing address space * - * flush_tlb_range(mm,start,end) + * Next, we have some undocumented invalidation routines that you probably + * don't want to call unless you know what you're doing: * - * Invalidate a range of TLB entries in the specified address - * space. - * - mm - mm_struct describing address space - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) + * local_flush_tlb_all() + * Same as flush_tlb_all(), but only applies to the calling CPU. * - * flush_tlb_page(vaddr,vma) + * __flush_tlb_kernel_pgtable(addr) + * Invalidate a single kernel mapping for address 'addr' on all + * CPUs, ensuring that any walk-cache entries associated with the + * translation are also invalidated. * - * Invalidate the specified page in the specified address range. - * - vaddr - virtual address (may not be aligned) - * - vma - vma_struct describing address range + * __flush_tlb_range(vma, start, end, stride, last_level) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * The invalidation operations are issued at a granularity + * determined by 'stride' and only affect any walk-cache entries + * if 'last_level' is equal to false. * - * flush_kern_tlb_page(kaddr) * - * Invalidate the TLB entry for the specified page. The address - * will be in the kernels virtual memory space. Current uses - * only require the D-TLB to be invalidated. - * - kaddr - Kernel virtual memory address + * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented + * on top of these routines, since that is our interface to the mmu_gather + * API as used by munmap() and friends. */ static inline void local_flush_tlb_all(void) { @@ -149,25 +179,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) +#define MAX_TLBI_OPS 1024UL static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - bool last_level) + unsigned long stride, bool last_level) { unsigned long asid = ASID(vma->vm_mm); unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; } + /* Convert the stride into units of 4k */ + stride >>= 12; + start = __TLBI_VADDR(start, asid); end = __TLBI_VADDR(end, asid); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + for (addr = start; addr < end; addr += stride) { if (last_level) { __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); @@ -182,14 +215,18 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma, start, end, false); + /* + * We cannot use leaf-only invalidation here, since we may be invalidating + * table entries as part of collapsing hugepages or moving page tables. 
+ */ + __flush_tlb_range(vma, start, end, PAGE_SIZE, false); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } @@ -199,7 +236,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaae1is, addr); + __tlbi(vaale1is, addr); dsb(ish); isb(); } @@ -208,20 +245,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). */ -static inline void __flush_tlb_pgtable(struct mm_struct *mm, - unsigned long uaddr) -{ - unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); - - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); - dsb(ish); -} - static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); } diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index e66b0fca99c2..07c34087bd5e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -32,7 +32,6 @@ #include <asm/cpufeature.h> #include <asm/ptrace.h> #include <asm/memory.h> -#include <asm/compiler.h> #include <asm/extable.h> #define get_ds() (KERNEL_DS) diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h index 4e22b7a8c038..2788e95d0ff0 100644 --- a/arch/arm64/include/asm/xen/events.h +++ b/arch/arm64/include/asm/xen/events.h @@ -14,7 +14,7 @@ enum ipi_vector { static inline int xen_irqs_disabled(struct pt_regs *regs) { - return raw_irqs_disabled_flags((unsigned long) regs->pstate); + return !interrupts_enabled(regs); } #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 17c65c8f33cb..2bcd6e4f3474 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -48,5 +48,6 @@ #define HWCAP_USCAT (1 << 25) #define HWCAP_ILRCPC (1 << 26) #define HWCAP_FLAGM (1 << 27) +#define HWCAP_SSBS (1 << 28) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 98c4ce55d9c3..a36227fdb084 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -46,6 +46,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_V_BIT 0x10000000 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index dec10898d688..a509e35132d2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -68,21 +68,43 @@ static bool has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, int scope) { - u64 mask = CTR_CACHE_MINLINE_MASK; - - /* Skip matching the min line sizes for cache type check */ - if (entry->capability == ARM64_MISMATCHED_CACHE_TYPE) - mask ^= arm64_ftr_reg_ctrel0.strict_mask; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + u64 sys = arm64_ftr_reg_ctrel0.sys_val & mask; + u64 ctr_raw, ctr_real; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & mask) != - (arm64_ftr_reg_ctrel0.sys_val & mask); + + /* 
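The new 'stride' argument lets callers that tear down huge mappings issue one TLBI per block rather than one per 4KiB page, and the MAX_TLBI_OPS cut-off decides when a full-ASID flush is cheaper than walking the range. A rough illustration of that arithmetic, assuming 4KiB base pages and a 2MiB PMD stride (sketch only, not kernel code):

    #include <stdio.h>

    #define PAGE_SIZE    4096UL
    #define PMD_SIZE     (512UL * PAGE_SIZE)  /* 2 MiB with 4 KiB pages */
    #define MAX_TLBI_OPS 1024UL

    /* Number of TLBI instructions issued for a range, or 0 when the code
     * would fall back to invalidating the whole ASID instead. */
    static unsigned long tlbi_ops(unsigned long start, unsigned long end,
                                  unsigned long stride)
    {
        if ((end - start) > MAX_TLBI_OPS * stride)
            return 0;                       /* flush_tlb_mm() fallback */
        return (end - start) / stride;
    }

    int main(void)
    {
        unsigned long sz = 64UL << 20;      /* a 64 MiB VMA */

        printf("page stride: %lu ops\n", tlbi_ops(0, sz, PAGE_SIZE)); /* 0: falls back */
        printf("pmd stride:  %lu ops\n", tlbi_ops(0, sz, PMD_SIZE));  /* 32 ops */
        return 0;
    }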
+ * We want to make sure that all the CPUs in the system expose + * a consistent CTR_EL0 to make sure that applications behaves + * correctly with migration. + * + * If a CPU has CTR_EL0.IDC but does not advertise it via CTR_EL0 : + * + * 1) It is safe if the system doesn't support IDC, as CPU anyway + * reports IDC = 0, consistent with the rest. + * + * 2) If the system has IDC, it is still safe as we trap CTR_EL0 + * access on this CPU via the ARM64_HAS_CACHE_IDC capability. + * + * So, we need to make sure either the raw CTR_EL0 or the effective + * CTR_EL0 matches the system's copy to allow a secondary CPU to boot. + */ + ctr_raw = read_cpuid_cachetype() & mask; + ctr_real = read_cpuid_effective_cachetype() & mask; + + return (ctr_real != sys) && (ctr_raw != sys); } static void cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + + /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ + if ((read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask)) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); @@ -116,6 +138,15 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; + /* + * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs + * start/end if we're a guest. Skip the hyp-vectors work. + */ + if (!hyp_vecs_start) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + spin_lock(&bp_lock); for_each_possible_cpu(cpu) { if (per_cpu(bp_hardening_data.fn, cpu) == fn) { @@ -312,6 +343,14 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (this_cpu_has_cap(ARM64_SSBS)) { + if (state) + asm volatile(SET_PSTATE_SSBS(0)); + else + asm volatile(SET_PSTATE_SSBS(1)); + return; + } + switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); @@ -336,6 +375,11 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (this_cpu_has_cap(ARM64_SSBS)) { + required = false; + goto out_printmsg; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; return false; @@ -384,7 +428,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (ssbd_state) { case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); arm64_set_ssbd_mitigation(false); required = false; break; @@ -397,7 +440,6 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); required = true; break; @@ -407,10 +449,27 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, break; } +out_printmsg: + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + break; + } + return required; } #endif /* CONFIG_ARM64_SSBD */ +static void __maybe_unused +cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); +} + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ 
.matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) @@ -616,14 +675,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .desc = "Mismatched cache line size", - .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_type, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .cpu_enable = cpu_enable_trap_ctr_access, - }, - { - .desc = "Mismatched cache type", + .desc = "Mismatched cache type (CTR_EL0)", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, @@ -680,6 +732,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_ssbd_mitigation, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + /* Cortex-A76 r0p0 to r2p0 */ + .desc = "ARM erratum 1188873", + .capability = ARM64_WORKAROUND_1188873, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e238b7932096..af50064dea51 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -20,6 +20,7 @@ #include <linux/bsearch.h> #include <linux/cpumask.h> +#include <linux/crash_dump.h> #include <linux/sort.h> #include <linux/stop_machine.h> #include <linux/types.h> @@ -117,6 +118,7 @@ EXPORT_SYMBOL(cpu_hwcap_keys); static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); +static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); /* * NOTE: Any changes to the visibility of features should be kept in @@ -164,6 +166,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -371,7 +378,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -657,7 +664,6 @@ void update_cpu_features(int cpu, /* * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -848,15 +854,55 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus } static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_effective_cachetype(); + + return ctr & BIT(CTR_IDC_SHIFT); +} + +static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) +{ + /* + * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively + * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses + * to the CTR_EL0 on this CPU and emulate it with the real/safe + * value. 
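Taken together, the CTR_EL0 hunks above mean a secondary CPU is only flagged when neither its raw nor its effective (CLIDR-derived) CTR_EL0 matches the sanitised system value, and UCT trapping is armed only where a real mismatch exists. A minimal sketch of that predicate, with illustrative names only:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative only: 'sys' is the sanitised system-wide CTR_EL0,
     * 'raw' is what the CPU reports directly, 'real' is the effective
     * value derived from CLIDR_EL1 (e.g. with IDC folded in). */
    static bool ctr_mismatch(uint64_t sys, uint64_t raw, uint64_t real,
                             uint64_t strict_mask)
    {
        sys  &= strict_mask;
        raw  &= strict_mask;
        real &= strict_mask;

        return (real != sys) && (raw != sys);
    }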
+ */ + if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT); + u64 ctr; + + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_DIC_SHIFT); +} + +static bool __maybe_unused +has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) +{ + /* + * Kdump isn't guaranteed to power-off all secondary CPUs, CNP + * may share TLB entries with a CPU stuck in the crashed + * kernel. + */ + if (is_kdump_kernel()) + return false; + + return has_cpuid_feature(entry, scope); } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -1035,6 +1081,70 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) WARN_ON(val & (7 << 27 | 7 << 21)); } +#ifdef CONFIG_ARM64_SSBD +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(PSTATE_Imm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, + .fn = ssbs_emulation_handler, +}; + +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) +{ + static bool undef_hook_registered = false; + static DEFINE_SPINLOCK(hook_lock); + + spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + spin_unlock(&hook_lock); + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + arm64_set_ssbd_mitigation(false); + } else { + arm64_set_ssbd_mitigation(true); + } +} +#endif /* CONFIG_ARM64_SSBD */ + +#ifdef CONFIG_ARM64_PAN +static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) +{ + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); +} +#endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_RAS_EXTN +static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) +{ + /* Firmware may have left a deferred SError in this register. 
*/ + write_sysreg_s(0, SYS_DISR_EL1); +} +#endif /* CONFIG_ARM64_RAS_EXTN */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1184,6 +1294,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_CACHE_IDC, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_idc, + .cpu_enable = cpu_emulate_effective_ctr, }, { .desc = "Instruction cache invalidation not required for I/D coherence", @@ -1222,6 +1333,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif +#ifdef CONFIG_ARM64_SSBD + { + .desc = "CRC32 instructions", + .capability = ARM64_HAS_CRC32, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .min_field_value = 1, + }, + { + .desc = "Speculative Store Bypassing Safe (SSBS)", + .capability = ARM64_SSBS, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .cpu_enable = cpu_enable_ssbs, + }, +#endif +#ifdef CONFIG_ARM64_CNP + { + .desc = "Common not Private translations", + .capability = ARM64_HAS_CNP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_useable_cnp, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .min_field_value = 1, + .cpu_enable = cpu_enable_cnp, + }, +#endif {}, }; @@ -1267,6 +1413,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), #endif + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; @@ -1658,6 +1805,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } +static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) +{ + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); +} + /* * We emulate only the following system register space. * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] @@ -1719,27 +1871,32 @@ static int emulate_sys_reg(u32 id, u64 *valp) return 0; } -static int emulate_mrs(struct pt_regs *regs, u32 insn) +int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt) { int rc; - u32 sys_reg, dst; u64 val; - /* - * sys_reg values are defined as used in mrs/msr instruction. - * shift the imm value to get the encoding. - */ - sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; rc = emulate_sys_reg(sys_reg, &val); if (!rc) { - dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); - pt_regs_write_reg(regs, dst, val); + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } - return rc; } +static int emulate_mrs(struct pt_regs *regs, u32 insn) +{ + u32 sys_reg, rt; + + /* + * sys_reg values are defined as used in mrs/msr instruction. + * shift the imm value to get the encoding. 
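With the hwcap wired up above, userspace can discover SSBS through the auxiliary vector as usual. A small, hedged example; HWCAP_SSBS mirrors the uapi value added earlier in this series, guarded in case the libc headers predate it:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP_SSBS
    #define HWCAP_SSBS (1UL << 28)
    #endif

    int main(void)
    {
        unsigned long hwcaps = getauxval(AT_HWCAP);

        if (hwcaps & HWCAP_SSBS)
            printf("PSTATE.SSBS is directly accessible via MSR/MRS\n");
        else
            printf("SSBS not advertised; rely on the prctl/SMCCC mitigation\n");
        return 0;
    }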
+ */ + sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; + rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + return do_emulate_mrs(regs, sys_reg, rt); +} + static struct undef_hook mrs_hook = { .instr_mask = 0xfff00000, .instr_val = 0xd5300000, @@ -1755,9 +1912,3 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); - -void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) -{ - /* Firmware may have left a deferred SError in this register. */ - write_sysreg_s(0, SYS_DISR_EL1); -} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index e9ab7b3ed317..bcc2831399cb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -81,6 +81,7 @@ static const char *const hwcap_str[] = { "uscat", "ilrcpc", "flagm", + "ssbs", NULL }; @@ -324,7 +325,15 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); - info->reg_ctr = read_cpuid_cachetype(); + /* + * Use the effective value of the CTR_EL0 than the raw value + * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * with the CLIDR_EL1 fields to avoid triggering false warnings + * when there is a mismatch across the CPUs. Keep track of the + * effective value of the CTR_EL0 in our internal records for + * acurate sanity check and feature enablement. + */ + info->reg_ctr = read_cpuid_effective_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); info->reg_revidr = read_cpuid(REVIDR_EL1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 09dbea221a27..039144ecbcb2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -589,7 +589,7 @@ el1_undef: inherit_daif pstate=x23, tmp=x2 mov x0, sp bl do_undefinstr - ASM_BUG() + kernel_exit 1 el1_dbg: /* * Debug exception handling @@ -665,6 +665,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap + ccmp x24, #ESR_ELx_EC_WFx, #4, ne b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc @@ -697,9 +698,9 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap @@ -722,6 +723,17 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked + +el0_cp15: + /* + * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions + */ + enable_daif + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_cp15instr + b ret_to_user #endif el0_da: diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b0853069702f..4471f570a295 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -287,19 +287,21 @@ __create_page_tables: mov x28, lr /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. + * Invalidate the init page tables to avoid potential dirty cache lines + * being evicted. Other page tables are allocated in rodata as part of + * the kernel image, and thus are clean to the PoC per the boot + * protocol. 
*/ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area /* - * Clear the idmap and swapper page tables. + * Clear the init page tables. */ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -373,7 +375,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - adrp x0, swapper_pg_dir + adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD @@ -390,7 +392,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 dmb sy bl __inval_dcache_area @@ -706,6 +708,7 @@ secondary_startup: * Common entry point for secondary CPUs. */ bl __cpu_setup // initialise processor + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =__secondary_switched br x8 @@ -748,6 +751,7 @@ ENDPROC(__secondary_switched) * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. + * x1 = TTBR1_EL1 value * * Returns to the caller via x30/lr. This requires the caller to be covered * by the .idmap.text section. @@ -756,17 +760,16 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ ENTRY(__enable_mmu) - mrs x1, ID_AA64MMFR0_EL1 - ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + mrs x2, ID_AA64MMFR0_EL1 + ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - update_early_cpu_boot_status 0, x1, x2 - adrp x1, idmap_pg_dir - adrp x2, swapper_pg_dir - phys_to_ttbr x3, x1 - phys_to_ttbr x4, x2 - msr ttbr0_el1, x3 // load TTBR0 - msr ttbr1_el1, x4 // load TTBR1 + update_early_cpu_boot_status 0, x2, x3 + adrp x2, idmap_pg_dir + phys_to_ttbr x1, x1 + phys_to_ttbr x2, x2 + msr ttbr0_el1, x2 // load TTBR0 + msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -823,6 +826,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index e78c3ef04d95..9b65132e789a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -107,7 +107,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.api.insn) return -ENOMEM; break; - }; + } /* prepare the instruction */ if (p->ainsn.api.insn) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7f1628effe6d..ce99c58cd1f1 100644 --- 
a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -358,6 +358,10 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (IS_ENABLED(CONFIG_ARM64_UAO) && cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + childregs->pstate |= PSR_SSBS_BIT; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..8cdaf25e99cd 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -24,7 +24,6 @@ #include <uapi/linux/psci.h> -#include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/smp_plat.h> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..d0f62dd24c90 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +209,19 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = alloc_bootmem_low(num_standard_resources * + sizeof(*standard_resources)); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -243,36 +251,26 @@ static void __init request_standard_resources(void) static int __init reserve_memblock_reserved_regions(void) { - phys_addr_t start, end, roundup_end = 0; - struct resource *mem, *res; - u64 i; - - for_each_reserved_mem_region(i, &start, &end) { - if (end <= roundup_end) - continue; /* done already */ - - start = __pfn_to_phys(PFN_DOWN(start)); - end = __pfn_to_phys(PFN_UP(end)) - 1; - roundup_end = end; - - res = kzalloc(sizeof(*res), GFP_ATOMIC); - if (WARN_ON(!res)) - return -ENOMEM; - res->start = start; - res->end = end; - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - - mem = request_resource_conflict(&iomem_resource, res); - /* - * We expected memblock_reserve() regions to conflict with - * memory created by request_standard_resources(). 
- */ - if (WARN_ON_ONCE(!mem)) + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - kfree(res); - reserve_region_with_split(mem, start, end, "reserved"); + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } } return 0; @@ -351,12 +349,8 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) conswitchp = &dummy_con; #endif -#endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index bebec8ef9372..3e53ffa07994 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,6 +101,7 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 3432e5ef9f41..885f13e58708 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -3,17 +3,33 @@ * Copyright (C) 2018 ARM Ltd, All Rights Reserved. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/thread_info.h> #include <asm/cpufeature.h> +static void ssbd_ssbs_enable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate |= val; +} + +static void ssbd_ssbs_disable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate &= ~val; +} + /* * prctl interface for SSBD - * FIXME: Drop the below ifdefery once merged in 4.18. 
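The rewritten loop walks each standard memory resource and carves out only the reserved ranges that actually intersect it; the clamping is a plain max/min intersection. An illustrative stand-alone version (names made up, sketch only):

    #include <stdbool.h>
    #include <stdint.h>

    /* Clamp a reserved region [r_start, r_end] to a resource
     * [res_start, res_end]; returns false when they do not overlap. */
    static bool clamp_to_resource(uint64_t res_start, uint64_t res_end,
                                  uint64_t r_start, uint64_t r_end,
                                  uint64_t *out_start, uint64_t *out_end)
    {
        uint64_t start = r_start > res_start ? r_start : res_start;
        uint64_t end   = r_end   < res_end   ? r_end   : res_end;

        if (start > res_end || end < res_start)
            return false;

        *out_start = start;
        *out_end = end;
        return true;
    }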
*/ -#ifdef PR_SPEC_STORE_BYPASS static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) { int state = arm64_get_ssbd_state(); @@ -46,12 +62,14 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) return -EPERM; task_clear_spec_ssb_disable(task); clear_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_enable(task); break; case PR_SPEC_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) return -EPERM; task_set_spec_ssb_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; case PR_SPEC_FORCE_DISABLE: if (state == ARM64_SSBD_FORCE_DISABLE) @@ -59,6 +77,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); break; default: return -ERANGE; @@ -107,4 +126,3 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } -#endif /* PR_SPEC_STORE_BYPASS */ diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 70c283368b64..9405d1b7f4b0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -48,6 +48,10 @@ void notrace __cpu_suspend_exit(void) */ cpu_uninstall_idmap(); + /* Restore CnP bit in TTBR1_EL1 */ + if (system_supports_cnp()) + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + /* * PSTATE was not saved over suspend/resume, re-enable any detected * features that might not have been set correctly. diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 039e9ff379cc..4066da7f1e5e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -310,10 +310,12 @@ static int call_undef_hook(struct pt_regs *regs) int (*fn)(struct pt_regs *regs, u32 instr) = NULL; void __user *pc = (void __user *)instruction_pointer(regs); - if (!user_mode(regs)) - return 1; - - if (compat_thumb_mode(regs)) { + if (!user_mode(regs)) { + __le32 instr_le; + if (probe_kernel_address((__force __le32 *)pc, instr_le)) + goto exit; + instr = le32_to_cpu(instr_le); + } else if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ __le16 instr_le; if (get_user(instr_le, (__le16 __user *)pc)) @@ -352,6 +354,9 @@ void force_signal_inject(int signal, int code, unsigned long address) const char *desc; struct pt_regs *regs = current_pt_regs(); + if (WARN_ON(!user_mode(regs))) + return; + clear_siginfo(&info); switch (signal) { @@ -406,14 +411,10 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; + BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } -void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) -{ - sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); -} - #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -437,7 +438,7 @@ void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; @@ -472,7 +473,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + 
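These hooks sit behind the generic speculation-control prctl; the arm64-specific change is that PSTATE.SSBS in the task's saved regs is now toggled alongside TIF_SSBD. A hedged userspace sketch of that existing interface:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SPEC_STORE_BYPASS
    #define PR_GET_SPECULATION_CTRL 52
    #define PR_SET_SPECULATION_CTRL 53
    #define PR_SPEC_STORE_BYPASS    0
    #define PR_SPEC_DISABLE         (1UL << 2)
    #endif

    int main(void)
    {
        /* Ask for the store-bypass mitigation for this task; on SSBS-capable
         * CPUs this now also clears PSTATE.SSBS in the task's pt_regs. */
        if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
                  PR_SPEC_DISABLE, 0, 0))
            perror("prctl");

        printf("current state: 0x%lx\n",
               (unsigned long)prctl(PR_GET_SPECULATION_CTRL,
                                    PR_SPEC_STORE_BYPASS, 0, 0, 0));
        return 0;
    }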
int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); pt_regs_write_reg(regs, rt, val); @@ -482,7 +483,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); @@ -490,12 +491,28 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_timer_get_rate()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } +static void mrs_handler(unsigned int esr, struct pt_regs *regs) +{ + u32 sysreg, rt; + + rt = ESR_ELx_SYS64_ISS_RT(esr); + sysreg = esr_sys64_to_sysreg(esr); + + if (do_emulate_mrs(regs, sysreg, rt) != 0) + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} + +static void wfi_handler(unsigned int esr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -526,9 +543,176 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, .handler = cntfrq_read_handler, }, + { + /* Trap read access to CPUID registers */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, + .handler = mrs_handler, + }, + { + /* Trap WFI instructions executed in userspace */ + .esr_mask = ESR_ELx_WFx_MASK, + .esr_val = ESR_ELx_WFx_WFI_VAL, + .handler = wfi_handler, + }, {}, }; + +#ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) +{ + int cond; + + /* Only a T32 instruction can trap without CV being set */ + if (!(esr & ESR_ELx_CV)) { + u32 it; + + it = compat_get_it_state(regs); + if (!it) + return true; + + cond = it >> 4; + } else { + cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; + } + + return aarch32_opcode_cond_checks[cond](regs->pstate); +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. 
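mrs_handler() reuses the same emulation path that already backs EL0 reads of the ID registers, so code like the following keeps working whether the access arrives as an UNDEF or as a trapped SYS64 exception. Sketch only; it is arm64-specific and assumes a toolchain that accepts named system registers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t isar0;

        /* Trapped and emulated with the sanitised ID_AA64ISAR0_EL1 value */
        asm("mrs %0, id_aa64isar0_el1" : "=r" (isar0));

        printf("ID_AA64ISAR0_EL1 = 0x%016llx, CRC32 field = %llu\n",
               (unsigned long long)isar0,
               (unsigned long long)((isar0 >> 16) & 0xf)); /* bits [19:16] */
        return 0;
    }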
+ */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} + +static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, + unsigned int sz) +{ + advance_itstate(regs); + arm64_skip_faulting_instruction(regs, sz); +} + +static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, reg, arch_timer_get_rate()); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_32_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, + .handler = compat_cntfrq_read_handler, + }, + {}, +}; + +static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; + int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; + u64 val = arch_counter_get_cntvct(); + + pt_regs_write_reg(regs, rt, lower_32_bits(val)); + pt_regs_write_reg(regs, rt2, upper_32_bits(val)); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_64_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, + .handler = compat_cntvct_read_handler, + }, + {}, +}; + +asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +{ + struct sys64_hook *hook, *hook_base; + + if (!cp15_cond_valid(esr, regs)) { + /* + * There is no T16 variant of a CP access, so we + * always advance PC by 4 bytes. + */ + arm64_compat_skip_faulting_instruction(regs, 4); + return; + } + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_CP15_32: + hook_base = cp15_32_hooks; + break; + case ESR_ELx_EC_CP15_64: + hook_base = cp15_64_hooks; + break; + default: + do_undefinstr(regs); + return; + } + + for (hook = hook_base; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + + /* + * New cp15 instructions may previously have been undefined at + * EL0. Fall back to our usual undefined instruction handler + * so that we handle these consistently. + */ + do_undefinstr(regs); +} +#endif + asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { struct sys64_hook *hook; @@ -605,7 +789,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); - die("Oops - bad mode", regs, 0); local_daif_mask(); panic("bad mode"); } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 605d1b60469c..ab29c06a7d4b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -138,6 +138,23 @@ SECTIONS EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + swapper_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -216,21 +233,9 @@ SECTIONS BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . 
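Because a trapped CP15 access inside an IT block must consume one entry of the Thumb ITSTATE, advance_itstate() re-implements the architectural update. The same arithmetic in stand-alone form, assuming ITSTATE is laid out as firstcond:mask so that an ITTTT EQ block starts at 0x01 (illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of the update: once only the terminating bit of the mask is
     * left (low three bits zero), the block is finished and the state is
     * wiped; otherwise the mask shifts left by one. */
    static uint32_t advance_it(uint32_t it)
    {
        if (!(it & 7))
            return 0;
        return (it & 0xe0) | ((it << 1) & 0x1f);
    }

    int main(void)
    {
        uint32_t it = 0x01;     /* ITTTT EQ: four conditional instructions */

        for (int i = 0; i <= 4; i++) {
            printf("after %d instruction(s): ITSTATE = 0x%02x\n", i, it);
            it = advance_it(it);
        }
        return 0;
    }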
+= PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += SWAPPER_DIR_SIZE; - swapper_pg_end = .; + init_pg_dir = .; + . += INIT_DIR_SIZE; + init_pg_end = .; __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index ea9225160786..4576b86a5579 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -65,6 +65,9 @@ __do_hyp_init: b.lo __kvm_handle_stub_hvc phys_to_ttbr x4, x0 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif msr ttbr0_el2, x4 mrs x4, tcr_el1 diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 9ce223944983..76d016b446b2 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -288,3 +288,14 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) vcpu->arch.sysregs_loaded_on_cpu = false; } + +void __hyp_text __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 68755fd70dcf..69ff9887f724 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -12,7 +12,7 @@ lib-y := clear_user.o delay.o copy_from_user.o \ # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o -CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ +CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ @@ -25,3 +25,5 @@ KCOV_INSTRUMENT_atomic_ll_sc.o := n UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o + +obj-$(CONFIG_CRC32) += crc32.o diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S new file mode 100644 index 000000000000..5bc1e85b4e1c --- /dev/null +++ b/arch/arm64/lib/crc32.S @@ -0,0 +1,60 @@ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/linkage.h> +#include <asm/alternative.h> +#include <asm/assembler.h> + + .cpu generic+crc + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_le) +alternative_if_not ARM64_HAS_CRC32 + b crc32_le_base +alternative_else_nop_endif + __crc32 +ENDPROC(crc32_le) + + .align 5 +ENTRY(__crc32c_le) +alternative_if_not ARM64_HAS_CRC32 + b __crc32c_le_base +alternative_else_nop_endif + __crc32 c +ENDPROC(__crc32c_le) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index c127f94da8e2..1f0ea2facf24 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -88,7 +88,7 @@ void verify_cpu_asid_bits(void) } } -static void flush_context(unsigned int cpu) +static void flush_context(void) { int i; u64 asid; @@ -142,7 +142,7 @@ static bool check_update_reserved_asid(u64 asid, u64 newasid) return hit; } -static u64 new_context(struct mm_struct *mm, unsigned int cpu) +static u64 new_context(struct mm_struct *mm) { static u32 cur_idx = 1; u64 asid = atomic64_read(&mm->context.id); @@ -180,7 +180,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* We're out of ASIDs, so increment the global generation count */ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, &asid_generation); - flush_context(cpu); + flush_context(); /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); @@ -196,6 +196,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) unsigned long flags; u64 asid, old_active_asid; + if (system_supports_cnp()) + cpu_set_reserved_ttbr0(); + asid = atomic64_read(&mm->context.id); /* @@ -223,7 +226,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current generation. 
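Callers keep using the generic library entry points; the ARM64_HAS_CRC32 alternative simply redirects them away from the table-driven crc32_le_base()/__crc32c_le_base() fallbacks. A reminder of the in-kernel API being accelerated (sketch, not part of the patch):

    #include <linux/crc32.h>

    /* crc32_le(seed, buf, len): ~0 is a typical seed. On CPUs with the
     * CRC32 extension this now runs the hand-written instruction loop
     * instead of the generic byte-at-a-time code. */
    static u32 checksum_blob(const void *buf, size_t len)
    {
        return crc32_le(~0u, buf, len);
    }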
*/ asid = atomic64_read(&mm->context.id); if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { - asid = new_context(mm, cpu); + asid = new_context(mm); atomic64_set(&mm->context.id, asid); } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 65dfc8571bf8..fcb1f2a6d7c6 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -36,8 +36,8 @@ static const struct addr_marker address_markers[] = { #endif { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMALLOC_END, "vmalloc() End" }, + { VMALLOC_START, "vmalloc() area" }, + { VMALLOC_END, "vmalloc() end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, @@ -46,7 +46,7 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear Mapping" }, + { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 50b30ff30de4..d0e638ef3af6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -37,6 +37,7 @@ #include <asm/cmpxchg.h> #include <asm/cpufeature.h> #include <asm/exception.h> +#include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/sysreg.h> @@ -56,10 +57,16 @@ struct fault_info { }; static const struct fault_info fault_info[]; +static struct fault_info debug_fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned int esr) { - return fault_info + (esr & 63); + return fault_info + (esr & ESR_ELx_FSC); +} + +static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) +{ + return debug_fault_info + DBG_ESR_EVT(esr); } #ifdef CONFIG_KPROBES @@ -235,9 +242,8 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static inline bool is_el1_permission_fault(unsigned int esr, - struct pt_regs *regs, - unsigned long addr) +static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -283,7 +289,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; - if (is_el1_permission_fault(esr, regs, addr)) { + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; else @@ -454,7 +460,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_el1_permission_fault(addr, esr, regs)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die_kernel_fault("access to user memory with fs=KERNEL_DS", @@ -771,7 +777,7 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, if (addr > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); do_mem_abort(addr, esr, regs); } @@ -785,7 +791,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, if (user_mode(regs)) { if (instruction_pointer(regs) > TASK_SIZE) arm64_apply_bp_hardening(); - local_irq_enable(); + local_daif_restore(DAIF_PROCCTX); } clear_siginfo(&info); @@ -831,7 +837,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, 
unsigned int esr, struct pt_regs *regs) { - const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + const struct fault_info *inf = esr_to_debug_fault_info(esr); int rv; /* @@ -864,17 +870,3 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } NOKPROBE_SYMBOL(do_debug_exception); - -#ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) -{ - /* - * We modify PSTATE. This won't work from irq context as the PSTATE - * is discarded once we return from the exception. - */ - WARN_ON_ONCE(in_interrupt()); - - sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); - asm(SET_PSTATE_PAN(1)); -} -#endif /* CONFIG_ARM64_PAN */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 787e27964ab9..3cf87341859f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -284,7 +284,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #endif /* CONFIG_NUMA */ -#ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { phys_addr_t addr = pfn << PAGE_SHIFT; @@ -294,7 +293,6 @@ int pfn_valid(unsigned long pfn) return memblock_is_map_memory(addr); } EXPORT_SYMBOL(pfn_valid); -#endif #ifndef CONFIG_SPARSEMEM static void __init arm64_memory_present(void) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 12145874c02b..fccb1a6f8c6f 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -192,7 +192,7 @@ void __init kasan_init(void) /* * We are going to perform proper setup of shadow memory. - * At first we should unmap early shadow (clear_pgds() call bellow). + * At first we should unmap early shadow (clear_pgds() call below). * However, instrumented code couldn't execute without shadow memory. * tmp_pg_dir used to keep early shadow mapped until full shadow * setup will be finished. diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8080c9f489c3..9498c15b847b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -67,6 +67,24 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +static DEFINE_SPINLOCK(swapper_pgdir_lock); + +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) +{ + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); + fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp)); + WRITE_ONCE(*fixmap_pgdp, pgd); + /* + * We need dsb(ishst) here to ensure the page-table-walker sees + * our new entry before set_p?d() returns. The fixmap's + * flush_tlb_kernel_range() via clear_fixmap() does this for us. + */ + pgd_clear_fixmap(); + spin_unlock(&swapper_pgdir_lock); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -629,34 +647,18 @@ static void __init map_kernel(pgd_t *pgdp) */ void __init paging_init(void) { - phys_addr_t pgd_phys = early_pgtable_alloc(); - pgd_t *pgdp = pgd_set_fixmap(pgd_phys); + pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); map_kernel(pgdp); map_mem(pgdp); - /* - * We want to reuse the original swapper_pg_dir so we don't have to - * communicate the new address to non-coherent secondaries in - * secondary_entry, and so cpu_switch_mm can generate the address with - * adrp+add rather than a load from some global variable. - * - * To do this we need to go via a temporary pgd. 
- */ - cpu_replace_ttbr1(__va(pgd_phys)); - memcpy(swapper_pg_dir, pgdp, PGD_SIZE); - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); - pgd_clear_fixmap(); - memblock_free(pgd_phys, PAGE_SIZE); - /* - * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd - * allocated with it. - */ - memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, - __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) - - PAGE_SIZE); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + init_mm.pgd = swapper_pg_dir; + + memblock_free(__pa_symbol(init_pg_dir), + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); } /* diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 146c04ceaa51..d7b66fc5e1c5 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -391,7 +391,6 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(numa_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); - numa_free_distance(); ret = numa_alloc_distance(); if (ret < 0) @@ -399,20 +398,24 @@ static int __init numa_init(int (*init_func)(void)) ret = init_func(); if (ret < 0) - return ret; + goto out_free_distance; if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); - return -EINVAL; + ret = -EINVAL; + goto out_free_distance; } ret = numa_register_nodes(); if (ret < 0) - return ret; + goto out_free_distance; setup_node_to_cpumask_map(); return 0; +out_free_distance: + numa_free_distance(); + return ret; } /** @@ -432,7 +435,7 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 03646e6a2ef4..2c75b0b903ae 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -160,6 +160,12 @@ ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 + +alternative_if ARM64_HAS_CNP + cbz x1, 1f // skip CNP for reserved ASID + orr x3, x3, #TTBR_CNP_BIT +1: +alternative_else_nop_endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN bfi x3, x1, #48, #16 // set the ASID field in TTBR0 #endif @@ -184,7 +190,7 @@ ENDPROC(cpu_do_switch_mm) .endm /* - * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1) * * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. 
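The comment above and the hunk that follows change idmap_cpu_replace_ttbr1() to accept a fully formed TTBR1 value instead of a raw pgd physical address, which lets the caller fold in the CNP (Common-Not-Private) bit shown in the cpu_do_switch_mm hunk. A minimal C sketch of how a caller could assemble that value is below; it assumes the arm64 helpers phys_to_ttbr(), system_supports_cnp(), cpu_install_idmap()/cpu_uninstall_idmap() and the TTBR_CNP_BIT constant, and the function name is illustrative rather than taken from this series.

	/*
	 * Sketch only: compose a TTBR1 value and hand it to the idmap-resident
	 * replacement routine, which (after the hunk below) simply writes it to
	 * TTBR1_EL1.  Helper names are assumptions based on existing arm64 code.
	 */
	#include <linux/mm.h>
	#include <asm/cpufeature.h>
	#include <asm/mmu_context.h>
	#include <asm/pgtable.h>

	static void replace_ttbr1_sketch(pgd_t *pgdp)
	{
		typedef void (ttbr_replace_fn)(phys_addr_t);
		ttbr_replace_fn *replace_phys;
		phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));

		/* Only the swapper page tables may be marked Common-Not-Private. */
		if (system_supports_cnp() && pgdp == lm_alias(swapper_pg_dir))
			ttbr1 |= TTBR_CNP_BIT;

		/* The routine must run from the idmap (a TTBR0 mapping), as the
		 * comment above requires. */
		replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);

		cpu_install_idmap();
		replace_phys(ttbr1);
		cpu_uninstall_idmap();
	}

Keeping the composition on the C side means the assembly stays a plain "msr ttbr1_el1, x0", and the decision about whether CNP applies lives next to the code that already knows whether the reserved or swapper tables are being installed.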
@@ -194,8 +200,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - phys_to_ttbr x3, x0 - msr ttbr1_el1, x3 + msr ttbr1_el1, x0 isb restore_daif x2 diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index a641b0bf1611..f65a084607fd 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -9,7 +9,7 @@ config C6X select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 89a4b22f34d9..3ef46522e89f 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -4,6 +4,7 @@ comment "Linux Kernel Configuration for Hexagon" config HEXAGON def_bool y + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_PREEMPT select HAVE_OPROFILE # Other pending projects/to-do items. @@ -29,6 +30,7 @@ config HEXAGON select GENERIC_CLOCKEVENTS_BROADCAST select MODULES_USE_ELF_RELA select GENERIC_CPU_DEVICES + select DMA_DIRECT_OPS ---help--- Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index dd2fd9c0d292..47c4da3d64a4 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += extable.h generic-y += fb.h diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h deleted file mode 100644 index 263f6acbfb0f..000000000000 --- a/arch/hexagon/include/asm/dma-mapping.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * DMA operations for the Hexagon architecture - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#ifndef _ASM_DMA_MAPPING_H -#define _ASM_DMA_MAPPING_H - -#include <linux/types.h> -#include <linux/cache.h> -#include <linux/mm.h> -#include <linux/scatterlist.h> -#include <linux/dma-debug.h> -#include <asm/io.h> - -struct device; - -extern const struct dma_map_ops *dma_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return dma_ops; -} - -#endif diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 7ebe7ad19d15..706699374444 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -18,32 +18,19 @@ * 02110-1301, USA. 
*/ -#include <linux/dma-mapping.h> -#include <linux/dma-direct.h> +#include <linux/dma-noncoherent.h> #include <linux/bootmem.h> #include <linux/genalloc.h> -#include <asm/dma-mapping.h> #include <linux/module.h> #include <asm/page.h> -#define HEXAGON_MAPPING_ERROR 0 - -const struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - -static inline void *dma_addr_to_virt(dma_addr_t dma_addr) -{ - return phys_to_virt((unsigned long) dma_addr); -} - static struct gen_pool *coherent_pool; /* Allocates from a pool of uncached memory that was reserved at boot time */ -static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_addr, + gfp_t flag, unsigned long attrs) { void *ret; @@ -75,58 +62,17 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, return ret; } -static void hexagon_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs) { gen_pool_free(coherent_pool, (unsigned long) vaddr, size); } -static int check_addr(const char *name, struct device *hwdev, - dma_addr_t bus, size_t size) -{ - if (hwdev && hwdev->dma_mask && !dma_capable(hwdev, bus, size)) { - if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) - printk(KERN_ERR - "%s: overflow %Lx+%zu of device mask %Lx\n", - name, (long long)bus, size, - (long long)*hwdev->dma_mask); - return 0; - } - return 1; -} - -static int hexagon_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - struct scatterlist *s; - int i; - - WARN_ON(nents == 0 || sg[0].length == 0); - - for_each_sg(sg, s, nents, i) { - s->dma_address = sg_phys(s); - if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) - return 0; - - s->dma_length = s->length; + void *addr = phys_to_virt(paddr); - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - continue; - - flush_dcache_range(dma_addr_to_virt(s->dma_address), - dma_addr_to_virt(s->dma_address + s->length)); - } - - return nents; -} - -/* - * address is virtual - */ -static inline void dma_sync(void *addr, size_t size, - enum dma_data_direction dir) -{ switch (dir) { case DMA_TO_DEVICE: hexagon_clean_dcache_range((unsigned long) addr, @@ -144,76 +90,3 @@ static inline void dma_sync(void *addr, size_t size, BUG(); } } - -/** - * hexagon_map_page() - maps an address for device DMA - * @dev: pointer to DMA device - * @page: pointer to page struct of DMA memory - * @offset: offset within page - * @size: size of memory to map - * @dir: transfer direction - * @attrs: pointer to DMA attrs (not used) - * - * Called to map a memory address to a DMA address prior - * to accesses to/from device. - * - * We don't particularly have many hoops to jump through - * so far. Straight translation between phys and virtual. - * - * DMA is not cache coherent so sync is necessary; this - * seems to be a convenient place to do it. 
- * - */ -static dma_addr_t hexagon_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t bus = page_to_phys(page) + offset; - WARN_ON(size == 0); - - if (!check_addr("map_single", dev, bus, size)) - return HEXAGON_MAPPING_ERROR; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_sync(dma_addr_to_virt(bus), size, dir); - - return bus; -} - -static void hexagon_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ - dma_sync(dma_addr_to_virt(dma_handle), size, dir); -} - -static void hexagon_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) -{ - dma_sync(dma_addr_to_virt(dma_handle), size, dir); -} - -static int hexagon_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == HEXAGON_MAPPING_ERROR; -} - -const struct dma_map_ops hexagon_dma_ops = { - .alloc = hexagon_dma_alloc_coherent, - .free = hexagon_free_coherent, - .map_sg = hexagon_map_sg, - .map_page = hexagon_map_page, - .sync_single_for_cpu = hexagon_sync_single_for_cpu, - .sync_single_for_device = hexagon_sync_single_for_device, - .mapping_error = hexagon_mapping_error, -}; - -void __init hexagon_dma_init(void) -{ - if (dma_ops) - return; - - dma_ops = &hexagon_dma_ops; -} diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 76e4d6632d68..522745ae67bb 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -10,8 +10,6 @@ #include <linux/scatterlist.h> #include <linux/dma-debug.h> -#define ARCH_HAS_DMA_GET_REQUIRED_MASK - extern const struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 267f4f170191..5133739966bc 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -44,7 +44,6 @@ typedef void ia64_mv_kernel_launch_event_t(void); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); -typedef u64 ia64_mv_dma_get_required_mask (struct device *); typedef const struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); /* @@ -127,7 +126,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # define platform_global_tlb_purge ia64_mv.global_tlb_purge # define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish # define platform_dma_init ia64_mv.dma_init -# define platform_dma_get_required_mask ia64_mv.dma_get_required_mask # define platform_dma_get_ops ia64_mv.dma_get_ops # define platform_irq_to_vector ia64_mv.irq_to_vector # define platform_local_vector_to_irq ia64_mv.local_vector_to_irq @@ -171,7 +169,6 @@ struct ia64_machine_vector { ia64_mv_global_tlb_purge_t *global_tlb_purge; ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish; ia64_mv_dma_init *dma_init; - ia64_mv_dma_get_required_mask *dma_get_required_mask; ia64_mv_dma_get_ops *dma_get_ops; ia64_mv_irq_to_vector *irq_to_vector; ia64_mv_local_vector_to_irq *local_vector_to_irq; @@ -211,7 +208,6 @@ struct ia64_machine_vector { platform_global_tlb_purge, \ platform_tlb_migrate_finish, \ platform_dma_init, \ - platform_dma_get_required_mask, \ platform_dma_get_ops, \ platform_irq_to_vector, \ platform_local_vector_to_irq, \ @@ -286,9 +282,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device *); #ifndef platform_dma_get_ops # define platform_dma_get_ops dma_get_ops 
#endif -#ifndef platform_dma_get_required_mask -# define platform_dma_get_required_mask ia64_dma_get_required_mask -#endif #ifndef platform_irq_to_vector # define platform_irq_to_vector __ia64_irq_to_vector #endif diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h index 2b32fd06b7c6..2aafb69a3787 100644 --- a/arch/ia64/include/asm/machvec_init.h +++ b/arch/ia64/include/asm/machvec_init.h @@ -4,7 +4,6 @@ extern ia64_mv_send_ipi_t ia64_send_ipi; extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge; -extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask; extern ia64_mv_irq_to_vector __ia64_irq_to_vector; extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq; extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem; diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h index ece9fa85be88..b5153d300289 100644 --- a/arch/ia64/include/asm/machvec_sn2.h +++ b/arch/ia64/include/asm/machvec_sn2.h @@ -55,7 +55,6 @@ extern ia64_mv_readb_t __sn_readb_relaxed; extern ia64_mv_readw_t __sn_readw_relaxed; extern ia64_mv_readl_t __sn_readl_relaxed; extern ia64_mv_readq_t __sn_readq_relaxed; -extern ia64_mv_dma_get_required_mask sn_dma_get_required_mask; extern ia64_mv_dma_init sn_dma_init; extern ia64_mv_migrate_t sn_migrate; extern ia64_mv_kernel_launch_event_t sn_kernel_launch_event; @@ -100,7 +99,6 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; #define platform_pci_get_legacy_mem sn_pci_get_legacy_mem #define platform_pci_legacy_read sn_pci_legacy_read #define platform_pci_legacy_write sn_pci_legacy_write -#define platform_dma_get_required_mask sn_dma_get_required_mask #define platform_dma_init sn_dma_init #define platform_migrate sn_migrate #define platform_kernel_launch_event sn_kernel_launch_event diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 7ccc64d5fe3e..5d71800df431 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -568,32 +568,6 @@ static void __init set_pci_dfl_cacheline_size(void) pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4; } -u64 ia64_dma_get_required_mask(struct device *dev) -{ - u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); - u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); - u64 mask; - - if (!high_totalram) { - /* convert to mask just covering totalram */ - low_totalram = (1 << (fls(low_totalram) - 1)); - low_totalram += low_totalram - 1; - mask = low_totalram; - } else { - high_totalram = (1 << (fls(high_totalram) - 1)); - high_totalram += high_totalram - 1; - mask = (((u64)high_totalram) << 32) + 0xffffffff; - } - return mask; -} -EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask); - -u64 dma_get_required_mask(struct device *dev) -{ - return platform_dma_get_required_mask(dev); -} -EXPORT_SYMBOL_GPL(dma_get_required_mask); - static int __init pcibios_init(void) { set_pci_dfl_cacheline_size(); diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 74c934a997bb..96eb2567718a 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -344,11 +344,10 @@ static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } -u64 sn_dma_get_required_mask(struct device *dev) +static u64 sn_dma_get_required_mask(struct device *dev) { return DMA_BIT_MASK(64); } -EXPORT_SYMBOL_GPL(sn_dma_get_required_mask); char *sn_pci_get_legacy_mem(struct pci_bus *bus) { @@ -473,6 +472,7 @@ static struct dma_map_ops sn_dma_ops = { .sync_sg_for_device = sn_dma_sync_sg_for_device, .mapping_error = sn_dma_mapping_error, 
.dma_supported = sn_dma_supported, + .get_required_mask = sn_dma_get_required_mask, }; void sn_dma_init(void) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 070553791e97..c7b2a8d60a41 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -26,7 +26,7 @@ config M68K select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION - select DMA_NONCOHERENT_OPS if HAS_DMA + select DMA_DIRECT_OPS if HAS_DMA select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK select NO_BOOTMEM diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index e9110b9b8bcd..38049357d6d3 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -73,7 +73,7 @@ static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio) len = bvec.bv_len; len >>= 9; nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift, - bvec_to_phys(&bvec)); + page_to_phys(bvec.bv_page) + bvec.bv_offset); sec += len; } bio_endio(bio); diff --git a/arch/m68k/include/asm/atafd.h b/arch/m68k/include/asm/atafd.h deleted file mode 100644 index ad7014cad633..000000000000 --- a/arch/m68k/include/asm/atafd.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_M68K_FD_H -#define _ASM_M68K_FD_H - -/* Definitions for the Atari Floppy driver */ - -struct atari_format_descr { - int track; /* to be formatted */ - int head; /* "" "" */ - int sect_offset; /* offset of first sector */ -}; - -#endif diff --git a/arch/m68k/include/asm/atafdreg.h b/arch/m68k/include/asm/atafdreg.h deleted file mode 100644 index c31b4919ed2d..000000000000 --- a/arch/m68k/include/asm/atafdreg.h +++ /dev/null @@ -1,80 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_FDREG_H -#define _LINUX_FDREG_H - -/* -** WD1772 stuff - */ - -/* register codes */ - -#define FDCSELREG_STP (0x80) /* command/status register */ -#define FDCSELREG_TRA (0x82) /* track register */ -#define FDCSELREG_SEC (0x84) /* sector register */ -#define FDCSELREG_DTA (0x86) /* data register */ - -/* register names for FDC_READ/WRITE macros */ - -#define FDCREG_CMD 0 -#define FDCREG_STATUS 0 -#define FDCREG_TRACK 2 -#define FDCREG_SECTOR 4 -#define FDCREG_DATA 6 - -/* command opcodes */ - -#define FDCCMD_RESTORE (0x00) /* - */ -#define FDCCMD_SEEK (0x10) /* | */ -#define FDCCMD_STEP (0x20) /* | TYP 1 Commands */ -#define FDCCMD_STIN (0x40) /* | */ -#define FDCCMD_STOT (0x60) /* - */ -#define FDCCMD_RDSEC (0x80) /* - TYP 2 Commands */ -#define FDCCMD_WRSEC (0xa0) /* - " */ -#define FDCCMD_RDADR (0xc0) /* - */ -#define FDCCMD_RDTRA (0xe0) /* | TYP 3 Commands */ -#define FDCCMD_WRTRA (0xf0) /* - */ -#define FDCCMD_FORCI (0xd0) /* - TYP 4 Command */ - -/* command modifier bits */ - -#define FDCCMDADD_SR6 (0x00) /* step rate settings */ -#define FDCCMDADD_SR12 (0x01) -#define FDCCMDADD_SR2 (0x02) -#define FDCCMDADD_SR3 (0x03) -#define FDCCMDADD_V (0x04) /* verify */ -#define FDCCMDADD_H (0x08) /* wait for spin-up */ -#define FDCCMDADD_U (0x10) /* update track register */ -#define FDCCMDADD_M (0x10) /* multiple sector access */ -#define FDCCMDADD_E (0x04) /* head settling flag */ -#define FDCCMDADD_P (0x02) /* precompensation off */ -#define FDCCMDADD_A0 (0x01) /* DAM flag */ - -/* status register bits */ - -#define FDCSTAT_MOTORON (0x80) /* motor on */ -#define FDCSTAT_WPROT (0x40) /* write protected (FDCCMD_WR*) */ -#define FDCSTAT_SPINUP (0x20) /* motor speed stable (Type I) */ -#define FDCSTAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */ -#define FDCSTAT_RECNF (0x10) /* record not found */ -#define 
FDCSTAT_CRC (0x08) /* CRC error */ -#define FDCSTAT_TR00 (0x04) /* Track 00 flag (Type I) */ -#define FDCSTAT_LOST (0x04) /* Lost Data (Type II+III) */ -#define FDCSTAT_IDX (0x02) /* Index status (Type I) */ -#define FDCSTAT_DRQ (0x02) /* DRQ status (Type II+III) */ -#define FDCSTAT_BUSY (0x01) /* FDC is busy */ - - -/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */ -#define DSKSIDE (0x01) - -#define DSKDRVNONE (0x06) -#define DSKDRV0 (0x02) -#define DSKDRV1 (0x04) - -/* step rates */ -#define FDCSTEP_6 0x00 -#define FDCSTEP_12 0x01 -#define FDCSTEP_2 0x02 -#define FDCSTEP_3 0x03 - -#endif diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index ace5c5bf1836..164a4857737a 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -1,6 +1,7 @@ config MICROBLAZE def_bool y select ARCH_NO_SWAP + select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE @@ -11,8 +12,7 @@ config MICROBLAZE select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK - select DMA_NONCOHERENT_OPS - select DMA_NONCOHERENT_MMAP + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 7b650ab14fa0..f64ebb9c9a41 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -553,8 +553,6 @@ void __init *early_get_page(void); extern unsigned long ioremap_bot, ioremap_base; -unsigned long consistent_virt_to_pfn(void *vaddr); - void setup_memory(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 71032cf64669..a89c2d4ed5ff 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -42,25 +42,3 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, { __dma_sync(dev, paddr, size, dir); } - -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_MMU - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - unsigned long pfn; - - if (off >= count || user_count > (count - off)) - return -ENXIO; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pfn = consistent_virt_to_pfn(cpu_addr); - return remap_pfn_range(vma, vma->vm_start, pfn + off, - vma->vm_end - vma->vm_start, vma->vm_page_prot); -#else - return -ENXIO; -#endif -} diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index c9a278ac795a..d801cc5f5b95 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -165,7 +165,8 @@ static pte_t *consistent_virt_to_pte(void *vaddr) return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr); } -unsigned long consistent_virt_to_pfn(void *vaddr) +long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr, + dma_addr_t dma_addr) { pte_t *ptep = consistent_virt_to_pte(vaddr); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 35511999156a..77c022e56e6e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1106,21 +1106,22 @@ config ARCH_SUPPORTS_UPROBES bool config DMA_MAYBE_COHERENT + select ARCH_HAS_DMA_COHERENCE_H select DMA_NONCOHERENT bool config DMA_PERDEV_COHERENT bool - select DMA_MAYBE_COHERENT + select DMA_NONCOHERENT config DMA_NONCOHERENT bool + select 
ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_CPU select NEED_DMA_MAP_STATE - select DMA_NONCOHERENT_MMAP + select ARCH_HAS_DMA_COHERENT_TO_PFN select DMA_NONCOHERENT_CACHE_SYNC - select DMA_NONCOHERENT_OPS config SYS_HAS_EARLY_PRINTK bool diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 58351e48421e..9a81e72119da 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1,6 +1,7 @@ # MIPS headers generic-(CONFIG_GENERIC_CSUM) += checksum.h generic-y += current.h +generic-y += device.h generic-y += dma-contiguous.h generic-y += emergency-restart.h generic-y += export.h diff --git a/arch/mips/include/asm/device.h b/arch/mips/include/asm/device.h deleted file mode 100644 index 6aa796f1081a..000000000000 --- a/arch/mips/include/asm/device.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -#ifndef _ASM_MIPS_DEVICE_H -#define _ASM_MIPS_DEVICE_H - -struct dev_archdata { -#ifdef CONFIG_DMA_PERDEV_COHERENT - /* Non-zero if DMA is coherent with CPU caches */ - bool dma_coherent; -#endif -}; - -struct pdev_archdata { -}; - -#endif /* _ASM_MIPS_DEVICE_H*/ diff --git a/arch/mips/include/asm/dma-coherence.h b/arch/mips/include/asm/dma-coherence.h index 8eda48748ed5..5eaa1fcc878a 100644 --- a/arch/mips/include/asm/dma-coherence.h +++ b/arch/mips/include/asm/dma-coherence.h @@ -20,6 +20,12 @@ enum coherent_io_user_state { #elif defined(CONFIG_DMA_MAYBE_COHERENT) extern enum coherent_io_user_state coherentio; extern int hw_coherentio; + +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return coherentio == IO_COHERENCE_ENABLED || + (coherentio == IO_COHERENCE_DEFAULT && hw_coherentio); +} #else #ifdef CONFIG_DMA_NONCOHERENT #define coherentio IO_COHERENCE_DISABLED diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index e81c4e97ff1a..b4c477eb46ce 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -12,8 +12,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &jazz_dma_ops; #elif defined(CONFIG_SWIOTLB) return &swiotlb_dma_ops; -#elif defined(CONFIG_DMA_NONCOHERENT_OPS) - return &dma_noncoherent_ops; #else return &dma_direct_ops; #endif @@ -25,7 +23,7 @@ static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, bool coherent) { #ifdef CONFIG_DMA_PERDEV_COHERENT - dev->archdata.dma_coherent = coherent; + dev->dma_coherent = coherent; #endif } diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index b2fa62922d88..49d6046ca1d0 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -13,6 +13,7 @@ #include <linux/atomic.h> #include <linux/cpumask.h> +#include <linux/sizes.h> #include <linux/threads.h> #include <asm/cachectl.h> @@ -80,11 +81,10 @@ extern unsigned int vced_count, vcei_count; #endif -/* - * One page above the stack is used for branch delay slot "emulation". - * See dsemul.c for details. - */ -#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - PAGE_SIZE) +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? 
SZ_1M : SZ_256M) + +extern unsigned long mips_stack_top(void); +#define STACK_TOP mips_stack_top() /* * This decides where the kernel will search for a free chunk of vm diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index d31bc2f01208..0a0aaf39fd16 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -564,13 +564,13 @@ static void *jazz_dma_alloc(struct device *dev, size_t size, { void *ret; - ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); if (!ret) return NULL; *dma_handle = vdma_alloc(virt_to_phys(ret), size); if (*dma_handle == VDMA_ERROR) { - dma_direct_free(dev, size, ret, *dma_handle, attrs); + dma_direct_free_pages(dev, size, ret, *dma_handle, attrs); return NULL; } @@ -587,7 +587,7 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, vdma_free(dma_handle); if (!(attrs & DMA_ATTR_NON_CONSISTENT)) vaddr = (void *)CAC_ADDR((unsigned long)vaddr); - return dma_direct_free(dev, size, vaddr, dma_handle, attrs); + dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs); } static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page, @@ -682,7 +682,6 @@ static int jazz_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) const struct dma_map_ops jazz_dma_ops = { .alloc = jazz_dma_alloc, .free = jazz_dma_free, - .mmap = arch_dma_mmap, .map_page = jazz_dma_map_page, .unmap_page = jazz_dma_unmap_page, .map_sg = jazz_dma_map_sg, diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8fc69891e117..d4f7fd4550e1 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/nmi.h> #include <linux/cpu.h> +#include <asm/abi.h> #include <asm/asm.h> #include <asm/bootinfo.h> #include <asm/cpu.h> @@ -39,6 +40,7 @@ #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/irq.h> +#include <asm/mips-cps.h> #include <asm/msa.h> #include <asm/pgtable.h> #include <asm/mipsregs.h> @@ -645,6 +647,29 @@ out: return pc; } +unsigned long mips_stack_top(void) +{ + unsigned long top = TASK_SIZE & PAGE_MASK; + + /* One page for branch delay slot "emulation" */ + top -= PAGE_SIZE; + + /* Space for the VDSO, data page & GIC user page */ + top -= PAGE_ALIGN(current->thread.abi->vdso->size); + top -= PAGE_SIZE; + top -= mips_gic_present() ? PAGE_SIZE : 0; + + /* Space for cache colour alignment */ + if (cpu_has_dc_aliases) + top -= shm_align_mask + 1; + + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + + return top; +} + /* * Don't forget that the stack pointer must be aligned on a 8 bytes * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c71d1eb7da59..e64b9e8bb002 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -846,6 +846,34 @@ static void __init arch_mem_init(char **cmdline_p) struct memblock_region *reg; extern void plat_mem_setup(void); + /* + * Initialize boot_command_line to an innocuous but non-empty string in + * order to prevent early_init_dt_scan_chosen() from copying + * CONFIG_CMDLINE into it without our knowledge. We handle + * CONFIG_CMDLINE ourselves below & don't want to duplicate its + * content because repeating arguments can be problematic. + */ + strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE); + + /* call board setup routine */ + plat_mem_setup(); + + /* + * Make sure all kernel memory is in the maps. 
The "UP" and + * "DOWN" are opposite for initdata since if it crosses over + * into another memory section you don't want that to be + * freed when the initdata is freed. + */ + arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, + PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, + BOOT_MEM_RAM); + arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, + PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, + BOOT_MEM_INIT_RAM); + + pr_info("Determined physical RAM map:\n"); + print_memory_map(); + #if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); #else @@ -873,26 +901,6 @@ static void __init arch_mem_init(char **cmdline_p) } #endif #endif - - /* call board setup routine */ - plat_mem_setup(); - - /* - * Make sure all kernel memory is in the maps. The "UP" and - * "DOWN" are opposite for initdata since if it crosses over - * into another memory section you don't want that to be - * freed when the initdata is freed. - */ - arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, - PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, - BOOT_MEM_RAM); - arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, - PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, - BOOT_MEM_INIT_RAM); - - pr_info("Determined physical RAM map:\n"); - print_memory_map(); - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; @@ -1067,7 +1075,7 @@ static int __init debugfs_mips(void) arch_initcall(debugfs_mips); #endif -#if defined(CONFIG_DMA_MAYBE_COHERENT) && !defined(CONFIG_DMA_PERDEV_COHERENT) +#ifdef CONFIG_DMA_MAYBE_COHERENT /* User defined DMA coherency from command line. */ enum coherent_io_user_state coherentio = IO_COHERENCE_DEFAULT; EXPORT_SYMBOL_GPL(coherentio); diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 8f845f6e5f42..48a9c6b90e07 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -15,6 +15,7 @@ #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/timekeeper_internal.h> @@ -97,6 +98,21 @@ void update_vsyscall_tz(void) } } +static unsigned long vdso_base(void) +{ + unsigned long base; + + /* Skip the delay slot emulation page */ + base = STACK_TOP + PAGE_SIZE; + + if (current->flags & PF_RANDOMIZE) { + base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base = PAGE_ALIGN(base); + } + + return base; +} + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mips_vdso_image *image = current->thread.abi->vdso; @@ -137,7 +153,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (cpu_has_dc_aliases) size += shm_align_mask + 1; - base = get_unmapped_area(NULL, 0, size, 0, 0); + base = get_unmapped_area(NULL, vdso_base(), size, 0, 0); if (IS_ERR_VALUE(base)) { ret = base; goto out; diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 3a6f34ef5ffc..069acec3df9f 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -280,9 +280,11 @@ * unset_bytes = end_addr - current_addr + 1 * a2 = t1 - a0 + 1 */ + .set reorder PTR_SUBU a2, t1, a0 + PTR_ADDIU a2, 1 jr ra - PTR_ADDIU a2, 1 + .set noreorder .endm diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index a9ef057c79fe..05bd77727fb9 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1955,22 +1955,21 @@ void r4k_cache_init(void) __flush_icache_user_range = 
r4k_flush_icache_user_range; __local_flush_icache_user_range = local_r4k_flush_icache_user_range; -#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT) -# if defined(CONFIG_DMA_PERDEV_COHERENT) - if (0) { -# else - if ((coherentio == IO_COHERENCE_ENABLED) || - ((coherentio == IO_COHERENCE_DEFAULT) && hw_coherentio)) { -# endif +#ifdef CONFIG_DMA_NONCOHERENT +#ifdef CONFIG_DMA_MAYBE_COHERENT + if (coherentio == IO_COHERENCE_ENABLED || + (coherentio == IO_COHERENCE_DEFAULT && hw_coherentio)) { _dma_cache_wback_inv = (void *)cache_noop; _dma_cache_wback = (void *)cache_noop; _dma_cache_inv = (void *)cache_noop; - } else { + } else +#endif /* CONFIG_DMA_MAYBE_COHERENT */ + { _dma_cache_wback_inv = r4k_dma_cache_wback_inv; _dma_cache_wback = r4k_dma_cache_wback_inv; _dma_cache_inv = r4k_dma_cache_inv; } -#endif +#endif /* CONFIG_DMA_NONCOHERENT */ build_clear_page(); build_copy_page(); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 2aca1236af36..e6c9485cadcf 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -14,26 +14,6 @@ #include <asm/dma-coherence.h> #include <asm/io.h> -#ifdef CONFIG_DMA_PERDEV_COHERENT -static inline int dev_is_coherent(struct device *dev) -{ - return dev->archdata.dma_coherent; -} -#else -static inline int dev_is_coherent(struct device *dev) -{ - switch (coherentio) { - default: - case IO_COHERENCE_DEFAULT: - return hw_coherentio; - case IO_COHERENCE_ENABLED: - return 1; - case IO_COHERENCE_DISABLED: - return 0; - } -} -#endif /* CONFIG_DMA_PERDEV_COHERENT */ - /* * The affected CPUs below in 'cpu_needs_post_dma_flush()' can speculatively * fill random cachelines with stale data at any time, requiring an extra @@ -49,9 +29,6 @@ static inline int dev_is_coherent(struct device *dev) */ static inline bool cpu_needs_post_dma_flush(struct device *dev) { - if (dev_is_coherent(dev)) - return false; - switch (boot_cpu_type()) { case CPU_R10000: case CPU_R12000: @@ -72,11 +49,8 @@ void *arch_dma_alloc(struct device *dev, size_t size, { void *ret; - ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); - if (!ret) - return NULL; - - if (!dev_is_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) { + ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); + if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { dma_cache_wback_inv((unsigned long) ret, size); ret = (void *)UNCAC_ADDR(ret); } @@ -87,43 +61,24 @@ void *arch_dma_alloc(struct device *dev, size_t size, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!(attrs & DMA_ATTR_NON_CONSISTENT) && !dev_is_coherent(dev)) + if (!(attrs & DMA_ATTR_NON_CONSISTENT)) cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr); - dma_direct_free(dev, size, cpu_addr, dma_addr, attrs); + dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); } -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) { - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long addr = (unsigned long)cpu_addr; - unsigned long off = vma->vm_pgoff; - unsigned long pfn; - int ret = -ENXIO; - - if (!dev_is_coherent(dev)) - addr = CAC_ADDR(addr); - - pfn = page_to_pfn(virt_to_page((void *)addr)); + unsigned long addr = CAC_ADDR((unsigned long)cpu_addr); + return 
page_to_pfn(virt_to_page((void *)addr)); +} +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ if (attrs & DMA_ATTR_WRITE_COMBINE) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off < count && user_count <= (count - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); - } - - return ret; + return pgprot_writecombine(prot); + return pgprot_noncached(prot); } static inline void dma_sync_virt(void *addr, size_t size, @@ -187,8 +142,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - if (!dev_is_coherent(dev)) - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir); } void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, @@ -203,6 +157,5 @@ void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, { BUG_ON(direction == DMA_NONE); - if (!dev_is_coherent(dev)) - dma_sync_virt(vaddr, size, direction); + dma_sync_virt(vaddr, size, direction); } diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7068f341133d..56992330026a 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -11,7 +11,7 @@ config NDS32 select CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index f4ad1138e6b9..03965692fbfe 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -4,7 +4,7 @@ config NIOS2 select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_SWAP - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select TIMER_OF select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e0081e734827..a655ae280637 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -7,7 +7,7 @@ config OPENRISC def_bool y select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select OF select OF_EARLY_FLATTREE select IRQ_DOMAIN diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 8e6d83f79e72..f1cd12afd943 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -186,7 +186,7 @@ config PA11 depends on PA7000 || PA7100LC || PA7200 || PA7300LC select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select DMA_NONCOHERENT_CACHE_SYNC config PREFETCH diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 4e87c35c22b7..755e89ec828a 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -102,7 +102,7 @@ void __init dma_ops_init(void) case pcxl: /* falls through */ case pcxs: case pcxt: - hppa_dma_ops = &dma_noncoherent_ops; + hppa_dma_ops = &dma_direct_ops; break; default: break; diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index f329b466e68f..2d14f17838d2 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -426,7 +426,7 @@ void unwind_frame_init_task(struct unwind_frame_info *info, r.gr[30] = get_parisc_stackpointer(); regs = &r; } - unwind_frame_init(info, task, &r); + unwind_frame_init(info, task, regs); } else { 
unwind_frame_init_from_blocked_task(info, task); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2fdc865ca374..2a2486526d1f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -114,7 +114,7 @@ */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) /* * user access blocked by key */ @@ -132,7 +132,7 @@ */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 3cae9168f63c..e44a8d7959f5 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -108,7 +108,8 @@ int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); void sclp_early_printk(const char *s); -void __sclp_early_printk(const char *s, unsigned int len); +void sclp_early_printk_force(const char *s); +void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); int _sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c index 9431784d7796..40c1dfec944e 100644 --- a/arch/s390/kernel/early_printk.c +++ b/arch/s390/kernel/early_printk.c @@ -10,7 +10,7 @@ static void sclp_early_write(struct console *con, const char *s, unsigned int len) { - __sclp_early_printk(s, len); + __sclp_early_printk(s, len, 0); } static struct console sclp_early_console = { diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index a049a7b9d6e8..c1a080b11ae9 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -198,12 +198,10 @@ pgm_check_entry: /* Suspend CPU not available -> panic */ larl %r15,init_thread_union - ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,-STACK_FRAME_OVERHEAD larl %r2,.Lpanic_string - lghi %r1,0 - sam31 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - brasl %r14,sclp_early_printk + brasl %r14,sclp_early_printk_force larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 1fb7b6d72baf..475d786a65b0 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -7,6 +7,7 @@ config SUPERH select ARCH_NO_COHERENT_DMA_MMAP if !MMU select HAVE_PATA_PLATFORM select CLKDEV_LOOKUP + select DMA_DIRECT_OPS select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP @@ -158,13 +159,11 @@ config SWAP_IO_SPACE bool config DMA_COHERENT - select DMA_DIRECT_OPS bool config DMA_NONCOHERENT def_bool !DMA_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_OPS config PGTABLE_LEVELS default 3 if X2TLB diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e6f2a38d2e61..7e2aa59fcc29 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -51,7 +51,7 @@ config SPARC config SPARC32 def_bool !64BIT select ARCH_HAS_SYNC_DMA_FOR_CPU - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select CLZ_TAB select HAVE_UID16 diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h 
index 666d6b5c0440..9c3fc03abe9a 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -28,7 +28,7 @@ typedef struct { unsigned short sock_id; /* physical package */ unsigned short core_id; unsigned short max_cache_id; /* groupings of highest shared cache */ - unsigned short proc_id; /* strand (aka HW thread) id */ + signed short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index e17566376934..b0bb2fcaf1c9 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -14,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) - return &dma_noncoherent_ops; + return &dma_direct_ops; #endif #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) if (bus == &pci_bus_type) - return &dma_noncoherent_ops; + return &dma_direct_ops; #endif return dma_ops; } diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 09acf0ddec10..45b4bf1875e6 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -427,8 +427,9 @@ #define __NR_preadv2 358 #define __NR_pwritev2 359 #define __NR_statx 360 +#define __NR_io_pgetevents 361 -#define NR_syscalls 361 +#define NR_syscalls 362 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c index 5868fc333ea8..639c8e54530a 100644 --- a/arch/sparc/kernel/kgdb_32.c +++ b/arch/sparc/kernel/kgdb_32.c @@ -122,7 +122,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->pc = addr; linux_regs->npc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c index d5f7dc6323d5..a68bbddbdba4 100644 --- a/arch/sparc/kernel/kgdb_64.c +++ b/arch/sparc/kernel/kgdb_64.c @@ -148,7 +148,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->tpc = addr; linux_regs->tnpc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d3149baaa33c..67b3e6b3ce5d 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -24,6 +24,7 @@ #include <asm/cpudata.h> #include <linux/uaccess.h> #include <linux/atomic.h> +#include <linux/sched/clock.h> #include <asm/nmi.h> #include <asm/pcr.h> #include <asm/cacheflush.h> @@ -927,6 +928,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -959,10 +962,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -988,6 +993,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + 
sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1079,6 +1087,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1371,9 +1381,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, @@ -1603,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct perf_sample_data data; struct cpu_hw_events *cpuc; struct pt_regs *regs; + u64 finish_clock; + u64 start_clock; int i; if (!atomic_read(&active_events)) @@ -1616,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; } + start_clock = sched_clock(); + regs = args->regs; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1654,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, sparc_pmu_stop(event, 0); } + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return NOTIFY_STOP; } diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index f6528884a2c8..4073e2b87dd0 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -84,8 +84,9 @@ __handle_signal: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 sethi %hi(0xf << 20), %l4 and %l1, %l4, %l4 + andn %l1, %l4, %l1 ba,pt %xcc, __handle_preemption_continue - andn %l1, %l4, %l1 + srl %l4, 20, %l4 /* When returning from a NMI (%pil==15) interrupt we want to * avoid running softirqs, doing IRQ tracing, preempting, etc. 
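The perf_event.c hunks above make the sparc NMI handler measure its own run time and report it to the perf core, in addition to parking newly added events with PERF_HES_ARCH until the PMU is actually enabled. The sketch below isolates the self-timing pattern; sched_clock() and perf_sample_event_took() are the interfaces the patch uses, while the function name and the elided counter walk are illustrative only.

	#include <linux/perf_event.h>
	#include <linux/sched/clock.h>

	/*
	 * Sketch of the NMI self-timing added above: bracket the handler with
	 * sched_clock() and report the elapsed nanoseconds so the perf core can
	 * lower the maximum sample rate when NMI handling becomes too expensive.
	 */
	static void pmu_nmi_timing_sketch(void)
	{
		u64 start_clock, finish_clock;

		start_clock = sched_clock();

		/* ... read overflowed counters and emit samples here ... */

		finish_clock = sched_clock();

		perf_sample_event_took(finish_clock - start_clock);
	}

This mirrors the long-standing behaviour of the x86 NMI handler and keeps slow counter reads from silently eating a large fraction of CPU time at the sample frequency a user requested.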
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 12bee14b552c..621a363098ec 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -90,4 +90,4 @@ sys_call_table: /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen /*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .long sys_statx +/*360*/ .long sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 387ef993880a..bb68c805b891 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -91,7 +91,7 @@ sys_call_table32: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, compat_sys_io_pgetevents #endif /* CONFIG_COMPAT */ @@ -173,4 +173,4 @@ sys_call_table: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 635d67ffc9a3..7db5aabe9708 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -180,11 +180,17 @@ static int send_dreg(struct vio_driver_state *vio) struct vio_dring_register pkt; char all[sizeof(struct vio_dring_register) + (sizeof(struct ldc_trans_cookie) * - dr->ncookies)]; + VIO_MAX_RING_COOKIES)]; } u; + size_t bytes = sizeof(struct vio_dring_register) + + (sizeof(struct ldc_trans_cookie) * + dr->ncookies); int i; - memset(&u, 0, sizeof(u)); + if (WARN_ON(bytes > sizeof(u))) + return -EINVAL; + + memset(&u, 0, bytes); init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG); u.pkt.dring_ident = 0; u.pkt.num_descr = dr->num_entries; @@ -206,7 +212,7 @@ static int send_dreg(struct vio_driver_state *vio) (unsigned long long) u.pkt.cookies[i].cookie_size); } - return send_ctrl(vio, &u.pkt.tag, sizeof(u)); + return send_ctrl(vio, &u.pkt.tag, bytes); } static int send_rdx(struct vio_driver_state *vio) diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index dd0b5a92ffd0..dc85570d8839 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -31,23 +31,21 @@ obj-y += $(vdso_img_objs) targets += $(vdso_img_cfiles) targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) -export CPPFLAGS_vdso.lds += -P -C +CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,--no-undefined \ -Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \ $(DISABLE_LTO) -$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) HOST_EXTRACFLAGS += -I$(srctree)/tools/include hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ -define cmd_vdso2c - $(obj)/vdso2c $< $(<:%.dbg=%) $@ -endef + cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE $(call if_changed,vdso2c) diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c index 
3feb3d960ca5..75dca9aab737 100644 --- a/arch/sparc/vdso/vclock_gettime.c +++ b/arch/sparc/vdso/vclock_gettime.c @@ -33,9 +33,19 @@ #define TICK_PRIV_BIT (1ULL << 63) #endif +#ifdef CONFIG_SPARC64 #define SYSCALL_STRING \ "ta 0x6d;" \ - "sub %%g0, %%o0, %%o0;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#else +#define SYSCALL_STRING \ + "ta 0x10;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#endif #define SYSCALL_CLOBBERS \ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index f51595f861b8..5eaff3c1aa0c 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -262,7 +262,9 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); + if (err) + return err; vdso_enabled = val; - return err; + return 0; } __setup("vdso=", vdso_setup); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 83c470364dfb..74c002ddc0ce 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/ata.h> #include <linux/hdreg.h> #include <linux/cdrom.h> @@ -142,7 +143,6 @@ struct cow { #define MAX_SG 64 struct ubd { - struct list_head restart; /* name (and fd, below) of the file opened for writing, either the * backing or the cow file. */ char *file; @@ -156,11 +156,8 @@ struct ubd { struct cow cow; struct platform_device pdev; struct request_queue *queue; + struct blk_mq_tag_set tag_set; spinlock_t lock; - struct scatterlist sg[MAX_SG]; - struct request *request; - int start_sg, end_sg; - sector_t rq_pos; }; #define DEFAULT_COW { \ @@ -182,10 +179,6 @@ struct ubd { .shared = 0, \ .cow = DEFAULT_COW, \ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ - .request = NULL, \ - .start_sg = 0, \ - .end_sg = 0, \ - .rq_pos = 0, \ } /* Protected by ubd_lock */ @@ -196,6 +189,9 @@ static int fake_ide = 0; static struct proc_dir_entry *proc_ide_root = NULL; static struct proc_dir_entry *proc_ide = NULL; +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); + static void make_proc_ide(void) { proc_ide_root = proc_mkdir("ide", NULL); @@ -436,11 +432,8 @@ __uml_help(udb_setup, " in the boot output.\n\n" ); -static void do_ubd_request(struct request_queue * q); - /* Only changed by ubd_init, which is an initcall. */ static int thread_fd = -1; -static LIST_HEAD(restart); /* Function to read several request pointers at a time * handling fractional reads if (and as) needed @@ -498,9 +491,6 @@ static int bulk_req_safe_read( /* Called without dev->lock held, and only in interrupt context. */ static void ubd_handler(void) { - struct ubd *ubd; - struct list_head *list, *next_ele; - unsigned long flags; int n; int count; @@ -520,23 +510,17 @@ static void ubd_handler(void) return; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - blk_end_request( - (*irq_req_buffer)[count]->req, - BLK_STS_OK, - (*irq_req_buffer)[count]->length - ); - kfree((*irq_req_buffer)[count]); + struct io_thread_req *io_req = (*irq_req_buffer)[count]; + int err = io_req->error ? 
BLK_STS_IOERR : BLK_STS_OK; + + if (!blk_update_request(io_req->req, err, io_req->length)) + __blk_mq_end_request(io_req->req, err); + + kfree(io_req); } } - reactivate_fd(thread_fd, UBD_IRQ); - list_for_each_safe(list, next_ele, &restart){ - ubd = container_of(list, struct ubd, restart); - list_del_init(&ubd->restart); - spin_lock_irqsave(&ubd->lock, flags); - do_ubd_request(ubd->queue); - spin_unlock_irqrestore(&ubd->lock, flags); - } + reactivate_fd(thread_fd, UBD_IRQ); } static irqreturn_t ubd_intr(int irq, void *dev) @@ -857,6 +841,7 @@ static void ubd_device_release(struct device *dev) struct ubd *ubd_dev = dev_get_drvdata(dev); blk_cleanup_queue(ubd_dev->queue); + blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); } @@ -891,7 +876,7 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - device_add_disk(parent, disk); + device_add_disk(parent, disk, NULL); *disk_out = disk; return 0; @@ -899,6 +884,10 @@ static int ubd_disk_register(int major, u64 size, int unit, #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) +static const struct blk_mq_ops ubd_mq_ops = { + .queue_rq = ubd_queue_rq, +}; + static int ubd_add(int n, char **error_out) { struct ubd *ubd_dev = &ubd_devs[n]; @@ -915,15 +904,23 @@ static int ubd_add(int n, char **error_out) ubd_dev->size = ROUND_BLOCK(ubd_dev->size); - INIT_LIST_HEAD(&ubd_dev->restart); - sg_init_table(ubd_dev->sg, MAX_SG); + ubd_dev->tag_set.ops = &ubd_mq_ops; + ubd_dev->tag_set.queue_depth = 64; + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ubd_dev->tag_set.driver_data = ubd_dev; + ubd_dev->tag_set.nr_hw_queues = 1; - err = -ENOMEM; - ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); - if (ubd_dev->queue == NULL) { - *error_out = "Failed to initialize device queue"; + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); + if (err) goto out; + + ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); + if (IS_ERR(ubd_dev->queue)) { + err = PTR_ERR(ubd_dev->queue); + goto out_cleanup; } + ubd_dev->queue->queuedata = ubd_dev; blk_queue_write_cache(ubd_dev->queue, true, false); @@ -931,7 +928,7 @@ static int ubd_add(int n, char **error_out) err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; - goto out_cleanup; + goto out_cleanup_tags; } if (fake_major != UBD_MAJOR) @@ -949,6 +946,8 @@ static int ubd_add(int n, char **error_out) out: return err; +out_cleanup_tags: + blk_mq_free_tag_set(&ubd_dev->tag_set); out_cleanup: blk_cleanup_queue(ubd_dev->queue); goto out; @@ -1290,123 +1289,82 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, req->bitmap_words, bitmap_len); } -/* Called with dev->lock held */ -static void prepare_request(struct request *req, struct io_thread_req *io_req, - unsigned long long offset, int page_offset, - int len, struct page *page) +static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, + u64 off, struct bio_vec *bvec) { - struct gendisk *disk = req->rq_disk; - struct ubd *ubd_dev = disk->private_data; - - io_req->req = req; - io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : - ubd_dev->fd; - io_req->fds[1] = ubd_dev->fd; - io_req->cow_offset = -1; - io_req->offset = offset; - io_req->length = len; - io_req->error = 0; - io_req->sector_mask = 0; - - io_req->op = (rq_data_dir(req) == READ) ? 
UBD_READ : UBD_WRITE; - io_req->offsets[0] = 0; - io_req->offsets[1] = ubd_dev->cow.data_offset; - io_req->buffer = page_address(page) + page_offset; - io_req->sectorsize = 1 << 9; - - if(ubd_dev->cow.file != NULL) - cowify_req(io_req, ubd_dev->cow.bitmap, - ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); - -} + struct ubd *dev = hctx->queue->queuedata; + struct io_thread_req *io_req; + int ret; -/* Called with dev->lock held */ -static void prepare_flush_request(struct request *req, - struct io_thread_req *io_req) -{ - struct gendisk *disk = req->rq_disk; - struct ubd *ubd_dev = disk->private_data; + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + if (!io_req) + return -ENOMEM; io_req->req = req; - io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : - ubd_dev->fd; - io_req->op = UBD_FLUSH; -} + if (dev->cow.file) + io_req->fds[0] = dev->cow.fd; + else + io_req->fds[0] = dev->fd; -static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) -{ - int n = os_write_file(thread_fd, &io_req, - sizeof(io_req)); - if (n != sizeof(io_req)) { - if (n != -EAGAIN) - printk("write to io thread failed, " - "errno = %d\n", -n); - else if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); + if (req_op(req) == REQ_OP_FLUSH) { + io_req->op = UBD_FLUSH; + } else { + io_req->fds[1] = dev->fd; + io_req->cow_offset = -1; + io_req->offset = off; + io_req->length = bvec->bv_len; + io_req->error = 0; + io_req->sector_mask = 0; + + io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; + io_req->offsets[0] = 0; + io_req->offsets[1] = dev->cow.data_offset; + io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; + io_req->sectorsize = 1 << 9; + + if (dev->cow.file) { + cowify_req(io_req, dev->cow.bitmap, + dev->cow.bitmap_offset, dev->cow.bitmap_len); + } + } + ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); + if (ret != sizeof(io_req)) { + if (ret != -EAGAIN) + pr_err("write to io thread failed: %d\n", -ret); kfree(io_req); - return false; } - return true; + + return ret; } -/* Called with dev->lock held */ -static void do_ubd_request(struct request_queue *q) +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct io_thread_req *io_req; - struct request *req; - - while(1){ - struct ubd *dev = q->queuedata; - if(dev->request == NULL){ - struct request *req = blk_fetch_request(q); - if(req == NULL) - return; - - dev->request = req; - dev->rq_pos = blk_rq_pos(req); - dev->start_sg = 0; - dev->end_sg = blk_rq_map_sg(q, req, dev->sg); - } - - req = dev->request; + struct request *req = bd->rq; + int ret = 0; - if (req_op(req) == REQ_OP_FLUSH) { - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if (io_req == NULL) { - if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_flush_request(req, io_req); - if (submit_request(io_req, dev) == false) - return; - } + blk_mq_start_request(req); - while(dev->start_sg < dev->end_sg){ - struct scatterlist *sg = &dev->sg[dev->start_sg]; - - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if(io_req == NULL){ - if(list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_request(req, io_req, - (unsigned long long)dev->rq_pos << 9, - sg->offset, sg->length, sg_page(sg)); - - if (submit_request(io_req, dev) == false) - return; - - dev->rq_pos += sg->length >> 9; - dev->start_sg++; + if (req_op(req) == REQ_OP_FLUSH) { + ret = ubd_queue_one_vec(hctx, req, 
0, NULL); + } else { + struct req_iterator iter; + struct bio_vec bvec; + u64 off = (u64)blk_rq_pos(req) << 9; + + rq_for_each_segment(bvec, req, iter) { + ret = ubd_queue_one_vec(hctx, req, off, &bvec); + if (ret < 0) + goto out; + off += bvec.bv_len; } - dev->end_sg = 0; - dev->request = NULL; } +out: + if (ret < 0) { + blk_mq_requeue_request(req, true); + } + return BLK_STS_OK; } static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 60eae744d8fd..3a3b40f79558 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -4,6 +4,7 @@ config UNICORE32 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select DMA_DIRECT_OPS select HAVE_MEMBLOCK select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP @@ -20,7 +21,6 @@ config UNICORE32 select GENERIC_IOMAP select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE - select SWIOTLB help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index bfc7abe77905..1372553dc0a9 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h deleted file mode 100644 index 790bc2ef4af2..000000000000 --- a/arch/unicore32/include/asm/dma-mapping.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * linux/arch/unicore32/include/asm/dma-mapping.h - * - * Code specific to PKUnity SoC and UniCore ISA - * - * Copyright (C) 2001-2010 GUAN Xue-tao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
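As an aside on the ubd_kern.c conversion above: it follows the usual blk-mq driver shape — fill in a blk_mq_tag_set, derive the request_queue from it, start each request in .queue_rq, and complete it later from the driver's own completion path. The following is a minimal sketch of that shape, using only helpers that appear in the hunks above; the sketch_* names are illustrative, and the immediate completion stands in for ubd's round trip through its I/O thread.

	#include <linux/blk-mq.h>
	#include <linux/blkdev.h>
	#include <linux/err.h>

	/* Illustrative only: mirrors the shape of the ubd conversion above. */
	static blk_status_t sketch_queue_rq(struct blk_mq_hw_ctx *hctx,
					    const struct blk_mq_queue_data *bd)
	{
		struct request *req = bd->rq;

		blk_mq_start_request(req);

		/*
		 * A real driver hands the request to its backend here (ubd
		 * writes an io_thread_req to its I/O thread) and completes it
		 * later, e.g. with blk_update_request()/__blk_mq_end_request()
		 * as ubd_handler() now does.  The sketch completes at once.
		 */
		blk_mq_end_request(req, BLK_STS_OK);
		return BLK_STS_OK;
	}

	static const struct blk_mq_ops sketch_mq_ops = {
		.queue_rq = sketch_queue_rq,
	};

	static struct blk_mq_tag_set sketch_tag_set;
	static struct request_queue *sketch_queue;

	static int sketch_setup_queue(void)
	{
		int err;

		sketch_tag_set.ops = &sketch_mq_ops;
		sketch_tag_set.nr_hw_queues = 1;
		sketch_tag_set.queue_depth = 64;
		sketch_tag_set.numa_node = NUMA_NO_NODE;
		sketch_tag_set.flags = BLK_MQ_F_SHOULD_MERGE;

		err = blk_mq_alloc_tag_set(&sketch_tag_set);
		if (err)
			return err;

		sketch_queue = blk_mq_init_queue(&sketch_tag_set);
		if (IS_ERR(sketch_queue)) {
			blk_mq_free_tag_set(&sketch_tag_set);
			return PTR_ERR(sketch_queue);
		}
		return 0;
	}

Teardown mirrors the ubd_device_release() hunk above: blk_cleanup_queue() first, then blk_mq_free_tag_set().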
- */ -#ifndef __UNICORE_DMA_MAPPING_H__ -#define __UNICORE_DMA_MAPPING_H__ - -#include <linux/swiotlb.h> - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &swiotlb_dma_ops; -} - -#endif diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index f4950fbfe574..5f72a8d1d953 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -234,9 +234,6 @@ void __init bootmem_init(void) uc32_bootmem_init(min, max_low); -#ifdef CONFIG_SWIOTLB - swiotlb_init(1); -#endif /* * Sparsemem tries to allocate bootmem in memory_present(), * so must be done after the fixed reservations diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 28764dacf018..466f66c8a7f8 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += -Wno-pointer-sign KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2767c625a52c..fbbf1ba57ec6 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -389,6 +389,13 @@ * that register for the time this macro runs */ + /* + * The high bits of the CS dword (__csh) are used for + * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case + * hardware didn't do this for us. + */ + andl $(0x0000ffff), PT_CS(%esp) + /* Are we on the entry stack? Bail out if not! */ movl PER_CPU_VAR(cpu_entry_area), %ecx addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx @@ -407,12 +414,6 @@ /* Load top of task-stack into %edi */ movl TSS_entry2task_stack(%edi), %edi - /* - * Clear unused upper bits of the dword containing the word-sized CS - * slot in pt_regs in case hardware didn't clear it for us. - */ - andl $(0x0000ffff), PT_CS(%esp) - /* Special case - entry from kernel mode via entry stack */ #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 957dfb693ecc..f95dcb209fdf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry) xorl %ebx, %ebx 1: + /* + * Always stash CR3 in %r14. This value will be restored, + * verbatim, at exit. Needed if paranoid_entry interrupted + * another entry that already switched to the user CR3 value + * but has not yet returned to userspace. + * + * This is also why CS (stashed in the "iret frame" by the + * hardware at entry) can not be used: this may be a return + * to kernel code, but with a user CR3 value. + */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ret @@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? 
*/ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 .Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel @@ -1626,6 +1638,7 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 testl %ebx, %ebx /* swapgs needed? */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a38bf5a1e37a..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 6de64840dd22..9a92a3ac2ac5 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -369,18 +369,6 @@ extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); extern bool is_early_ioremap_ptep(pte_t *ptep); -#ifdef CONFIG_XEN -#include <xen/xen.h> -struct bio_vec; - -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); - -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) -#endif /* CONFIG_XEN */ - #define IO_SPACE_LIMIT 0xffff #include <asm-generic/io.h> diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e9202a0de8f0..1a19d11cfbbd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -185,22 +185,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b64acb08a62b..106b7d0e2dae 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index d383140e1dc8..068d9b067c83 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_XEN_EVENTS_H #define _ASM_X86_XEN_EVENTS_H +#include <xen/xen.h> + enum ipi_vector { XEN_RESCHEDULE_VECTOR, XEN_CALL_FUNCTION_VECTOR, diff --git a/arch/x86/kernel/amd_gart_64.c 
b/arch/x86/kernel/amd_gart_64.c index f299d8a479bb..3f9d1b4019bb 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -482,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, { void *vaddr; - vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); + vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs); if (!vaddr || !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) return vaddr; @@ -494,7 +494,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, goto out_free; return vaddr; out_free: - dma_direct_free(dev, size, vaddr, *dma_addr, attrs); + dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs); return NULL; } @@ -504,7 +504,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_direct_free(dev, size, vaddr, dma_addr, attrs); + dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 285eb3ec4200..3736f6dc9545 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -529,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive); + unsigned long cbm, int closid, bool exclusive); unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, - u32 cbm); + unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); int rdt_pseudo_lock_init(void); void rdt_pseudo_lock_release(void); diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c index 40f3903ae5d9..f8c260d522ca 100644 --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c @@ -797,25 +797,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) /** * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked * @d: RDT domain - * @_cbm: CBM to test + * @cbm: CBM to test * - * @d represents a cache instance and @_cbm a capacity bitmask that is - * considered for it. Determine if @_cbm overlaps with any existing + * @d represents a cache instance and @cbm a capacity bitmask that is + * considered for it. Determine if @cbm overlaps with any existing * pseudo-locked region on @d. * - * Return: true if @_cbm overlaps with pseudo-locked region on @d, false + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * + * Return: true if @cbm overlaps with pseudo-locked region on @d, false * otherwise. 
*/ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm) +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *cbm_b; unsigned int cbm_len; + unsigned long cbm_b; if (d->plr) { cbm_len = d->plr->r->cache.cbm_len; - cbm_b = (unsigned long *)&d->plr->cbm; - if (bitmap_intersects(cbm, cbm_b, cbm_len)) + cbm_b = d->plr->cbm; + if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) return true; } return false; diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 1b8e86a5d5e1..b140c68bc14b 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -975,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, * is false then overlaps with any resource group or hardware entities * will be considered. * + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * * Return: false if CBM does not overlap, true if it does. */ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive) + unsigned long cbm, int closid, bool exclusive) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *ctrl_b; enum rdtgrp_mode mode; + unsigned long ctrl_b; u32 *ctrl; int i; /* Check for any overlap with regions used by hardware directly */ if (!exclusive) { - if (bitmap_intersects(cbm, - (unsigned long *)&r->cache.shareable_bits, - r->cache.cbm_len)) + ctrl_b = r->cache.shareable_bits; + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) return true; } /* Check for overlap with other resource groups */ ctrl = d->ctrl_val; for (i = 0; i < closids_supported(); i++, ctrl++) { - ctrl_b = (unsigned long *)ctrl; + ctrl_b = *ctrl; mode = rdtgroup_mode_by_closid(i); if (closid_allocated(i) && i != closid && mode != RDT_MODE_PSEUDO_LOCKSETUP) { - if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) { + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { if (exclusive) { if (mode == RDT_MODE_EXCLUSIVE) return true; @@ -1138,15 +1139,18 @@ out: * computed by first dividing the total cache size by the CBM length to * determine how many bytes each bit in the bitmask represents. The result * is multiplied with the number of bits set in the bitmask. + * + * @cbm is unsigned long, even if only 32 bits are used to make the + * bitmap functions work correctly. */ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, u32 cbm) + struct rdt_domain *d, unsigned long cbm) { struct cpu_cacheinfo *ci; unsigned int size = 0; int num_b, i; - num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len); + num_b = bitmap_weight(&cbm, r->cache.cbm_len); ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == r->cache_level) { @@ -2353,6 +2357,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) u32 used_b = 0, unused_b = 0; u32 closid = rdtgrp->closid; struct rdt_resource *r; + unsigned long tmp_cbm; enum rdtgrp_mode mode; struct rdt_domain *d; int i, ret; @@ -2390,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) * modify the CBM based on system availability. 
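The kernel-doc above and the rdtgroup changes around it all hinge on one point: the bitmap helpers operate on unsigned long words, so passing them the address of a u32 makes them read past the variable on 64-bit kernels. A minimal sketch of the broken and fixed patterns, with a hypothetical cbm_len of at most 32 bits and a sketch_* name that is not part of the patch:

	#include <linux/bitmap.h>
	#include <linux/types.h>

	static int sketch_cbm_weight(u32 cbm, unsigned int cbm_len)
	{
		unsigned long tmp_cbm;

		/*
		 * Broken pattern removed by the patch: bitmap_weight() reads
		 * whole unsigned long words (64 bits on x86-64), so the cast
		 * below touches memory beyond the 32-bit variable:
		 *
		 *	return bitmap_weight((unsigned long *)&cbm, cbm_len);
		 */

		/* Safe pattern used above: widen into an unsigned long first. */
		tmp_cbm = cbm;
		return bitmap_weight(&tmp_cbm, cbm_len);
	}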
*/ cbm_ensure_valid(&d->new_ctrl, r); - if (bitmap_weight((unsigned long *) &d->new_ctrl, - r->cache.cbm_len) < - r->cache.min_cbm_bits) { + /* + * Assign the u32 CBM to an unsigned long to ensure + * that bitmap_weight() does not access out-of-bound + * memory. + */ + tmp_cbm = d->new_ctrl; + if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < + r->cache.min_cbm_bits) { rdt_last_cmd_printf("no space on %s:%d\n", r->name, d->id); return -ENOSPC; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 23f1691670b6..61a949d84dfa 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Validate and sanitize the copied state. */ - struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 661583662430..71c0b01d93b1 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override, int __init pci_swiotlb_detect_4gb(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ -#ifdef CONFIG_X86_64 if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) swiotlb = 1; -#endif /* * If SME is active then swiotlb will be set to 1 so that bounce diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be01328eb755..fddaefc51fb6 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,7 +25,7 @@ #include <asm/time.h> #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b52bd2b6cdb4..6d5dc5dabfd7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -58,7 +58,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void cyc2ns_read_begin(struct cyc2ns_data *data) +void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -75,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void cyc2ns_read_end(void) +void __always_inline cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -104,7 +104,7 @@ void cyc2ns_read_end(void) * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d96092b35936..61ccfb13899e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) static inline bool svm_sev_enabled(void) { - return max_sev_asid; + return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? 
max_sev_asid : 0; } static inline bool sev_guest(struct kvm *kvm) { +#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; +#else + return false; +#endif } static inline int sev_get_asid(struct kvm *kvm) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 612fd17be635..e665aa7167cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1572,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm) goto out; } + /* + * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the + * base of EPT PML4 table, strip off EPT configuration information. + */ ret = hyperv_flush_guest_mapping( - to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer); + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); out: spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 089e78c4effd..59274e2c1ac4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd) #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) +#define MAX_UNSHARED_PTRS_PER_PGD \ + max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) @@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd) * and initialize the kernel pmds here. */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD +#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD /* * We allocate separate PMDs for the kernel part of the user page-table @@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd) */ #define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) +#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { @@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) /* No need to prepopulate any pagetable entries in non-PAE modes. 
*/ #define PREALLOCATED_PMDS 0 +#define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 +#define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) @@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd) pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; - pmd_t *u_pmds[PREALLOCATED_USER_PMDS]; - pmd_t *pmds[PREALLOCATED_PMDS]; + pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; + pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pgd = _pgd_alloc(); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2eeddd814653..0ca46e03b830 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -5,6 +5,7 @@ #include <linux/kexec.h> #include <linux/slab.h> +#include <xen/xen.h> #include <xen/features.h> #include <xen/page.h> #include <xen/interface/memory.h> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index c85d1a88f476..2a9025343534 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -11,6 +11,7 @@ #include <asm/xen/interface.h> #include <asm/xen/hypercall.h> +#include <xen/xen.h> #include <xen/interface/memory.h> #include <xen/interface/hvm/start_info.h> diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 33a783c77d96..b99585034dd2 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -23,6 +23,7 @@ #include <linux/io.h> #include <linux/export.h> +#include <xen/xen.h> #include <xen/platform_pci.h> #include "xen-ops.h" diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 95997e6c0696..0972184f3f19 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -3,6 +3,7 @@ #include <linux/interrupt.h> #include <asm/xen/hypercall.h> +#include <xen/xen.h> #include <xen/page.h> #include <xen/interface/xen.h> #include <xen/interface/vcpu.h> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index b9ad83a0ee5d..ea5d8d03e53b 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -13,7 +13,7 @@ config XTENSA select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select DMA_NONCOHERENT_OPS + select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_IRQ_SHOW diff --git a/block/Kconfig b/block/Kconfig index 1f2469a0123c..f7045aa47edb 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -74,7 +74,6 @@ config BLK_DEV_BSG config BLK_DEV_BSGLIB bool "Block layer SG support v4 helper lib" - default n select BLK_DEV_BSG select BLK_SCSI_REQUEST help @@ -107,7 +106,6 @@ config BLK_DEV_ZONED config BLK_DEV_THROTTLING bool "Block layer bio throttling support" depends on BLK_CGROUP=y - default n ---help--- Block layer bio throttling support. It can be used to limit the IO rate to a device. IO rate policies are per cgroup and @@ -119,7 +117,6 @@ config BLK_DEV_THROTTLING config BLK_DEV_THROTTLING_LOW bool "Block throttling .low limit interface support (EXPERIMENTAL)" depends on BLK_DEV_THROTTLING - default n ---help--- Add .low limit interface for block throttling. The low limit is a best effort limit to prioritize cgroups. Depending on the setting, the limit @@ -130,7 +127,6 @@ config BLK_DEV_THROTTLING_LOW config BLK_CMDLINE_PARSER bool "Block device command line partition parser" - default n ---help--- Enabling this option allows you to specify the partition layout from the kernel boot args. 
This is typically of use for embedded devices @@ -141,7 +137,6 @@ config BLK_CMDLINE_PARSER config BLK_WBT bool "Enable support for block device writeback throttling" - default n ---help--- Enabling this option enables the block layer to throttle buffered background writeback from the VM, making it more smooth and having @@ -152,7 +147,6 @@ config BLK_WBT config BLK_CGROUP_IOLATENCY bool "Enable support for latency based cgroup IO protection" depends on BLK_CGROUP=y - default n ---help--- Enabling this option enables the .latency interface for IO throttling. The IO controller will attempt to maintain average IO latencies below @@ -163,7 +157,6 @@ config BLK_CGROUP_IOLATENCY config BLK_WBT_SQ bool "Single queue writeback throttling" - default n depends on BLK_WBT ---help--- Enable writeback throttling by default on legacy single queue devices @@ -228,4 +221,7 @@ config BLK_MQ_RDMA depends on BLOCK && INFINIBAND default y +config BLK_PM + def_bool BLOCK && PM + source block/Kconfig.iosched diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index a4a8914bf7a4..f95a48b0d7b2 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -36,7 +36,6 @@ config IOSCHED_CFQ config CFQ_GROUP_IOSCHED bool "CFQ Group Scheduling support" depends on IOSCHED_CFQ && BLK_CGROUP - default n ---help--- Enable group IO scheduling in CFQ. @@ -82,7 +81,6 @@ config MQ_IOSCHED_KYBER config IOSCHED_BFQ tristate "BFQ I/O scheduler" - default n ---help--- BFQ I/O scheduler for BLK-MQ. BFQ distributes the bandwidth of of the device among all processes according to their weights, @@ -94,7 +92,6 @@ config IOSCHED_BFQ config BFQ_GROUP_IOSCHED bool "BFQ hierarchical scheduling support" depends on IOSCHED_BFQ && BLK_CGROUP - default n ---help--- Enable hierarchical scheduling in BFQ, using the blkio diff --git a/block/Makefile b/block/Makefile index 572b33f32c07..27eac600474f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o +obj-$(CONFIG_BLK_PM) += blk-pm.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 9fe5952d117d..d9a7916ff0ab 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = bio_blkcg(bio)->css.serial_nr; + serial_nr = __bio_blkcg(bio)->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger @@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) goto out; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); + bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 653100fb719e..6075100f03a5 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -624,12 +624,13 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) } /* - * Tell whether there are active queues or groups with differentiated weights. + * Tell whether there are active queues with different weights or + * active groups. 
*/ -static bool bfq_differentiated_weights(struct bfq_data *bfqd) +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) { /* - * For weights to differ, at least one of the trees must contain + * For queue weights to differ, queue_weights_tree must contain * at least two nodes. */ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && @@ -637,9 +638,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) bfqd->queue_weights_tree.rb_node->rb_right) #ifdef CONFIG_BFQ_GROUP_IOSCHED ) || - (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && - (bfqd->group_weights_tree.rb_node->rb_left || - bfqd->group_weights_tree.rb_node->rb_right) + (bfqd->num_active_groups > 0 #endif ); } @@ -657,26 +656,25 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) * 3) all active groups at the same level in the groups tree have the same * number of children. * - * Unfortunately, keeping the necessary state for evaluating exactly the - * above symmetry conditions would be quite complex and time-consuming. - * Therefore this function evaluates, instead, the following stronger - * sub-conditions, for which it is much easier to maintain the needed - * state: + * Unfortunately, keeping the necessary state for evaluating exactly + * the last two symmetry sub-conditions above would be quite complex + * and time consuming. Therefore this function evaluates, instead, + * only the following stronger two sub-conditions, for which it is + * much easier to maintain the needed state: * 1) all active queues have the same weight, - * 2) all active groups have the same weight, - * 3) all active groups have at most one active child each. - * In particular, the last two conditions are always true if hierarchical - * support and the cgroups interface are not enabled, thus no state needs - * to be maintained in this case. + * 2) there are no active groups. + * In particular, the last condition is always true if hierarchical + * support or the cgroups interface are not enabled, thus no state + * needs to be maintained in this case. */ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) { - return !bfq_differentiated_weights(bfqd); + return !bfq_varied_queue_weights_or_active_groups(bfqd); } /* * If the weight-counter tree passed as input contains no counter for - * the weight of the input entity, then add that counter; otherwise just + * the weight of the input queue, then add that counter; otherwise just * increment the existing counter. * * Note that weight-counter trees contain few nodes in mostly symmetric @@ -687,25 +685,25 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) * In most scenarios, the rate at which nodes are created/destroyed * should be low too. 
*/ -void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, +void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct rb_root *root) { + struct bfq_entity *entity = &bfqq->entity; struct rb_node **new = &(root->rb_node), *parent = NULL; /* - * Do not insert if the entity is already associated with a + * Do not insert if the queue is already associated with a * counter, which happens if: - * 1) the entity is associated with a queue, - * 2) a request arrival has caused the queue to become both + * 1) a request arrival has caused the queue to become both * non-weight-raised, and hence change its weight, and * backlogged; in this respect, each of the two events * causes an invocation of this function, - * 3) this is the invocation of this function caused by the + * 2) this is the invocation of this function caused by the * second event. This second invocation is actually useless, * and we handle this fact by exiting immediately. More * efficient or clearer solutions might possibly be adopted. */ - if (entity->weight_counter) + if (bfqq->weight_counter) return; while (*new) { @@ -715,7 +713,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, parent = *new; if (entity->weight == __counter->weight) { - entity->weight_counter = __counter; + bfqq->weight_counter = __counter; goto inc_counter; } if (entity->weight < __counter->weight) @@ -724,66 +722,67 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, new = &((*new)->rb_right); } - entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), - GFP_ATOMIC); + bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), + GFP_ATOMIC); /* * In the unlucky event of an allocation failure, we just - * exit. This will cause the weight of entity to not be - * considered in bfq_differentiated_weights, which, in its - * turn, causes the scenario to be deemed wrongly symmetric in - * case entity's weight would have been the only weight making - * the scenario asymmetric. On the bright side, no unbalance - * will however occur when entity becomes inactive again (the - * invocation of this function is triggered by an activation - * of entity). In fact, bfq_weights_tree_remove does nothing - * if !entity->weight_counter. + * exit. This will cause the weight of queue to not be + * considered in bfq_varied_queue_weights_or_active_groups, + * which, in its turn, causes the scenario to be deemed + * wrongly symmetric in case bfqq's weight would have been + * the only weight making the scenario asymmetric. On the + * bright side, no unbalance will however occur when bfqq + * becomes inactive again (the invocation of this function + * is triggered by an activation of queue). In fact, + * bfq_weights_tree_remove does nothing if + * !bfqq->weight_counter. 
*/ - if (unlikely(!entity->weight_counter)) + if (unlikely(!bfqq->weight_counter)) return; - entity->weight_counter->weight = entity->weight; - rb_link_node(&entity->weight_counter->weights_node, parent, new); - rb_insert_color(&entity->weight_counter->weights_node, root); + bfqq->weight_counter->weight = entity->weight; + rb_link_node(&bfqq->weight_counter->weights_node, parent, new); + rb_insert_color(&bfqq->weight_counter->weights_node, root); inc_counter: - entity->weight_counter->num_active++; + bfqq->weight_counter->num_active++; } /* - * Decrement the weight counter associated with the entity, and, if the + * Decrement the weight counter associated with the queue, and, if the * counter reaches 0, remove the counter from the tree. * See the comments to the function bfq_weights_tree_add() for considerations * about overhead. */ void __bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_entity *entity, + struct bfq_queue *bfqq, struct rb_root *root) { - if (!entity->weight_counter) + if (!bfqq->weight_counter) return; - entity->weight_counter->num_active--; - if (entity->weight_counter->num_active > 0) + bfqq->weight_counter->num_active--; + if (bfqq->weight_counter->num_active > 0) goto reset_entity_pointer; - rb_erase(&entity->weight_counter->weights_node, root); - kfree(entity->weight_counter); + rb_erase(&bfqq->weight_counter->weights_node, root); + kfree(bfqq->weight_counter); reset_entity_pointer: - entity->weight_counter = NULL; + bfqq->weight_counter = NULL; } /* - * Invoke __bfq_weights_tree_remove on bfqq and all its inactive - * parent entities. + * Invoke __bfq_weights_tree_remove on bfqq and decrement the number + * of active groups for each queue's inactive parent entity. */ void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_entity *entity = bfqq->entity.parent; - __bfq_weights_tree_remove(bfqd, &bfqq->entity, + __bfq_weights_tree_remove(bfqd, bfqq, &bfqd->queue_weights_tree); for_each_entity(entity) { @@ -797,17 +796,13 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, * next_in_service for details on why * in_service_entity must be checked too). * - * As a consequence, the weight of entity is - * not to be removed. In addition, if entity - * is active, then its parent entities are - * active as well, and thus their weights are - * not to be removed either. In the end, this - * loop must stop here. + * As a consequence, its parent entities are + * active as well, and thus this loop must + * stop here. */ break; } - __bfq_weights_tree_remove(bfqd, entity, - &bfqd->group_weights_tree); + bfqd->num_active_groups--; } } @@ -3182,6 +3177,13 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } +static bool bfq_bfqq_injectable(struct bfq_queue *bfqq) +{ + return BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && + blk_queue_nonrot(bfqq->bfqd->queue) && + bfqq->bfqd->hw_tag; +} + /** * bfq_bfqq_expire - expire a queue. * @bfqd: device owning the queue. 
@@ -3291,6 +3293,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, if (ref == 1) /* bfqq is gone, no more actions on it */ return; + bfqq->injected_service = 0; + /* mark bfqq as waiting a request only if a bic still points to it */ if (!bfq_bfqq_busy(bfqq) && reason != BFQQE_BUDGET_TIMEOUT && @@ -3497,9 +3501,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * symmetric scenario where: * (i) each of these processes must get the same throughput as * the others; - * (ii) all these processes have the same I/O pattern - (either sequential or random). - * In fact, in such a scenario, the drive will tend to treat + * (ii) the I/O of each process has the same properties, in + * terms of locality (sequential or random), direction + * (reads or writes), request sizes, greediness + * (from I/O-bound to sporadic), and so on. + * In fact, in such a scenario, the drive tends to treat * the requests of each of these processes in about the same * way as the requests of the others, and thus to provide * each of these processes with about the same throughput @@ -3508,18 +3514,50 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * certainly needed to guarantee that bfqq receives its * assigned fraction of the device throughput (see [1] for * details). + * The problem is that idling may significantly reduce + * throughput with certain combinations of types of I/O and + * devices. An important example is sync random I/O, on flash + * storage with command queueing. So, unless bfqq falls in the + * above cases where idling also boosts throughput, it would + * be important to check conditions (i) and (ii) accurately, + * so as to avoid idling when not strictly needed for service + * guarantees. + * + * Unfortunately, it is extremely difficult to thoroughly + * check condition (ii). And, in case there are active groups, + * it becomes very difficult to check condition (i) too. In + * fact, if there are active groups, then, for condition (i) + * to become false, it is enough that an active group contains + * more active processes or sub-groups than some other active + * group. We address this issue with the following bi-modal + * behavior, implemented in the function + * bfq_symmetric_scenario(). * - * We address this issue by controlling, actually, only the - * symmetry sub-condition (i), i.e., provided that - * sub-condition (i) holds, idling is not performed, - * regardless of whether sub-condition (ii) holds. In other - * words, only if sub-condition (i) holds, then idling is + * If there are active groups, then the scenario is tagged as + * asymmetric, conservatively, without checking any of the + * conditions (i) and (ii). So the device is idled for bfqq. + * This behavior matches also the fact that groups are created + * exactly if controlling I/O (to preserve bandwidth and + * latency guarantees) is a primary concern. + * + * On the opposite end, if there are no active groups, then + * only condition (i) is actually controlled, i.e., provided + * that condition (i) holds, idling is not performed, + * regardless of whether condition (ii) holds. In other words, + * only if condition (i) does not hold, then idling is * allowed, and the device tends to be prevented from queueing - * many requests, possibly of several processes. The reason - * for not controlling also sub-condition (ii) is that we - * exploit preemption to preserve guarantees in case of - * symmetric scenarios, even if (ii) does not hold, as - * explained in the next two paragraphs. 
+ * many requests, possibly of several processes. Since there + * are no active groups, then, to control condition (i) it is + * enough to check whether all active queues have the same + * weight. + * + * Not checking condition (ii) evidently exposes bfqq to the + * risk of getting less throughput than its fair share. + * However, for queues with the same weight, a further + * mechanism, preemption, mitigates or even eliminates this + * problem. And it does so without consequences on overall + * throughput. This mechanism and its benefits are explained + * in the next three paragraphs. * * Even if a queue, say Q, is expired when it remains idle, Q * can still preempt the new in-service queue if the next @@ -3533,11 +3571,7 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * idling allows the internal queues of the device to contain * many requests, and thus to reorder requests, we can rather * safely assume that the internal scheduler still preserves a - * minimum of mid-term fairness. The motivation for using - * preemption instead of idling is that, by not idling, - * service guarantees are preserved without minimally - * sacrificing throughput. In other words, both a high - * throughput and its desired distribution are obtained. + * minimum of mid-term fairness. * * More precisely, this preemption-based, idleless approach * provides fairness in terms of IOPS, and not sectors per @@ -3556,22 +3590,27 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * 1024/8 times as high as the service received by the other * queue. * - * On the other hand, device idling is performed, and thus - * pure sector-domain guarantees are provided, for the - * following queues, which are likely to need stronger - * throughput guarantees: weight-raised queues, and queues - * with a higher weight than other queues. When such queues - * are active, sub-condition (i) is false, which triggers - * device idling. + * The motivation for using preemption instead of idling (for + * queues with the same weight) is that, by not idling, + * service guarantees are preserved (completely or at least in + * part) without minimally sacrificing throughput. And, if + * there is no active group, then the primary expectation for + * this device is probably a high throughput. * - * According to the above considerations, the next variable is - * true (only) if sub-condition (i) holds. To compute the - * value of this variable, we not only use the return value of - * the function bfq_symmetric_scenario(), but also check - * whether bfqq is being weight-raised, because - * bfq_symmetric_scenario() does not take into account also - * weight-raised queues (see comments on - * bfq_weights_tree_add()). + * We are now left only with explaining the additional + * compound condition that is checked below for deciding + * whether the scenario is asymmetric. To explain this + * compound condition, we need to add that the function + * bfq_symmetric_scenario checks the weights of only + * non-weight-raised queues, for efficiency reasons (see + * comments on bfq_weights_tree_add()). Then the fact that + * bfqq is weight-raised is checked explicitly here. More + * precisely, the compound condition below takes into account + * also the fact that, even if bfqq is being weight-raised, + * the scenario is still symmetric if all active queues happen + * to be weight-raised. Actually, we should be even more + * precise here, and differentiate between interactive weight + * raising and soft real-time weight raising. 
* * As a side note, it is worth considering that the above * device-idling countermeasures may however fail in the @@ -3583,7 +3622,8 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * to let requests be served in the desired order until all * the requests already queued in the device have been served. */ - asymmetric_scenario = bfqq->wr_coeff > 1 || + asymmetric_scenario = (bfqq->wr_coeff > 1 && + bfqd->wr_busy_queues < bfqd->busy_queues) || !bfq_symmetric_scenario(bfqd); /* @@ -3629,6 +3669,30 @@ static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq); } +static struct bfq_queue *bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + + /* + * A linear search; but, with a high probability, very few + * steps are needed to find a candidate queue, i.e., a queue + * with enough budget left for its next request. In fact: + * - BFQ dynamically updates the budget of every queue so as + * to accommodate the expected backlog of the queue; + * - if a queue gets all its requests dispatched as injected + * service, then the queue is removed from the active list + * (and re-added only if it gets new requests, but with + * enough budget for its new backlog). + */ + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) + if (!RB_EMPTY_ROOT(&bfqq->sort_list) && + bfq_serv_to_charge(bfqq->next_rq, bfqq) <= + bfq_bfqq_budget_left(bfqq)) + return bfqq; + + return NULL; +} + /* * Select a queue for service. If we have a current queue in service, * check whether to continue servicing it, or retrieve and set a new one. @@ -3710,10 +3774,19 @@ check_queue: * No requests pending. However, if the in-service queue is idling * for a new request, or has requests waiting for a completion and * may idle after their completion, then keep it anyway. + * + * Yet, to boost throughput, inject service from other queues if + * possible. */ if (bfq_bfqq_wait_request(bfqq) || (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) { - bfqq = NULL; + if (bfq_bfqq_injectable(bfqq) && + bfqq->injected_service * bfqq->inject_coeff < + bfqq->entity.service * 10) + bfqq = bfq_choose_bfqq_for_injection(bfqd); + else + bfqq = NULL; + goto keep_queue; } @@ -3803,6 +3876,14 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, bfq_dispatch_remove(bfqd->queue, rq); + if (bfqq != bfqd->in_service_queue) { + if (likely(bfqd->in_service_queue)) + bfqd->in_service_queue->injected_service += + bfq_serv_to_charge(rq, bfqq); + + goto return_rq; + } + /* * If weight raising has to terminate for bfqq, then next * function causes an immediate update of bfqq's weight, @@ -3821,13 +3902,12 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, * belongs to CLASS_IDLE and other queues are waiting for * service. */ - if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq)) - goto expire; - - return rq; + if (!(bfqd->busy_queues > 1 && bfq_class_idle(bfqq))) + goto return_rq; -expire: bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED); + +return_rq: return rq; } @@ -4232,6 +4312,13 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_mark_bfqq_has_short_ttime(bfqq); bfq_mark_bfqq_sync(bfqq); bfq_mark_bfqq_just_created(bfqq); + /* + * Aggressively inject a lot of service: up to 90%. + * This coefficient remains constant during bfqq life, + * but this behavior might be changed, after enough + * testing and tuning. 
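To put numbers on the injection budget used above: dispatch keeps injecting while injected_service * inject_coeff < entity.service * 10, i.e. until injected_service reaches entity.service * 10 / inject_coeff. With the default inject_coeff = 1 set just below, injection may reach ten times bfqq's own service, which is the "up to 90%" (10/11, about 90.9% of the total) mentioned in the comment; a value of 10 would cap injection at bfqq's own service, i.e. at most half of the total service dispatched during bfqq's slot, matching the "1 (50%) -> 10" row of the table documented with the new bfq_queue fields further below.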
+ */ + bfqq->inject_coeff = 1; } else bfq_clear_bfqq_sync(bfqq); @@ -4297,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, rcu_read_lock(); - bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); + bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); if (!bfqg) { bfqq = &bfqd->oom_bfqq; goto out; @@ -5330,7 +5417,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->idle_slice_timer.function = bfq_idle_slice_timer; bfqd->queue_weights_tree = RB_ROOT; - bfqd->group_weights_tree = RB_ROOT; + bfqd->num_active_groups = 0; INIT_LIST_HEAD(&bfqd->active_list); INIT_LIST_HEAD(&bfqd->idle_list); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index a8a2e5aca4d4..77651d817ecd 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -108,15 +108,14 @@ struct bfq_sched_data { }; /** - * struct bfq_weight_counter - counter of the number of all active entities + * struct bfq_weight_counter - counter of the number of all active queues * with a given weight. */ struct bfq_weight_counter { - unsigned int weight; /* weight of the entities this counter refers to */ - unsigned int num_active; /* nr of active entities with this weight */ + unsigned int weight; /* weight of the queues this counter refers to */ + unsigned int num_active; /* nr of active queues with this weight */ /* - * Weights tree member (see bfq_data's @queue_weights_tree and - * @group_weights_tree) + * Weights tree member (see bfq_data's @queue_weights_tree) */ struct rb_node weights_node; }; @@ -151,8 +150,6 @@ struct bfq_weight_counter { struct bfq_entity { /* service_tree member */ struct rb_node rb_node; - /* pointer to the weight counter associated with this entity */ - struct bfq_weight_counter *weight_counter; /* * Flag, true if the entity is on a tree (either the active or @@ -266,6 +263,9 @@ struct bfq_queue { /* entity representing this queue in the scheduler */ struct bfq_entity entity; + /* pointer to the weight counter associated with this entity */ + struct bfq_weight_counter *weight_counter; + /* maximum budget allowed from the feedback mechanism */ int max_budget; /* budget expiration (in jiffies) */ @@ -351,6 +351,32 @@ struct bfq_queue { unsigned long split_time; /* time of last split */ unsigned long first_IO_time; /* time of first I/O for this queue */ + + /* max service rate measured so far */ + u32 max_service_rate; + /* + * Ratio between the service received by bfqq while it is in + * service, and the cumulative service (of requests of other + * queues) that may be injected while bfqq is empty but still + * in service. To increase precision, the coefficient is + * measured in tenths of unit. Here are some example of (1) + * ratios, (2) resulting percentages of service injected + * w.r.t. to the total service dispatched while bfqq is in + * service, and (3) corresponding values of the coefficient: + * 1 (50%) -> 10 + * 2 (33%) -> 20 + * 10 (9%) -> 100 + * 9.9 (9%) -> 99 + * 1.5 (40%) -> 15 + * 0.5 (66%) -> 5 + * 0.1 (90%) -> 1 + * + * So, if the coefficient is lower than 10, then + * injected service is more than bfqq service. + */ + unsigned int inject_coeff; + /* amount of service injected in current service slot */ + unsigned int injected_service; }; /** @@ -423,14 +449,9 @@ struct bfq_data { */ struct rb_root queue_weights_tree; /* - * rbtree of non-queue @bfq_entity weight counters, sorted by - * weight. Used to keep track of whether all @bfq_groups have - * the same weight. 
The tree contains one counter for each - * distinct weight associated to some active @bfq_group (see - * the comments to the functions bfq_weights_tree_[add|remove] - * for further details). + * number of groups with requests still waiting for completion */ - struct rb_root group_weights_tree; + unsigned int num_active_groups; /* * Number of bfq_queues containing requests (including the @@ -825,10 +846,10 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync); void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync); struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); -void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, +void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct rb_root *root); void __bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_entity *entity, + struct bfq_queue *bfqq, struct rb_root *root); void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index ae52bff43ce4..476b5a90a5a4 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -788,25 +788,29 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, new_weight = entity->orig_weight * (bfqq ? bfqq->wr_coeff : 1); /* - * If the weight of the entity changes, remove the entity - * from its old weight counter (if there is a counter - * associated with the entity), and add it to the counter - * associated with its new weight. + * If the weight of the entity changes, and the entity is a + * queue, remove the entity from its old weight counter (if + * there is a counter associated with the entity). */ if (prev_weight != new_weight) { - root = bfqq ? &bfqd->queue_weights_tree : - &bfqd->group_weights_tree; - __bfq_weights_tree_remove(bfqd, entity, root); + if (bfqq) { + root = &bfqd->queue_weights_tree; + __bfq_weights_tree_remove(bfqd, bfqq, root); + } else + bfqd->num_active_groups--; } entity->weight = new_weight; /* - * Add the entity to its weights tree only if it is - * not associated with a weight-raised queue. + * Add the entity, if it is not a weight-raised queue, + * to the counter associated with its new weight. */ - if (prev_weight != new_weight && - (bfqq ? bfqq->wr_coeff == 1 : 1)) - /* If we get here, root has been initialized. */ - bfq_weights_tree_add(bfqd, entity, root); + if (prev_weight != new_weight) { + if (bfqq && bfqq->wr_coeff == 1) { + /* If we get here, root has been initialized. */ + bfq_weights_tree_add(bfqd, bfqq, root); + } else + bfqd->num_active_groups++; + } new_st->wsum += entity->weight; @@ -1012,9 +1016,9 @@ static void __bfq_activate_entity(struct bfq_entity *entity, if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */ struct bfq_group *bfqg = container_of(entity, struct bfq_group, entity); + struct bfq_data *bfqd = bfqg->bfqd; - bfq_weights_tree_add(bfqg->bfqd, entity, - &bfqd->group_weights_tree); + bfqd->num_active_groups++; } #endif @@ -1181,10 +1185,17 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) st = bfq_entity_service_tree(entity); is_in_service = entity == sd->in_service_entity; - if (is_in_service) { - bfq_calc_finish(entity, entity->service); + bfq_calc_finish(entity, entity->service); + + if (is_in_service) sd->in_service_entity = NULL; - } + else + /* + * Non in-service entity: nobody will take care of + * resetting its service counter on expiration. Do it + * now. 
+ */ + entity->service = 0; if (entity->tree == &st->active) bfq_active_extract(st, entity); @@ -1685,7 +1696,7 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) if (!bfqq->dispatched) if (bfqq->wr_coeff == 1) - bfq_weights_tree_add(bfqd, &bfqq->entity, + bfq_weights_tree_add(bfqd, bfqq, &bfqd->queue_weights_tree); if (bfqq->wr_coeff > 1) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 67b5fb861a51..290af497997b 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -306,6 +306,8 @@ bool bio_integrity_prep(struct bio *bio) if (bio_data_dir(bio) == WRITE) { bio_integrity_process(bio, &bio->bi_iter, bi->profile->generate_fn); + } else { + bip->bio_iter = bio->bi_iter; } return true; @@ -331,20 +333,14 @@ static void bio_integrity_verify_fn(struct work_struct *work) container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); - struct bvec_iter iter = bio->bi_iter; /* * At the moment verify is called bio's iterator was advanced * during split and completion, we need to rewind iterator to * it's original position. */ - if (bio_rewind_iter(bio, &iter, iter.bi_done)) { - bio->bi_status = bio_integrity_process(bio, &iter, - bi->profile->verify_fn); - } else { - bio->bi_status = BLK_STS_IOERR; - } - + bio->bi_status = bio_integrity_process(bio, &bip->bio_iter, + bi->profile->verify_fn); bio_integrity_free(bio); bio_endio(bio); } diff --git a/block/bio.c b/block/bio.c index 0093bed81c0e..bbfeb4ee2892 100644 --- a/block/bio.c +++ b/block/bio.c @@ -609,7 +609,9 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); + + blkcg_bio_issue_init(bio); } EXPORT_SYMBOL(__bio_clone_fast); @@ -729,7 +731,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt > 1 && biovec_phys_mergeable(q, bvec - 1, bvec)) bio_clear_flag(bio, BIO_SEG_VALID); done: @@ -827,6 +829,8 @@ int bio_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_add_page); +#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) + /** * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio * @bio: bio to add pages to @@ -839,38 +843,35 @@ EXPORT_SYMBOL(bio_add_page); */ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx; + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; + ssize_t size, left; + unsigned len, i; size_t offset; - ssize_t size; + + /* + * Move page array up in the allocated memory for the bio vecs as far as + * possible so that we can start filling biovecs from the beginning + * without overwriting the temporary page array. + */ + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); if (unlikely(size <= 0)) return size ? 
size : -EFAULT; - idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; - /* - * Deep magic below: We need to walk the pinned pages backwards - * because we are abusing the space allocated for the bio_vecs - * for the page array. Because the bio_vecs are larger than the - * page pointers by definition this will always work. But it also - * means we can't use bio_add_page, so any changes to it's semantics - * need to be reflected here as well. - */ - bio->bi_iter.bi_size += size; - bio->bi_vcnt += nr_pages; + for (left = size, i = 0; left > 0; left -= len, i++) { + struct page *page = pages[i]; - while (idx--) { - bv[idx].bv_page = pages[idx]; - bv[idx].bv_len = PAGE_SIZE; - bv[idx].bv_offset = 0; + len = min_t(size_t, PAGE_SIZE - offset, left); + if (WARN_ON_ONCE(bio_add_page(bio, page, len, offset) != len)) + return -EINVAL; + offset = 0; } - bv[0].bv_offset += offset; - bv[0].bv_len -= offset; - bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size; - iov_iter_advance(iter, size); return 0; } @@ -1807,7 +1808,6 @@ struct bio *bio_split(struct bio *bio, int sectors, bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); - bio->bi_iter.bi_done = 0; if (bio_flagged(bio, BIO_TRACE_COMPLETION)) bio_set_flag(split, BIO_TRACE_COMPLETION); @@ -1956,69 +1956,151 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP +/** + * bio_associate_blkg - associate a bio with the a blkg + * @bio: target bio + * @blkg: the blkg to associate + * + * This tries to associate @bio with the specified blkg. Association failure + * is handled by walking up the blkg tree. Therefore, the blkg associated can + * be anything between @blkg and the root_blkg. This situation only happens + * when a cgroup is dying and then the remaining bios will spill to the closest + * alive blkg. + * + * A reference will be taken on the @blkg and will be released when @bio is + * freed. + */ +int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) +{ + if (unlikely(bio->bi_blkg)) + return -EBUSY; + bio->bi_blkg = blkg_tryget_closest(blkg); + return 0; +} + +/** + * __bio_associate_blkg_from_css - internal blkg association function + * + * This in the core association function that all association paths rely on. + * A blkg reference is taken which is released upon freeing of the bio. + */ +static int __bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ + struct request_queue *q = bio->bi_disk->queue; + struct blkcg_gq *blkg; + int ret; + + rcu_read_lock(); + + if (!css || !css->parent) + blkg = q->root_blkg; + else + blkg = blkg_lookup_create(css_to_blkcg(css), q); + + ret = bio_associate_blkg(bio, blkg); + + rcu_read_unlock(); + return ret; +} + +/** + * bio_associate_blkg_from_css - associate a bio with a specified css + * @bio: target bio + * @css: target css + * + * Associate @bio with the blkg found by combining the css's blkg and the + * request_queue of the @bio. This falls back to the queue's root_blkg if + * the association fails with the css. 
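The rewritten __bio_iov_iter_get_pages() above depends on struct bio_vec being at least two page pointers wide (the BUILD_BUG_ON on PAGE_PTRS_PER_BVEC), so the temporary page array can be parked in the tail of the unused bio_vec slots and consumed from the front by bio_add_page(). A small user-space sketch of that layout, with a mock two-pointer-wide vector type (names and sizes are illustrative, not the kernel's):

#include <assert.h>
#include <stdio.h>

/* mock bio_vec: one pointer plus two 32-bit fields, i.e. two pointers wide
 * on a 64-bit build, mirroring BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2) above */
struct mock_bvec {
	void *bv_page;
	unsigned int bv_len;
	unsigned int bv_offset;
};

#define NSLOTS 8
#define PTRS_PER_BVEC (sizeof(struct mock_bvec) / sizeof(void *))

int main(void)
{
	struct mock_bvec vecs[NSLOTS];
	void **pages = (void **)vecs;
	char backing[NSLOTS];	/* stand-ins for pinned pages */
	unsigned int i;

	/* park the temporary page array in the tail of the bio_vec storage */
	pages += NSLOTS * (PTRS_PER_BVEC - 1);
	for (i = 0; i < NSLOTS; i++)
		pages[i] = &backing[i];

	/* fill vectors from the front: pages[i] is read before vecs[i] is
	 * written, and writing vecs[i] never reaches the slots holding
	 * pages[i+1..], so nothing pending is clobbered */
	for (i = 0; i < NSLOTS; i++) {
		void *page = pages[i];

		vecs[i].bv_page = page;
		vecs[i].bv_len = 1;
		vecs[i].bv_offset = 0;
		assert(vecs[i].bv_page == &backing[i]);
	}
	printf("filled %u vectors without clobbering pending page pointers\n", i);
	return 0;
}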
+ */ +int bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ + if (unlikely(bio->bi_blkg)) + return -EBUSY; + return __bio_associate_blkg_from_css(bio, css); +} +EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); + #ifdef CONFIG_MEMCG /** - * bio_associate_blkcg_from_page - associate a bio with the page's blkcg + * bio_associate_blkg_from_page - associate a bio with the page's blkg * @bio: target bio * @page: the page to lookup the blkcg from * - * Associate @bio with the blkcg from @page's owning memcg. This works like - * every other associate function wrt references. + * Associate @bio with the blkg from @page's owning memcg and the respective + * request_queue. If cgroup_e_css returns NULL, fall back to the queue's + * root_blkg. + * + * Note: this must be called after bio has an associated device. */ -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) +int bio_associate_blkg_from_page(struct bio *bio, struct page *page) { - struct cgroup_subsys_state *blkcg_css; + struct cgroup_subsys_state *css; + int ret; - if (unlikely(bio->bi_css)) + if (unlikely(bio->bi_blkg)) return -EBUSY; if (!page->mem_cgroup) return 0; - blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, - &io_cgrp_subsys); - bio->bi_css = blkcg_css; - return 0; + + rcu_read_lock(); + + css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + + ret = __bio_associate_blkg_from_css(bio, css); + + rcu_read_unlock(); + return ret; } #endif /* CONFIG_MEMCG */ /** - * bio_associate_blkcg - associate a bio with the specified blkcg + * bio_associate_create_blkg - associate a bio with a blkg from q + * @q: request_queue where bio is going * @bio: target bio - * @blkcg_css: css of the blkcg to associate - * - * Associate @bio with the blkcg specified by @blkcg_css. Block layer will - * treat @bio as if it were issued by a task which belongs to the blkcg. * - * This function takes an extra reference of @blkcg_css which will be put - * when @bio is released. The caller must own @bio and is responsible for - * synchronizing calls to this function. + * Associate @bio with the blkg found from the bio's css and the request_queue. + * If one is not found, bio_lookup_blkg creates the blkg. This falls back to + * the queue's root_blkg if association fails. */ -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) +int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) { - if (unlikely(bio->bi_css)) - return -EBUSY; - css_get(blkcg_css); - bio->bi_css = blkcg_css; - return 0; + struct cgroup_subsys_state *css; + int ret = 0; + + /* someone has already associated this bio with a blkg */ + if (bio->bi_blkg) + return ret; + + rcu_read_lock(); + + css = blkcg_css(); + + ret = __bio_associate_blkg_from_css(bio, css); + + rcu_read_unlock(); + return ret; } -EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** - * bio_associate_blkg - associate a bio with the specified blkg + * bio_reassociate_blkg - reassociate a bio with a blkg from q + * @q: request_queue where bio is going * @bio: target bio - * @blkg: the blkg to associate * - * Associate @bio with the blkg specified by @blkg. This is the queue specific - * blkcg information associated with the @bio, a reference will be taken on the - * @blkg and will be freed when the bio is freed. + * When submitting a bio, multiple recursive calls to make_request() may occur. + * This causes the initial associate done in blkcg_bio_issue_check() to be + * incorrect and reference the prior request_queue. 
This performs reassociation + * when this situation happens. */ -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) +int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) { - if (unlikely(bio->bi_blkg)) - return -EBUSY; - if (!blkg_try_get(blkg)) - return -ENODEV; - bio->bi_blkg = blkg; - return 0; + if (bio->bi_blkg) { + blkg_put(bio->bi_blkg); + bio->bi_blkg = NULL; + } + + return bio_associate_create_blkg(q, bio); } /** @@ -2031,10 +2113,6 @@ void bio_disassociate_task(struct bio *bio) put_io_context(bio->bi_ioc); bio->bi_ioc = NULL; } - if (bio->bi_css) { - css_put(bio->bi_css); - bio->bi_css = NULL; - } if (bio->bi_blkg) { blkg_put(bio->bi_blkg); bio->bi_blkg = NULL; @@ -2042,16 +2120,16 @@ void bio_disassociate_task(struct bio *bio) } /** - * bio_clone_blkcg_association - clone blkcg association from src to dst bio + * bio_clone_blkg_association - clone blkg association from src to dst bio * @dst: destination bio * @src: source bio */ -void bio_clone_blkcg_association(struct bio *dst, struct bio *src) +void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_css) - WARN_ON(bio_associate_blkcg(dst, src->bi_css)); + if (src->bi_blkg) + bio_associate_blkg(dst, src->bi_blkg); } -EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); +EXPORT_SYMBOL_GPL(bio_clone_blkg_association); #endif /* CONFIG_BLK_CGROUP */ static void __init biovec_init_slabs(void) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c630e02836a8..992da5592c6e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -84,6 +84,37 @@ static void blkg_free(struct blkcg_gq *blkg) kfree(blkg); } +static void __blkg_release(struct rcu_head *rcu) +{ + struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); + + percpu_ref_exit(&blkg->refcnt); + + /* release the blkcg and parent blkg refs this blkg has been holding */ + css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); + + wb_congested_put(blkg->wb_congested); + + blkg_free(blkg); +} + +/* + * A group is RCU protected, but having an rcu lock does not mean that one + * can access all the fields of blkg and assume these are valid. For + * example, don't try to follow throtl_data and request queue links. + * + * Having a reference to blkg under an rcu allows accesses to only values + * local to groups like group stats and group rate limits. 
+ */ +static void blkg_release(struct percpu_ref *ref) +{ + struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); + + call_rcu(&blkg->rcu_head, __blkg_release); +} + /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with @@ -110,7 +141,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; - atomic_set(&blkg->refcnt, 1); /* root blkg uses @q->root_rl, init rl only for !root blkgs */ if (blkcg != &blkcg_root) { @@ -217,6 +247,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_get(blkg->parent); } + ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0, + GFP_NOWAIT | __GFP_NOWARN); + if (ret) + goto err_cancel_ref; + /* invoke per-policy init */ for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -249,6 +284,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_put(blkg); return ERR_PTR(ret); +err_cancel_ref: + percpu_ref_exit(&blkg->refcnt); err_put_congested: wb_congested_put(wb_congested); err_put_css: @@ -259,7 +296,7 @@ err_free_blkg: } /** - * blkg_lookup_create - lookup blkg, try to create one if not there + * __blkg_lookup_create - lookup blkg, try to create one if not there * @blkcg: blkcg of interest * @q: request_queue of interest * @@ -268,12 +305,11 @@ err_free_blkg: * that all non-root blkg's have access to the parent blkg. This function * should be called under RCU read lock and @q->queue_lock. * - * Returns pointer to the looked up or created blkg on success, ERR_PTR() - * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not - * dead and bypassing, returns ERR_PTR(-EBUSY). + * Returns the blkg or the closest blkg if blkg_create fails as it walks + * down from root. */ -struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) { struct blkcg_gq *blkg; @@ -285,7 +321,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) - return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY); + return q->root_blkg; blkg = __blkg_lookup(blkcg, q, true); if (blkg) @@ -293,23 +329,58 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, /* * Create blkgs walking down from blkcg_root to @blkcg, so that all - * non-root blkgs have access to their parents. + * non-root blkgs have access to their parents. Returns the closest + * blkg to the intended blkg should blkg_create() fail. */ while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - - while (parent && !__blkg_lookup(parent, q, false)) { + struct blkcg_gq *ret_blkg = q->root_blkg; + + while (parent) { + blkg = __blkg_lookup(parent, q, false); + if (blkg) { + /* remember closest blkg */ + ret_blkg = blkg; + break; + } pos = parent; parent = blkcg_parent(parent); } blkg = blkg_create(pos, q, NULL); - if (pos == blkcg || IS_ERR(blkg)) + if (IS_ERR(blkg)) + return ret_blkg; + if (pos == blkcg) return blkg; } } +/** + * blkg_lookup_create - find or create a blkg + * @blkcg: target block cgroup + * @q: target request_queue + * + * This looks up or creates the blkg representing the unique pair + * of the blkcg and the request_queue. 
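Moving blkg->refcnt from an atomic_t to a percpu_ref follows the usual init/kill/release pattern, with the final free deferred through call_rcu() as in __blkg_release() above. A minimal kernel-style sketch of that lifecycle, using a made-up struct obj instead of blkcg_gq (not compile-tested against any particular tree):

#include <linux/percpu-refcount.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct obj {
	struct percpu_ref refcnt;
	struct rcu_head rcu_head;
};

static void obj_free_rcu(struct rcu_head *rcu)
{
	struct obj *o = container_of(rcu, struct obj, rcu_head);

	percpu_ref_exit(&o->refcnt);	/* frees the per-cpu counters */
	kfree(o);
}

static void obj_release(struct percpu_ref *ref)
{
	struct obj *o = container_of(ref, struct obj, refcnt);

	/* last reference gone: defer the free by one RCU grace period */
	call_rcu(&o->rcu_head, obj_free_rcu);
}

static struct obj *obj_create(void)
{
	struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

	if (!o)
		return NULL;
	if (percpu_ref_init(&o->refcnt, obj_release, 0, GFP_KERNEL)) {
		kfree(o);
		return NULL;
	}
	return o;
}

static void obj_destroy(struct obj *o)
{
	/* drop the initial reference; percpu_ref_tryget() users keep it alive */
	percpu_ref_kill(&o->refcnt);
}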
+ */ +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) +{ + struct blkcg_gq *blkg = blkg_lookup(blkcg, q); + unsigned long flags; + + if (unlikely(!blkg)) { + spin_lock_irqsave(q->queue_lock, flags); + + blkg = __blkg_lookup_create(blkcg, q); + + spin_unlock_irqrestore(q->queue_lock, flags); + } + + return blkg; +} + static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; @@ -353,7 +424,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ - blkg_put(blkg); + percpu_ref_kill(&blkg->refcnt); } /** @@ -381,29 +452,6 @@ static void blkg_destroy_all(struct request_queue *q) } /* - * A group is RCU protected, but having an rcu lock does not mean that one - * can access all the fields of blkg and assume these are valid. For - * example, don't try to follow throtl_data and request queue links. - * - * Having a reference to blkg under an rcu allows accesses to only values - * local to groups like group stats and group rate limits. - */ -void __blkg_release_rcu(struct rcu_head *rcu_head) -{ - struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); - - /* release the blkcg and parent blkg refs this blkg has been holding */ - css_put(&blkg->blkcg->css); - if (blkg->parent) - blkg_put(blkg->parent); - - wb_congested_put(blkg->wb_congested); - - blkg_free(blkg); -} -EXPORT_SYMBOL_GPL(__blkg_release_rcu); - -/* * The next function used by blk_queue_for_each_rl(). It's a bit tricky * because the root blkg uses @q->root_rl instead of its own rl. */ @@ -1748,8 +1796,7 @@ void blkcg_maybe_throttle_current(void) blkg = blkg_lookup(blkcg, q); if (!blkg) goto out; - blkg = blkg_try_get(blkg); - if (!blkg) + if (!blkg_tryget(blkg)) goto out; rcu_read_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index cff0a60ee200..3ed60723e242 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -42,6 +42,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-sched.h" +#include "blk-pm.h" #include "blk-rq-qos.h" #ifdef CONFIG_DEBUG_FS @@ -421,24 +422,25 @@ void blk_sync_queue(struct request_queue *q) EXPORT_SYMBOL(blk_sync_queue); /** - * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY + * blk_set_pm_only - increment pm_only counter * @q: request queue pointer - * - * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not - * set and 1 if the flag was already set. 
*/ -int blk_set_preempt_only(struct request_queue *q) +void blk_set_pm_only(struct request_queue *q) { - return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); + atomic_inc(&q->pm_only); } -EXPORT_SYMBOL_GPL(blk_set_preempt_only); +EXPORT_SYMBOL_GPL(blk_set_pm_only); -void blk_clear_preempt_only(struct request_queue *q) +void blk_clear_pm_only(struct request_queue *q) { - blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); - wake_up_all(&q->mq_freeze_wq); + int pm_only; + + pm_only = atomic_dec_return(&q->pm_only); + WARN_ON_ONCE(pm_only < 0); + if (pm_only == 0) + wake_up_all(&q->mq_freeze_wq); } -EXPORT_SYMBOL_GPL(blk_clear_preempt_only); +EXPORT_SYMBOL_GPL(blk_clear_pm_only); /** * __blk_run_queue_uncond - run a queue whether or not it has been stopped @@ -917,7 +919,7 @@ EXPORT_SYMBOL(blk_alloc_queue); */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool preempt = flags & BLK_MQ_REQ_PREEMPT; + const bool pm = flags & BLK_MQ_REQ_PREEMPT; while (true) { bool success = false; @@ -925,11 +927,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { /* - * The code that sets the PREEMPT_ONLY flag is - * responsible for ensuring that that flag is globally - * visible before the queue is unfrozen. + * The code that increments the pm_only counter is + * responsible for ensuring that that counter is + * globally visible before the queue is unfrozen. */ - if (preempt || !blk_queue_preempt_only(q)) { + if (pm || !blk_queue_pm_only(q)) { success = true; } else { percpu_ref_put(&q->q_usage_counter); @@ -954,7 +956,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) wait_event(q->mq_freeze_wq, (atomic_read(&q->mq_freeze_depth) == 0 && - (preempt || !blk_queue_preempt_only(q))) || + (pm || (blk_pm_request_resume(q), + !blk_queue_pm_only(q)))) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; @@ -1051,8 +1054,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, mutex_init(&q->sysfs_lock); spin_lock_init(&q->__queue_lock); - if (!q->mq_ops) - q->queue_lock = lock ? : &q->__queue_lock; + q->queue_lock = lock ? 
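With the PREEMPT_ONLY flag replaced by the pm_only counter, blk_queue_enter() above admits a request only when it is PM-initiated or when no one currently holds pm_only. A user-space model of just that admission rule (the names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int pm_only;

static bool may_enter(bool pm_request)
{
	/* PM-marked requests always pass; others only when no pm_only holders */
	return pm_request || atomic_load(&pm_only) == 0;
}

int main(void)
{
	printf("normal: %d\n", may_enter(false));	/* 1 */
	atomic_fetch_add(&pm_only, 1);			/* blk_set_pm_only() */
	printf("normal: %d, pm: %d\n", may_enter(false), may_enter(true)); /* 0, 1 */
	atomic_fetch_sub(&pm_only, 1);			/* blk_clear_pm_only() */
	printf("normal: %d\n", may_enter(false));	/* 1 */
	return 0;
}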
: &q->__queue_lock; /* * A queue starts its life with bypass turned on to avoid @@ -1160,7 +1162,7 @@ int blk_init_allocated_queue(struct request_queue *q) { WARN_ON_ONCE(q->mq_ops); - q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size); + q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size, GFP_KERNEL); if (!q->fq) return -ENOMEM; @@ -1726,16 +1728,6 @@ void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part) } EXPORT_SYMBOL_GPL(part_round_stats); -#ifdef CONFIG_PM -static void blk_pm_put_request(struct request *rq) -{ - if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) - pm_runtime_mark_last_busy(rq->q->dev); -} -#else -static inline void blk_pm_put_request(struct request *rq) {} -#endif - void __blk_put_request(struct request_queue *q, struct request *req) { req_flags_t rq_flags = req->rq_flags; @@ -1752,6 +1744,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) blk_req_zone_write_unlock(req); blk_pm_put_request(req); + blk_pm_mark_last_busy(req); elv_completed_request(q, req); @@ -2440,6 +2433,7 @@ blk_qc_t generic_make_request(struct bio *bio) if (q) blk_queue_exit(q); q = bio->bi_disk->queue; + bio_reassociate_blkg(q, bio); flags = 0; if (bio->bi_opf & REQ_NOWAIT) flags = BLK_MQ_REQ_NOWAIT; @@ -2750,30 +2744,6 @@ void blk_account_io_done(struct request *req, u64 now) } } -#ifdef CONFIG_PM -/* - * Don't process normal requests when queue is suspended - * or in the process of suspending/resuming - */ -static bool blk_pm_allow_request(struct request *rq) -{ - switch (rq->q->rpm_status) { - case RPM_RESUMING: - case RPM_SUSPENDING: - return rq->rq_flags & RQF_PM; - case RPM_SUSPENDED: - return false; - default: - return true; - } -} -#else -static bool blk_pm_allow_request(struct request *rq) -{ - return true; -} -#endif - void blk_account_io_start(struct request *rq, bool new_io) { struct hd_struct *part; @@ -2819,11 +2789,14 @@ static struct request *elv_next_request(struct request_queue *q) while (1) { list_for_each_entry(rq, &q->queue_head, queuelist) { - if (blk_pm_allow_request(rq)) - return rq; - - if (rq->rq_flags & RQF_SOFTBARRIER) - break; +#ifdef CONFIG_PM + /* + * If a request gets queued in state RPM_SUSPENDED + * then that's a kernel bug. + */ + WARN_ON_ONCE(q->rpm_status == RPM_SUSPENDED); +#endif + return rq; } /* @@ -3755,191 +3728,6 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); -#ifdef CONFIG_PM -/** - * blk_pm_runtime_init - Block layer runtime PM initialization routine - * @q: the queue of the device - * @dev: the device the queue belongs to - * - * Description: - * Initialize runtime-PM-related fields for @q and start auto suspend for - * @dev. Drivers that want to take advantage of request-based runtime PM - * should call this function after @dev has been initialized, and its - * request queue @q has been allocated, and runtime PM for it can not happen - * yet(either due to disabled/forbidden or its usage_count > 0). In most - * cases, driver should call this function before any I/O has taken place. - * - * This function takes care of setting up using auto suspend for the device, - * the autosuspend delay is set to -1 to make runtime suspend impossible - * until an updated value is either set by user or by driver. Drivers do - * not need to touch other autosuspend settings. - * - * The block layer runtime PM is request based, so only works for drivers - * that use request as their IO unit instead of those directly use bio's. 
- */ -void blk_pm_runtime_init(struct request_queue *q, struct device *dev) -{ - /* Don't enable runtime PM for blk-mq until it is ready */ - if (q->mq_ops) { - pm_runtime_disable(dev); - return; - } - - q->dev = dev; - q->rpm_status = RPM_ACTIVE; - pm_runtime_set_autosuspend_delay(q->dev, -1); - pm_runtime_use_autosuspend(q->dev); -} -EXPORT_SYMBOL(blk_pm_runtime_init); - -/** - * blk_pre_runtime_suspend - Pre runtime suspend check - * @q: the queue of the device - * - * Description: - * This function will check if runtime suspend is allowed for the device - * by examining if there are any requests pending in the queue. If there - * are requests pending, the device can not be runtime suspended; otherwise, - * the queue's status will be updated to SUSPENDING and the driver can - * proceed to suspend the device. - * - * For the not allowed case, we mark last busy for the device so that - * runtime PM core will try to autosuspend it some time later. - * - * This function should be called near the start of the device's - * runtime_suspend callback. - * - * Return: - * 0 - OK to runtime suspend the device - * -EBUSY - Device should not be runtime suspended - */ -int blk_pre_runtime_suspend(struct request_queue *q) -{ - int ret = 0; - - if (!q->dev) - return ret; - - spin_lock_irq(q->queue_lock); - if (q->nr_pending) { - ret = -EBUSY; - pm_runtime_mark_last_busy(q->dev); - } else { - q->rpm_status = RPM_SUSPENDING; - } - spin_unlock_irq(q->queue_lock); - return ret; -} -EXPORT_SYMBOL(blk_pre_runtime_suspend); - -/** - * blk_post_runtime_suspend - Post runtime suspend processing - * @q: the queue of the device - * @err: return value of the device's runtime_suspend function - * - * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime suspend function and mark last busy for the device so - * that PM core will try to auto suspend the device at a later time. - * - * This function should be called near the end of the device's - * runtime_suspend callback. - */ -void blk_post_runtime_suspend(struct request_queue *q, int err) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - if (!err) { - q->rpm_status = RPM_SUSPENDED; - } else { - q->rpm_status = RPM_ACTIVE; - pm_runtime_mark_last_busy(q->dev); - } - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_post_runtime_suspend); - -/** - * blk_pre_runtime_resume - Pre runtime resume processing - * @q: the queue of the device - * - * Description: - * Update the queue's runtime status to RESUMING in preparation for the - * runtime resume of the device. - * - * This function should be called near the start of the device's - * runtime_resume callback. - */ -void blk_pre_runtime_resume(struct request_queue *q) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - q->rpm_status = RPM_RESUMING; - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_pre_runtime_resume); - -/** - * blk_post_runtime_resume - Post runtime resume processing - * @q: the queue of the device - * @err: return value of the device's runtime_resume function - * - * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime_resume function. If it is successfully resumed, process - * the requests that are queued into the device's queue when it is resuming - * and then mark last busy and initiate autosuspend for it. - * - * This function should be called near the end of the device's - * runtime_resume callback. 
- */ -void blk_post_runtime_resume(struct request_queue *q, int err) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - if (!err) { - q->rpm_status = RPM_ACTIVE; - __blk_run_queue(q); - pm_runtime_mark_last_busy(q->dev); - pm_request_autosuspend(q->dev); - } else { - q->rpm_status = RPM_SUSPENDED; - } - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_post_runtime_resume); - -/** - * blk_set_runtime_active - Force runtime status of the queue to be active - * @q: the queue of the device - * - * If the device is left runtime suspended during system suspend the resume - * hook typically resumes the device and corrects runtime status - * accordingly. However, that does not affect the queue runtime PM status - * which is still "suspended". This prevents processing requests from the - * queue. - * - * This function can be used in driver's resume hook to correct queue - * runtime PM status and re-enable peeking requests from the queue. It - * should be called before first request is added to the queue. - */ -void blk_set_runtime_active(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - q->rpm_status = RPM_ACTIVE; - pm_runtime_mark_last_busy(q->dev); - pm_request_autosuspend(q->dev); - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_set_runtime_active); -#endif - int __init blk_dev_init(void) { BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); diff --git a/block/blk-flush.c b/block/blk-flush.c index ce41f666de3e..8b44b86779da 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -566,12 +566,12 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, EXPORT_SYMBOL(blkdev_issue_flush); struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, - int node, int cmd_size) + int node, int cmd_size, gfp_t flags) { struct blk_flush_queue *fq; int rq_sz = sizeof(struct request); - fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node); + fq = kzalloc_node(sizeof(*fq), flags, node); if (!fq) goto fail; @@ -579,7 +579,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, spin_lock_init(&fq->mq_flush_lock); rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); - fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node); + fq->flush_rq = kzalloc_node(rq_sz, flags, node); if (!fq->flush_rq) goto fail_rq; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 6121611e1316..d1ab089e0919 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -49,12 +49,8 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) bio_for_each_integrity_vec(iv, bio, iter) { if (prev) { - if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) + if (!biovec_phys_mergeable(q, &ivprv, &iv)) goto new_segment; - - if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) - goto new_segment; - if (seg_size + iv.bv_len > queue_max_segment_size(q)) goto new_segment; @@ -95,12 +91,8 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, bio_for_each_integrity_vec(iv, bio, iter) { if (prev) { - if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) + if (!biovec_phys_mergeable(q, &ivprv, &iv)) goto new_segment; - - if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) - goto new_segment; - if (sg->length + iv.bv_len > queue_max_segment_size(q)) goto new_segment; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 19923f8a029d..35c48d7b8f78 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -115,9 +115,22 @@ struct child_latency_info { atomic_t scale_cookie; }; +struct percentile_stats { + u64 total; + u64 missed; +}; + +struct latency_stat { + 
union { + struct percentile_stats ps; + struct blk_rq_stat rqs; + }; +}; + struct iolatency_grp { struct blkg_policy_data pd; - struct blk_rq_stat __percpu *stats; + struct latency_stat __percpu *stats; + struct latency_stat cur_stat; struct blk_iolatency *blkiolat; struct rq_depth rq_depth; struct rq_wait rq_wait; @@ -132,6 +145,7 @@ struct iolatency_grp { /* Our current number of IO's for the last summation. */ u64 nr_samples; + bool ssd; struct child_latency_info child_lat; }; @@ -172,6 +186,80 @@ static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat) return pd_to_blkg(&iolat->pd); } +static inline void latency_stat_init(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) { + stat->ps.total = 0; + stat->ps.missed = 0; + } else + blk_rq_stat_init(&stat->rqs); +} + +static inline void latency_stat_sum(struct iolatency_grp *iolat, + struct latency_stat *sum, + struct latency_stat *stat) +{ + if (iolat->ssd) { + sum->ps.total += stat->ps.total; + sum->ps.missed += stat->ps.missed; + } else + blk_rq_stat_sum(&sum->rqs, &stat->rqs); +} + +static inline void latency_stat_record_time(struct iolatency_grp *iolat, + u64 req_time) +{ + struct latency_stat *stat = get_cpu_ptr(iolat->stats); + if (iolat->ssd) { + if (req_time >= iolat->min_lat_nsec) + stat->ps.missed++; + stat->ps.total++; + } else + blk_rq_stat_add(&stat->rqs, req_time); + put_cpu_ptr(stat); +} + +static inline bool latency_sum_ok(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) { + u64 thresh = div64_u64(stat->ps.total, 10); + thresh = max(thresh, 1ULL); + return stat->ps.missed < thresh; + } + return stat->rqs.mean <= iolat->min_lat_nsec; +} + +static inline u64 latency_stat_samples(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) + return stat->ps.total; + return stat->rqs.nr_samples; +} + +static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + int exp_idx; + + if (iolat->ssd) + return; + + /* + * CALC_LOAD takes in a number stored in fixed point representation. + * Because we are using this for IO time in ns, the values stored + * are significantly larger than the FIXED_1 denominator (2048). + * Therefore, rounding errors in the calculation are negligible and + * can be ignored. 
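For SSDs, latency_sum_ok() above stops looking at the mean and instead requires that fewer than one in ten requests in the window missed min_lat_nsec, with the threshold clamped so that tiny windows still count. The check reduces to the following user-space model:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool ssd_sum_ok(uint64_t total, uint64_t missed)
{
	uint64_t thresh = total / 10;

	if (thresh < 1)
		thresh = 1;
	return missed < thresh;
}

int main(void)
{
	printf("%d\n", ssd_sum_ok(1000, 50));	/* 1: 5 percent missed */
	printf("%d\n", ssd_sum_ok(1000, 100));	/* 0: 10 percent missed */
	printf("%d\n", ssd_sum_ok(5, 0));	/* 1: tiny window, thresh clamped to 1 */
	return 0;
}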
+ */ + exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, + div64_u64(iolat->cur_win_nsec, + BLKIOLATENCY_EXP_BUCKET_SIZE)); + CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat->rqs.mean); +} + static inline bool iolatency_may_queue(struct iolatency_grp *iolat, wait_queue_entry_t *wait, bool first_block) @@ -255,7 +343,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat, struct child_latency_info *lat_info, bool up) { - unsigned long qd = blk_queue_depth(blkiolat->rqos.q); + unsigned long qd = blkiolat->rqos.q->nr_requests; unsigned long scale = scale_amount(qd, up); unsigned long old = atomic_read(&lat_info->scale_cookie); unsigned long max_scale = qd << 1; @@ -295,10 +383,9 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat, */ static void scale_change(struct iolatency_grp *iolat, bool up) { - unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q); + unsigned long qd = iolat->blkiolat->rqos.q->nr_requests; unsigned long scale = scale_amount(qd, up); unsigned long old = iolat->rq_depth.max_depth; - bool changed = false; if (old > qd) old = qd; @@ -308,15 +395,13 @@ static void scale_change(struct iolatency_grp *iolat, bool up) return; if (old < qd) { - changed = true; old += scale; old = min(old, qd); iolat->rq_depth.max_depth = old; wake_up_all(&iolat->rq_wait.wait); } - } else if (old > 1) { + } else { old >>= 1; - changed = true; iolat->rq_depth.max_depth = max(old, 1UL); } } @@ -369,7 +454,7 @@ static void check_scale_change(struct iolatency_grp *iolat) * scale down event. */ samples_thresh = lat_info->nr_samples * 5; - samples_thresh = div64_u64(samples_thresh, 100); + samples_thresh = max(1ULL, div64_u64(samples_thresh, 100)); if (iolat->nr_samples <= samples_thresh) return; } @@ -395,34 +480,12 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock) { struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); - struct blkcg *blkcg; - struct blkcg_gq *blkg; - struct request_queue *q = rqos->q; + struct blkcg_gq *blkg = bio->bi_blkg; bool issue_as_root = bio_issue_as_root_blkg(bio); if (!blk_iolatency_enabled(blkiolat)) return; - rcu_read_lock(); - blkcg = bio_blkcg(bio); - bio_associate_blkcg(bio, &blkcg->css); - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - if (!lock) - spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - if (!lock) - spin_unlock_irq(q->queue_lock); - } - if (!blkg) - goto out; - - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); - bio_associate_blkg(bio, blkg); -out: - rcu_read_unlock(); while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); if (!iolat) { @@ -443,7 +506,6 @@ static void iolatency_record_time(struct iolatency_grp *iolat, struct bio_issue *issue, u64 now, bool issue_as_root) { - struct blk_rq_stat *rq_stat; u64 start = bio_issue_time(issue); u64 req_time; @@ -469,9 +531,7 @@ static void iolatency_record_time(struct iolatency_grp *iolat, return; } - rq_stat = get_cpu_ptr(iolat->stats); - blk_rq_stat_add(rq_stat, req_time); - put_cpu_ptr(rq_stat); + latency_stat_record_time(iolat, req_time); } #define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC) @@ -482,17 +542,17 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) struct blkcg_gq *blkg = lat_to_blkg(iolat); struct iolatency_grp *parent; struct child_latency_info *lat_info; - struct blk_rq_stat stat; + struct latency_stat stat; unsigned long flags; - int cpu, exp_idx; + int cpu; - 
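iolat_update_total_lat_avg() above feeds the window mean into CALC_LOAD, the same fixed-point exponential average used for the load average. A user-space rendition of that arithmetic; the decay factor below is only an example, not one of iolatency_exp_factors[]:

#include <stdint.h>
#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)		/* 2048, as in CALC_LOAD */

static uint64_t calc_load(uint64_t avg, uint64_t exp, uint64_t sample)
{
	avg *= exp;
	avg += sample * (FIXED_1 - exp);
	return avg >> FSHIFT;
}

int main(void)
{
	uint64_t exp = 1677;	/* roughly exp(-1/5) in 2048 fixed point */
	uint64_t avg = 0;
	int i;

	for (i = 0; i < 10; i++) {
		avg = calc_load(avg, exp, 2000000);	/* 2ms samples, in ns */
		printf("window %d: avg = %llu ns\n", i, (unsigned long long)avg);
	}
	return 0;
}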
blk_rq_stat_init(&stat); + latency_stat_init(iolat, &stat); preempt_disable(); for_each_online_cpu(cpu) { - struct blk_rq_stat *s; + struct latency_stat *s; s = per_cpu_ptr(iolat->stats, cpu); - blk_rq_stat_sum(&stat, s); - blk_rq_stat_init(s); + latency_stat_sum(iolat, &stat, s); + latency_stat_init(iolat, s); } preempt_enable(); @@ -502,41 +562,36 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) lat_info = &parent->child_lat; - /* - * CALC_LOAD takes in a number stored in fixed point representation. - * Because we are using this for IO time in ns, the values stored - * are significantly larger than the FIXED_1 denominator (2048). - * Therefore, rounding errors in the calculation are negligible and - * can be ignored. - */ - exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, - div64_u64(iolat->cur_win_nsec, - BLKIOLATENCY_EXP_BUCKET_SIZE)); - CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat.mean); + iolat_update_total_lat_avg(iolat, &stat); /* Everything is ok and we don't need to adjust the scale. */ - if (stat.mean <= iolat->min_lat_nsec && + if (latency_sum_ok(iolat, &stat) && atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE) return; /* Somebody beat us to the punch, just bail. */ spin_lock_irqsave(&lat_info->lock, flags); + + latency_stat_sum(iolat, &iolat->cur_stat, &stat); lat_info->nr_samples -= iolat->nr_samples; - lat_info->nr_samples += stat.nr_samples; - iolat->nr_samples = stat.nr_samples; + lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat); + iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat); if ((lat_info->last_scale_event >= now || - now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) && - lat_info->scale_lat <= iolat->min_lat_nsec) + now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME)) goto out; - if (stat.mean <= iolat->min_lat_nsec && - stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) { + if (latency_sum_ok(iolat, &iolat->cur_stat) && + latency_sum_ok(iolat, &stat)) { + if (latency_stat_samples(iolat, &iolat->cur_stat) < + BLKIOLATENCY_MIN_GOOD_SAMPLES) + goto out; if (lat_info->scale_grp == iolat) { lat_info->last_scale_event = now; scale_cookie_change(iolat->blkiolat, lat_info, true); } - } else if (stat.mean > iolat->min_lat_nsec) { + } else if (lat_info->scale_lat == 0 || + lat_info->scale_lat >= iolat->min_lat_nsec) { lat_info->last_scale_event = now; if (!lat_info->scale_grp || lat_info->scale_lat > iolat->min_lat_nsec) { @@ -545,6 +600,7 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) } scale_cookie_change(iolat->blkiolat, lat_info, false); } + latency_stat_init(iolat, &iolat->cur_stat); out: spin_unlock_irqrestore(&lat_info->lock, flags); } @@ -650,7 +706,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) * We could be exiting, don't access the pd unless we have a * ref on the blkg. 
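iolatency_check_latencies() above drains the per-CPU buckets into one window total and reinitializes each bucket in the same pass. Stripped of the blk-iolatency types, the pattern is simply (user-space model):

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct stat { uint64_t total; uint64_t missed; };

int main(void)
{
	struct stat percpu[NR_CPUS] = {
		{ 100, 3 }, { 250, 0 }, { 80, 5 }, { 0, 0 },
	};
	struct stat sum = { 0, 0 };
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		sum.total += percpu[cpu].total;
		sum.missed += percpu[cpu].missed;
		percpu[cpu] = (struct stat){ 0, 0 };	/* latency_stat_init() */
	}
	printf("window: total=%llu missed=%llu\n",
	       (unsigned long long)sum.total, (unsigned long long)sum.missed);
	return 0;
}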
*/ - if (!blkg_try_get(blkg)) + if (!blkg_tryget(blkg)) continue; iolat = blkg_to_lat(blkg); @@ -761,7 +817,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, { struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkcg_gq *blkg; - struct blk_iolatency *blkiolat; struct blkg_conf_ctx ctx; struct iolatency_grp *iolat; char *p, *tok; @@ -774,7 +829,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, return ret; iolat = blkg_to_lat(ctx.blkg); - blkiolat = iolat->blkiolat; p = ctx.body; ret = -EINVAL; @@ -835,13 +889,43 @@ static int iolatency_print_limit(struct seq_file *sf, void *v) return 0; } +static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf, + size_t size) +{ + struct latency_stat stat; + int cpu; + + latency_stat_init(iolat, &stat); + preempt_disable(); + for_each_online_cpu(cpu) { + struct latency_stat *s; + s = per_cpu_ptr(iolat->stats, cpu); + latency_stat_sum(iolat, &stat, s); + } + preempt_enable(); + + if (iolat->rq_depth.max_depth == UINT_MAX) + return scnprintf(buf, size, " missed=%llu total=%llu depth=max", + (unsigned long long)stat.ps.missed, + (unsigned long long)stat.ps.total); + return scnprintf(buf, size, " missed=%llu total=%llu depth=%u", + (unsigned long long)stat.ps.missed, + (unsigned long long)stat.ps.total, + iolat->rq_depth.max_depth); +} + static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size) { struct iolatency_grp *iolat = pd_to_lat(pd); - unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC); - unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); + unsigned long long avg_lat; + unsigned long long cur_win; + + if (iolat->ssd) + return iolatency_ssd_stat(iolat, buf, size); + avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC); + cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); if (iolat->rq_depth.max_depth == UINT_MAX) return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu", avg_lat, cur_win); @@ -858,8 +942,8 @@ static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node) iolat = kzalloc_node(sizeof(*iolat), gfp, node); if (!iolat) return NULL; - iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat), - __alignof__(struct blk_rq_stat), gfp); + iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat), + __alignof__(struct latency_stat), gfp); if (!iolat->stats) { kfree(iolat); return NULL; @@ -876,15 +960,21 @@ static void iolatency_pd_init(struct blkg_policy_data *pd) u64 now = ktime_to_ns(ktime_get()); int cpu; + if (blk_queue_nonrot(blkg->q)) + iolat->ssd = true; + else + iolat->ssd = false; + for_each_possible_cpu(cpu) { - struct blk_rq_stat *stat; + struct latency_stat *stat; stat = per_cpu_ptr(iolat->stats, cpu); - blk_rq_stat_init(stat); + latency_stat_init(iolat, stat); } + latency_stat_init(iolat, &iolat->cur_stat); rq_wait_init(&iolat->rq_wait); spin_lock_init(&iolat->child_lat.lock); - iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q); + iolat->rq_depth.queue_depth = blkg->q->nr_requests; iolat->rq_depth.max_depth = UINT_MAX; iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth; iolat->blkiolat = blkiolat; diff --git a/block/blk-lib.c b/block/blk-lib.c index d1b9dd03da25..bbd44666f2b5 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -29,9 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, { struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; - unsigned int granularity; unsigned int op; - int alignment; 
sector_t bs_mask; if (!q) @@ -54,38 +52,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if ((sector | nr_sects) & bs_mask) return -EINVAL; - /* Zero-sector (unknown) and one-sector granularities are the same. */ - granularity = max(q->limits.discard_granularity >> 9, 1U); - alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; - while (nr_sects) { - unsigned int req_sects; - sector_t end_sect, tmp; + unsigned int req_sects = nr_sects; + sector_t end_sect; - /* - * Issue in chunks of the user defined max discard setting, - * ensuring that bi_size doesn't overflow - */ - req_sects = min_t(sector_t, nr_sects, - q->limits.max_discard_sectors); if (!req_sects) goto fail; if (req_sects > UINT_MAX >> 9) req_sects = UINT_MAX >> 9; - /* - * If splitting a request, and the next starting sector would be - * misaligned, stop the discard at the previous aligned sector. - */ end_sect = sector + req_sects; - tmp = end_sect; - if (req_sects < nr_sects && - sector_div(tmp, granularity) != alignment) { - end_sect = end_sect - alignment; - sector_div(end_sect, granularity); - end_sect = end_sect * granularity + alignment; - req_sects = end_sect - sector; - } bio = next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; diff --git a/block/blk-merge.c b/block/blk-merge.c index aaec38cc37b8..42a46744c11b 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -12,6 +12,69 @@ #include "blk.h" +/* + * Check if the two bvecs from two bios can be merged to one segment. If yes, + * no need to check gap between the two bios since the 1st bio and the 1st bvec + * in the 2nd bio can be handled in one segment. + */ +static inline bool bios_segs_mergeable(struct request_queue *q, + struct bio *prev, struct bio_vec *prev_last_bv, + struct bio_vec *next_first_bv) +{ + if (!biovec_phys_mergeable(q, prev_last_bv, next_first_bv)) + return false; + if (prev->bi_seg_back_size + next_first_bv->bv_len > + queue_max_segment_size(q)) + return false; + return true; +} + +static inline bool bio_will_gap(struct request_queue *q, + struct request *prev_rq, struct bio *prev, struct bio *next) +{ + struct bio_vec pb, nb; + + if (!bio_has_data(prev) || !queue_virt_boundary(q)) + return false; + + /* + * Don't merge if the 1st bio starts with non-zero offset, otherwise it + * is quite difficult to respect the sg gap limit. We work hard to + * merge a huge number of small single bios in case of mkfs. 
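Stepping back to the __blkdev_issue_discard() hunk above: with the granularity and alignment rounding removed, splitting amounts to "clamp each bio to UINT_MAX >> 9 sectors and advance". A user-space rendition of that loop (the range below is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t sector = 0, nr_sects = 1ULL << 24;	/* 8 GiB in 512-byte sectors */
	uint64_t max_bio_sectors = UINT32_MAX >> 9;

	while (nr_sects) {
		uint64_t req_sects = nr_sects;

		if (req_sects > max_bio_sectors)
			req_sects = max_bio_sectors;

		printf("discard bio: sector %llu, %llu sectors\n",
		       (unsigned long long)sector, (unsigned long long)req_sects);

		sector += req_sects;
		nr_sects -= req_sects;
	}
	return 0;
}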
+ */ + if (prev_rq) + bio_get_first_bvec(prev_rq->bio, &pb); + else + bio_get_first_bvec(prev, &pb); + if (pb.bv_offset) + return true; + + /* + * We don't need to worry about the situation that the merged segment + * ends in unaligned virt boundary: + * + * - if 'pb' ends aligned, the merged segment ends aligned + * - if 'pb' ends unaligned, the next bio must include + * one single bvec of 'nb', otherwise the 'nb' can't + * merge with 'pb' + */ + bio_get_last_bvec(prev, &pb); + bio_get_first_bvec(next, &nb); + if (bios_segs_mergeable(q, prev, &pb, &nb)) + return false; + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); +} + +static inline bool req_gap_back_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, req, req->biotail, bio); +} + +static inline bool req_gap_front_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, NULL, bio, req->bio); +} + static struct bio *blk_bio_discard_split(struct request_queue *q, struct bio *bio, struct bio_set *bs, @@ -134,9 +197,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, if (bvprvp && blk_queue_cluster(q)) { if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) + if (!biovec_phys_mergeable(q, bvprvp, &bv)) goto new_segment; seg_size += bv.bv_len; @@ -267,9 +328,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) + if (!biovec_phys_mergeable(q, &bvprv, &bv)) goto new_segment; seg_size += bv.bv_len; @@ -349,17 +408,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, bio_get_last_bvec(bio, &end_bv); bio_get_first_bvec(nxt, &nxt_bv); - if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) - return 0; - - /* - * bio and nxt are contiguous in memory; check if the queue allows - * these two to be merged into one - */ - if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) - return 1; - - return 0; + return biovec_phys_mergeable(q, &end_bv, &nxt_bv); } static inline void @@ -373,10 +422,7 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, if (*sg && *cluster) { if ((*sg)->length + nbytes > queue_max_segment_size(q)) goto new_segment; - - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) + if (!biovec_phys_mergeable(q, bvprv, bvec)) goto new_segment; (*sg)->length += nbytes; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index cb1e6cf7ac48..41b86f50d126 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, return 0; } +static int queue_pm_only_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + + seq_printf(m, "%d\n", atomic_read(&q->pm_only)); + return 0; +} + #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(QUEUED), @@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), QUEUE_FLAG_NAME(QUIESCED), - QUEUE_FLAG_NAME(PREEMPT_ONLY), }; #undef QUEUE_FLAG_NAME @@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, static const struct 
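bio_will_gap() above ultimately falls back to the __bvec_gap_to_prev() test: the next segment must start at offset zero and the previous one must end on the queue's virt boundary. A user-space model of that predicate (4095 is just an example boundary mask):

#include <stdbool.h>
#include <stdio.h>

/* prev_end is bv_offset + bv_len of the previous bio's last vector,
 * next_offset is bv_offset of the next bio's first vector,
 * mask stands in for queue_virt_boundary(q) */
static bool gap_to_prev(unsigned int prev_end, unsigned int next_offset,
			unsigned int mask)
{
	return next_offset || (prev_end & mask);
}

int main(void)
{
	unsigned int mask = 4095;	/* device cannot merge across 4K */

	printf("%d\n", gap_to_prev(4096, 0, mask));	/* 0: merge allowed */
	printf("%d\n", gap_to_prev(512, 0, mask));	/* 1: prev ends mid-boundary */
	printf("%d\n", gap_to_prev(4096, 512, mask));	/* 1: next starts mid-page */
	return 0;
}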
blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, + { "pm_only", 0600, queue_pm_only_show, NULL }, { "state", 0600, queue_state_show, queue_state_write }, { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, @@ -423,8 +431,7 @@ static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved) { const struct show_busy_params *params = data; - if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx && - blk_mq_rq_state(rq) != MQ_RQ_IDLE) + if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx) __blk_mq_debugfs_rq_show(params->m, list_entry_rq(&rq->queuelist)); } diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 4e028ee42430..8a9544203173 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -49,12 +49,12 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, return true; } -static inline void blk_mq_sched_completed_request(struct request *rq) +static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) { struct elevator_queue *e = rq->q->elevator; if (e && e->type->ops.mq.completed_request) - e->type->ops.mq.completed_request(rq); + e->type->ops.mq.completed_request(rq, now); } static inline void blk_mq_sched_started_request(struct request *rq) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 41317c50a446..cfda95b85d34 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -232,13 +232,26 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) /* * We can hit rq == NULL here, because the tagging functions - * test and set the bit before assining ->rqs[]. + * test and set the bit before assigning ->rqs[]. */ if (rq && rq->q == hctx->queue) iter_data->fn(hctx, rq, iter_data->data, reserved); return true; } +/** + * bt_for_each - iterate over the requests associated with a hardware queue + * @hctx: Hardware queue to examine. + * @bt: sbitmap to examine. This is either the breserved_tags member + * or the bitmap_tags member of struct blk_mq_tags. + * @fn: Pointer to the function that will be called for each request + * associated with @hctx that has been assigned a driver tag. + * @fn will be called as follows: @fn(@hctx, rq, @data, @reserved) + * where rq is a pointer to a request. + * @data: Will be passed as third argument to @fn. + * @reserved: Indicates whether @bt is the breserved_tags member or the + * bitmap_tags member of struct blk_mq_tags. + */ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt, busy_iter_fn *fn, void *data, bool reserved) { @@ -280,6 +293,18 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) return true; } +/** + * bt_tags_for_each - iterate over the requests in a tag map + * @tags: Tag map to iterate over. + * @bt: sbitmap to examine. This is either the breserved_tags member + * or the bitmap_tags member of struct blk_mq_tags. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @data, + * @reserved) where rq is a pointer to a request. + * @data: Will be passed as second argument to @fn. + * @reserved: Indicates whether @bt is the breserved_tags member or the + * bitmap_tags member of struct blk_mq_tags. 
+ */ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt, busy_tag_iter_fn *fn, void *data, bool reserved) { @@ -294,6 +319,15 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt, sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data); } +/** + * blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map + * @tags: Tag map to iterate over. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @priv, + * reserved) where rq is a pointer to a request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as second argument to @fn. + */ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { @@ -302,6 +336,15 @@ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false); } +/** + * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set + * @tagset: Tag set to iterate over. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @priv, + * reserved) where rq is a pointer to a request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as second argument to @fn. + */ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv) { @@ -314,6 +357,20 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); +/** + * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag + * @q: Request queue to examine. + * @fn: Pointer to the function that will be called for each request + * on @q. @fn will be called as follows: @fn(hctx, rq, @priv, + * reserved) where rq is a pointer to a request and hctx points + * to the hardware queue associated with the request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as third argument to @fn. + * + * Note: if @q->tag_set is shared with other request queues then @fn will be + * called for all requests on all queues that share that tag set and not only + * for requests associated with @q. + */ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { @@ -321,9 +378,11 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, int i; /* - * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and - * queue_hw_ctx after freeze the queue, so we use q_usage_counter - * to avoid race with it. + * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx + * while the queue is frozen. So we can use q_usage_counter to avoid + * racing with it. __blk_mq_update_nr_hw_queues() uses + * synchronize_rcu() to ensure this function left the critical section + * below. 
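The newly documented blk_mq_tagset_busy_iter() calls the supplied function once per started request, as described in the kernel-doc above. A hypothetical driver helper using it might look like this (the mydrv_* names are made up; the callback shape matches the documentation):

#include <linux/blk-mq.h>

/* called once per started request; @reserved distinguishes reserved tags */
static void mydrv_count_one(struct request *rq, void *data, bool reserved)
{
	unsigned int *count = data;

	(*count)++;
}

static unsigned int mydrv_count_inflight(struct blk_mq_tag_set *set)
{
	unsigned int count = 0;

	blk_mq_tagset_busy_iter(set, mydrv_count_one, &count);
	return count;
}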
*/ if (!percpu_ref_tryget(&q->q_usage_counter)) return; @@ -332,7 +391,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, struct blk_mq_tags *tags = hctx->tags; /* - * If not software queues are currently mapped to this + * If no software queues are currently mapped to this * hardware queue, there's nothing to check */ if (!blk_mq_hw_queue_mapped(hctx)) diff --git a/block/blk-mq.c b/block/blk-mq.c index e3c39ea8e17b..dcf10e39995a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -33,6 +33,7 @@ #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-tag.h" +#include "blk-pm.h" #include "blk-stat.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" @@ -198,7 +199,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) freeze_depth = atomic_dec_return(&q->mq_freeze_depth); WARN_ON_ONCE(freeze_depth < 0); if (!freeze_depth) { - percpu_ref_reinit(&q->q_usage_counter); + percpu_ref_resurrect(&q->q_usage_counter); wake_up_all(&q->mq_freeze_wq); } } @@ -475,6 +476,7 @@ static void __blk_mq_free_request(struct request *rq) struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); const int sched_tag = rq->internal_tag; + blk_pm_mark_last_busy(rq); if (rq->tag != -1) blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) @@ -526,6 +528,9 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) blk_stat_add(rq, now); } + if (rq->internal_tag != -1) + blk_mq_sched_completed_request(rq, now); + blk_account_io_done(rq, now); if (rq->end_io) { @@ -562,8 +567,20 @@ static void __blk_mq_complete_request(struct request *rq) if (!blk_mq_mark_complete(rq)) return; - if (rq->internal_tag != -1) - blk_mq_sched_completed_request(rq); + + /* + * Most of single queue controllers, there is only one irq vector + * for handling IO completion, and the only irq's affinity is set + * as all possible CPUs. On most of ARCHs, this affinity means the + * irq is handled on one specific CPU. + * + * So complete IO reqeust in softirq context in case of single queue + * for not degrading IO performance by irqsoff latency. 
+ */ + if (rq->q->nr_hw_queues == 1) { + __blk_complete_request(rq); + return; + } if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { rq->q->softirq_done_fn(rq); @@ -2137,8 +2154,6 @@ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - blk_mq_debugfs_unregister_hctx(hctx); - if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_idle(hctx); @@ -2165,6 +2180,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; + blk_mq_debugfs_unregister_hctx(hctx); blk_mq_exit_hctx(q, set, hctx, i); } } @@ -2194,12 +2210,12 @@ static int blk_mq_init_hctx(struct request_queue *q, * runtime */ hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), - GFP_KERNEL, node); + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); if (!hctx->ctxs) goto unregister_cpu_notifier; - if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), GFP_KERNEL, - node)) + if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node)) goto free_ctxs; hctx->nr_ctx = 0; @@ -2212,7 +2228,8 @@ static int blk_mq_init_hctx(struct request_queue *q, set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto free_bitmap; - hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (!hctx->fq) goto exit_hctx; @@ -2222,8 +2239,6 @@ static int blk_mq_init_hctx(struct request_queue *q, if (hctx->flags & BLK_MQ_F_BLOCKING) init_srcu_struct(hctx->srcu); - blk_mq_debugfs_register_hctx(q, hctx); - return 0; free_fq: @@ -2492,6 +2507,39 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) } EXPORT_SYMBOL(blk_mq_init_queue); +/* + * Helper for setting up a queue with mq ops, given queue depth, and + * the passed in mq ops flags. 
+ */ +struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, + unsigned int queue_depth, + unsigned int set_flags) +{ + struct request_queue *q; + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->nr_hw_queues = 1; + set->queue_depth = queue_depth; + set->numa_node = NUMA_NO_NODE; + set->flags = set_flags; + + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ERR_PTR(ret); + + q = blk_mq_init_queue(set); + if (IS_ERR(q)) { + blk_mq_free_tag_set(set); + return q; + } + + return q; +} +EXPORT_SYMBOL(blk_mq_init_sq_queue); + static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) { int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); @@ -2506,48 +2554,90 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) return hw_ctx_size; } +static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( + struct blk_mq_tag_set *set, struct request_queue *q, + int hctx_idx, int node) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = kzalloc_node(blk_mq_hw_ctx_size(set), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node); + if (!hctx) + return NULL; + + if (!zalloc_cpumask_var_node(&hctx->cpumask, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node)) { + kfree(hctx); + return NULL; + } + + atomic_set(&hctx->nr_active, 0); + hctx->numa_node = node; + hctx->queue_num = hctx_idx; + + if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) { + free_cpumask_var(hctx->cpumask); + kfree(hctx); + return NULL; + } + blk_mq_hctx_kobj_init(hctx); + + return hctx; +} + static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct request_queue *q) { - int i, j; + int i, j, end; struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx; - blk_mq_sysfs_unregister(q); - /* protect against switching io scheduler */ mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { int node; - - if (hctxs[i]) - continue; + struct blk_mq_hw_ctx *hctx; node = blk_mq_hw_queue_to_node(q->mq_map, i); - hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set), - GFP_KERNEL, node); - if (!hctxs[i]) - break; - - if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL, - node)) { - kfree(hctxs[i]); - hctxs[i] = NULL; - break; - } - - atomic_set(&hctxs[i]->nr_active, 0); - hctxs[i]->numa_node = node; - hctxs[i]->queue_num = i; + /* + * If the hw queue has been mapped to another numa node, + * we need to realloc the hctx. If allocation fails, fallback + * to use the previous one. + */ + if (hctxs[i] && (hctxs[i]->numa_node == node)) + continue; - if (blk_mq_init_hctx(q, set, hctxs[i], i)) { - free_cpumask_var(hctxs[i]->cpumask); - kfree(hctxs[i]); - hctxs[i] = NULL; - break; + hctx = blk_mq_alloc_and_init_hctx(set, q, i, node); + if (hctx) { + if (hctxs[i]) { + blk_mq_exit_hctx(q, set, hctxs[i], i); + kobject_put(&hctxs[i]->kobj); + } + hctxs[i] = hctx; + } else { + if (hctxs[i]) + pr_warn("Allocate new hctx on node %d fails,\ + fallback to previous one on node %d\n", + node, hctxs[i]->numa_node); + else + break; } - blk_mq_hctx_kobj_init(hctxs[i]); } - for (j = i; j < q->nr_hw_queues; j++) { + /* + * Increasing nr_hw_queues fails. Free the newly allocated + * hctxs and keep the previous q->nr_hw_queues. 
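blk_mq_init_sq_queue() above wraps the common single-hardware-queue setup. A hypothetical driver would use it roughly as follows; the mydrv_* names, the queue depth of 64, and the flags are illustrative only:

#include <linux/blk-mq.h>
#include <linux/err.h>

struct mydrv {
	struct blk_mq_tag_set tag_set;
	struct request_queue *queue;
};

static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	blk_mq_start_request(bd->rq);
	/* hand the request to the hardware; completion comes later */
	return BLK_STS_OK;
}

static const struct blk_mq_ops mydrv_mq_ops = {
	.queue_rq = mydrv_queue_rq,
};

static int mydrv_init_queue(struct mydrv *drv)
{
	drv->queue = blk_mq_init_sq_queue(&drv->tag_set, &mydrv_mq_ops,
					  64, BLK_MQ_F_SHOULD_MERGE);
	if (IS_ERR(drv->queue))
		return PTR_ERR(drv->queue);

	drv->queue->queuedata = drv;
	return 0;
}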
+ */ + if (i != set->nr_hw_queues) { + j = q->nr_hw_queues; + end = i; + } else { + j = i; + end = q->nr_hw_queues; + q->nr_hw_queues = set->nr_hw_queues; + } + + for (; j < end; j++) { struct blk_mq_hw_ctx *hctx = hctxs[j]; if (hctx) { @@ -2559,9 +2649,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, } } - q->nr_hw_queues = i; mutex_unlock(&q->sysfs_lock); - blk_mq_sysfs_register(q); } struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, @@ -2659,25 +2747,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); } -/* Basically redo blk_mq_init_queue with queue frozen */ -static void blk_mq_queue_reinit(struct request_queue *q) -{ - WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); - - blk_mq_debugfs_unregister_hctxs(q); - blk_mq_sysfs_unregister(q); - - /* - * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe - * we should change hctx numa_node according to the new topology (this - * involves freeing and re-allocating memory, worth doing?) - */ - blk_mq_map_swqueue(q); - - blk_mq_sysfs_register(q); - blk_mq_debugfs_register_hctxs(q); -} - static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) { int i; @@ -2964,6 +3033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, { struct request_queue *q; LIST_HEAD(head); + int prev_nr_hw_queues; lockdep_assert_held(&set->tag_list_lock); @@ -2987,11 +3057,30 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (!blk_mq_elv_switch_none(&head, q)) goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) { + blk_mq_debugfs_unregister_hctxs(q); + blk_mq_sysfs_unregister(q); + } + + prev_nr_hw_queues = set->nr_hw_queues; set->nr_hw_queues = nr_hw_queues; blk_mq_update_queue_map(set); +fallback: list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); - blk_mq_queue_reinit(q); + if (q->nr_hw_queues != set->nr_hw_queues) { + pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n", + nr_hw_queues, prev_nr_hw_queues); + set->nr_hw_queues = prev_nr_hw_queues; + blk_mq_map_queues(set); + goto fallback; + } + blk_mq_map_swqueue(q); + } + + list_for_each_entry(q, &set->tag_list, tag_set_list) { + blk_mq_sysfs_register(q); + blk_mq_debugfs_register_hctxs(q); } switch_back: diff --git a/block/blk-pm.c b/block/blk-pm.c new file mode 100644 index 000000000000..f8fdae01bea2 --- /dev/null +++ b/block/blk-pm.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/blk-mq.h> +#include <linux/blk-pm.h> +#include <linux/blkdev.h> +#include <linux/pm_runtime.h> +#include "blk-mq.h" +#include "blk-mq-tag.h" + +/** + * blk_pm_runtime_init - Block layer runtime PM initialization routine + * @q: the queue of the device + * @dev: the device the queue belongs to + * + * Description: + * Initialize runtime-PM-related fields for @q and start auto suspend for + * @dev. Drivers that want to take advantage of request-based runtime PM + * should call this function after @dev has been initialized, and its + * request queue @q has been allocated, and runtime PM for it can not happen + * yet(either due to disabled/forbidden or its usage_count > 0). In most + * cases, driver should call this function before any I/O has taken place. + * + * This function takes care of setting up using auto suspend for the device, + * the autosuspend delay is set to -1 to make runtime suspend impossible + * until an updated value is either set by user or by driver. 
Drivers do + * not need to touch other autosuspend settings. + * + * The block layer runtime PM is request based, so only works for drivers + * that use request as their IO unit instead of those directly use bio's. + */ +void blk_pm_runtime_init(struct request_queue *q, struct device *dev) +{ + q->dev = dev; + q->rpm_status = RPM_ACTIVE; + pm_runtime_set_autosuspend_delay(q->dev, -1); + pm_runtime_use_autosuspend(q->dev); +} +EXPORT_SYMBOL(blk_pm_runtime_init); + +/** + * blk_pre_runtime_suspend - Pre runtime suspend check + * @q: the queue of the device + * + * Description: + * This function will check if runtime suspend is allowed for the device + * by examining if there are any requests pending in the queue. If there + * are requests pending, the device can not be runtime suspended; otherwise, + * the queue's status will be updated to SUSPENDING and the driver can + * proceed to suspend the device. + * + * For the not allowed case, we mark last busy for the device so that + * runtime PM core will try to autosuspend it some time later. + * + * This function should be called near the start of the device's + * runtime_suspend callback. + * + * Return: + * 0 - OK to runtime suspend the device + * -EBUSY - Device should not be runtime suspended + */ +int blk_pre_runtime_suspend(struct request_queue *q) +{ + int ret = 0; + + if (!q->dev) + return ret; + + WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE); + + /* + * Increase the pm_only counter before checking whether any + * non-PM blk_queue_enter() calls are in progress to avoid that any + * new non-PM blk_queue_enter() calls succeed before the pm_only + * counter is decreased again. + */ + blk_set_pm_only(q); + ret = -EBUSY; + /* Switch q_usage_counter from per-cpu to atomic mode. */ + blk_freeze_queue_start(q); + /* + * Wait until atomic mode has been reached. Since that + * involves calling call_rcu(), it is guaranteed that later + * blk_queue_enter() calls see the pm-only state. See also + * http://lwn.net/Articles/573497/. + */ + percpu_ref_switch_to_atomic_sync(&q->q_usage_counter); + if (percpu_ref_is_zero(&q->q_usage_counter)) + ret = 0; + /* Switch q_usage_counter back to per-cpu mode. */ + blk_mq_unfreeze_queue(q); + + spin_lock_irq(q->queue_lock); + if (ret < 0) + pm_runtime_mark_last_busy(q->dev); + else + q->rpm_status = RPM_SUSPENDING; + spin_unlock_irq(q->queue_lock); + + if (ret) + blk_clear_pm_only(q); + + return ret; +} +EXPORT_SYMBOL(blk_pre_runtime_suspend); + +/** + * blk_post_runtime_suspend - Post runtime suspend processing + * @q: the queue of the device + * @err: return value of the device's runtime_suspend function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime suspend function and mark last busy for the device so + * that PM core will try to auto suspend the device at a later time. + * + * This function should be called near the end of the device's + * runtime_suspend callback. 
+ */ +void blk_post_runtime_suspend(struct request_queue *q, int err) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_SUSPENDED; + } else { + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + } + spin_unlock_irq(q->queue_lock); + + if (err) + blk_clear_pm_only(q); +} +EXPORT_SYMBOL(blk_post_runtime_suspend); + +/** + * blk_pre_runtime_resume - Pre runtime resume processing + * @q: the queue of the device + * + * Description: + * Update the queue's runtime status to RESUMING in preparation for the + * runtime resume of the device. + * + * This function should be called near the start of the device's + * runtime_resume callback. + */ +void blk_pre_runtime_resume(struct request_queue *q) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_RESUMING; + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_pre_runtime_resume); + +/** + * blk_post_runtime_resume - Post runtime resume processing + * @q: the queue of the device + * @err: return value of the device's runtime_resume function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime_resume function. If it is successfully resumed, process + * the requests that are queued into the device's queue when it is resuming + * and then mark last busy and initiate autosuspend for it. + * + * This function should be called near the end of the device's + * runtime_resume callback. + */ +void blk_post_runtime_resume(struct request_queue *q, int err) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + } else { + q->rpm_status = RPM_SUSPENDED; + } + spin_unlock_irq(q->queue_lock); + + if (!err) + blk_clear_pm_only(q); +} +EXPORT_SYMBOL(blk_post_runtime_resume); + +/** + * blk_set_runtime_active - Force runtime status of the queue to be active + * @q: the queue of the device + * + * If the device is left runtime suspended during system suspend the resume + * hook typically resumes the device and corrects runtime status + * accordingly. However, that does not affect the queue runtime PM status + * which is still "suspended". This prevents processing requests from the + * queue. + * + * This function can be used in driver's resume hook to correct queue + * runtime PM status and re-enable peeking requests from the queue. It + * should be called before first request is added to the queue. 
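For illustration only, and not part of the patch: taken together, these hooks are meant to bracket a driver's runtime PM callbacks once blk_pm_runtime_init() has been called at probe time. The mydev structure and the hardware suspend/resume helpers in this sketch are hypothetical.

static int mydev_runtime_suspend(struct device *dev)
{
	struct mydev *md = dev_get_drvdata(dev);
	int err;

	err = blk_pre_runtime_suspend(md->queue);
	if (err)
		return err;		/* -EBUSY: queue is not idle */

	err = mydev_hw_suspend(md);	/* hypothetical hardware helper */
	blk_post_runtime_suspend(md->queue, err);
	return err;
}

static int mydev_runtime_resume(struct device *dev)
{
	struct mydev *md = dev_get_drvdata(dev);
	int err;

	blk_pre_runtime_resume(md->queue);
	err = mydev_hw_resume(md);	/* hypothetical hardware helper */
	blk_post_runtime_resume(md->queue, err);
	return err;
}

A driver whose device can remain runtime suspended across a system sleep would additionally call blk_set_runtime_active() from its system resume hook, as described above.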
+ */ +void blk_set_runtime_active(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_set_runtime_active); diff --git a/block/blk-pm.h b/block/blk-pm.h new file mode 100644 index 000000000000..a8564ea72a41 --- /dev/null +++ b/block/blk-pm.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BLOCK_BLK_PM_H_ +#define _BLOCK_BLK_PM_H_ + +#include <linux/pm_runtime.h> + +#ifdef CONFIG_PM +static inline void blk_pm_request_resume(struct request_queue *q) +{ + if (q->dev && (q->rpm_status == RPM_SUSPENDED || + q->rpm_status == RPM_SUSPENDING)) + pm_request_resume(q->dev); +} + +static inline void blk_pm_mark_last_busy(struct request *rq) +{ + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + pm_runtime_mark_last_busy(rq->q->dev); +} + +static inline void blk_pm_requeue_request(struct request *rq) +{ + lockdep_assert_held(rq->q->queue_lock); + + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + rq->q->nr_pending--; +} + +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ + lockdep_assert_held(q->queue_lock); + + if (q->dev && !(rq->rq_flags & RQF_PM)) + q->nr_pending++; +} + +static inline void blk_pm_put_request(struct request *rq) +{ + lockdep_assert_held(rq->q->queue_lock); + + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + --rq->q->nr_pending; +} +#else +static inline void blk_pm_request_resume(struct request_queue *q) +{ +} + +static inline void blk_pm_mark_last_busy(struct request *rq) +{ +} + +static inline void blk_pm_requeue_request(struct request *rq) +{ +} + +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ +} + +static inline void blk_pm_put_request(struct request *rq) +{ +} +#endif + +#endif /* _BLOCK_BLK_PM_H_ */ diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 15c1f5e12eb8..e47a2f751884 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -97,8 +97,8 @@ static int blk_softirq_cpu_dead(unsigned int cpu) void __blk_complete_request(struct request *req) { - int ccpu, cpu; struct request_queue *q = req->q; + int cpu, ccpu = q->mq_ops ? req->mq_ctx->cpu : req->cpu; unsigned long flags; bool shared = false; @@ -110,8 +110,7 @@ void __blk_complete_request(struct request *req) /* * Select completion CPU */ - if (req->cpu != -1) { - ccpu = req->cpu; + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) { if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) shared = cpus_share_cache(cpu, ccpu); } else diff --git a/block/blk-stat.c b/block/blk-stat.c index 7587b1c3caaf..90561af85a62 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -190,6 +190,7 @@ void blk_stat_enable_accounting(struct request_queue *q) blk_queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } +EXPORT_SYMBOL_GPL(blk_stat_enable_accounting); struct blk_queue_stats *blk_alloc_queue_stats(void) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 01d0620a4e4a..4bda70e8db48 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -84,8 +84,7 @@ struct throtl_service_queue { * RB tree of active children throtl_grp's, which are sorted by * their ->disptime. 
*/ - struct rb_root pending_tree; /* RB tree of active tgs */ - struct rb_node *first_pending; /* first node in the tree */ + struct rb_root_cached pending_tree; /* RB tree of active tgs */ unsigned int nr_pending; /* # queued in the tree */ unsigned long first_pending_disptime; /* disptime of the first tg */ struct timer_list pending_timer; /* fires on first_pending_disptime */ @@ -475,7 +474,7 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq) { INIT_LIST_HEAD(&sq->queued[0]); INIT_LIST_HEAD(&sq->queued[1]); - sq->pending_tree = RB_ROOT; + sq->pending_tree = RB_ROOT_CACHED; timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0); } @@ -616,31 +615,23 @@ static void throtl_pd_free(struct blkg_policy_data *pd) static struct throtl_grp * throtl_rb_first(struct throtl_service_queue *parent_sq) { + struct rb_node *n; /* Service tree is empty */ if (!parent_sq->nr_pending) return NULL; - if (!parent_sq->first_pending) - parent_sq->first_pending = rb_first(&parent_sq->pending_tree); - - if (parent_sq->first_pending) - return rb_entry_tg(parent_sq->first_pending); - - return NULL; -} - -static void rb_erase_init(struct rb_node *n, struct rb_root *root) -{ - rb_erase(n, root); - RB_CLEAR_NODE(n); + n = rb_first_cached(&parent_sq->pending_tree); + WARN_ON_ONCE(!n); + if (!n) + return NULL; + return rb_entry_tg(n); } static void throtl_rb_erase(struct rb_node *n, struct throtl_service_queue *parent_sq) { - if (parent_sq->first_pending == n) - parent_sq->first_pending = NULL; - rb_erase_init(n, &parent_sq->pending_tree); + rb_erase_cached(n, &parent_sq->pending_tree); + RB_CLEAR_NODE(n); --parent_sq->nr_pending; } @@ -658,11 +649,11 @@ static void update_min_dispatch_time(struct throtl_service_queue *parent_sq) static void tg_service_queue_add(struct throtl_grp *tg) { struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq; - struct rb_node **node = &parent_sq->pending_tree.rb_node; + struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node; struct rb_node *parent = NULL; struct throtl_grp *__tg; unsigned long key = tg->disptime; - int left = 1; + bool leftmost = true; while (*node != NULL) { parent = *node; @@ -672,15 +663,13 @@ static void tg_service_queue_add(struct throtl_grp *tg) node = &parent->rb_left; else { node = &parent->rb_right; - left = 0; + leftmost = false; } } - if (left) - parent_sq->first_pending = &tg->rb_node; - rb_link_node(&tg->rb_node, parent, node); - rb_insert_color(&tg->rb_node, &parent_sq->pending_tree); + rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree, + leftmost); } static void __throtl_enqueue_tg(struct throtl_grp *tg) @@ -2126,21 +2115,11 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif -static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) -{ -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW - /* fallback to root_blkg if we fail to get a blkg ref */ - if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) - bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -#endif -} - bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio) { struct throtl_qnode *qn = NULL; - struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq; bool rw = bio_data_dir(bio); bool throttled = false; @@ -2159,7 +2138,6 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, if 
(unlikely(blk_queue_bypass(q))) goto out_unlock; - blk_throtl_assoc_bio(tg, bio); blk_throtl_update_idletime(tg); sq = &tg->service_queue; diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 8e20a0677dcf..8ac93fcbaa2e 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -310,6 +310,7 @@ static void scale_up(struct rq_wb *rwb) rq_depth_scale_up(&rwb->rq_depth); calc_wb_limits(rwb); rwb->unknown_cnt = 0; + rwb_wake_all(rwb); rwb_trace_step(rwb, "scale up"); } @@ -318,7 +319,6 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle) rq_depth_scale_down(&rwb->rq_depth, hard_throttle); calc_wb_limits(rwb); rwb->unknown_cnt = 0; - rwb_wake_all(rwb); rwb_trace_step(rwb, "scale down"); } diff --git a/block/blk.h b/block/blk.h index 9db4e389582c..3d2aecba96a4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -4,6 +4,7 @@ #include <linux/idr.h> #include <linux/blk-mq.h> +#include <xen/xen.h> #include "blk-mq.h" /* Amount of time in which a process may batch requests */ @@ -124,7 +125,7 @@ static inline void __blk_get_queue(struct request_queue *q) } struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, - int node, int cmd_size); + int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); int blk_init_rl(struct request_list *rl, struct request_queue *q, @@ -149,6 +150,41 @@ static inline void blk_queue_enter_live(struct request_queue *q) percpu_ref_get(&q->q_usage_counter); } +static inline bool biovec_phys_mergeable(struct request_queue *q, + struct bio_vec *vec1, struct bio_vec *vec2) +{ + unsigned long mask = queue_segment_boundary(q); + phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset; + phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset; + + if (addr1 + vec1->bv_len != addr2) + return false; + if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2)) + return false; + if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) + return false; + return true; +} + +static inline bool __bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + return offset || + ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); +} + +/* + * Check if adding a bio_vec after bprv with offset would create a gap in + * the SG list. Most drivers don't care about this, but some do. 
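As a concrete illustration of the gap rule, not part of the patch: on a queue whose virt_boundary mask is 4 KiB - 1, in the style of NVMe PRP devices, a follow-on bio_vec creates a gap whenever it starts at a non-zero offset, or whenever the previous bio_vec does not end exactly on a 4 KiB boundary; either case would leave a hole that such a device cannot express in a single segment, so the segments must not be merged.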
+ */ +static inline bool bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + if (!queue_virt_boundary(q)) + return false; + return __bvec_gap_to_prev(q, bprv, offset); +} + #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); @@ -158,7 +194,38 @@ static inline bool bio_integrity_endio(struct bio *bio) return __bio_integrity_endio(bio); return true; } -#else + +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + struct bio_integrity_payload *bip = bio_integrity(req->bio); + struct bio_integrity_payload *bip_next = bio_integrity(next); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bio_integrity_payload *bip_next = bio_integrity(req->bio); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + return false; +} +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + return false; +} + static inline void blk_flush_integrity(void) { } @@ -166,7 +233,7 @@ static inline bool bio_integrity_endio(struct bio *bio) { return true; } -#endif +#endif /* CONFIG_BLK_DEV_INTEGRITY */ void blk_timeout_work(struct work_struct *work); unsigned long blk_rq_timeout(unsigned long timeout); diff --git a/block/bounce.c b/block/bounce.c index bc63b3a2d18c..ec0d99995f5f 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -31,6 +31,24 @@ static struct bio_set bounce_bio_set, bounce_bio_split; static mempool_t page_pool, isa_page_pool; +static void init_bounce_bioset(void) +{ + static bool bounce_bs_setup; + int ret; + + if (bounce_bs_setup) + return; + + ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + BUG_ON(ret); + if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE)) + BUG_ON(1); + + ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0); + BUG_ON(ret); + bounce_bs_setup = true; +} + #if defined(CONFIG_HIGHMEM) static __init int init_emergency_pool(void) { @@ -44,14 +62,7 @@ static __init int init_emergency_pool(void) BUG_ON(ret); pr_info("pool size: %d pages\n", POOL_SIZE); - ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - BUG_ON(ret); - if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE)) - BUG_ON(1); - - ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0); - BUG_ON(ret); - + init_bounce_bioset(); return 0; } @@ -86,6 +97,8 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) return mempool_alloc_pages(gfp_mask | GFP_DMA, data); } +static DEFINE_MUTEX(isa_mutex); + /* * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA * as the max address, so check if the pool has already been created. 
@@ -94,14 +107,20 @@ int init_emergency_isa_pool(void) { int ret; - if (mempool_initialized(&isa_page_pool)) + mutex_lock(&isa_mutex); + + if (mempool_initialized(&isa_page_pool)) { + mutex_unlock(&isa_mutex); return 0; + } ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa, mempool_free_pages, (void *) 0); BUG_ON(ret); pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE); + init_bounce_bioset(); + mutex_unlock(&isa_mutex); return 0; } @@ -257,7 +276,9 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, } } - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); + + blkcg_bio_issue_init(bio); return bio; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 2eb87444b157..6a3d87dd3c1a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1644,14 +1644,20 @@ static void cfq_pd_offline(struct blkg_policy_data *pd) int i; for (i = 0; i < IOPRIO_BE_NR; i++) { - if (cfqg->async_cfqq[0][i]) + if (cfqg->async_cfqq[0][i]) { cfq_put_queue(cfqg->async_cfqq[0][i]); - if (cfqg->async_cfqq[1][i]) + cfqg->async_cfqq[0][i] = NULL; + } + if (cfqg->async_cfqq[1][i]) { cfq_put_queue(cfqg->async_cfqq[1][i]); + cfqg->async_cfqq[1][i] = NULL; + } } - if (cfqg->async_idle_cfqq) + if (cfqg->async_idle_cfqq) { cfq_put_queue(cfqg->async_idle_cfqq); + cfqg->async_idle_cfqq = NULL; + } /* * @blkg is going offline and will be ignored by @@ -3753,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = bio_blkcg(bio)->css.serial_nr; + serial_nr = __bio_blkcg(bio)->css.serial_nr; rcu_read_unlock(); /* @@ -3818,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, struct cfq_group *cfqg; rcu_read_lock(); - cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio)); + cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio)); if (!cfqg) { cfqq = &cfqd->oom_cfqq; goto out; diff --git a/block/elevator.c b/block/elevator.c index fae58b2f906f..8fdcd64ae12e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -41,6 +41,7 @@ #include "blk.h" #include "blk-mq-sched.h" +#include "blk-pm.h" #include "blk-wbt.h" static DEFINE_SPINLOCK(elv_list_lock); @@ -557,27 +558,6 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); } -#ifdef CONFIG_PM -static void blk_pm_requeue_request(struct request *rq) -{ - if (rq->q->dev && !(rq->rq_flags & RQF_PM)) - rq->q->nr_pending--; -} - -static void blk_pm_add_request(struct request_queue *q, struct request *rq) -{ - if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && - (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); -} -#else -static inline void blk_pm_requeue_request(struct request *rq) {} -static inline void blk_pm_add_request(struct request_queue *q, - struct request *rq) -{ -} -#endif - void elv_requeue_request(struct request_queue *q, struct request *rq) { /* diff --git a/block/genhd.c b/block/genhd.c index be5bab20b2ab..cff6bdf27226 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -567,7 +567,8 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void register_disk(struct device *parent, struct gendisk *disk) +static void register_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -582,6 +583,10 @@ static void register_disk(struct device *parent, 
struct gendisk *disk) /* delay uevents, until we scanned partition table */ dev_set_uevent_suppress(ddev, 1); + if (groups) { + WARN_ON(ddev->groups); + ddev->groups = groups; + } if (device_add(ddev)) return; if (!sysfs_deprecated) { @@ -647,6 +652,7 @@ exit: * __device_add_disk - add disk information to kernel list * @parent: parent device for the disk * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups * @register_queue: register the queue if set to true * * This function registers the partitioning information in @disk @@ -655,6 +661,7 @@ exit: * FIXME: error handling */ static void __device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, bool register_queue) { dev_t devt; @@ -698,7 +705,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); } - register_disk(parent, disk); + register_disk(parent, disk, groups); if (register_queue) blk_register_queue(disk); @@ -712,15 +719,17 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, blk_integrity_add(disk); } -void device_add_disk(struct device *parent, struct gendisk *disk) +void device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) + { - __device_add_disk(parent, disk, true); + __device_add_disk(parent, disk, groups, true); } EXPORT_SYMBOL(device_add_disk); void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk) { - __device_add_disk(parent, disk, false); + __device_add_disk(parent, disk, NULL, false); } EXPORT_SYMBOL(device_add_disk_no_queue_reg); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index a1660bafc912..eccac01a10b6 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -29,19 +29,30 @@ #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" -#include "blk-stat.h" -/* Scheduling domains. */ +#define CREATE_TRACE_POINTS +#include <trace/events/kyber.h> + +/* + * Scheduling domains: the device is divided into multiple domains based on the + * request type. + */ enum { KYBER_READ, - KYBER_SYNC_WRITE, - KYBER_OTHER, /* Async writes, discard, etc. */ + KYBER_WRITE, + KYBER_DISCARD, + KYBER_OTHER, KYBER_NUM_DOMAINS, }; -enum { - KYBER_MIN_DEPTH = 256, +static const char *kyber_domain_names[] = { + [KYBER_READ] = "READ", + [KYBER_WRITE] = "WRITE", + [KYBER_DISCARD] = "DISCARD", + [KYBER_OTHER] = "OTHER", +}; +enum { /* * In order to prevent starvation of synchronous requests by a flood of * asynchronous requests, we reserve 25% of requests for synchronous @@ -51,25 +62,87 @@ enum { }; /* - * Initial device-wide depths for each scheduling domain. + * Maximum device-wide depth for each scheduling domain. * - * Even for fast devices with lots of tags like NVMe, you can saturate - * the device with only a fraction of the maximum possible queue depth. - * So, we cap these to a reasonable value. + * Even for fast devices with lots of tags like NVMe, you can saturate the + * device with only a fraction of the maximum possible queue depth. So, we cap + * these to a reasonable value. */ static const unsigned int kyber_depth[] = { [KYBER_READ] = 256, - [KYBER_SYNC_WRITE] = 128, - [KYBER_OTHER] = 64, + [KYBER_WRITE] = 128, + [KYBER_DISCARD] = 64, + [KYBER_OTHER] = 16, }; /* - * Scheduling domain batch sizes. We favor reads. + * Default latency targets for each scheduling domain. 
+ */ +static const u64 kyber_latency_targets[] = { + [KYBER_READ] = 2ULL * NSEC_PER_MSEC, + [KYBER_WRITE] = 10ULL * NSEC_PER_MSEC, + [KYBER_DISCARD] = 5ULL * NSEC_PER_SEC, +}; + +/* + * Batch size (number of requests we'll dispatch in a row) for each scheduling + * domain. */ static const unsigned int kyber_batch_size[] = { [KYBER_READ] = 16, - [KYBER_SYNC_WRITE] = 8, - [KYBER_OTHER] = 8, + [KYBER_WRITE] = 8, + [KYBER_DISCARD] = 1, + [KYBER_OTHER] = 1, +}; + +/* + * Requests latencies are recorded in a histogram with buckets defined relative + * to the target latency: + * + * <= 1/4 * target latency + * <= 1/2 * target latency + * <= 3/4 * target latency + * <= target latency + * <= 1 1/4 * target latency + * <= 1 1/2 * target latency + * <= 1 3/4 * target latency + * > 1 3/4 * target latency + */ +enum { + /* + * The width of the latency histogram buckets is + * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency. + */ + KYBER_LATENCY_SHIFT = 2, + /* + * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency, + * thus, "good". + */ + KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT, + /* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */ + KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT, +}; + +/* + * We measure both the total latency and the I/O latency (i.e., latency after + * submitting to the device). + */ +enum { + KYBER_TOTAL_LATENCY, + KYBER_IO_LATENCY, +}; + +static const char *kyber_latency_type_names[] = { + [KYBER_TOTAL_LATENCY] = "total", + [KYBER_IO_LATENCY] = "I/O", +}; + +/* + * Per-cpu latency histograms: total latency and I/O latency for each scheduling + * domain except for KYBER_OTHER. + */ +struct kyber_cpu_latency { + atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS]; }; /* @@ -88,12 +161,9 @@ struct kyber_ctx_queue { struct kyber_queue_data { struct request_queue *q; - struct blk_stat_callback *cb; - /* - * The device is divided into multiple scheduling domains based on the - * request type. Each domain has a fixed number of in-flight requests of - * that type device-wide, limited by these tokens. + * Each scheduling domain has a limited number of in-flight requests + * device-wide, limited by these tokens. */ struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS]; @@ -103,8 +173,19 @@ struct kyber_queue_data { */ unsigned int async_depth; + struct kyber_cpu_latency __percpu *cpu_latency; + + /* Timer for stats aggregation and adjusting domain tokens. */ + struct timer_list timer; + + unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS]; + + unsigned long latency_timeout[KYBER_OTHER]; + + int domain_p99[KYBER_OTHER]; + /* Target latencies in nanoseconds. 
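To put illustrative numbers on the histogram described above: with the default 2 ms read target, each bucket spans 0.5 ms, so a read completing in 1.2 ms lands in the third bucket (<= 3/4 of the target, a "good" bucket), while one completing in 4.5 ms lands in the last bucket (> 1 3/4 of the target).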
*/ - u64 read_lat_nsec, write_lat_nsec; + u64 latency_targets[KYBER_OTHER]; }; struct kyber_hctx_data { @@ -124,233 +205,219 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, static unsigned int kyber_sched_domain(unsigned int op) { - if ((op & REQ_OP_MASK) == REQ_OP_READ) + switch (op & REQ_OP_MASK) { + case REQ_OP_READ: return KYBER_READ; - else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op)) - return KYBER_SYNC_WRITE; - else + case REQ_OP_WRITE: + return KYBER_WRITE; + case REQ_OP_DISCARD: + return KYBER_DISCARD; + default: return KYBER_OTHER; + } } -enum { - NONE = 0, - GOOD = 1, - GREAT = 2, - BAD = -1, - AWFUL = -2, -}; - -#define IS_GOOD(status) ((status) > 0) -#define IS_BAD(status) ((status) < 0) - -static int kyber_lat_status(struct blk_stat_callback *cb, - unsigned int sched_domain, u64 target) +static void flush_latency_buckets(struct kyber_queue_data *kqd, + struct kyber_cpu_latency *cpu_latency, + unsigned int sched_domain, unsigned int type) { - u64 latency; - - if (!cb->stat[sched_domain].nr_samples) - return NONE; + unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; + atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type]; + unsigned int bucket; - latency = cb->stat[sched_domain].mean; - if (latency >= 2 * target) - return AWFUL; - else if (latency > target) - return BAD; - else if (latency <= target / 2) - return GREAT; - else /* (latency <= target) */ - return GOOD; + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++) + buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0); } /* - * Adjust the read or synchronous write depth given the status of reads and - * writes. The goal is that the latencies of the two domains are fair (i.e., if - * one is good, then the other is good). + * Calculate the histogram bucket with the given percentile rank, or -1 if there + * aren't enough samples yet. */ -static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd, - unsigned int sched_domain, int this_status, - int other_status) +static int calculate_percentile(struct kyber_queue_data *kqd, + unsigned int sched_domain, unsigned int type, + unsigned int percentile) { - unsigned int orig_depth, depth; + unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; + unsigned int bucket, samples = 0, percentile_samples; + + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++) + samples += buckets[bucket]; + + if (!samples) + return -1; /* - * If this domain had no samples, or reads and writes are both good or - * both bad, don't adjust the depth. + * We do the calculation once we have 500 samples or one second passes + * since the first sample was recorded, whichever comes first. 
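To make the percentile walk below concrete, with illustrative numbers: given 500 samples distributed 200/150/100/50 across the first four buckets, the p99 rank is DIV_ROUND_UP(500 * 99, 100) = 495; subtracting bucket counts leaves 295, then 145, then 45 samples, so the walk stops at the fourth bucket (index 3), which is still within the "good" half of the histogram.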
*/ - if (this_status == NONE || - (IS_GOOD(this_status) && IS_GOOD(other_status)) || - (IS_BAD(this_status) && IS_BAD(other_status))) - return; - - orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth; + if (!kqd->latency_timeout[sched_domain]) + kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL); + if (samples < 500 && + time_is_after_jiffies(kqd->latency_timeout[sched_domain])) { + return -1; + } + kqd->latency_timeout[sched_domain] = 0; - if (other_status == NONE) { - depth++; - } else { - switch (this_status) { - case GOOD: - if (other_status == AWFUL) - depth -= max(depth / 4, 1U); - else - depth -= max(depth / 8, 1U); - break; - case GREAT: - if (other_status == AWFUL) - depth /= 2; - else - depth -= max(depth / 4, 1U); + percentile_samples = DIV_ROUND_UP(samples * percentile, 100); + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) { + if (buckets[bucket] >= percentile_samples) break; - case BAD: - depth++; - break; - case AWFUL: - if (other_status == GREAT) - depth += 2; - else - depth++; - break; - } + percentile_samples -= buckets[bucket]; } + memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type])); - depth = clamp(depth, 1U, kyber_depth[sched_domain]); - if (depth != orig_depth) - sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); + trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain], + kyber_latency_type_names[type], percentile, + bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples); + + return bucket; } -/* - * Adjust the depth of other requests given the status of reads and synchronous - * writes. As long as either domain is doing fine, we don't throttle, but if - * both domains are doing badly, we throttle heavily. - */ -static void kyber_adjust_other_depth(struct kyber_queue_data *kqd, - int read_status, int write_status, - bool have_samples) -{ - unsigned int orig_depth, depth; - int status; - - orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth; - - if (read_status == NONE && write_status == NONE) { - depth += 2; - } else if (have_samples) { - if (read_status == NONE) - status = write_status; - else if (write_status == NONE) - status = read_status; - else - status = max(read_status, write_status); - switch (status) { - case GREAT: - depth += 2; - break; - case GOOD: - depth++; - break; - case BAD: - depth -= max(depth / 4, 1U); - break; - case AWFUL: - depth /= 2; - break; - } +static void kyber_resize_domain(struct kyber_queue_data *kqd, + unsigned int sched_domain, unsigned int depth) +{ + depth = clamp(depth, 1U, kyber_depth[sched_domain]); + if (depth != kqd->domain_tokens[sched_domain].sb.depth) { + sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); + trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain], + depth); } - - depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]); - if (depth != orig_depth) - sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth); } -/* - * Apply heuristics for limiting queue depths based on gathered latency - * statistics. - */ -static void kyber_stat_timer_fn(struct blk_stat_callback *cb) +static void kyber_timer_fn(struct timer_list *t) { - struct kyber_queue_data *kqd = cb->data; - int read_status, write_status; + struct kyber_queue_data *kqd = from_timer(kqd, t, timer); + unsigned int sched_domain; + int cpu; + bool bad = false; + + /* Sum all of the per-cpu latency histograms. 
*/ + for_each_online_cpu(cpu) { + struct kyber_cpu_latency *cpu_latency; + + cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu); + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + flush_latency_buckets(kqd, cpu_latency, sched_domain, + KYBER_TOTAL_LATENCY); + flush_latency_buckets(kqd, cpu_latency, sched_domain, + KYBER_IO_LATENCY); + } + } - read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec); - write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec); + /* + * Check if any domains have a high I/O latency, which might indicate + * congestion in the device. Note that we use the p90; we don't want to + * be too sensitive to outliers here. + */ + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + int p90; - kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status); - kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status); - kyber_adjust_other_depth(kqd, read_status, write_status, - cb->stat[KYBER_OTHER].nr_samples != 0); + p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY, + 90); + if (p90 >= KYBER_GOOD_BUCKETS) + bad = true; + } /* - * Continue monitoring latencies if we aren't hitting the targets or - * we're still throttling other requests. + * Adjust the scheduling domain depths. If we determined that there was + * congestion, we throttle all domains with good latencies. Either way, + * we ease up on throttling domains with bad latencies. */ - if (!blk_stat_is_active(kqd->cb) && - ((IS_BAD(read_status) || IS_BAD(write_status) || - kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER]))) - blk_stat_activate_msecs(kqd->cb, 100); + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + unsigned int orig_depth, depth; + int p99; + + p99 = calculate_percentile(kqd, sched_domain, + KYBER_TOTAL_LATENCY, 99); + /* + * This is kind of subtle: different domains will not + * necessarily have enough samples to calculate the latency + * percentiles during the same window, so we have to remember + * the p99 for the next time we observe congestion; once we do, + * we don't want to throttle again until we get more data, so we + * reset it to -1. + */ + if (bad) { + if (p99 < 0) + p99 = kqd->domain_p99[sched_domain]; + kqd->domain_p99[sched_domain] = -1; + } else if (p99 >= 0) { + kqd->domain_p99[sched_domain] = p99; + } + if (p99 < 0) + continue; + + /* + * If this domain has bad latency, throttle less. Otherwise, + * throttle more iff we determined that there is congestion. + * + * The new depth is scaled linearly with the p99 latency vs the + * latency target. E.g., if the p99 is 3/4 of the target, then + * we throttle down to 3/4 of the current depth, and if the p99 + * is 2x the target, then we double the depth. + */ + if (bad || p99 >= KYBER_GOOD_BUCKETS) { + orig_depth = kqd->domain_tokens[sched_domain].sb.depth; + depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT; + kyber_resize_domain(kqd, sched_domain, depth); + } + } } -static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd) +static unsigned int kyber_sched_tags_shift(struct request_queue *q) { /* * All of the hardware queues have the same depth, so we can just grab * the shift of the first one. 
*/ - return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; -} - -static int kyber_bucket_fn(const struct request *rq) -{ - return kyber_sched_domain(rq->cmd_flags); + return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; } static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) { struct kyber_queue_data *kqd; - unsigned int max_tokens; unsigned int shift; int ret = -ENOMEM; int i; - kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); + kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); if (!kqd) goto err; + kqd->q = q; - kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn, - KYBER_NUM_DOMAINS, kqd); - if (!kqd->cb) + kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency, + GFP_KERNEL | __GFP_ZERO); + if (!kqd->cpu_latency) goto err_kqd; - /* - * The maximum number of tokens for any scheduling domain is at least - * the queue depth of a single hardware queue. If the hardware doesn't - * have many tags, still provide a reasonable number. - */ - max_tokens = max_t(unsigned int, q->tag_set->queue_depth, - KYBER_MIN_DEPTH); + timer_setup(&kqd->timer, kyber_timer_fn, 0); + for (i = 0; i < KYBER_NUM_DOMAINS; i++) { WARN_ON(!kyber_depth[i]); WARN_ON(!kyber_batch_size[i]); ret = sbitmap_queue_init_node(&kqd->domain_tokens[i], - max_tokens, -1, false, GFP_KERNEL, - q->node); + kyber_depth[i], -1, false, + GFP_KERNEL, q->node); if (ret) { while (--i >= 0) sbitmap_queue_free(&kqd->domain_tokens[i]); - goto err_cb; + goto err_buckets; } - sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]); } - shift = kyber_sched_tags_shift(kqd); - kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; + for (i = 0; i < KYBER_OTHER; i++) { + kqd->domain_p99[i] = -1; + kqd->latency_targets[i] = kyber_latency_targets[i]; + } - kqd->read_lat_nsec = 2000000ULL; - kqd->write_lat_nsec = 10000000ULL; + shift = kyber_sched_tags_shift(q); + kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; return kqd; -err_cb: - blk_stat_free_callback(kqd->cb); +err_buckets: + free_percpu(kqd->cpu_latency); err_kqd: kfree(kqd); err: @@ -372,25 +439,24 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e) return PTR_ERR(kqd); } + blk_stat_enable_accounting(q); + eq->elevator_data = kqd; q->elevator = eq; - blk_stat_add_callback(q, kqd->cb); - return 0; } static void kyber_exit_sched(struct elevator_queue *e) { struct kyber_queue_data *kqd = e->elevator_data; - struct request_queue *q = kqd->q; int i; - blk_stat_remove_callback(q, kqd->cb); + del_timer_sync(&kqd->timer); for (i = 0; i < KYBER_NUM_DOMAINS; i++) sbitmap_queue_free(&kqd->domain_tokens[i]); - blk_stat_free_callback(kqd->cb); + free_percpu(kqd->cpu_latency); kfree(kqd); } @@ -558,41 +624,44 @@ static void kyber_finish_request(struct request *rq) rq_clear_domain_token(kqd, rq); } -static void kyber_completed_request(struct request *rq) +static void add_latency_sample(struct kyber_cpu_latency *cpu_latency, + unsigned int sched_domain, unsigned int type, + u64 target, u64 latency) { - struct request_queue *q = rq->q; - struct kyber_queue_data *kqd = q->elevator->elevator_data; - unsigned int sched_domain; - u64 now, latency, target; + unsigned int bucket; + u64 divisor; - /* - * Check if this request met our latency goal. If not, quickly gather - * some statistics and start throttling. 
- */ - sched_domain = kyber_sched_domain(rq->cmd_flags); - switch (sched_domain) { - case KYBER_READ: - target = kqd->read_lat_nsec; - break; - case KYBER_SYNC_WRITE: - target = kqd->write_lat_nsec; - break; - default: - return; + if (latency > 0) { + divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1); + bucket = min_t(unsigned int, div64_u64(latency - 1, divisor), + KYBER_LATENCY_BUCKETS - 1); + } else { + bucket = 0; } - /* If we are already monitoring latencies, don't check again. */ - if (blk_stat_is_active(kqd->cb)) - return; + atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]); +} - now = ktime_get_ns(); - if (now < rq->io_start_time_ns) +static void kyber_completed_request(struct request *rq, u64 now) +{ + struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; + struct kyber_cpu_latency *cpu_latency; + unsigned int sched_domain; + u64 target; + + sched_domain = kyber_sched_domain(rq->cmd_flags); + if (sched_domain == KYBER_OTHER) return; - latency = now - rq->io_start_time_ns; + cpu_latency = get_cpu_ptr(kqd->cpu_latency); + target = kqd->latency_targets[sched_domain]; + add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY, + target, now - rq->start_time_ns); + add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target, + now - rq->io_start_time_ns); + put_cpu_ptr(kqd->cpu_latency); - if (latency > target) - blk_stat_activate_msecs(kqd->cb, 10); + timer_reduce(&kqd->timer, jiffies + HZ / 10); } struct flush_kcq_data { @@ -713,6 +782,9 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, rq_set_domain_token(rq, nr); list_del_init(&rq->queuelist); return rq; + } else { + trace_kyber_throttled(kqd->q, + kyber_domain_names[khd->cur_domain]); } } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) { nr = kyber_get_domain_token(kqd, khd, hctx); @@ -723,6 +795,9 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, rq_set_domain_token(rq, nr); list_del_init(&rq->queuelist); return rq; + } else { + trace_kyber_throttled(kqd->q, + kyber_domain_names[khd->cur_domain]); } } @@ -790,17 +865,17 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx) return false; } -#define KYBER_LAT_SHOW_STORE(op) \ -static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \ - char *page) \ +#define KYBER_LAT_SHOW_STORE(domain, name) \ +static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \ + char *page) \ { \ struct kyber_queue_data *kqd = e->elevator_data; \ \ - return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \ + return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \ } \ \ -static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \ - const char *page, size_t count) \ +static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \ + const char *page, size_t count) \ { \ struct kyber_queue_data *kqd = e->elevator_data; \ unsigned long long nsec; \ @@ -810,12 +885,12 @@ static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \ if (ret) \ return ret; \ \ - kqd->op##_lat_nsec = nsec; \ + kqd->latency_targets[domain] = nsec; \ \ return count; \ } -KYBER_LAT_SHOW_STORE(read); -KYBER_LAT_SHOW_STORE(write); +KYBER_LAT_SHOW_STORE(KYBER_READ, read); +KYBER_LAT_SHOW_STORE(KYBER_WRITE, write); #undef KYBER_LAT_SHOW_STORE #define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store) @@ -882,7 +957,8 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ return 0; \ } KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read) 
-KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other) #undef KYBER_DEBUGFS_DOMAIN_ATTRS @@ -900,20 +976,7 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m) struct blk_mq_hw_ctx *hctx = data; struct kyber_hctx_data *khd = hctx->sched_data; - switch (khd->cur_domain) { - case KYBER_READ: - seq_puts(m, "READ\n"); - break; - case KYBER_SYNC_WRITE: - seq_puts(m, "SYNC_WRITE\n"); - break; - case KYBER_OTHER: - seq_puts(m, "OTHER\n"); - break; - default: - seq_printf(m, "%u\n", khd->cur_domain); - break; - } + seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]); return 0; } @@ -930,7 +993,8 @@ static int kyber_batching_show(void *data, struct seq_file *m) {#name "_tokens", 0400, kyber_##name##_tokens_show} static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { KYBER_QUEUE_DOMAIN_ATTRS(read), - KYBER_QUEUE_DOMAIN_ATTRS(sync_write), + KYBER_QUEUE_DOMAIN_ATTRS(write), + KYBER_QUEUE_DOMAIN_ATTRS(discard), KYBER_QUEUE_DOMAIN_ATTRS(other), {"async_depth", 0400, kyber_async_depth_show}, {}, @@ -942,7 +1006,8 @@ static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { {#name "_waiting", 0400, kyber_##name##_waiting_show} static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = { KYBER_HCTX_DOMAIN_ATTRS(read), - KYBER_HCTX_DOMAIN_ATTRS(sync_write), + KYBER_HCTX_DOMAIN_ATTRS(write), + KYBER_HCTX_DOMAIN_ATTRS(discard), KYBER_HCTX_DOMAIN_ATTRS(other), {"cur_domain", 0400, kyber_cur_domain_show}, {"batching", 0400, kyber_batching_show}, diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 08f26db2da7e..2a361e22d38d 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1428,7 +1428,7 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, return 0; dma_deconfigure: - acpi_dma_deconfigure(&pdev->dev); + arch_teardown_dma_ops(&pdev->dev); dev_put: platform_device_put(pdev); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index e1b6231cfa1c..56676a56b3e3 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1469,16 +1469,6 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) } EXPORT_SYMBOL_GPL(acpi_dma_configure); -/** - * acpi_dma_deconfigure - Tear-down DMA configuration for the device. - * @dev: The pointer to the device - */ -void acpi_dma_deconfigure(struct device *dev) -{ - arch_teardown_dma_ops(dev); -} -EXPORT_SYMBOL_GPL(acpi_dma_deconfigure); - static void acpi_init_coherency(struct acpi_device *adev) { unsigned long long cca = 0; diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 39b181d6bd0d..4ca7a6b4eaae 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -33,7 +33,6 @@ if ATA config ATA_NONSTANDARD bool - default n config ATA_VERBOSE_ERROR bool "Verbose ATA error reporting" @@ -62,7 +61,6 @@ config ATA_ACPI config SATA_ZPODD bool "SATA Zero Power Optical Disc Drive (ZPODD) support" depends on ATA_ACPI && PM - default n help This option adds support for SATA Zero Power Optical Disc Drive (ZPODD). It requires both the ODD and the platform @@ -121,7 +119,8 @@ config SATA_AHCI_PLATFORM config AHCI_BRCM tristate "Broadcom AHCI SATA support" - depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP + depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP || \ + ARCH_BCM_63XX help This option enables support for the AHCI SATA3 controller found on Broadcom SoC's. 
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 6a1515f0da40..ef356e70e6de 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -352,6 +352,8 @@ struct ahci_host_priv { struct clk *clks[AHCI_MAX_CLKS]; /* Optional */ struct reset_control *rsts; /* Optional */ struct regulator **target_pwrs; /* Optional */ + struct regulator *ahci_regulator;/* Optional */ + struct regulator *phy_regulator;/* Optional */ /* * If platform uses PHYs. There is a 1:1 relation between the port number and * the PHY position in this array. diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index f3d557777d82..fba5a3044c8a 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -25,6 +25,7 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/reset.h> #include <linux/string.h> #include "ahci.h" @@ -94,6 +95,7 @@ struct brcm_ahci_priv { u32 port_mask; u32 quirks; enum brcm_ahci_version version; + struct reset_control *rcdev; }; static inline u32 brcm_sata_readreg(void __iomem *addr) @@ -381,6 +383,7 @@ static struct scsi_host_template ahci_platform_sht = { static const struct of_device_id ahci_of_match[] = { {.compatible = "brcm,bcm7425-ahci", .data = (void *)BRCM_SATA_BCM7425}, {.compatible = "brcm,bcm7445-ahci", .data = (void *)BRCM_SATA_BCM7445}, + {.compatible = "brcm,bcm63138-ahci", .data = (void *)BRCM_SATA_BCM7445}, {.compatible = "brcm,bcm-nsp-ahci", .data = (void *)BRCM_SATA_NSP}, {}, }; @@ -411,6 +414,11 @@ static int brcm_ahci_probe(struct platform_device *pdev) if (IS_ERR(priv->top_ctrl)) return PTR_ERR(priv->top_ctrl); + /* Reset is optional depending on platform */ + priv->rcdev = devm_reset_control_get(&pdev->dev, "ahci"); + if (!IS_ERR_OR_NULL(priv->rcdev)) + reset_control_deassert(priv->rcdev); + if ((priv->version == BRCM_SATA_BCM7425) || (priv->version == BRCM_SATA_NSP)) { priv->quirks |= BRCM_AHCI_QUIRK_NO_NCQ; diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 46f0bd75eff7..cf1e0e18a7a9 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -33,6 +33,13 @@ static const struct ata_port_info ahci_port_info = { .port_ops = &ahci_platform_ops, }; +static const struct ata_port_info ahci_port_info_nolpm = { + .flags = AHCI_FLAG_COMMON | ATA_FLAG_NO_LPM, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_platform_ops, +}; + static struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; @@ -41,6 +48,7 @@ static int ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ahci_host_priv *hpriv; + const struct ata_port_info *port; int rc; hpriv = ahci_platform_get_resources(pdev, @@ -58,7 +66,11 @@ static int ahci_probe(struct platform_device *pdev) if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci")) hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ; - rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, + port = acpi_device_get_match_data(dev); + if (!port) + port = &ahci_port_info; + + rc = ahci_platform_init_host(pdev, hpriv, port, &ahci_platform_sht); if (rc) goto disable_resources; @@ -85,6 +97,7 @@ static const struct of_device_id ahci_of_match[] = { MODULE_DEVICE_TABLE(of, ahci_of_match); static const struct acpi_device_id ahci_acpi_match[] = { + { "APMC0D33", (unsigned long)&ahci_port_info_nolpm }, { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, {}, }; diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c index 631610b72aa5..911710643305 100644 --- 
a/drivers/ata/ahci_sunxi.c +++ b/drivers/ata/ahci_sunxi.c @@ -181,7 +181,7 @@ static int ahci_sunxi_probe(struct platform_device *pdev) struct ahci_host_priv *hpriv; int rc; - hpriv = ahci_platform_get_resources(pdev, 0); + hpriv = ahci_platform_get_resources(pdev, AHCI_PLATFORM_GET_RESETS); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); @@ -250,6 +250,7 @@ static SIMPLE_DEV_PM_OPS(ahci_sunxi_pm_ops, ahci_platform_suspend, static const struct of_device_id ahci_sunxi_of_match[] = { { .compatible = "allwinner,sun4i-a10-ahci", }, + { .compatible = "allwinner,sun8i-r40-ahci", }, { }, }; MODULE_DEVICE_TABLE(of, ahci_sunxi_of_match); diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index c92c10d55374..4b900fc659f7 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -139,7 +139,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_disable_clks); * ahci_platform_enable_regulators - Enable regulators * @hpriv: host private area to store config values * - * This function enables all the regulators found in + * This function enables all the regulators found in controller and * hpriv->target_pwrs, if any. If a regulator fails to be enabled, it * disables all the regulators already enabled in reverse order and * returns an error. @@ -151,6 +151,18 @@ int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv) { int rc, i; + if (hpriv->ahci_regulator) { + rc = regulator_enable(hpriv->ahci_regulator); + if (rc) + return rc; + } + + if (hpriv->phy_regulator) { + rc = regulator_enable(hpriv->phy_regulator); + if (rc) + goto disable_ahci_pwrs; + } + for (i = 0; i < hpriv->nports; i++) { if (!hpriv->target_pwrs[i]) continue; @@ -167,6 +179,11 @@ disable_target_pwrs: if (hpriv->target_pwrs[i]) regulator_disable(hpriv->target_pwrs[i]); + if (hpriv->phy_regulator) + regulator_disable(hpriv->phy_regulator); +disable_ahci_pwrs: + if (hpriv->ahci_regulator) + regulator_disable(hpriv->ahci_regulator); return rc; } EXPORT_SYMBOL_GPL(ahci_platform_enable_regulators); @@ -175,7 +192,8 @@ EXPORT_SYMBOL_GPL(ahci_platform_enable_regulators); * ahci_platform_disable_regulators - Disable regulators * @hpriv: host private area to store config values * - * This function disables all regulators found in hpriv->target_pwrs. + * This function disables all regulators found in hpriv->target_pwrs and + * AHCI controller. */ void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv) { @@ -186,6 +204,11 @@ void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv) continue; regulator_disable(hpriv->target_pwrs[i]); } + + if (hpriv->ahci_regulator) + regulator_disable(hpriv->ahci_regulator); + if (hpriv->phy_regulator) + regulator_disable(hpriv->phy_regulator); } EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators); /** @@ -303,8 +326,8 @@ static int ahci_platform_get_phy(struct ahci_host_priv *hpriv, u32 port, /* No PHY support. Check if PHY is required. 
 		/* No PHY support. Check if PHY is required. */
 		if (of_find_property(node, "phys", NULL)) {
 			dev_err(dev,
-				"couldn't get PHY in node %s: ENOSYS\n",
-				node->name);
+				"couldn't get PHY in node %pOFn: ENOSYS\n",
+				node);
 			break;
 		}
 		/* fall through */
@@ -316,8 +339,8 @@ static int ahci_platform_get_phy(struct ahci_host_priv *hpriv, u32 port,
 
 	default:
 		dev_err(dev,
-			"couldn't get PHY in node %s: %d\n",
-			node->name, rc);
+			"couldn't get PHY in node %pOFn: %d\n",
+			node, rc);
 
 		break;
 	}
@@ -351,6 +374,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
 *
 * 1) mmio registers (IORESOURCE_MEM 0, mandatory)
 * 2) regulator for controlling the targets power (optional)
+ *    regulator for controlling the AHCI controller (optional)
 * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node,
 *    or for non devicetree enabled platforms a single clock
 * 4) resets, if flags has AHCI_PLATFORM_GET_RESETS (optional)
@@ -408,6 +432,24 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
 		hpriv->clks[i] = clk;
 	}
 
+	hpriv->ahci_regulator = devm_regulator_get_optional(dev, "ahci");
+	if (IS_ERR(hpriv->ahci_regulator)) {
+		rc = PTR_ERR(hpriv->ahci_regulator);
+		if (rc == -EPROBE_DEFER)
+			goto err_out;
+		rc = 0;
+		hpriv->ahci_regulator = NULL;
+	}
+
+	hpriv->phy_regulator = devm_regulator_get_optional(dev, "phy");
+	if (IS_ERR(hpriv->phy_regulator)) {
+		rc = PTR_ERR(hpriv->phy_regulator);
+		if (rc == -EPROBE_DEFER)
+			goto err_out;
+		rc = 0;
+		hpriv->phy_regulator = NULL;
+	}
+
 	if (flags & AHCI_PLATFORM_GET_RESETS) {
 		hpriv->rsts = devm_reset_control_array_get_optional_shared(dev);
 		if (IS_ERR(hpriv->rsts)) {
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 1984fc78c750..3d4887d0e84a 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -639,8 +639,8 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg)
 	if (args[0] == ATA_CMD_SMART) { /* hack -- ide driver does this too */
 		scsi_cmd[6] = args[3];
 		scsi_cmd[8] = args[1];
-		scsi_cmd[10] = 0x4f;
-		scsi_cmd[12] = 0xc2;
+		scsi_cmd[10] = ATA_SMART_LBAM_PASS;
+		scsi_cmd[12] = ATA_SMART_LBAH_PASS;
 	} else {
 		scsi_cmd[6] = args[1];
 	}
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index 4d49fd3c927b..843bb200a1ee 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -279,7 +279,7 @@ static int atiixp_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	const struct ata_port_info *ppi[] = { &info, &info };
 
 	/* SB600 doesn't have secondary port wired */
-	if((pdev->device == PCI_DEVICE_ID_ATI_IXP600_IDE))
+	if (pdev->device == PCI_DEVICE_ID_ATI_IXP600_IDE)
 		ppi[1] = &ata_dummy_port_info;
 
 	return ata_pci_bmdma_init_one(pdev, ppi, &atiixp_sht, NULL,
diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c
index 0a550190955a..cc6d06c1b2c7 100644
--- a/drivers/ata/pata_ep93xx.c
+++ b/drivers/ata/pata_ep93xx.c
@@ -659,7 +659,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data)
 	 * start of new transfer.
*/ drv_data->dma_rx_data.port = EP93XX_DMA_IDE; - drv_data->dma_rx_data.direction = DMA_FROM_DEVICE; + drv_data->dma_rx_data.direction = DMA_DEV_TO_MEM; drv_data->dma_rx_data.name = "ep93xx-pata-rx"; drv_data->dma_rx_channel = dma_request_channel(mask, ep93xx_pata_dma_filter, &drv_data->dma_rx_data); @@ -667,7 +667,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) return; drv_data->dma_tx_data.port = EP93XX_DMA_IDE; - drv_data->dma_tx_data.direction = DMA_TO_DEVICE; + drv_data->dma_tx_data.direction = DMA_MEM_TO_DEV; drv_data->dma_tx_data.name = "ep93xx-pata-tx"; drv_data->dma_tx_channel = dma_request_channel(mask, ep93xx_pata_dma_filter, &drv_data->dma_tx_data); @@ -678,7 +678,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) /* Configure receive channel direction and source address */ memset(&conf, 0, sizeof(conf)); - conf.direction = DMA_FROM_DEVICE; + conf.direction = DMA_DEV_TO_MEM; conf.src_addr = drv_data->udma_in_phys; conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; if (dmaengine_slave_config(drv_data->dma_rx_channel, &conf)) { @@ -689,7 +689,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) /* Configure transmit channel direction and destination address */ memset(&conf, 0, sizeof(conf)); - conf.direction = DMA_TO_DEVICE; + conf.direction = DMA_MEM_TO_DEV; conf.dst_addr = drv_data->udma_out_phys; conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; if (dmaengine_slave_config(drv_data->dma_tx_channel, &conf)) { diff --git a/drivers/base/dd.c b/drivers/base/dd.c index edfc9f0b1180..169412ee4ae8 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -480,9 +480,11 @@ re_probe: if (ret) goto pinctrl_bind_failed; - ret = dma_configure(dev); - if (ret) - goto dma_failed; + if (dev->bus->dma_configure) { + ret = dev->bus->dma_configure(dev); + if (ret) + goto dma_failed; + } if (driver_sysfs_add(dev)) { printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n", @@ -537,7 +539,7 @@ re_probe: goto done; probe_failed: - dma_deconfigure(dev); + arch_teardown_dma_ops(dev); dma_failed: if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, @@ -966,7 +968,7 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv->remove(dev); device_links_driver_cleanup(dev); - dma_deconfigure(dev); + arch_teardown_dma_ops(dev); devres_release_all(dev); dev->driver = NULL; diff --git a/drivers/base/platform.c b/drivers/base/platform.c index dff82a3c2caa..23cf4427f425 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1180,7 +1180,7 @@ int __init platform_bus_init(void) } #ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK -u64 dma_get_required_mask(struct device *dev) +static u64 dma_default_get_required_mask(struct device *dev) { u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); @@ -1198,6 +1198,15 @@ u64 dma_get_required_mask(struct device *dev) } return mask; } + +u64 dma_get_required_mask(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops->get_required_mask) + return ops->get_required_mask(dev); + return dma_default_get_required_mask(dev); +} EXPORT_SYMBOL_GPL(dma_get_required_mask); #endif diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c deleted file mode 100644 index 581312ac375f..000000000000 --- a/drivers/block/DAC960.c +++ /dev/null @@ -1,7229 +0,0 @@ -/* - - Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers - - Copyright 1998-2001 by Leonard N. 
Zubkoff <lnz@dandelion.com> - Portions Copyright 2002 by Mylex (An IBM Business Unit) - - This program is free software; you may redistribute and/or modify it under - the terms of the GNU General Public License Version 2 as published by the - Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for complete details. - -*/ - - -#define DAC960_DriverVersion "2.5.49" -#define DAC960_DriverDate "21 Aug 2007" - - -#include <linux/compiler.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/miscdevice.h> -#include <linux/blkdev.h> -#include <linux/bio.h> -#include <linux/completion.h> -#include <linux/delay.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/blkpg.h> -#include <linux/dma-mapping.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/mutex.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/reboot.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <linux/pci.h> -#include <linux/init.h> -#include <linux/jiffies.h> -#include <linux/random.h> -#include <linux/scatterlist.h> -#include <asm/io.h> -#include <linux/uaccess.h> -#include "DAC960.h" - -#define DAC960_GAM_MINOR 252 - - -static DEFINE_MUTEX(DAC960_mutex); -static DAC960_Controller_T *DAC960_Controllers[DAC960_MaxControllers]; -static int DAC960_ControllerCount; -static struct proc_dir_entry *DAC960_ProcDirectoryEntry; - -static long disk_size(DAC960_Controller_T *p, int drive_nr) -{ - if (p->FirmwareType == DAC960_V1_Controller) { - if (drive_nr >= p->LogicalDriveCount) - return 0; - return p->V1.LogicalDriveInformation[drive_nr]. - LogicalDriveSize; - } else { - DAC960_V2_LogicalDeviceInfo_T *i = - p->V2.LogicalDeviceInformation[drive_nr]; - if (i == NULL) - return 0; - return i->ConfigurableDeviceSize; - } -} - -static int DAC960_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - DAC960_Controller_T *p = disk->queue->queuedata; - int drive_nr = (long)disk->private_data; - int ret = -ENXIO; - - mutex_lock(&DAC960_mutex); - if (p->FirmwareType == DAC960_V1_Controller) { - if (p->V1.LogicalDriveInformation[drive_nr]. - LogicalDriveState == DAC960_V1_LogicalDrive_Offline) - goto out; - } else { - DAC960_V2_LogicalDeviceInfo_T *i = - p->V2.LogicalDeviceInformation[drive_nr]; - if (!i || i->LogicalDeviceState == DAC960_V2_LogicalDevice_Offline) - goto out; - } - - check_disk_change(bdev); - - if (!get_capacity(p->disks[drive_nr])) - goto out; - ret = 0; -out: - mutex_unlock(&DAC960_mutex); - return ret; -} - -static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct gendisk *disk = bdev->bd_disk; - DAC960_Controller_T *p = disk->queue->queuedata; - int drive_nr = (long)disk->private_data; - - if (p->FirmwareType == DAC960_V1_Controller) { - geo->heads = p->V1.GeometryTranslationHeads; - geo->sectors = p->V1.GeometryTranslationSectors; - geo->cylinders = p->V1.LogicalDriveInformation[drive_nr]. 
- LogicalDriveSize / (geo->heads * geo->sectors); - } else { - DAC960_V2_LogicalDeviceInfo_T *i = - p->V2.LogicalDeviceInformation[drive_nr]; - switch (i->DriveGeometry) { - case DAC960_V2_Geometry_128_32: - geo->heads = 128; - geo->sectors = 32; - break; - case DAC960_V2_Geometry_255_63: - geo->heads = 255; - geo->sectors = 63; - break; - default: - DAC960_Error("Illegal Logical Device Geometry %d\n", - p, i->DriveGeometry); - return -EINVAL; - } - - geo->cylinders = i->ConfigurableDeviceSize / - (geo->heads * geo->sectors); - } - - return 0; -} - -static unsigned int DAC960_check_events(struct gendisk *disk, - unsigned int clearing) -{ - DAC960_Controller_T *p = disk->queue->queuedata; - int drive_nr = (long)disk->private_data; - - if (!p->LogicalDriveInitiallyAccessible[drive_nr]) - return DISK_EVENT_MEDIA_CHANGE; - return 0; -} - -static int DAC960_revalidate_disk(struct gendisk *disk) -{ - DAC960_Controller_T *p = disk->queue->queuedata; - int unit = (long)disk->private_data; - - set_capacity(disk, disk_size(p, unit)); - return 0; -} - -static const struct block_device_operations DAC960_BlockDeviceOperations = { - .owner = THIS_MODULE, - .open = DAC960_open, - .getgeo = DAC960_getgeo, - .check_events = DAC960_check_events, - .revalidate_disk = DAC960_revalidate_disk, -}; - - -/* - DAC960_AnnounceDriver announces the Driver Version and Date, Author's Name, - Copyright Notice, and Electronic Mail Address. -*/ - -static void DAC960_AnnounceDriver(DAC960_Controller_T *Controller) -{ - DAC960_Announce("***** DAC960 RAID Driver Version " - DAC960_DriverVersion " of " - DAC960_DriverDate " *****\n", Controller); - DAC960_Announce("Copyright 1998-2001 by Leonard N. Zubkoff " - "<lnz@dandelion.com>\n", Controller); -} - - -/* - DAC960_Failure prints a standardized error message, and then returns false. -*/ - -static bool DAC960_Failure(DAC960_Controller_T *Controller, - unsigned char *ErrorMessage) -{ - DAC960_Error("While configuring DAC960 PCI RAID Controller at\n", - Controller); - if (Controller->IO_Address == 0) - DAC960_Error("PCI Bus %d Device %d Function %d I/O Address N/A " - "PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->PCI_Address); - else DAC960_Error("PCI Bus %d Device %d Function %d I/O Address " - "0x%X PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->IO_Address, - Controller->PCI_Address); - DAC960_Error("%s FAILED - DETACHING\n", Controller, ErrorMessage); - return false; -} - -/* - init_dma_loaf() and slice_dma_loaf() are helper functions for - aggregating the dma-mapped memory for a well-known collection of - data structures that are of different lengths. - - These routines don't guarantee any alignment. The caller must - include any space needed for alignment in the sizes of the structures - that are passed in. 
- */ - -static bool init_dma_loaf(struct pci_dev *dev, struct dma_loaf *loaf, - size_t len) -{ - void *cpu_addr; - dma_addr_t dma_handle; - - cpu_addr = pci_alloc_consistent(dev, len, &dma_handle); - if (cpu_addr == NULL) - return false; - - loaf->cpu_free = loaf->cpu_base = cpu_addr; - loaf->dma_free =loaf->dma_base = dma_handle; - loaf->length = len; - memset(cpu_addr, 0, len); - return true; -} - -static void *slice_dma_loaf(struct dma_loaf *loaf, size_t len, - dma_addr_t *dma_handle) -{ - void *cpu_end = loaf->cpu_free + len; - void *cpu_addr = loaf->cpu_free; - - BUG_ON(cpu_end > loaf->cpu_base + loaf->length); - *dma_handle = loaf->dma_free; - loaf->cpu_free = cpu_end; - loaf->dma_free += len; - return cpu_addr; -} - -static void free_dma_loaf(struct pci_dev *dev, struct dma_loaf *loaf_handle) -{ - if (loaf_handle->cpu_base != NULL) - pci_free_consistent(dev, loaf_handle->length, - loaf_handle->cpu_base, loaf_handle->dma_base); -} - - -/* - DAC960_CreateAuxiliaryStructures allocates and initializes the auxiliary - data structures for Controller. It returns true on success and false on - failure. -*/ - -static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller) -{ - int CommandAllocationLength, CommandAllocationGroupSize; - int CommandsRemaining = 0, CommandIdentifier, CommandGroupByteCount; - void *AllocationPointer = NULL; - void *ScatterGatherCPU = NULL; - dma_addr_t ScatterGatherDMA; - struct dma_pool *ScatterGatherPool; - void *RequestSenseCPU = NULL; - dma_addr_t RequestSenseDMA; - struct dma_pool *RequestSensePool = NULL; - - if (Controller->FirmwareType == DAC960_V1_Controller) - { - CommandAllocationLength = offsetof(DAC960_Command_T, V1.EndMarker); - CommandAllocationGroupSize = DAC960_V1_CommandAllocationGroupSize; - ScatterGatherPool = dma_pool_create("DAC960_V1_ScatterGather", - &Controller->PCIDevice->dev, - DAC960_V1_ScatterGatherLimit * sizeof(DAC960_V1_ScatterGatherSegment_T), - sizeof(DAC960_V1_ScatterGatherSegment_T), 0); - if (ScatterGatherPool == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - Controller->ScatterGatherPool = ScatterGatherPool; - } - else - { - CommandAllocationLength = offsetof(DAC960_Command_T, V2.EndMarker); - CommandAllocationGroupSize = DAC960_V2_CommandAllocationGroupSize; - ScatterGatherPool = dma_pool_create("DAC960_V2_ScatterGather", - &Controller->PCIDevice->dev, - DAC960_V2_ScatterGatherLimit * sizeof(DAC960_V2_ScatterGatherSegment_T), - sizeof(DAC960_V2_ScatterGatherSegment_T), 0); - if (ScatterGatherPool == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - RequestSensePool = dma_pool_create("DAC960_V2_RequestSense", - &Controller->PCIDevice->dev, sizeof(DAC960_SCSI_RequestSense_T), - sizeof(int), 0); - if (RequestSensePool == NULL) { - dma_pool_destroy(ScatterGatherPool); - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - } - Controller->ScatterGatherPool = ScatterGatherPool; - Controller->V2.RequestSensePool = RequestSensePool; - } - Controller->CommandAllocationGroupSize = CommandAllocationGroupSize; - Controller->FreeCommands = NULL; - for (CommandIdentifier = 1; - CommandIdentifier <= Controller->DriverQueueDepth; - CommandIdentifier++) - { - DAC960_Command_T *Command; - if (--CommandsRemaining <= 0) - { - CommandsRemaining = - Controller->DriverQueueDepth - CommandIdentifier + 1; - if (CommandsRemaining > CommandAllocationGroupSize) - CommandsRemaining = CommandAllocationGroupSize; - CommandGroupByteCount = - 
CommandsRemaining * CommandAllocationLength; - AllocationPointer = kzalloc(CommandGroupByteCount, GFP_ATOMIC); - if (AllocationPointer == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION"); - } - Command = (DAC960_Command_T *) AllocationPointer; - AllocationPointer += CommandAllocationLength; - Command->CommandIdentifier = CommandIdentifier; - Command->Controller = Controller; - Command->Next = Controller->FreeCommands; - Controller->FreeCommands = Command; - Controller->Commands[CommandIdentifier-1] = Command; - ScatterGatherCPU = dma_pool_alloc(ScatterGatherPool, GFP_ATOMIC, - &ScatterGatherDMA); - if (ScatterGatherCPU == NULL) - return DAC960_Failure(Controller, "AUXILIARY STRUCTURE CREATION"); - - if (RequestSensePool != NULL) { - RequestSenseCPU = dma_pool_alloc(RequestSensePool, GFP_ATOMIC, - &RequestSenseDMA); - if (RequestSenseCPU == NULL) { - dma_pool_free(ScatterGatherPool, ScatterGatherCPU, - ScatterGatherDMA); - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION"); - } - } - if (Controller->FirmwareType == DAC960_V1_Controller) { - Command->cmd_sglist = Command->V1.ScatterList; - Command->V1.ScatterGatherList = - (DAC960_V1_ScatterGatherSegment_T *)ScatterGatherCPU; - Command->V1.ScatterGatherListDMA = ScatterGatherDMA; - sg_init_table(Command->cmd_sglist, DAC960_V1_ScatterGatherLimit); - } else { - Command->cmd_sglist = Command->V2.ScatterList; - Command->V2.ScatterGatherList = - (DAC960_V2_ScatterGatherSegment_T *)ScatterGatherCPU; - Command->V2.ScatterGatherListDMA = ScatterGatherDMA; - Command->V2.RequestSense = - (DAC960_SCSI_RequestSense_T *)RequestSenseCPU; - Command->V2.RequestSenseDMA = RequestSenseDMA; - sg_init_table(Command->cmd_sglist, DAC960_V2_ScatterGatherLimit); - } - } - return true; -} - - -/* - DAC960_DestroyAuxiliaryStructures deallocates the auxiliary data - structures for Controller. -*/ - -static void DAC960_DestroyAuxiliaryStructures(DAC960_Controller_T *Controller) -{ - int i; - struct dma_pool *ScatterGatherPool = Controller->ScatterGatherPool; - struct dma_pool *RequestSensePool = NULL; - void *ScatterGatherCPU; - dma_addr_t ScatterGatherDMA; - void *RequestSenseCPU; - dma_addr_t RequestSenseDMA; - DAC960_Command_T *CommandGroup = NULL; - - - if (Controller->FirmwareType == DAC960_V2_Controller) - RequestSensePool = Controller->V2.RequestSensePool; - - Controller->FreeCommands = NULL; - for (i = 0; i < Controller->DriverQueueDepth; i++) - { - DAC960_Command_T *Command = Controller->Commands[i]; - - if (Command == NULL) - continue; - - if (Controller->FirmwareType == DAC960_V1_Controller) { - ScatterGatherCPU = (void *)Command->V1.ScatterGatherList; - ScatterGatherDMA = Command->V1.ScatterGatherListDMA; - RequestSenseCPU = NULL; - RequestSenseDMA = (dma_addr_t)0; - } else { - ScatterGatherCPU = (void *)Command->V2.ScatterGatherList; - ScatterGatherDMA = Command->V2.ScatterGatherListDMA; - RequestSenseCPU = (void *)Command->V2.RequestSense; - RequestSenseDMA = Command->V2.RequestSenseDMA; - } - if (ScatterGatherCPU != NULL) - dma_pool_free(ScatterGatherPool, ScatterGatherCPU, ScatterGatherDMA); - if (RequestSenseCPU != NULL) - dma_pool_free(RequestSensePool, RequestSenseCPU, RequestSenseDMA); - - if ((Command->CommandIdentifier - % Controller->CommandAllocationGroupSize) == 1) { - /* - * We can't free the group of commands until all of the - * request sense and scatter gather dma structures are free. 
- * Remember the beginning of the group, but don't free it - * until we've reached the beginning of the next group. - */ - kfree(CommandGroup); - CommandGroup = Command; - } - Controller->Commands[i] = NULL; - } - kfree(CommandGroup); - - if (Controller->CombinedStatusBuffer != NULL) - { - kfree(Controller->CombinedStatusBuffer); - Controller->CombinedStatusBuffer = NULL; - Controller->CurrentStatusBuffer = NULL; - } - - dma_pool_destroy(ScatterGatherPool); - if (Controller->FirmwareType == DAC960_V1_Controller) - return; - - dma_pool_destroy(RequestSensePool); - - for (i = 0; i < DAC960_MaxLogicalDrives; i++) { - kfree(Controller->V2.LogicalDeviceInformation[i]); - Controller->V2.LogicalDeviceInformation[i] = NULL; - } - - for (i = 0; i < DAC960_V2_MaxPhysicalDevices; i++) - { - kfree(Controller->V2.PhysicalDeviceInformation[i]); - Controller->V2.PhysicalDeviceInformation[i] = NULL; - kfree(Controller->V2.InquiryUnitSerialNumber[i]); - Controller->V2.InquiryUnitSerialNumber[i] = NULL; - } -} - - -/* - DAC960_V1_ClearCommand clears critical fields of Command for DAC960 V1 - Firmware Controllers. -*/ - -static inline void DAC960_V1_ClearCommand(DAC960_Command_T *Command) -{ - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - memset(CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T)); - Command->V1.CommandStatus = 0; -} - - -/* - DAC960_V2_ClearCommand clears critical fields of Command for DAC960 V2 - Firmware Controllers. -*/ - -static inline void DAC960_V2_ClearCommand(DAC960_Command_T *Command) -{ - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T)); - Command->V2.CommandStatus = 0; -} - - -/* - DAC960_AllocateCommand allocates a Command structure from Controller's - free list. During driver initialization, a special initialization command - has been placed on the free list to guarantee that command allocation can - never fail. -*/ - -static inline DAC960_Command_T *DAC960_AllocateCommand(DAC960_Controller_T - *Controller) -{ - DAC960_Command_T *Command = Controller->FreeCommands; - if (Command == NULL) return NULL; - Controller->FreeCommands = Command->Next; - Command->Next = NULL; - return Command; -} - - -/* - DAC960_DeallocateCommand deallocates Command, returning it to Controller's - free list. -*/ - -static inline void DAC960_DeallocateCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - - Command->Request = NULL; - Command->Next = Controller->FreeCommands; - Controller->FreeCommands = Command; -} - - -/* - DAC960_WaitForCommand waits for a wake_up on Controller's Command Wait Queue. -*/ - -static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) -{ - spin_unlock_irq(&Controller->queue_lock); - __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&Controller->queue_lock); -} - -/* - DAC960_GEM_QueueCommand queues Command for DAC960 GEM Series Controllers. 
-*/ - -static void DAC960_GEM_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_GEM_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_GEM_MemoryMailboxNewCommand(ControllerBaseAddress); - - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - -/* - DAC960_BA_QueueCommand queues Command for DAC960 BA Series Controllers. -*/ - -static void DAC960_BA_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_BA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_BA_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LP_QueueCommand queues Command for DAC960 LP Series Controllers. -*/ - -static void DAC960_LP_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LP_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LP_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LA_QueueCommandDualMode queues Command for DAC960 LA Series - Controllers with Dual Mode Firmware. 
-*/ - -static void DAC960_LA_QueueCommandDualMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LA_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LA_QueueCommandSingleMode queues Command for DAC960 LA Series - Controllers with Single Mode Firmware. -*/ - -static void DAC960_LA_QueueCommandSingleMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PG_QueueCommandDualMode queues Command for DAC960 PG Series - Controllers with Dual Mode Firmware. -*/ - -static void DAC960_PG_QueueCommandDualMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_PG_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_PG_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PG_QueueCommandSingleMode queues Command for DAC960 PG Series - Controllers with Single Mode Firmware. 
-*/ - -static void DAC960_PG_QueueCommandSingleMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_PG_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_PG_HardwareMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PD_QueueCommand queues Command for DAC960 PD Series Controllers. -*/ - -static void DAC960_PD_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - while (DAC960_PD_MailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_PD_WriteCommandMailbox(ControllerBaseAddress, CommandMailbox); - DAC960_PD_NewCommand(ControllerBaseAddress); -} - - -/* - DAC960_P_QueueCommand queues Command for DAC960 P Series Controllers. -*/ - -static void DAC960_P_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - switch (CommandMailbox->Common.CommandOpcode) - { - case DAC960_V1_Enquiry: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Enquiry_Old; - break; - case DAC960_V1_GetDeviceState: - CommandMailbox->Common.CommandOpcode = DAC960_V1_GetDeviceState_Old; - break; - case DAC960_V1_Read: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Read_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_Write: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Write_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_ReadWithScatterGather: - CommandMailbox->Common.CommandOpcode = - DAC960_V1_ReadWithScatterGather_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_WriteWithScatterGather: - CommandMailbox->Common.CommandOpcode = - DAC960_V1_WriteWithScatterGather_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - default: - break; - } - while (DAC960_PD_MailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_PD_WriteCommandMailbox(ControllerBaseAddress, CommandMailbox); - DAC960_PD_NewCommand(ControllerBaseAddress); -} - - -/* - DAC960_ExecuteCommand executes Command and waits for completion. 
-*/ - -static void DAC960_ExecuteCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DECLARE_COMPLETION_ONSTACK(Completion); - unsigned long flags; - Command->Completion = &Completion; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_QueueCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - - if (in_interrupt()) - return; - wait_for_completion(&Completion); -} - - -/* - DAC960_V1_ExecuteType3 executes a DAC960 V1 Firmware Controller Type 3 - Command and waits for completion. It returns true on success and false - on failure. -*/ - -static bool DAC960_V1_ExecuteType3(DAC960_Controller_T *Controller, - DAC960_V1_CommandOpcode_T CommandOpcode, - dma_addr_t DataDMA) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Type3.CommandOpcode = CommandOpcode; - CommandMailbox->Type3.BusAddress = DataDMA; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V1.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V1_NormalCompletion); -} - - -/* - DAC960_V1_ExecuteTypeB executes a DAC960 V1 Firmware Controller Type 3B - Command and waits for completion. It returns true on success and false - on failure. -*/ - -static bool DAC960_V1_ExecuteType3B(DAC960_Controller_T *Controller, - DAC960_V1_CommandOpcode_T CommandOpcode, - unsigned char CommandOpcode2, - dma_addr_t DataDMA) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Type3B.CommandOpcode = CommandOpcode; - CommandMailbox->Type3B.CommandOpcode2 = CommandOpcode2; - CommandMailbox->Type3B.BusAddress = DataDMA; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V1.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V1_NormalCompletion); -} - - -/* - DAC960_V1_ExecuteType3D executes a DAC960 V1 Firmware Controller Type 3D - Command and waits for completion. It returns true on success and false - on failure. -*/ - -static bool DAC960_V1_ExecuteType3D(DAC960_Controller_T *Controller, - DAC960_V1_CommandOpcode_T CommandOpcode, - unsigned char Channel, - unsigned char TargetID, - dma_addr_t DataDMA) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Type3D.CommandOpcode = CommandOpcode; - CommandMailbox->Type3D.Channel = Channel; - CommandMailbox->Type3D.TargetID = TargetID; - CommandMailbox->Type3D.BusAddress = DataDMA; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V1.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V1_NormalCompletion); -} - - -/* - DAC960_V2_GeneralInfo executes a DAC960 V2 Firmware General Information - Reading IOCTL Command and waits for completion. It returns true on success - and false on failure. 
- - Return data in The controller's HealthStatusBuffer, which is dma-able memory -*/ - -static bool DAC960_V2_GeneralInfo(DAC960_Controller_T *Controller) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Common.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->Common.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->Common.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->Common.DataTransferSize = sizeof(DAC960_V2_HealthStatusBuffer_T); - CommandMailbox->Common.IOCTL_Opcode = DAC960_V2_GetHealthStatus; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.HealthStatusBufferDMA; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->Common.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_ControllerInfo executes a DAC960 V2 Firmware Controller - Information Reading IOCTL Command and waits for completion. It returns - true on success and false on failure. - - Data is returned in the controller's V2.NewControllerInformation dma-able - memory buffer. -*/ - -static bool DAC960_V2_NewControllerInfo(DAC960_Controller_T *Controller) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->ControllerInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->ControllerInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->ControllerInfo.DataTransferSize = sizeof(DAC960_V2_ControllerInfo_T); - CommandMailbox->ControllerInfo.ControllerNumber = 0; - CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewControllerInformationDMA; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->ControllerInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_LogicalDeviceInfo executes a DAC960 V2 Firmware Controller Logical - Device Information Reading IOCTL Command and waits for completion. It - returns true on success and false on failure. 
- - Data is returned in the controller's V2.NewLogicalDeviceInformation -*/ - -static bool DAC960_V2_NewLogicalDeviceInfo(DAC960_Controller_T *Controller, - unsigned short LogicalDeviceNumber) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->LogicalDeviceInfo.CommandOpcode = - DAC960_V2_IOCTL; - CommandMailbox->LogicalDeviceInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->LogicalDeviceInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->LogicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_LogicalDeviceInfo_T); - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = DAC960_V2_GetLogicalDeviceInfoValid; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewLogicalDeviceInformationDMA; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->LogicalDeviceInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_PhysicalDeviceInfo executes a DAC960 V2 Firmware Controller "Read - Physical Device Information" IOCTL Command and waits for completion. It - returns true on success and false on failure. - - The Channel, TargetID, LogicalUnit arguments should be 0 the first time - this function is called for a given controller. This will return data - for the "first" device on that controller. The returned data includes a - Channel, TargetID, LogicalUnit that can be passed in to this routine to - get data for the NEXT device on that controller. - - Data is stored in the controller's V2.NewPhysicalDeviceInfo dma-able - memory buffer. 
- -*/ - -static bool DAC960_V2_NewPhysicalDeviceInfo(DAC960_Controller_T *Controller, - unsigned char Channel, - unsigned char TargetID, - unsigned char LogicalUnit) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->PhysicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_PhysicalDeviceInfo_T); - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.LogicalUnit = LogicalUnit; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = TargetID; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = Channel; - CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetPhysicalDeviceInfoValid; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewPhysicalDeviceInformationDMA; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->PhysicalDeviceInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -static void DAC960_V2_ConstructNewUnitSerialNumber( - DAC960_Controller_T *Controller, - DAC960_V2_CommandMailbox_T *CommandMailbox, int Channel, int TargetID, - int LogicalUnit) -{ - CommandMailbox->SCSI_10.CommandOpcode = DAC960_V2_SCSI_10_Passthru; - CommandMailbox->SCSI_10.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->SCSI_10.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->SCSI_10.DataTransferSize = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - CommandMailbox->SCSI_10.PhysicalDevice.LogicalUnit = LogicalUnit; - CommandMailbox->SCSI_10.PhysicalDevice.TargetID = TargetID; - CommandMailbox->SCSI_10.PhysicalDevice.Channel = Channel; - CommandMailbox->SCSI_10.CDBLength = 6; - CommandMailbox->SCSI_10.SCSI_CDB[0] = 0x12; /* INQUIRY */ - CommandMailbox->SCSI_10.SCSI_CDB[1] = 1; /* EVPD = 1 */ - CommandMailbox->SCSI_10.SCSI_CDB[2] = 0x80; /* Page Code */ - CommandMailbox->SCSI_10.SCSI_CDB[3] = 0; /* Reserved */ - CommandMailbox->SCSI_10.SCSI_CDB[4] = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - CommandMailbox->SCSI_10.SCSI_CDB[5] = 0; /* Control */ - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewInquiryUnitSerialNumberDMA; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->SCSI_10.DataTransferSize; -} - - -/* - DAC960_V2_NewUnitSerialNumber executes an SCSI pass-through - Inquiry command to a SCSI device identified by Channel number, - Target id, Logical Unit Number. This function Waits for completion - of the command. - - The return data includes Unit Serial Number information for the - specified device. - - Data is stored in the controller's V2.NewPhysicalDeviceInfo dma-able - memory buffer. 
-*/ - -static bool DAC960_V2_NewInquiryUnitSerialNumber(DAC960_Controller_T *Controller, - int Channel, int TargetID, int LogicalUnit) -{ - DAC960_Command_T *Command; - DAC960_V2_CommandMailbox_T *CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - Command = DAC960_AllocateCommand(Controller); - CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - - DAC960_V2_ConstructNewUnitSerialNumber(Controller, CommandMailbox, - Channel, TargetID, LogicalUnit); - - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_DeviceOperation executes a DAC960 V2 Firmware Controller Device - Operation IOCTL Command and waits for completion. It returns true on - success and false on failure. -*/ - -static bool DAC960_V2_DeviceOperation(DAC960_Controller_T *Controller, - DAC960_V2_IOCTL_Opcode_T IOCTL_Opcode, - DAC960_V2_OperationDevice_T - OperationDevice) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->DeviceOperation.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->DeviceOperation.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->DeviceOperation.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->DeviceOperation.IOCTL_Opcode = IOCTL_Opcode; - CommandMailbox->DeviceOperation.OperationDevice = OperationDevice; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V1_EnableMemoryMailboxInterface enables the Memory Mailbox Interface - for DAC960 V1 Firmware Controllers. - - PD and P controller types have no memory mailbox, but still need the - other dma mapped memory. 
-*/ - -static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T - *Controller) -{ - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_HardwareType_T hw_type = Controller->HardwareType; - struct pci_dev *PCI_Device = Controller->PCIDevice; - struct dma_loaf *DmaPages = &Controller->DmaPages; - size_t DmaPagesSize; - size_t CommandMailboxesSize; - size_t StatusMailboxesSize; - - DAC960_V1_CommandMailbox_T *CommandMailboxesMemory; - dma_addr_t CommandMailboxesMemoryDMA; - - DAC960_V1_StatusMailbox_T *StatusMailboxesMemory; - dma_addr_t StatusMailboxesMemoryDMA; - - DAC960_V1_CommandMailbox_T CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - int TimeoutCounter; - int i; - - memset(&CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T)); - - if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) - return DAC960_Failure(Controller, "DMA mask out of range"); - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) { - CommandMailboxesSize = 0; - StatusMailboxesSize = 0; - } else { - CommandMailboxesSize = DAC960_V1_CommandMailboxCount * sizeof(DAC960_V1_CommandMailbox_T); - StatusMailboxesSize = DAC960_V1_StatusMailboxCount * sizeof(DAC960_V1_StatusMailbox_T); - } - DmaPagesSize = CommandMailboxesSize + StatusMailboxesSize + - sizeof(DAC960_V1_DCDB_T) + sizeof(DAC960_V1_Enquiry_T) + - sizeof(DAC960_V1_ErrorTable_T) + sizeof(DAC960_V1_EventLogEntry_T) + - sizeof(DAC960_V1_RebuildProgress_T) + - sizeof(DAC960_V1_LogicalDriveInformationArray_T) + - sizeof(DAC960_V1_BackgroundInitializationStatus_T) + - sizeof(DAC960_V1_DeviceState_T) + sizeof(DAC960_SCSI_Inquiry_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - - if (!init_dma_loaf(PCI_Device, DmaPages, DmaPagesSize)) - return false; - - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) - goto skip_mailboxes; - - CommandMailboxesMemory = slice_dma_loaf(DmaPages, - CommandMailboxesSize, &CommandMailboxesMemoryDMA); - - /* These are the base addresses for the command memory mailbox array */ - Controller->V1.FirstCommandMailbox = CommandMailboxesMemory; - Controller->V1.FirstCommandMailboxDMA = CommandMailboxesMemoryDMA; - - CommandMailboxesMemory += DAC960_V1_CommandMailboxCount - 1; - Controller->V1.LastCommandMailbox = CommandMailboxesMemory; - Controller->V1.NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.PreviousCommandMailbox1 = Controller->V1.LastCommandMailbox; - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.LastCommandMailbox - 1; - - /* These are the base addresses for the status memory mailbox array */ - StatusMailboxesMemory = slice_dma_loaf(DmaPages, - StatusMailboxesSize, &StatusMailboxesMemoryDMA); - - Controller->V1.FirstStatusMailbox = StatusMailboxesMemory; - Controller->V1.FirstStatusMailboxDMA = StatusMailboxesMemoryDMA; - StatusMailboxesMemory += DAC960_V1_StatusMailboxCount - 1; - Controller->V1.LastStatusMailbox = StatusMailboxesMemory; - Controller->V1.NextStatusMailbox = Controller->V1.FirstStatusMailbox; - -skip_mailboxes: - Controller->V1.MonitoringDCDB = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_DCDB_T), - &Controller->V1.MonitoringDCDB_DMA); - - Controller->V1.NewEnquiry = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_Enquiry_T), - &Controller->V1.NewEnquiryDMA); - - Controller->V1.NewErrorTable = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_ErrorTable_T), - &Controller->V1.NewErrorTableDMA); - - Controller->V1.EventLogEntry = slice_dma_loaf(DmaPages, - 
sizeof(DAC960_V1_EventLogEntry_T), - &Controller->V1.EventLogEntryDMA); - - Controller->V1.RebuildProgress = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_RebuildProgress_T), - &Controller->V1.RebuildProgressDMA); - - Controller->V1.NewLogicalDriveInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_LogicalDriveInformationArray_T), - &Controller->V1.NewLogicalDriveInformationDMA); - - Controller->V1.BackgroundInitializationStatus = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_BackgroundInitializationStatus_T), - &Controller->V1.BackgroundInitializationStatusDMA); - - Controller->V1.NewDeviceState = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_DeviceState_T), - &Controller->V1.NewDeviceStateDMA); - - Controller->V1.NewInquiryStandardData = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_T), - &Controller->V1.NewInquiryStandardDataDMA); - - Controller->V1.NewInquiryUnitSerialNumber = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - &Controller->V1.NewInquiryUnitSerialNumberDMA); - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) - return true; - - /* Enable the Memory Mailbox Interface. */ - Controller->V1.DualModeMemoryMailboxInterface = true; - CommandMailbox.TypeX.CommandOpcode = 0x2B; - CommandMailbox.TypeX.CommandIdentifier = 0; - CommandMailbox.TypeX.CommandOpcode2 = 0x14; - CommandMailbox.TypeX.CommandMailboxesBusAddress = - Controller->V1.FirstCommandMailboxDMA; - CommandMailbox.TypeX.StatusMailboxesBusAddress = - Controller->V1.FirstStatusMailboxDMA; -#define TIMEOUT_COUNT 1000000 - - for (i = 0; i < 2; i++) - switch (Controller->HardwareType) - { - case DAC960_LA_Controller: - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (!DAC960_LA_HardwareMailboxFullP(ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - DAC960_LA_WriteHardwareMailbox(ControllerBaseAddress, &CommandMailbox); - DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress); - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (DAC960_LA_HardwareMailboxStatusAvailableP( - ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - CommandStatus = DAC960_LA_ReadStatusRegister(ControllerBaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - if (CommandStatus == DAC960_V1_NormalCompletion) return true; - Controller->V1.DualModeMemoryMailboxInterface = false; - CommandMailbox.TypeX.CommandOpcode2 = 0x10; - break; - case DAC960_PG_Controller: - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (!DAC960_PG_HardwareMailboxFullP(ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - DAC960_PG_WriteHardwareMailbox(ControllerBaseAddress, &CommandMailbox); - DAC960_PG_HardwareMailboxNewCommand(ControllerBaseAddress); - - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (DAC960_PG_HardwareMailboxStatusAvailableP( - ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - CommandStatus = DAC960_PG_ReadStatusRegister(ControllerBaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - if (CommandStatus == DAC960_V1_NormalCompletion) return true; - Controller->V1.DualModeMemoryMailboxInterface = false; - 
CommandMailbox.TypeX.CommandOpcode2 = 0x10; - break; - default: - DAC960_Failure(Controller, "Unknown Controller Type\n"); - break; - } - return false; -} - - -/* - DAC960_V2_EnableMemoryMailboxInterface enables the Memory Mailbox Interface - for DAC960 V2 Firmware Controllers. - - Aggregate the space needed for the controller's memory mailbox and - the other data structures that will be targets of dma transfers with - the controller. Allocate a dma-mapped region of memory to hold these - structures. Then, save CPU pointers and dma_addr_t values to reference - the structures that are contained in that region. -*/ - -static bool DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T - *Controller) -{ - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - struct pci_dev *PCI_Device = Controller->PCIDevice; - struct dma_loaf *DmaPages = &Controller->DmaPages; - size_t DmaPagesSize; - size_t CommandMailboxesSize; - size_t StatusMailboxesSize; - - DAC960_V2_CommandMailbox_T *CommandMailboxesMemory; - dma_addr_t CommandMailboxesMemoryDMA; - - DAC960_V2_StatusMailbox_T *StatusMailboxesMemory; - dma_addr_t StatusMailboxesMemoryDMA; - - DAC960_V2_CommandMailbox_T *CommandMailbox; - dma_addr_t CommandMailboxDMA; - DAC960_V2_CommandStatus_T CommandStatus; - - if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)) && - pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) - return DAC960_Failure(Controller, "DMA mask out of range"); - - /* This is a temporary dma mapping, used only in the scope of this function */ - CommandMailbox = pci_alloc_consistent(PCI_Device, - sizeof(DAC960_V2_CommandMailbox_T), &CommandMailboxDMA); - if (CommandMailbox == NULL) - return false; - - CommandMailboxesSize = DAC960_V2_CommandMailboxCount * sizeof(DAC960_V2_CommandMailbox_T); - StatusMailboxesSize = DAC960_V2_StatusMailboxCount * sizeof(DAC960_V2_StatusMailbox_T); - DmaPagesSize = - CommandMailboxesSize + StatusMailboxesSize + - sizeof(DAC960_V2_HealthStatusBuffer_T) + - sizeof(DAC960_V2_ControllerInfo_T) + - sizeof(DAC960_V2_LogicalDeviceInfo_T) + - sizeof(DAC960_V2_PhysicalDeviceInfo_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T) + - sizeof(DAC960_V2_Event_T) + - sizeof(DAC960_V2_PhysicalToLogicalDevice_T); - - if (!init_dma_loaf(PCI_Device, DmaPages, DmaPagesSize)) { - pci_free_consistent(PCI_Device, sizeof(DAC960_V2_CommandMailbox_T), - CommandMailbox, CommandMailboxDMA); - return false; - } - - CommandMailboxesMemory = slice_dma_loaf(DmaPages, - CommandMailboxesSize, &CommandMailboxesMemoryDMA); - - /* These are the base addresses for the command memory mailbox array */ - Controller->V2.FirstCommandMailbox = CommandMailboxesMemory; - Controller->V2.FirstCommandMailboxDMA = CommandMailboxesMemoryDMA; - - CommandMailboxesMemory += DAC960_V2_CommandMailboxCount - 1; - Controller->V2.LastCommandMailbox = CommandMailboxesMemory; - Controller->V2.NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.PreviousCommandMailbox1 = Controller->V2.LastCommandMailbox; - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.LastCommandMailbox - 1; - - /* These are the base addresses for the status memory mailbox array */ - StatusMailboxesMemory = slice_dma_loaf(DmaPages, - StatusMailboxesSize, &StatusMailboxesMemoryDMA); - - Controller->V2.FirstStatusMailbox = StatusMailboxesMemory; - Controller->V2.FirstStatusMailboxDMA = StatusMailboxesMemoryDMA; - StatusMailboxesMemory += DAC960_V2_StatusMailboxCount - 1; - Controller->V2.LastStatusMailbox = StatusMailboxesMemory; - 
Controller->V2.NextStatusMailbox = Controller->V2.FirstStatusMailbox; - - Controller->V2.HealthStatusBuffer = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_HealthStatusBuffer_T), - &Controller->V2.HealthStatusBufferDMA); - - Controller->V2.NewControllerInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_ControllerInfo_T), - &Controller->V2.NewControllerInformationDMA); - - Controller->V2.NewLogicalDeviceInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_LogicalDeviceInfo_T), - &Controller->V2.NewLogicalDeviceInformationDMA); - - Controller->V2.NewPhysicalDeviceInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_PhysicalDeviceInfo_T), - &Controller->V2.NewPhysicalDeviceInformationDMA); - - Controller->V2.NewInquiryUnitSerialNumber = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - &Controller->V2.NewInquiryUnitSerialNumberDMA); - - Controller->V2.Event = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_Event_T), - &Controller->V2.EventDMA); - - Controller->V2.PhysicalToLogicalDevice = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_PhysicalToLogicalDevice_T), - &Controller->V2.PhysicalToLogicalDeviceDMA); - - /* - Enable the Memory Mailbox Interface. - - I don't know why we can't just use one of the memory mailboxes - we just allocated to do this, instead of using this temporary one. - Try this change later. - */ - memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T)); - CommandMailbox->SetMemoryMailbox.CommandIdentifier = 1; - CommandMailbox->SetMemoryMailbox.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->SetMemoryMailbox.CommandControlBits.NoAutoRequestSense = true; - CommandMailbox->SetMemoryMailbox.FirstCommandMailboxSizeKB = - (DAC960_V2_CommandMailboxCount * sizeof(DAC960_V2_CommandMailbox_T)) >> 10; - CommandMailbox->SetMemoryMailbox.FirstStatusMailboxSizeKB = - (DAC960_V2_StatusMailboxCount * sizeof(DAC960_V2_StatusMailbox_T)) >> 10; - CommandMailbox->SetMemoryMailbox.SecondCommandMailboxSizeKB = 0; - CommandMailbox->SetMemoryMailbox.SecondStatusMailboxSizeKB = 0; - CommandMailbox->SetMemoryMailbox.RequestSenseSize = 0; - CommandMailbox->SetMemoryMailbox.IOCTL_Opcode = DAC960_V2_SetMemoryMailbox; - CommandMailbox->SetMemoryMailbox.HealthStatusBufferSizeKB = 1; - CommandMailbox->SetMemoryMailbox.HealthStatusBufferBusAddress = - Controller->V2.HealthStatusBufferDMA; - CommandMailbox->SetMemoryMailbox.FirstCommandMailboxBusAddress = - Controller->V2.FirstCommandMailboxDMA; - CommandMailbox->SetMemoryMailbox.FirstStatusMailboxBusAddress = - Controller->V2.FirstStatusMailboxDMA; - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - while (DAC960_GEM_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_GEM_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_GEM_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_GEM_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = DAC960_GEM_ReadCommandStatus(ControllerBaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - case DAC960_BA_Controller: - while (DAC960_BA_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_BA_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_BA_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_BA_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = 
DAC960_BA_ReadCommandStatus(ControllerBaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - case DAC960_LP_Controller: - while (DAC960_LP_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_LP_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_LP_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_LP_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = DAC960_LP_ReadCommandStatus(ControllerBaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - default: - DAC960_Failure(Controller, "Unknown Controller Type\n"); - CommandStatus = DAC960_V2_AbormalCompletion; - break; - } - pci_free_consistent(PCI_Device, sizeof(DAC960_V2_CommandMailbox_T), - CommandMailbox, CommandMailboxDMA); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V1_ReadControllerConfiguration reads the Configuration Information - from DAC960 V1 Firmware Controllers and initializes the Controller structure. -*/ - -static bool DAC960_V1_ReadControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_V1_Enquiry2_T *Enquiry2; - dma_addr_t Enquiry2DMA; - DAC960_V1_Config2_T *Config2; - dma_addr_t Config2DMA; - int LogicalDriveNumber, Channel, TargetID; - struct dma_loaf local_dma; - - if (!init_dma_loaf(Controller->PCIDevice, &local_dma, - sizeof(DAC960_V1_Enquiry2_T) + sizeof(DAC960_V1_Config2_T))) - return DAC960_Failure(Controller, "LOGICAL DEVICE ALLOCATION"); - - Enquiry2 = slice_dma_loaf(&local_dma, sizeof(DAC960_V1_Enquiry2_T), &Enquiry2DMA); - Config2 = slice_dma_loaf(&local_dma, sizeof(DAC960_V1_Config2_T), &Config2DMA); - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_Enquiry, - Controller->V1.NewEnquiryDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "ENQUIRY"); - } - memcpy(&Controller->V1.Enquiry, Controller->V1.NewEnquiry, - sizeof(DAC960_V1_Enquiry_T)); - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_Enquiry2, Enquiry2DMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "ENQUIRY2"); - } - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_ReadConfig2, Config2DMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "READ CONFIG2"); - } - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_GetLogicalDriveInformation, - Controller->V1.NewLogicalDriveInformationDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "GET LOGICAL DRIVE INFORMATION"); - } - memcpy(&Controller->V1.LogicalDriveInformation, - Controller->V1.NewLogicalDriveInformation, - sizeof(DAC960_V1_LogicalDriveInformationArray_T)); - - for (Channel = 0; Channel < Enquiry2->ActualChannels; Channel++) - for (TargetID = 0; TargetID < Enquiry2->MaxTargets; TargetID++) { - if (!DAC960_V1_ExecuteType3D(Controller, DAC960_V1_GetDeviceState, - Channel, TargetID, - Controller->V1.NewDeviceStateDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "GET DEVICE STATE"); - } - memcpy(&Controller->V1.DeviceState[Channel][TargetID], - Controller->V1.NewDeviceState, sizeof(DAC960_V1_DeviceState_T)); - } - /* - Initialize the Controller Model Name and Full Model Name fields. 
- */ - switch (Enquiry2->HardwareID.SubModel) - { - case DAC960_V1_P_PD_PU: - if (Enquiry2->SCSICapability.BusSpeed == DAC960_V1_Ultra) - strcpy(Controller->ModelName, "DAC960PU"); - else strcpy(Controller->ModelName, "DAC960PD"); - break; - case DAC960_V1_PL: - strcpy(Controller->ModelName, "DAC960PL"); - break; - case DAC960_V1_PG: - strcpy(Controller->ModelName, "DAC960PG"); - break; - case DAC960_V1_PJ: - strcpy(Controller->ModelName, "DAC960PJ"); - break; - case DAC960_V1_PR: - strcpy(Controller->ModelName, "DAC960PR"); - break; - case DAC960_V1_PT: - strcpy(Controller->ModelName, "DAC960PT"); - break; - case DAC960_V1_PTL0: - strcpy(Controller->ModelName, "DAC960PTL0"); - break; - case DAC960_V1_PRL: - strcpy(Controller->ModelName, "DAC960PRL"); - break; - case DAC960_V1_PTL1: - strcpy(Controller->ModelName, "DAC960PTL1"); - break; - case DAC960_V1_1164P: - strcpy(Controller->ModelName, "DAC1164P"); - break; - default: - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "MODEL VERIFICATION"); - } - strcpy(Controller->FullModelName, "Mylex "); - strcat(Controller->FullModelName, Controller->ModelName); - /* - Initialize the Controller Firmware Version field and verify that it - is a supported firmware version. The supported firmware versions are: - - DAC1164P 5.06 and above - DAC960PTL/PRL/PJ/PG 4.06 and above - DAC960PU/PD/PL 3.51 and above - DAC960PU/PD/PL/P 2.73 and above - */ -#if defined(CONFIG_ALPHA) - /* - DEC Alpha machines were often equipped with DAC960 cards that were - OEMed from Mylex, and had their own custom firmware. Version 2.70, - the last custom FW revision to be released by DEC for these older - controllers, appears to work quite well with this driver. - - Cards tested successfully were several versions each of the PD and - PU, called by DEC the KZPSC and KZPAC, respectively, and having - the Manufacturer Numbers (from Mylex), usually on a sticker on the - back of the board, of: - - KZPSC: D040347 (1-channel) or D040348 (2-channel) or D040349 (3-channel) - KZPAC: D040395 (1-channel) or D040396 (2-channel) or D040397 (3-channel) - */ -# define FIRMWARE_27X "2.70" -#else -# define FIRMWARE_27X "2.73" -#endif - - if (Enquiry2->FirmwareID.MajorVersion == 0) - { - Enquiry2->FirmwareID.MajorVersion = - Controller->V1.Enquiry.MajorFirmwareVersion; - Enquiry2->FirmwareID.MinorVersion = - Controller->V1.Enquiry.MinorFirmwareVersion; - Enquiry2->FirmwareID.FirmwareType = '0'; - Enquiry2->FirmwareID.TurnID = 0; - } - snprintf(Controller->FirmwareVersion, sizeof(Controller->FirmwareVersion), - "%d.%02d-%c-%02d", - Enquiry2->FirmwareID.MajorVersion, - Enquiry2->FirmwareID.MinorVersion, - Enquiry2->FirmwareID.FirmwareType, - Enquiry2->FirmwareID.TurnID); - if (!((Controller->FirmwareVersion[0] == '5' && - strcmp(Controller->FirmwareVersion, "5.06") >= 0) || - (Controller->FirmwareVersion[0] == '4' && - strcmp(Controller->FirmwareVersion, "4.06") >= 0) || - (Controller->FirmwareVersion[0] == '3' && - strcmp(Controller->FirmwareVersion, "3.51") >= 0) || - (Controller->FirmwareVersion[0] == '2' && - strcmp(Controller->FirmwareVersion, FIRMWARE_27X) >= 0))) - { - DAC960_Failure(Controller, "FIRMWARE VERSION VERIFICATION"); - DAC960_Error("Firmware Version = '%s'\n", Controller, - Controller->FirmwareVersion); - free_dma_loaf(Controller->PCIDevice, &local_dma); - return false; - } - /* - Initialize the Controller Channels, Targets, Memory Size, and SAF-TE - Enclosure Management Enabled fields. 
- */ - Controller->Channels = Enquiry2->ActualChannels; - Controller->Targets = Enquiry2->MaxTargets; - Controller->MemorySize = Enquiry2->MemorySize >> 20; - Controller->V1.SAFTE_EnclosureManagementEnabled = - (Enquiry2->FaultManagementType == DAC960_V1_SAFTE); - /* - Initialize the Controller Queue Depth, Driver Queue Depth, Logical Drive - Count, Maximum Blocks per Command, Controller Scatter/Gather Limit, and - Driver Scatter/Gather Limit. The Driver Queue Depth must be at most one - less than the Controller Queue Depth to allow for an automatic drive - rebuild operation. - */ - Controller->ControllerQueueDepth = Controller->V1.Enquiry.MaxCommands; - Controller->DriverQueueDepth = Controller->ControllerQueueDepth - 1; - if (Controller->DriverQueueDepth > DAC960_MaxDriverQueueDepth) - Controller->DriverQueueDepth = DAC960_MaxDriverQueueDepth; - Controller->LogicalDriveCount = - Controller->V1.Enquiry.NumberOfLogicalDrives; - Controller->MaxBlocksPerCommand = Enquiry2->MaxBlocksPerCommand; - Controller->ControllerScatterGatherLimit = Enquiry2->MaxScatterGatherEntries; - Controller->DriverScatterGatherLimit = - Controller->ControllerScatterGatherLimit; - if (Controller->DriverScatterGatherLimit > DAC960_V1_ScatterGatherLimit) - Controller->DriverScatterGatherLimit = DAC960_V1_ScatterGatherLimit; - /* - Initialize the Stripe Size, Segment Size, and Geometry Translation. - */ - Controller->V1.StripeSize = Config2->BlocksPerStripe * Config2->BlockFactor - >> (10 - DAC960_BlockSizeBits); - Controller->V1.SegmentSize = Config2->BlocksPerCacheLine * Config2->BlockFactor - >> (10 - DAC960_BlockSizeBits); - switch (Config2->DriveGeometry) - { - case DAC960_V1_Geometry_128_32: - Controller->V1.GeometryTranslationHeads = 128; - Controller->V1.GeometryTranslationSectors = 32; - break; - case DAC960_V1_Geometry_255_63: - Controller->V1.GeometryTranslationHeads = 255; - Controller->V1.GeometryTranslationSectors = 63; - break; - default: - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "CONFIG2 DRIVE GEOMETRY"); - } - /* - Initialize the Background Initialization Status. - */ - if ((Controller->FirmwareVersion[0] == '4' && - strcmp(Controller->FirmwareVersion, "4.08") >= 0) || - (Controller->FirmwareVersion[0] == '5' && - strcmp(Controller->FirmwareVersion, "5.08") >= 0)) - { - Controller->V1.BackgroundInitializationStatusSupported = true; - DAC960_V1_ExecuteType3B(Controller, - DAC960_V1_BackgroundInitializationControl, 0x20, - Controller-> - V1.BackgroundInitializationStatusDMA); - memcpy(&Controller->V1.LastBackgroundInitializationStatus, - Controller->V1.BackgroundInitializationStatus, - sizeof(DAC960_V1_BackgroundInitializationStatus_T)); - } - /* - Initialize the Logical Drive Initially Accessible flag. - */ - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - if (Controller->V1.LogicalDriveInformation - [LogicalDriveNumber].LogicalDriveState != - DAC960_V1_LogicalDrive_Offline) - Controller->LogicalDriveInitiallyAccessible[LogicalDriveNumber] = true; - Controller->V1.LastRebuildStatus = DAC960_V1_NoRebuildOrCheckInProgress; - free_dma_loaf(Controller->PCIDevice, &local_dma); - return true; -} - - -/* - DAC960_V2_ReadControllerConfiguration reads the Configuration Information - from DAC960 V2 Firmware Controllers and initializes the Controller structure. 
-*/ - -static bool DAC960_V2_ReadControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_V2_ControllerInfo_T *ControllerInfo = - &Controller->V2.ControllerInformation; - unsigned short LogicalDeviceNumber = 0; - int ModelNameLength; - - /* Get data into dma-able area, then copy into permanent location */ - if (!DAC960_V2_NewControllerInfo(Controller)) - return DAC960_Failure(Controller, "GET CONTROLLER INFO"); - memcpy(ControllerInfo, Controller->V2.NewControllerInformation, - sizeof(DAC960_V2_ControllerInfo_T)); - - - if (!DAC960_V2_GeneralInfo(Controller)) - return DAC960_Failure(Controller, "GET HEALTH STATUS"); - - /* - Initialize the Controller Model Name and Full Model Name fields. - */ - ModelNameLength = sizeof(ControllerInfo->ControllerName); - if (ModelNameLength > sizeof(Controller->ModelName)-1) - ModelNameLength = sizeof(Controller->ModelName)-1; - memcpy(Controller->ModelName, ControllerInfo->ControllerName, - ModelNameLength); - ModelNameLength--; - while (Controller->ModelName[ModelNameLength] == ' ' || - Controller->ModelName[ModelNameLength] == '\0') - ModelNameLength--; - Controller->ModelName[++ModelNameLength] = '\0'; - strcpy(Controller->FullModelName, "Mylex "); - strcat(Controller->FullModelName, Controller->ModelName); - /* - Initialize the Controller Firmware Version field. - */ - sprintf(Controller->FirmwareVersion, "%d.%02d-%02d", - ControllerInfo->FirmwareMajorVersion, - ControllerInfo->FirmwareMinorVersion, - ControllerInfo->FirmwareTurnNumber); - if (ControllerInfo->FirmwareMajorVersion == 6 && - ControllerInfo->FirmwareMinorVersion == 0 && - ControllerInfo->FirmwareTurnNumber < 1) - { - DAC960_Info("FIRMWARE VERSION %s DOES NOT PROVIDE THE CONTROLLER\n", - Controller, Controller->FirmwareVersion); - DAC960_Info("STATUS MONITORING FUNCTIONALITY NEEDED BY THIS DRIVER.\n", - Controller); - DAC960_Info("PLEASE UPGRADE TO VERSION 6.00-01 OR ABOVE.\n", - Controller); - } - /* - Initialize the Controller Channels, Targets, and Memory Size. - */ - Controller->Channels = ControllerInfo->NumberOfPhysicalChannelsPresent; - Controller->Targets = - ControllerInfo->MaximumTargetsPerChannel - [ControllerInfo->NumberOfPhysicalChannelsPresent-1]; - Controller->MemorySize = ControllerInfo->MemorySizeMB; - /* - Initialize the Controller Queue Depth, Driver Queue Depth, Logical Drive - Count, Maximum Blocks per Command, Controller Scatter/Gather Limit, and - Driver Scatter/Gather Limit. The Driver Queue Depth must be at most one - less than the Controller Queue Depth to allow for an automatic drive - rebuild operation. - */ - Controller->ControllerQueueDepth = ControllerInfo->MaximumParallelCommands; - Controller->DriverQueueDepth = Controller->ControllerQueueDepth - 1; - if (Controller->DriverQueueDepth > DAC960_MaxDriverQueueDepth) - Controller->DriverQueueDepth = DAC960_MaxDriverQueueDepth; - Controller->LogicalDriveCount = ControllerInfo->LogicalDevicesPresent; - Controller->MaxBlocksPerCommand = - ControllerInfo->MaximumDataTransferSizeInBlocks; - Controller->ControllerScatterGatherLimit = - ControllerInfo->MaximumScatterGatherEntries; - Controller->DriverScatterGatherLimit = - Controller->ControllerScatterGatherLimit; - if (Controller->DriverScatterGatherLimit > DAC960_V2_ScatterGatherLimit) - Controller->DriverScatterGatherLimit = DAC960_V2_ScatterGatherLimit; - /* - Initialize the Logical Device Information. 
- */ - while (true) - { - DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = - Controller->V2.NewLogicalDeviceInformation; - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo; - DAC960_V2_PhysicalDevice_T PhysicalDevice; - - if (!DAC960_V2_NewLogicalDeviceInfo(Controller, LogicalDeviceNumber)) - break; - LogicalDeviceNumber = NewLogicalDeviceInfo->LogicalDeviceNumber; - if (LogicalDeviceNumber >= DAC960_MaxLogicalDrives) { - DAC960_Error("DAC960: Logical Drive Number %d not supported\n", - Controller, LogicalDeviceNumber); - break; - } - if (NewLogicalDeviceInfo->DeviceBlockSizeInBytes != DAC960_BlockSize) { - DAC960_Error("DAC960: Logical Drive Block Size %d not supported\n", - Controller, NewLogicalDeviceInfo->DeviceBlockSizeInBytes); - LogicalDeviceNumber++; - continue; - } - PhysicalDevice.Controller = 0; - PhysicalDevice.Channel = NewLogicalDeviceInfo->Channel; - PhysicalDevice.TargetID = NewLogicalDeviceInfo->TargetID; - PhysicalDevice.LogicalUnit = NewLogicalDeviceInfo->LogicalUnit; - Controller->V2.LogicalDriveToVirtualDevice[LogicalDeviceNumber] = - PhysicalDevice; - if (NewLogicalDeviceInfo->LogicalDeviceState != - DAC960_V2_LogicalDevice_Offline) - Controller->LogicalDriveInitiallyAccessible[LogicalDeviceNumber] = true; - LogicalDeviceInfo = kmalloc(sizeof(DAC960_V2_LogicalDeviceInfo_T), - GFP_ATOMIC); - if (LogicalDeviceInfo == NULL) - return DAC960_Failure(Controller, "LOGICAL DEVICE ALLOCATION"); - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber] = - LogicalDeviceInfo; - memcpy(LogicalDeviceInfo, NewLogicalDeviceInfo, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - LogicalDeviceNumber++; - } - return true; -} - - -/* - DAC960_ReportControllerConfiguration reports the Configuration Information - for Controller. -*/ - -static bool DAC960_ReportControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_Info("Configuring Mylex %s PCI RAID Controller\n", - Controller, Controller->ModelName); - DAC960_Info(" Firmware Version: %s, Channels: %d, Memory Size: %dMB\n", - Controller, Controller->FirmwareVersion, - Controller->Channels, Controller->MemorySize); - DAC960_Info(" PCI Bus: %d, Device: %d, Function: %d, I/O Address: ", - Controller, Controller->Bus, - Controller->Device, Controller->Function); - if (Controller->IO_Address == 0) - DAC960_Info("Unassigned\n", Controller); - else DAC960_Info("0x%X\n", Controller, Controller->IO_Address); - DAC960_Info(" PCI Address: 0x%X mapped at 0x%lX, IRQ Channel: %d\n", - Controller, Controller->PCI_Address, - (unsigned long) Controller->BaseAddress, - Controller->IRQ_Channel); - DAC960_Info(" Controller Queue Depth: %d, " - "Maximum Blocks per Command: %d\n", - Controller, Controller->ControllerQueueDepth, - Controller->MaxBlocksPerCommand); - DAC960_Info(" Driver Queue Depth: %d, " - "Scatter/Gather Limit: %d of %d Segments\n", - Controller, Controller->DriverQueueDepth, - Controller->DriverScatterGatherLimit, - Controller->ControllerScatterGatherLimit); - if (Controller->FirmwareType == DAC960_V1_Controller) - { - DAC960_Info(" Stripe Size: %dKB, Segment Size: %dKB, " - "BIOS Geometry: %d/%d\n", Controller, - Controller->V1.StripeSize, - Controller->V1.SegmentSize, - Controller->V1.GeometryTranslationHeads, - Controller->V1.GeometryTranslationSectors); - if (Controller->V1.SAFTE_EnclosureManagementEnabled) - DAC960_Info(" SAF-TE Enclosure Management Enabled\n", Controller); - } - return true; -} - - -/* - DAC960_V1_ReadDeviceConfiguration reads the Device Configuration Information - for DAC960 V1 Firmware Controllers 
by requesting the SCSI Inquiry and SCSI - Inquiry Unit Serial Number information for each device connected to - Controller. -*/ - -static bool DAC960_V1_ReadDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - struct dma_loaf local_dma; - - dma_addr_t DCDBs_dma[DAC960_V1_MaxChannels]; - DAC960_V1_DCDB_T *DCDBs_cpu[DAC960_V1_MaxChannels]; - - dma_addr_t SCSI_Inquiry_dma[DAC960_V1_MaxChannels]; - DAC960_SCSI_Inquiry_T *SCSI_Inquiry_cpu[DAC960_V1_MaxChannels]; - - dma_addr_t SCSI_NewInquiryUnitSerialNumberDMA[DAC960_V1_MaxChannels]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *SCSI_NewInquiryUnitSerialNumberCPU[DAC960_V1_MaxChannels]; - - struct completion Completions[DAC960_V1_MaxChannels]; - unsigned long flags; - int Channel, TargetID; - - if (!init_dma_loaf(Controller->PCIDevice, &local_dma, - DAC960_V1_MaxChannels*(sizeof(DAC960_V1_DCDB_T) + - sizeof(DAC960_SCSI_Inquiry_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)))) - return DAC960_Failure(Controller, - "DMA ALLOCATION FAILED IN ReadDeviceConfiguration"); - - for (Channel = 0; Channel < Controller->Channels; Channel++) { - DCDBs_cpu[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_V1_DCDB_T), DCDBs_dma + Channel); - SCSI_Inquiry_cpu[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_SCSI_Inquiry_T), - SCSI_Inquiry_dma + Channel); - SCSI_NewInquiryUnitSerialNumberCPU[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - SCSI_NewInquiryUnitSerialNumberDMA + Channel); - } - - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - /* - * For each channel, submit a probe for a device on that channel. - * The timeout interval for a device that is present is 10 seconds. - * With this approach, the timeout periods can elapse in parallel - * on each channel. - */ - for (Channel = 0; Channel < Controller->Channels; Channel++) - { - dma_addr_t NewInquiryStandardDataDMA = SCSI_Inquiry_dma[Channel]; - DAC960_V1_DCDB_T *DCDB = DCDBs_cpu[Channel]; - dma_addr_t DCDB_dma = DCDBs_dma[Channel]; - DAC960_Command_T *Command = Controller->Commands[Channel]; - struct completion *Completion = &Completions[Channel]; - - init_completion(Completion); - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - Command->Completion = Completion; - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_dma; - DCDB->Channel = Channel; - DCDB->TargetID = TargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->BusAddress = NewInquiryStandardDataDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 0; /* EVPD = 0 */ - DCDB->CDB[2] = 0; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->CDB[5] = 0; /* Control */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_QueueCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - } - /* - * Wait for the probes submitted in the previous loop - * to complete. On the probes that are successful, - * get the serial number of the device that was found. 
- */ - for (Channel = 0; Channel < Controller->Channels; Channel++) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData[Channel][TargetID]; - DAC960_SCSI_Inquiry_T *NewInquiryStandardData = SCSI_Inquiry_cpu[Channel]; - dma_addr_t NewInquiryUnitSerialNumberDMA = - SCSI_NewInquiryUnitSerialNumberDMA[Channel]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber = - SCSI_NewInquiryUnitSerialNumberCPU[Channel]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID]; - DAC960_Command_T *Command = Controller->Commands[Channel]; - DAC960_V1_DCDB_T *DCDB = DCDBs_cpu[Channel]; - struct completion *Completion = &Completions[Channel]; - - wait_for_completion(Completion); - - if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) { - memset(InquiryStandardData, 0, sizeof(DAC960_SCSI_Inquiry_T)); - InquiryStandardData->PeripheralDeviceType = 0x1F; - continue; - } else - memcpy(InquiryStandardData, NewInquiryStandardData, sizeof(DAC960_SCSI_Inquiry_T)); - - /* Preserve Channel and TargetID values from the previous loop */ - Command->Completion = Completion; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->BusAddress = NewInquiryUnitSerialNumberDMA; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 1; /* EVPD = 1 */ - DCDB->CDB[2] = 0x80; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->CDB[5] = 0; /* Control */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_QueueCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - wait_for_completion(Completion); - - if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - } - } - free_dma_loaf(Controller->PCIDevice, &local_dma); - return true; -} - - -/* - DAC960_V2_ReadDeviceConfiguration reads the Device Configuration Information - for DAC960 V2 Firmware Controllers by requesting the Physical Device - Information and SCSI Inquiry Unit Serial Number information for each - device connected to Controller. 
-*/ - -static bool DAC960_V2_ReadDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - unsigned char Channel = 0, TargetID = 0, LogicalUnit = 0; - unsigned short PhysicalDeviceIndex = 0; - - while (true) - { - DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = - Controller->V2.NewPhysicalDeviceInformation; - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber = - Controller->V2.NewInquiryUnitSerialNumber; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber; - - if (!DAC960_V2_NewPhysicalDeviceInfo(Controller, Channel, TargetID, LogicalUnit)) - break; - - PhysicalDeviceInfo = kmalloc(sizeof(DAC960_V2_PhysicalDeviceInfo_T), - GFP_ATOMIC); - if (PhysicalDeviceInfo == NULL) - return DAC960_Failure(Controller, "PHYSICAL DEVICE ALLOCATION"); - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex] = - PhysicalDeviceInfo; - memcpy(PhysicalDeviceInfo, NewPhysicalDeviceInfo, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - - InquiryUnitSerialNumber = kmalloc( - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), GFP_ATOMIC); - if (InquiryUnitSerialNumber == NULL) { - kfree(PhysicalDeviceInfo); - return DAC960_Failure(Controller, "SERIAL NUMBER ALLOCATION"); - } - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex] = - InquiryUnitSerialNumber; - - Channel = NewPhysicalDeviceInfo->Channel; - TargetID = NewPhysicalDeviceInfo->TargetID; - LogicalUnit = NewPhysicalDeviceInfo->LogicalUnit; - - /* - Some devices do NOT have Unit Serial Numbers. - This command fails for them. But, we still want to - remember those devices are there. Construct a - UnitSerialNumber structure for the failure case. - */ - if (!DAC960_V2_NewInquiryUnitSerialNumber(Controller, Channel, TargetID, LogicalUnit)) { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - - PhysicalDeviceIndex++; - LogicalUnit++; - } - return true; -} - - -/* - DAC960_SanitizeInquiryData sanitizes the Vendor, Model, Revision, and - Product Serial Number fields of the Inquiry Standard Data and Inquiry - Unit Serial Number structures. -*/ - -static void DAC960_SanitizeInquiryData(DAC960_SCSI_Inquiry_T - *InquiryStandardData, - DAC960_SCSI_Inquiry_UnitSerialNumber_T - *InquiryUnitSerialNumber, - unsigned char *Vendor, - unsigned char *Model, - unsigned char *Revision, - unsigned char *SerialNumber) -{ - int SerialNumberLength, i; - if (InquiryStandardData->PeripheralDeviceType == 0x1F) return; - for (i = 0; i < sizeof(InquiryStandardData->VendorIdentification); i++) - { - unsigned char VendorCharacter = - InquiryStandardData->VendorIdentification[i]; - Vendor[i] = (VendorCharacter >= ' ' && VendorCharacter <= '~' - ? VendorCharacter : ' '); - } - Vendor[sizeof(InquiryStandardData->VendorIdentification)] = '\0'; - for (i = 0; i < sizeof(InquiryStandardData->ProductIdentification); i++) - { - unsigned char ModelCharacter = - InquiryStandardData->ProductIdentification[i]; - Model[i] = (ModelCharacter >= ' ' && ModelCharacter <= '~' - ? 
ModelCharacter : ' '); - } - Model[sizeof(InquiryStandardData->ProductIdentification)] = '\0'; - for (i = 0; i < sizeof(InquiryStandardData->ProductRevisionLevel); i++) - { - unsigned char RevisionCharacter = - InquiryStandardData->ProductRevisionLevel[i]; - Revision[i] = (RevisionCharacter >= ' ' && RevisionCharacter <= '~' - ? RevisionCharacter : ' '); - } - Revision[sizeof(InquiryStandardData->ProductRevisionLevel)] = '\0'; - if (InquiryUnitSerialNumber->PeripheralDeviceType == 0x1F) return; - SerialNumberLength = InquiryUnitSerialNumber->PageLength; - if (SerialNumberLength > - sizeof(InquiryUnitSerialNumber->ProductSerialNumber)) - SerialNumberLength = sizeof(InquiryUnitSerialNumber->ProductSerialNumber); - for (i = 0; i < SerialNumberLength; i++) - { - unsigned char SerialNumberCharacter = - InquiryUnitSerialNumber->ProductSerialNumber[i]; - SerialNumber[i] = - (SerialNumberCharacter >= ' ' && SerialNumberCharacter <= '~' - ? SerialNumberCharacter : ' '); - } - SerialNumber[SerialNumberLength] = '\0'; -} - - -/* - DAC960_V1_ReportDeviceConfiguration reports the Device Configuration - Information for DAC960 V1 Firmware Controllers. -*/ - -static bool DAC960_V1_ReportDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - int LogicalDriveNumber, Channel, TargetID; - DAC960_Info(" Physical Devices:\n", Controller); - for (Channel = 0; Channel < Controller->Channels; Channel++) - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData[Channel][TargetID]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID]; - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - DAC960_V1_ErrorTableEntry_T *ErrorEntry = - &Controller->V1.ErrorTable.ErrorTableEntries[Channel][TargetID]; - char Vendor[1+sizeof(InquiryStandardData->VendorIdentification)]; - char Model[1+sizeof(InquiryStandardData->ProductIdentification)]; - char Revision[1+sizeof(InquiryStandardData->ProductRevisionLevel)]; - char SerialNumber[1+sizeof(InquiryUnitSerialNumber - ->ProductSerialNumber)]; - if (InquiryStandardData->PeripheralDeviceType == 0x1F) continue; - DAC960_SanitizeInquiryData(InquiryStandardData, InquiryUnitSerialNumber, - Vendor, Model, Revision, SerialNumber); - DAC960_Info(" %d:%d%s Vendor: %s Model: %s Revision: %s\n", - Controller, Channel, TargetID, (TargetID < 10 ? " " : ""), - Vendor, Model, Revision); - if (InquiryUnitSerialNumber->PeripheralDeviceType != 0x1F) - DAC960_Info(" Serial Number: %s\n", Controller, SerialNumber); - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType) - { - if (Controller->V1.DeviceResetCount[Channel][TargetID] > 0) - DAC960_Info(" Disk Status: %s, %u blocks, %d resets\n", - Controller, - (DeviceState->DeviceState == DAC960_V1_Device_Dead - ? "Dead" - : DeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "Write-Only" - : DeviceState->DeviceState - == DAC960_V1_Device_Online - ? "Online" : "Standby"), - DeviceState->DiskSize, - Controller->V1.DeviceResetCount[Channel][TargetID]); - else - DAC960_Info(" Disk Status: %s, %u blocks\n", Controller, - (DeviceState->DeviceState == DAC960_V1_Device_Dead - ? "Dead" - : DeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "Write-Only" - : DeviceState->DeviceState - == DAC960_V1_Device_Online - ? 
"Online" : "Standby"), - DeviceState->DiskSize); - } - if (ErrorEntry->ParityErrorCount > 0 || - ErrorEntry->SoftErrorCount > 0 || - ErrorEntry->HardErrorCount > 0 || - ErrorEntry->MiscErrorCount > 0) - DAC960_Info(" Errors - Parity: %d, Soft: %d, " - "Hard: %d, Misc: %d\n", Controller, - ErrorEntry->ParityErrorCount, - ErrorEntry->SoftErrorCount, - ErrorEntry->HardErrorCount, - ErrorEntry->MiscErrorCount); - } - DAC960_Info(" Logical Drives:\n", Controller); - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - { - DAC960_V1_LogicalDriveInformation_T *LogicalDriveInformation = - &Controller->V1.LogicalDriveInformation[LogicalDriveNumber]; - DAC960_Info(" /dev/rd/c%dd%d: RAID-%d, %s, %u blocks, %s\n", - Controller, Controller->ControllerNumber, LogicalDriveNumber, - LogicalDriveInformation->RAIDLevel, - (LogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Online - ? "Online" - : LogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Critical - ? "Critical" : "Offline"), - LogicalDriveInformation->LogicalDriveSize, - (LogicalDriveInformation->WriteBack - ? "Write Back" : "Write Thru")); - } - return true; -} - - -/* - DAC960_V2_ReportDeviceConfiguration reports the Device Configuration - Information for DAC960 V2 Firmware Controllers. -*/ - -static bool DAC960_V2_ReportDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - int PhysicalDeviceIndex, LogicalDriveNumber; - DAC960_Info(" Physical Devices:\n", Controller); - for (PhysicalDeviceIndex = 0; - PhysicalDeviceIndex < DAC960_V2_MaxPhysicalDevices; - PhysicalDeviceIndex++) - { - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - DAC960_SCSI_Inquiry_T *InquiryStandardData = - (DAC960_SCSI_Inquiry_T *) &PhysicalDeviceInfo->SCSI_InquiryData; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - char Vendor[1+sizeof(InquiryStandardData->VendorIdentification)]; - char Model[1+sizeof(InquiryStandardData->ProductIdentification)]; - char Revision[1+sizeof(InquiryStandardData->ProductRevisionLevel)]; - char SerialNumber[1+sizeof(InquiryUnitSerialNumber->ProductSerialNumber)]; - if (PhysicalDeviceInfo == NULL) break; - DAC960_SanitizeInquiryData(InquiryStandardData, InquiryUnitSerialNumber, - Vendor, Model, Revision, SerialNumber); - DAC960_Info(" %d:%d%s Vendor: %s Model: %s Revision: %s\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID, - (PhysicalDeviceInfo->TargetID < 10 ? " " : ""), - Vendor, Model, Revision); - if (PhysicalDeviceInfo->NegotiatedSynchronousMegaTransfers == 0) - DAC960_Info(" %sAsynchronous\n", Controller, - (PhysicalDeviceInfo->NegotiatedDataWidthBits == 16 - ? "Wide " :"")); - else - DAC960_Info(" %sSynchronous at %d MB/sec\n", Controller, - (PhysicalDeviceInfo->NegotiatedDataWidthBits == 16 - ? "Wide " :""), - (PhysicalDeviceInfo->NegotiatedSynchronousMegaTransfers - * PhysicalDeviceInfo->NegotiatedDataWidthBits/8)); - if (InquiryUnitSerialNumber->PeripheralDeviceType != 0x1F) - DAC960_Info(" Serial Number: %s\n", Controller, SerialNumber); - if (PhysicalDeviceInfo->PhysicalDeviceState == - DAC960_V2_Device_Unconfigured) - continue; - DAC960_Info(" Disk Status: %s, %u blocks\n", Controller, - (PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Online - ? "Online" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Rebuild - ? 
"Rebuild" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Missing - ? "Missing" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Critical - ? "Critical" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead - ? "Dead" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_SuspectedDead - ? "Suspected-Dead" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_CommandedOffline - ? "Commanded-Offline" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Standby - ? "Standby" : "Unknown"), - PhysicalDeviceInfo->ConfigurableDeviceSize); - if (PhysicalDeviceInfo->ParityErrors == 0 && - PhysicalDeviceInfo->SoftErrors == 0 && - PhysicalDeviceInfo->HardErrors == 0 && - PhysicalDeviceInfo->MiscellaneousErrors == 0 && - PhysicalDeviceInfo->CommandTimeouts == 0 && - PhysicalDeviceInfo->Retries == 0 && - PhysicalDeviceInfo->Aborts == 0 && - PhysicalDeviceInfo->PredictedFailuresDetected == 0) - continue; - DAC960_Info(" Errors - Parity: %d, Soft: %d, " - "Hard: %d, Misc: %d\n", Controller, - PhysicalDeviceInfo->ParityErrors, - PhysicalDeviceInfo->SoftErrors, - PhysicalDeviceInfo->HardErrors, - PhysicalDeviceInfo->MiscellaneousErrors); - DAC960_Info(" Timeouts: %d, Retries: %d, " - "Aborts: %d, Predicted: %d\n", Controller, - PhysicalDeviceInfo->CommandTimeouts, - PhysicalDeviceInfo->Retries, - PhysicalDeviceInfo->Aborts, - PhysicalDeviceInfo->PredictedFailuresDetected); - } - DAC960_Info(" Logical Drives:\n", Controller); - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - { - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDriveNumber]; - static const unsigned char *ReadCacheStatus[] = { - "Read Cache Disabled", - "Read Cache Enabled", - "Read Ahead Enabled", - "Intelligent Read Ahead Enabled", - "-", "-", "-", "-" - }; - static const unsigned char *WriteCacheStatus[] = { - "Write Cache Disabled", - "Logical Device Read Only", - "Write Cache Enabled", - "Intelligent Write Cache Enabled", - "-", "-", "-", "-" - }; - unsigned char *GeometryTranslation; - if (LogicalDeviceInfo == NULL) continue; - switch (LogicalDeviceInfo->DriveGeometry) - { - case DAC960_V2_Geometry_128_32: - GeometryTranslation = "128/32"; - break; - case DAC960_V2_Geometry_255_63: - GeometryTranslation = "255/63"; - break; - default: - GeometryTranslation = "Invalid"; - DAC960_Error("Illegal Logical Device Geometry %d\n", - Controller, LogicalDeviceInfo->DriveGeometry); - break; - } - DAC960_Info(" /dev/rd/c%dd%d: RAID-%d, %s, %u blocks\n", - Controller, Controller->ControllerNumber, LogicalDriveNumber, - LogicalDeviceInfo->RAIDLevel, - (LogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Online - ? "Online" - : LogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Critical - ? "Critical" : "Offline"), - LogicalDeviceInfo->ConfigurableDeviceSize); - DAC960_Info(" Logical Device %s, BIOS Geometry: %s\n", - Controller, - (LogicalDeviceInfo->LogicalDeviceControl - .LogicalDeviceInitialized - ? 
"Initialized" : "Uninitialized"), - GeometryTranslation); - if (LogicalDeviceInfo->StripeSize == 0) - { - if (LogicalDeviceInfo->CacheLineSize == 0) - DAC960_Info(" Stripe Size: N/A, " - "Segment Size: N/A\n", Controller); - else - DAC960_Info(" Stripe Size: N/A, " - "Segment Size: %dKB\n", Controller, - 1 << (LogicalDeviceInfo->CacheLineSize - 2)); - } - else - { - if (LogicalDeviceInfo->CacheLineSize == 0) - DAC960_Info(" Stripe Size: %dKB, " - "Segment Size: N/A\n", Controller, - 1 << (LogicalDeviceInfo->StripeSize - 2)); - else - DAC960_Info(" Stripe Size: %dKB, " - "Segment Size: %dKB\n", Controller, - 1 << (LogicalDeviceInfo->StripeSize - 2), - 1 << (LogicalDeviceInfo->CacheLineSize - 2)); - } - DAC960_Info(" %s, %s\n", Controller, - ReadCacheStatus[ - LogicalDeviceInfo->LogicalDeviceControl.ReadCache], - WriteCacheStatus[ - LogicalDeviceInfo->LogicalDeviceControl.WriteCache]); - if (LogicalDeviceInfo->SoftErrors > 0 || - LogicalDeviceInfo->CommandsFailed > 0 || - LogicalDeviceInfo->DeferredWriteErrors) - DAC960_Info(" Errors - Soft: %d, Failed: %d, " - "Deferred Write: %d\n", Controller, - LogicalDeviceInfo->SoftErrors, - LogicalDeviceInfo->CommandsFailed, - LogicalDeviceInfo->DeferredWriteErrors); - - } - return true; -} - -/* - DAC960_RegisterBlockDevice registers the Block Device structures - associated with Controller. -*/ - -static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) -{ - int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - int n; - - /* - Register the Block Device Major Number for this DAC960 Controller. - */ - if (register_blkdev(MajorNumber, "dac960") < 0) - return false; - - for (n = 0; n < DAC960_MaxLogicalDrives; n++) { - struct gendisk *disk = Controller->disks[n]; - struct request_queue *RequestQueue; - - /* for now, let all request queues share controller's lock */ - RequestQueue = blk_init_queue(DAC960_RequestFunction,&Controller->queue_lock); - if (!RequestQueue) { - printk("DAC960: failure to allocate request queue\n"); - continue; - } - Controller->RequestQueue[n] = RequestQueue; - RequestQueue->queuedata = Controller; - blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit); - blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand); - disk->queue = RequestQueue; - sprintf(disk->disk_name, "rd/c%dd%d", Controller->ControllerNumber, n); - disk->major = MajorNumber; - disk->first_minor = n << DAC960_MaxPartitionsBits; - disk->fops = &DAC960_BlockDeviceOperations; - } - /* - Indicate the Block Device Registration completed successfully, - */ - return true; -} - - -/* - DAC960_UnregisterBlockDevice unregisters the Block Device structures - associated with Controller. -*/ - -static void DAC960_UnregisterBlockDevice(DAC960_Controller_T *Controller) -{ - int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - int disk; - - /* does order matter when deleting gendisk and cleanup in request queue? */ - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) { - del_gendisk(Controller->disks[disk]); - blk_cleanup_queue(Controller->RequestQueue[disk]); - Controller->RequestQueue[disk] = NULL; - } - - /* - Unregister the Block Device Major Number for this DAC960 Controller. - */ - unregister_blkdev(MajorNumber, "dac960"); -} - -/* - DAC960_ComputeGenericDiskInfo computes the values for the Generic Disk - Information Partition Sector Counts and Block Sizes. 
-*/ - -static void DAC960_ComputeGenericDiskInfo(DAC960_Controller_T *Controller) -{ - int disk; - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) - set_capacity(Controller->disks[disk], disk_size(Controller, disk)); -} - -/* - DAC960_ReportErrorStatus reports Controller BIOS Messages passed through - the Error Status Register when the driver performs the BIOS handshaking. - It returns true for fatal errors and false otherwise. -*/ - -static bool DAC960_ReportErrorStatus(DAC960_Controller_T *Controller, - unsigned char ErrorStatus, - unsigned char Parameter0, - unsigned char Parameter1) -{ - switch (ErrorStatus) - { - case 0x00: - DAC960_Notice("Physical Device %d:%d Not Responding\n", - Controller, Parameter1, Parameter0); - break; - case 0x08: - if (Controller->DriveSpinUpMessageDisplayed) break; - DAC960_Notice("Spinning Up Drives\n", Controller); - Controller->DriveSpinUpMessageDisplayed = true; - break; - case 0x30: - DAC960_Notice("Configuration Checksum Error\n", Controller); - break; - case 0x60: - DAC960_Notice("Mirror Race Recovery Failed\n", Controller); - break; - case 0x70: - DAC960_Notice("Mirror Race Recovery In Progress\n", Controller); - break; - case 0x90: - DAC960_Notice("Physical Device %d:%d COD Mismatch\n", - Controller, Parameter1, Parameter0); - break; - case 0xA0: - DAC960_Notice("Logical Drive Installation Aborted\n", Controller); - break; - case 0xB0: - DAC960_Notice("Mirror Race On A Critical Logical Drive\n", Controller); - break; - case 0xD0: - DAC960_Notice("New Controller Configuration Found\n", Controller); - break; - case 0xF0: - DAC960_Error("Fatal Memory Parity Error for Controller at\n", Controller); - return true; - default: - DAC960_Error("Unknown Initialization Error %02X for Controller at\n", - Controller, ErrorStatus); - return true; - } - return false; -} - - -/* - * DAC960_DetectCleanup releases the resources that were allocated - * during DAC960_DetectController(). DAC960_DetectController can - * have several internal failure points, so not ALL resources may - * have been allocated. It's important to free only - * resources that HAVE been allocated. The code below always - * tests that the resource has been allocated before attempting to - * free it. 
- */ -static void DAC960_DetectCleanup(DAC960_Controller_T *Controller) -{ - int i; - - /* Free the memory mailbox, status, and related structures */ - free_dma_loaf(Controller->PCIDevice, &Controller->DmaPages); - if (Controller->MemoryMappedAddress) { - switch(Controller->HardwareType) - { - case DAC960_GEM_Controller: - DAC960_GEM_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_BA_Controller: - DAC960_BA_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_LP_Controller: - DAC960_LP_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_LA_Controller: - DAC960_LA_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_PG_Controller: - DAC960_PG_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_PD_Controller: - DAC960_PD_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_P_Controller: - DAC960_PD_DisableInterrupts(Controller->BaseAddress); - break; - } - iounmap(Controller->MemoryMappedAddress); - } - if (Controller->IRQ_Channel) - free_irq(Controller->IRQ_Channel, Controller); - if (Controller->IO_Address) - release_region(Controller->IO_Address, 0x80); - pci_disable_device(Controller->PCIDevice); - for (i = 0; (i < DAC960_MaxLogicalDrives) && Controller->disks[i]; i++) - put_disk(Controller->disks[i]); - DAC960_Controllers[Controller->ControllerNumber] = NULL; - kfree(Controller); -} - - -/* - DAC960_DetectController detects Mylex DAC960/AcceleRAID/eXtremeRAID - PCI RAID Controllers by interrogating the PCI Configuration Space for - Controller Type. -*/ - -static DAC960_Controller_T * -DAC960_DetectController(struct pci_dev *PCI_Device, - const struct pci_device_id *entry) -{ - struct DAC960_privdata *privdata = - (struct DAC960_privdata *)entry->driver_data; - irq_handler_t InterruptHandler = privdata->InterruptHandler; - unsigned int MemoryWindowSize = privdata->MemoryWindowSize; - DAC960_Controller_T *Controller = NULL; - unsigned char DeviceFunction = PCI_Device->devfn; - unsigned char ErrorStatus, Parameter0, Parameter1; - unsigned int IRQ_Channel; - void __iomem *BaseAddress; - int i; - - Controller = kzalloc(sizeof(DAC960_Controller_T), GFP_ATOMIC); - if (Controller == NULL) { - DAC960_Error("Unable to allocate Controller structure for " - "Controller at\n", NULL); - return NULL; - } - Controller->ControllerNumber = DAC960_ControllerCount; - DAC960_Controllers[DAC960_ControllerCount++] = Controller; - Controller->Bus = PCI_Device->bus->number; - Controller->FirmwareType = privdata->FirmwareType; - Controller->HardwareType = privdata->HardwareType; - Controller->Device = DeviceFunction >> 3; - Controller->Function = DeviceFunction & 0x7; - Controller->PCIDevice = PCI_Device; - strcpy(Controller->FullModelName, "DAC960"); - - if (pci_enable_device(PCI_Device)) - goto Failure; - - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_BA_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_LP_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_LA_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_PG_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_PD_Controller: - Controller->IO_Address = pci_resource_start(PCI_Device, 0); - Controller->PCI_Address = pci_resource_start(PCI_Device, 1); - break; - case 
DAC960_P_Controller: - Controller->IO_Address = pci_resource_start(PCI_Device, 0); - Controller->PCI_Address = pci_resource_start(PCI_Device, 1); - break; - } - - pci_set_drvdata(PCI_Device, (void *)((long)Controller->ControllerNumber)); - for (i = 0; i < DAC960_MaxLogicalDrives; i++) { - Controller->disks[i] = alloc_disk(1<<DAC960_MaxPartitionsBits); - if (!Controller->disks[i]) - goto Failure; - Controller->disks[i]->private_data = (void *)((long)i); - } - init_waitqueue_head(&Controller->CommandWaitQueue); - init_waitqueue_head(&Controller->HealthStatusWaitQueue); - spin_lock_init(&Controller->queue_lock); - DAC960_AnnounceDriver(Controller); - /* - Map the Controller Register Window. - */ - if (MemoryWindowSize < PAGE_SIZE) - MemoryWindowSize = PAGE_SIZE; - Controller->MemoryMappedAddress = - ioremap_nocache(Controller->PCI_Address & PAGE_MASK, MemoryWindowSize); - Controller->BaseAddress = - Controller->MemoryMappedAddress + (Controller->PCI_Address & ~PAGE_MASK); - if (Controller->MemoryMappedAddress == NULL) - { - DAC960_Error("Unable to map Controller Register Window for " - "Controller at\n", Controller); - goto Failure; - } - BaseAddress = Controller->BaseAddress; - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - DAC960_GEM_DisableInterrupts(BaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_GEM_InitializationInProgressP(BaseAddress)) - { - if (DAC960_GEM_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_GEM_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_GEM_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_BA_Controller: - DAC960_BA_DisableInterrupts(BaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_BA_InitializationInProgressP(BaseAddress)) - { - if (DAC960_BA_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_BA_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_BA_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_LP_Controller: - DAC960_LP_DisableInterrupts(BaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_LP_InitializationInProgressP(BaseAddress)) - { - if (DAC960_LP_ReadErrorStatus(BaseAddress, &ErrorStatus, - 
&Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_LP_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_LP_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_LA_Controller: - DAC960_LA_DisableInterrupts(BaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_LA_InitializationInProgressP(BaseAddress)) - { - if (DAC960_LA_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_LA_EnableInterrupts(BaseAddress); - if (Controller->V1.DualModeMemoryMailboxInterface) - Controller->QueueCommand = DAC960_LA_QueueCommandDualMode; - else Controller->QueueCommand = DAC960_LA_QueueCommandSingleMode; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_PG_Controller: - DAC960_PG_DisableInterrupts(BaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_PG_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PG_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PG_EnableInterrupts(BaseAddress); - if (Controller->V1.DualModeMemoryMailboxInterface) - Controller->QueueCommand = DAC960_PG_QueueCommandDualMode; - else Controller->QueueCommand = DAC960_PG_QueueCommandSingleMode; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_PD_Controller: - if (!request_region(Controller->IO_Address, 0x80, - Controller->FullModelName)) { - DAC960_Error("IO port 0x%lx busy for Controller at\n", - Controller, Controller->IO_Address); - goto Failure; - } - DAC960_PD_DisableInterrupts(BaseAddress); - DAC960_PD_AcknowledgeStatus(BaseAddress); - udelay(1000); - while (DAC960_PD_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PD_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - 
DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to allocate DMA mapped memory " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PD_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_PD_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_P_Controller: - if (!request_region(Controller->IO_Address, 0x80, - Controller->FullModelName)){ - DAC960_Error("IO port 0x%lx busy for Controller at\n", - Controller, Controller->IO_Address); - goto Failure; - } - DAC960_PD_DisableInterrupts(BaseAddress); - DAC960_PD_AcknowledgeStatus(BaseAddress); - udelay(1000); - while (DAC960_PD_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PD_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to allocate DMA mapped memory" - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PD_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_P_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - } - /* - Acquire shared access to the IRQ Channel. - */ - IRQ_Channel = PCI_Device->irq; - if (request_irq(IRQ_Channel, InterruptHandler, IRQF_SHARED, - Controller->FullModelName, Controller) < 0) - { - DAC960_Error("Unable to acquire IRQ Channel %d for Controller at\n", - Controller, Controller->IRQ_Channel); - goto Failure; - } - Controller->IRQ_Channel = IRQ_Channel; - Controller->InitialCommand.CommandIdentifier = 1; - Controller->InitialCommand.Controller = Controller; - Controller->Commands[0] = &Controller->InitialCommand; - Controller->FreeCommands = &Controller->InitialCommand; - return Controller; - -Failure: - if (Controller->IO_Address == 0) - DAC960_Error("PCI Bus %d Device %d Function %d I/O Address N/A " - "PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->PCI_Address); - else - DAC960_Error("PCI Bus %d Device %d Function %d I/O Address " - "0x%X PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->IO_Address, - Controller->PCI_Address); - DAC960_DetectCleanup(Controller); - DAC960_ControllerCount--; - return NULL; -} - -/* - DAC960_InitializeController initializes Controller. -*/ - -static bool -DAC960_InitializeController(DAC960_Controller_T *Controller) -{ - if (DAC960_ReadControllerConfiguration(Controller) && - DAC960_ReportControllerConfiguration(Controller) && - DAC960_CreateAuxiliaryStructures(Controller) && - DAC960_ReadDeviceConfiguration(Controller) && - DAC960_ReportDeviceConfiguration(Controller) && - DAC960_RegisterBlockDevice(Controller)) - { - /* - Initialize the Monitoring Timer. 
- */ - timer_setup(&Controller->MonitoringTimer, - DAC960_MonitoringTimerFunction, 0); - Controller->MonitoringTimer.expires = - jiffies + DAC960_MonitoringTimerInterval; - add_timer(&Controller->MonitoringTimer); - Controller->ControllerInitialized = true; - return true; - } - return false; -} - - -/* - DAC960_FinalizeController finalizes Controller. -*/ - -static void DAC960_FinalizeController(DAC960_Controller_T *Controller) -{ - if (Controller->ControllerInitialized) - { - unsigned long flags; - - /* - * Acquiring and releasing lock here eliminates - * a very low probability race. - * - * The code below allocates controller command structures - * from the free list without holding the controller lock. - * This is safe assuming there is no other activity on - * the controller at the time. - * - * But, there might be a monitoring command still - * in progress. Setting the Shutdown flag while holding - * the lock ensures that there is no monitoring command - * in the interrupt handler currently, and any monitoring - * commands that complete from this time on will NOT return - * their command structure to the free list. - */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - Controller->ShutdownMonitoringTimer = 1; - spin_unlock_irqrestore(&Controller->queue_lock, flags); - - del_timer_sync(&Controller->MonitoringTimer); - if (Controller->FirmwareType == DAC960_V1_Controller) - { - DAC960_Notice("Flushing Cache...", Controller); - DAC960_V1_ExecuteType3(Controller, DAC960_V1_Flush, 0); - DAC960_Notice("done\n", Controller); - - if (Controller->HardwareType == DAC960_PD_Controller) - release_region(Controller->IO_Address, 0x80); - } - else - { - DAC960_Notice("Flushing Cache...", Controller); - DAC960_V2_DeviceOperation(Controller, DAC960_V2_PauseDevice, - DAC960_V2_RAID_Controller); - DAC960_Notice("done\n", Controller); - } - } - DAC960_UnregisterBlockDevice(Controller); - DAC960_DestroyAuxiliaryStructures(Controller); - DAC960_DestroyProcEntries(Controller); - DAC960_DetectCleanup(Controller); -} - - -/* - DAC960_Probe verifies controller's existence and - initializes the DAC960 Driver for that controller. -*/ - -static int -DAC960_Probe(struct pci_dev *dev, const struct pci_device_id *entry) -{ - int disk; - DAC960_Controller_T *Controller; - - if (DAC960_ControllerCount == DAC960_MaxControllers) - { - DAC960_Error("More than %d DAC960 Controllers detected - " - "ignoring from Controller at\n", - NULL, DAC960_MaxControllers); - return -ENODEV; - } - - Controller = DAC960_DetectController(dev, entry); - if (!Controller) - return -ENODEV; - - if (!DAC960_InitializeController(Controller)) { - DAC960_FinalizeController(Controller); - return -ENODEV; - } - - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) { - set_capacity(Controller->disks[disk], disk_size(Controller, disk)); - add_disk(Controller->disks[disk]); - } - DAC960_CreateProcEntries(Controller); - return 0; -} - - -/* - DAC960_Finalize finalizes the DAC960 Driver. -*/ - -static void DAC960_Remove(struct pci_dev *PCI_Device) -{ - int Controller_Number = (long)pci_get_drvdata(PCI_Device); - DAC960_Controller_T *Controller = DAC960_Controllers[Controller_Number]; - if (Controller != NULL) - DAC960_FinalizeController(Controller); -} - - -/* - DAC960_V1_QueueReadWriteCommand prepares and queues a Read/Write Command for - DAC960 V1 Firmware Controllers. 
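The comment in the finalization routine above describes the quiesce ordering: take the queue lock only long enough to publish the shutdown flag (so any monitoring completion running in the interrupt handler either sees the flag and bails out, or finishes first), then synchronously kill the timer. A compact sketch of that ordering, continuing the hypothetical my_ctrl from the previous sketch:

	#include <linux/spinlock.h>
	#include <linux/timer.h>

	static void my_ctrl_stop_monitor(struct my_ctrl *ctrl, spinlock_t *lock)
	{
		unsigned long flags;

		/*
		 * Publish the shutdown flag under the lock so that a
		 * monitoring completion cannot race past it and re-arm
		 * the timer after we have decided to stop.
		 */
		spin_lock_irqsave(lock, flags);
		ctrl->shutdown = true;
		spin_unlock_irqrestore(lock, flags);

		/* Now it is safe to wait out a possibly-running handler. */
		del_timer_sync(&ctrl->monitor_timer);
	}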
-*/ - -static void DAC960_V1_QueueReadWriteCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_ScatterGatherSegment_T *ScatterGatherList = - Command->V1.ScatterGatherList; - struct scatterlist *ScatterList = Command->V1.ScatterList; - - DAC960_V1_ClearCommand(Command); - - if (Command->SegmentCount == 1) - { - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - CommandMailbox->Type5.CommandOpcode = DAC960_V1_Read; - else - CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; - - CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; - CommandMailbox->Type5.LD.LogicalDriveNumber = Command->LogicalDriveNumber; - CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; - CommandMailbox->Type5.BusAddress = - (DAC960_BusAddress32_T)sg_dma_address(ScatterList); - } - else - { - int i; - - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - CommandMailbox->Type5.CommandOpcode = DAC960_V1_ReadWithScatterGather; - else - CommandMailbox->Type5.CommandOpcode = DAC960_V1_WriteWithScatterGather; - - CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; - CommandMailbox->Type5.LD.LogicalDriveNumber = Command->LogicalDriveNumber; - CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; - CommandMailbox->Type5.BusAddress = Command->V1.ScatterGatherListDMA; - - CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; - - for (i = 0; i < Command->SegmentCount; i++, ScatterList++, ScatterGatherList++) { - ScatterGatherList->SegmentDataPointer = - (DAC960_BusAddress32_T)sg_dma_address(ScatterList); - ScatterGatherList->SegmentByteCount = - (DAC960_ByteCount32_T)sg_dma_len(ScatterList); - } - } - DAC960_QueueCommand(Command); -} - - -/* - DAC960_V2_QueueReadWriteCommand prepares and queues a Read/Write Command for - DAC960 V2 Firmware Controllers. -*/ - -static void DAC960_V2_QueueReadWriteCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - struct scatterlist *ScatterList = Command->V2.ScatterList; - - DAC960_V2_ClearCommand(Command); - - CommandMailbox->SCSI_10.CommandOpcode = DAC960_V2_SCSI_10; - CommandMailbox->SCSI_10.CommandControlBits.DataTransferControllerToHost = - (Command->DmaDirection == PCI_DMA_FROMDEVICE); - CommandMailbox->SCSI_10.DataTransferSize = - Command->BlockCount << DAC960_BlockSizeBits; - CommandMailbox->SCSI_10.RequestSenseBusAddress = Command->V2.RequestSenseDMA; - CommandMailbox->SCSI_10.PhysicalDevice = - Controller->V2.LogicalDriveToVirtualDevice[Command->LogicalDriveNumber]; - CommandMailbox->SCSI_10.RequestSenseSize = sizeof(DAC960_SCSI_RequestSense_T); - CommandMailbox->SCSI_10.CDBLength = 10; - CommandMailbox->SCSI_10.SCSI_CDB[0] = - (Command->DmaDirection == PCI_DMA_FROMDEVICE ? 
0x28 : 0x2A); - CommandMailbox->SCSI_10.SCSI_CDB[2] = Command->BlockNumber >> 24; - CommandMailbox->SCSI_10.SCSI_CDB[3] = Command->BlockNumber >> 16; - CommandMailbox->SCSI_10.SCSI_CDB[4] = Command->BlockNumber >> 8; - CommandMailbox->SCSI_10.SCSI_CDB[5] = Command->BlockNumber; - CommandMailbox->SCSI_10.SCSI_CDB[7] = Command->BlockCount >> 8; - CommandMailbox->SCSI_10.SCSI_CDB[8] = Command->BlockCount; - - if (Command->SegmentCount == 1) - { - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - (DAC960_BusAddress64_T)sg_dma_address(ScatterList); - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->SCSI_10.DataTransferSize; - } - else - { - DAC960_V2_ScatterGatherSegment_T *ScatterGatherList; - int i; - - if (Command->SegmentCount > 2) - { - ScatterGatherList = Command->V2.ScatterGatherList; - CommandMailbox->SCSI_10.CommandControlBits - .AdditionalScatterGatherListMemory = true; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ExtendedScatterGather.ScatterGatherList0Length = Command->SegmentCount; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ExtendedScatterGather.ScatterGatherList0Address = - Command->V2.ScatterGatherListDMA; - } - else - ScatterGatherList = CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments; - - for (i = 0; i < Command->SegmentCount; i++, ScatterList++, ScatterGatherList++) { - ScatterGatherList->SegmentDataPointer = - (DAC960_BusAddress64_T)sg_dma_address(ScatterList); - ScatterGatherList->SegmentByteCount = - (DAC960_ByteCount64_T)sg_dma_len(ScatterList); - } - } - DAC960_QueueCommand(Command); -} - - -static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_queue *req_q) -{ - struct request *Request; - DAC960_Command_T *Command; - - while(1) { - Request = blk_peek_request(req_q); - if (!Request) - return 1; - - Command = DAC960_AllocateCommand(Controller); - if (Command == NULL) - return 0; - - if (rq_data_dir(Request) == READ) { - Command->DmaDirection = PCI_DMA_FROMDEVICE; - Command->CommandType = DAC960_ReadCommand; - } else { - Command->DmaDirection = PCI_DMA_TODEVICE; - Command->CommandType = DAC960_WriteCommand; - } - Command->Completion = Request->end_io_data; - Command->LogicalDriveNumber = (long)Request->rq_disk->private_data; - Command->BlockNumber = blk_rq_pos(Request); - Command->BlockCount = blk_rq_sectors(Request); - Command->Request = Request; - blk_start_request(Request); - Command->SegmentCount = blk_rq_map_sg(req_q, - Command->Request, Command->cmd_sglist); - /* pci_map_sg MAY change the value of SegCount */ - Command->SegmentCount = pci_map_sg(Controller->PCIDevice, Command->cmd_sglist, - Command->SegmentCount, Command->DmaDirection); - - DAC960_QueueReadWriteCommand(Command); - } -} - -/* - DAC960_ProcessRequest attempts to remove one I/O Request from Controller's - I/O Request Queue and queues it to the Controller. WaitForCommand is true if - this function should wait for a Command to become available if necessary. - This function returns true if an I/O Request was queued and false otherwise. -*/ -static void DAC960_ProcessRequest(DAC960_Controller_T *controller) -{ - int i; - - if (!controller->ControllerInitialized) - return; - - /* Do this better later! 
*/ - for (i = controller->req_q_index; i < DAC960_MaxLogicalDrives; i++) { - struct request_queue *req_q = controller->RequestQueue[i]; - - if (req_q == NULL) - continue; - - if (!DAC960_process_queue(controller, req_q)) { - controller->req_q_index = i; - return; - } - } - - if (controller->req_q_index == 0) - return; - - for (i = 0; i < controller->req_q_index; i++) { - struct request_queue *req_q = controller->RequestQueue[i]; - - if (req_q == NULL) - continue; - - if (!DAC960_process_queue(controller, req_q)) { - controller->req_q_index = i; - return; - } - } -} - - -/* - DAC960_queue_partial_rw extracts one bio from the request already - associated with argument command, and construct a new command block to retry I/O - only on that bio. Queue that command to the controller. - - This function re-uses a previously-allocated Command, - there is no failure mode from trying to allocate a command. -*/ - -static void DAC960_queue_partial_rw(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - struct request *Request = Command->Request; - struct request_queue *req_q = Controller->RequestQueue[Command->LogicalDriveNumber]; - - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - Command->CommandType = DAC960_ReadRetryCommand; - else - Command->CommandType = DAC960_WriteRetryCommand; - - /* - * We could be more efficient with these mapping requests - * and map only the portions that we need. But since this - * code should almost never be called, just go with a - * simple coding. - */ - (void)blk_rq_map_sg(req_q, Command->Request, Command->cmd_sglist); - - (void)pci_map_sg(Controller->PCIDevice, Command->cmd_sglist, 1, Command->DmaDirection); - /* - * Resubmitting the request sector at a time is really tedious. - * But, this should almost never happen. So, we're willing to pay - * this price so that in the end, as much of the transfer is completed - * successfully as possible. - */ - Command->SegmentCount = 1; - Command->BlockNumber = blk_rq_pos(Request); - Command->BlockCount = 1; - DAC960_QueueReadWriteCommand(Command); - return; -} - -/* - DAC960_RequestFunction is the I/O Request Function for DAC960 Controllers. -*/ - -static void DAC960_RequestFunction(struct request_queue *RequestQueue) -{ - DAC960_ProcessRequest(RequestQueue->queuedata); -} - -/* - DAC960_ProcessCompletedBuffer performs completion processing for an - individual Buffer. -*/ - -static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command, - bool SuccessfulIO) -{ - struct request *Request = Command->Request; - blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR; - - pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist, - Command->SegmentCount, Command->DmaDirection); - - if (!__blk_end_request(Request, Error, Command->BlockCount << 9)) { - if (Command->Completion) { - complete(Command->Completion); - Command->Completion = NULL; - } - return true; - } - return false; -} - -/* - DAC960_V1_ReadWriteError prints an appropriate error message for Command - when an error occurs on a Read or Write operation. 
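The V2 read/write path earlier in this hunk encodes each transfer as a SCSI READ(10)/WRITE(10) CDB: opcode 0x28 or 0x2A, a 32-bit big-endian logical block address in bytes 2..5, and a 16-bit big-endian block count in bytes 7..8. A standalone sketch of that byte packing (plain C, hypothetical helper name):

	#include <stdint.h>
	#include <stdbool.h>
	#include <string.h>

	/*
	 * Pack a 10-byte READ(10)/WRITE(10) CDB: LBA big-endian in bytes
	 * 2..5, transfer length (in blocks) big-endian in bytes 7..8.
	 */
	static void pack_rw10_cdb(uint8_t cdb[10], bool is_read,
				  uint32_t lba, uint16_t blocks)
	{
		memset(cdb, 0, 10);
		cdb[0] = is_read ? 0x28 : 0x2A;	/* READ(10) : WRITE(10) */
		cdb[2] = lba >> 24;
		cdb[3] = lba >> 16;
		cdb[4] = lba >> 8;
		cdb[5] = lba;
		cdb[7] = blocks >> 8;
		cdb[8] = blocks;
	}

The 16-bit count field is why a single READ(10)/WRITE(10) tops out at 65535 blocks; larger requests have to be split before they reach this encoding.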
-*/ - -static void DAC960_V1_ReadWriteError(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - unsigned char *CommandName = "UNKNOWN"; - switch (Command->CommandType) - { - case DAC960_ReadCommand: - case DAC960_ReadRetryCommand: - CommandName = "READ"; - break; - case DAC960_WriteCommand: - case DAC960_WriteRetryCommand: - CommandName = "WRITE"; - break; - case DAC960_MonitoringCommand: - case DAC960_ImmediateCommand: - case DAC960_QueuedCommand: - break; - } - switch (Command->V1.CommandStatus) - { - case DAC960_V1_IrrecoverableDataError: - DAC960_Error("Irrecoverable Data Error on %s:\n", - Controller, CommandName); - break; - case DAC960_V1_LogicalDriveNonexistentOrOffline: - DAC960_Error("Logical Drive Nonexistent or Offline on %s:\n", - Controller, CommandName); - break; - case DAC960_V1_AccessBeyondEndOfLogicalDrive: - DAC960_Error("Attempt to Access Beyond End of Logical Drive " - "on %s:\n", Controller, CommandName); - break; - case DAC960_V1_BadDataEncountered: - DAC960_Error("Bad Data Encountered on %s:\n", Controller, CommandName); - break; - default: - DAC960_Error("Unexpected Error Status %04X on %s:\n", - Controller, Command->V1.CommandStatus, CommandName); - break; - } - DAC960_Error(" /dev/rd/c%dd%d: absolute blocks %u..%u\n", - Controller, Controller->ControllerNumber, - Command->LogicalDriveNumber, Command->BlockNumber, - Command->BlockNumber + Command->BlockCount - 1); -} - - -/* - DAC960_V1_ProcessCompletedCommand performs completion processing for Command - for DAC960 V1 Firmware Controllers. -*/ - -static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_CommandType_T CommandType = Command->CommandType; - DAC960_V1_CommandOpcode_T CommandOpcode = - Command->V1.CommandMailbox.Common.CommandOpcode; - DAC960_V1_CommandStatus_T CommandStatus = Command->V1.CommandStatus; - - if (CommandType == DAC960_ReadCommand || - CommandType == DAC960_WriteCommand) - { - -#ifdef FORCE_RETRY_DEBUG - CommandStatus = DAC960_V1_IrrecoverableDataError; -#endif - - if (CommandStatus == DAC960_V1_NormalCompletion) { - - if (!DAC960_ProcessCompletedRequest(Command, true)) - BUG(); - - } else if (CommandStatus == DAC960_V1_IrrecoverableDataError || - CommandStatus == DAC960_V1_BadDataEncountered) - { - /* - * break the command down into pieces and resubmit each - * piece, hoping that some of them will succeed. - */ - DAC960_queue_partial_rw(Command); - return; - } - else - { - if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline) - DAC960_V1_ReadWriteError(Command); - - if (!DAC960_ProcessCompletedRequest(Command, false)) - BUG(); - } - } - else if (CommandType == DAC960_ReadRetryCommand || - CommandType == DAC960_WriteRetryCommand) - { - bool normal_completion; -#ifdef FORCE_RETRY_FAILURE_DEBUG - static int retry_count = 1; -#endif - /* - Perform completion processing for the portion that was - retried, and submit the next portion, if any. 
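DAC960_queue_partial_rw above retries a failed transfer one block at a time, advancing the block number as each single-block piece completes, so that as much of the original request as possible still succeeds and only the genuinely bad blocks are reported. A minimal model of that splitting logic in plain C (hypothetical submit callback, not the driver's API):

	#include <stdint.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical single-block submit; returns true on success. */
	typedef bool (*submit_block_fn)(uint32_t lba);

	/* Re-drive a failed [lba, lba + count) transfer block by block. */
	static void retry_block_by_block(uint32_t lba, uint32_t count,
					 submit_block_fn submit)
	{
		uint32_t i, ok = 0;

		for (i = 0; i < count; i++) {
			if (submit(lba + i))
				ok++;
			else
				fprintf(stderr, "block %u failed permanently\n",
					lba + i);
		}
		printf("%u of %u blocks recovered\n", ok, count);
	}

	static bool fake_submit(uint32_t lba)
	{
		return (lba % 7) != 0;	/* pretend every 7th block is bad */
	}

	int main(void)
	{
		retry_block_by_block(1000, 16, fake_submit);
		return 0;
	}

The driver does the same thing asynchronously: each completed piece ends that portion of the request and resubmits the next block, instead of looping in one call as this model does.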
- */ - normal_completion = true; - if (CommandStatus != DAC960_V1_NormalCompletion) { - normal_completion = false; - if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline) - DAC960_V1_ReadWriteError(Command); - } - -#ifdef FORCE_RETRY_FAILURE_DEBUG - if (!(++retry_count % 10000)) { - printk("V1 error retry failure test\n"); - normal_completion = false; - DAC960_V1_ReadWriteError(Command); - } -#endif - - if (!DAC960_ProcessCompletedRequest(Command, normal_completion)) { - DAC960_queue_partial_rw(Command); - return; - } - } - - else if (CommandType == DAC960_MonitoringCommand) - { - if (Controller->ShutdownMonitoringTimer) - return; - if (CommandOpcode == DAC960_V1_Enquiry) - { - DAC960_V1_Enquiry_T *OldEnquiry = &Controller->V1.Enquiry; - DAC960_V1_Enquiry_T *NewEnquiry = Controller->V1.NewEnquiry; - unsigned int OldCriticalLogicalDriveCount = - OldEnquiry->CriticalLogicalDriveCount; - unsigned int NewCriticalLogicalDriveCount = - NewEnquiry->CriticalLogicalDriveCount; - if (NewEnquiry->NumberOfLogicalDrives > Controller->LogicalDriveCount) - { - int LogicalDriveNumber = Controller->LogicalDriveCount - 1; - while (++LogicalDriveNumber < NewEnquiry->NumberOfLogicalDrives) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "Now Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->LogicalDriveCount = NewEnquiry->NumberOfLogicalDrives; - DAC960_ComputeGenericDiskInfo(Controller); - } - if (NewEnquiry->NumberOfLogicalDrives < Controller->LogicalDriveCount) - { - int LogicalDriveNumber = NewEnquiry->NumberOfLogicalDrives - 1; - while (++LogicalDriveNumber < Controller->LogicalDriveCount) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "No Longer Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->LogicalDriveCount = NewEnquiry->NumberOfLogicalDrives; - DAC960_ComputeGenericDiskInfo(Controller); - } - if (NewEnquiry->StatusFlags.DeferredWriteError != - OldEnquiry->StatusFlags.DeferredWriteError) - DAC960_Critical("Deferred Write Error Flag is now %s\n", Controller, - (NewEnquiry->StatusFlags.DeferredWriteError - ? 
"TRUE" : "FALSE")); - if ((NewCriticalLogicalDriveCount > 0 || - NewCriticalLogicalDriveCount != OldCriticalLogicalDriveCount) || - (NewEnquiry->OfflineLogicalDriveCount > 0 || - NewEnquiry->OfflineLogicalDriveCount != - OldEnquiry->OfflineLogicalDriveCount) || - (NewEnquiry->DeadDriveCount > 0 || - NewEnquiry->DeadDriveCount != - OldEnquiry->DeadDriveCount) || - (NewEnquiry->EventLogSequenceNumber != - OldEnquiry->EventLogSequenceNumber) || - Controller->MonitoringTimerCount == 0 || - time_after_eq(jiffies, Controller->SecondaryMonitoringTime - + DAC960_SecondaryMonitoringInterval)) - { - Controller->V1.NeedLogicalDriveInformation = true; - Controller->V1.NewEventLogSequenceNumber = - NewEnquiry->EventLogSequenceNumber; - Controller->V1.NeedErrorTableInformation = true; - Controller->V1.NeedDeviceStateInformation = true; - Controller->V1.StartDeviceStateScan = true; - Controller->V1.NeedBackgroundInitializationStatus = - Controller->V1.BackgroundInitializationStatusSupported; - Controller->SecondaryMonitoringTime = jiffies; - } - if (NewEnquiry->RebuildFlag == DAC960_V1_StandbyRebuildInProgress || - NewEnquiry->RebuildFlag - == DAC960_V1_BackgroundRebuildInProgress || - OldEnquiry->RebuildFlag == DAC960_V1_StandbyRebuildInProgress || - OldEnquiry->RebuildFlag == DAC960_V1_BackgroundRebuildInProgress) - { - Controller->V1.NeedRebuildProgress = true; - Controller->V1.RebuildProgressFirst = - (NewEnquiry->CriticalLogicalDriveCount < - OldEnquiry->CriticalLogicalDriveCount); - } - if (OldEnquiry->RebuildFlag == DAC960_V1_BackgroundCheckInProgress) - switch (NewEnquiry->RebuildFlag) - { - case DAC960_V1_NoStandbyRebuildOrCheckInProgress: - DAC960_Progress("Consistency Check Completed Successfully\n", - Controller); - break; - case DAC960_V1_StandbyRebuildInProgress: - case DAC960_V1_BackgroundRebuildInProgress: - break; - case DAC960_V1_BackgroundCheckInProgress: - Controller->V1.NeedConsistencyCheckProgress = true; - break; - case DAC960_V1_StandbyRebuildCompletedWithError: - DAC960_Progress("Consistency Check Completed with Error\n", - Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_DriveFailed: - DAC960_Progress("Consistency Check Failed - " - "Physical Device Failed\n", Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_LogicalDriveFailed: - DAC960_Progress("Consistency Check Failed - " - "Logical Drive Failed\n", Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_OtherCauses: - DAC960_Progress("Consistency Check Failed - Other Causes\n", - Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckSuccessfullyTerminated: - DAC960_Progress("Consistency Check Successfully Terminated\n", - Controller); - break; - } - else if (NewEnquiry->RebuildFlag - == DAC960_V1_BackgroundCheckInProgress) - Controller->V1.NeedConsistencyCheckProgress = true; - Controller->MonitoringAlertMode = - (NewEnquiry->CriticalLogicalDriveCount > 0 || - NewEnquiry->OfflineLogicalDriveCount > 0 || - NewEnquiry->DeadDriveCount > 0); - if (NewEnquiry->RebuildFlag > DAC960_V1_BackgroundCheckInProgress) - { - Controller->V1.PendingRebuildFlag = NewEnquiry->RebuildFlag; - Controller->V1.RebuildFlagPending = true; - } - memcpy(&Controller->V1.Enquiry, &Controller->V1.NewEnquiry, - sizeof(DAC960_V1_Enquiry_T)); - } - else if (CommandOpcode == DAC960_V1_PerformEventLogOperation) - { - static char - *DAC960_EventMessages[] = - { "killed because write recovery failed", - "killed because of SCSI bus reset failure", - "killed because of double check condition", - 
"killed because it was removed", - "killed because of gross error on SCSI chip", - "killed because of bad tag returned from drive", - "killed because of timeout on SCSI command", - "killed because of reset SCSI command issued from system", - "killed because busy or parity error count exceeded limit", - "killed because of 'kill drive' command from system", - "killed because of selection timeout", - "killed due to SCSI phase sequence error", - "killed due to unknown status" }; - DAC960_V1_EventLogEntry_T *EventLogEntry = - Controller->V1.EventLogEntry; - if (EventLogEntry->SequenceNumber == - Controller->V1.OldEventLogSequenceNumber) - { - unsigned char SenseKey = EventLogEntry->SenseKey; - unsigned char AdditionalSenseCode = - EventLogEntry->AdditionalSenseCode; - unsigned char AdditionalSenseCodeQualifier = - EventLogEntry->AdditionalSenseCodeQualifier; - if (SenseKey == DAC960_SenseKey_VendorSpecific && - AdditionalSenseCode == 0x80 && - AdditionalSenseCodeQualifier < - ARRAY_SIZE(DAC960_EventMessages)) - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - DAC960_EventMessages[ - AdditionalSenseCodeQualifier]); - else if (SenseKey == DAC960_SenseKey_UnitAttention && - AdditionalSenseCode == 0x29) - { - if (Controller->MonitoringTimerCount > 0) - Controller->V1.DeviceResetCount[EventLogEntry->Channel] - [EventLogEntry->TargetID]++; - } - else if (!(SenseKey == DAC960_SenseKey_NoSense || - (SenseKey == DAC960_SenseKey_NotReady && - AdditionalSenseCode == 0x04 && - (AdditionalSenseCodeQualifier == 0x01 || - AdditionalSenseCodeQualifier == 0x02)))) - { - DAC960_Critical("Physical Device %d:%d Error Log: " - "Sense Key = %X, ASC = %02X, ASCQ = %02X\n", - Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - SenseKey, - AdditionalSenseCode, - AdditionalSenseCodeQualifier); - DAC960_Critical("Physical Device %d:%d Error Log: " - "Information = %02X%02X%02X%02X " - "%02X%02X%02X%02X\n", - Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - EventLogEntry->Information[0], - EventLogEntry->Information[1], - EventLogEntry->Information[2], - EventLogEntry->Information[3], - EventLogEntry->CommandSpecificInformation[0], - EventLogEntry->CommandSpecificInformation[1], - EventLogEntry->CommandSpecificInformation[2], - EventLogEntry->CommandSpecificInformation[3]); - } - } - Controller->V1.OldEventLogSequenceNumber++; - } - else if (CommandOpcode == DAC960_V1_GetErrorTable) - { - DAC960_V1_ErrorTable_T *OldErrorTable = &Controller->V1.ErrorTable; - DAC960_V1_ErrorTable_T *NewErrorTable = Controller->V1.NewErrorTable; - int Channel, TargetID; - for (Channel = 0; Channel < Controller->Channels; Channel++) - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - DAC960_V1_ErrorTableEntry_T *NewErrorEntry = - &NewErrorTable->ErrorTableEntries[Channel][TargetID]; - DAC960_V1_ErrorTableEntry_T *OldErrorEntry = - &OldErrorTable->ErrorTableEntries[Channel][TargetID]; - if ((NewErrorEntry->ParityErrorCount != - OldErrorEntry->ParityErrorCount) || - (NewErrorEntry->SoftErrorCount != - OldErrorEntry->SoftErrorCount) || - (NewErrorEntry->HardErrorCount != - OldErrorEntry->HardErrorCount) || - (NewErrorEntry->MiscErrorCount != - OldErrorEntry->MiscErrorCount)) - DAC960_Critical("Physical Device %d:%d Errors: " - "Parity = %d, Soft = %d, " - "Hard = %d, Misc = %d\n", - Controller, Channel, TargetID, - NewErrorEntry->ParityErrorCount, - NewErrorEntry->SoftErrorCount, - NewErrorEntry->HardErrorCount, - 
NewErrorEntry->MiscErrorCount); - } - memcpy(&Controller->V1.ErrorTable, Controller->V1.NewErrorTable, - sizeof(DAC960_V1_ErrorTable_T)); - } - else if (CommandOpcode == DAC960_V1_GetDeviceState) - { - DAC960_V1_DeviceState_T *OldDeviceState = - &Controller->V1.DeviceState[Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - DAC960_V1_DeviceState_T *NewDeviceState = - Controller->V1.NewDeviceState; - if (NewDeviceState->DeviceState != OldDeviceState->DeviceState) - DAC960_Critical("Physical Device %d:%d is now %s\n", Controller, - Controller->V1.DeviceStateChannel, - Controller->V1.DeviceStateTargetID, - (NewDeviceState->DeviceState - == DAC960_V1_Device_Dead - ? "DEAD" - : NewDeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "WRITE-ONLY" - : NewDeviceState->DeviceState - == DAC960_V1_Device_Online - ? "ONLINE" : "STANDBY")); - if (OldDeviceState->DeviceState == DAC960_V1_Device_Dead && - NewDeviceState->DeviceState != DAC960_V1_Device_Dead) - { - Controller->V1.NeedDeviceInquiryInformation = true; - Controller->V1.NeedDeviceSerialNumberInformation = true; - Controller->V1.DeviceResetCount - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID] = 0; - } - memcpy(OldDeviceState, NewDeviceState, - sizeof(DAC960_V1_DeviceState_T)); - } - else if (CommandOpcode == DAC960_V1_GetLogicalDriveInformation) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - { - DAC960_V1_LogicalDriveInformation_T *OldLogicalDriveInformation = - &Controller->V1.LogicalDriveInformation[LogicalDriveNumber]; - DAC960_V1_LogicalDriveInformation_T *NewLogicalDriveInformation = - &(*Controller->V1.NewLogicalDriveInformation)[LogicalDriveNumber]; - if (NewLogicalDriveInformation->LogicalDriveState != - OldLogicalDriveInformation->LogicalDriveState) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (NewLogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Online - ? "ONLINE" - : NewLogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Critical - ? "CRITICAL" : "OFFLINE")); - if (NewLogicalDriveInformation->WriteBack != - OldLogicalDriveInformation->WriteBack) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (NewLogicalDriveInformation->WriteBack - ? 
"WRITE BACK" : "WRITE THRU")); - } - memcpy(&Controller->V1.LogicalDriveInformation, - Controller->V1.NewLogicalDriveInformation, - sizeof(DAC960_V1_LogicalDriveInformationArray_T)); - } - else if (CommandOpcode == DAC960_V1_GetRebuildProgress) - { - unsigned int LogicalDriveNumber = - Controller->V1.RebuildProgress->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.RebuildProgress->LogicalDriveSize; - unsigned int BlocksCompleted = - LogicalDriveSize - Controller->V1.RebuildProgress->RemainingBlocks; - if (CommandStatus == DAC960_V1_NoRebuildOrCheckInProgress && - Controller->V1.LastRebuildStatus == DAC960_V1_NormalCompletion) - CommandStatus = DAC960_V1_RebuildSuccessful; - switch (CommandStatus) - { - case DAC960_V1_NormalCompletion: - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Rebuild in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - break; - case DAC960_V1_RebuildFailed_LogicalDriveFailure: - DAC960_Progress("Rebuild Failed due to " - "Logical Drive Failure\n", Controller); - break; - case DAC960_V1_RebuildFailed_BadBlocksOnOther: - DAC960_Progress("Rebuild Failed due to " - "Bad Blocks on Other Drives\n", Controller); - break; - case DAC960_V1_RebuildFailed_NewDriveFailed: - DAC960_Progress("Rebuild Failed due to " - "Failure of Drive Being Rebuilt\n", Controller); - break; - case DAC960_V1_NoRebuildOrCheckInProgress: - break; - case DAC960_V1_RebuildSuccessful: - DAC960_Progress("Rebuild Completed Successfully\n", Controller); - break; - case DAC960_V1_RebuildSuccessfullyTerminated: - DAC960_Progress("Rebuild Successfully Terminated\n", Controller); - break; - } - Controller->V1.LastRebuildStatus = CommandStatus; - if (CommandType != DAC960_MonitoringCommand && - Controller->V1.RebuildStatusPending) - { - Command->V1.CommandStatus = Controller->V1.PendingRebuildStatus; - Controller->V1.RebuildStatusPending = false; - } - else if (CommandType == DAC960_MonitoringCommand && - CommandStatus != DAC960_V1_NormalCompletion && - CommandStatus != DAC960_V1_NoRebuildOrCheckInProgress) - { - Controller->V1.PendingRebuildStatus = CommandStatus; - Controller->V1.RebuildStatusPending = true; - } - } - else if (CommandOpcode == DAC960_V1_RebuildStat) - { - unsigned int LogicalDriveNumber = - Controller->V1.RebuildProgress->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.RebuildProgress->LogicalDriveSize; - unsigned int BlocksCompleted = - LogicalDriveSize - Controller->V1.RebuildProgress->RemainingBlocks; - if (CommandStatus == DAC960_V1_NormalCompletion) - { - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Consistency Check in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - } - } - else if (CommandOpcode == DAC960_V1_BackgroundInitializationControl) - { - unsigned int LogicalDriveNumber = - Controller->V1.BackgroundInitializationStatus->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.BackgroundInitializationStatus->LogicalDriveSize; - unsigned int BlocksCompleted = - Controller->V1.BackgroundInitializationStatus->BlocksCompleted; - switch 
(CommandStatus) - { - case DAC960_V1_NormalCompletion: - switch (Controller->V1.BackgroundInitializationStatus->Status) - { - case DAC960_V1_BackgroundInitializationInvalid: - break; - case DAC960_V1_BackgroundInitializationStarted: - DAC960_Progress("Background Initialization Started\n", - Controller); - break; - case DAC960_V1_BackgroundInitializationInProgress: - if (BlocksCompleted == - Controller->V1.LastBackgroundInitializationStatus. - BlocksCompleted && - LogicalDriveNumber == - Controller->V1.LastBackgroundInitializationStatus. - LogicalDriveNumber) - break; - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Background Initialization in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - break; - case DAC960_V1_BackgroundInitializationSuspended: - DAC960_Progress("Background Initialization Suspended\n", - Controller); - break; - case DAC960_V1_BackgroundInitializationCancelled: - DAC960_Progress("Background Initialization Cancelled\n", - Controller); - break; - } - memcpy(&Controller->V1.LastBackgroundInitializationStatus, - Controller->V1.BackgroundInitializationStatus, - sizeof(DAC960_V1_BackgroundInitializationStatus_T)); - break; - case DAC960_V1_BackgroundInitSuccessful: - if (Controller->V1.BackgroundInitializationStatus->Status == - DAC960_V1_BackgroundInitializationInProgress) - DAC960_Progress("Background Initialization " - "Completed Successfully\n", Controller); - Controller->V1.BackgroundInitializationStatus->Status = - DAC960_V1_BackgroundInitializationInvalid; - break; - case DAC960_V1_BackgroundInitAborted: - if (Controller->V1.BackgroundInitializationStatus->Status == - DAC960_V1_BackgroundInitializationInProgress) - DAC960_Progress("Background Initialization Aborted\n", - Controller); - Controller->V1.BackgroundInitializationStatus->Status = - DAC960_V1_BackgroundInitializationInvalid; - break; - case DAC960_V1_NoBackgroundInitInProgress: - break; - } - } - else if (CommandOpcode == DAC960_V1_DCDB) - { - /* - This is a bit ugly. - - The InquiryStandardData and - the InquiryUntitSerialNumber information - retrieval operations BOTH use the DAC960_V1_DCDB - commands. the test above can't distinguish between - these two cases. - - Instead, we rely on the order of code later in this - function to ensure that DeviceInquiryInformation commands - are submitted before DeviceSerialNumber commands. 
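The monitoring path that follows issues two passthrough INQUIRYs through the same DCDB mechanism: a standard INQUIRY (EVPD = 0) for the device identity, and an EVPD = 1, page 0x80 INQUIRY for the unit serial number; as the comment above notes, only the submission order tells the two completions apart. A standalone sketch of the two 6-byte CDBs (plain C, hypothetical helper name):

	#include <stdint.h>
	#include <stdbool.h>
	#include <string.h>

	/*
	 * Build a 6-byte SCSI INQUIRY CDB. With evpd == false this is a
	 * standard INQUIRY; with evpd == true, page 0x80 returns the unit
	 * serial number VPD page.
	 */
	static void pack_inquiry_cdb(uint8_t cdb[6], bool evpd, uint8_t alloc_len)
	{
		memset(cdb, 0, 6);
		cdb[0] = 0x12;			/* INQUIRY */
		cdb[1] = evpd ? 0x01 : 0x00;	/* EVPD bit */
		cdb[2] = evpd ? 0x80 : 0x00;	/* page code */
		cdb[3] = 0;			/* reserved */
		cdb[4] = alloc_len;		/* allocation length */
		cdb[5] = 0;			/* control */
	}

The allocation length is the size of the buffer the caller DMA-maps for the response, which is why the driver passes sizeof() of the corresponding inquiry structure in byte 4.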
- */ - if (Controller->V1.NeedDeviceInquiryInformation) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - if (CommandStatus != DAC960_V1_NormalCompletion) - { - memset(InquiryStandardData, 0, - sizeof(DAC960_SCSI_Inquiry_T)); - InquiryStandardData->PeripheralDeviceType = 0x1F; - } - else - memcpy(InquiryStandardData, - Controller->V1.NewInquiryStandardData, - sizeof(DAC960_SCSI_Inquiry_T)); - Controller->V1.NeedDeviceInquiryInformation = false; - } - else if (Controller->V1.NeedDeviceSerialNumberInformation) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - if (CommandStatus != DAC960_V1_NormalCompletion) - { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } - else - memcpy(InquiryUnitSerialNumber, - Controller->V1.NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - Controller->V1.NeedDeviceSerialNumberInformation = false; - } - } - /* - Begin submitting new monitoring commands. - */ - if (Controller->V1.NewEventLogSequenceNumber - - Controller->V1.OldEventLogSequenceNumber > 0) - { - Command->V1.CommandMailbox.Type3E.CommandOpcode = - DAC960_V1_PerformEventLogOperation; - Command->V1.CommandMailbox.Type3E.OperationType = - DAC960_V1_GetEventLogEntry; - Command->V1.CommandMailbox.Type3E.OperationQualifier = 1; - Command->V1.CommandMailbox.Type3E.SequenceNumber = - Controller->V1.OldEventLogSequenceNumber; - Command->V1.CommandMailbox.Type3E.BusAddress = - Controller->V1.EventLogEntryDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedErrorTableInformation) - { - Controller->V1.NeedErrorTableInformation = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetErrorTable; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.NewErrorTableDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedRebuildProgress && - Controller->V1.RebuildProgressFirst) - { - Controller->V1.NeedRebuildProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetRebuildProgress; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedDeviceStateInformation) - { - if (Controller->V1.NeedDeviceInquiryInformation) - { - DAC960_V1_DCDB_T *DCDB = Controller->V1.MonitoringDCDB; - dma_addr_t DCDB_DMA = Controller->V1.MonitoringDCDB_DMA; - - dma_addr_t NewInquiryStandardDataDMA = - Controller->V1.NewInquiryStandardDataDMA; - - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_DMA; - DCDB->Channel = Controller->V1.DeviceStateChannel; - DCDB->TargetID = Controller->V1.DeviceStateTargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->BusAddress = NewInquiryStandardDataDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 0; /* EVPD = 0 */ - 
DCDB->CDB[2] = 0; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->CDB[5] = 0; /* Control */ - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedDeviceSerialNumberInformation) - { - DAC960_V1_DCDB_T *DCDB = Controller->V1.MonitoringDCDB; - dma_addr_t DCDB_DMA = Controller->V1.MonitoringDCDB_DMA; - dma_addr_t NewInquiryUnitSerialNumberDMA = - Controller->V1.NewInquiryUnitSerialNumberDMA; - - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_DMA; - DCDB->Channel = Controller->V1.DeviceStateChannel; - DCDB->TargetID = Controller->V1.DeviceStateTargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->BusAddress = NewInquiryUnitSerialNumberDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 1; /* EVPD = 1 */ - DCDB->CDB[2] = 0x80; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->CDB[5] = 0; /* Control */ - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.StartDeviceStateScan) - { - Controller->V1.DeviceStateChannel = 0; - Controller->V1.DeviceStateTargetID = 0; - Controller->V1.StartDeviceStateScan = false; - } - else if (++Controller->V1.DeviceStateTargetID == Controller->Targets) - { - Controller->V1.DeviceStateChannel++; - Controller->V1.DeviceStateTargetID = 0; - } - if (Controller->V1.DeviceStateChannel < Controller->Channels) - { - Controller->V1.NewDeviceState->DeviceState = - DAC960_V1_Device_Dead; - Command->V1.CommandMailbox.Type3D.CommandOpcode = - DAC960_V1_GetDeviceState; - Command->V1.CommandMailbox.Type3D.Channel = - Controller->V1.DeviceStateChannel; - Command->V1.CommandMailbox.Type3D.TargetID = - Controller->V1.DeviceStateTargetID; - Command->V1.CommandMailbox.Type3D.BusAddress = - Controller->V1.NewDeviceStateDMA; - DAC960_QueueCommand(Command); - return; - } - Controller->V1.NeedDeviceStateInformation = false; - } - if (Controller->V1.NeedLogicalDriveInformation) - { - Controller->V1.NeedLogicalDriveInformation = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetLogicalDriveInformation; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.NewLogicalDriveInformationDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedRebuildProgress) - { - Controller->V1.NeedRebuildProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetRebuildProgress; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedConsistencyCheckProgress) - { - Controller->V1.NeedConsistencyCheckProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_RebuildStat; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedBackgroundInitializationStatus) - { - Controller->V1.NeedBackgroundInitializationStatus = false; - Command->V1.CommandMailbox.Type3B.CommandOpcode = - DAC960_V1_BackgroundInitializationControl; - 
Command->V1.CommandMailbox.Type3B.CommandOpcode2 = 0x20; - Command->V1.CommandMailbox.Type3B.BusAddress = - Controller->V1.BackgroundInitializationStatusDMA; - DAC960_QueueCommand(Command); - return; - } - Controller->MonitoringTimerCount++; - Controller->MonitoringTimer.expires = - jiffies + DAC960_MonitoringTimerInterval; - add_timer(&Controller->MonitoringTimer); - } - if (CommandType == DAC960_ImmediateCommand) - { - complete(Command->Completion); - Command->Completion = NULL; - return; - } - if (CommandType == DAC960_QueuedCommand) - { - DAC960_V1_KernelCommand_T *KernelCommand = Command->V1.KernelCommand; - KernelCommand->CommandStatus = Command->V1.CommandStatus; - Command->V1.KernelCommand = NULL; - if (CommandOpcode == DAC960_V1_DCDB) - Controller->V1.DirectCommandActive[KernelCommand->DCDB->Channel] - [KernelCommand->DCDB->TargetID] = - false; - DAC960_DeallocateCommand(Command); - KernelCommand->CompletionFunction(KernelCommand); - return; - } - /* - Queue a Status Monitoring Command to the Controller using the just - completed Command if one was deferred previously due to lack of a - free Command when the Monitoring Timer Function was called. - */ - if (Controller->MonitoringCommandDeferred) - { - Controller->MonitoringCommandDeferred = false; - DAC960_V1_QueueMonitoringCommand(Command); - return; - } - /* - Deallocate the Command. - */ - DAC960_DeallocateCommand(Command); - /* - Wake up any processes waiting on a free Command. - */ - wake_up(&Controller->CommandWaitQueue); -} - - -/* - DAC960_V2_ReadWriteError prints an appropriate error message for Command - when an error occurs on a Read or Write operation. -*/ - -static void DAC960_V2_ReadWriteError(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - static const unsigned char *SenseErrors[] = { - "NO SENSE", "RECOVERED ERROR", - "NOT READY", "MEDIUM ERROR", - "HARDWARE ERROR", "ILLEGAL REQUEST", - "UNIT ATTENTION", "DATA PROTECT", - "BLANK CHECK", "VENDOR-SPECIFIC", - "COPY ABORTED", "ABORTED COMMAND", - "EQUAL", "VOLUME OVERFLOW", - "MISCOMPARE", "RESERVED" - }; - unsigned char *CommandName = "UNKNOWN"; - switch (Command->CommandType) - { - case DAC960_ReadCommand: - case DAC960_ReadRetryCommand: - CommandName = "READ"; - break; - case DAC960_WriteCommand: - case DAC960_WriteRetryCommand: - CommandName = "WRITE"; - break; - case DAC960_MonitoringCommand: - case DAC960_ImmediateCommand: - case DAC960_QueuedCommand: - break; - } - DAC960_Error("Error Condition %s on %s:\n", Controller, - SenseErrors[Command->V2.RequestSense->SenseKey], CommandName); - DAC960_Error(" /dev/rd/c%dd%d: absolute blocks %u..%u\n", - Controller, Controller->ControllerNumber, - Command->LogicalDriveNumber, Command->BlockNumber, - Command->BlockNumber + Command->BlockCount - 1); -} - - -/* - DAC960_V2_ReportEvent prints an appropriate message when a Controller Event - occurs. 
-*/ - -static void DAC960_V2_ReportEvent(DAC960_Controller_T *Controller, - DAC960_V2_Event_T *Event) -{ - DAC960_SCSI_RequestSense_T *RequestSense = - (DAC960_SCSI_RequestSense_T *) &Event->RequestSenseData; - unsigned char MessageBuffer[DAC960_LineBufferSize]; - static struct { int EventCode; unsigned char *EventMessage; } EventList[] = - { /* Physical Device Events (0x0000 - 0x007F) */ - { 0x0001, "P Online" }, - { 0x0002, "P Standby" }, - { 0x0005, "P Automatic Rebuild Started" }, - { 0x0006, "P Manual Rebuild Started" }, - { 0x0007, "P Rebuild Completed" }, - { 0x0008, "P Rebuild Cancelled" }, - { 0x0009, "P Rebuild Failed for Unknown Reasons" }, - { 0x000A, "P Rebuild Failed due to New Physical Device" }, - { 0x000B, "P Rebuild Failed due to Logical Drive Failure" }, - { 0x000C, "S Offline" }, - { 0x000D, "P Found" }, - { 0x000E, "P Removed" }, - { 0x000F, "P Unconfigured" }, - { 0x0010, "P Expand Capacity Started" }, - { 0x0011, "P Expand Capacity Completed" }, - { 0x0012, "P Expand Capacity Failed" }, - { 0x0013, "P Command Timed Out" }, - { 0x0014, "P Command Aborted" }, - { 0x0015, "P Command Retried" }, - { 0x0016, "P Parity Error" }, - { 0x0017, "P Soft Error" }, - { 0x0018, "P Miscellaneous Error" }, - { 0x0019, "P Reset" }, - { 0x001A, "P Active Spare Found" }, - { 0x001B, "P Warm Spare Found" }, - { 0x001C, "S Sense Data Received" }, - { 0x001D, "P Initialization Started" }, - { 0x001E, "P Initialization Completed" }, - { 0x001F, "P Initialization Failed" }, - { 0x0020, "P Initialization Cancelled" }, - { 0x0021, "P Failed because Write Recovery Failed" }, - { 0x0022, "P Failed because SCSI Bus Reset Failed" }, - { 0x0023, "P Failed because of Double Check Condition" }, - { 0x0024, "P Failed because Device Cannot Be Accessed" }, - { 0x0025, "P Failed because of Gross Error on SCSI Processor" }, - { 0x0026, "P Failed because of Bad Tag from Device" }, - { 0x0027, "P Failed because of Command Timeout" }, - { 0x0028, "P Failed because of System Reset" }, - { 0x0029, "P Failed because of Busy Status or Parity Error" }, - { 0x002A, "P Failed because Host Set Device to Failed State" }, - { 0x002B, "P Failed because of Selection Timeout" }, - { 0x002C, "P Failed because of SCSI Bus Phase Error" }, - { 0x002D, "P Failed because Device Returned Unknown Status" }, - { 0x002E, "P Failed because Device Not Ready" }, - { 0x002F, "P Failed because Device Not Found at Startup" }, - { 0x0030, "P Failed because COD Write Operation Failed" }, - { 0x0031, "P Failed because BDT Write Operation Failed" }, - { 0x0039, "P Missing at Startup" }, - { 0x003A, "P Start Rebuild Failed due to Physical Drive Too Small" }, - { 0x003C, "P Temporarily Offline Device Automatically Made Online" }, - { 0x003D, "P Standby Rebuild Started" }, - /* Logical Device Events (0x0080 - 0x00FF) */ - { 0x0080, "M Consistency Check Started" }, - { 0x0081, "M Consistency Check Completed" }, - { 0x0082, "M Consistency Check Cancelled" }, - { 0x0083, "M Consistency Check Completed With Errors" }, - { 0x0084, "M Consistency Check Failed due to Logical Drive Failure" }, - { 0x0085, "M Consistency Check Failed due to Physical Device Failure" }, - { 0x0086, "L Offline" }, - { 0x0087, "L Critical" }, - { 0x0088, "L Online" }, - { 0x0089, "M Automatic Rebuild Started" }, - { 0x008A, "M Manual Rebuild Started" }, - { 0x008B, "M Rebuild Completed" }, - { 0x008C, "M Rebuild Cancelled" }, - { 0x008D, "M Rebuild Failed for Unknown Reasons" }, - { 0x008E, "M Rebuild Failed due to New Physical Device" }, - { 0x008F, "M Rebuild Failed due 
to Logical Drive Failure" }, - { 0x0090, "M Initialization Started" }, - { 0x0091, "M Initialization Completed" }, - { 0x0092, "M Initialization Cancelled" }, - { 0x0093, "M Initialization Failed" }, - { 0x0094, "L Found" }, - { 0x0095, "L Deleted" }, - { 0x0096, "M Expand Capacity Started" }, - { 0x0097, "M Expand Capacity Completed" }, - { 0x0098, "M Expand Capacity Failed" }, - { 0x0099, "L Bad Block Found" }, - { 0x009A, "L Size Changed" }, - { 0x009B, "L Type Changed" }, - { 0x009C, "L Bad Data Block Found" }, - { 0x009E, "L Read of Data Block in BDT" }, - { 0x009F, "L Write Back Data for Disk Block Lost" }, - { 0x00A0, "L Temporarily Offline RAID-5/3 Drive Made Online" }, - { 0x00A1, "L Temporarily Offline RAID-6/1/0/7 Drive Made Online" }, - { 0x00A2, "L Standby Rebuild Started" }, - /* Fault Management Events (0x0100 - 0x017F) */ - { 0x0140, "E Fan %d Failed" }, - { 0x0141, "E Fan %d OK" }, - { 0x0142, "E Fan %d Not Present" }, - { 0x0143, "E Power Supply %d Failed" }, - { 0x0144, "E Power Supply %d OK" }, - { 0x0145, "E Power Supply %d Not Present" }, - { 0x0146, "E Temperature Sensor %d Temperature Exceeds Safe Limit" }, - { 0x0147, "E Temperature Sensor %d Temperature Exceeds Working Limit" }, - { 0x0148, "E Temperature Sensor %d Temperature Normal" }, - { 0x0149, "E Temperature Sensor %d Not Present" }, - { 0x014A, "E Enclosure Management Unit %d Access Critical" }, - { 0x014B, "E Enclosure Management Unit %d Access OK" }, - { 0x014C, "E Enclosure Management Unit %d Access Offline" }, - /* Controller Events (0x0180 - 0x01FF) */ - { 0x0181, "C Cache Write Back Error" }, - { 0x0188, "C Battery Backup Unit Found" }, - { 0x0189, "C Battery Backup Unit Charge Level Low" }, - { 0x018A, "C Battery Backup Unit Charge Level OK" }, - { 0x0193, "C Installation Aborted" }, - { 0x0195, "C Battery Backup Unit Physically Removed" }, - { 0x0196, "C Memory Error During Warm Boot" }, - { 0x019E, "C Memory Soft ECC Error Corrected" }, - { 0x019F, "C Memory Hard ECC Error Corrected" }, - { 0x01A2, "C Battery Backup Unit Failed" }, - { 0x01AB, "C Mirror Race Recovery Failed" }, - { 0x01AC, "C Mirror Race on Critical Drive" }, - /* Controller Internal Processor Events */ - { 0x0380, "C Internal Controller Hung" }, - { 0x0381, "C Internal Controller Firmware Breakpoint" }, - { 0x0390, "C Internal Controller i960 Processor Specific Error" }, - { 0x03A0, "C Internal Controller StrongARM Processor Specific Error" }, - { 0, "" } }; - int EventListIndex = 0, EventCode; - unsigned char EventType, *EventMessage; - if (Event->EventCode == 0x1C && - RequestSense->SenseKey == DAC960_SenseKey_VendorSpecific && - (RequestSense->AdditionalSenseCode == 0x80 || - RequestSense->AdditionalSenseCode == 0x81)) - Event->EventCode = ((RequestSense->AdditionalSenseCode - 0x80) << 8) | - RequestSense->AdditionalSenseCodeQualifier; - while (true) - { - EventCode = EventList[EventListIndex].EventCode; - if (EventCode == Event->EventCode || EventCode == 0) break; - EventListIndex++; - } - EventType = EventList[EventListIndex].EventMessage[0]; - EventMessage = &EventList[EventListIndex].EventMessage[2]; - if (EventCode == 0) - { - DAC960_Critical("Unknown Controller Event Code %04X\n", - Controller, Event->EventCode); - return; - } - switch (EventType) - { - case 'P': - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - Event->Channel, Event->TargetID, EventMessage); - break; - case 'L': - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) %s\n", Controller, - Event->LogicalUnit, Controller->ControllerNumber, - 
Event->LogicalUnit, EventMessage); - break; - case 'M': - DAC960_Progress("Logical Drive %d (/dev/rd/c%dd%d) %s\n", Controller, - Event->LogicalUnit, Controller->ControllerNumber, - Event->LogicalUnit, EventMessage); - break; - case 'S': - if (RequestSense->SenseKey == DAC960_SenseKey_NoSense || - (RequestSense->SenseKey == DAC960_SenseKey_NotReady && - RequestSense->AdditionalSenseCode == 0x04 && - (RequestSense->AdditionalSenseCodeQualifier == 0x01 || - RequestSense->AdditionalSenseCodeQualifier == 0x02))) - break; - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - Event->Channel, Event->TargetID, EventMessage); - DAC960_Critical("Physical Device %d:%d Request Sense: " - "Sense Key = %X, ASC = %02X, ASCQ = %02X\n", - Controller, - Event->Channel, - Event->TargetID, - RequestSense->SenseKey, - RequestSense->AdditionalSenseCode, - RequestSense->AdditionalSenseCodeQualifier); - DAC960_Critical("Physical Device %d:%d Request Sense: " - "Information = %02X%02X%02X%02X " - "%02X%02X%02X%02X\n", - Controller, - Event->Channel, - Event->TargetID, - RequestSense->Information[0], - RequestSense->Information[1], - RequestSense->Information[2], - RequestSense->Information[3], - RequestSense->CommandSpecificInformation[0], - RequestSense->CommandSpecificInformation[1], - RequestSense->CommandSpecificInformation[2], - RequestSense->CommandSpecificInformation[3]); - break; - case 'E': - if (Controller->SuppressEnclosureMessages) break; - sprintf(MessageBuffer, EventMessage, Event->LogicalUnit); - DAC960_Critical("Enclosure %d %s\n", Controller, - Event->TargetID, MessageBuffer); - break; - case 'C': - DAC960_Critical("Controller %s\n", Controller, EventMessage); - break; - default: - DAC960_Critical("Unknown Controller Event Code %04X\n", - Controller, Event->EventCode); - break; - } -} - - -/* - DAC960_V2_ReportProgress prints an appropriate progress message for - Logical Device Long Operations. -*/ - -static void DAC960_V2_ReportProgress(DAC960_Controller_T *Controller, - unsigned char *MessageString, - unsigned int LogicalDeviceNumber, - unsigned long BlocksCompleted, - unsigned long LogicalDeviceSize) -{ - Controller->EphemeralProgressMessage = true; - DAC960_Progress("%s in Progress: Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", Controller, - MessageString, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (100 * (BlocksCompleted >> 7)) / (LogicalDeviceSize >> 7)); - Controller->EphemeralProgressMessage = false; -} - - -/* - DAC960_V2_ProcessCompletedCommand performs completion processing for Command - for DAC960 V2 Firmware Controllers. 
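DAC960_V2_ReportEvent above keeps a zero-terminated table of { event code, "T message" } pairs in which the first character of each string selects how the message is routed (physical device, logical drive, enclosure, controller, and so on), and the lookup falls through to an "unknown event" report when it reaches the terminator. A compact model of that scheme in plain C, with a few table entries borrowed from the list above and the rest of the names hypothetical:

	#include <stdio.h>

	struct event_entry {
		int code;
		const char *msg;	/* first char = routing type */
	};

	static const struct event_entry event_table[] = {
		{ 0x0001, "P Online" },
		{ 0x0086, "L Offline" },
		{ 0x0140, "E Fan %d Failed" },
		{ 0x0181, "C Cache Write Back Error" },
		{ 0, "" }		/* terminator: code 0 = unknown */
	};

	static void report_event(int code, int unit)
	{
		const struct event_entry *e = event_table;
		char buf[64];

		while (e->code != 0 && e->code != code)
			e++;

		if (e->code == 0) {
			printf("Unknown Controller Event Code %04X\n", code);
			return;
		}

		switch (e->msg[0]) {
		case 'P':
			printf("Physical Device: %s\n", e->msg + 2);
			break;
		case 'L':
			printf("Logical Drive %d: %s\n", unit, e->msg + 2);
			break;
		case 'E':
			/* enclosure messages embed a unit number */
			snprintf(buf, sizeof(buf), e->msg + 2, unit);
			printf("Enclosure: %s\n", buf);
			break;
		case 'C':
			printf("Controller: %s\n", e->msg + 2);
			break;
		}
	}

Keeping the routing type inside the message string keeps the table to two columns; the cost is the small bit of string arithmetic (msg[0] and msg + 2) at lookup time.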
-*/ - -static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_CommandType_T CommandType = Command->CommandType; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_IOCTL_Opcode_T IOCTLOpcode = CommandMailbox->Common.IOCTL_Opcode; - DAC960_V2_CommandOpcode_T CommandOpcode = CommandMailbox->SCSI_10.CommandOpcode; - DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus; - - if (CommandType == DAC960_ReadCommand || - CommandType == DAC960_WriteCommand) - { - -#ifdef FORCE_RETRY_DEBUG - CommandStatus = DAC960_V2_AbormalCompletion; -#endif - Command->V2.RequestSense->SenseKey = DAC960_SenseKey_MediumError; - - if (CommandStatus == DAC960_V2_NormalCompletion) { - - if (!DAC960_ProcessCompletedRequest(Command, true)) - BUG(); - - } else if (Command->V2.RequestSense->SenseKey == DAC960_SenseKey_MediumError) - { - /* - * break the command down into pieces and resubmit each - * piece, hoping that some of them will succeed. - */ - DAC960_queue_partial_rw(Command); - return; - } - else - { - if (Command->V2.RequestSense->SenseKey != DAC960_SenseKey_NotReady) - DAC960_V2_ReadWriteError(Command); - /* - Perform completion processing for all buffers in this I/O Request. - */ - (void)DAC960_ProcessCompletedRequest(Command, false); - } - } - else if (CommandType == DAC960_ReadRetryCommand || - CommandType == DAC960_WriteRetryCommand) - { - bool normal_completion; - -#ifdef FORCE_RETRY_FAILURE_DEBUG - static int retry_count = 1; -#endif - /* - Perform completion processing for the portion that was - retried, and submit the next portion, if any. - */ - normal_completion = true; - if (CommandStatus != DAC960_V2_NormalCompletion) { - normal_completion = false; - if (Command->V2.RequestSense->SenseKey != DAC960_SenseKey_NotReady) - DAC960_V2_ReadWriteError(Command); - } - -#ifdef FORCE_RETRY_FAILURE_DEBUG - if (!(++retry_count % 10000)) { - printk("V2 error retry failure test\n"); - normal_completion = false; - DAC960_V2_ReadWriteError(Command); - } -#endif - - if (!DAC960_ProcessCompletedRequest(Command, normal_completion)) { - DAC960_queue_partial_rw(Command); - return; - } - } - else if (CommandType == DAC960_MonitoringCommand) - { - if (Controller->ShutdownMonitoringTimer) - return; - if (IOCTLOpcode == DAC960_V2_GetControllerInfo) - { - DAC960_V2_ControllerInfo_T *NewControllerInfo = - Controller->V2.NewControllerInformation; - DAC960_V2_ControllerInfo_T *ControllerInfo = - &Controller->V2.ControllerInformation; - Controller->LogicalDriveCount = - NewControllerInfo->LogicalDevicesPresent; - Controller->V2.NeedLogicalDeviceInformation = true; - Controller->V2.NeedPhysicalDeviceInformation = true; - Controller->V2.StartLogicalDeviceInformationScan = true; - Controller->V2.StartPhysicalDeviceInformationScan = true; - Controller->MonitoringAlertMode = - (NewControllerInfo->LogicalDevicesCritical > 0 || - NewControllerInfo->LogicalDevicesOffline > 0 || - NewControllerInfo->PhysicalDisksCritical > 0 || - NewControllerInfo->PhysicalDisksOffline > 0); - memcpy(ControllerInfo, NewControllerInfo, - sizeof(DAC960_V2_ControllerInfo_T)); - } - else if (IOCTLOpcode == DAC960_V2_GetEvent) - { - if (CommandStatus == DAC960_V2_NormalCompletion) { - DAC960_V2_ReportEvent(Controller, Controller->V2.Event); - } - Controller->V2.NextEventSequenceNumber++; - } - else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid && - CommandStatus == DAC960_V2_NormalCompletion) - { - 
DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = - Controller->V2.NewPhysicalDeviceInformation; - unsigned int PhysicalDeviceIndex = Controller->V2.PhysicalDeviceIndex; - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - unsigned int DeviceIndex; - while (PhysicalDeviceInfo != NULL && - (NewPhysicalDeviceInfo->Channel > - PhysicalDeviceInfo->Channel || - (NewPhysicalDeviceInfo->Channel == - PhysicalDeviceInfo->Channel && - (NewPhysicalDeviceInfo->TargetID > - PhysicalDeviceInfo->TargetID || - (NewPhysicalDeviceInfo->TargetID == - PhysicalDeviceInfo->TargetID && - NewPhysicalDeviceInfo->LogicalUnit > - PhysicalDeviceInfo->LogicalUnit))))) - { - DAC960_Critical("Physical Device %d:%d No Longer Exists\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID); - Controller->V2.PhysicalDeviceInformation - [PhysicalDeviceIndex] = NULL; - Controller->V2.InquiryUnitSerialNumber - [PhysicalDeviceIndex] = NULL; - kfree(PhysicalDeviceInfo); - kfree(InquiryUnitSerialNumber); - for (DeviceIndex = PhysicalDeviceIndex; - DeviceIndex < DAC960_V2_MaxPhysicalDevices - 1; - DeviceIndex++) - { - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = - Controller->V2.PhysicalDeviceInformation[DeviceIndex+1]; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex+1]; - } - Controller->V2.PhysicalDeviceInformation - [DAC960_V2_MaxPhysicalDevices-1] = NULL; - Controller->V2.InquiryUnitSerialNumber - [DAC960_V2_MaxPhysicalDevices-1] = NULL; - PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - } - if (PhysicalDeviceInfo == NULL || - (NewPhysicalDeviceInfo->Channel != - PhysicalDeviceInfo->Channel) || - (NewPhysicalDeviceInfo->TargetID != - PhysicalDeviceInfo->TargetID) || - (NewPhysicalDeviceInfo->LogicalUnit != - PhysicalDeviceInfo->LogicalUnit)) - { - PhysicalDeviceInfo = - kmalloc(sizeof(DAC960_V2_PhysicalDeviceInfo_T), GFP_ATOMIC); - InquiryUnitSerialNumber = - kmalloc(sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - GFP_ATOMIC); - if (InquiryUnitSerialNumber == NULL || - PhysicalDeviceInfo == NULL) - { - kfree(InquiryUnitSerialNumber); - InquiryUnitSerialNumber = NULL; - kfree(PhysicalDeviceInfo); - PhysicalDeviceInfo = NULL; - } - DAC960_Critical("Physical Device %d:%d Now Exists%s\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - (PhysicalDeviceInfo != NULL - ? 
"" : " - Allocation Failed")); - if (PhysicalDeviceInfo != NULL) - { - memset(PhysicalDeviceInfo, 0, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - PhysicalDeviceInfo->PhysicalDeviceState = - DAC960_V2_Device_InvalidState; - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - for (DeviceIndex = DAC960_V2_MaxPhysicalDevices - 1; - DeviceIndex > PhysicalDeviceIndex; - DeviceIndex--) - { - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = - Controller->V2.PhysicalDeviceInformation[DeviceIndex-1]; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex-1]; - } - Controller->V2.PhysicalDeviceInformation - [PhysicalDeviceIndex] = - PhysicalDeviceInfo; - Controller->V2.InquiryUnitSerialNumber - [PhysicalDeviceIndex] = - InquiryUnitSerialNumber; - Controller->V2.NeedDeviceSerialNumberInformation = true; - } - } - if (PhysicalDeviceInfo != NULL) - { - if (NewPhysicalDeviceInfo->PhysicalDeviceState != - PhysicalDeviceInfo->PhysicalDeviceState) - DAC960_Critical( - "Physical Device %d:%d is now %s\n", Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - (NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Online - ? "ONLINE" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Rebuild - ? "REBUILD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Missing - ? "MISSING" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Critical - ? "CRITICAL" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead - ? "DEAD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_SuspectedDead - ? "SUSPECTED-DEAD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_CommandedOffline - ? "COMMANDED-OFFLINE" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Standby - ? 
"STANDBY" : "UNKNOWN")); - if ((NewPhysicalDeviceInfo->ParityErrors != - PhysicalDeviceInfo->ParityErrors) || - (NewPhysicalDeviceInfo->SoftErrors != - PhysicalDeviceInfo->SoftErrors) || - (NewPhysicalDeviceInfo->HardErrors != - PhysicalDeviceInfo->HardErrors) || - (NewPhysicalDeviceInfo->MiscellaneousErrors != - PhysicalDeviceInfo->MiscellaneousErrors) || - (NewPhysicalDeviceInfo->CommandTimeouts != - PhysicalDeviceInfo->CommandTimeouts) || - (NewPhysicalDeviceInfo->Retries != - PhysicalDeviceInfo->Retries) || - (NewPhysicalDeviceInfo->Aborts != - PhysicalDeviceInfo->Aborts) || - (NewPhysicalDeviceInfo->PredictedFailuresDetected != - PhysicalDeviceInfo->PredictedFailuresDetected)) - { - DAC960_Critical("Physical Device %d:%d Errors: " - "Parity = %d, Soft = %d, " - "Hard = %d, Misc = %d\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - NewPhysicalDeviceInfo->ParityErrors, - NewPhysicalDeviceInfo->SoftErrors, - NewPhysicalDeviceInfo->HardErrors, - NewPhysicalDeviceInfo->MiscellaneousErrors); - DAC960_Critical("Physical Device %d:%d Errors: " - "Timeouts = %d, Retries = %d, " - "Aborts = %d, Predicted = %d\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - NewPhysicalDeviceInfo->CommandTimeouts, - NewPhysicalDeviceInfo->Retries, - NewPhysicalDeviceInfo->Aborts, - NewPhysicalDeviceInfo - ->PredictedFailuresDetected); - } - if ((PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead || - PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_InvalidState) && - NewPhysicalDeviceInfo->PhysicalDeviceState - != DAC960_V2_Device_Dead) - Controller->V2.NeedDeviceSerialNumberInformation = true; - memcpy(PhysicalDeviceInfo, NewPhysicalDeviceInfo, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - } - NewPhysicalDeviceInfo->LogicalUnit++; - Controller->V2.PhysicalDeviceIndex++; - } - else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid) - { - unsigned int DeviceIndex; - for (DeviceIndex = Controller->V2.PhysicalDeviceIndex; - DeviceIndex < DAC960_V2_MaxPhysicalDevices; - DeviceIndex++) - { - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[DeviceIndex]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex]; - if (PhysicalDeviceInfo == NULL) break; - DAC960_Critical("Physical Device %d:%d No Longer Exists\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID); - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = NULL; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = NULL; - kfree(PhysicalDeviceInfo); - kfree(InquiryUnitSerialNumber); - } - Controller->V2.NeedPhysicalDeviceInformation = false; - } - else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid && - CommandStatus == DAC960_V2_NormalCompletion) - { - DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = - Controller->V2.NewLogicalDeviceInformation; - unsigned short LogicalDeviceNumber = - NewLogicalDeviceInfo->LogicalDeviceNumber; - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber]; - if (LogicalDeviceInfo == NULL) - { - DAC960_V2_PhysicalDevice_T PhysicalDevice; - PhysicalDevice.Controller = 0; - PhysicalDevice.Channel = NewLogicalDeviceInfo->Channel; - PhysicalDevice.TargetID = NewLogicalDeviceInfo->TargetID; - PhysicalDevice.LogicalUnit = NewLogicalDeviceInfo->LogicalUnit; - 
Controller->V2.LogicalDriveToVirtualDevice[LogicalDeviceNumber] = - PhysicalDevice; - LogicalDeviceInfo = kmalloc(sizeof(DAC960_V2_LogicalDeviceInfo_T), - GFP_ATOMIC); - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber] = - LogicalDeviceInfo; - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "Now Exists%s\n", Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (LogicalDeviceInfo != NULL - ? "" : " - Allocation Failed")); - if (LogicalDeviceInfo != NULL) - { - memset(LogicalDeviceInfo, 0, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - DAC960_ComputeGenericDiskInfo(Controller); - } - } - if (LogicalDeviceInfo != NULL) - { - unsigned long LogicalDeviceSize = - NewLogicalDeviceInfo->ConfigurableDeviceSize; - if (NewLogicalDeviceInfo->LogicalDeviceState != - LogicalDeviceInfo->LogicalDeviceState) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (NewLogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Online - ? "ONLINE" - : NewLogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Critical - ? "CRITICAL" : "OFFLINE")); - if ((NewLogicalDeviceInfo->SoftErrors != - LogicalDeviceInfo->SoftErrors) || - (NewLogicalDeviceInfo->CommandsFailed != - LogicalDeviceInfo->CommandsFailed) || - (NewLogicalDeviceInfo->DeferredWriteErrors != - LogicalDeviceInfo->DeferredWriteErrors)) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) Errors: " - "Soft = %d, Failed = %d, Deferred Write = %d\n", - Controller, LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - NewLogicalDeviceInfo->SoftErrors, - NewLogicalDeviceInfo->CommandsFailed, - NewLogicalDeviceInfo->DeferredWriteErrors); - if (NewLogicalDeviceInfo->ConsistencyCheckInProgress) - DAC960_V2_ReportProgress(Controller, - "Consistency Check", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->ConsistencyCheckBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->RebuildInProgress) - DAC960_V2_ReportProgress(Controller, - "Rebuild", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->RebuildBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->BackgroundInitializationInProgress) - DAC960_V2_ReportProgress(Controller, - "Background Initialization", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->BackgroundInitializationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->ForegroundInitializationInProgress) - DAC960_V2_ReportProgress(Controller, - "Foreground Initialization", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->ForegroundInitializationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->DataMigrationInProgress) - DAC960_V2_ReportProgress(Controller, - "Data Migration", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->DataMigrationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->PatrolOperationInProgress) - DAC960_V2_ReportProgress(Controller, - "Patrol Operation", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->PatrolOperationBlockNumber, - LogicalDeviceSize); - if (LogicalDeviceInfo->BackgroundInitializationInProgress && - !NewLogicalDeviceInfo->BackgroundInitializationInProgress) - DAC960_Progress("Logical Drive %d (/dev/rd/c%dd%d) " - "Background Initialization %s\n", - Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (NewLogicalDeviceInfo->LogicalDeviceControl - .LogicalDeviceInitialized - ? 
"Completed" : "Failed")); - memcpy(LogicalDeviceInfo, NewLogicalDeviceInfo, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - } - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDeviceNumber] = true; - NewLogicalDeviceInfo->LogicalDeviceNumber++; - } - else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - { - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDriveNumber]; - if (LogicalDeviceInfo == NULL || - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDriveNumber]) - continue; - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "No Longer Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->V2.LogicalDeviceInformation - [LogicalDriveNumber] = NULL; - kfree(LogicalDeviceInfo); - Controller->LogicalDriveInitiallyAccessible - [LogicalDriveNumber] = false; - DAC960_ComputeGenericDiskInfo(Controller); - } - Controller->V2.NeedLogicalDeviceInformation = false; - } - else if (CommandOpcode == DAC960_V2_SCSI_10_Passthru) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[Controller->V2.PhysicalDeviceIndex - 1]; - - if (CommandStatus != DAC960_V2_NormalCompletion) { - memset(InquiryUnitSerialNumber, - 0, sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, - Controller->V2.NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - - Controller->V2.NeedDeviceSerialNumberInformation = false; - } - - if (Controller->V2.HealthStatusBuffer->NextEventSequenceNumber - - Controller->V2.NextEventSequenceNumber > 0) - { - CommandMailbox->GetEvent.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->GetEvent.DataTransferSize = sizeof(DAC960_V2_Event_T); - CommandMailbox->GetEvent.EventSequenceNumberHigh16 = - Controller->V2.NextEventSequenceNumber >> 16; - CommandMailbox->GetEvent.ControllerNumber = 0; - CommandMailbox->GetEvent.IOCTL_Opcode = - DAC960_V2_GetEvent; - CommandMailbox->GetEvent.EventSequenceNumberLow16 = - Controller->V2.NextEventSequenceNumber & 0xFFFF; - CommandMailbox->GetEvent.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.EventDMA; - CommandMailbox->GetEvent.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->GetEvent.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.NeedPhysicalDeviceInformation) - { - if (Controller->V2.NeedDeviceSerialNumberInformation) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.NewInquiryUnitSerialNumber; - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - - DAC960_V2_ConstructNewUnitSerialNumber(Controller, CommandMailbox, - Controller->V2.NewPhysicalDeviceInformation->Channel, - Controller->V2.NewPhysicalDeviceInformation->TargetID, - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit - 1); - - - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.StartPhysicalDeviceInformationScan) - { - Controller->V2.PhysicalDeviceIndex = 0; - Controller->V2.NewPhysicalDeviceInformation->Channel = 0; - Controller->V2.NewPhysicalDeviceInformation->TargetID = 0; - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit = 0; - 
Controller->V2.StartPhysicalDeviceInformationScan = false; - } - CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->PhysicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_PhysicalDeviceInfo_T); - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.LogicalUnit = - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = - Controller->V2.NewPhysicalDeviceInformation->TargetID; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = - Controller->V2.NewPhysicalDeviceInformation->Channel; - CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetPhysicalDeviceInfoValid; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewPhysicalDeviceInformationDMA; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->PhysicalDeviceInfo.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.NeedLogicalDeviceInformation) - { - if (Controller->V2.StartLogicalDeviceInformationScan) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDriveNumber] = false; - Controller->V2.NewLogicalDeviceInformation->LogicalDeviceNumber = 0; - Controller->V2.StartLogicalDeviceInformationScan = false; - } - CommandMailbox->LogicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->LogicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_LogicalDeviceInfo_T); - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - Controller->V2.NewLogicalDeviceInformation->LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetLogicalDeviceInfoValid; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewLogicalDeviceInformationDMA; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->LogicalDeviceInfo.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - Controller->MonitoringTimerCount++; - Controller->MonitoringTimer.expires = - jiffies + DAC960_HealthStatusMonitoringInterval; - add_timer(&Controller->MonitoringTimer); - } - if (CommandType == DAC960_ImmediateCommand) - { - complete(Command->Completion); - Command->Completion = NULL; - return; - } - if (CommandType == DAC960_QueuedCommand) - { - DAC960_V2_KernelCommand_T *KernelCommand = Command->V2.KernelCommand; - KernelCommand->CommandStatus = CommandStatus; - KernelCommand->RequestSenseLength = Command->V2.RequestSenseLength; - KernelCommand->DataTransferLength = Command->V2.DataTransferResidue; - Command->V2.KernelCommand = NULL; - DAC960_DeallocateCommand(Command); - KernelCommand->CompletionFunction(KernelCommand); - return; - } - /* - Queue a Status Monitoring Command to the Controller using the just - completed Command if one was deferred previously due to lack of a - free Command when the Monitoring Timer Function was called. - */ - if (Controller->MonitoringCommandDeferred) - { - Controller->MonitoringCommandDeferred = false; - DAC960_V2_QueueMonitoringCommand(Command); - return; - } - /* - Deallocate the Command. - */ - DAC960_DeallocateCommand(Command); - /* - Wake up any processes waiting on a free Command. 
- */ - wake_up(&Controller->CommandWaitQueue); -} - -/* - DAC960_GEM_InterruptHandler handles hardware interrupts from DAC960 GEM Series - Controllers. -*/ - -static irqreturn_t DAC960_GEM_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_GEM_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V2.NextStatusMailbox; - while (NextStatusMailbox->Fields.CommandIdentifier > 0) - { - DAC960_V2_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - Command->V2.RequestSenseLength = - NextStatusMailbox->Fields.RequestSenseLength; - Command->V2.DataTransferResidue = - NextStatusMailbox->Fields.DataTransferResidue; - NextStatusMailbox->Words[0] = 0; - if (++NextStatusMailbox > Controller->V2.LastStatusMailbox) - NextStatusMailbox = Controller->V2.FirstStatusMailbox; - DAC960_V2_ProcessCompletedCommand(Command); - } - Controller->V2.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - -/* - DAC960_BA_InterruptHandler handles hardware interrupts from DAC960 BA Series - Controllers. -*/ - -static irqreturn_t DAC960_BA_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_BA_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V2.NextStatusMailbox; - while (NextStatusMailbox->Fields.CommandIdentifier > 0) - { - DAC960_V2_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - Command->V2.RequestSenseLength = - NextStatusMailbox->Fields.RequestSenseLength; - Command->V2.DataTransferResidue = - NextStatusMailbox->Fields.DataTransferResidue; - NextStatusMailbox->Words[0] = 0; - if (++NextStatusMailbox > Controller->V2.LastStatusMailbox) - NextStatusMailbox = Controller->V2.FirstStatusMailbox; - DAC960_V2_ProcessCompletedCommand(Command); - } - Controller->V2.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_LP_InterruptHandler handles hardware interrupts from DAC960 LP Series - Controllers. 
-*/ - -static irqreturn_t DAC960_LP_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_LP_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V2.NextStatusMailbox; - while (NextStatusMailbox->Fields.CommandIdentifier > 0) - { - DAC960_V2_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - Command->V2.RequestSenseLength = - NextStatusMailbox->Fields.RequestSenseLength; - Command->V2.DataTransferResidue = - NextStatusMailbox->Fields.DataTransferResidue; - NextStatusMailbox->Words[0] = 0; - if (++NextStatusMailbox > Controller->V2.LastStatusMailbox) - NextStatusMailbox = Controller->V2.FirstStatusMailbox; - DAC960_V2_ProcessCompletedCommand(Command); - } - Controller->V2.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_LA_InterruptHandler handles hardware interrupts from DAC960 LA Series - Controllers. -*/ - -static irqreturn_t DAC960_LA_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_LA_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V1.NextStatusMailbox; - while (NextStatusMailbox->Fields.Valid) - { - DAC960_V1_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V1.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - NextStatusMailbox->Word = 0; - if (++NextStatusMailbox > Controller->V1.LastStatusMailbox) - NextStatusMailbox = Controller->V1.FirstStatusMailbox; - DAC960_V1_ProcessCompletedCommand(Command); - } - Controller->V1.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_PG_InterruptHandler handles hardware interrupts from DAC960 PG Series - Controllers. 
-*/ - -static irqreturn_t DAC960_PG_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_StatusMailbox_T *NextStatusMailbox; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_PG_AcknowledgeInterrupt(ControllerBaseAddress); - NextStatusMailbox = Controller->V1.NextStatusMailbox; - while (NextStatusMailbox->Fields.Valid) - { - DAC960_V1_CommandIdentifier_T CommandIdentifier = - NextStatusMailbox->Fields.CommandIdentifier; - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V1.CommandStatus = NextStatusMailbox->Fields.CommandStatus; - NextStatusMailbox->Word = 0; - if (++NextStatusMailbox > Controller->V1.LastStatusMailbox) - NextStatusMailbox = Controller->V1.FirstStatusMailbox; - DAC960_V1_ProcessCompletedCommand(Command); - } - Controller->V1.NextStatusMailbox = NextStatusMailbox; - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_PD_InterruptHandler handles hardware interrupts from DAC960 PD Series - Controllers. -*/ - -static irqreturn_t DAC960_PD_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - while (DAC960_PD_StatusAvailableP(ControllerBaseAddress)) - { - DAC960_V1_CommandIdentifier_T CommandIdentifier = - DAC960_PD_ReadStatusCommandIdentifier(ControllerBaseAddress); - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - Command->V1.CommandStatus = - DAC960_PD_ReadStatusRegister(ControllerBaseAddress); - DAC960_PD_AcknowledgeInterrupt(ControllerBaseAddress); - DAC960_PD_AcknowledgeStatus(ControllerBaseAddress); - DAC960_V1_ProcessCompletedCommand(Command); - } - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_P_InterruptHandler handles hardware interrupts from DAC960 P Series - Controllers. - - Translations of DAC960_V1_Enquiry and DAC960_V1_GetDeviceState rely - on the data having been placed into DAC960_Controller_T, rather than - an arbitrary buffer. 
-*/ - -static irqreturn_t DAC960_P_InterruptHandler(int IRQ_Channel, - void *DeviceIdentifier) -{ - DAC960_Controller_T *Controller = DeviceIdentifier; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - unsigned long flags; - - spin_lock_irqsave(&Controller->queue_lock, flags); - while (DAC960_PD_StatusAvailableP(ControllerBaseAddress)) - { - DAC960_V1_CommandIdentifier_T CommandIdentifier = - DAC960_PD_ReadStatusCommandIdentifier(ControllerBaseAddress); - DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1]; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandOpcode_T CommandOpcode = - CommandMailbox->Common.CommandOpcode; - Command->V1.CommandStatus = - DAC960_PD_ReadStatusRegister(ControllerBaseAddress); - DAC960_PD_AcknowledgeInterrupt(ControllerBaseAddress); - DAC960_PD_AcknowledgeStatus(ControllerBaseAddress); - switch (CommandOpcode) - { - case DAC960_V1_Enquiry_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Enquiry; - DAC960_P_To_PD_TranslateEnquiry(Controller->V1.NewEnquiry); - break; - case DAC960_V1_GetDeviceState_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = - DAC960_V1_GetDeviceState; - DAC960_P_To_PD_TranslateDeviceState(Controller->V1.NewDeviceState); - break; - case DAC960_V1_Read_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Read; - DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_Write_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Write; - DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_ReadWithScatterGather_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = - DAC960_V1_ReadWithScatterGather; - DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_WriteWithScatterGather_Old: - Command->V1.CommandMailbox.Common.CommandOpcode = - DAC960_V1_WriteWithScatterGather; - DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox); - break; - default: - break; - } - DAC960_V1_ProcessCompletedCommand(Command); - } - /* - Attempt to remove additional I/O Requests from the Controller's - I/O Request Queue and queue them to the Controller. - */ - DAC960_ProcessRequest(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return IRQ_HANDLED; -} - - -/* - DAC960_V1_QueueMonitoringCommand queues a Monitoring Command to DAC960 V1 - Firmware Controllers. -*/ - -static void DAC960_V1_QueueMonitoringCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_MonitoringCommand; - CommandMailbox->Type3.CommandOpcode = DAC960_V1_Enquiry; - CommandMailbox->Type3.BusAddress = Controller->V1.NewEnquiryDMA; - DAC960_QueueCommand(Command); -} - - -/* - DAC960_V2_QueueMonitoringCommand queues a Monitoring Command to DAC960 V2 - Firmware Controllers. 
-*/ - -static void DAC960_V2_QueueMonitoringCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_MonitoringCommand; - CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->ControllerInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->ControllerInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->ControllerInfo.DataTransferSize = - sizeof(DAC960_V2_ControllerInfo_T); - CommandMailbox->ControllerInfo.ControllerNumber = 0; - CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewControllerInformationDMA; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->ControllerInfo.DataTransferSize; - DAC960_QueueCommand(Command); -} - - -/* - DAC960_MonitoringTimerFunction is the timer function for monitoring - the status of DAC960 Controllers. -*/ - -static void DAC960_MonitoringTimerFunction(struct timer_list *t) -{ - DAC960_Controller_T *Controller = from_timer(Controller, t, MonitoringTimer); - DAC960_Command_T *Command; - unsigned long flags; - - if (Controller->FirmwareType == DAC960_V1_Controller) - { - spin_lock_irqsave(&Controller->queue_lock, flags); - /* - Queue a Status Monitoring Command to Controller. - */ - Command = DAC960_AllocateCommand(Controller); - if (Command != NULL) - DAC960_V1_QueueMonitoringCommand(Command); - else Controller->MonitoringCommandDeferred = true; - spin_unlock_irqrestore(&Controller->queue_lock, flags); - } - else - { - DAC960_V2_ControllerInfo_T *ControllerInfo = - &Controller->V2.ControllerInformation; - unsigned int StatusChangeCounter = - Controller->V2.HealthStatusBuffer->StatusChangeCounter; - bool ForceMonitoringCommand = false; - if (time_after(jiffies, Controller->SecondaryMonitoringTime - + DAC960_SecondaryMonitoringInterval)) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - { - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDriveNumber]; - if (LogicalDeviceInfo == NULL) continue; - if (!LogicalDeviceInfo->LogicalDeviceControl - .LogicalDeviceInitialized) - { - ForceMonitoringCommand = true; - break; - } - } - Controller->SecondaryMonitoringTime = jiffies; - } - if (StatusChangeCounter == Controller->V2.StatusChangeCounter && - Controller->V2.HealthStatusBuffer->NextEventSequenceNumber - == Controller->V2.NextEventSequenceNumber && - (ControllerInfo->BackgroundInitializationsActive + - ControllerInfo->LogicalDeviceInitializationsActive + - ControllerInfo->PhysicalDeviceInitializationsActive + - ControllerInfo->ConsistencyChecksActive + - ControllerInfo->RebuildsActive + - ControllerInfo->OnlineExpansionsActive == 0 || - time_before(jiffies, Controller->PrimaryMonitoringTime - + DAC960_MonitoringTimerInterval)) && - !ForceMonitoringCommand) - { - Controller->MonitoringTimer.expires = - jiffies + DAC960_HealthStatusMonitoringInterval; - add_timer(&Controller->MonitoringTimer); - return; - } - Controller->V2.StatusChangeCounter = StatusChangeCounter; - Controller->PrimaryMonitoringTime = jiffies; - - spin_lock_irqsave(&Controller->queue_lock, 
flags); - /* - Queue a Status Monitoring Command to Controller. - */ - Command = DAC960_AllocateCommand(Controller); - if (Command != NULL) - DAC960_V2_QueueMonitoringCommand(Command); - else Controller->MonitoringCommandDeferred = true; - spin_unlock_irqrestore(&Controller->queue_lock, flags); - /* - Wake up any processes waiting on a Health Status Buffer change. - */ - wake_up(&Controller->HealthStatusWaitQueue); - } -} - -/* - DAC960_CheckStatusBuffer verifies that there is room to hold ByteCount - additional bytes in the Combined Status Buffer and grows the buffer if - necessary. It returns true if there is enough room and false otherwise. -*/ - -static bool DAC960_CheckStatusBuffer(DAC960_Controller_T *Controller, - unsigned int ByteCount) -{ - unsigned char *NewStatusBuffer; - if (Controller->InitialStatusLength + 1 + - Controller->CurrentStatusLength + ByteCount + 1 <= - Controller->CombinedStatusBufferLength) - return true; - if (Controller->CombinedStatusBufferLength == 0) - { - unsigned int NewStatusBufferLength = DAC960_InitialStatusBufferSize; - while (NewStatusBufferLength < ByteCount) - NewStatusBufferLength *= 2; - Controller->CombinedStatusBuffer = kmalloc(NewStatusBufferLength, - GFP_ATOMIC); - if (Controller->CombinedStatusBuffer == NULL) return false; - Controller->CombinedStatusBufferLength = NewStatusBufferLength; - return true; - } - NewStatusBuffer = kmalloc_array(2, Controller->CombinedStatusBufferLength, - GFP_ATOMIC); - if (NewStatusBuffer == NULL) - { - DAC960_Warning("Unable to expand Combined Status Buffer - Truncating\n", - Controller); - return false; - } - memcpy(NewStatusBuffer, Controller->CombinedStatusBuffer, - Controller->CombinedStatusBufferLength); - kfree(Controller->CombinedStatusBuffer); - Controller->CombinedStatusBuffer = NewStatusBuffer; - Controller->CombinedStatusBufferLength *= 2; - Controller->CurrentStatusBuffer = - &NewStatusBuffer[Controller->InitialStatusLength + 1]; - return true; -} - - -/* - DAC960_Message prints Driver Messages. -*/ - -static void DAC960_Message(DAC960_MessageLevel_T MessageLevel, - unsigned char *Format, - DAC960_Controller_T *Controller, - ...) 
-{ - static unsigned char Buffer[DAC960_LineBufferSize]; - static bool BeginningOfLine = true; - va_list Arguments; - int Length = 0; - va_start(Arguments, Controller); - Length = vsprintf(Buffer, Format, Arguments); - va_end(Arguments); - if (Controller == NULL) - printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - DAC960_ControllerCount, Buffer); - else if (MessageLevel == DAC960_AnnounceLevel || - MessageLevel == DAC960_InfoLevel) - { - if (!Controller->ControllerInitialized) - { - if (DAC960_CheckStatusBuffer(Controller, Length)) - { - strcpy(&Controller->CombinedStatusBuffer - [Controller->InitialStatusLength], - Buffer); - Controller->InitialStatusLength += Length; - Controller->CurrentStatusBuffer = - &Controller->CombinedStatusBuffer - [Controller->InitialStatusLength + 1]; - } - if (MessageLevel == DAC960_AnnounceLevel) - { - static int AnnouncementLines = 0; - if (++AnnouncementLines <= 2) - printk("%sDAC960: %s", DAC960_MessageLevelMap[MessageLevel], - Buffer); - } - else - { - if (BeginningOfLine) - { - if (Buffer[0] != '\n' || Length > 1) - printk("%sDAC960#%d: %s", - DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - } - else printk("%s", Buffer); - } - } - else if (DAC960_CheckStatusBuffer(Controller, Length)) - { - strcpy(&Controller->CurrentStatusBuffer[ - Controller->CurrentStatusLength], Buffer); - Controller->CurrentStatusLength += Length; - } - } - else if (MessageLevel == DAC960_ProgressLevel) - { - strcpy(Controller->ProgressBuffer, Buffer); - Controller->ProgressBufferLength = Length; - if (Controller->EphemeralProgressMessage) - { - if (time_after_eq(jiffies, Controller->LastProgressReportTime - + DAC960_ProgressReportingInterval)) - { - printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - Controller->LastProgressReportTime = jiffies; - } - } - else printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - } - else if (MessageLevel == DAC960_UserCriticalLevel) - { - strcpy(&Controller->UserStatusBuffer[Controller->UserStatusLength], - Buffer); - Controller->UserStatusLength += Length; - if (Buffer[0] != '\n' || Length > 1) - printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - } - else - { - if (BeginningOfLine) - printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel], - Controller->ControllerNumber, Buffer); - else printk("%s", Buffer); - } - BeginningOfLine = (Buffer[Length-1] == '\n'); -} - - -/* - DAC960_ParsePhysicalDevice parses spaces followed by a Physical Device - Channel:TargetID specification from a User Command string. It updates - Channel and TargetID and returns true on success and false on failure. 
-*/ - -static bool DAC960_ParsePhysicalDevice(DAC960_Controller_T *Controller, - char *UserCommandString, - unsigned char *Channel, - unsigned char *TargetID) -{ - char *NewUserCommandString = UserCommandString; - unsigned long XChannel, XTargetID; - while (*UserCommandString == ' ') UserCommandString++; - if (UserCommandString == NewUserCommandString) - return false; - XChannel = simple_strtoul(UserCommandString, &NewUserCommandString, 10); - if (NewUserCommandString == UserCommandString || - *NewUserCommandString != ':' || - XChannel >= Controller->Channels) - return false; - UserCommandString = ++NewUserCommandString; - XTargetID = simple_strtoul(UserCommandString, &NewUserCommandString, 10); - if (NewUserCommandString == UserCommandString || - *NewUserCommandString != '\0' || - XTargetID >= Controller->Targets) - return false; - *Channel = XChannel; - *TargetID = XTargetID; - return true; -} - - -/* - DAC960_ParseLogicalDrive parses spaces followed by a Logical Drive Number - specification from a User Command string. It updates LogicalDriveNumber and - returns true on success and false on failure. -*/ - -static bool DAC960_ParseLogicalDrive(DAC960_Controller_T *Controller, - char *UserCommandString, - unsigned char *LogicalDriveNumber) -{ - char *NewUserCommandString = UserCommandString; - unsigned long XLogicalDriveNumber; - while (*UserCommandString == ' ') UserCommandString++; - if (UserCommandString == NewUserCommandString) - return false; - XLogicalDriveNumber = - simple_strtoul(UserCommandString, &NewUserCommandString, 10); - if (NewUserCommandString == UserCommandString || - *NewUserCommandString != '\0' || - XLogicalDriveNumber > DAC960_MaxLogicalDrives - 1) - return false; - *LogicalDriveNumber = XLogicalDriveNumber; - return true; -} - - -/* - DAC960_V1_SetDeviceState sets the Device State for a Physical Device for - DAC960 V1 Firmware Controllers. 
-*/ - -static void DAC960_V1_SetDeviceState(DAC960_Controller_T *Controller, - DAC960_Command_T *Command, - unsigned char Channel, - unsigned char TargetID, - DAC960_V1_PhysicalDeviceState_T - DeviceState, - const unsigned char *DeviceStateString) -{ - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - CommandMailbox->Type3D.CommandOpcode = DAC960_V1_StartDevice; - CommandMailbox->Type3D.Channel = Channel; - CommandMailbox->Type3D.TargetID = TargetID; - CommandMailbox->Type3D.DeviceState = DeviceState; - CommandMailbox->Type3D.Modifier = 0; - DAC960_ExecuteCommand(Command); - switch (Command->V1.CommandStatus) - { - case DAC960_V1_NormalCompletion: - DAC960_UserCritical("%s of Physical Device %d:%d Succeeded\n", Controller, - DeviceStateString, Channel, TargetID); - break; - case DAC960_V1_UnableToStartDevice: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "Unable to Start Device\n", Controller, - DeviceStateString, Channel, TargetID); - break; - case DAC960_V1_NoDeviceAtAddress: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "No Device at Address\n", Controller, - DeviceStateString, Channel, TargetID); - break; - case DAC960_V1_InvalidChannelOrTargetOrModifier: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "Invalid Channel or Target or Modifier\n", - Controller, DeviceStateString, Channel, TargetID); - break; - case DAC960_V1_ChannelBusy: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "Channel Busy\n", Controller, - DeviceStateString, Channel, TargetID); - break; - default: - DAC960_UserCritical("%s of Physical Device %d:%d Failed - " - "Unexpected Status %04X\n", Controller, - DeviceStateString, Channel, TargetID, - Command->V1.CommandStatus); - break; - } -} - - -/* - DAC960_V1_ExecuteUserCommand executes a User Command for DAC960 V1 Firmware - Controllers. 
-*/ - -static bool DAC960_V1_ExecuteUserCommand(DAC960_Controller_T *Controller, - unsigned char *UserCommand) -{ - DAC960_Command_T *Command; - DAC960_V1_CommandMailbox_T *CommandMailbox; - unsigned long flags; - unsigned char Channel, TargetID, LogicalDriveNumber; - - spin_lock_irqsave(&Controller->queue_lock, flags); - while ((Command = DAC960_AllocateCommand(Controller)) == NULL) - DAC960_WaitForCommand(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - Controller->UserStatusLength = 0; - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox = &Command->V1.CommandMailbox; - if (strcmp(UserCommand, "flush-cache") == 0) - { - CommandMailbox->Type3.CommandOpcode = DAC960_V1_Flush; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Cache Flush Completed\n", Controller); - } - else if (strncmp(UserCommand, "kill", 4) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[4], - &Channel, &TargetID)) - { - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType && - DeviceState->DeviceState != DAC960_V1_Device_Dead) - DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID, - DAC960_V1_Device_Dead, "Kill"); - else DAC960_UserCritical("Kill of Physical Device %d:%d Illegal\n", - Controller, Channel, TargetID); - } - else if (strncmp(UserCommand, "make-online", 11) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[11], - &Channel, &TargetID)) - { - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType && - DeviceState->DeviceState == DAC960_V1_Device_Dead) - DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID, - DAC960_V1_Device_Online, "Make Online"); - else DAC960_UserCritical("Make Online of Physical Device %d:%d Illegal\n", - Controller, Channel, TargetID); - - } - else if (strncmp(UserCommand, "make-standby", 12) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[12], - &Channel, &TargetID)) - { - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType && - DeviceState->DeviceState == DAC960_V1_Device_Dead) - DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID, - DAC960_V1_Device_Standby, "Make Standby"); - else DAC960_UserCritical("Make Standby of Physical " - "Device %d:%d Illegal\n", - Controller, Channel, TargetID); - } - else if (strncmp(UserCommand, "rebuild", 7) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[7], - &Channel, &TargetID)) - { - CommandMailbox->Type3D.CommandOpcode = DAC960_V1_RebuildAsync; - CommandMailbox->Type3D.Channel = Channel; - CommandMailbox->Type3D.TargetID = TargetID; - DAC960_ExecuteCommand(Command); - switch (Command->V1.CommandStatus) - { - case DAC960_V1_NormalCompletion: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Initiated\n", - Controller, Channel, TargetID); - break; - case DAC960_V1_AttemptToRebuildOnlineDrive: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "Attempt to Rebuild Online or " - "Unresponsive Drive\n", - Controller, Channel, TargetID); - break; - case DAC960_V1_NewDiskFailedDuringRebuild: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "New Disk Failed During Rebuild\n", - Controller, Channel, TargetID); - 
break; - case DAC960_V1_InvalidDeviceAddress: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "Invalid Device Address\n", - Controller, Channel, TargetID); - break; - case DAC960_V1_RebuildOrCheckAlreadyInProgress: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "Rebuild or Consistency Check Already " - "in Progress\n", Controller, Channel, TargetID); - break; - default: - DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - " - "Unexpected Status %04X\n", Controller, - Channel, TargetID, Command->V1.CommandStatus); - break; - } - } - else if (strncmp(UserCommand, "check-consistency", 17) == 0 && - DAC960_ParseLogicalDrive(Controller, &UserCommand[17], - &LogicalDriveNumber)) - { - CommandMailbox->Type3C.CommandOpcode = DAC960_V1_CheckConsistencyAsync; - CommandMailbox->Type3C.LogicalDriveNumber = LogicalDriveNumber; - CommandMailbox->Type3C.AutoRestore = true; - DAC960_ExecuteCommand(Command); - switch (Command->V1.CommandStatus) - { - case DAC960_V1_NormalCompletion: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Initiated\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - break; - case DAC960_V1_DependentDiskIsDead: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Failed - " - "Dependent Physical Device is DEAD\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - break; - case DAC960_V1_InvalidOrNonredundantLogicalDrive: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Failed - " - "Invalid or Nonredundant Logical Drive\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - break; - case DAC960_V1_RebuildOrCheckAlreadyInProgress: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Failed - Rebuild or " - "Consistency Check Already in Progress\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - break; - default: - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) Failed - " - "Unexpected Status %04X\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, Command->V1.CommandStatus); - break; - } - } - else if (strcmp(UserCommand, "cancel-rebuild") == 0 || - strcmp(UserCommand, "cancel-consistency-check") == 0) - { - /* - the OldRebuildRateConstant is never actually used - once its value is retrieved from the controller. 
- */ - unsigned char *OldRebuildRateConstant; - dma_addr_t OldRebuildRateConstantDMA; - - OldRebuildRateConstant = pci_alloc_consistent( Controller->PCIDevice, - sizeof(char), &OldRebuildRateConstantDMA); - if (OldRebuildRateConstant == NULL) { - DAC960_UserCritical("Cancellation of Rebuild or " - "Consistency Check Failed - " - "Out of Memory", - Controller); - goto failure; - } - CommandMailbox->Type3R.CommandOpcode = DAC960_V1_RebuildControl; - CommandMailbox->Type3R.RebuildRateConstant = 0xFF; - CommandMailbox->Type3R.BusAddress = OldRebuildRateConstantDMA; - DAC960_ExecuteCommand(Command); - switch (Command->V1.CommandStatus) - { - case DAC960_V1_NormalCompletion: - DAC960_UserCritical("Rebuild or Consistency Check Cancelled\n", - Controller); - break; - default: - DAC960_UserCritical("Cancellation of Rebuild or " - "Consistency Check Failed - " - "Unexpected Status %04X\n", - Controller, Command->V1.CommandStatus); - break; - } -failure: - pci_free_consistent(Controller->PCIDevice, sizeof(char), - OldRebuildRateConstant, OldRebuildRateConstantDMA); - } - else DAC960_UserCritical("Illegal User Command: '%s'\n", - Controller, UserCommand); - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_DeallocateCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return true; -} - - -/* - DAC960_V2_TranslatePhysicalDevice translates a Physical Device Channel and - TargetID into a Logical Device. It returns true on success and false - on failure. -*/ - -static bool DAC960_V2_TranslatePhysicalDevice(DAC960_Command_T *Command, - unsigned char Channel, - unsigned char TargetID, - unsigned short - *LogicalDeviceNumber) -{ - DAC960_V2_CommandMailbox_T SavedCommandMailbox, *CommandMailbox; - DAC960_Controller_T *Controller = Command->Controller; - - CommandMailbox = &Command->V2.CommandMailbox; - memcpy(&SavedCommandMailbox, CommandMailbox, - sizeof(DAC960_V2_CommandMailbox_T)); - - CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->PhysicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_PhysicalToLogicalDevice_T); - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = TargetID; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = Channel; - CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_TranslatePhysicalToLogicalDevice; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.PhysicalToLogicalDeviceDMA; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->Common.DataTransferSize; - - DAC960_ExecuteCommand(Command); - *LogicalDeviceNumber = Controller->V2.PhysicalToLogicalDevice->LogicalDeviceNumber; - - memcpy(CommandMailbox, &SavedCommandMailbox, - sizeof(DAC960_V2_CommandMailbox_T)); - return (Command->V2.CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_ExecuteUserCommand executes a User Command for DAC960 V2 Firmware - Controllers. 
-*/ - -static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller, - unsigned char *UserCommand) -{ - DAC960_Command_T *Command; - DAC960_V2_CommandMailbox_T *CommandMailbox; - unsigned long flags; - unsigned char Channel, TargetID, LogicalDriveNumber; - unsigned short LogicalDeviceNumber; - - spin_lock_irqsave(&Controller->queue_lock, flags); - while ((Command = DAC960_AllocateCommand(Controller)) == NULL) - DAC960_WaitForCommand(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - Controller->UserStatusLength = 0; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox = &Command->V2.CommandMailbox; - CommandMailbox->Common.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->Common.CommandControlBits.DataTransferControllerToHost = true; - CommandMailbox->Common.CommandControlBits.NoAutoRequestSense = true; - if (strcmp(UserCommand, "flush-cache") == 0) - { - CommandMailbox->DeviceOperation.IOCTL_Opcode = DAC960_V2_PauseDevice; - CommandMailbox->DeviceOperation.OperationDevice = - DAC960_V2_RAID_Controller; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Cache Flush Completed\n", Controller); - } - else if (strncmp(UserCommand, "kill", 4) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[4], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->SetDeviceState.IOCTL_Opcode = - DAC960_V2_SetDeviceState; - CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState = - DAC960_V2_Device_Dead; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Kill of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Succeeded" : "Failed")); - } - else if (strncmp(UserCommand, "make-online", 11) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[11], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->SetDeviceState.IOCTL_Opcode = - DAC960_V2_SetDeviceState; - CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState = - DAC960_V2_Device_Online; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Make Online of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Succeeded" : "Failed")); - } - else if (strncmp(UserCommand, "make-standby", 12) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[12], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->SetDeviceState.IOCTL_Opcode = - DAC960_V2_SetDeviceState; - CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState = - DAC960_V2_Device_Standby; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Make Standby of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? 
"Succeeded" : "Failed")); - } - else if (strncmp(UserCommand, "rebuild", 7) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[7], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_RebuildDeviceStart; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Rebuild of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Initiated" : "Not Initiated")); - } - else if (strncmp(UserCommand, "cancel-rebuild", 14) == 0 && - DAC960_ParsePhysicalDevice(Controller, &UserCommand[14], - &Channel, &TargetID) && - DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID, - &LogicalDeviceNumber)) - { - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_RebuildDeviceStop; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Rebuild of Physical Device %d:%d %s\n", - Controller, Channel, TargetID, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Cancelled" : "Not Cancelled")); - } - else if (strncmp(UserCommand, "check-consistency", 17) == 0 && - DAC960_ParseLogicalDrive(Controller, &UserCommand[17], - &LogicalDriveNumber)) - { - CommandMailbox->ConsistencyCheck.LogicalDevice.LogicalDeviceNumber = - LogicalDriveNumber; - CommandMailbox->ConsistencyCheck.IOCTL_Opcode = - DAC960_V2_ConsistencyCheckStart; - CommandMailbox->ConsistencyCheck.RestoreConsistency = true; - CommandMailbox->ConsistencyCheck.InitializedAreaOnly = false; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) %s\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Initiated" : "Not Initiated")); - } - else if (strncmp(UserCommand, "cancel-consistency-check", 24) == 0 && - DAC960_ParseLogicalDrive(Controller, &UserCommand[24], - &LogicalDriveNumber)) - { - CommandMailbox->ConsistencyCheck.LogicalDevice.LogicalDeviceNumber = - LogicalDriveNumber; - CommandMailbox->ConsistencyCheck.IOCTL_Opcode = - DAC960_V2_ConsistencyCheckStop; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Consistency Check of Logical Drive %d " - "(/dev/rd/c%dd%d) %s\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? "Cancelled" : "Not Cancelled")); - } - else if (strcmp(UserCommand, "perform-discovery") == 0) - { - CommandMailbox->Common.IOCTL_Opcode = DAC960_V2_StartDiscovery; - DAC960_ExecuteCommand(Command); - DAC960_UserCritical("Discovery %s\n", Controller, - (Command->V2.CommandStatus - == DAC960_V2_NormalCompletion - ? 
"Initiated" : "Not Initiated")); - if (Command->V2.CommandStatus == DAC960_V2_NormalCompletion) - { - CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->ControllerInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->ControllerInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->ControllerInfo.DataTransferSize = - sizeof(DAC960_V2_ControllerInfo_T); - CommandMailbox->ControllerInfo.ControllerNumber = 0; - CommandMailbox->ControllerInfo.IOCTL_Opcode = - DAC960_V2_GetControllerInfo; - /* - * How does this NOT race with the queued Monitoring - * usage of this structure? - */ - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewControllerInformationDMA; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->ControllerInfo.DataTransferSize; - while (1) { - DAC960_ExecuteCommand(Command); - if (!Controller->V2.NewControllerInformation->PhysicalScanActive) - break; - msleep(1000); - } - DAC960_UserCritical("Discovery Completed\n", Controller); - } - } - else if (strcmp(UserCommand, "suppress-enclosure-messages") == 0) - Controller->SuppressEnclosureMessages = true; - else DAC960_UserCritical("Illegal User Command: '%s'\n", - Controller, UserCommand); - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_DeallocateCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - return true; -} - -static int __maybe_unused dac960_proc_show(struct seq_file *m, void *v) -{ - unsigned char *StatusMessage = "OK\n"; - int ControllerNumber; - for (ControllerNumber = 0; - ControllerNumber < DAC960_ControllerCount; - ControllerNumber++) - { - DAC960_Controller_T *Controller = DAC960_Controllers[ControllerNumber]; - if (Controller == NULL) continue; - if (Controller->MonitoringAlertMode) - { - StatusMessage = "ALERT\n"; - break; - } - } - seq_puts(m, StatusMessage); - return 0; -} - -static int __maybe_unused dac960_initial_status_proc_show(struct seq_file *m, - void *v) -{ - DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; - seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer); - return 0; -} - -static int __maybe_unused dac960_current_status_proc_show(struct seq_file *m, - void *v) -{ - DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private; - unsigned char *StatusMessage = - "No Rebuild or Consistency Check in Progress\n"; - int ProgressMessageLength = strlen(StatusMessage); - if (jiffies != Controller->LastCurrentStatusTime) - { - Controller->CurrentStatusLength = 0; - DAC960_AnnounceDriver(Controller); - DAC960_ReportControllerConfiguration(Controller); - DAC960_ReportDeviceConfiguration(Controller); - if (Controller->ProgressBufferLength > 0) - ProgressMessageLength = Controller->ProgressBufferLength; - if (DAC960_CheckStatusBuffer(Controller, 2 + ProgressMessageLength)) - { - unsigned char *CurrentStatusBuffer = Controller->CurrentStatusBuffer; - CurrentStatusBuffer[Controller->CurrentStatusLength++] = ' '; - CurrentStatusBuffer[Controller->CurrentStatusLength++] = ' '; - if (Controller->ProgressBufferLength > 0) - strcpy(&CurrentStatusBuffer[Controller->CurrentStatusLength], - Controller->ProgressBuffer); - else - strcpy(&CurrentStatusBuffer[Controller->CurrentStatusLength], - StatusMessage); - Controller->CurrentStatusLength += ProgressMessageLength; - } - 
Controller->LastCurrentStatusTime = jiffies; - } - seq_printf(m, "%.*s", Controller->CurrentStatusLength, Controller->CurrentStatusBuffer); - return 0; -} - -static int dac960_user_command_proc_show(struct seq_file *m, void *v) -{ - DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; - - seq_printf(m, "%.*s", Controller->UserStatusLength, Controller->UserStatusBuffer); - return 0; -} - -static int dac960_user_command_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, dac960_user_command_proc_show, PDE_DATA(inode)); -} - -static ssize_t dac960_user_command_proc_write(struct file *file, - const char __user *Buffer, - size_t Count, loff_t *pos) -{ - DAC960_Controller_T *Controller = PDE_DATA(file_inode(file)); - unsigned char CommandBuffer[80]; - int Length; - if (Count > sizeof(CommandBuffer)-1) return -EINVAL; - if (copy_from_user(CommandBuffer, Buffer, Count)) return -EFAULT; - CommandBuffer[Count] = '\0'; - Length = strlen(CommandBuffer); - if (Length > 0 && CommandBuffer[Length-1] == '\n') - CommandBuffer[--Length] = '\0'; - if (Controller->FirmwareType == DAC960_V1_Controller) - return (DAC960_V1_ExecuteUserCommand(Controller, CommandBuffer) - ? Count : -EBUSY); - else - return (DAC960_V2_ExecuteUserCommand(Controller, CommandBuffer) - ? Count : -EBUSY); -} - -static const struct file_operations dac960_user_command_proc_fops = { - .owner = THIS_MODULE, - .open = dac960_user_command_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = dac960_user_command_proc_write, -}; - -/* - DAC960_CreateProcEntries creates the /proc/rd/... entries for the - DAC960 Driver. -*/ - -static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller) -{ - struct proc_dir_entry *ControllerProcEntry; - - if (DAC960_ProcDirectoryEntry == NULL) { - DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL); - proc_create_single("status", 0, DAC960_ProcDirectoryEntry, - dac960_proc_show); - } - - snprintf(Controller->ControllerName, sizeof(Controller->ControllerName), - "c%d", Controller->ControllerNumber); - ControllerProcEntry = proc_mkdir(Controller->ControllerName, - DAC960_ProcDirectoryEntry); - proc_create_single_data("initial_status", 0, ControllerProcEntry, - dac960_initial_status_proc_show, Controller); - proc_create_single_data("current_status", 0, ControllerProcEntry, - dac960_current_status_proc_show, Controller); - proc_create_data("user_command", 0600, ControllerProcEntry, &dac960_user_command_proc_fops, Controller); - Controller->ControllerProcEntry = ControllerProcEntry; -} - - -/* - DAC960_DestroyProcEntries destroys the /proc/rd/... entries for the - DAC960 Driver. 
-*/ - -static void DAC960_DestroyProcEntries(DAC960_Controller_T *Controller) -{ - if (Controller->ControllerProcEntry == NULL) - return; - remove_proc_entry("initial_status", Controller->ControllerProcEntry); - remove_proc_entry("current_status", Controller->ControllerProcEntry); - remove_proc_entry("user_command", Controller->ControllerProcEntry); - remove_proc_entry(Controller->ControllerName, DAC960_ProcDirectoryEntry); - Controller->ControllerProcEntry = NULL; -} - -#ifdef DAC960_GAM_MINOR - -static long DAC960_gam_get_controller_info(DAC960_ControllerInfo_T __user *UserSpaceControllerInfo) -{ - DAC960_ControllerInfo_T ControllerInfo; - DAC960_Controller_T *Controller; - int ControllerNumber; - long ErrorCode; - - if (UserSpaceControllerInfo == NULL) - ErrorCode = -EINVAL; - else ErrorCode = get_user(ControllerNumber, - &UserSpaceControllerInfo->ControllerNumber); - if (ErrorCode != 0) - goto out; - ErrorCode = -ENXIO; - if (ControllerNumber < 0 || - ControllerNumber > DAC960_ControllerCount - 1) { - goto out; - } - Controller = DAC960_Controllers[ControllerNumber]; - if (Controller == NULL) - goto out; - memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T)); - ControllerInfo.ControllerNumber = ControllerNumber; - ControllerInfo.FirmwareType = Controller->FirmwareType; - ControllerInfo.Channels = Controller->Channels; - ControllerInfo.Targets = Controller->Targets; - ControllerInfo.PCI_Bus = Controller->Bus; - ControllerInfo.PCI_Device = Controller->Device; - ControllerInfo.PCI_Function = Controller->Function; - ControllerInfo.IRQ_Channel = Controller->IRQ_Channel; - ControllerInfo.PCI_Address = Controller->PCI_Address; - strcpy(ControllerInfo.ModelName, Controller->ModelName); - strcpy(ControllerInfo.FirmwareVersion, Controller->FirmwareVersion); - ErrorCode = (copy_to_user(UserSpaceControllerInfo, &ControllerInfo, - sizeof(DAC960_ControllerInfo_T)) ? 
-EFAULT : 0); -out: - return ErrorCode; -} - -static long DAC960_gam_v1_execute_command(DAC960_V1_UserCommand_T __user *UserSpaceUserCommand) -{ - DAC960_V1_UserCommand_T UserCommand; - DAC960_Controller_T *Controller; - DAC960_Command_T *Command = NULL; - DAC960_V1_CommandOpcode_T CommandOpcode; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_DCDB_T DCDB; - DAC960_V1_DCDB_T *DCDB_IOBUF = NULL; - dma_addr_t DCDB_IOBUFDMA; - unsigned long flags; - int ControllerNumber, DataTransferLength; - unsigned char *DataTransferBuffer = NULL; - dma_addr_t DataTransferBufferDMA; - long ErrorCode; - - if (UserSpaceUserCommand == NULL) { - ErrorCode = -EINVAL; - goto out; - } - if (copy_from_user(&UserCommand, UserSpaceUserCommand, - sizeof(DAC960_V1_UserCommand_T))) { - ErrorCode = -EFAULT; - goto out; - } - ControllerNumber = UserCommand.ControllerNumber; - ErrorCode = -ENXIO; - if (ControllerNumber < 0 || - ControllerNumber > DAC960_ControllerCount - 1) - goto out; - Controller = DAC960_Controllers[ControllerNumber]; - if (Controller == NULL) - goto out; - ErrorCode = -EINVAL; - if (Controller->FirmwareType != DAC960_V1_Controller) - goto out; - CommandOpcode = UserCommand.CommandMailbox.Common.CommandOpcode; - DataTransferLength = UserCommand.DataTransferLength; - if (CommandOpcode & 0x80) - goto out; - if (CommandOpcode == DAC960_V1_DCDB) - { - if (copy_from_user(&DCDB, UserCommand.DCDB, - sizeof(DAC960_V1_DCDB_T))) { - ErrorCode = -EFAULT; - goto out; - } - if (DCDB.Channel >= DAC960_V1_MaxChannels) - goto out; - if (!((DataTransferLength == 0 && - DCDB.Direction - == DAC960_V1_DCDB_NoDataTransfer) || - (DataTransferLength > 0 && - DCDB.Direction - == DAC960_V1_DCDB_DataTransferDeviceToSystem) || - (DataTransferLength < 0 && - DCDB.Direction - == DAC960_V1_DCDB_DataTransferSystemToDevice))) - goto out; - if (((DCDB.TransferLengthHigh4 << 16) | DCDB.TransferLength) - != abs(DataTransferLength)) - goto out; - DCDB_IOBUF = pci_alloc_consistent(Controller->PCIDevice, - sizeof(DAC960_V1_DCDB_T), &DCDB_IOBUFDMA); - if (DCDB_IOBUF == NULL) { - ErrorCode = -ENOMEM; - goto out; - } - } - ErrorCode = -ENOMEM; - if (DataTransferLength > 0) - { - DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice, - DataTransferLength, - &DataTransferBufferDMA); - if (DataTransferBuffer == NULL) - goto out; - } - else if (DataTransferLength < 0) - { - DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, - -DataTransferLength, &DataTransferBufferDMA); - if (DataTransferBuffer == NULL) - goto out; - if (copy_from_user(DataTransferBuffer, - UserCommand.DataTransferBuffer, - -DataTransferLength)) { - ErrorCode = -EFAULT; - goto out; - } - } - if (CommandOpcode == DAC960_V1_DCDB) - { - spin_lock_irqsave(&Controller->queue_lock, flags); - while ((Command = DAC960_AllocateCommand(Controller)) == NULL) - DAC960_WaitForCommand(Controller); - while (Controller->V1.DirectCommandActive[DCDB.Channel] - [DCDB.TargetID]) - { - spin_unlock_irq(&Controller->queue_lock); - __wait_event(Controller->CommandWaitQueue, - !Controller->V1.DirectCommandActive - [DCDB.Channel][DCDB.TargetID]); - spin_lock_irq(&Controller->queue_lock); - } - Controller->V1.DirectCommandActive[DCDB.Channel] - [DCDB.TargetID] = true; - spin_unlock_irqrestore(&Controller->queue_lock, flags); - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - memcpy(&Command->V1.CommandMailbox, &UserCommand.CommandMailbox, - sizeof(DAC960_V1_CommandMailbox_T)); - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_IOBUFDMA; 
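-      /*
-        Note (descriptive comment, added for clarity): the command mailbox
-        points at the DMA-able copy of the DCDB (DCDB_IOBUF), while the DCDB
-        itself points at the separate data transfer buffer, so DCDB.BusAddress
-        must be patched before the structure is copied into DCDB_IOBUF below.
-      */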
- DCDB.BusAddress = DataTransferBufferDMA; - memcpy(DCDB_IOBUF, &DCDB, sizeof(DAC960_V1_DCDB_T)); - } - else - { - spin_lock_irqsave(&Controller->queue_lock, flags); - while ((Command = DAC960_AllocateCommand(Controller)) == NULL) - DAC960_WaitForCommand(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - memcpy(&Command->V1.CommandMailbox, &UserCommand.CommandMailbox, - sizeof(DAC960_V1_CommandMailbox_T)); - if (DataTransferBuffer != NULL) - Command->V1.CommandMailbox.Type3.BusAddress = - DataTransferBufferDMA; - } - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V1.CommandStatus; - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_DeallocateCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - if (DataTransferLength > 0) - { - if (copy_to_user(UserCommand.DataTransferBuffer, - DataTransferBuffer, DataTransferLength)) { - ErrorCode = -EFAULT; - goto Failure1; - } - } - if (CommandOpcode == DAC960_V1_DCDB) - { - /* - I don't believe Target or Channel in the DCDB_IOBUF - should be any different from the contents of DCDB. - */ - Controller->V1.DirectCommandActive[DCDB.Channel] - [DCDB.TargetID] = false; - if (copy_to_user(UserCommand.DCDB, DCDB_IOBUF, - sizeof(DAC960_V1_DCDB_T))) { - ErrorCode = -EFAULT; - goto Failure1; - } - } - ErrorCode = CommandStatus; - Failure1: - if (DataTransferBuffer != NULL) - pci_free_consistent(Controller->PCIDevice, abs(DataTransferLength), - DataTransferBuffer, DataTransferBufferDMA); - if (DCDB_IOBUF != NULL) - pci_free_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T), - DCDB_IOBUF, DCDB_IOBUFDMA); - out: - return ErrorCode; -} - -static long DAC960_gam_v2_execute_command(DAC960_V2_UserCommand_T __user *UserSpaceUserCommand) -{ - DAC960_V2_UserCommand_T UserCommand; - DAC960_Controller_T *Controller; - DAC960_Command_T *Command = NULL; - DAC960_V2_CommandMailbox_T *CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - unsigned long flags; - int ControllerNumber, DataTransferLength; - int DataTransferResidue, RequestSenseLength; - unsigned char *DataTransferBuffer = NULL; - dma_addr_t DataTransferBufferDMA; - unsigned char *RequestSenseBuffer = NULL; - dma_addr_t RequestSenseBufferDMA; - long ErrorCode = -EINVAL; - - if (UserSpaceUserCommand == NULL) - goto out; - if (copy_from_user(&UserCommand, UserSpaceUserCommand, - sizeof(DAC960_V2_UserCommand_T))) { - ErrorCode = -EFAULT; - goto out; - } - ErrorCode = -ENXIO; - ControllerNumber = UserCommand.ControllerNumber; - if (ControllerNumber < 0 || - ControllerNumber > DAC960_ControllerCount - 1) - goto out; - Controller = DAC960_Controllers[ControllerNumber]; - if (Controller == NULL) - goto out; - if (Controller->FirmwareType != DAC960_V2_Controller){ - ErrorCode = -EINVAL; - goto out; - } - DataTransferLength = UserCommand.DataTransferLength; - ErrorCode = -ENOMEM; - if (DataTransferLength > 0) - { - DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice, - DataTransferLength, - &DataTransferBufferDMA); - if (DataTransferBuffer == NULL) - goto out; - } - else if (DataTransferLength < 0) - { - DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, - -DataTransferLength, &DataTransferBufferDMA); - if (DataTransferBuffer == NULL) - goto out; - if (copy_from_user(DataTransferBuffer, - UserCommand.DataTransferBuffer, - -DataTransferLength)) { - ErrorCode = -EFAULT; - goto Failure2; - } - } - RequestSenseLength = 
UserCommand.RequestSenseLength; - if (RequestSenseLength > 0) - { - RequestSenseBuffer = pci_zalloc_consistent(Controller->PCIDevice, - RequestSenseLength, - &RequestSenseBufferDMA); - if (RequestSenseBuffer == NULL) - { - ErrorCode = -ENOMEM; - goto Failure2; - } - } - spin_lock_irqsave(&Controller->queue_lock, flags); - while ((Command = DAC960_AllocateCommand(Controller)) == NULL) - DAC960_WaitForCommand(Controller); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox = &Command->V2.CommandMailbox; - memcpy(CommandMailbox, &UserCommand.CommandMailbox, - sizeof(DAC960_V2_CommandMailbox_T)); - CommandMailbox->Common.CommandControlBits - .AdditionalScatterGatherListMemory = false; - CommandMailbox->Common.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->Common.DataTransferSize = 0; - CommandMailbox->Common.DataTransferPageNumber = 0; - memset(&CommandMailbox->Common.DataTransferMemoryAddress, 0, - sizeof(DAC960_V2_DataTransferMemoryAddress_T)); - if (DataTransferLength != 0) - { - if (DataTransferLength > 0) - { - CommandMailbox->Common.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->Common.DataTransferSize = DataTransferLength; - } - else - { - CommandMailbox->Common.CommandControlBits - .DataTransferControllerToHost = false; - CommandMailbox->Common.DataTransferSize = -DataTransferLength; - } - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = DataTransferBufferDMA; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->Common.DataTransferSize; - } - if (RequestSenseLength > 0) - { - CommandMailbox->Common.CommandControlBits - .NoAutoRequestSense = false; - CommandMailbox->Common.RequestSenseSize = RequestSenseLength; - CommandMailbox->Common.RequestSenseBusAddress = - RequestSenseBufferDMA; - } - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - RequestSenseLength = Command->V2.RequestSenseLength; - DataTransferResidue = Command->V2.DataTransferResidue; - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_DeallocateCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - if (RequestSenseLength > UserCommand.RequestSenseLength) - RequestSenseLength = UserCommand.RequestSenseLength; - if (copy_to_user(&UserSpaceUserCommand->DataTransferLength, - &DataTransferResidue, - sizeof(DataTransferResidue))) { - ErrorCode = -EFAULT; - goto Failure2; - } - if (copy_to_user(&UserSpaceUserCommand->RequestSenseLength, - &RequestSenseLength, sizeof(RequestSenseLength))) { - ErrorCode = -EFAULT; - goto Failure2; - } - if (DataTransferLength > 0) - { - if (copy_to_user(UserCommand.DataTransferBuffer, - DataTransferBuffer, DataTransferLength)) { - ErrorCode = -EFAULT; - goto Failure2; - } - } - if (RequestSenseLength > 0) - { - if (copy_to_user(UserCommand.RequestSenseBuffer, - RequestSenseBuffer, RequestSenseLength)) { - ErrorCode = -EFAULT; - goto Failure2; - } - } - ErrorCode = CommandStatus; - Failure2: - pci_free_consistent(Controller->PCIDevice, abs(DataTransferLength), - DataTransferBuffer, DataTransferBufferDMA); - if (RequestSenseBuffer != NULL) - pci_free_consistent(Controller->PCIDevice, RequestSenseLength, - RequestSenseBuffer, RequestSenseBufferDMA); -out: - return ErrorCode; -} - -static long DAC960_gam_v2_get_health_status(DAC960_V2_GetHealthStatus_T __user 
*UserSpaceGetHealthStatus) -{ - DAC960_V2_GetHealthStatus_T GetHealthStatus; - DAC960_V2_HealthStatusBuffer_T HealthStatusBuffer; - DAC960_Controller_T *Controller; - int ControllerNumber; - long ErrorCode; - - if (UserSpaceGetHealthStatus == NULL) { - ErrorCode = -EINVAL; - goto out; - } - if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus, - sizeof(DAC960_V2_GetHealthStatus_T))) { - ErrorCode = -EFAULT; - goto out; - } - ErrorCode = -ENXIO; - ControllerNumber = GetHealthStatus.ControllerNumber; - if (ControllerNumber < 0 || - ControllerNumber > DAC960_ControllerCount - 1) - goto out; - Controller = DAC960_Controllers[ControllerNumber]; - if (Controller == NULL) - goto out; - if (Controller->FirmwareType != DAC960_V2_Controller) { - ErrorCode = -EINVAL; - goto out; - } - if (copy_from_user(&HealthStatusBuffer, - GetHealthStatus.HealthStatusBuffer, - sizeof(DAC960_V2_HealthStatusBuffer_T))) { - ErrorCode = -EFAULT; - goto out; - } - ErrorCode = wait_event_interruptible_timeout(Controller->HealthStatusWaitQueue, - !(Controller->V2.HealthStatusBuffer->StatusChangeCounter - == HealthStatusBuffer.StatusChangeCounter && - Controller->V2.HealthStatusBuffer->NextEventSequenceNumber - == HealthStatusBuffer.NextEventSequenceNumber), - DAC960_MonitoringTimerInterval); - if (ErrorCode == -ERESTARTSYS) { - ErrorCode = -EINTR; - goto out; - } - if (copy_to_user(GetHealthStatus.HealthStatusBuffer, - Controller->V2.HealthStatusBuffer, - sizeof(DAC960_V2_HealthStatusBuffer_T))) - ErrorCode = -EFAULT; - else - ErrorCode = 0; - -out: - return ErrorCode; -} - -/* - * DAC960_gam_ioctl is the ioctl function for performing RAID operations. -*/ - -static long DAC960_gam_ioctl(struct file *file, unsigned int Request, - unsigned long Argument) -{ - long ErrorCode = 0; - void __user *argp = (void __user *)Argument; - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - - mutex_lock(&DAC960_mutex); - switch (Request) - { - case DAC960_IOCTL_GET_CONTROLLER_COUNT: - ErrorCode = DAC960_ControllerCount; - break; - case DAC960_IOCTL_GET_CONTROLLER_INFO: - ErrorCode = DAC960_gam_get_controller_info(argp); - break; - case DAC960_IOCTL_V1_EXECUTE_COMMAND: - ErrorCode = DAC960_gam_v1_execute_command(argp); - break; - case DAC960_IOCTL_V2_EXECUTE_COMMAND: - ErrorCode = DAC960_gam_v2_execute_command(argp); - break; - case DAC960_IOCTL_V2_GET_HEALTH_STATUS: - ErrorCode = DAC960_gam_v2_get_health_status(argp); - break; - default: - ErrorCode = -ENOTTY; - } - mutex_unlock(&DAC960_mutex); - return ErrorCode; -} - -static const struct file_operations DAC960_gam_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = DAC960_gam_ioctl, - .llseek = noop_llseek, -}; - -static struct miscdevice DAC960_gam_dev = { - DAC960_GAM_MINOR, - "dac960_gam", - &DAC960_gam_fops -}; - -static int DAC960_gam_init(void) -{ - int ret; - - ret = misc_register(&DAC960_gam_dev); - if (ret) - printk(KERN_ERR "DAC960_gam: can't misc_register on minor %d\n", DAC960_GAM_MINOR); - return ret; -} - -static void DAC960_gam_cleanup(void) -{ - misc_deregister(&DAC960_gam_dev); -} - -#endif /* DAC960_GAM_MINOR */ - -static struct DAC960_privdata DAC960_GEM_privdata = { - .HardwareType = DAC960_GEM_Controller, - .FirmwareType = DAC960_V2_Controller, - .InterruptHandler = DAC960_GEM_InterruptHandler, - .MemoryWindowSize = DAC960_GEM_RegisterWindowSize, -}; - - -static struct DAC960_privdata DAC960_BA_privdata = { - .HardwareType = DAC960_BA_Controller, - .FirmwareType = DAC960_V2_Controller, - .InterruptHandler = DAC960_BA_InterruptHandler, - .MemoryWindowSize = 
DAC960_BA_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_LP_privdata = { - .HardwareType = DAC960_LP_Controller, - .FirmwareType = DAC960_V2_Controller, - .InterruptHandler = DAC960_LP_InterruptHandler, - .MemoryWindowSize = DAC960_LP_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_LA_privdata = { - .HardwareType = DAC960_LA_Controller, - .FirmwareType = DAC960_V1_Controller, - .InterruptHandler = DAC960_LA_InterruptHandler, - .MemoryWindowSize = DAC960_LA_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_PG_privdata = { - .HardwareType = DAC960_PG_Controller, - .FirmwareType = DAC960_V1_Controller, - .InterruptHandler = DAC960_PG_InterruptHandler, - .MemoryWindowSize = DAC960_PG_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_PD_privdata = { - .HardwareType = DAC960_PD_Controller, - .FirmwareType = DAC960_V1_Controller, - .InterruptHandler = DAC960_PD_InterruptHandler, - .MemoryWindowSize = DAC960_PD_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_P_privdata = { - .HardwareType = DAC960_P_Controller, - .FirmwareType = DAC960_V1_Controller, - .InterruptHandler = DAC960_P_InterruptHandler, - .MemoryWindowSize = DAC960_PD_RegisterWindowSize, -}; - -static const struct pci_device_id DAC960_id_table[] = { - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_GEM, - .subvendor = PCI_VENDOR_ID_MYLEX, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_GEM_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_BA, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_BA_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_LP, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_LP_privdata, - }, - { - .vendor = PCI_VENDOR_ID_DEC, - .device = PCI_DEVICE_ID_DEC_21285, - .subvendor = PCI_VENDOR_ID_MYLEX, - .subdevice = PCI_DEVICE_ID_MYLEX_DAC960_LA, - .driver_data = (unsigned long) &DAC960_LA_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_PG, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_PG_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_PD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_PD_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_P, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_P_privdata, - }, - {0, }, -}; - -MODULE_DEVICE_TABLE(pci, DAC960_id_table); - -static struct pci_driver DAC960_pci_driver = { - .name = "DAC960", - .id_table = DAC960_id_table, - .probe = DAC960_Probe, - .remove = DAC960_Remove, -}; - -static int __init DAC960_init_module(void) -{ - int ret; - - ret = pci_register_driver(&DAC960_pci_driver); -#ifdef DAC960_GAM_MINOR - if (!ret) - DAC960_gam_init(); -#endif - return ret; -} - -static void __exit DAC960_cleanup_module(void) -{ - int i; - -#ifdef DAC960_GAM_MINOR - DAC960_gam_cleanup(); -#endif - - for (i = 0; i < DAC960_ControllerCount; i++) { - DAC960_Controller_T *Controller = DAC960_Controllers[i]; - if (Controller == NULL) - continue; - DAC960_FinalizeController(Controller); - } - if (DAC960_ProcDirectoryEntry != NULL) { - remove_proc_entry("rd/status", NULL); - remove_proc_entry("rd", NULL); - } - DAC960_ControllerCount = 0; 
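-  /*
-    Descriptive comment, added for clarity: at this point every registered
-    controller has been finalized and the /proc/rd entries removed, so the
-    driver detaches from the PCI core last.
-  */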
- pci_unregister_driver(&DAC960_pci_driver); -} - -module_init(DAC960_init_module); -module_exit(DAC960_cleanup_module); - -MODULE_LICENSE("GPL"); diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h deleted file mode 100644 index 1439e651928b..000000000000 --- a/drivers/block/DAC960.h +++ /dev/null @@ -1,4414 +0,0 @@ -/* - - Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers - - Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com> - - This program is free software; you may redistribute and/or modify it under - the terms of the GNU General Public License Version 2 as published by the - Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for complete details. - - The author respectfully requests that any modifications to this software be - sent directly to him for evaluation and testing. - -*/ - - -/* - Define the maximum number of DAC960 Controllers supported by this driver. -*/ - -#define DAC960_MaxControllers 8 - - -/* - Define the maximum number of Controller Channels supported by DAC960 - V1 and V2 Firmware Controllers. -*/ - -#define DAC960_V1_MaxChannels 3 -#define DAC960_V2_MaxChannels 4 - - -/* - Define the maximum number of Targets per Channel supported by DAC960 - V1 and V2 Firmware Controllers. -*/ - -#define DAC960_V1_MaxTargets 16 -#define DAC960_V2_MaxTargets 128 - - -/* - Define the maximum number of Logical Drives supported by DAC960 - V1 and V2 Firmware Controllers. -*/ - -#define DAC960_MaxLogicalDrives 32 - - -/* - Define the maximum number of Physical Devices supported by DAC960 - V1 and V2 Firmware Controllers. -*/ - -#define DAC960_V1_MaxPhysicalDevices 45 -#define DAC960_V2_MaxPhysicalDevices 272 - -/* - Define a 32/64 bit I/O Address data type. -*/ - -typedef unsigned long DAC960_IO_Address_T; - - -/* - Define a 32/64 bit PCI Bus Address data type. -*/ - -typedef unsigned long DAC960_PCI_Address_T; - - -/* - Define a 32 bit Bus Address data type. -*/ - -typedef unsigned int DAC960_BusAddress32_T; - - -/* - Define a 64 bit Bus Address data type. -*/ - -typedef unsigned long long DAC960_BusAddress64_T; - - -/* - Define a 32 bit Byte Count data type. -*/ - -typedef unsigned int DAC960_ByteCount32_T; - - -/* - Define a 64 bit Byte Count data type. -*/ - -typedef unsigned long long DAC960_ByteCount64_T; - - -/* - dma_loaf is used by helper routines to divide a region of - dma mapped memory into smaller pieces, where those pieces - are not of uniform size. - */ - -struct dma_loaf { - void *cpu_base; - dma_addr_t dma_base; - size_t length; - void *cpu_free; - dma_addr_t dma_free; -}; - -/* - Define the SCSI INQUIRY Standard Data structure. 
-*/ - -typedef struct DAC960_SCSI_Inquiry -{ - unsigned char PeripheralDeviceType:5; /* Byte 0 Bits 0-4 */ - unsigned char PeripheralQualifier:3; /* Byte 0 Bits 5-7 */ - unsigned char DeviceTypeModifier:7; /* Byte 1 Bits 0-6 */ - bool RMB:1; /* Byte 1 Bit 7 */ - unsigned char ANSI_ApprovedVersion:3; /* Byte 2 Bits 0-2 */ - unsigned char ECMA_Version:3; /* Byte 2 Bits 3-5 */ - unsigned char ISO_Version:2; /* Byte 2 Bits 6-7 */ - unsigned char ResponseDataFormat:4; /* Byte 3 Bits 0-3 */ - unsigned char :2; /* Byte 3 Bits 4-5 */ - bool TrmIOP:1; /* Byte 3 Bit 6 */ - bool AENC:1; /* Byte 3 Bit 7 */ - unsigned char AdditionalLength; /* Byte 4 */ - unsigned char :8; /* Byte 5 */ - unsigned char :8; /* Byte 6 */ - bool SftRe:1; /* Byte 7 Bit 0 */ - bool CmdQue:1; /* Byte 7 Bit 1 */ - bool :1; /* Byte 7 Bit 2 */ - bool Linked:1; /* Byte 7 Bit 3 */ - bool Sync:1; /* Byte 7 Bit 4 */ - bool WBus16:1; /* Byte 7 Bit 5 */ - bool WBus32:1; /* Byte 7 Bit 6 */ - bool RelAdr:1; /* Byte 7 Bit 7 */ - unsigned char VendorIdentification[8]; /* Bytes 8-15 */ - unsigned char ProductIdentification[16]; /* Bytes 16-31 */ - unsigned char ProductRevisionLevel[4]; /* Bytes 32-35 */ -} -DAC960_SCSI_Inquiry_T; - - -/* - Define the SCSI INQUIRY Unit Serial Number structure. -*/ - -typedef struct DAC960_SCSI_Inquiry_UnitSerialNumber -{ - unsigned char PeripheralDeviceType:5; /* Byte 0 Bits 0-4 */ - unsigned char PeripheralQualifier:3; /* Byte 0 Bits 5-7 */ - unsigned char PageCode; /* Byte 1 */ - unsigned char :8; /* Byte 2 */ - unsigned char PageLength; /* Byte 3 */ - unsigned char ProductSerialNumber[28]; /* Bytes 4-31 */ -} -DAC960_SCSI_Inquiry_UnitSerialNumber_T; - - -/* - Define the SCSI REQUEST SENSE Sense Key type. -*/ - -typedef enum -{ - DAC960_SenseKey_NoSense = 0x0, - DAC960_SenseKey_RecoveredError = 0x1, - DAC960_SenseKey_NotReady = 0x2, - DAC960_SenseKey_MediumError = 0x3, - DAC960_SenseKey_HardwareError = 0x4, - DAC960_SenseKey_IllegalRequest = 0x5, - DAC960_SenseKey_UnitAttention = 0x6, - DAC960_SenseKey_DataProtect = 0x7, - DAC960_SenseKey_BlankCheck = 0x8, - DAC960_SenseKey_VendorSpecific = 0x9, - DAC960_SenseKey_CopyAborted = 0xA, - DAC960_SenseKey_AbortedCommand = 0xB, - DAC960_SenseKey_Equal = 0xC, - DAC960_SenseKey_VolumeOverflow = 0xD, - DAC960_SenseKey_Miscompare = 0xE, - DAC960_SenseKey_Reserved = 0xF -} -__attribute__ ((packed)) -DAC960_SCSI_RequestSenseKey_T; - - -/* - Define the SCSI REQUEST SENSE structure. -*/ - -typedef struct DAC960_SCSI_RequestSense -{ - unsigned char ErrorCode:7; /* Byte 0 Bits 0-6 */ - bool Valid:1; /* Byte 0 Bit 7 */ - unsigned char SegmentNumber; /* Byte 1 */ - DAC960_SCSI_RequestSenseKey_T SenseKey:4; /* Byte 2 Bits 0-3 */ - unsigned char :1; /* Byte 2 Bit 4 */ - bool ILI:1; /* Byte 2 Bit 5 */ - bool EOM:1; /* Byte 2 Bit 6 */ - bool Filemark:1; /* Byte 2 Bit 7 */ - unsigned char Information[4]; /* Bytes 3-6 */ - unsigned char AdditionalSenseLength; /* Byte 7 */ - unsigned char CommandSpecificInformation[4]; /* Bytes 8-11 */ - unsigned char AdditionalSenseCode; /* Byte 12 */ - unsigned char AdditionalSenseCodeQualifier; /* Byte 13 */ -} -DAC960_SCSI_RequestSense_T; - - -/* - Define the DAC960 V1 Firmware Command Opcodes. 
-*/ - -typedef enum -{ - /* I/O Commands */ - DAC960_V1_ReadExtended = 0x33, - DAC960_V1_WriteExtended = 0x34, - DAC960_V1_ReadAheadExtended = 0x35, - DAC960_V1_ReadExtendedWithScatterGather = 0xB3, - DAC960_V1_WriteExtendedWithScatterGather = 0xB4, - DAC960_V1_Read = 0x36, - DAC960_V1_ReadWithScatterGather = 0xB6, - DAC960_V1_Write = 0x37, - DAC960_V1_WriteWithScatterGather = 0xB7, - DAC960_V1_DCDB = 0x04, - DAC960_V1_DCDBWithScatterGather = 0x84, - DAC960_V1_Flush = 0x0A, - /* Controller Status Related Commands */ - DAC960_V1_Enquiry = 0x53, - DAC960_V1_Enquiry2 = 0x1C, - DAC960_V1_GetLogicalDriveElement = 0x55, - DAC960_V1_GetLogicalDriveInformation = 0x19, - DAC960_V1_IOPortRead = 0x39, - DAC960_V1_IOPortWrite = 0x3A, - DAC960_V1_GetSDStats = 0x3E, - DAC960_V1_GetPDStats = 0x3F, - DAC960_V1_PerformEventLogOperation = 0x72, - /* Device Related Commands */ - DAC960_V1_StartDevice = 0x10, - DAC960_V1_GetDeviceState = 0x50, - DAC960_V1_StopChannel = 0x13, - DAC960_V1_StartChannel = 0x12, - DAC960_V1_ResetChannel = 0x1A, - /* Commands Associated with Data Consistency and Errors */ - DAC960_V1_Rebuild = 0x09, - DAC960_V1_RebuildAsync = 0x16, - DAC960_V1_CheckConsistency = 0x0F, - DAC960_V1_CheckConsistencyAsync = 0x1E, - DAC960_V1_RebuildStat = 0x0C, - DAC960_V1_GetRebuildProgress = 0x27, - DAC960_V1_RebuildControl = 0x1F, - DAC960_V1_ReadBadBlockTable = 0x0B, - DAC960_V1_ReadBadDataTable = 0x25, - DAC960_V1_ClearBadDataTable = 0x26, - DAC960_V1_GetErrorTable = 0x17, - DAC960_V1_AddCapacityAsync = 0x2A, - DAC960_V1_BackgroundInitializationControl = 0x2B, - /* Configuration Related Commands */ - DAC960_V1_ReadConfig2 = 0x3D, - DAC960_V1_WriteConfig2 = 0x3C, - DAC960_V1_ReadConfigurationOnDisk = 0x4A, - DAC960_V1_WriteConfigurationOnDisk = 0x4B, - DAC960_V1_ReadConfiguration = 0x4E, - DAC960_V1_ReadBackupConfiguration = 0x4D, - DAC960_V1_WriteConfiguration = 0x4F, - DAC960_V1_AddConfiguration = 0x4C, - DAC960_V1_ReadConfigurationLabel = 0x48, - DAC960_V1_WriteConfigurationLabel = 0x49, - /* Firmware Upgrade Related Commands */ - DAC960_V1_LoadImage = 0x20, - DAC960_V1_StoreImage = 0x21, - DAC960_V1_ProgramImage = 0x22, - /* Diagnostic Commands */ - DAC960_V1_SetDiagnosticMode = 0x31, - DAC960_V1_RunDiagnostic = 0x32, - /* Subsystem Service Commands */ - DAC960_V1_GetSubsystemData = 0x70, - DAC960_V1_SetSubsystemParameters = 0x71, - /* Version 2.xx Firmware Commands */ - DAC960_V1_Enquiry_Old = 0x05, - DAC960_V1_GetDeviceState_Old = 0x14, - DAC960_V1_Read_Old = 0x02, - DAC960_V1_Write_Old = 0x03, - DAC960_V1_ReadWithScatterGather_Old = 0x82, - DAC960_V1_WriteWithScatterGather_Old = 0x83 -} -__attribute__ ((packed)) -DAC960_V1_CommandOpcode_T; - - -/* - Define the DAC960 V1 Firmware Command Identifier type. -*/ - -typedef unsigned char DAC960_V1_CommandIdentifier_T; - - -/* - Define the DAC960 V1 Firmware Command Status Codes. 
-*/ - -#define DAC960_V1_NormalCompletion 0x0000 /* Common */ -#define DAC960_V1_CheckConditionReceived 0x0002 /* Common */ -#define DAC960_V1_NoDeviceAtAddress 0x0102 /* Common */ -#define DAC960_V1_InvalidDeviceAddress 0x0105 /* Common */ -#define DAC960_V1_InvalidParameter 0x0105 /* Common */ -#define DAC960_V1_IrrecoverableDataError 0x0001 /* I/O */ -#define DAC960_V1_LogicalDriveNonexistentOrOffline 0x0002 /* I/O */ -#define DAC960_V1_AccessBeyondEndOfLogicalDrive 0x0105 /* I/O */ -#define DAC960_V1_BadDataEncountered 0x010C /* I/O */ -#define DAC960_V1_DeviceBusy 0x0008 /* DCDB */ -#define DAC960_V1_DeviceNonresponsive 0x000E /* DCDB */ -#define DAC960_V1_CommandTerminatedAbnormally 0x000F /* DCDB */ -#define DAC960_V1_UnableToStartDevice 0x0002 /* Device */ -#define DAC960_V1_InvalidChannelOrTargetOrModifier 0x0105 /* Device */ -#define DAC960_V1_ChannelBusy 0x0106 /* Device */ -#define DAC960_V1_ChannelNotStopped 0x0002 /* Device */ -#define DAC960_V1_AttemptToRebuildOnlineDrive 0x0002 /* Consistency */ -#define DAC960_V1_RebuildBadBlocksEncountered 0x0003 /* Consistency */ -#define DAC960_V1_NewDiskFailedDuringRebuild 0x0004 /* Consistency */ -#define DAC960_V1_RebuildOrCheckAlreadyInProgress 0x0106 /* Consistency */ -#define DAC960_V1_DependentDiskIsDead 0x0002 /* Consistency */ -#define DAC960_V1_InconsistentBlocksFound 0x0003 /* Consistency */ -#define DAC960_V1_InvalidOrNonredundantLogicalDrive 0x0105 /* Consistency */ -#define DAC960_V1_NoRebuildOrCheckInProgress 0x0105 /* Consistency */ -#define DAC960_V1_RebuildInProgress_DataValid 0x0000 /* Consistency */ -#define DAC960_V1_RebuildFailed_LogicalDriveFailure 0x0002 /* Consistency */ -#define DAC960_V1_RebuildFailed_BadBlocksOnOther 0x0003 /* Consistency */ -#define DAC960_V1_RebuildFailed_NewDriveFailed 0x0004 /* Consistency */ -#define DAC960_V1_RebuildSuccessful 0x0100 /* Consistency */ -#define DAC960_V1_RebuildSuccessfullyTerminated 0x0107 /* Consistency */ -#define DAC960_V1_BackgroundInitSuccessful 0x0100 /* Consistency */ -#define DAC960_V1_BackgroundInitAborted 0x0005 /* Consistency */ -#define DAC960_V1_NoBackgroundInitInProgress 0x0105 /* Consistency */ -#define DAC960_V1_AddCapacityInProgress 0x0004 /* Consistency */ -#define DAC960_V1_AddCapacityFailedOrSuspended 0x00F4 /* Consistency */ -#define DAC960_V1_Config2ChecksumError 0x0002 /* Configuration */ -#define DAC960_V1_ConfigurationSuspended 0x0106 /* Configuration */ -#define DAC960_V1_FailedToConfigureNVRAM 0x0105 /* Configuration */ -#define DAC960_V1_ConfigurationNotSavedStateChange 0x0106 /* Configuration */ -#define DAC960_V1_SubsystemNotInstalled 0x0001 /* Subsystem */ -#define DAC960_V1_SubsystemFailed 0x0002 /* Subsystem */ -#define DAC960_V1_SubsystemBusy 0x0106 /* Subsystem */ - -typedef unsigned short DAC960_V1_CommandStatus_T; - - -/* - Define the DAC960 V1 Firmware Enquiry Command reply structure. 
-*/ - -typedef struct DAC960_V1_Enquiry -{ - unsigned char NumberOfLogicalDrives; /* Byte 0 */ - unsigned int :24; /* Bytes 1-3 */ - unsigned int LogicalDriveSizes[32]; /* Bytes 4-131 */ - unsigned short FlashAge; /* Bytes 132-133 */ - struct { - bool DeferredWriteError:1; /* Byte 134 Bit 0 */ - bool BatteryLow:1; /* Byte 134 Bit 1 */ - unsigned char :6; /* Byte 134 Bits 2-7 */ - } StatusFlags; - unsigned char :8; /* Byte 135 */ - unsigned char MinorFirmwareVersion; /* Byte 136 */ - unsigned char MajorFirmwareVersion; /* Byte 137 */ - enum { - DAC960_V1_NoStandbyRebuildOrCheckInProgress = 0x00, - DAC960_V1_StandbyRebuildInProgress = 0x01, - DAC960_V1_BackgroundRebuildInProgress = 0x02, - DAC960_V1_BackgroundCheckInProgress = 0x03, - DAC960_V1_StandbyRebuildCompletedWithError = 0xFF, - DAC960_V1_BackgroundRebuildOrCheckFailed_DriveFailed = 0xF0, - DAC960_V1_BackgroundRebuildOrCheckFailed_LogicalDriveFailed = 0xF1, - DAC960_V1_BackgroundRebuildOrCheckFailed_OtherCauses = 0xF2, - DAC960_V1_BackgroundRebuildOrCheckSuccessfullyTerminated = 0xF3 - } __attribute__ ((packed)) RebuildFlag; /* Byte 138 */ - unsigned char MaxCommands; /* Byte 139 */ - unsigned char OfflineLogicalDriveCount; /* Byte 140 */ - unsigned char :8; /* Byte 141 */ - unsigned short EventLogSequenceNumber; /* Bytes 142-143 */ - unsigned char CriticalLogicalDriveCount; /* Byte 144 */ - unsigned int :24; /* Bytes 145-147 */ - unsigned char DeadDriveCount; /* Byte 148 */ - unsigned char :8; /* Byte 149 */ - unsigned char RebuildCount; /* Byte 150 */ - struct { - unsigned char :3; /* Byte 151 Bits 0-2 */ - bool BatteryBackupUnitPresent:1; /* Byte 151 Bit 3 */ - unsigned char :3; /* Byte 151 Bits 4-6 */ - unsigned char :1; /* Byte 151 Bit 7 */ - } MiscFlags; - struct { - unsigned char TargetID; - unsigned char Channel; - } DeadDrives[21]; /* Bytes 152-194 */ - unsigned char Reserved[62]; /* Bytes 195-255 */ -} -__attribute__ ((packed)) -DAC960_V1_Enquiry_T; - - -/* - Define the DAC960 V1 Firmware Enquiry2 Command reply structure. 
-*/ - -typedef struct DAC960_V1_Enquiry2 -{ - struct { - enum { - DAC960_V1_P_PD_PU = 0x01, - DAC960_V1_PL = 0x02, - DAC960_V1_PG = 0x10, - DAC960_V1_PJ = 0x11, - DAC960_V1_PR = 0x12, - DAC960_V1_PT = 0x13, - DAC960_V1_PTL0 = 0x14, - DAC960_V1_PRL = 0x15, - DAC960_V1_PTL1 = 0x16, - DAC960_V1_1164P = 0x20 - } __attribute__ ((packed)) SubModel; /* Byte 0 */ - unsigned char ActualChannels; /* Byte 1 */ - enum { - DAC960_V1_FiveChannelBoard = 0x01, - DAC960_V1_ThreeChannelBoard = 0x02, - DAC960_V1_TwoChannelBoard = 0x03, - DAC960_V1_ThreeChannelASIC_DAC = 0x04 - } __attribute__ ((packed)) Model; /* Byte 2 */ - enum { - DAC960_V1_EISA_Controller = 0x01, - DAC960_V1_MicroChannel_Controller = 0x02, - DAC960_V1_PCI_Controller = 0x03, - DAC960_V1_SCSItoSCSI_Controller = 0x08 - } __attribute__ ((packed)) ProductFamily; /* Byte 3 */ - } HardwareID; /* Bytes 0-3 */ - /* MajorVersion.MinorVersion-FirmwareType-TurnID */ - struct { - unsigned char MajorVersion; /* Byte 4 */ - unsigned char MinorVersion; /* Byte 5 */ - unsigned char TurnID; /* Byte 6 */ - char FirmwareType; /* Byte 7 */ - } FirmwareID; /* Bytes 4-7 */ - unsigned char :8; /* Byte 8 */ - unsigned int :24; /* Bytes 9-11 */ - unsigned char ConfiguredChannels; /* Byte 12 */ - unsigned char ActualChannels; /* Byte 13 */ - unsigned char MaxTargets; /* Byte 14 */ - unsigned char MaxTags; /* Byte 15 */ - unsigned char MaxLogicalDrives; /* Byte 16 */ - unsigned char MaxArms; /* Byte 17 */ - unsigned char MaxSpans; /* Byte 18 */ - unsigned char :8; /* Byte 19 */ - unsigned int :32; /* Bytes 20-23 */ - unsigned int MemorySize; /* Bytes 24-27 */ - unsigned int CacheSize; /* Bytes 28-31 */ - unsigned int FlashMemorySize; /* Bytes 32-35 */ - unsigned int NonVolatileMemorySize; /* Bytes 36-39 */ - struct { - enum { - DAC960_V1_RamType_DRAM = 0x0, - DAC960_V1_RamType_EDO = 0x1, - DAC960_V1_RamType_SDRAM = 0x2, - DAC960_V1_RamType_Last = 0x7 - } __attribute__ ((packed)) RamType:3; /* Byte 40 Bits 0-2 */ - enum { - DAC960_V1_ErrorCorrection_None = 0x0, - DAC960_V1_ErrorCorrection_Parity = 0x1, - DAC960_V1_ErrorCorrection_ECC = 0x2, - DAC960_V1_ErrorCorrection_Last = 0x7 - } __attribute__ ((packed)) ErrorCorrection:3; /* Byte 40 Bits 3-5 */ - bool FastPageMode:1; /* Byte 40 Bit 6 */ - bool LowPowerMemory:1; /* Byte 40 Bit 7 */ - unsigned char :8; /* Bytes 41 */ - } MemoryType; - unsigned short ClockSpeed; /* Bytes 42-43 */ - unsigned short MemorySpeed; /* Bytes 44-45 */ - unsigned short HardwareSpeed; /* Bytes 46-47 */ - unsigned int :32; /* Bytes 48-51 */ - unsigned int :32; /* Bytes 52-55 */ - unsigned char :8; /* Byte 56 */ - unsigned char :8; /* Byte 57 */ - unsigned short :16; /* Bytes 58-59 */ - unsigned short MaxCommands; /* Bytes 60-61 */ - unsigned short MaxScatterGatherEntries; /* Bytes 62-63 */ - unsigned short MaxDriveCommands; /* Bytes 64-65 */ - unsigned short MaxIODescriptors; /* Bytes 66-67 */ - unsigned short MaxCombinedSectors; /* Bytes 68-69 */ - unsigned char Latency; /* Byte 70 */ - unsigned char :8; /* Byte 71 */ - unsigned char SCSITimeout; /* Byte 72 */ - unsigned char :8; /* Byte 73 */ - unsigned short MinFreeLines; /* Bytes 74-75 */ - unsigned int :32; /* Bytes 76-79 */ - unsigned int :32; /* Bytes 80-83 */ - unsigned char RebuildRateConstant; /* Byte 84 */ - unsigned char :8; /* Byte 85 */ - unsigned char :8; /* Byte 86 */ - unsigned char :8; /* Byte 87 */ - unsigned int :32; /* Bytes 88-91 */ - unsigned int :32; /* Bytes 92-95 */ - unsigned short PhysicalDriveBlockSize; /* Bytes 96-97 */ - unsigned short LogicalDriveBlockSize; /* 
Bytes 98-99 */ - unsigned short MaxBlocksPerCommand; /* Bytes 100-101 */ - unsigned short BlockFactor; /* Bytes 102-103 */ - unsigned short CacheLineSize; /* Bytes 104-105 */ - struct { - enum { - DAC960_V1_Narrow_8bit = 0x0, - DAC960_V1_Wide_16bit = 0x1, - DAC960_V1_Wide_32bit = 0x2 - } __attribute__ ((packed)) BusWidth:2; /* Byte 106 Bits 0-1 */ - enum { - DAC960_V1_Fast = 0x0, - DAC960_V1_Ultra = 0x1, - DAC960_V1_Ultra2 = 0x2 - } __attribute__ ((packed)) BusSpeed:2; /* Byte 106 Bits 2-3 */ - bool Differential:1; /* Byte 106 Bit 4 */ - unsigned char :3; /* Byte 106 Bits 5-7 */ - } SCSICapability; - unsigned char :8; /* Byte 107 */ - unsigned int :32; /* Bytes 108-111 */ - unsigned short FirmwareBuildNumber; /* Bytes 112-113 */ - enum { - DAC960_V1_AEMI = 0x01, - DAC960_V1_OEM1 = 0x02, - DAC960_V1_OEM2 = 0x04, - DAC960_V1_OEM3 = 0x08, - DAC960_V1_Conner = 0x10, - DAC960_V1_SAFTE = 0x20 - } __attribute__ ((packed)) FaultManagementType; /* Byte 114 */ - unsigned char :8; /* Byte 115 */ - struct { - bool Clustering:1; /* Byte 116 Bit 0 */ - bool MylexOnlineRAIDExpansion:1; /* Byte 116 Bit 1 */ - bool ReadAhead:1; /* Byte 116 Bit 2 */ - bool BackgroundInitialization:1; /* Byte 116 Bit 3 */ - unsigned int :28; /* Bytes 116-119 */ - } FirmwareFeatures; - unsigned int :32; /* Bytes 120-123 */ - unsigned int :32; /* Bytes 124-127 */ -} -DAC960_V1_Enquiry2_T; - - -/* - Define the DAC960 V1 Firmware Logical Drive State type. -*/ - -typedef enum -{ - DAC960_V1_LogicalDrive_Online = 0x03, - DAC960_V1_LogicalDrive_Critical = 0x04, - DAC960_V1_LogicalDrive_Offline = 0xFF -} -__attribute__ ((packed)) -DAC960_V1_LogicalDriveState_T; - - -/* - Define the DAC960 V1 Firmware Logical Drive Information structure. -*/ - -typedef struct DAC960_V1_LogicalDriveInformation -{ - unsigned int LogicalDriveSize; /* Bytes 0-3 */ - DAC960_V1_LogicalDriveState_T LogicalDriveState; /* Byte 4 */ - unsigned char RAIDLevel:7; /* Byte 5 Bits 0-6 */ - bool WriteBack:1; /* Byte 5 Bit 7 */ - unsigned short :16; /* Bytes 6-7 */ -} -DAC960_V1_LogicalDriveInformation_T; - - -/* - Define the DAC960 V1 Firmware Get Logical Drive Information Command - reply structure. -*/ - -typedef DAC960_V1_LogicalDriveInformation_T - DAC960_V1_LogicalDriveInformationArray_T[DAC960_MaxLogicalDrives]; - - -/* - Define the DAC960 V1 Firmware Perform Event Log Operation Types. -*/ - -typedef enum -{ - DAC960_V1_GetEventLogEntry = 0x00 -} -__attribute__ ((packed)) -DAC960_V1_PerformEventLogOpType_T; - - -/* - Define the DAC960 V1 Firmware Get Event Log Entry Command reply structure. 
-*/ - -typedef struct DAC960_V1_EventLogEntry -{ - unsigned char MessageType; /* Byte 0 */ - unsigned char MessageLength; /* Byte 1 */ - unsigned char TargetID:5; /* Byte 2 Bits 0-4 */ - unsigned char Channel:3; /* Byte 2 Bits 5-7 */ - unsigned char LogicalUnit:6; /* Byte 3 Bits 0-5 */ - unsigned char :2; /* Byte 3 Bits 6-7 */ - unsigned short SequenceNumber; /* Bytes 4-5 */ - unsigned char ErrorCode:7; /* Byte 6 Bits 0-6 */ - bool Valid:1; /* Byte 6 Bit 7 */ - unsigned char SegmentNumber; /* Byte 7 */ - DAC960_SCSI_RequestSenseKey_T SenseKey:4; /* Byte 8 Bits 0-3 */ - unsigned char :1; /* Byte 8 Bit 4 */ - bool ILI:1; /* Byte 8 Bit 5 */ - bool EOM:1; /* Byte 8 Bit 6 */ - bool Filemark:1; /* Byte 8 Bit 7 */ - unsigned char Information[4]; /* Bytes 9-12 */ - unsigned char AdditionalSenseLength; /* Byte 13 */ - unsigned char CommandSpecificInformation[4]; /* Bytes 14-17 */ - unsigned char AdditionalSenseCode; /* Byte 18 */ - unsigned char AdditionalSenseCodeQualifier; /* Byte 19 */ - unsigned char Dummy[12]; /* Bytes 20-31 */ -} -DAC960_V1_EventLogEntry_T; - - -/* - Define the DAC960 V1 Firmware Physical Device State type. -*/ - -typedef enum -{ - DAC960_V1_Device_Dead = 0x00, - DAC960_V1_Device_WriteOnly = 0x02, - DAC960_V1_Device_Online = 0x03, - DAC960_V1_Device_Standby = 0x10 -} -__attribute__ ((packed)) -DAC960_V1_PhysicalDeviceState_T; - - -/* - Define the DAC960 V1 Firmware Get Device State Command reply structure. - The structure is padded by 2 bytes for compatibility with Version 2.xx - Firmware. -*/ - -typedef struct DAC960_V1_DeviceState -{ - bool Present:1; /* Byte 0 Bit 0 */ - unsigned char :7; /* Byte 0 Bits 1-7 */ - enum { - DAC960_V1_OtherType = 0x0, - DAC960_V1_DiskType = 0x1, - DAC960_V1_SequentialType = 0x2, - DAC960_V1_CDROM_or_WORM_Type = 0x3 - } __attribute__ ((packed)) DeviceType:2; /* Byte 1 Bits 0-1 */ - bool :1; /* Byte 1 Bit 2 */ - bool Fast20:1; /* Byte 1 Bit 3 */ - bool Sync:1; /* Byte 1 Bit 4 */ - bool Fast:1; /* Byte 1 Bit 5 */ - bool Wide:1; /* Byte 1 Bit 6 */ - bool TaggedQueuingSupported:1; /* Byte 1 Bit 7 */ - DAC960_V1_PhysicalDeviceState_T DeviceState; /* Byte 2 */ - unsigned char :8; /* Byte 3 */ - unsigned char SynchronousMultiplier; /* Byte 4 */ - unsigned char SynchronousOffset:5; /* Byte 5 Bits 0-4 */ - unsigned char :3; /* Byte 5 Bits 5-7 */ - unsigned int DiskSize __attribute__ ((packed)); /* Bytes 6-9 */ - unsigned short :16; /* Bytes 10-11 */ -} -DAC960_V1_DeviceState_T; - - -/* - Define the DAC960 V1 Firmware Get Rebuild Progress Command reply structure. -*/ - -typedef struct DAC960_V1_RebuildProgress -{ - unsigned int LogicalDriveNumber; /* Bytes 0-3 */ - unsigned int LogicalDriveSize; /* Bytes 4-7 */ - unsigned int RemainingBlocks; /* Bytes 8-11 */ -} -DAC960_V1_RebuildProgress_T; - - -/* - Define the DAC960 V1 Firmware Background Initialization Status Command - reply structure. 
-*/ - -typedef struct DAC960_V1_BackgroundInitializationStatus -{ - unsigned int LogicalDriveSize; /* Bytes 0-3 */ - unsigned int BlocksCompleted; /* Bytes 4-7 */ - unsigned char Reserved1[12]; /* Bytes 8-19 */ - unsigned int LogicalDriveNumber; /* Bytes 20-23 */ - unsigned char RAIDLevel; /* Byte 24 */ - enum { - DAC960_V1_BackgroundInitializationInvalid = 0x00, - DAC960_V1_BackgroundInitializationStarted = 0x02, - DAC960_V1_BackgroundInitializationInProgress = 0x04, - DAC960_V1_BackgroundInitializationSuspended = 0x05, - DAC960_V1_BackgroundInitializationCancelled = 0x06 - } __attribute__ ((packed)) Status; /* Byte 25 */ - unsigned char Reserved2[6]; /* Bytes 26-31 */ -} -DAC960_V1_BackgroundInitializationStatus_T; - - -/* - Define the DAC960 V1 Firmware Error Table Entry structure. -*/ - -typedef struct DAC960_V1_ErrorTableEntry -{ - unsigned char ParityErrorCount; /* Byte 0 */ - unsigned char SoftErrorCount; /* Byte 1 */ - unsigned char HardErrorCount; /* Byte 2 */ - unsigned char MiscErrorCount; /* Byte 3 */ -} -DAC960_V1_ErrorTableEntry_T; - - -/* - Define the DAC960 V1 Firmware Get Error Table Command reply structure. -*/ - -typedef struct DAC960_V1_ErrorTable -{ - DAC960_V1_ErrorTableEntry_T - ErrorTableEntries[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; -} -DAC960_V1_ErrorTable_T; - - -/* - Define the DAC960 V1 Firmware Read Config2 Command reply structure. -*/ - -typedef struct DAC960_V1_Config2 -{ - unsigned char :1; /* Byte 0 Bit 0 */ - bool ActiveNegationEnabled:1; /* Byte 0 Bit 1 */ - unsigned char :5; /* Byte 0 Bits 2-6 */ - bool NoRescanIfResetReceivedDuringScan:1; /* Byte 0 Bit 7 */ - bool StorageWorksSupportEnabled:1; /* Byte 1 Bit 0 */ - bool HewlettPackardSupportEnabled:1; /* Byte 1 Bit 1 */ - bool NoDisconnectOnFirstCommand:1; /* Byte 1 Bit 2 */ - unsigned char :2; /* Byte 1 Bits 3-4 */ - bool AEMI_ARM:1; /* Byte 1 Bit 5 */ - bool AEMI_OFM:1; /* Byte 1 Bit 6 */ - unsigned char :1; /* Byte 1 Bit 7 */ - enum { - DAC960_V1_OEMID_Mylex = 0x00, - DAC960_V1_OEMID_IBM = 0x08, - DAC960_V1_OEMID_HP = 0x0A, - DAC960_V1_OEMID_DEC = 0x0C, - DAC960_V1_OEMID_Siemens = 0x10, - DAC960_V1_OEMID_Intel = 0x12 - } __attribute__ ((packed)) OEMID; /* Byte 2 */ - unsigned char OEMModelNumber; /* Byte 3 */ - unsigned char PhysicalSector; /* Byte 4 */ - unsigned char LogicalSector; /* Byte 5 */ - unsigned char BlockFactor; /* Byte 6 */ - bool ReadAheadEnabled:1; /* Byte 7 Bit 0 */ - bool LowBIOSDelay:1; /* Byte 7 Bit 1 */ - unsigned char :2; /* Byte 7 Bits 2-3 */ - bool ReassignRestrictedToOneSector:1; /* Byte 7 Bit 4 */ - unsigned char :1; /* Byte 7 Bit 5 */ - bool ForceUnitAccessDuringWriteRecovery:1; /* Byte 7 Bit 6 */ - bool EnableLeftSymmetricRAID5Algorithm:1; /* Byte 7 Bit 7 */ - unsigned char DefaultRebuildRate; /* Byte 8 */ - unsigned char :8; /* Byte 9 */ - unsigned char BlocksPerCacheLine; /* Byte 10 */ - unsigned char BlocksPerStripe; /* Byte 11 */ - struct { - enum { - DAC960_V1_Async = 0x0, - DAC960_V1_Sync_8MHz = 0x1, - DAC960_V1_Sync_5MHz = 0x2, - DAC960_V1_Sync_10or20MHz = 0x3 /* Byte 11 Bits 0-1 */ - } __attribute__ ((packed)) Speed:2; - bool Force8Bit:1; /* Byte 11 Bit 2 */ - bool DisableFast20:1; /* Byte 11 Bit 3 */ - unsigned char :3; /* Byte 11 Bits 4-6 */ - bool EnableTaggedQueuing:1; /* Byte 11 Bit 7 */ - } __attribute__ ((packed)) ChannelParameters[6]; /* Bytes 12-17 */ - unsigned char SCSIInitiatorID; /* Byte 18 */ - unsigned char :8; /* Byte 19 */ - enum { - DAC960_V1_StartupMode_ControllerSpinUp = 0x00, - DAC960_V1_StartupMode_PowerOnSpinUp = 0x01 - } __attribute__ 
((packed)) StartupMode; /* Byte 20 */ - unsigned char SimultaneousDeviceSpinUpCount; /* Byte 21 */ - unsigned char SecondsDelayBetweenSpinUps; /* Byte 22 */ - unsigned char Reserved1[29]; /* Bytes 23-51 */ - bool BIOSDisabled:1; /* Byte 52 Bit 0 */ - bool CDROMBootEnabled:1; /* Byte 52 Bit 1 */ - unsigned char :3; /* Byte 52 Bits 2-4 */ - enum { - DAC960_V1_Geometry_128_32 = 0x0, - DAC960_V1_Geometry_255_63 = 0x1, - DAC960_V1_Geometry_Reserved1 = 0x2, - DAC960_V1_Geometry_Reserved2 = 0x3 - } __attribute__ ((packed)) DriveGeometry:2; /* Byte 52 Bits 5-6 */ - unsigned char :1; /* Byte 52 Bit 7 */ - unsigned char Reserved2[9]; /* Bytes 53-61 */ - unsigned short Checksum; /* Bytes 62-63 */ -} -DAC960_V1_Config2_T; - - -/* - Define the DAC960 V1 Firmware DCDB request structure. -*/ - -typedef struct DAC960_V1_DCDB -{ - unsigned char TargetID:4; /* Byte 0 Bits 0-3 */ - unsigned char Channel:4; /* Byte 0 Bits 4-7 */ - enum { - DAC960_V1_DCDB_NoDataTransfer = 0, - DAC960_V1_DCDB_DataTransferDeviceToSystem = 1, - DAC960_V1_DCDB_DataTransferSystemToDevice = 2, - DAC960_V1_DCDB_IllegalDataTransfer = 3 - } __attribute__ ((packed)) Direction:2; /* Byte 1 Bits 0-1 */ - bool EarlyStatus:1; /* Byte 1 Bit 2 */ - unsigned char :1; /* Byte 1 Bit 3 */ - enum { - DAC960_V1_DCDB_Timeout_24_hours = 0, - DAC960_V1_DCDB_Timeout_10_seconds = 1, - DAC960_V1_DCDB_Timeout_60_seconds = 2, - DAC960_V1_DCDB_Timeout_10_minutes = 3 - } __attribute__ ((packed)) Timeout:2; /* Byte 1 Bits 4-5 */ - bool NoAutomaticRequestSense:1; /* Byte 1 Bit 6 */ - bool DisconnectPermitted:1; /* Byte 1 Bit 7 */ - unsigned short TransferLength; /* Bytes 2-3 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 4-7 */ - unsigned char CDBLength:4; /* Byte 8 Bits 0-3 */ - unsigned char TransferLengthHigh4:4; /* Byte 8 Bits 4-7 */ - unsigned char SenseLength; /* Byte 9 */ - unsigned char CDB[12]; /* Bytes 10-21 */ - unsigned char SenseData[64]; /* Bytes 22-85 */ - unsigned char Status; /* Byte 86 */ - unsigned char :8; /* Byte 87 */ -} -DAC960_V1_DCDB_T; - - -/* - Define the DAC960 V1 Firmware Scatter/Gather List Type 1 32 Bit Address - 32 Bit Byte Count structure. -*/ - -typedef struct DAC960_V1_ScatterGatherSegment -{ - DAC960_BusAddress32_T SegmentDataPointer; /* Bytes 0-3 */ - DAC960_ByteCount32_T SegmentByteCount; /* Bytes 4-7 */ -} -DAC960_V1_ScatterGatherSegment_T; - - -/* - Define the 13 Byte DAC960 V1 Firmware Command Mailbox structure. Bytes 13-15 - are not used. The Command Mailbox structure is padded to 16 bytes for - efficient access. 
-*/ - -typedef union DAC960_V1_CommandMailbox -{ - unsigned int Words[4]; /* Words 0-3 */ - unsigned char Bytes[16]; /* Bytes 0-15 */ - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Dummy[14]; /* Bytes 2-15 */ - } __attribute__ ((packed)) Common; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Dummy1[6]; /* Bytes 2-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy2[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char CommandOpcode2; /* Byte 2 */ - unsigned char Dummy1[5]; /* Bytes 3-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy2[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3B; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Dummy1[5]; /* Bytes 2-6 */ - unsigned char LogicalDriveNumber:6; /* Byte 7 Bits 0-6 */ - bool AutoRestore:1; /* Byte 7 Bit 7 */ - unsigned char Dummy2[8]; /* Bytes 8-15 */ - } __attribute__ ((packed)) Type3C; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Channel; /* Byte 2 */ - unsigned char TargetID; /* Byte 3 */ - DAC960_V1_PhysicalDeviceState_T DeviceState:5; /* Byte 4 Bits 0-4 */ - unsigned char Modifier:3; /* Byte 4 Bits 5-7 */ - unsigned char Dummy1[3]; /* Bytes 5-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy2[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3D; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - DAC960_V1_PerformEventLogOpType_T OperationType; /* Byte 2 */ - unsigned char OperationQualifier; /* Byte 3 */ - unsigned short SequenceNumber; /* Bytes 4-5 */ - unsigned char Dummy1[2]; /* Bytes 6-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy2[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3E; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Dummy1[2]; /* Bytes 2-3 */ - unsigned char RebuildRateConstant; /* Byte 4 */ - unsigned char Dummy2[3]; /* Bytes 5-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy3[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3R; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned short TransferLength; /* Bytes 2-3 */ - unsigned int LogicalBlockAddress; /* Bytes 4-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char LogicalDriveNumber; /* Byte 12 */ - unsigned char Dummy[3]; /* Bytes 13-15 */ - } __attribute__ ((packed)) Type4; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - struct { - unsigned short TransferLength:11; /* Bytes 2-3 */ - unsigned char LogicalDriveNumber:5; /* Byte 3 Bits 3-7 */ - } __attribute__ ((packed)) LD; - unsigned int LogicalBlockAddress; /* Bytes 4-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - 
unsigned char ScatterGatherCount:6; /* Byte 12 Bits 0-5 */ - enum { - DAC960_V1_ScatterGather_32BitAddress_32BitByteCount = 0x0, - DAC960_V1_ScatterGather_32BitAddress_16BitByteCount = 0x1, - DAC960_V1_ScatterGather_32BitByteCount_32BitAddress = 0x2, - DAC960_V1_ScatterGather_16BitByteCount_32BitAddress = 0x3 - } __attribute__ ((packed)) ScatterGatherType:2; /* Byte 12 Bits 6-7 */ - unsigned char Dummy[3]; /* Bytes 13-15 */ - } __attribute__ ((packed)) Type5; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char CommandOpcode2; /* Byte 2 */ - unsigned char :8; /* Byte 3 */ - DAC960_BusAddress32_T CommandMailboxesBusAddress; /* Bytes 4-7 */ - DAC960_BusAddress32_T StatusMailboxesBusAddress; /* Bytes 8-11 */ - unsigned char Dummy[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) TypeX; -} -DAC960_V1_CommandMailbox_T; - - -/* - Define the DAC960 V2 Firmware Command Opcodes. -*/ - -typedef enum -{ - DAC960_V2_MemCopy = 0x01, - DAC960_V2_SCSI_10_Passthru = 0x02, - DAC960_V2_SCSI_255_Passthru = 0x03, - DAC960_V2_SCSI_10 = 0x04, - DAC960_V2_SCSI_256 = 0x05, - DAC960_V2_IOCTL = 0x20 -} -__attribute__ ((packed)) -DAC960_V2_CommandOpcode_T; - - -/* - Define the DAC960 V2 Firmware IOCTL Opcodes. -*/ - -typedef enum -{ - DAC960_V2_GetControllerInfo = 0x01, - DAC960_V2_GetLogicalDeviceInfoValid = 0x03, - DAC960_V2_GetPhysicalDeviceInfoValid = 0x05, - DAC960_V2_GetHealthStatus = 0x11, - DAC960_V2_GetEvent = 0x15, - DAC960_V2_StartDiscovery = 0x81, - DAC960_V2_SetDeviceState = 0x82, - DAC960_V2_RebuildDeviceStart = 0x88, - DAC960_V2_RebuildDeviceStop = 0x89, - DAC960_V2_ConsistencyCheckStart = 0x8C, - DAC960_V2_ConsistencyCheckStop = 0x8D, - DAC960_V2_SetMemoryMailbox = 0x8E, - DAC960_V2_PauseDevice = 0x92, - DAC960_V2_TranslatePhysicalToLogicalDevice = 0xC5 -} -__attribute__ ((packed)) -DAC960_V2_IOCTL_Opcode_T; - - -/* - Define the DAC960 V2 Firmware Command Identifier type. -*/ - -typedef unsigned short DAC960_V2_CommandIdentifier_T; - - -/* - Define the DAC960 V2 Firmware Command Status Codes. -*/ - -#define DAC960_V2_NormalCompletion 0x00 -#define DAC960_V2_AbormalCompletion 0x02 -#define DAC960_V2_DeviceBusy 0x08 -#define DAC960_V2_DeviceNonresponsive 0x0E -#define DAC960_V2_DeviceNonresponsive2 0x0F -#define DAC960_V2_DeviceRevervationConflict 0x18 - -typedef unsigned char DAC960_V2_CommandStatus_T; - - -/* - Define the DAC960 V2 Firmware Memory Type structure. -*/ - -typedef struct DAC960_V2_MemoryType -{ - enum { - DAC960_V2_MemoryType_Reserved = 0x00, - DAC960_V2_MemoryType_DRAM = 0x01, - DAC960_V2_MemoryType_EDRAM = 0x02, - DAC960_V2_MemoryType_EDO = 0x03, - DAC960_V2_MemoryType_SDRAM = 0x04, - DAC960_V2_MemoryType_Last = 0x1F - } __attribute__ ((packed)) MemoryType:5; /* Byte 0 Bits 0-4 */ - bool :1; /* Byte 0 Bit 5 */ - bool MemoryParity:1; /* Byte 0 Bit 6 */ - bool MemoryECC:1; /* Byte 0 Bit 7 */ -} -DAC960_V2_MemoryType_T; - - -/* - Define the DAC960 V2 Firmware Processor Type structure. -*/ - -typedef enum -{ - DAC960_V2_ProcessorType_i960CA = 0x01, - DAC960_V2_ProcessorType_i960RD = 0x02, - DAC960_V2_ProcessorType_i960RN = 0x03, - DAC960_V2_ProcessorType_i960RP = 0x04, - DAC960_V2_ProcessorType_NorthBay = 0x05, - DAC960_V2_ProcessorType_StrongArm = 0x06, - DAC960_V2_ProcessorType_i960RM = 0x07 -} -__attribute__ ((packed)) -DAC960_V2_ProcessorType_T; - - -/* - Define the DAC960 V2 Firmware Get Controller Info reply structure. 
-*/ - -typedef struct DAC960_V2_ControllerInfo -{ - unsigned char :8; /* Byte 0 */ - enum { - DAC960_V2_SCSI_Bus = 0x00, - DAC960_V2_Fibre_Bus = 0x01, - DAC960_V2_PCI_Bus = 0x03 - } __attribute__ ((packed)) BusInterfaceType; /* Byte 1 */ - enum { - DAC960_V2_DAC960E = 0x01, - DAC960_V2_DAC960M = 0x08, - DAC960_V2_DAC960PD = 0x10, - DAC960_V2_DAC960PL = 0x11, - DAC960_V2_DAC960PU = 0x12, - DAC960_V2_DAC960PE = 0x13, - DAC960_V2_DAC960PG = 0x14, - DAC960_V2_DAC960PJ = 0x15, - DAC960_V2_DAC960PTL0 = 0x16, - DAC960_V2_DAC960PR = 0x17, - DAC960_V2_DAC960PRL = 0x18, - DAC960_V2_DAC960PT = 0x19, - DAC960_V2_DAC1164P = 0x1A, - DAC960_V2_DAC960PTL1 = 0x1B, - DAC960_V2_EXR2000P = 0x1C, - DAC960_V2_EXR3000P = 0x1D, - DAC960_V2_AcceleRAID352 = 0x1E, - DAC960_V2_AcceleRAID170 = 0x1F, - DAC960_V2_AcceleRAID160 = 0x20, - DAC960_V2_DAC960S = 0x60, - DAC960_V2_DAC960SU = 0x61, - DAC960_V2_DAC960SX = 0x62, - DAC960_V2_DAC960SF = 0x63, - DAC960_V2_DAC960SS = 0x64, - DAC960_V2_DAC960FL = 0x65, - DAC960_V2_DAC960LL = 0x66, - DAC960_V2_DAC960FF = 0x67, - DAC960_V2_DAC960HP = 0x68, - DAC960_V2_RAIDBRICK = 0x69, - DAC960_V2_METEOR_FL = 0x6A, - DAC960_V2_METEOR_FF = 0x6B - } __attribute__ ((packed)) ControllerType; /* Byte 2 */ - unsigned char :8; /* Byte 3 */ - unsigned short BusInterfaceSpeedMHz; /* Bytes 4-5 */ - unsigned char BusWidthBits; /* Byte 6 */ - unsigned char FlashCodeTypeOrProductID; /* Byte 7 */ - unsigned char NumberOfHostPortsPresent; /* Byte 8 */ - unsigned char Reserved1[7]; /* Bytes 9-15 */ - unsigned char BusInterfaceName[16]; /* Bytes 16-31 */ - unsigned char ControllerName[16]; /* Bytes 32-47 */ - unsigned char Reserved2[16]; /* Bytes 48-63 */ - /* Firmware Release Information */ - unsigned char FirmwareMajorVersion; /* Byte 64 */ - unsigned char FirmwareMinorVersion; /* Byte 65 */ - unsigned char FirmwareTurnNumber; /* Byte 66 */ - unsigned char FirmwareBuildNumber; /* Byte 67 */ - unsigned char FirmwareReleaseDay; /* Byte 68 */ - unsigned char FirmwareReleaseMonth; /* Byte 69 */ - unsigned char FirmwareReleaseYearHigh2Digits; /* Byte 70 */ - unsigned char FirmwareReleaseYearLow2Digits; /* Byte 71 */ - /* Hardware Release Information */ - unsigned char HardwareRevision; /* Byte 72 */ - unsigned int :24; /* Bytes 73-75 */ - unsigned char HardwareReleaseDay; /* Byte 76 */ - unsigned char HardwareReleaseMonth; /* Byte 77 */ - unsigned char HardwareReleaseYearHigh2Digits; /* Byte 78 */ - unsigned char HardwareReleaseYearLow2Digits; /* Byte 79 */ - /* Hardware Manufacturing Information */ - unsigned char ManufacturingBatchNumber; /* Byte 80 */ - unsigned char :8; /* Byte 81 */ - unsigned char ManufacturingPlantNumber; /* Byte 82 */ - unsigned char :8; /* Byte 83 */ - unsigned char HardwareManufacturingDay; /* Byte 84 */ - unsigned char HardwareManufacturingMonth; /* Byte 85 */ - unsigned char HardwareManufacturingYearHigh2Digits; /* Byte 86 */ - unsigned char HardwareManufacturingYearLow2Digits; /* Byte 87 */ - unsigned char MaximumNumberOfPDDperXLD; /* Byte 88 */ - unsigned char MaximumNumberOfILDperXLD; /* Byte 89 */ - unsigned short NonvolatileMemorySizeKB; /* Bytes 90-91 */ - unsigned char MaximumNumberOfXLD; /* Byte 92 */ - unsigned int :24; /* Bytes 93-95 */ - /* Unique Information per Controller */ - unsigned char ControllerSerialNumber[16]; /* Bytes 96-111 */ - unsigned char Reserved3[16]; /* Bytes 112-127 */ - /* Vendor Information */ - unsigned int :24; /* Bytes 128-130 */ - unsigned char OEM_Code; /* Byte 131 */ - unsigned char VendorName[16]; /* Bytes 132-147 */ - /* Other 
Physical/Controller/Operation Information */ - bool BBU_Present:1; /* Byte 148 Bit 0 */ - bool ActiveActiveClusteringMode:1; /* Byte 148 Bit 1 */ - unsigned char :6; /* Byte 148 Bits 2-7 */ - unsigned char :8; /* Byte 149 */ - unsigned short :16; /* Bytes 150-151 */ - /* Physical Device Scan Information */ - bool PhysicalScanActive:1; /* Byte 152 Bit 0 */ - unsigned char :7; /* Byte 152 Bits 1-7 */ - unsigned char PhysicalDeviceChannelNumber; /* Byte 153 */ - unsigned char PhysicalDeviceTargetID; /* Byte 154 */ - unsigned char PhysicalDeviceLogicalUnit; /* Byte 155 */ - /* Maximum Command Data Transfer Sizes */ - unsigned short MaximumDataTransferSizeInBlocks; /* Bytes 156-157 */ - unsigned short MaximumScatterGatherEntries; /* Bytes 158-159 */ - /* Logical/Physical Device Counts */ - unsigned short LogicalDevicesPresent; /* Bytes 160-161 */ - unsigned short LogicalDevicesCritical; /* Bytes 162-163 */ - unsigned short LogicalDevicesOffline; /* Bytes 164-165 */ - unsigned short PhysicalDevicesPresent; /* Bytes 166-167 */ - unsigned short PhysicalDisksPresent; /* Bytes 168-169 */ - unsigned short PhysicalDisksCritical; /* Bytes 170-171 */ - unsigned short PhysicalDisksOffline; /* Bytes 172-173 */ - unsigned short MaximumParallelCommands; /* Bytes 174-175 */ - /* Channel and Target ID Information */ - unsigned char NumberOfPhysicalChannelsPresent; /* Byte 176 */ - unsigned char NumberOfVirtualChannelsPresent; /* Byte 177 */ - unsigned char NumberOfPhysicalChannelsPossible; /* Byte 178 */ - unsigned char NumberOfVirtualChannelsPossible; /* Byte 179 */ - unsigned char MaximumTargetsPerChannel[16]; /* Bytes 180-195 */ - unsigned char Reserved4[12]; /* Bytes 196-207 */ - /* Memory/Cache Information */ - unsigned short MemorySizeMB; /* Bytes 208-209 */ - unsigned short CacheSizeMB; /* Bytes 210-211 */ - unsigned int ValidCacheSizeInBytes; /* Bytes 212-215 */ - unsigned int DirtyCacheSizeInBytes; /* Bytes 216-219 */ - unsigned short MemorySpeedMHz; /* Bytes 220-221 */ - unsigned char MemoryDataWidthBits; /* Byte 222 */ - DAC960_V2_MemoryType_T MemoryType; /* Byte 223 */ - unsigned char CacheMemoryTypeName[16]; /* Bytes 224-239 */ - /* Execution Memory Information */ - unsigned short ExecutionMemorySizeMB; /* Bytes 240-241 */ - unsigned short ExecutionL2CacheSizeMB; /* Bytes 242-243 */ - unsigned char Reserved5[8]; /* Bytes 244-251 */ - unsigned short ExecutionMemorySpeedMHz; /* Bytes 252-253 */ - unsigned char ExecutionMemoryDataWidthBits; /* Byte 254 */ - DAC960_V2_MemoryType_T ExecutionMemoryType; /* Byte 255 */ - unsigned char ExecutionMemoryTypeName[16]; /* Bytes 256-271 */ - /* First CPU Type Information */ - unsigned short FirstProcessorSpeedMHz; /* Bytes 272-273 */ - DAC960_V2_ProcessorType_T FirstProcessorType; /* Byte 274 */ - unsigned char FirstProcessorCount; /* Byte 275 */ - unsigned char Reserved6[12]; /* Bytes 276-287 */ - unsigned char FirstProcessorName[16]; /* Bytes 288-303 */ - /* Second CPU Type Information */ - unsigned short SecondProcessorSpeedMHz; /* Bytes 304-305 */ - DAC960_V2_ProcessorType_T SecondProcessorType; /* Byte 306 */ - unsigned char SecondProcessorCount; /* Byte 307 */ - unsigned char Reserved7[12]; /* Bytes 308-319 */ - unsigned char SecondProcessorName[16]; /* Bytes 320-335 */ - /* Debugging/Profiling/Command Time Tracing Information */ - unsigned short CurrentProfilingDataPageNumber; /* Bytes 336-337 */ - unsigned short ProgramsAwaitingProfilingData; /* Bytes 338-339 */ - unsigned short CurrentCommandTimeTraceDataPageNumber; /* Bytes 340-341 */ - unsigned 
short ProgramsAwaitingCommandTimeTraceData; /* Bytes 342-343 */ - unsigned char Reserved8[8]; /* Bytes 344-351 */ - /* Error Counters on Physical Devices */ - unsigned short PhysicalDeviceBusResets; /* Bytes 352-353 */ - unsigned short PhysicalDeviceParityErrors; /* Bytes 355-355 */ - unsigned short PhysicalDeviceSoftErrors; /* Bytes 356-357 */ - unsigned short PhysicalDeviceCommandsFailed; /* Bytes 358-359 */ - unsigned short PhysicalDeviceMiscellaneousErrors; /* Bytes 360-361 */ - unsigned short PhysicalDeviceCommandTimeouts; /* Bytes 362-363 */ - unsigned short PhysicalDeviceSelectionTimeouts; /* Bytes 364-365 */ - unsigned short PhysicalDeviceRetriesDone; /* Bytes 366-367 */ - unsigned short PhysicalDeviceAbortsDone; /* Bytes 368-369 */ - unsigned short PhysicalDeviceHostCommandAbortsDone; /* Bytes 370-371 */ - unsigned short PhysicalDevicePredictedFailuresDetected; /* Bytes 372-373 */ - unsigned short PhysicalDeviceHostCommandsFailed; /* Bytes 374-375 */ - unsigned short PhysicalDeviceHardErrors; /* Bytes 376-377 */ - unsigned char Reserved9[6]; /* Bytes 378-383 */ - /* Error Counters on Logical Devices */ - unsigned short LogicalDeviceSoftErrors; /* Bytes 384-385 */ - unsigned short LogicalDeviceCommandsFailed; /* Bytes 386-387 */ - unsigned short LogicalDeviceHostCommandAbortsDone; /* Bytes 388-389 */ - unsigned short :16; /* Bytes 390-391 */ - /* Error Counters on Controller */ - unsigned short ControllerMemoryErrors; /* Bytes 392-393 */ - unsigned short ControllerHostCommandAbortsDone; /* Bytes 394-395 */ - unsigned int :32; /* Bytes 396-399 */ - /* Long Duration Activity Information */ - unsigned short BackgroundInitializationsActive; /* Bytes 400-401 */ - unsigned short LogicalDeviceInitializationsActive; /* Bytes 402-403 */ - unsigned short PhysicalDeviceInitializationsActive; /* Bytes 404-405 */ - unsigned short ConsistencyChecksActive; /* Bytes 406-407 */ - unsigned short RebuildsActive; /* Bytes 408-409 */ - unsigned short OnlineExpansionsActive; /* Bytes 410-411 */ - unsigned short PatrolActivitiesActive; /* Bytes 412-413 */ - unsigned short :16; /* Bytes 414-415 */ - /* Flash ROM Information */ - unsigned char FlashType; /* Byte 416 */ - unsigned char :8; /* Byte 417 */ - unsigned short FlashSizeMB; /* Bytes 418-419 */ - unsigned int FlashLimit; /* Bytes 420-423 */ - unsigned int FlashCount; /* Bytes 424-427 */ - unsigned int :32; /* Bytes 428-431 */ - unsigned char FlashTypeName[16]; /* Bytes 432-447 */ - /* Firmware Run Time Information */ - unsigned char RebuildRate; /* Byte 448 */ - unsigned char BackgroundInitializationRate; /* Byte 449 */ - unsigned char ForegroundInitializationRate; /* Byte 450 */ - unsigned char ConsistencyCheckRate; /* Byte 451 */ - unsigned int :32; /* Bytes 452-455 */ - unsigned int MaximumDP; /* Bytes 456-459 */ - unsigned int FreeDP; /* Bytes 460-463 */ - unsigned int MaximumIOP; /* Bytes 464-467 */ - unsigned int FreeIOP; /* Bytes 468-471 */ - unsigned short MaximumCombLengthInBlocks; /* Bytes 472-473 */ - unsigned short NumberOfConfigurationGroups; /* Bytes 474-475 */ - bool InstallationAbortStatus:1; /* Byte 476 Bit 0 */ - bool MaintenanceModeStatus:1; /* Byte 476 Bit 1 */ - unsigned int :24; /* Bytes 476-479 */ - unsigned char Reserved10[32]; /* Bytes 480-511 */ - unsigned char Reserved11[512]; /* Bytes 512-1023 */ -} -DAC960_V2_ControllerInfo_T; - - -/* - Define the DAC960 V2 Firmware Logical Device State type. 
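The firmware release fields of the Get Controller Info reply above (Bytes 64-71) split the version and build date into individual bytes. A minimal sketch of how a caller might render them as one printable string, assuming an already filled-in DAC960_V2_ControllerInfo_T; the helper name, buffer handling, and the assumption that the date bytes hold plain binary values (not BCD) are illustrative only.

#include <stdio.h>

/*
  Sketch only: format the firmware release fields of a filled-in
  DAC960_V2_ControllerInfo_T into a printable string.  Assumes the
  byte-wide date fields hold plain binary values.
*/
static void DAC960_FormatFirmwareRelease(const DAC960_V2_ControllerInfo_T *Info,
                                         char *Buffer, size_t Length)
{
  snprintf(Buffer, Length, "%u.%02u-%02u (build %u) %02u%02u-%02u-%02u",
           Info->FirmwareMajorVersion, Info->FirmwareMinorVersion,
           Info->FirmwareTurnNumber, Info->FirmwareBuildNumber,
           Info->FirmwareReleaseYearHigh2Digits,
           Info->FirmwareReleaseYearLow2Digits,
           Info->FirmwareReleaseMonth, Info->FirmwareReleaseDay);
}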
-*/ - -typedef enum -{ - DAC960_V2_LogicalDevice_Online = 0x01, - DAC960_V2_LogicalDevice_Offline = 0x08, - DAC960_V2_LogicalDevice_Critical = 0x09 -} -__attribute__ ((packed)) -DAC960_V2_LogicalDeviceState_T; - - -/* - Define the DAC960 V2 Firmware Get Logical Device Info reply structure. -*/ - -typedef struct DAC960_V2_LogicalDeviceInfo -{ - unsigned char :8; /* Byte 0 */ - unsigned char Channel; /* Byte 1 */ - unsigned char TargetID; /* Byte 2 */ - unsigned char LogicalUnit; /* Byte 3 */ - DAC960_V2_LogicalDeviceState_T LogicalDeviceState; /* Byte 4 */ - unsigned char RAIDLevel; /* Byte 5 */ - unsigned char StripeSize; /* Byte 6 */ - unsigned char CacheLineSize; /* Byte 7 */ - struct { - enum { - DAC960_V2_ReadCacheDisabled = 0x0, - DAC960_V2_ReadCacheEnabled = 0x1, - DAC960_V2_ReadAheadEnabled = 0x2, - DAC960_V2_IntelligentReadAheadEnabled = 0x3, - DAC960_V2_ReadCache_Last = 0x7 - } __attribute__ ((packed)) ReadCache:3; /* Byte 8 Bits 0-2 */ - enum { - DAC960_V2_WriteCacheDisabled = 0x0, - DAC960_V2_LogicalDeviceReadOnly = 0x1, - DAC960_V2_WriteCacheEnabled = 0x2, - DAC960_V2_IntelligentWriteCacheEnabled = 0x3, - DAC960_V2_WriteCache_Last = 0x7 - } __attribute__ ((packed)) WriteCache:3; /* Byte 8 Bits 3-5 */ - bool :1; /* Byte 8 Bit 6 */ - bool LogicalDeviceInitialized:1; /* Byte 8 Bit 7 */ - } LogicalDeviceControl; /* Byte 8 */ - /* Logical Device Operations Status */ - bool ConsistencyCheckInProgress:1; /* Byte 9 Bit 0 */ - bool RebuildInProgress:1; /* Byte 9 Bit 1 */ - bool BackgroundInitializationInProgress:1; /* Byte 9 Bit 2 */ - bool ForegroundInitializationInProgress:1; /* Byte 9 Bit 3 */ - bool DataMigrationInProgress:1; /* Byte 9 Bit 4 */ - bool PatrolOperationInProgress:1; /* Byte 9 Bit 5 */ - unsigned char :2; /* Byte 9 Bits 6-7 */ - unsigned char RAID5WriteUpdate; /* Byte 10 */ - unsigned char RAID5Algorithm; /* Byte 11 */ - unsigned short LogicalDeviceNumber; /* Bytes 12-13 */ - /* BIOS Info */ - bool BIOSDisabled:1; /* Byte 14 Bit 0 */ - bool CDROMBootEnabled:1; /* Byte 14 Bit 1 */ - bool DriveCoercionEnabled:1; /* Byte 14 Bit 2 */ - bool WriteSameDisabled:1; /* Byte 14 Bit 3 */ - bool HBA_ModeEnabled:1; /* Byte 14 Bit 4 */ - enum { - DAC960_V2_Geometry_128_32 = 0x0, - DAC960_V2_Geometry_255_63 = 0x1, - DAC960_V2_Geometry_Reserved1 = 0x2, - DAC960_V2_Geometry_Reserved2 = 0x3 - } __attribute__ ((packed)) DriveGeometry:2; /* Byte 14 Bits 5-6 */ - bool SuperReadAheadEnabled:1; /* Byte 14 Bit 7 */ - unsigned char :8; /* Byte 15 */ - /* Error Counters */ - unsigned short SoftErrors; /* Bytes 16-17 */ - unsigned short CommandsFailed; /* Bytes 18-19 */ - unsigned short HostCommandAbortsDone; /* Bytes 20-21 */ - unsigned short DeferredWriteErrors; /* Bytes 22-23 */ - unsigned int :32; /* Bytes 24-27 */ - unsigned int :32; /* Bytes 28-31 */ - /* Device Size Information */ - unsigned short :16; /* Bytes 32-33 */ - unsigned short DeviceBlockSizeInBytes; /* Bytes 34-35 */ - unsigned int OriginalDeviceSize; /* Bytes 36-39 */ - unsigned int ConfigurableDeviceSize; /* Bytes 40-43 */ - unsigned int :32; /* Bytes 44-47 */ - unsigned char LogicalDeviceName[32]; /* Bytes 48-79 */ - unsigned char SCSI_InquiryData[36]; /* Bytes 80-115 */ - unsigned char Reserved1[12]; /* Bytes 116-127 */ - DAC960_ByteCount64_T LastReadBlockNumber; /* Bytes 128-135 */ - DAC960_ByteCount64_T LastWrittenBlockNumber; /* Bytes 136-143 */ - DAC960_ByteCount64_T ConsistencyCheckBlockNumber; /* Bytes 144-151 */ - DAC960_ByteCount64_T RebuildBlockNumber; /* Bytes 152-159 */ - DAC960_ByteCount64_T 
BackgroundInitializationBlockNumber; /* Bytes 160-167 */ - DAC960_ByteCount64_T ForegroundInitializationBlockNumber; /* Bytes 168-175 */ - DAC960_ByteCount64_T DataMigrationBlockNumber; /* Bytes 176-183 */ - DAC960_ByteCount64_T PatrolOperationBlockNumber; /* Bytes 184-191 */ - unsigned char Reserved2[64]; /* Bytes 192-255 */ -} -DAC960_V2_LogicalDeviceInfo_T; - - -/* - Define the DAC960 V2 Firmware Physical Device State type. -*/ - -typedef enum -{ - DAC960_V2_Device_Unconfigured = 0x00, - DAC960_V2_Device_Online = 0x01, - DAC960_V2_Device_Rebuild = 0x03, - DAC960_V2_Device_Missing = 0x04, - DAC960_V2_Device_Critical = 0x05, - DAC960_V2_Device_Dead = 0x08, - DAC960_V2_Device_SuspectedDead = 0x0C, - DAC960_V2_Device_CommandedOffline = 0x10, - DAC960_V2_Device_Standby = 0x21, - DAC960_V2_Device_InvalidState = 0xFF -} -__attribute__ ((packed)) -DAC960_V2_PhysicalDeviceState_T; - - -/* - Define the DAC960 V2 Firmware Get Physical Device Info reply structure. -*/ - -typedef struct DAC960_V2_PhysicalDeviceInfo -{ - unsigned char :8; /* Byte 0 */ - unsigned char Channel; /* Byte 1 */ - unsigned char TargetID; /* Byte 2 */ - unsigned char LogicalUnit; /* Byte 3 */ - /* Configuration Status Bits */ - bool PhysicalDeviceFaultTolerant:1; /* Byte 4 Bit 0 */ - bool PhysicalDeviceConnected:1; /* Byte 4 Bit 1 */ - bool PhysicalDeviceLocalToController:1; /* Byte 4 Bit 2 */ - unsigned char :5; /* Byte 4 Bits 3-7 */ - /* Multiple Host/Controller Status Bits */ - bool RemoteHostSystemDead:1; /* Byte 5 Bit 0 */ - bool RemoteControllerDead:1; /* Byte 5 Bit 1 */ - unsigned char :6; /* Byte 5 Bits 2-7 */ - DAC960_V2_PhysicalDeviceState_T PhysicalDeviceState; /* Byte 6 */ - unsigned char NegotiatedDataWidthBits; /* Byte 7 */ - unsigned short NegotiatedSynchronousMegaTransfers; /* Bytes 8-9 */ - /* Multiported Physical Device Information */ - unsigned char NumberOfPortConnections; /* Byte 10 */ - unsigned char DriveAccessibilityBitmap; /* Byte 11 */ - unsigned int :32; /* Bytes 12-15 */ - unsigned char NetworkAddress[16]; /* Bytes 16-31 */ - unsigned short MaximumTags; /* Bytes 32-33 */ - /* Physical Device Operations Status */ - bool ConsistencyCheckInProgress:1; /* Byte 34 Bit 0 */ - bool RebuildInProgress:1; /* Byte 34 Bit 1 */ - bool MakingDataConsistentInProgress:1; /* Byte 34 Bit 2 */ - bool PhysicalDeviceInitializationInProgress:1; /* Byte 34 Bit 3 */ - bool DataMigrationInProgress:1; /* Byte 34 Bit 4 */ - bool PatrolOperationInProgress:1; /* Byte 34 Bit 5 */ - unsigned char :2; /* Byte 34 Bits 6-7 */ - unsigned char LongOperationStatus; /* Byte 35 */ - unsigned char ParityErrors; /* Byte 36 */ - unsigned char SoftErrors; /* Byte 37 */ - unsigned char HardErrors; /* Byte 38 */ - unsigned char MiscellaneousErrors; /* Byte 39 */ - unsigned char CommandTimeouts; /* Byte 40 */ - unsigned char Retries; /* Byte 41 */ - unsigned char Aborts; /* Byte 42 */ - unsigned char PredictedFailuresDetected; /* Byte 43 */ - unsigned int :32; /* Bytes 44-47 */ - unsigned short :16; /* Bytes 48-49 */ - unsigned short DeviceBlockSizeInBytes; /* Bytes 50-51 */ - unsigned int OriginalDeviceSize; /* Bytes 52-55 */ - unsigned int ConfigurableDeviceSize; /* Bytes 56-59 */ - unsigned int :32; /* Bytes 60-63 */ - unsigned char PhysicalDeviceName[16]; /* Bytes 64-79 */ - unsigned char Reserved1[16]; /* Bytes 80-95 */ - unsigned char Reserved2[32]; /* Bytes 96-127 */ - unsigned char SCSI_InquiryData[36]; /* Bytes 128-163 */ - unsigned char Reserved3[20]; /* Bytes 164-183 */ - unsigned char Reserved4[8]; /* Bytes 184-191 */ - 
DAC960_ByteCount64_T LastReadBlockNumber; /* Bytes 192-199 */ - DAC960_ByteCount64_T LastWrittenBlockNumber; /* Bytes 200-207 */ - DAC960_ByteCount64_T ConsistencyCheckBlockNumber; /* Bytes 208-215 */ - DAC960_ByteCount64_T RebuildBlockNumber; /* Bytes 216-223 */ - DAC960_ByteCount64_T MakingDataConsistentBlockNumber; /* Bytes 224-231 */ - DAC960_ByteCount64_T DeviceInitializationBlockNumber; /* Bytes 232-239 */ - DAC960_ByteCount64_T DataMigrationBlockNumber; /* Bytes 240-247 */ - DAC960_ByteCount64_T PatrolOperationBlockNumber; /* Bytes 248-255 */ - unsigned char Reserved5[256]; /* Bytes 256-511 */ -} -DAC960_V2_PhysicalDeviceInfo_T; - - -/* - Define the DAC960 V2 Firmware Health Status Buffer structure. -*/ - -typedef struct DAC960_V2_HealthStatusBuffer -{ - unsigned int MicrosecondsFromControllerStartTime; /* Bytes 0-3 */ - unsigned int MillisecondsFromControllerStartTime; /* Bytes 4-7 */ - unsigned int SecondsFrom1January1970; /* Bytes 8-11 */ - unsigned int :32; /* Bytes 12-15 */ - unsigned int StatusChangeCounter; /* Bytes 16-19 */ - unsigned int :32; /* Bytes 20-23 */ - unsigned int DebugOutputMessageBufferIndex; /* Bytes 24-27 */ - unsigned int CodedMessageBufferIndex; /* Bytes 28-31 */ - unsigned int CurrentTimeTracePageNumber; /* Bytes 32-35 */ - unsigned int CurrentProfilerPageNumber; /* Bytes 36-39 */ - unsigned int NextEventSequenceNumber; /* Bytes 40-43 */ - unsigned int :32; /* Bytes 44-47 */ - unsigned char Reserved1[16]; /* Bytes 48-63 */ - unsigned char Reserved2[64]; /* Bytes 64-127 */ -} -DAC960_V2_HealthStatusBuffer_T; - - -/* - Define the DAC960 V2 Firmware Get Event reply structure. -*/ - -typedef struct DAC960_V2_Event -{ - unsigned int EventSequenceNumber; /* Bytes 0-3 */ - unsigned int EventTime; /* Bytes 4-7 */ - unsigned int EventCode; /* Bytes 8-11 */ - unsigned char :8; /* Byte 12 */ - unsigned char Channel; /* Byte 13 */ - unsigned char TargetID; /* Byte 14 */ - unsigned char LogicalUnit; /* Byte 15 */ - unsigned int :32; /* Bytes 16-19 */ - unsigned int EventSpecificParameter; /* Bytes 20-23 */ - unsigned char RequestSenseData[40]; /* Bytes 24-63 */ -} -DAC960_V2_Event_T; - - -/* - Define the DAC960 V2 Firmware Command Control Bits structure. -*/ - -typedef struct DAC960_V2_CommandControlBits -{ - bool ForceUnitAccess:1; /* Byte 0 Bit 0 */ - bool DisablePageOut:1; /* Byte 0 Bit 1 */ - bool :1; /* Byte 0 Bit 2 */ - bool AdditionalScatterGatherListMemory:1; /* Byte 0 Bit 3 */ - bool DataTransferControllerToHost:1; /* Byte 0 Bit 4 */ - bool :1; /* Byte 0 Bit 5 */ - bool NoAutoRequestSense:1; /* Byte 0 Bit 6 */ - bool DisconnectProhibited:1; /* Byte 0 Bit 7 */ -} -DAC960_V2_CommandControlBits_T; - - -/* - Define the DAC960 V2 Firmware Command Timeout structure. -*/ - -typedef struct DAC960_V2_CommandTimeout -{ - unsigned char TimeoutValue:6; /* Byte 0 Bits 0-5 */ - enum { - DAC960_V2_TimeoutScale_Seconds = 0, - DAC960_V2_TimeoutScale_Minutes = 1, - DAC960_V2_TimeoutScale_Hours = 2, - DAC960_V2_TimeoutScale_Reserved = 3 - } __attribute__ ((packed)) TimeoutScale:2; /* Byte 0 Bits 6-7 */ -} -DAC960_V2_CommandTimeout_T; - - -/* - Define the DAC960 V2 Firmware Physical Device structure. -*/ - -typedef struct DAC960_V2_PhysicalDevice -{ - unsigned char LogicalUnit; /* Byte 0 */ - unsigned char TargetID; /* Byte 1 */ - unsigned char Channel:3; /* Byte 2 Bits 0-2 */ - unsigned char Controller:5; /* Byte 2 Bits 3-7 */ -} -__attribute__ ((packed)) -DAC960_V2_PhysicalDevice_T; - - -/* - Define the DAC960 V2 Firmware Logical Device structure. 
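DAC960_V2_CommandTimeout_T above packs a 6-bit count and a 2-bit scale into a single byte, so one command timeout can express anything from 1 second up to 63 hours. A minimal sketch of the encoding for a 60-second timeout; the helper name is illustrative only.

/*
  Sketch only: encode a 60 second command timeout.  TimeoutValue is a
  6-bit field (0-63), so longer intervals must switch TimeoutScale to
  Minutes or Hours.
*/
static DAC960_V2_CommandTimeout_T DAC960_V2_MakeTimeout60s(void)
{
  DAC960_V2_CommandTimeout_T Timeout;
  Timeout.TimeoutValue = 60;
  Timeout.TimeoutScale = DAC960_V2_TimeoutScale_Seconds;
  return Timeout;
}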
-*/ - -typedef struct DAC960_V2_LogicalDevice -{ - unsigned short LogicalDeviceNumber; /* Bytes 0-1 */ - unsigned char :3; /* Byte 2 Bits 0-2 */ - unsigned char Controller:5; /* Byte 2 Bits 3-7 */ -} -__attribute__ ((packed)) -DAC960_V2_LogicalDevice_T; - - -/* - Define the DAC960 V2 Firmware Operation Device type. -*/ - -typedef enum -{ - DAC960_V2_Physical_Device = 0x00, - DAC960_V2_RAID_Device = 0x01, - DAC960_V2_Physical_Channel = 0x02, - DAC960_V2_RAID_Channel = 0x03, - DAC960_V2_Physical_Controller = 0x04, - DAC960_V2_RAID_Controller = 0x05, - DAC960_V2_Configuration_Group = 0x10, - DAC960_V2_Enclosure = 0x11 -} -__attribute__ ((packed)) -DAC960_V2_OperationDevice_T; - - -/* - Define the DAC960 V2 Firmware Translate Physical To Logical Device structure. -*/ - -typedef struct DAC960_V2_PhysicalToLogicalDevice -{ - unsigned short LogicalDeviceNumber; /* Bytes 0-1 */ - unsigned short :16; /* Bytes 2-3 */ - unsigned char PreviousBootController; /* Byte 4 */ - unsigned char PreviousBootChannel; /* Byte 5 */ - unsigned char PreviousBootTargetID; /* Byte 6 */ - unsigned char PreviousBootLogicalUnit; /* Byte 7 */ -} -DAC960_V2_PhysicalToLogicalDevice_T; - - - -/* - Define the DAC960 V2 Firmware Scatter/Gather List Entry structure. -*/ - -typedef struct DAC960_V2_ScatterGatherSegment -{ - DAC960_BusAddress64_T SegmentDataPointer; /* Bytes 0-7 */ - DAC960_ByteCount64_T SegmentByteCount; /* Bytes 8-15 */ -} -DAC960_V2_ScatterGatherSegment_T; - - -/* - Define the DAC960 V2 Firmware Data Transfer Memory Address structure. -*/ - -typedef union DAC960_V2_DataTransferMemoryAddress -{ - DAC960_V2_ScatterGatherSegment_T ScatterGatherSegments[2]; /* Bytes 0-31 */ - struct { - unsigned short ScatterGatherList0Length; /* Bytes 0-1 */ - unsigned short ScatterGatherList1Length; /* Bytes 2-3 */ - unsigned short ScatterGatherList2Length; /* Bytes 4-5 */ - unsigned short :16; /* Bytes 6-7 */ - DAC960_BusAddress64_T ScatterGatherList0Address; /* Bytes 8-15 */ - DAC960_BusAddress64_T ScatterGatherList1Address; /* Bytes 16-23 */ - DAC960_BusAddress64_T ScatterGatherList2Address; /* Bytes 24-31 */ - } ExtendedScatterGather; -} -DAC960_V2_DataTransferMemoryAddress_T; - - -/* - Define the 64 Byte DAC960 V2 Firmware Command Mailbox structure. 
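The DAC960_V2_DataTransferMemoryAddress_T union above offers two layouts: up to two scatter/gather segments stored inline in the mailbox, or, via ExtendedScatterGather, the lengths and bus addresses of up to three external lists (used together with the AdditionalScatterGatherListMemory control bit defined earlier). A minimal sketch of the simple single-segment case, assuming a buffer already mapped for DMA; BufferDMA and ByteCount are illustrative names.

/*
  Sketch only: describe one contiguous DMA buffer with the first of the
  two inline scatter/gather segments.  A transfer needing more than two
  segments would instead fill ExtendedScatterGather and set the
  AdditionalScatterGatherListMemory control bit.
*/
static void DAC960_V2_SetSingleSegment(DAC960_V2_DataTransferMemoryAddress_T *DMA,
                                       DAC960_BusAddress64_T BufferDMA,
                                       DAC960_ByteCount64_T ByteCount)
{
  DMA->ScatterGatherSegments[0].SegmentDataPointer = BufferDMA;
  DMA->ScatterGatherSegments[0].SegmentByteCount = ByteCount;
  DMA->ScatterGatherSegments[1].SegmentDataPointer = 0;
  DMA->ScatterGatherSegments[1].SegmentByteCount = 0;
}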
-*/ - -typedef union DAC960_V2_CommandMailbox -{ - unsigned int Words[16]; /* Words 0-15 */ - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - unsigned int :24; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned char Reserved[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } Common; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize; /* Bytes 4-7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char CDBLength; /* Byte 21 */ - unsigned char SCSI_CDB[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } SCSI_10; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize; /* Bytes 4-7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char CDBLength; /* Byte 21 */ - unsigned short :16; /* Bytes 22-23 */ - DAC960_BusAddress64_T SCSI_CDB_BusAddress; /* Bytes 24-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } SCSI_255; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - unsigned short :16; /* Bytes 16-17 */ - unsigned char ControllerNumber; /* Byte 18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned char Reserved[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } ControllerInfo; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned 
char IOCTL_Opcode; /* Byte 21 */ - unsigned char Reserved[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } LogicalDeviceInfo; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned char Reserved[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } PhysicalDeviceInfo; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - unsigned short EventSequenceNumberHigh16; /* Bytes 16-17 */ - unsigned char ControllerNumber; /* Byte 18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned short EventSequenceNumberLow16; /* Bytes 22-23 */ - unsigned char Reserved[8]; /* Bytes 24-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } GetEvent; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - union { - DAC960_V2_LogicalDeviceState_T LogicalDeviceState; - DAC960_V2_PhysicalDeviceState_T PhysicalDeviceState; - } DeviceState; /* Byte 22 */ - unsigned char Reserved[9]; /* Bytes 23-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } SetDeviceState; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - bool RestoreConsistency:1; /* Byte 22 Bit 0 */ - bool InitializedAreaOnly:1; /* Byte 22 Bit 1 */ - unsigned char :6; /* Byte 22 Bits 2-7 */ - unsigned char Reserved[9]; /* Bytes 23-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } 
ConsistencyCheck; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - unsigned char FirstCommandMailboxSizeKB; /* Byte 4 */ - unsigned char FirstStatusMailboxSizeKB; /* Byte 5 */ - unsigned char SecondCommandMailboxSizeKB; /* Byte 6 */ - unsigned char SecondStatusMailboxSizeKB; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - unsigned int :24; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned char HealthStatusBufferSizeKB; /* Byte 22 */ - unsigned char :8; /* Byte 23 */ - DAC960_BusAddress64_T HealthStatusBufferBusAddress; /* Bytes 24-31 */ - DAC960_BusAddress64_T FirstCommandMailboxBusAddress; /* Bytes 32-39 */ - DAC960_BusAddress64_T FirstStatusMailboxBusAddress; /* Bytes 40-47 */ - DAC960_BusAddress64_T SecondCommandMailboxBusAddress; /* Bytes 48-55 */ - DAC960_BusAddress64_T SecondStatusMailboxBusAddress; /* Bytes 56-63 */ - } SetMemoryMailbox; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - DAC960_V2_OperationDevice_T OperationDevice; /* Byte 22 */ - unsigned char Reserved[9]; /* Bytes 23-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } DeviceOperation; -} -DAC960_V2_CommandMailbox_T; - - -/* - Define the DAC960 Driver IOCTL requests. -*/ - -#define DAC960_IOCTL_GET_CONTROLLER_COUNT 0xDAC001 -#define DAC960_IOCTL_GET_CONTROLLER_INFO 0xDAC002 -#define DAC960_IOCTL_V1_EXECUTE_COMMAND 0xDAC003 -#define DAC960_IOCTL_V2_EXECUTE_COMMAND 0xDAC004 -#define DAC960_IOCTL_V2_GET_HEALTH_STATUS 0xDAC005 - - -/* - Define the DAC960_IOCTL_GET_CONTROLLER_INFO reply structure. -*/ - -typedef struct DAC960_ControllerInfo -{ - unsigned char ControllerNumber; - unsigned char FirmwareType; - unsigned char Channels; - unsigned char Targets; - unsigned char PCI_Bus; - unsigned char PCI_Device; - unsigned char PCI_Function; - unsigned char IRQ_Channel; - DAC960_PCI_Address_T PCI_Address; - unsigned char ModelName[20]; - unsigned char FirmwareVersion[12]; -} -DAC960_ControllerInfo_T; - - -/* - Define the User Mode DAC960_IOCTL_V1_EXECUTE_COMMAND request structure. -*/ - -typedef struct DAC960_V1_UserCommand -{ - unsigned char ControllerNumber; - DAC960_V1_CommandMailbox_T CommandMailbox; - int DataTransferLength; - void __user *DataTransferBuffer; - DAC960_V1_DCDB_T __user *DCDB; -} -DAC960_V1_UserCommand_T; - - -/* - Define the Kernel Mode DAC960_IOCTL_V1_EXECUTE_COMMAND request structure. 
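The DAC960_IOCTL_* request codes above are serviced through the driver's management interface rather than the block device nodes. A minimal user-space sketch of querying one controller with DAC960_IOCTL_GET_CONTROLLER_INFO; the /dev/dac960_gam node name and the convention that the caller supplies ControllerNumber while the driver fills in the remaining fields are assumptions for illustration only.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/*
  Sketch only: device node name and calling convention are assumptions.
*/
int main(void)
{
  DAC960_ControllerInfo_T Info = { .ControllerNumber = 0 };
  int fd = open("/dev/dac960_gam", O_RDONLY);
  if (fd < 0)
    return 1;
  if (ioctl(fd, DAC960_IOCTL_GET_CONTROLLER_INFO, &Info) == 0)
    printf("Controller 0: %.20s, Firmware %.12s\n",
           (const char *) Info.ModelName,
           (const char *) Info.FirmwareVersion);
  close(fd);
  return 0;
}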
-*/ - -typedef struct DAC960_V1_KernelCommand -{ - unsigned char ControllerNumber; - DAC960_V1_CommandMailbox_T CommandMailbox; - int DataTransferLength; - void *DataTransferBuffer; - DAC960_V1_DCDB_T *DCDB; - DAC960_V1_CommandStatus_T CommandStatus; - void (*CompletionFunction)(struct DAC960_V1_KernelCommand *); - void *CompletionData; -} -DAC960_V1_KernelCommand_T; - - -/* - Define the User Mode DAC960_IOCTL_V2_EXECUTE_COMMAND request structure. -*/ - -typedef struct DAC960_V2_UserCommand -{ - unsigned char ControllerNumber; - DAC960_V2_CommandMailbox_T CommandMailbox; - int DataTransferLength; - int RequestSenseLength; - void __user *DataTransferBuffer; - void __user *RequestSenseBuffer; -} -DAC960_V2_UserCommand_T; - - -/* - Define the Kernel Mode DAC960_IOCTL_V2_EXECUTE_COMMAND request structure. -*/ - -typedef struct DAC960_V2_KernelCommand -{ - unsigned char ControllerNumber; - DAC960_V2_CommandMailbox_T CommandMailbox; - int DataTransferLength; - int RequestSenseLength; - void *DataTransferBuffer; - void *RequestSenseBuffer; - DAC960_V2_CommandStatus_T CommandStatus; - void (*CompletionFunction)(struct DAC960_V2_KernelCommand *); - void *CompletionData; -} -DAC960_V2_KernelCommand_T; - - -/* - Define the User Mode DAC960_IOCTL_V2_GET_HEALTH_STATUS request structure. -*/ - -typedef struct DAC960_V2_GetHealthStatus -{ - unsigned char ControllerNumber; - DAC960_V2_HealthStatusBuffer_T __user *HealthStatusBuffer; -} -DAC960_V2_GetHealthStatus_T; - - -/* - Import the Kernel Mode IOCTL interface. -*/ - -extern int DAC960_KernelIOCTL(unsigned int Request, void *Argument); - - -/* - DAC960_DriverVersion protects the private portion of this file. -*/ - -#ifdef DAC960_DriverVersion - - -/* - Define the maximum Driver Queue Depth and Controller Queue Depth supported - by DAC960 V1 and V2 Firmware Controllers. -*/ - -#define DAC960_MaxDriverQueueDepth 511 -#define DAC960_MaxControllerQueueDepth 512 - - -/* - Define the maximum number of Scatter/Gather Segments supported for any - DAC960 V1 and V2 Firmware controller. -*/ - -#define DAC960_V1_ScatterGatherLimit 33 -#define DAC960_V2_ScatterGatherLimit 128 - - -/* - Define the number of Command Mailboxes and Status Mailboxes used by the - DAC960 V1 and V2 Firmware Memory Mailbox Interface. -*/ - -#define DAC960_V1_CommandMailboxCount 256 -#define DAC960_V1_StatusMailboxCount 1024 -#define DAC960_V2_CommandMailboxCount 512 -#define DAC960_V2_StatusMailboxCount 512 - - -/* - Define the DAC960 Controller Monitoring Timer Interval. -*/ - -#define DAC960_MonitoringTimerInterval (10 * HZ) - - -/* - Define the DAC960 Controller Secondary Monitoring Interval. -*/ - -#define DAC960_SecondaryMonitoringInterval (60 * HZ) - - -/* - Define the DAC960 Controller Health Status Monitoring Interval. -*/ - -#define DAC960_HealthStatusMonitoringInterval (1 * HZ) - - -/* - Define the DAC960 Controller Progress Reporting Interval. -*/ - -#define DAC960_ProgressReportingInterval (60 * HZ) - - -/* - Define the maximum number of Partitions allowed for each Logical Drive. -*/ - -#define DAC960_MaxPartitions 8 -#define DAC960_MaxPartitionsBits 3 - -/* - Define the DAC960 Controller fixed Block Size and Block Size Bits. -*/ - -#define DAC960_BlockSize 512 -#define DAC960_BlockSizeBits 9 - - -/* - Define the number of Command structures that should be allocated as a - group to optimize kernel memory allocation. 
-*/ - -#define DAC960_V1_CommandAllocationGroupSize 11 -#define DAC960_V2_CommandAllocationGroupSize 29 - - -/* - Define the Controller Line Buffer, Progress Buffer, User Message, and - Initial Status Buffer sizes. -*/ - -#define DAC960_LineBufferSize 100 -#define DAC960_ProgressBufferSize 200 -#define DAC960_UserMessageSize 200 -#define DAC960_InitialStatusBufferSize (8192-32) - - -/* - Define the DAC960 Controller Firmware Types. -*/ - -typedef enum -{ - DAC960_V1_Controller = 1, - DAC960_V2_Controller = 2 -} -DAC960_FirmwareType_T; - - -/* - Define the DAC960 Controller Hardware Types. -*/ - -typedef enum -{ - DAC960_BA_Controller = 1, /* eXtremeRAID 2000 */ - DAC960_LP_Controller = 2, /* AcceleRAID 352 */ - DAC960_LA_Controller = 3, /* DAC1164P */ - DAC960_PG_Controller = 4, /* DAC960PTL/PJ/PG */ - DAC960_PD_Controller = 5, /* DAC960PU/PD/PL/P */ - DAC960_P_Controller = 6, /* DAC960PU/PD/PL/P */ - DAC960_GEM_Controller = 7, /* AcceleRAID 4/5/600 */ -} -DAC960_HardwareType_T; - - -/* - Define the Driver Message Levels. -*/ - -typedef enum DAC960_MessageLevel -{ - DAC960_AnnounceLevel = 0, - DAC960_InfoLevel = 1, - DAC960_NoticeLevel = 2, - DAC960_WarningLevel = 3, - DAC960_ErrorLevel = 4, - DAC960_ProgressLevel = 5, - DAC960_CriticalLevel = 6, - DAC960_UserCriticalLevel = 7 -} -DAC960_MessageLevel_T; - -static char - *DAC960_MessageLevelMap[] = - { KERN_NOTICE, KERN_NOTICE, KERN_NOTICE, KERN_WARNING, - KERN_ERR, KERN_CRIT, KERN_CRIT, KERN_CRIT }; - - -/* - Define Driver Message macros. -*/ - -#define DAC960_Announce(Format, Arguments...) \ - DAC960_Message(DAC960_AnnounceLevel, Format, ##Arguments) - -#define DAC960_Info(Format, Arguments...) \ - DAC960_Message(DAC960_InfoLevel, Format, ##Arguments) - -#define DAC960_Notice(Format, Arguments...) \ - DAC960_Message(DAC960_NoticeLevel, Format, ##Arguments) - -#define DAC960_Warning(Format, Arguments...) \ - DAC960_Message(DAC960_WarningLevel, Format, ##Arguments) - -#define DAC960_Error(Format, Arguments...) \ - DAC960_Message(DAC960_ErrorLevel, Format, ##Arguments) - -#define DAC960_Progress(Format, Arguments...) \ - DAC960_Message(DAC960_ProgressLevel, Format, ##Arguments) - -#define DAC960_Critical(Format, Arguments...) \ - DAC960_Message(DAC960_CriticalLevel, Format, ##Arguments) - -#define DAC960_UserCritical(Format, Arguments...) \ - DAC960_Message(DAC960_UserCriticalLevel, Format, ##Arguments) - - -struct DAC960_privdata { - DAC960_HardwareType_T HardwareType; - DAC960_FirmwareType_T FirmwareType; - irq_handler_t InterruptHandler; - unsigned int MemoryWindowSize; -}; - - -/* - Define the DAC960 V1 Firmware Controller Status Mailbox structure. -*/ - -typedef union DAC960_V1_StatusMailbox -{ - unsigned int Word; /* Word 0 */ - struct { - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 0 */ - unsigned char :7; /* Byte 1 Bits 0-6 */ - bool Valid:1; /* Byte 1 Bit 7 */ - DAC960_V1_CommandStatus_T CommandStatus; /* Bytes 2-3 */ - } Fields; -} -DAC960_V1_StatusMailbox_T; - - -/* - Define the DAC960 V2 Firmware Controller Status Mailbox structure. -*/ - -typedef union DAC960_V2_StatusMailbox -{ - unsigned int Words[2]; /* Words 0-1 */ - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandStatus_T CommandStatus; /* Byte 2 */ - unsigned char RequestSenseLength; /* Byte 3 */ - int DataTransferResidue; /* Bytes 4-7 */ - } Fields; -} -DAC960_V2_StatusMailbox_T; - - -/* - Define the DAC960 Driver Command Types. 
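The status mailbox formats above let the driver poll for completions: a V1 slot is live when its Valid bit is set, at which point the CommandIdentifier and CommandStatus fields identify the finished command. A minimal sketch of consuming one slot; the surrounding scan loop and the step of freeing the slot by zeroing Word are assumptions for illustration.

/*
  Sketch only: consume one V1 status mailbox slot.  Clearing the slot by
  zeroing Word afterwards is an assumption for illustration.
*/
static bool DAC960_V1_ConsumeStatusMailbox(DAC960_V1_StatusMailbox_T *Slot,
                                           DAC960_V1_CommandIdentifier_T *Id,
                                           DAC960_V1_CommandStatus_T *Status)
{
  if (!Slot->Fields.Valid)
    return false;
  *Id = Slot->Fields.CommandIdentifier;
  *Status = Slot->Fields.CommandStatus;
  Slot->Word = 0;
  return true;
}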
-*/ - -typedef enum -{ - DAC960_ReadCommand = 1, - DAC960_WriteCommand = 2, - DAC960_ReadRetryCommand = 3, - DAC960_WriteRetryCommand = 4, - DAC960_MonitoringCommand = 5, - DAC960_ImmediateCommand = 6, - DAC960_QueuedCommand = 7 -} -DAC960_CommandType_T; - - -/* - Define the DAC960 Driver Command structure. -*/ - -typedef struct DAC960_Command -{ - int CommandIdentifier; - DAC960_CommandType_T CommandType; - struct DAC960_Controller *Controller; - struct DAC960_Command *Next; - struct completion *Completion; - unsigned int LogicalDriveNumber; - unsigned int BlockNumber; - unsigned int BlockCount; - unsigned int SegmentCount; - int DmaDirection; - struct scatterlist *cmd_sglist; - struct request *Request; - union { - struct { - DAC960_V1_CommandMailbox_T CommandMailbox; - DAC960_V1_KernelCommand_T *KernelCommand; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_ScatterGatherSegment_T *ScatterGatherList; - dma_addr_t ScatterGatherListDMA; - struct scatterlist ScatterList[DAC960_V1_ScatterGatherLimit]; - unsigned int EndMarker[0]; - } V1; - struct { - DAC960_V2_CommandMailbox_T CommandMailbox; - DAC960_V2_KernelCommand_T *KernelCommand; - DAC960_V2_CommandStatus_T CommandStatus; - unsigned char RequestSenseLength; - int DataTransferResidue; - DAC960_V2_ScatterGatherSegment_T *ScatterGatherList; - dma_addr_t ScatterGatherListDMA; - DAC960_SCSI_RequestSense_T *RequestSense; - dma_addr_t RequestSenseDMA; - struct scatterlist ScatterList[DAC960_V2_ScatterGatherLimit]; - unsigned int EndMarker[0]; - } V2; - } FW; -} -DAC960_Command_T; - - -/* - Define the DAC960 Driver Controller structure. -*/ - -typedef struct DAC960_Controller -{ - void __iomem *BaseAddress; - void __iomem *MemoryMappedAddress; - DAC960_FirmwareType_T FirmwareType; - DAC960_HardwareType_T HardwareType; - DAC960_IO_Address_T IO_Address; - DAC960_PCI_Address_T PCI_Address; - struct pci_dev *PCIDevice; - unsigned char ControllerNumber; - unsigned char ControllerName[4]; - unsigned char ModelName[20]; - unsigned char FullModelName[28]; - unsigned char FirmwareVersion[12]; - unsigned char Bus; - unsigned char Device; - unsigned char Function; - unsigned char IRQ_Channel; - unsigned char Channels; - unsigned char Targets; - unsigned char MemorySize; - unsigned char LogicalDriveCount; - unsigned short CommandAllocationGroupSize; - unsigned short ControllerQueueDepth; - unsigned short DriverQueueDepth; - unsigned short MaxBlocksPerCommand; - unsigned short ControllerScatterGatherLimit; - unsigned short DriverScatterGatherLimit; - unsigned int CombinedStatusBufferLength; - unsigned int InitialStatusLength; - unsigned int CurrentStatusLength; - unsigned int ProgressBufferLength; - unsigned int UserStatusLength; - struct dma_loaf DmaPages; - unsigned long MonitoringTimerCount; - unsigned long PrimaryMonitoringTime; - unsigned long SecondaryMonitoringTime; - unsigned long ShutdownMonitoringTimer; - unsigned long LastProgressReportTime; - unsigned long LastCurrentStatusTime; - bool ControllerInitialized; - bool MonitoringCommandDeferred; - bool EphemeralProgressMessage; - bool DriveSpinUpMessageDisplayed; - bool MonitoringAlertMode; - bool SuppressEnclosureMessages; - struct timer_list MonitoringTimer; - struct gendisk *disks[DAC960_MaxLogicalDrives]; - struct dma_pool *ScatterGatherPool; - DAC960_Command_T *FreeCommands; - unsigned char *CombinedStatusBuffer; - unsigned char *CurrentStatusBuffer; - struct request_queue *RequestQueue[DAC960_MaxLogicalDrives]; - int req_q_index; - spinlock_t queue_lock; - wait_queue_head_t 
CommandWaitQueue; - wait_queue_head_t HealthStatusWaitQueue; - DAC960_Command_T InitialCommand; - DAC960_Command_T *Commands[DAC960_MaxDriverQueueDepth]; - struct proc_dir_entry *ControllerProcEntry; - bool LogicalDriveInitiallyAccessible[DAC960_MaxLogicalDrives]; - void (*QueueCommand)(DAC960_Command_T *Command); - bool (*ReadControllerConfiguration)(struct DAC960_Controller *); - bool (*ReadDeviceConfiguration)(struct DAC960_Controller *); - bool (*ReportDeviceConfiguration)(struct DAC960_Controller *); - void (*QueueReadWriteCommand)(DAC960_Command_T *Command); - union { - struct { - unsigned char GeometryTranslationHeads; - unsigned char GeometryTranslationSectors; - unsigned char PendingRebuildFlag; - unsigned short StripeSize; - unsigned short SegmentSize; - unsigned short NewEventLogSequenceNumber; - unsigned short OldEventLogSequenceNumber; - unsigned short DeviceStateChannel; - unsigned short DeviceStateTargetID; - bool DualModeMemoryMailboxInterface; - bool BackgroundInitializationStatusSupported; - bool SAFTE_EnclosureManagementEnabled; - bool NeedLogicalDriveInformation; - bool NeedErrorTableInformation; - bool NeedDeviceStateInformation; - bool NeedDeviceInquiryInformation; - bool NeedDeviceSerialNumberInformation; - bool NeedRebuildProgress; - bool NeedConsistencyCheckProgress; - bool NeedBackgroundInitializationStatus; - bool StartDeviceStateScan; - bool RebuildProgressFirst; - bool RebuildFlagPending; - bool RebuildStatusPending; - - dma_addr_t FirstCommandMailboxDMA; - DAC960_V1_CommandMailbox_T *FirstCommandMailbox; - DAC960_V1_CommandMailbox_T *LastCommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox; - DAC960_V1_CommandMailbox_T *PreviousCommandMailbox1; - DAC960_V1_CommandMailbox_T *PreviousCommandMailbox2; - - dma_addr_t FirstStatusMailboxDMA; - DAC960_V1_StatusMailbox_T *FirstStatusMailbox; - DAC960_V1_StatusMailbox_T *LastStatusMailbox; - DAC960_V1_StatusMailbox_T *NextStatusMailbox; - - DAC960_V1_DCDB_T *MonitoringDCDB; - dma_addr_t MonitoringDCDB_DMA; - - DAC960_V1_Enquiry_T Enquiry; - DAC960_V1_Enquiry_T *NewEnquiry; - dma_addr_t NewEnquiryDMA; - - DAC960_V1_ErrorTable_T ErrorTable; - DAC960_V1_ErrorTable_T *NewErrorTable; - dma_addr_t NewErrorTableDMA; - - DAC960_V1_EventLogEntry_T *EventLogEntry; - dma_addr_t EventLogEntryDMA; - - DAC960_V1_RebuildProgress_T *RebuildProgress; - dma_addr_t RebuildProgressDMA; - DAC960_V1_CommandStatus_T LastRebuildStatus; - DAC960_V1_CommandStatus_T PendingRebuildStatus; - - DAC960_V1_LogicalDriveInformationArray_T LogicalDriveInformation; - DAC960_V1_LogicalDriveInformationArray_T *NewLogicalDriveInformation; - dma_addr_t NewLogicalDriveInformationDMA; - - DAC960_V1_BackgroundInitializationStatus_T - *BackgroundInitializationStatus; - dma_addr_t BackgroundInitializationStatusDMA; - DAC960_V1_BackgroundInitializationStatus_T - LastBackgroundInitializationStatus; - - DAC960_V1_DeviceState_T - DeviceState[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - DAC960_V1_DeviceState_T *NewDeviceState; - dma_addr_t NewDeviceStateDMA; - - DAC960_SCSI_Inquiry_T - InquiryStandardData[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - DAC960_SCSI_Inquiry_T *NewInquiryStandardData; - dma_addr_t NewInquiryStandardDataDMA; - - DAC960_SCSI_Inquiry_UnitSerialNumber_T - InquiryUnitSerialNumber[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber; - dma_addr_t NewInquiryUnitSerialNumberDMA; - - int DeviceResetCount[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - bool 
DirectCommandActive[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - } V1; - struct { - unsigned int StatusChangeCounter; - unsigned int NextEventSequenceNumber; - unsigned int PhysicalDeviceIndex; - bool NeedLogicalDeviceInformation; - bool NeedPhysicalDeviceInformation; - bool NeedDeviceSerialNumberInformation; - bool StartLogicalDeviceInformationScan; - bool StartPhysicalDeviceInformationScan; - struct dma_pool *RequestSensePool; - - dma_addr_t FirstCommandMailboxDMA; - DAC960_V2_CommandMailbox_T *FirstCommandMailbox; - DAC960_V2_CommandMailbox_T *LastCommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox; - DAC960_V2_CommandMailbox_T *PreviousCommandMailbox1; - DAC960_V2_CommandMailbox_T *PreviousCommandMailbox2; - - dma_addr_t FirstStatusMailboxDMA; - DAC960_V2_StatusMailbox_T *FirstStatusMailbox; - DAC960_V2_StatusMailbox_T *LastStatusMailbox; - DAC960_V2_StatusMailbox_T *NextStatusMailbox; - - dma_addr_t HealthStatusBufferDMA; - DAC960_V2_HealthStatusBuffer_T *HealthStatusBuffer; - - DAC960_V2_ControllerInfo_T ControllerInformation; - DAC960_V2_ControllerInfo_T *NewControllerInformation; - dma_addr_t NewControllerInformationDMA; - - DAC960_V2_LogicalDeviceInfo_T - *LogicalDeviceInformation[DAC960_MaxLogicalDrives]; - DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInformation; - dma_addr_t NewLogicalDeviceInformationDMA; - - DAC960_V2_PhysicalDeviceInfo_T - *PhysicalDeviceInformation[DAC960_V2_MaxPhysicalDevices]; - DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInformation; - dma_addr_t NewPhysicalDeviceInformationDMA; - - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber; - dma_addr_t NewInquiryUnitSerialNumberDMA; - DAC960_SCSI_Inquiry_UnitSerialNumber_T - *InquiryUnitSerialNumber[DAC960_V2_MaxPhysicalDevices]; - - DAC960_V2_Event_T *Event; - dma_addr_t EventDMA; - - DAC960_V2_PhysicalToLogicalDevice_T *PhysicalToLogicalDevice; - dma_addr_t PhysicalToLogicalDeviceDMA; - - DAC960_V2_PhysicalDevice_T - LogicalDriveToVirtualDevice[DAC960_MaxLogicalDrives]; - bool LogicalDriveFoundDuringScan[DAC960_MaxLogicalDrives]; - } V2; - } FW; - unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; - unsigned char UserStatusBuffer[DAC960_UserMessageSize]; -} -DAC960_Controller_T; - - -/* - Simplify access to Firmware Version Dependent Data Structure Components - and Functions. -*/ - -#define V1 FW.V1 -#define V2 FW.V2 -#define DAC960_QueueCommand(Command) \ - (Controller->QueueCommand)(Command) -#define DAC960_ReadControllerConfiguration(Controller) \ - (Controller->ReadControllerConfiguration)(Controller) -#define DAC960_ReadDeviceConfiguration(Controller) \ - (Controller->ReadDeviceConfiguration)(Controller) -#define DAC960_ReportDeviceConfiguration(Controller) \ - (Controller->ReportDeviceConfiguration)(Controller) -#define DAC960_QueueReadWriteCommand(Command) \ - (Controller->QueueReadWriteCommand)(Command) - -/* - * dma_addr_writeql is provided to write dma_addr_t types - * to a 64-bit pci address space register. The controller - * will accept having the register written as two 32-bit - * values. - * - * In HIGHMEM kernels, dma_addr_t is a 64-bit value. - * without HIGHMEM, dma_addr_t is a 32-bit value. - * - * The compiler should always fix up the assignment - * to u.wq appropriately, depending upon the size of - * dma_addr_t. 
- */ -static inline -void dma_addr_writeql(dma_addr_t addr, void __iomem *write_address) -{ - union { - u64 wq; - uint wl[2]; - } u; - - u.wq = addr; - - writel(u.wl[0], write_address); - writel(u.wl[1], write_address + 4); -} - -/* - Define the DAC960 GEM Series Controller Interface Register Offsets. - */ - -#define DAC960_GEM_RegisterWindowSize 0x600 - -typedef enum -{ - DAC960_GEM_InboundDoorBellRegisterReadSetOffset = 0x214, - DAC960_GEM_InboundDoorBellRegisterClearOffset = 0x218, - DAC960_GEM_OutboundDoorBellRegisterReadSetOffset = 0x224, - DAC960_GEM_OutboundDoorBellRegisterClearOffset = 0x228, - DAC960_GEM_InterruptStatusRegisterOffset = 0x208, - DAC960_GEM_InterruptMaskRegisterReadSetOffset = 0x22C, - DAC960_GEM_InterruptMaskRegisterClearOffset = 0x230, - DAC960_GEM_CommandMailboxBusAddressOffset = 0x510, - DAC960_GEM_CommandStatusOffset = 0x518, - DAC960_GEM_ErrorStatusRegisterReadSetOffset = 0x224, - DAC960_GEM_ErrorStatusRegisterClearOffset = 0x228, -} -DAC960_GEM_RegisterOffsets_T; - -/* - Define the structure of the DAC960 GEM Series Inbound Door Bell - */ - -typedef union DAC960_GEM_InboundDoorBellRegister -{ - unsigned int All; - struct { - unsigned int :24; - bool HardwareMailboxNewCommand:1; - bool AcknowledgeHardwareMailboxStatus:1; - bool GenerateInterrupt:1; - bool ControllerReset:1; - bool MemoryMailboxNewCommand:1; - unsigned int :3; - } Write; - struct { - unsigned int :24; - bool HardwareMailboxFull:1; - bool InitializationInProgress:1; - unsigned int :6; - } Read; -} -DAC960_GEM_InboundDoorBellRegister_T; - -/* - Define the structure of the DAC960 GEM Series Outbound Door Bell Register. - */ -typedef union DAC960_GEM_OutboundDoorBellRegister -{ - unsigned int All; - struct { - unsigned int :24; - bool AcknowledgeHardwareMailboxInterrupt:1; - bool AcknowledgeMemoryMailboxInterrupt:1; - unsigned int :6; - } Write; - struct { - unsigned int :24; - bool HardwareMailboxStatusAvailable:1; - bool MemoryMailboxStatusAvailable:1; - unsigned int :6; - } Read; -} -DAC960_GEM_OutboundDoorBellRegister_T; - -/* - Define the structure of the DAC960 GEM Series Interrupt Mask Register. - */ -typedef union DAC960_GEM_InterruptMaskRegister -{ - unsigned int All; - struct { - unsigned int :16; - unsigned int :8; - unsigned int HardwareMailboxInterrupt:1; - unsigned int MemoryMailboxInterrupt:1; - unsigned int :6; - } Bits; -} -DAC960_GEM_InterruptMaskRegister_T; - -/* - Define the structure of the DAC960 GEM Series Error Status Register. - */ - -typedef union DAC960_GEM_ErrorStatusRegister -{ - unsigned int All; - struct { - unsigned int :24; - unsigned int :5; - bool ErrorStatusPending:1; - unsigned int :2; - } Bits; -} -DAC960_GEM_ErrorStatusRegister_T; - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 GEM Series Controller Interface Registers. 
-*/ - -static inline -void DAC960_GEM_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset); -} - -static inline -void DAC960_GEM_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterClearOffset); -} - -static inline -void DAC960_GEM_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset); -} - -static inline -void DAC960_GEM_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset); -} - -static inline -void DAC960_GEM_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset); -} - -static inline -bool DAC960_GEM_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_InboundDoorBellRegisterReadSetOffset); - return InboundDoorBellRegister.Read.HardwareMailboxFull; -} - -static inline -bool DAC960_GEM_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_InboundDoorBellRegisterReadSetOffset); - return InboundDoorBellRegister.Read.InitializationInProgress; -} - -static inline -void DAC960_GEM_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset); -} - -static inline -void DAC960_GEM_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset); -} - -static inline -void DAC960_GEM_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - 
OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset); -} - -static inline -bool DAC960_GEM_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_OutboundDoorBellRegisterReadSetOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_GEM_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_OutboundDoorBellRegisterReadSetOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_GEM_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0; - InterruptMaskRegister.Bits.HardwareMailboxInterrupt = true; - InterruptMaskRegister.Bits.MemoryMailboxInterrupt = true; - writel(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_GEM_InterruptMaskRegisterClearOffset); -} - -static inline -void DAC960_GEM_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0; - InterruptMaskRegister.Bits.HardwareMailboxInterrupt = true; - InterruptMaskRegister.Bits.MemoryMailboxInterrupt = true; - writel(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_GEM_InterruptMaskRegisterReadSetOffset); -} - -static inline -bool DAC960_GEM_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_InterruptMaskRegisterReadSetOffset); - return !(InterruptMaskRegister.Bits.HardwareMailboxInterrupt || - InterruptMaskRegister.Bits.MemoryMailboxInterrupt); -} - -static inline -void DAC960_GEM_WriteCommandMailbox(DAC960_V2_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V2_CommandMailbox_T - *CommandMailbox) -{ - memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1], - sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int)); - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - -static inline -void DAC960_GEM_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - dma_addr_t CommandMailboxDMA) -{ - dma_addr_writeql(CommandMailboxDMA, - ControllerBaseAddress + - DAC960_GEM_CommandMailboxBusAddressOffset); -} - -static inline DAC960_V2_CommandIdentifier_T -DAC960_GEM_ReadCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_GEM_CommandStatusOffset); -} - -static inline DAC960_V2_CommandStatus_T -DAC960_GEM_ReadCommandStatus(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_GEM_CommandStatusOffset + 2); -} - -static inline bool -DAC960_GEM_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_GEM_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readl(ControllerBaseAddress + 
DAC960_GEM_ErrorStatusRegisterReadSetOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_GEM_CommandMailboxBusAddressOffset + 0); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_GEM_CommandMailboxBusAddressOffset + 1); - writel(0x03000000, ControllerBaseAddress + - DAC960_GEM_ErrorStatusRegisterClearOffset); - return true; -} - -/* - Define the DAC960 BA Series Controller Interface Register Offsets. -*/ - -#define DAC960_BA_RegisterWindowSize 0x80 - -typedef enum -{ - DAC960_BA_InboundDoorBellRegisterOffset = 0x60, - DAC960_BA_OutboundDoorBellRegisterOffset = 0x61, - DAC960_BA_InterruptStatusRegisterOffset = 0x30, - DAC960_BA_InterruptMaskRegisterOffset = 0x34, - DAC960_BA_CommandMailboxBusAddressOffset = 0x50, - DAC960_BA_CommandStatusOffset = 0x58, - DAC960_BA_ErrorStatusRegisterOffset = 0x63 -} -DAC960_BA_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 BA Series Inbound Door Bell Register. -*/ - -typedef union DAC960_BA_InboundDoorBellRegister -{ - unsigned char All; - struct { - bool HardwareMailboxNewCommand:1; /* Bit 0 */ - bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - bool MemoryMailboxNewCommand:1; /* Bit 4 */ - unsigned char :3; /* Bits 5-7 */ - } Write; - struct { - bool HardwareMailboxEmpty:1; /* Bit 0 */ - bool InitializationNotInProgress:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_BA_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 BA Series Outbound Door Bell Register. -*/ - -typedef union DAC960_BA_OutboundDoorBellRegister -{ - unsigned char All; - struct { - bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */ - bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Write; - struct { - bool HardwareMailboxStatusAvailable:1; /* Bit 0 */ - bool MemoryMailboxStatusAvailable:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_BA_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 BA Series Interrupt Mask Register. -*/ - -typedef union DAC960_BA_InterruptMaskRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool DisableInterrupts:1; /* Bit 2 */ - bool DisableInterruptsI2O:1; /* Bit 3 */ - unsigned int :4; /* Bits 4-7 */ - } Bits; -} -DAC960_BA_InterruptMaskRegister_T; - - -/* - Define the structure of the DAC960 BA Series Error Status Register. -*/ - -typedef union DAC960_BA_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_BA_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 BA Series Controller Interface Registers. 
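The DAC960_GEM_WriteCommandMailbox helper above shows the ordering rule the memory-mailbox write helpers in this file follow: copy the payload words first, issue wmb(), and only then store Words[0], which is what hands the command to the firmware, followed by mb(). A loose user-space analogue of that idiom, using a C11 release store in place of the kernel barriers (the names are illustrative, not taken from the driver):

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* The "ownership" word is written last, with release semantics, so a
	   consumer can never observe word0 without the rest of the payload. */
	struct mailbox {
		_Atomic uint32_t word0;   /* opcode/ownership word, written last */
		uint32_t words[3];        /* remainder of the command */
	};

	static void post_command(struct mailbox *m, const uint32_t cmd[4])
	{
		memcpy(m->words, cmd + 1, sizeof(m->words));   /* payload first */
		atomic_store_explicit(&m->word0, cmd[0],
				      memory_order_release);   /* then word 0 */
	}

	int main(void)
	{
		struct mailbox mbox = { 0 };
		const uint32_t cmd[4] = { 0x01, 0x11, 0x22, 0x33 };

		post_command(&mbox, cmd);
		printf("word0=%u words[0]=%u\n",
		       (unsigned)atomic_load(&mbox.word0),
		       (unsigned)mbox.words[0]);
		return 0;
	}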
-*/ - -static inline -void DAC960_BA_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_BA_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); - return !InboundDoorBellRegister.Read.HardwareMailboxEmpty; -} - -static inline -bool DAC960_BA_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); - return !InboundDoorBellRegister.Read.InitializationNotInProgress; -} - -static inline -void DAC960_BA_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - 
OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_BA_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_BA_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_BA_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = false; - InterruptMaskRegister.Bits.DisableInterruptsI2O = true; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset); -} - -static inline -void DAC960_BA_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = true; - InterruptMaskRegister.Bits.DisableInterruptsI2O = true; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset); -} - -static inline -bool DAC960_BA_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readb(ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset); - return !InterruptMaskRegister.Bits.DisableInterrupts; -} - -static inline -void DAC960_BA_WriteCommandMailbox(DAC960_V2_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V2_CommandMailbox_T - *CommandMailbox) -{ - memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1], - sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int)); - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - - -static inline -void DAC960_BA_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - dma_addr_t CommandMailboxDMA) -{ - dma_addr_writeql(CommandMailboxDMA, - ControllerBaseAddress + - DAC960_BA_CommandMailboxBusAddressOffset); -} - -static inline DAC960_V2_CommandIdentifier_T -DAC960_BA_ReadCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_BA_CommandStatusOffset); -} - -static inline DAC960_V2_CommandStatus_T -DAC960_BA_ReadCommandStatus(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_BA_CommandStatusOffset + 2); -} - -static inline bool -DAC960_BA_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_BA_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_BA_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = 
ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_BA_CommandMailboxBusAddressOffset + 0); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_BA_CommandMailboxBusAddressOffset + 1); - writeb(0xFF, ControllerBaseAddress + DAC960_BA_ErrorStatusRegisterOffset); - return true; -} - - -/* - Define the DAC960 LP Series Controller Interface Register Offsets. -*/ - -#define DAC960_LP_RegisterWindowSize 0x80 - -typedef enum -{ - DAC960_LP_InboundDoorBellRegisterOffset = 0x20, - DAC960_LP_OutboundDoorBellRegisterOffset = 0x2C, - DAC960_LP_InterruptStatusRegisterOffset = 0x30, - DAC960_LP_InterruptMaskRegisterOffset = 0x34, - DAC960_LP_CommandMailboxBusAddressOffset = 0x10, - DAC960_LP_CommandStatusOffset = 0x18, - DAC960_LP_ErrorStatusRegisterOffset = 0x2E -} -DAC960_LP_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 LP Series Inbound Door Bell Register. -*/ - -typedef union DAC960_LP_InboundDoorBellRegister -{ - unsigned char All; - struct { - bool HardwareMailboxNewCommand:1; /* Bit 0 */ - bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - bool MemoryMailboxNewCommand:1; /* Bit 4 */ - unsigned char :3; /* Bits 5-7 */ - } Write; - struct { - bool HardwareMailboxFull:1; /* Bit 0 */ - bool InitializationInProgress:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_LP_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 LP Series Outbound Door Bell Register. -*/ - -typedef union DAC960_LP_OutboundDoorBellRegister -{ - unsigned char All; - struct { - bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */ - bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Write; - struct { - bool HardwareMailboxStatusAvailable:1; /* Bit 0 */ - bool MemoryMailboxStatusAvailable:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_LP_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 LP Series Interrupt Mask Register. -*/ - -typedef union DAC960_LP_InterruptMaskRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool DisableInterrupts:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_LP_InterruptMaskRegister_T; - - -/* - Define the structure of the DAC960 LP Series Error Status Register. -*/ - -typedef union DAC960_LP_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_LP_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 LP Series Controller Interface Registers. 
-*/ - -static inline -void DAC960_LP_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_LP_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.HardwareMailboxFull; -} - -static inline -bool DAC960_LP_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.InitializationInProgress; -} - -static inline -void DAC960_LP_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt 
= true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_LP_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_LP_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_LP_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = false; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset); -} - -static inline -void DAC960_LP_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = true; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset); -} - -static inline -bool DAC960_LP_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readb(ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset); - return !InterruptMaskRegister.Bits.DisableInterrupts; -} - -static inline -void DAC960_LP_WriteCommandMailbox(DAC960_V2_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V2_CommandMailbox_T - *CommandMailbox) -{ - memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1], - sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int)); - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - -static inline -void DAC960_LP_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - dma_addr_t CommandMailboxDMA) -{ - dma_addr_writeql(CommandMailboxDMA, - ControllerBaseAddress + - DAC960_LP_CommandMailboxBusAddressOffset); -} - -static inline DAC960_V2_CommandIdentifier_T -DAC960_LP_ReadCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_LP_CommandStatusOffset); -} - -static inline DAC960_V2_CommandStatus_T -DAC960_LP_ReadCommandStatus(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_LP_CommandStatusOffset + 2); -} - -static inline bool -DAC960_LP_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_LP_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_LP_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_LP_CommandMailboxBusAddressOffset + 0); - *Parameter1 = - readb(ControllerBaseAddress + 
DAC960_LP_CommandMailboxBusAddressOffset + 1); - writeb(0xFF, ControllerBaseAddress + DAC960_LP_ErrorStatusRegisterOffset); - return true; -} - - -/* - Define the DAC960 LA Series Controller Interface Register Offsets. -*/ - -#define DAC960_LA_RegisterWindowSize 0x80 - -typedef enum -{ - DAC960_LA_InboundDoorBellRegisterOffset = 0x60, - DAC960_LA_OutboundDoorBellRegisterOffset = 0x61, - DAC960_LA_InterruptMaskRegisterOffset = 0x34, - DAC960_LA_CommandOpcodeRegisterOffset = 0x50, - DAC960_LA_CommandIdentifierRegisterOffset = 0x51, - DAC960_LA_MailboxRegister2Offset = 0x52, - DAC960_LA_MailboxRegister3Offset = 0x53, - DAC960_LA_MailboxRegister4Offset = 0x54, - DAC960_LA_MailboxRegister5Offset = 0x55, - DAC960_LA_MailboxRegister6Offset = 0x56, - DAC960_LA_MailboxRegister7Offset = 0x57, - DAC960_LA_MailboxRegister8Offset = 0x58, - DAC960_LA_MailboxRegister9Offset = 0x59, - DAC960_LA_MailboxRegister10Offset = 0x5A, - DAC960_LA_MailboxRegister11Offset = 0x5B, - DAC960_LA_MailboxRegister12Offset = 0x5C, - DAC960_LA_StatusCommandIdentifierRegOffset = 0x5D, - DAC960_LA_StatusRegisterOffset = 0x5E, - DAC960_LA_ErrorStatusRegisterOffset = 0x63 -} -DAC960_LA_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 LA Series Inbound Door Bell Register. -*/ - -typedef union DAC960_LA_InboundDoorBellRegister -{ - unsigned char All; - struct { - bool HardwareMailboxNewCommand:1; /* Bit 0 */ - bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - bool MemoryMailboxNewCommand:1; /* Bit 4 */ - unsigned char :3; /* Bits 5-7 */ - } Write; - struct { - bool HardwareMailboxEmpty:1; /* Bit 0 */ - bool InitializationNotInProgress:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_LA_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 LA Series Outbound Door Bell Register. -*/ - -typedef union DAC960_LA_OutboundDoorBellRegister -{ - unsigned char All; - struct { - bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */ - bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Write; - struct { - bool HardwareMailboxStatusAvailable:1; /* Bit 0 */ - bool MemoryMailboxStatusAvailable:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_LA_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 LA Series Interrupt Mask Register. -*/ - -typedef union DAC960_LA_InterruptMaskRegister -{ - unsigned char All; - struct { - unsigned char :2; /* Bits 0-1 */ - bool DisableInterrupts:1; /* Bit 2 */ - unsigned char :5; /* Bits 3-7 */ - } Bits; -} -DAC960_LA_InterruptMaskRegister_T; - - -/* - Define the structure of the DAC960 LA Series Error Status Register. -*/ - -typedef union DAC960_LA_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_LA_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 LA Series Controller Interface Registers. 
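The GEM, BA, LP and LA definitions above all repeat the same access pattern: a union pairing named bit-fields with an All member, where the bit-fields document individual signals and All is the value actually read from or written to the register. A minimal standalone sketch of that idiom (and a reminder of why it is compiler-dependent, since bit-field layout is implementation-defined):

	#include <stdio.h>

	/* Named bits for readability, "All" for the raw register value. */
	typedef union {
		unsigned int All;
		struct {
			unsigned int NewCommand:1;        /* bit 0 */
			unsigned int AcknowledgeStatus:1; /* bit 1 */
			unsigned int :30;                 /* bits 2-31 */
		} Write;
	} doorbell_t;

	int main(void)
	{
		doorbell_t db;

		db.All = 0;
		db.Write.NewCommand = 1;
		/* db.All is what would be handed to writel()/writeb(). */
		printf("door bell value: 0x%08x\n", db.All);
		return 0;
	}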
-*/ - -static inline -void DAC960_LA_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_LA_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); - return !InboundDoorBellRegister.Read.HardwareMailboxEmpty; -} - -static inline -bool DAC960_LA_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); - return !InboundDoorBellRegister.Read.InitializationNotInProgress; -} - -static inline -void DAC960_LA_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - 
OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_LA_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_LA_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_LA_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = false; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset); -} - -static inline -void DAC960_LA_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = true; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset); -} - -static inline -bool DAC960_LA_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readb(ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset); - return !InterruptMaskRegister.Bits.DisableInterrupts; -} - -static inline -void DAC960_LA_WriteCommandMailbox(DAC960_V1_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V1_CommandMailbox_T - *CommandMailbox) -{ - MemoryCommandMailbox->Words[1] = CommandMailbox->Words[1]; - MemoryCommandMailbox->Words[2] = CommandMailbox->Words[2]; - MemoryCommandMailbox->Words[3] = CommandMailbox->Words[3]; - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - -static inline -void DAC960_LA_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - DAC960_V1_CommandMailbox_T *CommandMailbox) -{ - writel(CommandMailbox->Words[0], - ControllerBaseAddress + DAC960_LA_CommandOpcodeRegisterOffset); - writel(CommandMailbox->Words[1], - ControllerBaseAddress + DAC960_LA_MailboxRegister4Offset); - writel(CommandMailbox->Words[2], - ControllerBaseAddress + DAC960_LA_MailboxRegister8Offset); - writeb(CommandMailbox->Bytes[12], - ControllerBaseAddress + DAC960_LA_MailboxRegister12Offset); -} - -static inline DAC960_V1_CommandIdentifier_T -DAC960_LA_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readb(ControllerBaseAddress - + DAC960_LA_StatusCommandIdentifierRegOffset); -} - -static inline DAC960_V1_CommandStatus_T -DAC960_LA_ReadStatusRegister(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_LA_StatusRegisterOffset); -} - -static inline bool -DAC960_LA_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_LA_ErrorStatusRegister_T ErrorStatusRegister; 
- ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_LA_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_LA_CommandOpcodeRegisterOffset); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_LA_CommandIdentifierRegisterOffset); - writeb(0xFF, ControllerBaseAddress + DAC960_LA_ErrorStatusRegisterOffset); - return true; -} - -/* - Define the DAC960 PG Series Controller Interface Register Offsets. -*/ - -#define DAC960_PG_RegisterWindowSize 0x2000 - -typedef enum -{ - DAC960_PG_InboundDoorBellRegisterOffset = 0x0020, - DAC960_PG_OutboundDoorBellRegisterOffset = 0x002C, - DAC960_PG_InterruptMaskRegisterOffset = 0x0034, - DAC960_PG_CommandOpcodeRegisterOffset = 0x1000, - DAC960_PG_CommandIdentifierRegisterOffset = 0x1001, - DAC960_PG_MailboxRegister2Offset = 0x1002, - DAC960_PG_MailboxRegister3Offset = 0x1003, - DAC960_PG_MailboxRegister4Offset = 0x1004, - DAC960_PG_MailboxRegister5Offset = 0x1005, - DAC960_PG_MailboxRegister6Offset = 0x1006, - DAC960_PG_MailboxRegister7Offset = 0x1007, - DAC960_PG_MailboxRegister8Offset = 0x1008, - DAC960_PG_MailboxRegister9Offset = 0x1009, - DAC960_PG_MailboxRegister10Offset = 0x100A, - DAC960_PG_MailboxRegister11Offset = 0x100B, - DAC960_PG_MailboxRegister12Offset = 0x100C, - DAC960_PG_StatusCommandIdentifierRegOffset = 0x1018, - DAC960_PG_StatusRegisterOffset = 0x101A, - DAC960_PG_ErrorStatusRegisterOffset = 0x103F -} -DAC960_PG_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 PG Series Inbound Door Bell Register. -*/ - -typedef union DAC960_PG_InboundDoorBellRegister -{ - unsigned int All; - struct { - bool HardwareMailboxNewCommand:1; /* Bit 0 */ - bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - bool MemoryMailboxNewCommand:1; /* Bit 4 */ - unsigned int :27; /* Bits 5-31 */ - } Write; - struct { - bool HardwareMailboxFull:1; /* Bit 0 */ - bool InitializationInProgress:1; /* Bit 1 */ - unsigned int :30; /* Bits 2-31 */ - } Read; -} -DAC960_PG_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 PG Series Outbound Door Bell Register. -*/ - -typedef union DAC960_PG_OutboundDoorBellRegister -{ - unsigned int All; - struct { - bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */ - bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */ - unsigned int :30; /* Bits 2-31 */ - } Write; - struct { - bool HardwareMailboxStatusAvailable:1; /* Bit 0 */ - bool MemoryMailboxStatusAvailable:1; /* Bit 1 */ - unsigned int :30; /* Bits 2-31 */ - } Read; -} -DAC960_PG_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 PG Series Interrupt Mask Register. -*/ - -typedef union DAC960_PG_InterruptMaskRegister -{ - unsigned int All; - struct { - unsigned int MessageUnitInterruptMask1:2; /* Bits 0-1 */ - bool DisableInterrupts:1; /* Bit 2 */ - unsigned int MessageUnitInterruptMask2:5; /* Bits 3-7 */ - unsigned int Reserved0:24; /* Bits 8-31 */ - } Bits; -} -DAC960_PG_InterruptMaskRegister_T; - - -/* - Define the structure of the DAC960 PG Series Error Status Register. 
-*/ - -typedef union DAC960_PG_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_PG_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 PG Series Controller Interface Registers. -*/ - -static inline -void DAC960_PG_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_PG_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readl(ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.HardwareMailboxFull; -} - -static inline -bool DAC960_PG_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readl(ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.InitializationInProgress; -} - -static inline -void DAC960_PG_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = 
true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_PG_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readl(ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_PG_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readl(ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_PG_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0; - InterruptMaskRegister.Bits.MessageUnitInterruptMask1 = 0x3; - InterruptMaskRegister.Bits.DisableInterrupts = false; - InterruptMaskRegister.Bits.MessageUnitInterruptMask2 = 0x1F; - writel(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset); -} - -static inline -void DAC960_PG_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0; - InterruptMaskRegister.Bits.MessageUnitInterruptMask1 = 0x3; - InterruptMaskRegister.Bits.DisableInterrupts = true; - InterruptMaskRegister.Bits.MessageUnitInterruptMask2 = 0x1F; - writel(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset); -} - -static inline -bool DAC960_PG_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readl(ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset); - return !InterruptMaskRegister.Bits.DisableInterrupts; -} - -static inline -void DAC960_PG_WriteCommandMailbox(DAC960_V1_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V1_CommandMailbox_T - *CommandMailbox) -{ - MemoryCommandMailbox->Words[1] = CommandMailbox->Words[1]; - MemoryCommandMailbox->Words[2] = CommandMailbox->Words[2]; - MemoryCommandMailbox->Words[3] = CommandMailbox->Words[3]; - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - -static inline -void DAC960_PG_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - DAC960_V1_CommandMailbox_T *CommandMailbox) -{ - writel(CommandMailbox->Words[0], - ControllerBaseAddress + DAC960_PG_CommandOpcodeRegisterOffset); - writel(CommandMailbox->Words[1], - ControllerBaseAddress + DAC960_PG_MailboxRegister4Offset); - writel(CommandMailbox->Words[2], - ControllerBaseAddress + DAC960_PG_MailboxRegister8Offset); - writeb(CommandMailbox->Bytes[12], - ControllerBaseAddress + DAC960_PG_MailboxRegister12Offset); -} - -static inline DAC960_V1_CommandIdentifier_T 
-DAC960_PG_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readb(ControllerBaseAddress - + DAC960_PG_StatusCommandIdentifierRegOffset); -} - -static inline DAC960_V1_CommandStatus_T -DAC960_PG_ReadStatusRegister(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_PG_StatusRegisterOffset); -} - -static inline bool -DAC960_PG_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_PG_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_PG_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_PG_CommandOpcodeRegisterOffset); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_PG_CommandIdentifierRegisterOffset); - writeb(0, ControllerBaseAddress + DAC960_PG_ErrorStatusRegisterOffset); - return true; -} - -/* - Define the DAC960 PD Series Controller Interface Register Offsets. -*/ - -#define DAC960_PD_RegisterWindowSize 0x80 - -typedef enum -{ - DAC960_PD_CommandOpcodeRegisterOffset = 0x00, - DAC960_PD_CommandIdentifierRegisterOffset = 0x01, - DAC960_PD_MailboxRegister2Offset = 0x02, - DAC960_PD_MailboxRegister3Offset = 0x03, - DAC960_PD_MailboxRegister4Offset = 0x04, - DAC960_PD_MailboxRegister5Offset = 0x05, - DAC960_PD_MailboxRegister6Offset = 0x06, - DAC960_PD_MailboxRegister7Offset = 0x07, - DAC960_PD_MailboxRegister8Offset = 0x08, - DAC960_PD_MailboxRegister9Offset = 0x09, - DAC960_PD_MailboxRegister10Offset = 0x0A, - DAC960_PD_MailboxRegister11Offset = 0x0B, - DAC960_PD_MailboxRegister12Offset = 0x0C, - DAC960_PD_StatusCommandIdentifierRegOffset = 0x0D, - DAC960_PD_StatusRegisterOffset = 0x0E, - DAC960_PD_ErrorStatusRegisterOffset = 0x3F, - DAC960_PD_InboundDoorBellRegisterOffset = 0x40, - DAC960_PD_OutboundDoorBellRegisterOffset = 0x41, - DAC960_PD_InterruptEnableRegisterOffset = 0x43 -} -DAC960_PD_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 PD Series Inbound Door Bell Register. -*/ - -typedef union DAC960_PD_InboundDoorBellRegister -{ - unsigned char All; - struct { - bool NewCommand:1; /* Bit 0 */ - bool AcknowledgeStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - unsigned char :4; /* Bits 4-7 */ - } Write; - struct { - bool MailboxFull:1; /* Bit 0 */ - bool InitializationInProgress:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_PD_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 PD Series Outbound Door Bell Register. -*/ - -typedef union DAC960_PD_OutboundDoorBellRegister -{ - unsigned char All; - struct { - bool AcknowledgeInterrupt:1; /* Bit 0 */ - unsigned char :7; /* Bits 1-7 */ - } Write; - struct { - bool StatusAvailable:1; /* Bit 0 */ - unsigned char :7; /* Bits 1-7 */ - } Read; -} -DAC960_PD_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 PD Series Interrupt Enable Register. -*/ - -typedef union DAC960_PD_InterruptEnableRegister -{ - unsigned char All; - struct { - bool EnableInterrupts:1; /* Bit 0 */ - unsigned char :7; /* Bits 1-7 */ - } Bits; -} -DAC960_PD_InterruptEnableRegister_T; - - -/* - Define the structure of the DAC960 PD Series Error Status Register. 
-*/ - -typedef union DAC960_PD_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_PD_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 PD Series Controller Interface Registers. -*/ - -static inline -void DAC960_PD_NewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.NewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PD_AcknowledgeStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeStatus = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PD_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PD_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_PD_MailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.MailboxFull; -} - -static inline -bool DAC960_PD_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.InitializationInProgress; -} - -static inline -void DAC960_PD_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_PD_StatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_PD_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.StatusAvailable; -} - -static inline -void DAC960_PD_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister; - InterruptEnableRegister.All = 0; - InterruptEnableRegister.Bits.EnableInterrupts = true; - writeb(InterruptEnableRegister.All, - ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset); -} - -static inline -void DAC960_PD_DisableInterrupts(void __iomem 
*ControllerBaseAddress) -{ - DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister; - InterruptEnableRegister.All = 0; - InterruptEnableRegister.Bits.EnableInterrupts = false; - writeb(InterruptEnableRegister.All, - ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset); -} - -static inline -bool DAC960_PD_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister; - InterruptEnableRegister.All = - readb(ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset); - return InterruptEnableRegister.Bits.EnableInterrupts; -} - -static inline -void DAC960_PD_WriteCommandMailbox(void __iomem *ControllerBaseAddress, - DAC960_V1_CommandMailbox_T *CommandMailbox) -{ - writel(CommandMailbox->Words[0], - ControllerBaseAddress + DAC960_PD_CommandOpcodeRegisterOffset); - writel(CommandMailbox->Words[1], - ControllerBaseAddress + DAC960_PD_MailboxRegister4Offset); - writel(CommandMailbox->Words[2], - ControllerBaseAddress + DAC960_PD_MailboxRegister8Offset); - writeb(CommandMailbox->Bytes[12], - ControllerBaseAddress + DAC960_PD_MailboxRegister12Offset); -} - -static inline DAC960_V1_CommandIdentifier_T -DAC960_PD_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readb(ControllerBaseAddress - + DAC960_PD_StatusCommandIdentifierRegOffset); -} - -static inline DAC960_V1_CommandStatus_T -DAC960_PD_ReadStatusRegister(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_PD_StatusRegisterOffset); -} - -static inline bool -DAC960_PD_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_PD_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_PD_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_PD_CommandOpcodeRegisterOffset); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_PD_CommandIdentifierRegisterOffset); - writeb(0, ControllerBaseAddress + DAC960_PD_ErrorStatusRegisterOffset); - return true; -} - -static inline void DAC960_P_To_PD_TranslateEnquiry(void *Enquiry) -{ - memcpy(Enquiry + 132, Enquiry + 36, 64); - memset(Enquiry + 36, 0, 96); -} - -static inline void DAC960_P_To_PD_TranslateDeviceState(void *DeviceState) -{ - memcpy(DeviceState + 2, DeviceState + 3, 1); - memmove(DeviceState + 4, DeviceState + 5, 2); - memmove(DeviceState + 6, DeviceState + 8, 4); -} - -static inline -void DAC960_PD_To_P_TranslateReadWriteCommand(DAC960_V1_CommandMailbox_T - *CommandMailbox) -{ - int LogicalDriveNumber = CommandMailbox->Type5.LD.LogicalDriveNumber; - CommandMailbox->Bytes[3] &= 0x7; - CommandMailbox->Bytes[3] |= CommandMailbox->Bytes[7] << 6; - CommandMailbox->Bytes[7] = LogicalDriveNumber; -} - -static inline -void DAC960_P_To_PD_TranslateReadWriteCommand(DAC960_V1_CommandMailbox_T - *CommandMailbox) -{ - int LogicalDriveNumber = CommandMailbox->Bytes[7]; - CommandMailbox->Bytes[7] = CommandMailbox->Bytes[3] >> 6; - CommandMailbox->Bytes[3] &= 0x7; - CommandMailbox->Bytes[3] |= LogicalDriveNumber << 3; -} - - -/* - Define prototypes for the forward referenced DAC960 Driver Internal Functions. 
-*/ - -static void DAC960_FinalizeController(DAC960_Controller_T *); -static void DAC960_V1_QueueReadWriteCommand(DAC960_Command_T *); -static void DAC960_V2_QueueReadWriteCommand(DAC960_Command_T *); -static void DAC960_RequestFunction(struct request_queue *); -static irqreturn_t DAC960_BA_InterruptHandler(int, void *); -static irqreturn_t DAC960_LP_InterruptHandler(int, void *); -static irqreturn_t DAC960_LA_InterruptHandler(int, void *); -static irqreturn_t DAC960_PG_InterruptHandler(int, void *); -static irqreturn_t DAC960_PD_InterruptHandler(int, void *); -static irqreturn_t DAC960_P_InterruptHandler(int, void *); -static void DAC960_V1_QueueMonitoringCommand(DAC960_Command_T *); -static void DAC960_V2_QueueMonitoringCommand(DAC960_Command_T *); -static void DAC960_MonitoringTimerFunction(struct timer_list *); -static void DAC960_Message(DAC960_MessageLevel_T, unsigned char *, - DAC960_Controller_T *, ...); -static void DAC960_CreateProcEntries(DAC960_Controller_T *); -static void DAC960_DestroyProcEntries(DAC960_Controller_T *); - -#endif /* DAC960_DriverVersion */ diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index d4913516823f..20bb4bfa4be6 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -121,18 +121,6 @@ source "drivers/block/mtip32xx/Kconfig" source "drivers/block/zram/Kconfig" -config BLK_DEV_DAC960 - tristate "Mylex DAC960/DAC1100 PCI RAID Controller support" - depends on PCI - help - This driver adds support for the Mylex DAC960, AcceleRAID, and - eXtremeRAID PCI RAID controllers. See the file - <file:Documentation/blockdev/README.DAC960> for further information - about this driver. - - To compile this driver as a module, choose M here: the - module will be called DAC960. - config BLK_DEV_UMEM tristate "Micro Memory MM5415 Battery Backed RAM support" depends on PCI @@ -461,7 +449,6 @@ config BLK_DEV_RBD select LIBCRC32C select CRYPTO_AES select CRYPTO - default n help Say Y here if you want include the Rados block device, which stripes a block device over objects stored in the Ceph distributed object diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 8566b188368b..a53cc1e3a2d3 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -16,7 +16,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o -obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o obj-$(CONFIG_XILINX_SYSACE) += xsysace.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_SUNVDC) += sunvdc.o diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 3aaf6af3ec23..bf996bd44cfc 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -61,10 +61,8 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/mutex.h> -#include <linux/amifdreg.h> -#include <linux/amifd.h> #include <linux/fs.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/elevator.h> #include <linux/interrupt.h> #include <linux/platform_device.h> @@ -87,6 +85,126 @@ */ /* + * CIAAPRA bits (read only) + */ + +#define DSKRDY (0x1<<5) /* disk ready when low */ +#define DSKTRACK0 (0x1<<4) /* head at track zero when low */ +#define DSKPROT (0x1<<3) /* disk protected when low */ +#define DSKCHANGE (0x1<<2) /* low when disk removed */ + +/* + * CIAAPRB bits (read/write) + */ + +#define DSKMOTOR (0x1<<7) /* motor on when low */ +#define DSKSEL3 (0x1<<6) /* select drive 3 when low */ +#define DSKSEL2 (0x1<<5) /* select drive 2 when low */ +#define DSKSEL1 
(0x1<<4) /* select drive 1 when low */ +#define DSKSEL0 (0x1<<3) /* select drive 0 when low */ +#define DSKSIDE (0x1<<2) /* side selection: 0 = upper, 1 = lower */ +#define DSKDIREC (0x1<<1) /* step direction: 0=in, 1=out (to trk 0) */ +#define DSKSTEP (0x1) /* pulse low to step head 1 track */ + +/* + * DSKBYTR bits (read only) + */ + +#define DSKBYT (1<<15) /* register contains valid byte when set */ +#define DMAON (1<<14) /* disk DMA enabled */ +#define DISKWRITE (1<<13) /* disk write bit in DSKLEN enabled */ +#define WORDEQUAL (1<<12) /* DSKSYNC register match when true */ +/* bits 7-0 are data */ + +/* + * ADKCON/ADKCONR bits + */ + +#ifndef SETCLR +#define ADK_SETCLR (1<<15) /* control bit */ +#endif +#define ADK_PRECOMP1 (1<<14) /* precompensation selection */ +#define ADK_PRECOMP0 (1<<13) /* 00=none, 01=140ns, 10=280ns, 11=500ns */ +#define ADK_MFMPREC (1<<12) /* 0=GCR precomp., 1=MFM precomp. */ +#define ADK_WORDSYNC (1<<10) /* enable DSKSYNC auto DMA */ +#define ADK_MSBSYNC (1<<9) /* when 1, enable sync on MSbit (for GCR) */ +#define ADK_FAST (1<<8) /* bit cell: 0=2us (GCR), 1=1us (MFM) */ + +/* + * DSKLEN bits + */ + +#define DSKLEN_DMAEN (1<<15) +#define DSKLEN_WRITE (1<<14) + +/* + * INTENA/INTREQ bits + */ + +#define DSKINDEX (0x1<<4) /* DSKINDEX bit */ + +/* + * Misc + */ + +#define MFM_SYNC 0x4489 /* standard MFM sync value */ + +/* Values for FD_COMMAND */ +#define FD_RECALIBRATE 0x07 /* move to track 0 */ +#define FD_SEEK 0x0F /* seek track */ +#define FD_READ 0xE6 /* read with MT, MFM, SKip deleted */ +#define FD_WRITE 0xC5 /* write with MT, MFM */ +#define FD_SENSEI 0x08 /* Sense Interrupt Status */ +#define FD_SPECIFY 0x03 /* specify HUT etc */ +#define FD_FORMAT 0x4D /* format one track */ +#define FD_VERSION 0x10 /* get version code */ +#define FD_CONFIGURE 0x13 /* configure FIFO operation */ +#define FD_PERPENDICULAR 0x12 /* perpendicular r/w mode */ + +#define FD_MAX_UNITS 4 /* Max. Number of drives */ +#define FLOPPY_MAX_SECTORS 22 /* Max. 
Number of sectors per track */ + +struct fd_data_type { + char *name; /* description of data type */ + int sects; /* sectors per track */ + int (*read_fkt)(int); /* read whole track */ + void (*write_fkt)(int); /* write whole track */ +}; + +struct fd_drive_type { + unsigned long code; /* code returned from drive */ + char *name; /* description of drive */ + unsigned int tracks; /* number of tracks */ + unsigned int heads; /* number of heads */ + unsigned int read_size; /* raw read size for one track */ + unsigned int write_size; /* raw write size for one track */ + unsigned int sect_mult; /* sectors and gap multiplier (HD = 2) */ + unsigned int precomp1; /* start track for precomp 1 */ + unsigned int precomp2; /* start track for precomp 2 */ + unsigned int step_delay; /* time (in ms) for delay after step */ + unsigned int settle_time; /* time to settle after dir change */ + unsigned int side_time; /* time needed to change sides */ +}; + +struct amiga_floppy_struct { + struct fd_drive_type *type; /* type of floppy for this unit */ + struct fd_data_type *dtype; /* type of floppy for this unit */ + int track; /* current track (-1 == unknown) */ + unsigned char *trackbuf; /* current track (kmaloc()'d */ + + int blocks; /* total # blocks on disk */ + + int changed; /* true when not known */ + int disk; /* disk in drive (-1 == unknown) */ + int motor; /* true when motor is at speed */ + int busy; /* true when drive is active */ + int dirty; /* true when trackbuf is not on disk */ + int status; /* current error code for unit */ + struct gendisk *gendisk; + struct blk_mq_tag_set tag_set; +}; + +/* * Error codes */ #define FD_OK 0 /* operation succeeded */ @@ -164,7 +282,6 @@ static volatile int selected = -1; /* currently selected drive */ static int writepending; static int writefromint; static char *raw_buf; -static int fdc_queue; static DEFINE_SPINLOCK(amiflop_lock); @@ -1337,76 +1454,20 @@ static int get_track(int drive, int track) return -1; } -/* - * Round-robin between our available drives, doing one request from each - */ -static struct request *set_next_request(void) -{ - struct request_queue *q; - int cnt = FD_MAX_UNITS; - struct request *rq = NULL; - - /* Find next queue we can dispatch from */ - fdc_queue = fdc_queue + 1; - if (fdc_queue == FD_MAX_UNITS) - fdc_queue = 0; - - for(cnt = FD_MAX_UNITS; cnt > 0; cnt--) { - - if (unit[fdc_queue].type->code == FD_NODRIVE) { - if (++fdc_queue == FD_MAX_UNITS) - fdc_queue = 0; - continue; - } - - q = unit[fdc_queue].gendisk->queue; - if (q) { - rq = blk_fetch_request(q); - if (rq) - break; - } - - if (++fdc_queue == FD_MAX_UNITS) - fdc_queue = 0; - } - - return rq; -} - -static void redo_fd_request(void) +static blk_status_t amiflop_rw_cur_segment(struct amiga_floppy_struct *floppy, + struct request *rq) { - struct request *rq; + int drive = floppy - unit; unsigned int cnt, block, track, sector; - int drive; - struct amiga_floppy_struct *floppy; char *data; - unsigned long flags; - blk_status_t err; - -next_req: - rq = set_next_request(); - if (!rq) { - /* Nothing left to do */ - return; - } - - floppy = rq->rq_disk->private_data; - drive = floppy - unit; -next_segment: - /* Here someone could investigate to be more efficient */ - for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) { + for (cnt = 0; cnt < blk_rq_cur_sectors(rq); cnt++) { #ifdef DEBUG printk("fd: sector %ld + %d requested for %s\n", blk_rq_pos(rq), cnt, (rq_data_dir(rq) == READ) ? 
"read" : "write"); #endif block = blk_rq_pos(rq) + cnt; - if ((int)block > floppy->blocks) { - err = BLK_STS_IOERR; - break; - } - track = block / (floppy->dtype->sects * floppy->type->sect_mult); sector = block % (floppy->dtype->sects * floppy->type->sect_mult); data = bio_data(rq->bio) + 512 * cnt; @@ -1415,10 +1476,8 @@ next_segment: "0x%08lx\n", track, sector, data); #endif - if (get_track(drive, track) == -1) { - err = BLK_STS_IOERR; - break; - } + if (get_track(drive, track) == -1) + return BLK_STS_IOERR; if (rq_data_dir(rq) == READ) { memcpy(data, floppy->trackbuf + sector * 512, 512); @@ -1426,31 +1485,40 @@ next_segment: memcpy(floppy->trackbuf + sector * 512, data, 512); /* keep the drive spinning while writes are scheduled */ - if (!fd_motor_on(drive)) { - err = BLK_STS_IOERR; - break; - } + if (!fd_motor_on(drive)) + return BLK_STS_IOERR; /* * setup a callback to write the track buffer * after a short (1 tick) delay. */ - local_irq_save(flags); - floppy->dirty = 1; /* reset the timer */ mod_timer (flush_track_timer + drive, jiffies + 1); - local_irq_restore(flags); } } - if (__blk_end_request_cur(rq, err)) - goto next_segment; - goto next_req; + return BLK_STS_OK; } -static void do_fd_request(struct request_queue * q) +static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - redo_fd_request(); + struct request *rq = bd->rq; + struct amiga_floppy_struct *floppy = rq->rq_disk->private_data; + blk_status_t err; + + if (!spin_trylock_irq(&amiflop_lock)) + return BLK_STS_DEV_RESOURCE; + + blk_mq_start_request(rq); + + do { + err = amiflop_rw_cur_segment(floppy, rq); + } while (blk_update_request(rq, err, blk_rq_cur_bytes(rq))); + blk_mq_end_request(rq, err); + + spin_unlock_irq(&amiflop_lock); + return BLK_STS_OK; } static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -1701,11 +1769,47 @@ static const struct block_device_operations floppy_fops = { .check_events = amiga_check_events, }; +static const struct blk_mq_ops amiflop_mq_ops = { + .queue_rq = amiflop_queue_rq, +}; + +static struct gendisk *fd_alloc_disk(int drive) +{ + struct gendisk *disk; + + disk = alloc_disk(1); + if (!disk) + goto out; + + disk->queue = blk_mq_init_sq_queue(&unit[drive].tag_set, &amiflop_mq_ops, + 2, BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disk->queue)) { + disk->queue = NULL; + goto out_put_disk; + } + + unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); + if (!unit[drive].trackbuf) + goto out_cleanup_queue; + + return disk; + +out_cleanup_queue: + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + blk_mq_free_tag_set(&unit[drive].tag_set); +out_put_disk: + put_disk(disk); +out: + unit[drive].type->code = FD_NODRIVE; + return NULL; +} + static int __init fd_probe_drives(void) { int drive,drives,nomem; - printk(KERN_INFO "FD: probing units\nfound "); + pr_info("FD: probing units\nfound"); drives=0; nomem=0; for(drive=0;drive<FD_MAX_UNITS;drive++) { @@ -1713,27 +1817,17 @@ static int __init fd_probe_drives(void) fd_probe(drive); if (unit[drive].type->code == FD_NODRIVE) continue; - disk = alloc_disk(1); + + disk = fd_alloc_disk(drive); if (!disk) { - unit[drive].type->code = FD_NODRIVE; + pr_cont(" no mem for fd%d", drive); + nomem = 1; continue; } unit[drive].gendisk = disk; - - disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); - if (!disk->queue) { - unit[drive].type->code = FD_NODRIVE; - continue; - } - drives++; - if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) { 
- printk("no mem for "); - unit[drive].type = &drive_types[num_dr_types - 1]; /* FD_NODRIVE */ - drives--; - nomem = 1; - } - printk("fd%d ",drive); + + pr_cont(" fd%d",drive); disk->major = FLOPPY_MAJOR; disk->first_minor = drive; disk->fops = &floppy_fops; @@ -1744,11 +1838,11 @@ static int __init fd_probe_drives(void) } if ((drives > 0) || (nomem == 0)) { if (drives == 0) - printk("no drives"); - printk("\n"); + pr_cont(" no drives"); + pr_cont("\n"); return drives; } - printk("\n"); + pr_cont("\n"); return -ENOMEM; } @@ -1831,30 +1925,6 @@ out_blkdev: return ret; } -#if 0 /* not safe to unload */ -static int __exit amiga_floppy_remove(struct platform_device *pdev) -{ - int i; - - for( i = 0; i < FD_MAX_UNITS; i++) { - if (unit[i].type->code != FD_NODRIVE) { - struct request_queue *q = unit[i].gendisk->queue; - del_gendisk(unit[i].gendisk); - put_disk(unit[i].gendisk); - kfree(unit[i].trackbuf); - if (q) - blk_cleanup_queue(q); - } - } - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); - free_irq(IRQ_AMIGA_CIAA_TB, NULL); - free_irq(IRQ_AMIGA_DSKBLK, NULL); - custom.dmacon = DMAF_DISK; /* disable DMA */ - amiga_chip_free(raw_buf); - unregister_blkdev(FLOPPY_MAJOR, "fd"); -} -#endif - static struct platform_driver amiga_floppy_driver = { .driver = { .name = "amiga-floppy", diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index c0ebda1283cc..7ca76ed2e71a 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,4 +1,6 @@ /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ +#include <linux/blk-mq.h> + #define VERSION "85" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" @@ -164,6 +166,8 @@ struct aoedev { struct gendisk *gd; struct dentry *debugfs; struct request_queue *blkq; + struct list_head rq_list; + struct blk_mq_tag_set tag_set; struct hd_geometry geo; sector_t ssize; struct timer_list timer; @@ -201,7 +205,6 @@ int aoeblk_init(void); void aoeblk_exit(void); void aoeblk_gdalloc(void *); void aoedisk_rm_debugfs(struct aoedev *d); -void aoedisk_rm_sysfs(struct aoedev *d); int aoechr_init(void); void aoechr_exit(void); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 429ebb84b592..ed26b7287256 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -6,7 +6,7 @@ #include <linux/kernel.h> #include <linux/hdreg.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/backing-dev.h> #include <linux/fs.h> #include <linux/ioctl.h> @@ -177,10 +177,15 @@ static struct attribute *aoe_attrs[] = { NULL, }; -static const struct attribute_group attr_group = { +static const struct attribute_group aoe_attr_group = { .attrs = aoe_attrs, }; +static const struct attribute_group *aoe_attr_groups[] = { + &aoe_attr_group, + NULL, +}; + static const struct file_operations aoe_debugfs_fops = { .open = aoe_debugfs_open, .read = seq_read, @@ -220,17 +225,6 @@ aoedisk_rm_debugfs(struct aoedev *d) } static int -aoedisk_add_sysfs(struct aoedev *d) -{ - return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group); -} -void -aoedisk_rm_sysfs(struct aoedev *d) -{ - sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group); -} - -static int aoeblk_open(struct block_device *bdev, fmode_t mode) { struct aoedev *d = bdev->bd_disk->private_data; @@ -274,23 +268,25 @@ aoeblk_release(struct gendisk *disk, fmode_t mode) spin_unlock_irqrestore(&d->lock, flags); } -static void -aoeblk_request(struct request_queue *q) +static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data 
*bd) { - struct aoedev *d; - struct request *rq; + struct aoedev *d = hctx->queue->queuedata; + + spin_lock_irq(&d->lock); - d = q->queuedata; if ((d->flags & DEVFL_UP) == 0) { pr_info_ratelimited("aoe: device %ld.%d is not up\n", d->aoemajor, d->aoeminor); - while ((rq = blk_peek_request(q))) { - blk_start_request(rq); - aoe_end_request(d, rq, 1); - } - return; + spin_unlock_irq(&d->lock); + blk_mq_start_request(bd->rq); + return BLK_STS_IOERR; } + + list_add_tail(&bd->rq->queuelist, &d->rq_list); aoecmd_work(d); + spin_unlock_irq(&d->lock); + return BLK_STS_OK; } static int @@ -345,6 +341,10 @@ static const struct block_device_operations aoe_bdops = { .owner = THIS_MODULE, }; +static const struct blk_mq_ops aoeblk_mq_ops = { + .queue_rq = aoeblk_queue_rq, +}; + /* alloc_disk and add_disk can sleep */ void aoeblk_gdalloc(void *vp) @@ -353,9 +353,11 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct request_queue *q; + struct blk_mq_tag_set *set; enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; ulong flags; int late = 0; + int err; spin_lock_irqsave(&d->lock, flags); if (d->flags & DEVFL_GDALLOC @@ -382,10 +384,25 @@ aoeblk_gdalloc(void *vp) d->aoemajor, d->aoeminor); goto err_disk; } - q = blk_init_queue(aoeblk_request, &d->lock); - if (q == NULL) { + + set = &d->tag_set; + set->ops = &aoeblk_mq_ops; + set->nr_hw_queues = 1; + set->queue_depth = 128; + set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_SHOULD_MERGE; + err = blk_mq_alloc_tag_set(set); + if (err) { + pr_err("aoe: cannot allocate tag set for %ld.%d\n", + d->aoemajor, d->aoeminor); + goto err_mempool; + } + + q = blk_mq_init_queue(set); + if (IS_ERR(q)) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); + blk_mq_free_tag_set(set); goto err_mempool; } @@ -417,8 +434,7 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); - add_disk(gd); - aoedisk_add_sysfs(d); + device_add_disk(NULL, gd, aoe_attr_groups); aoedisk_add_debugfs(d); spin_lock_irqsave(&d->lock, flags); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 136dc507d020..bb2fba651bd2 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -7,7 +7,7 @@ #include <linux/ata.h> #include <linux/slab.h> #include <linux/hdreg.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/genhd.h> @@ -813,7 +813,7 @@ rexmit_timer(struct timer_list *timer) out: if ((d->flags & DEVFL_KICKME) && d->blkq) { d->flags &= ~DEVFL_KICKME; - d->blkq->request_fn(d->blkq); + blk_mq_run_hw_queues(d->blkq, true); } d->timer.expires = jiffies + TIMERTICK; @@ -857,10 +857,12 @@ nextbuf(struct aoedev *d) return d->ip.buf; rq = d->ip.rq; if (rq == NULL) { - rq = blk_peek_request(q); + rq = list_first_entry_or_null(&d->rq_list, struct request, + queuelist); if (rq == NULL) return NULL; - blk_start_request(rq); + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); d->ip.rq = rq; d->ip.nxbio = rq->bio; rq->special = (void *) rqbiocnt(rq); @@ -1045,6 +1047,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) struct bio *bio; int bok; struct request_queue *q; + blk_status_t err = BLK_STS_OK; q = d->blkq; if (rq == d->ip.rq) @@ -1052,11 +1055,15 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) do { bio = rq->bio; bok = !fastfail && !bio->bi_status; - } while (__blk_end_request(rq, bok ? 
BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size)); + if (!bok) + err = BLK_STS_IOERR; + } while (blk_update_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size)); + + __blk_mq_end_request(rq, err); /* cf. http://lkml.org/lkml/2006/10/31/28 */ if (!fastfail) - __blk_run_queue(q); + blk_mq_run_hw_queues(q, true); } static void diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 41060e9cedf2..9063f8efbd3b 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -5,7 +5,7 @@ */ #include <linux/hdreg.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/netdevice.h> #include <linux/delay.h> #include <linux/slab.h> @@ -197,7 +197,6 @@ aoedev_downdev(struct aoedev *d) { struct aoetgt *t, **tt, **te; struct list_head *head, *pos, *nx; - struct request *rq; int i; d->flags &= ~DEVFL_UP; @@ -225,10 +224,11 @@ aoedev_downdev(struct aoedev *d) /* fast fail all pending I/O */ if (d->blkq) { - while ((rq = blk_peek_request(d->blkq))) { - blk_start_request(rq); - aoe_end_request(d, rq, 1); - } + /* UP is cleared, freeze+quiesce to insure all are errored */ + blk_mq_freeze_queue(d->blkq); + blk_mq_quiesce_queue(d->blkq); + blk_mq_unquiesce_queue(d->blkq); + blk_mq_unfreeze_queue(d->blkq); } if (d->gd) @@ -275,9 +275,9 @@ freedev(struct aoedev *d) del_timer_sync(&d->timer); if (d->gd) { aoedisk_rm_debugfs(d); - aoedisk_rm_sysfs(d); del_gendisk(d->gd); put_disk(d->gd); + blk_mq_free_tag_set(&d->tag_set); blk_cleanup_queue(d->blkq); } t = d->targets; @@ -464,6 +464,7 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) d->ntargets = NTARGETS; INIT_WORK(&d->work, aoecmd_sleepwork); spin_lock_init(&d->lock); + INIT_LIST_HEAD(&d->rq_list); skb_queue_head_init(&d->skbpool); timer_setup(&d->timer, dummy_timer, 0); d->timer.expires = jiffies + HZ; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index dfb2c2622e5a..f88b4c26d422 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -66,13 +66,11 @@ #include <linux/fd.h> #include <linux/delay.h> #include <linux/init.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/mutex.h> #include <linux/completion.h> #include <linux/wait.h> -#include <asm/atafd.h> -#include <asm/atafdreg.h> #include <asm/atariints.h> #include <asm/atari_stdma.h> #include <asm/atari_stram.h> @@ -83,7 +81,87 @@ static DEFINE_MUTEX(ataflop_mutex); static struct request *fd_request; -static int fdc_queue; + +/* + * WD1772 stuff + */ + +/* register codes */ + +#define FDCSELREG_STP (0x80) /* command/status register */ +#define FDCSELREG_TRA (0x82) /* track register */ +#define FDCSELREG_SEC (0x84) /* sector register */ +#define FDCSELREG_DTA (0x86) /* data register */ + +/* register names for FDC_READ/WRITE macros */ + +#define FDCREG_CMD 0 +#define FDCREG_STATUS 0 +#define FDCREG_TRACK 2 +#define FDCREG_SECTOR 4 +#define FDCREG_DATA 6 + +/* command opcodes */ + +#define FDCCMD_RESTORE (0x00) /* - */ +#define FDCCMD_SEEK (0x10) /* | */ +#define FDCCMD_STEP (0x20) /* | TYP 1 Commands */ +#define FDCCMD_STIN (0x40) /* | */ +#define FDCCMD_STOT (0x60) /* - */ +#define FDCCMD_RDSEC (0x80) /* - TYP 2 Commands */ +#define FDCCMD_WRSEC (0xa0) /* - " */ +#define FDCCMD_RDADR (0xc0) /* - */ +#define FDCCMD_RDTRA (0xe0) /* | TYP 3 Commands */ +#define FDCCMD_WRTRA (0xf0) /* - */ +#define FDCCMD_FORCI (0xd0) /* - TYP 4 Command */ + +/* command modifier bits */ + +#define FDCCMDADD_SR6 (0x00) /* step rate settings */ +#define FDCCMDADD_SR12 (0x01) +#define FDCCMDADD_SR2 (0x02) 
+#define FDCCMDADD_SR3 (0x03) +#define FDCCMDADD_V (0x04) /* verify */ +#define FDCCMDADD_H (0x08) /* wait for spin-up */ +#define FDCCMDADD_U (0x10) /* update track register */ +#define FDCCMDADD_M (0x10) /* multiple sector access */ +#define FDCCMDADD_E (0x04) /* head settling flag */ +#define FDCCMDADD_P (0x02) /* precompensation off */ +#define FDCCMDADD_A0 (0x01) /* DAM flag */ + +/* status register bits */ + +#define FDCSTAT_MOTORON (0x80) /* motor on */ +#define FDCSTAT_WPROT (0x40) /* write protected (FDCCMD_WR*) */ +#define FDCSTAT_SPINUP (0x20) /* motor speed stable (Type I) */ +#define FDCSTAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */ +#define FDCSTAT_RECNF (0x10) /* record not found */ +#define FDCSTAT_CRC (0x08) /* CRC error */ +#define FDCSTAT_TR00 (0x04) /* Track 00 flag (Type I) */ +#define FDCSTAT_LOST (0x04) /* Lost Data (Type II+III) */ +#define FDCSTAT_IDX (0x02) /* Index status (Type I) */ +#define FDCSTAT_DRQ (0x02) /* DRQ status (Type II+III) */ +#define FDCSTAT_BUSY (0x01) /* FDC is busy */ + + +/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */ +#define DSKSIDE (0x01) + +#define DSKDRVNONE (0x06) +#define DSKDRV0 (0x02) +#define DSKDRV1 (0x04) + +/* step rates */ +#define FDCSTEP_6 0x00 +#define FDCSTEP_12 0x01 +#define FDCSTEP_2 0x02 +#define FDCSTEP_3 0x03 + +struct atari_format_descr { + int track; /* to be formatted */ + int head; /* "" "" */ + int sect_offset; /* offset of first sector */ +}; /* Disk types: DD, HD, ED */ static struct atari_disk_type { @@ -221,6 +299,7 @@ static struct atari_floppy_struct { struct gendisk *disk; int ref; int type; + struct blk_mq_tag_set tag_set; } unit[FD_MAX_UNITS]; #define UD unit[drive] @@ -300,9 +379,6 @@ static int IsFormatting = 0, FormatError; static int UserSteprate[FD_MAX_UNITS] = { -1, -1 }; module_param_array(UserSteprate, int, NULL, 0); -/* Synchronization of FDC access. 
*/ -static volatile int fdc_busy = 0; -static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_COMPLETION(format_wait); static unsigned long changed_floppies = 0xff, fake_change = 0; @@ -362,7 +438,6 @@ static void fd_times_out(struct timer_list *unused); static void finish_fdc( void ); static void finish_fdc_done( int dummy ); static void setup_req_params( int drive ); -static void redo_fd_request( void); static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long param); static void fd_probe( int drive ); @@ -380,8 +455,11 @@ static DEFINE_TIMER(fd_timer, check_change); static void fd_end_request_cur(blk_status_t err) { - if (!__blk_end_request_cur(fd_request, err)) + if (!blk_update_request(fd_request, err, + blk_rq_cur_bytes(fd_request))) { + __blk_mq_end_request(fd_request, err); fd_request = NULL; + } } static inline void start_motor_off_timer(void) @@ -627,7 +705,6 @@ static void fd_error( void ) if (SelectedDrive != -1) SUD.track = -1; } - redo_fd_request(); } @@ -645,14 +722,15 @@ static void fd_error( void ) static int do_format(int drive, int type, struct atari_format_descr *desc) { + struct request_queue *q = unit[drive].disk->queue; unsigned char *p; int sect, nsect; unsigned long flags; + int ret; - DPRINT(("do_format( dr=%d tr=%d he=%d offs=%d )\n", - drive, desc->track, desc->head, desc->sect_offset )); + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); - wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0); local_irq_save(flags); stdma_lock(floppy_irq, NULL); atari_turnon_irq( IRQ_MFP_FDC ); /* should be already, just to be sure */ @@ -661,16 +739,16 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) if (type) { if (--type >= NUM_DISK_MINORS || minor2disktype[type].drive_types > DriveType) { - redo_fd_request(); - return -EINVAL; + ret = -EINVAL; + goto out; } type = minor2disktype[type].index; UDT = &atari_disk_type[type]; } if (!UDT || desc->track >= UDT->blocks/UDT->spt/2 || desc->head >= 2) { - redo_fd_request(); - return -EINVAL; + ret = -EINVAL; + goto out; } nsect = UDT->spt; @@ -709,8 +787,11 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) wait_for_completion(&format_wait); - redo_fd_request(); - return( FormatError ? -EIO : 0 ); + ret = FormatError ? 
-EIO : 0; +out: + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); + return ret; } @@ -740,7 +821,6 @@ static void do_fd_action( int drive ) else { /* all sectors finished */ fd_end_request_cur(BLK_STS_OK); - redo_fd_request(); return; } } @@ -1145,7 +1225,6 @@ static void fd_rwsec_done1(int status) else { /* all sectors finished */ fd_end_request_cur(BLK_STS_OK); - redo_fd_request(); } return; @@ -1303,8 +1382,6 @@ static void finish_fdc_done( int dummy ) local_irq_save(flags); stdma_release(); - fdc_busy = 0; - wake_up( &fdc_wait ); local_irq_restore(flags); DPRINT(("finish_fdc() finished\n")); @@ -1394,59 +1471,34 @@ static void setup_req_params( int drive ) ReqTrack, ReqSector, (unsigned long)ReqData )); } -/* - * Round-robin between our available drives, doing one request from each - */ -static struct request *set_next_request(void) +static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request_queue *q; - int old_pos = fdc_queue; - struct request *rq = NULL; - - do { - q = unit[fdc_queue].disk->queue; - if (++fdc_queue == FD_MAX_UNITS) - fdc_queue = 0; - if (q) { - rq = blk_fetch_request(q); - if (rq) { - rq->error_count = 0; - break; - } - } - } while (fdc_queue != old_pos); - - return rq; -} - + struct atari_floppy_struct *floppy = bd->rq->rq_disk->private_data; + int drive = floppy - unit; + int type = floppy->type; -static void redo_fd_request(void) -{ - int drive, type; - struct atari_floppy_struct *floppy; + spin_lock_irq(&ataflop_lock); + if (fd_request) { + spin_unlock_irq(&ataflop_lock); + return BLK_STS_DEV_RESOURCE; + } + if (!stdma_try_lock(floppy_irq, NULL)) { + spin_unlock_irq(&ataflop_lock); + return BLK_STS_RESOURCE; + } + fd_request = bd->rq; + blk_mq_start_request(fd_request); - DPRINT(("redo_fd_request: fd_request=%p dev=%s fd_request->sector=%ld\n", - fd_request, fd_request ? fd_request->rq_disk->disk_name : "", - fd_request ? 
blk_rq_pos(fd_request) : 0 )); + atari_disable_irq( IRQ_MFP_FDC ); IsFormatting = 0; -repeat: - if (!fd_request) { - fd_request = set_next_request(); - if (!fd_request) - goto the_end; - } - - floppy = fd_request->rq_disk->private_data; - drive = floppy - unit; - type = floppy->type; - if (!UD.connected) { /* drive not connected */ printk(KERN_ERR "Unknown Device: fd%d\n", drive ); fd_end_request_cur(BLK_STS_IOERR); - goto repeat; + goto out; } if (type == 0) { @@ -1462,23 +1514,18 @@ repeat: if (--type >= NUM_DISK_MINORS) { printk(KERN_WARNING "fd%d: invalid disk format", drive ); fd_end_request_cur(BLK_STS_IOERR); - goto repeat; + goto out; } if (minor2disktype[type].drive_types > DriveType) { printk(KERN_WARNING "fd%d: unsupported disk format", drive ); fd_end_request_cur(BLK_STS_IOERR); - goto repeat; + goto out; } type = minor2disktype[type].index; UDT = &atari_disk_type[type]; set_capacity(floppy->disk, UDT->blocks); UD.autoprobe = 0; } - - if (blk_rq_pos(fd_request) + 1 > UDT->blocks) { - fd_end_request_cur(BLK_STS_IOERR); - goto repeat; - } /* stop deselect timer */ del_timer( &motor_off_timer ); @@ -1490,22 +1537,13 @@ repeat: setup_req_params( drive ); do_fd_action( drive ); - return; - - the_end: - finish_fdc(); -} - - -void do_fd_request(struct request_queue * q) -{ - DPRINT(("do_fd_request for pid %d\n",current->pid)); - wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0); - stdma_lock(floppy_irq, NULL); - - atari_disable_irq( IRQ_MFP_FDC ); - redo_fd_request(); + if (bd->last) + finish_fdc(); atari_enable_irq( IRQ_MFP_FDC ); + +out: + spin_unlock_irq(&ataflop_lock); + return BLK_STS_OK; } static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, @@ -1583,7 +1621,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, /* what if type > 0 here? Overwrite specified entry ? */ if (type) { /* refuse to re-set a predefined type for now */ - redo_fd_request(); return -EINVAL; } @@ -1651,10 +1688,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, /* sanity check */ if (setprm.track != dtp->blocks/dtp->spt/2 || - setprm.head != 2) { - redo_fd_request(); + setprm.head != 2) return -EINVAL; - } UDT = dtp; set_capacity(floppy->disk, UDT->blocks); @@ -1910,6 +1945,10 @@ static const struct block_device_operations floppy_fops = { .revalidate_disk= floppy_revalidate, }; +static const struct blk_mq_ops ataflop_mq_ops = { + .queue_rq = ataflop_queue_rq, +}; + static struct kobject *floppy_find(dev_t dev, int *part, void *data) { int drive = *part & 3; @@ -1923,6 +1962,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) static int __init atari_floppy_init (void) { int i; + int ret; if (!MACH_IS_ATARI) /* Amiga, Mac, ... 
don't have Atari-compatible floppy :-) */ @@ -1933,8 +1973,19 @@ static int __init atari_floppy_init (void) for (i = 0; i < FD_MAX_UNITS; i++) { unit[i].disk = alloc_disk(1); - if (!unit[i].disk) - goto Enomem; + if (!unit[i].disk) { + ret = -ENOMEM; + goto err; + } + + unit[i].disk->queue = blk_mq_init_sq_queue(&unit[i].tag_set, + &ataflop_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(unit[i].disk->queue)) { + ret = PTR_ERR(unit[i].disk->queue); + unit[i].disk->queue = NULL; + goto err; + } } if (UseTrackbuffer < 0) @@ -1951,7 +2002,8 @@ static int __init atari_floppy_init (void) DMABuffer = atari_stram_alloc(BUFFER_SIZE+512, "ataflop"); if (!DMABuffer) { printk(KERN_ERR "atari_floppy_init: cannot get dma buffer\n"); - goto Enomem; + ret = -ENOMEM; + goto err; } TrackBuffer = DMABuffer + 512; PhysDMABuffer = atari_stram_to_phys(DMABuffer); @@ -1966,10 +2018,6 @@ static int __init atari_floppy_init (void) sprintf(unit[i].disk->disk_name, "fd%d", i); unit[i].disk->fops = &floppy_fops; unit[i].disk->private_data = &unit[i]; - unit[i].disk->queue = blk_init_queue(do_fd_request, - &ataflop_lock); - if (!unit[i].disk->queue) - goto Enomem; set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); add_disk(unit[i].disk); } @@ -1983,17 +2031,23 @@ static int __init atari_floppy_init (void) config_types(); return 0; -Enomem: - while (i--) { - struct request_queue *q = unit[i].disk->queue; - put_disk(unit[i].disk); - if (q) - blk_cleanup_queue(q); - } +err: + do { + struct gendisk *disk = unit[i].disk; + + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + blk_mq_free_tag_set(&unit[i].tag_set); + put_disk(unit[i].disk); + } + } while (i--); unregister_blkdev(FLOPPY_MAJOR, "fd"); - return -ENOMEM; + return ret; } #ifndef MODULE @@ -2040,11 +2094,10 @@ static void __exit atari_floppy_exit(void) int i; blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); for (i = 0; i < FD_MAX_UNITS; i++) { - struct request_queue *q = unit[i].disk->queue; - del_gendisk(unit[i].disk); + blk_cleanup_queue(unit[i].disk->queue); + blk_mq_free_tag_set(&unit[i].tag_set); put_disk(unit[i].disk); - blk_cleanup_queue(q); } unregister_blkdev(FLOPPY_MAJOR, "fd"); diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index 87aab6910d2d..52d885cdccb5 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -11,7 +11,6 @@ config BLK_DEV_DRBD depends on PROC_FS && INET select LRU_CACHE select LIBCRC32C - default n help NOTE: In order to authenticate connections you have to select diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e35a234b0a8f..1e47db57b9d2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -429,7 +429,7 @@ enum { __EE_CALL_AL_COMPLETE_IO, __EE_MAY_SET_IN_SYNC, - /* is this a TRIM aka REQ_DISCARD? */ + /* is this a TRIM aka REQ_OP_DISCARD? 
*/ __EE_IS_TRIM, /* In case a barrier failed, @@ -724,10 +724,10 @@ struct drbd_connection { struct list_head transfer_log; /* all requests not yet fully processed */ struct crypto_shash *cram_hmac_tfm; - struct crypto_ahash *integrity_tfm; /* checksums we compute, updates protected by connection->data->mutex */ - struct crypto_ahash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */ - struct crypto_ahash *csums_tfm; - struct crypto_ahash *verify_tfm; + struct crypto_shash *integrity_tfm; /* checksums we compute, updates protected by connection->data->mutex */ + struct crypto_shash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */ + struct crypto_shash *csums_tfm; + struct crypto_shash *verify_tfm; void *int_dig_in; void *int_dig_vv; @@ -1531,8 +1531,9 @@ static inline void ov_out_of_sync_print(struct drbd_device *device) } -extern void drbd_csum_bio(struct crypto_ahash *, struct bio *, void *); -extern void drbd_csum_ee(struct crypto_ahash *, struct drbd_peer_request *, void *); +extern void drbd_csum_bio(struct crypto_shash *, struct bio *, void *); +extern void drbd_csum_ee(struct crypto_shash *, struct drbd_peer_request *, + void *); /* worker callbacks */ extern int w_e_end_data_req(struct drbd_work *, int); extern int w_e_end_rsdata_req(struct drbd_work *, int); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ef8212a4b73e..55fd104f1ed4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1377,7 +1377,7 @@ void drbd_send_ack_dp(struct drbd_peer_device *peer_device, enum drbd_packet cmd struct p_data *dp, int data_size) { if (peer_device->connection->peer_integrity_tfm) - data_size -= crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm); + data_size -= crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); _drbd_send_ack(peer_device, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); } @@ -1673,7 +1673,7 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; } -/* Used to send write or TRIM aka REQ_DISCARD requests +/* Used to send write or TRIM aka REQ_OP_DISCARD requests * R_PRIMARY -> Peer (P_DATA, P_TRIM) */ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req) @@ -1690,7 +1690,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * sock = &peer_device->connection->data; p = drbd_prepare_command(peer_device, sock); digest_size = peer_device->connection->integrity_tfm ? - crypto_ahash_digestsize(peer_device->connection->integrity_tfm) : 0; + crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0; if (!p) return -EIO; @@ -1796,7 +1796,7 @@ int drbd_send_block(struct drbd_peer_device *peer_device, enum drbd_packet cmd, p = drbd_prepare_command(peer_device, sock); digest_size = peer_device->connection->integrity_tfm ? 
- crypto_ahash_digestsize(peer_device->connection->integrity_tfm) : 0; + crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0; if (!p) return -EIO; @@ -2557,11 +2557,11 @@ void conn_free_crypto(struct drbd_connection *connection) { drbd_free_sock(connection); - crypto_free_ahash(connection->csums_tfm); - crypto_free_ahash(connection->verify_tfm); + crypto_free_shash(connection->csums_tfm); + crypto_free_shash(connection->verify_tfm); crypto_free_shash(connection->cram_hmac_tfm); - crypto_free_ahash(connection->integrity_tfm); - crypto_free_ahash(connection->peer_integrity_tfm); + crypto_free_shash(connection->integrity_tfm); + crypto_free_shash(connection->peer_integrity_tfm); kfree(connection->int_dig_in); kfree(connection->int_dig_vv); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b4f02768ba47..d15703b1ffe8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2303,10 +2303,10 @@ check_net_options(struct drbd_connection *connection, struct net_conf *new_net_c } struct crypto { - struct crypto_ahash *verify_tfm; - struct crypto_ahash *csums_tfm; + struct crypto_shash *verify_tfm; + struct crypto_shash *csums_tfm; struct crypto_shash *cram_hmac_tfm; - struct crypto_ahash *integrity_tfm; + struct crypto_shash *integrity_tfm; }; static int @@ -2324,36 +2324,21 @@ alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg) return NO_ERROR; } -static int -alloc_ahash(struct crypto_ahash **tfm, char *tfm_name, int err_alg) -{ - if (!tfm_name[0]) - return NO_ERROR; - - *tfm = crypto_alloc_ahash(tfm_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(*tfm)) { - *tfm = NULL; - return err_alg; - } - - return NO_ERROR; -} - static enum drbd_ret_code alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) { char hmac_name[CRYPTO_MAX_ALG_NAME]; enum drbd_ret_code rv; - rv = alloc_ahash(&crypto->csums_tfm, new_net_conf->csums_alg, + rv = alloc_shash(&crypto->csums_tfm, new_net_conf->csums_alg, ERR_CSUMS_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_ahash(&crypto->verify_tfm, new_net_conf->verify_alg, + rv = alloc_shash(&crypto->verify_tfm, new_net_conf->verify_alg, ERR_VERIFY_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_ahash(&crypto->integrity_tfm, new_net_conf->integrity_alg, + rv = alloc_shash(&crypto->integrity_tfm, new_net_conf->integrity_alg, ERR_INTEGRITY_ALG); if (rv != NO_ERROR) return rv; @@ -2371,9 +2356,9 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) static void free_crypto(struct crypto *crypto) { crypto_free_shash(crypto->cram_hmac_tfm); - crypto_free_ahash(crypto->integrity_tfm); - crypto_free_ahash(crypto->csums_tfm); - crypto_free_ahash(crypto->verify_tfm); + crypto_free_shash(crypto->integrity_tfm); + crypto_free_shash(crypto->csums_tfm); + crypto_free_shash(crypto->verify_tfm); } int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) @@ -2450,17 +2435,17 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(connection->net_conf, new_net_conf); if (!rsr) { - crypto_free_ahash(connection->csums_tfm); + crypto_free_shash(connection->csums_tfm); connection->csums_tfm = crypto.csums_tfm; crypto.csums_tfm = NULL; } if (!ovr) { - crypto_free_ahash(connection->verify_tfm); + crypto_free_shash(connection->verify_tfm); connection->verify_tfm = crypto.verify_tfm; crypto.verify_tfm = NULL; } - crypto_free_ahash(connection->integrity_tfm); + crypto_free_shash(connection->integrity_tfm); connection->integrity_tfm = crypto.integrity_tfm; 
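The drbd changes above and below all follow one pattern: asynchronous crypto_ahash handles, which need scatterlists and request objects, are swapped for synchronous crypto_shash handles that hash linear buffers in place. A minimal, self-contained sketch of that synchronous pattern (the "md5" algorithm name, the buffer and the helper name are placeholders, not anything drbd uses):

    #include <crypto/hash.h>
    #include <linux/err.h>

    /* Digest a linear buffer with a synchronous hash transform. */
    static int shash_digest_example(const void *data, unsigned int len, u8 *out)
    {
            struct crypto_shash *tfm;
            int err;

            tfm = crypto_alloc_shash("md5", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            {
                    SHASH_DESC_ON_STACK(desc, tfm);

                    desc->tfm = tfm;
                    desc->flags = 0;
                    err = crypto_shash_init(desc);
                    if (!err)
                            err = crypto_shash_update(desc, data, len);
                    if (!err)
                            err = crypto_shash_final(desc, out);
                    shash_desc_zero(desc);
            }

            crypto_free_shash(tfm);
            return err;
    }

The on-stack descriptor is what lets the drbd_csum_* helpers later in this diff drop AHASH_REQUEST_ON_STACK and the scatterlist setup entirely.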
if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100) /* Do this without trying to take connection->data.mutex again. */ diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index c3081f93051c..48dabbb21e11 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h @@ -57,7 +57,7 @@ enum drbd_packet { P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */ /* 0x2e to 0x30 reserved, used in drbd 9 */ - /* REQ_DISCARD. We used "discard" in different contexts before, + /* REQ_OP_DISCARD. We used "discard" in different contexts before, * which is why I chose TRIM here, to disambiguate. */ P_TRIM = 0x31, @@ -126,7 +126,7 @@ struct p_header100 { #define DP_UNPLUG 8 /* not used anymore */ #define DP_FUA 16 /* equals REQ_FUA */ #define DP_FLUSH 32 /* equals REQ_PREFLUSH */ -#define DP_DISCARD 64 /* equals REQ_DISCARD */ +#define DP_DISCARD 64 /* equals REQ_OP_DISCARD */ #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ #define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ #define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 75f6b47169e6..fc67fd853375 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1732,7 +1732,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf } /* quick wrapper in case payload size != request_size (write same) */ -static void drbd_csum_ee_size(struct crypto_ahash *h, +static void drbd_csum_ee_size(struct crypto_shash *h, struct drbd_peer_request *r, void *d, unsigned int payload_size) { @@ -1769,7 +1769,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, digest_size = 0; if (!trim && peer_device->connection->peer_integrity_tfm) { - digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); /* * FIXME: Receive the incoming digest into the receive buffer * here, together with its struct p_data? @@ -1905,7 +1905,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req digest_size = 0; if (peer_device->connection->peer_integrity_tfm) { - digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size); if (err) return err; @@ -3542,7 +3542,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in int p_proto, p_discard_my_data, p_two_primaries, cf; struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; char integrity_alg[SHARED_SECRET_MAX] = ""; - struct crypto_ahash *peer_integrity_tfm = NULL; + struct crypto_shash *peer_integrity_tfm = NULL; void *int_dig_in = NULL, *int_dig_vv = NULL; p_proto = be32_to_cpu(p->protocol); @@ -3623,7 +3623,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in * change. 
*/ - peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(peer_integrity_tfm)) { peer_integrity_tfm = NULL; drbd_err(connection, "peer data-integrity-alg %s not supported\n", @@ -3631,7 +3631,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in goto disconnect; } - hash_size = crypto_ahash_digestsize(peer_integrity_tfm); + hash_size = crypto_shash_digestsize(peer_integrity_tfm); int_dig_in = kmalloc(hash_size, GFP_KERNEL); int_dig_vv = kmalloc(hash_size, GFP_KERNEL); if (!(int_dig_in && int_dig_vv)) { @@ -3661,7 +3661,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in mutex_unlock(&connection->resource->conf_update); mutex_unlock(&connection->data.mutex); - crypto_free_ahash(connection->peer_integrity_tfm); + crypto_free_shash(connection->peer_integrity_tfm); kfree(connection->int_dig_in); kfree(connection->int_dig_vv); connection->peer_integrity_tfm = peer_integrity_tfm; @@ -3679,7 +3679,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in disconnect_rcu_unlock: rcu_read_unlock(); disconnect: - crypto_free_ahash(peer_integrity_tfm); + crypto_free_shash(peer_integrity_tfm); kfree(int_dig_in); kfree(int_dig_vv); conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); @@ -3691,15 +3691,16 @@ disconnect: * return: NULL (alg name was "") * ERR_PTR(error) if something goes wrong * or the crypto hash ptr, if it worked out ok. */ -static struct crypto_ahash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, +static struct crypto_shash *drbd_crypto_alloc_digest_safe( + const struct drbd_device *device, const char *alg, const char *name) { - struct crypto_ahash *tfm; + struct crypto_shash *tfm; if (!alg[0]) return NULL; - tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_shash(alg, 0, 0); if (IS_ERR(tfm)) { drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n", alg, name, PTR_ERR(tfm)); @@ -3752,8 +3753,8 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i struct drbd_device *device; struct p_rs_param_95 *p; unsigned int header_size, data_size, exp_max_sz; - struct crypto_ahash *verify_tfm = NULL; - struct crypto_ahash *csums_tfm = NULL; + struct crypto_shash *verify_tfm = NULL; + struct crypto_shash *csums_tfm = NULL; struct net_conf *old_net_conf, *new_net_conf = NULL; struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; const int apv = connection->agreed_pro_version; @@ -3900,14 +3901,14 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i if (verify_tfm) { strcpy(new_net_conf->verify_alg, p->verify_alg); new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_ahash(peer_device->connection->verify_tfm); + crypto_free_shash(peer_device->connection->verify_tfm); peer_device->connection->verify_tfm = verify_tfm; drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { strcpy(new_net_conf->csums_alg, p->csums_alg); new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; - crypto_free_ahash(peer_device->connection->csums_tfm); + crypto_free_shash(peer_device->connection->csums_tfm); peer_device->connection->csums_tfm = csums_tfm; drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg); } @@ -3951,9 +3952,9 @@ disconnect: mutex_unlock(&connection->resource->conf_update); /* just for completeness: actually not 
needed, * as this is not reached if csums_tfm was ok. */ - crypto_free_ahash(csums_tfm); + crypto_free_shash(csums_tfm); /* but free the verify_tfm again, if csums_tfm did not work out */ - crypto_free_ahash(verify_tfm); + crypto_free_shash(verify_tfm); conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 19cac36e9737..1c4da17e902e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -650,7 +650,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case DISCARD_COMPLETED_NOTSUPP: case DISCARD_COMPLETED_WITH_ERROR: /* I'd rather not detach from local disk just because it - * failed a REQ_DISCARD. */ + * failed a REQ_OP_DISCARD. */ mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); break; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b8f77e83d456..99255d0c9e2f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -152,7 +152,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); - /* FIXME do we want to detach for failed REQ_DISCARD? + /* FIXME do we want to detach for failed REQ_OP_DISCARD? * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ if (peer_req->flags & EE_WAS_ERROR) __drbd_chk_io_error(device, DRBD_WRITE_ERROR); @@ -295,60 +295,61 @@ void drbd_request_endio(struct bio *bio) complete_master_bio(device, &m); } -void drbd_csum_ee(struct crypto_ahash *tfm, struct drbd_peer_request *peer_req, void *digest) +void drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest) { - AHASH_REQUEST_ON_STACK(req, tfm); - struct scatterlist sg; + SHASH_DESC_ON_STACK(desc, tfm); struct page *page = peer_req->pages; struct page *tmp; unsigned len; + void *src; - ahash_request_set_tfm(req, tfm); - ahash_request_set_callback(req, 0, NULL, NULL); + desc->tfm = tfm; + desc->flags = 0; - sg_init_table(&sg, 1); - crypto_ahash_init(req); + crypto_shash_init(desc); + src = kmap_atomic(page); while ((tmp = page_chain_next(page))) { /* all but the last page will be fully used */ - sg_set_page(&sg, page, PAGE_SIZE, 0); - ahash_request_set_crypt(req, &sg, NULL, sg.length); - crypto_ahash_update(req); + crypto_shash_update(desc, src, PAGE_SIZE); + kunmap_atomic(src); page = tmp; + src = kmap_atomic(page); } /* and now the last, possibly only partially used page */ len = peer_req->i.size & (PAGE_SIZE - 1); - sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); - ahash_request_set_crypt(req, &sg, digest, sg.length); - crypto_ahash_finup(req); - ahash_request_zero(req); + crypto_shash_update(desc, src, len ?: PAGE_SIZE); + kunmap_atomic(src); + + crypto_shash_final(desc, digest); + shash_desc_zero(desc); } -void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest) +void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest) { - AHASH_REQUEST_ON_STACK(req, tfm); - struct scatterlist sg; + SHASH_DESC_ON_STACK(desc, tfm); struct bio_vec bvec; struct bvec_iter iter; - ahash_request_set_tfm(req, tfm); - ahash_request_set_callback(req, 0, NULL, NULL); + desc->tfm = tfm; + desc->flags = 0; - sg_init_table(&sg, 1); - crypto_ahash_init(req); + crypto_shash_init(desc); bio_for_each_segment(bvec, bio, iter) { - sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); - 
ahash_request_set_crypt(req, &sg, NULL, sg.length); - crypto_ahash_update(req); + u8 *src; + + src = kmap_atomic(bvec.bv_page); + crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len); + kunmap_atomic(src); + /* REQ_OP_WRITE_SAME has only one segment, * checksum the payload only once. */ if (bio_op(bio) == REQ_OP_WRITE_SAME) break; } - ahash_request_set_crypt(req, NULL, digest, 0); - crypto_ahash_final(req); - ahash_request_zero(req); + crypto_shash_final(desc, digest); + shash_desc_zero(desc); } /* MAYBE merge common code with w_e_end_ov_req */ @@ -367,7 +368,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; - digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { sector_t sector = peer_req->i.sector; @@ -1205,7 +1206,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) * a real fix would be much more involved, * introducing more locking mechanisms */ if (peer_device->connection->csums_tfm) { - digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm); D_ASSERT(device, digest_size == di->digest_size); digest = kmalloc(digest_size, GFP_NOIO); } @@ -1255,7 +1256,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (!digest) { err = 1; /* terminate the connection in case the allocation failed */ @@ -1327,7 +1328,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) di = peer_req->digest; if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index f2b6f4da1034..a8cfa011c284 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -252,13 +252,13 @@ static int allowed_drive_mask = 0x33; static int irqdma_allocated; -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/blkpg.h> #include <linux/cdrom.h> /* for the compatibility eject ioctl */ #include <linux/completion.h> +static LIST_HEAD(floppy_reqs); static struct request *current_req; -static void do_fd_request(struct request_queue *q); static int set_next_request(void); #ifndef fd_get_dma_residue @@ -414,10 +414,10 @@ static struct floppy_drive_struct drive_state[N_DRIVE]; static struct floppy_write_errors write_errors[N_DRIVE]; static struct timer_list motor_off_timer[N_DRIVE]; static struct gendisk *disks[N_DRIVE]; +static struct blk_mq_tag_set tag_sets[N_DRIVE]; static struct block_device *opened_bdev[N_DRIVE]; static DEFINE_MUTEX(open_lock); static struct floppy_raw_cmd *raw_cmd, default_raw_cmd; -static int fdc_queue; /* * This struct defines the different floppy types. 
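The floppy_end_request() hunk just below, like the amiflop and ataflop completion paths earlier in this diff, replaces __blk_end_request() with the blk-mq pair blk_update_request()/__blk_mq_end_request(). A rough sketch of that piecewise completion, where handle_one_segment() is a hypothetical stand-in for the driver's real per-segment transfer:

    #include <linux/blk-mq.h>

    /* hypothetical per-segment transfer routine supplied by the driver */
    static blk_status_t handle_one_segment(struct request *rq);

    /*
     * blk_update_request() consumes the completed bytes and returns true
     * while the request still has work left; the final completion is then
     * signaled explicitly with __blk_mq_end_request().
     */
    static void complete_rq_piecewise(struct request *rq)
    {
            blk_status_t err;

            do {
                    err = handle_one_segment(rq);
            } while (blk_update_request(rq, err, blk_rq_cur_bytes(rq)));

            __blk_mq_end_request(rq, err);
    }

On the setup side the same drivers use blk_mq_init_sq_queue() to allocate the tag set and a single-hardware-queue request queue in one call, which is what replaces blk_init_queue() throughout this series.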
@@ -2216,8 +2216,9 @@ static void floppy_end_request(struct request *req, blk_status_t error) /* current_count_sectors can be zero if transfer failed */ if (error) nr_sectors = blk_rq_cur_sectors(req); - if (__blk_end_request(req, error, nr_sectors << 9)) + if (blk_update_request(req, error, nr_sectors << 9)) return; + __blk_mq_end_request(req, error); /* We're done with the request */ floppy_off(drive); @@ -2797,27 +2798,14 @@ static int make_raw_rw_request(void) return 2; } -/* - * Round-robin between our available drives, doing one request from each - */ static int set_next_request(void) { - struct request_queue *q; - int old_pos = fdc_queue; - - do { - q = disks[fdc_queue]->queue; - if (++fdc_queue == N_DRIVE) - fdc_queue = 0; - if (q) { - current_req = blk_fetch_request(q); - if (current_req) { - current_req->error_count = 0; - break; - } - } - } while (fdc_queue != old_pos); - + current_req = list_first_entry_or_null(&floppy_reqs, struct request, + queuelist); + if (current_req) { + current_req->error_count = 0; + list_del_init(¤t_req->queuelist); + } return current_req != NULL; } @@ -2901,29 +2889,38 @@ static void process_fd_request(void) schedule_bh(redo_fd_request); } -static void do_fd_request(struct request_queue *q) +static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + blk_mq_start_request(bd->rq); + if (WARN(max_buffer_sectors == 0, "VFS: %s called on non-open device\n", __func__)) - return; + return BLK_STS_IOERR; if (WARN(atomic_read(&usage_count) == 0, "warning: usage count=0, current_req=%p sect=%ld flags=%llx\n", current_req, (long)blk_rq_pos(current_req), (unsigned long long) current_req->cmd_flags)) - return; + return BLK_STS_IOERR; + + spin_lock_irq(&floppy_lock); + list_add_tail(&bd->rq->queuelist, &floppy_reqs); + spin_unlock_irq(&floppy_lock); if (test_and_set_bit(0, &fdc_busy)) { /* fdc busy, this new request will be treated when the current one is done */ is_alive(__func__, "old request running"); - return; + return BLK_STS_OK; } + command_status = FD_COMMAND_NONE; __reschedule_timeout(MAXTIMEOUT, "fd_request"); set_fdc(0); process_fd_request(); is_alive(__func__, ""); + return BLK_STS_OK; } static const struct cont_t poll_cont = { @@ -4486,6 +4483,10 @@ static struct platform_driver floppy_driver = { }, }; +static const struct blk_mq_ops floppy_mq_ops = { + .queue_rq = floppy_queue_rq, +}; + static struct platform_device floppy_device[N_DRIVE]; static bool floppy_available(int drive) @@ -4533,9 +4534,12 @@ static int __init do_floppy_init(void) goto out_put_disk; } - disks[drive]->queue = blk_init_queue(do_fd_request, &floppy_lock); - if (!disks[drive]->queue) { - err = -ENOMEM; + disks[drive]->queue = blk_mq_init_sq_queue(&tag_sets[drive], + &floppy_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disks[drive]->queue)) { + err = PTR_ERR(disks[drive]->queue); + disks[drive]->queue = NULL; goto out_put_disk; } @@ -4679,7 +4683,7 @@ static int __init do_floppy_init(void) /* to be cleaned up... 
*/ disks[drive]->private_data = (void *)(long)drive; disks[drive]->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&floppy_device[drive].dev, disks[drive]); + device_add_disk(&floppy_device[drive].dev, disks[drive], NULL); } return 0; @@ -4708,6 +4712,7 @@ out_put_disk: del_timer_sync(&motor_off_timer[drive]); blk_cleanup_queue(disks[drive]->queue); disks[drive]->queue = NULL; + blk_mq_free_tag_set(&tag_sets[drive]); } put_disk(disks[drive]); } @@ -4935,6 +4940,7 @@ static void __exit floppy_module_exit(void) platform_device_unregister(&floppy_device[drive]); } blk_cleanup_queue(disks[drive]->queue); + blk_mq_free_tag_set(&tag_sets[drive]); /* * These disks have not called add_disk(). Don't put down diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ea9debf59b22..abad6d15f956 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -77,6 +77,7 @@ #include <linux/falloc.h> #include <linux/uio.h> #include <linux/ioprio.h> +#include <linux/blk-cgroup.h> #include "loop.h" @@ -1760,8 +1761,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, /* always use the first bio's css */ #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && rq->bio && rq->bio->bi_css) { - cmd->css = rq->bio->bi_css; + if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { + cmd->css = &bio_blkcg(rq->bio)->css; css_get(cmd->css); } else #endif diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index d0666f5ce003..dfc8de6ce525 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1862,11 +1862,9 @@ static int exec_drive_taskfile(struct driver_data *dd, if (IS_ERR(outbuf)) return PTR_ERR(outbuf); - outbuf_dma = pci_map_single(dd->pdev, - outbuf, - taskout, - DMA_TO_DEVICE); - if (pci_dma_mapping_error(dd->pdev, outbuf_dma)) { + outbuf_dma = dma_map_single(&dd->pdev->dev, outbuf, + taskout, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pdev->dev, outbuf_dma)) { err = -ENOMEM; goto abort; } @@ -1880,10 +1878,9 @@ static int exec_drive_taskfile(struct driver_data *dd, inbuf = NULL; goto abort; } - inbuf_dma = pci_map_single(dd->pdev, - inbuf, - taskin, DMA_FROM_DEVICE); - if (pci_dma_mapping_error(dd->pdev, inbuf_dma)) { + inbuf_dma = dma_map_single(&dd->pdev->dev, inbuf, + taskin, DMA_FROM_DEVICE); + if (dma_mapping_error(&dd->pdev->dev, inbuf_dma)) { err = -ENOMEM; goto abort; } @@ -2002,11 +1999,11 @@ static int exec_drive_taskfile(struct driver_data *dd, /* reclaim the DMA buffers.*/ if (inbuf_dma) - pci_unmap_single(dd->pdev, inbuf_dma, - taskin, DMA_FROM_DEVICE); + dma_unmap_single(&dd->pdev->dev, inbuf_dma, taskin, + DMA_FROM_DEVICE); if (outbuf_dma) - pci_unmap_single(dd->pdev, outbuf_dma, - taskout, DMA_TO_DEVICE); + dma_unmap_single(&dd->pdev->dev, outbuf_dma, taskout, + DMA_TO_DEVICE); inbuf_dma = 0; outbuf_dma = 0; @@ -2053,11 +2050,11 @@ static int exec_drive_taskfile(struct driver_data *dd, } abort: if (inbuf_dma) - pci_unmap_single(dd->pdev, inbuf_dma, - taskin, DMA_FROM_DEVICE); + dma_unmap_single(&dd->pdev->dev, inbuf_dma, taskin, + DMA_FROM_DEVICE); if (outbuf_dma) - pci_unmap_single(dd->pdev, outbuf_dma, - taskout, DMA_TO_DEVICE); + dma_unmap_single(&dd->pdev->dev, outbuf_dma, taskout, + DMA_TO_DEVICE); kfree(outbuf); kfree(inbuf); @@ -3861,7 +3858,7 @@ skip_create_disk: set_capacity(dd->disk, capacity); /* Enable the block device and add it to /dev */ - device_add_disk(&dd->pdev->dev, dd->disk); + device_add_disk(&dd->pdev->dev, dd->disk, NULL); dd->bdev = bdget_disk(dd->disk, 0); /* @@ -4216,18 +4213,10 @@ static int 
mtip_pci_probe(struct pci_dev *pdev, goto iomap_err; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - - if (rv) { - rv = pci_set_consistent_dma_mask(pdev, - DMA_BIT_MASK(32)); - if (rv) { - dev_warn(&pdev->dev, - "64-bit DMA enable failed\n"); - goto setmask_err; - } - } + rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rv) { + dev_warn(&pdev->dev, "64-bit DMA enable failed\n"); + goto setmask_err; } /* Copy the info we may need later into the private data structure. */ diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 093b614d6524..e94591021682 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -606,20 +606,12 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) static void end_cmd(struct nullb_cmd *cmd) { - struct request_queue *q = NULL; int queue_mode = cmd->nq->dev->queue_mode; - if (cmd->rq) - q = cmd->rq->q; - switch (queue_mode) { case NULL_Q_MQ: blk_mq_end_request(cmd->rq, cmd->error); return; - case NULL_Q_RQ: - INIT_LIST_HEAD(&cmd->rq->queuelist); - blk_end_request_all(cmd->rq, cmd->error); - break; case NULL_Q_BIO: cmd->bio->bi_status = cmd->error; bio_endio(cmd->bio); @@ -627,15 +619,6 @@ static void end_cmd(struct nullb_cmd *cmd) } free_cmd(cmd); - - /* Restart queue if needed, as we are freeing a tag */ - if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } } static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) @@ -1136,25 +1119,14 @@ static void null_stop_queue(struct nullb *nullb) if (nullb->dev->queue_mode == NULL_Q_MQ) blk_mq_stop_hw_queues(q); - else { - spin_lock_irq(q->queue_lock); - blk_stop_queue(q); - spin_unlock_irq(q->queue_lock); - } } static void null_restart_queue_async(struct nullb *nullb) { struct request_queue *q = nullb->q; - unsigned long flags; if (nullb->dev->queue_mode == NULL_Q_MQ) blk_mq_start_stopped_hw_queues(q, true); - else { - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } } static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd) @@ -1197,17 +1169,8 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) /* race with timer */ if (atomic_long_read(&nullb->cur_bytes) > 0) null_restart_queue_async(nullb); - if (dev->queue_mode == NULL_Q_RQ) { - struct request_queue *q = nullb->q; - - spin_lock_irq(q->queue_lock); - rq->rq_flags |= RQF_DONTPREP; - blk_requeue_request(q, rq); - spin_unlock_irq(q->queue_lock); - return BLK_STS_OK; - } else - /* requeue request */ - return BLK_STS_DEV_RESOURCE; + /* requeue request */ + return BLK_STS_DEV_RESOURCE; } } @@ -1278,9 +1241,6 @@ out: case NULL_Q_MQ: blk_mq_complete_request(cmd->rq); break; - case NULL_Q_RQ: - blk_complete_request(cmd->rq); - break; case NULL_Q_BIO: /* * XXX: no proper submitting cpu information available. 
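The mtip32xx probe hunk above collapses the old pci_set_dma_mask()/pci_set_consistent_dma_mask() pair into a single dma_set_mask_and_coherent() call. The usual idiom, sketched with a hypothetical probe helper, tries 64-bit masks first and falls back to 32-bit:

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int ex_setup_dma(struct pci_dev *pdev)
{
	int rv;

	/* streaming and coherent masks are set together */
	rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (rv)
		rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
	return rv;	/* non-zero means no usable DMA configuration */
}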
@@ -1349,30 +1309,6 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } -static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) -{ - pr_info("null: rq %p timed out\n", rq); - __blk_complete_request(rq); - return BLK_EH_DONE; -} - -static int null_rq_prep_fn(struct request_queue *q, struct request *req) -{ - struct nullb *nullb = q->queuedata; - struct nullb_queue *nq = nullb_to_queue(nullb); - struct nullb_cmd *cmd; - - cmd = alloc_cmd(nq, 0); - if (cmd) { - cmd->rq = req; - req->special = cmd; - return BLKPREP_OK; - } - blk_stop_queue(q); - - return BLKPREP_DEFER; -} - static bool should_timeout_request(struct request *rq) { #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION @@ -1391,27 +1327,6 @@ static bool should_requeue_request(struct request *rq) return false; } -static void null_request_fn(struct request_queue *q) -{ - struct request *rq; - - while ((rq = blk_fetch_request(q)) != NULL) { - struct nullb_cmd *cmd = rq->special; - - /* just ignore the request */ - if (should_timeout_request(rq)) - continue; - if (should_requeue_request(rq)) { - blk_requeue_request(q, rq); - continue; - } - - spin_unlock_irq(q->queue_lock); - null_handle_cmd(cmd); - spin_lock_irq(q->queue_lock); - } -} - static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) { pr_info("null: rq %p timed out\n", rq); @@ -1766,24 +1681,6 @@ static int null_add_dev(struct nullb_device *dev) rv = init_driver_queues(nullb); if (rv) goto out_cleanup_blk_queue; - } else { - nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, - dev->home_node); - if (!nullb->q) { - rv = -ENOMEM; - goto out_cleanup_queues; - } - - if (!null_setup_fault()) - goto out_cleanup_blk_queue; - - blk_queue_prep_rq(nullb->q, null_rq_prep_fn); - blk_queue_softirq_done(nullb->q, null_softirq_done_fn); - blk_queue_rq_timed_out(nullb->q, null_rq_timed_out_fn); - nullb->q->rq_timeout = 5 * HZ; - rv = init_driver_queues(nullb); - if (rv) - goto out_cleanup_blk_queue; } if (dev->mbps) { @@ -1865,6 +1762,10 @@ static int __init null_init(void) return -EINVAL; } + if (g_queue_mode == NULL_Q_RQ) { + pr_err("null_blk: legacy IO path no longer available\n"); + return -EINVAL; + } if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { if (g_submit_queues != nr_online_nodes) { pr_warn("null_blk: submit_queues param is set to %u.\n", diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index a026211afb51..96670eefaeb2 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -137,7 +137,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY}; #include <linux/delay.h> #include <linux/cdrom.h> #include <linux/spinlock.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/mutex.h> #include <linux/uaccess.h> @@ -186,7 +186,8 @@ static int pcd_packet(struct cdrom_device_info *cdi, static int pcd_detect(void); static void pcd_probe_capabilities(void); static void do_pcd_read_drq(void); -static void do_pcd_request(struct request_queue * q); +static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); static void do_pcd_read(void); struct pcd_unit { @@ -199,6 +200,8 @@ struct pcd_unit { char *name; /* pcd0, pcd1, etc */ struct cdrom_device_info info; /* uniform cdrom interface */ struct gendisk *disk; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; }; static struct pcd_unit pcd[PCD_UNITS]; @@ -292,6 +295,10 @@ static const struct cdrom_device_ops pcd_dops = { CDC_CD_RW, }; +static 
const struct blk_mq_ops pcd_mq_ops = { + .queue_rq = pcd_queue_rq, +}; + static void pcd_init_units(void) { struct pcd_unit *cd; @@ -300,13 +307,19 @@ static void pcd_init_units(void) pcd_drive_count = 0; for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { struct gendisk *disk = alloc_disk(1); + if (!disk) continue; - disk->queue = blk_init_queue(do_pcd_request, &pcd_lock); - if (!disk->queue) { - put_disk(disk); + + disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops, + 1, BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disk->queue)) { + disk->queue = NULL; continue; } + + INIT_LIST_HEAD(&cd->rq_list); + disk->queue->queuedata = cd; blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); cd->disk = disk; cd->pi = &cd->pia; @@ -748,18 +761,18 @@ static int pcd_queue; static int set_next_request(void) { struct pcd_unit *cd; - struct request_queue *q; int old_pos = pcd_queue; do { cd = &pcd[pcd_queue]; - q = cd->present ? cd->disk->queue : NULL; if (++pcd_queue == PCD_UNITS) pcd_queue = 0; - if (q) { - pcd_req = blk_fetch_request(q); - if (pcd_req) - break; + if (cd->present && !list_empty(&cd->rq_list)) { + pcd_req = list_first_entry(&cd->rq_list, struct request, + queuelist); + list_del_init(&pcd_req->queuelist); + blk_mq_start_request(pcd_req); + break; } } while (pcd_queue != old_pos); @@ -768,33 +781,41 @@ static int set_next_request(void) static void pcd_request(void) { + struct pcd_unit *cd; + if (pcd_busy) return; - while (1) { - if (!pcd_req && !set_next_request()) - return; - if (rq_data_dir(pcd_req) == READ) { - struct pcd_unit *cd = pcd_req->rq_disk->private_data; - if (cd != pcd_current) - pcd_bufblk = -1; - pcd_current = cd; - pcd_sector = blk_rq_pos(pcd_req); - pcd_count = blk_rq_cur_sectors(pcd_req); - pcd_buf = bio_data(pcd_req->bio); - pcd_busy = 1; - ps_set_intr(do_pcd_read, NULL, 0, nice); - return; - } else { - __blk_end_request_all(pcd_req, BLK_STS_IOERR); - pcd_req = NULL; - } - } + if (!pcd_req && !set_next_request()) + return; + + cd = pcd_req->rq_disk->private_data; + if (cd != pcd_current) + pcd_bufblk = -1; + pcd_current = cd; + pcd_sector = blk_rq_pos(pcd_req); + pcd_count = blk_rq_cur_sectors(pcd_req); + pcd_buf = bio_data(pcd_req->bio); + pcd_busy = 1; + ps_set_intr(do_pcd_read, NULL, 0, nice); } -static void do_pcd_request(struct request_queue *q) +static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct pcd_unit *cd = hctx->queue->queuedata; + + if (rq_data_dir(bd->rq) != READ) { + blk_mq_start_request(bd->rq); + return BLK_STS_IOERR; + } + + spin_lock_irq(&pcd_lock); + list_add_tail(&bd->rq->queuelist, &cd->rq_list); pcd_request(); + spin_unlock_irq(&pcd_lock); + + return BLK_STS_OK; } static inline void next_request(blk_status_t err) @@ -802,8 +823,10 @@ static inline void next_request(blk_status_t err) unsigned long saved_flags; spin_lock_irqsave(&pcd_lock, saved_flags); - if (!__blk_end_request_cur(pcd_req, err)) + if (!blk_update_request(pcd_req, err, blk_rq_cur_bytes(pcd_req))) { + __blk_mq_end_request(pcd_req, err); pcd_req = NULL; + } pcd_busy = 0; pcd_request(); spin_unlock_irqrestore(&pcd_lock, saved_flags); @@ -1011,6 +1034,7 @@ static void __exit pcd_exit(void) unregister_cdrom(&cd->info); } blk_cleanup_queue(cd->disk->queue); + blk_mq_free_tag_set(&cd->tag_set); put_disk(cd->disk); } unregister_blkdev(major, name); diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 7cf947586fe4..ae4971e5d9a8 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ 
-151,7 +151,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV}; #include <linux/delay.h> #include <linux/hdreg.h> #include <linux/cdrom.h> /* for the eject ioctl */ -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/blkpg.h> #include <linux/kernel.h> #include <linux/mutex.h> @@ -236,6 +236,8 @@ struct pd_unit { int alt_geom; char name[PD_NAMELEN]; /* pda, pdb, etc ... */ struct gendisk *gd; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; }; static struct pd_unit pd[PD_UNITS]; @@ -399,9 +401,17 @@ static int set_next_request(void) if (++pd_queue == PD_UNITS) pd_queue = 0; if (q) { - pd_req = blk_fetch_request(q); - if (pd_req) - break; + struct pd_unit *disk = q->queuedata; + + if (list_empty(&disk->rq_list)) + continue; + + pd_req = list_first_entry(&disk->rq_list, + struct request, + queuelist); + list_del_init(&pd_req->queuelist); + blk_mq_start_request(pd_req); + break; } } while (pd_queue != old_pos); @@ -412,7 +422,6 @@ static void run_fsm(void) { while (1) { enum action res; - unsigned long saved_flags; int stop = 0; if (!phase) { @@ -433,19 +442,24 @@ static void run_fsm(void) } switch(res = phase()) { - case Ok: case Fail: + case Ok: case Fail: { + blk_status_t err; + + err = res == Ok ? 0 : BLK_STS_IOERR; pi_disconnect(pi_current); pd_claimed = 0; phase = NULL; - spin_lock_irqsave(&pd_lock, saved_flags); - if (!__blk_end_request_cur(pd_req, - res == Ok ? 0 : BLK_STS_IOERR)) { - if (!set_next_request()) - stop = 1; + spin_lock_irq(&pd_lock); + if (!blk_update_request(pd_req, err, + blk_rq_cur_bytes(pd_req))) { + __blk_mq_end_request(pd_req, err); + pd_req = NULL; + stop = !set_next_request(); } - spin_unlock_irqrestore(&pd_lock, saved_flags); + spin_unlock_irq(&pd_lock); if (stop) return; + } /* fall through */ case Hold: schedule_fsm(); @@ -505,11 +519,17 @@ static int pd_next_buf(void) if (pd_count) return 0; spin_lock_irqsave(&pd_lock, saved_flags); - __blk_end_request_cur(pd_req, 0); - pd_count = blk_rq_cur_sectors(pd_req); - pd_buf = bio_data(pd_req->bio); + if (!blk_update_request(pd_req, 0, blk_rq_cur_bytes(pd_req))) { + __blk_mq_end_request(pd_req, 0); + pd_req = NULL; + pd_count = 0; + pd_buf = NULL; + } else { + pd_count = blk_rq_cur_sectors(pd_req); + pd_buf = bio_data(pd_req->bio); + } spin_unlock_irqrestore(&pd_lock, saved_flags); - return 0; + return !pd_count; } static unsigned long pd_timeout; @@ -726,15 +746,21 @@ static enum action pd_identify(struct pd_unit *disk) /* end of io request engine */ -static void do_pd_request(struct request_queue * q) +static blk_status_t pd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - if (pd_req) - return; - pd_req = blk_fetch_request(q); - if (!pd_req) - return; + struct pd_unit *disk = hctx->queue->queuedata; + + spin_lock_irq(&pd_lock); + if (!pd_req) { + pd_req = bd->rq; + blk_mq_start_request(pd_req); + } else + list_add_tail(&bd->rq->queuelist, &disk->rq_list); + spin_unlock_irq(&pd_lock); - schedule_fsm(); + run_fsm(); + return BLK_STS_OK; } static int pd_special_command(struct pd_unit *disk, @@ -847,23 +873,33 @@ static const struct block_device_operations pd_fops = { /* probing */ +static const struct blk_mq_ops pd_mq_ops = { + .queue_rq = pd_queue_rq, +}; + static void pd_probe_drive(struct pd_unit *disk) { - struct gendisk *p = alloc_disk(1 << PD_BITS); + struct gendisk *p; + + p = alloc_disk(1 << PD_BITS); if (!p) return; + strcpy(p->disk_name, disk->name); p->fops = &pd_fops; p->major = major; p->first_minor = (disk - pd) << PD_BITS; 
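The paride drivers (pcd above, pd here, pf below) cannot let blk-mq drive them directly because a single engine services several units, so ->queue_rq() only appends the request to a per-unit list under the driver lock and the engine pulls from those lists itself; partial progress is retired with blk_update_request(), and the request is finished with __blk_mq_end_request() once nothing is left. A condensed sketch of that pattern, with all ex_* names invented for the example:

#include <linux/blk-mq.h>
#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(ex_lock);
static LIST_HEAD(ex_rq_list);
static struct request *ex_cur_req;

static blk_status_t ex_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	spin_lock_irq(&ex_lock);
	list_add_tail(&bd->rq->queuelist, &ex_rq_list);
	/* ... kick the engine if it is idle ... */
	spin_unlock_irq(&ex_lock);
	return BLK_STS_OK;
}

/* Engine side: pick up the next queued request, if any. */
static bool ex_next_request(void)
{
	ex_cur_req = list_first_entry_or_null(&ex_rq_list, struct request,
					      queuelist);
	if (ex_cur_req) {
		list_del_init(&ex_cur_req->queuelist);
		blk_mq_start_request(ex_cur_req);
	}
	return ex_cur_req != NULL;
}

/* Engine side: retire one chunk; end the request when it is complete. */
static void ex_end_chunk(blk_status_t err)
{
	if (!blk_update_request(ex_cur_req, err, blk_rq_cur_bytes(ex_cur_req))) {
		__blk_mq_end_request(ex_cur_req, err);
		ex_cur_req = NULL;
	}
}

pd additionally allocates its queue with BLK_MQ_F_BLOCKING (visible just below), presumably because its ->queue_rq() runs the protocol state machine directly and may sleep.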
disk->gd = p; p->private_data = disk; - p->queue = blk_init_queue(do_pd_request, &pd_lock); - if (!p->queue) { - disk->gd = NULL; - put_disk(p); + + p->queue = blk_mq_init_sq_queue(&disk->tag_set, &pd_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + if (IS_ERR(p->queue)) { + p->queue = NULL; return; } + + p->queue->queuedata = disk; blk_queue_max_hw_sectors(p->queue, cluster); blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH); @@ -895,6 +931,7 @@ static int pd_detect(void) disk->standby = parm[D_SBY]; if (parm[D_PRT]) pd_drive_count++; + INIT_LIST_HEAD(&disk->rq_list); } par_drv = pi_register_driver(name); @@ -972,6 +1009,7 @@ static void __exit pd_exit(void) disk->gd = NULL; del_gendisk(p); blk_cleanup_queue(p->queue); + blk_mq_free_tag_set(&disk->tag_set); put_disk(p); pi_release(disk->pi); } diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index eef7a91f667d..e92e7a8eeeb2 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -152,7 +152,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_LUN, D_DLY}; #include <linux/hdreg.h> #include <linux/cdrom.h> #include <linux/spinlock.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/blkpg.h> #include <linux/mutex.h> #include <linux/uaccess.h> @@ -206,7 +206,8 @@ module_param_array(drive3, int, NULL, 0); #define ATAPI_WRITE_10 0x2a static int pf_open(struct block_device *bdev, fmode_t mode); -static void do_pf_request(struct request_queue * q); +static blk_status_t pf_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); static int pf_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo); @@ -238,6 +239,8 @@ struct pf_unit { int present; /* device present ? */ char name[PF_NAMELEN]; /* pf0, pf1, ... */ struct gendisk *disk; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; }; static struct pf_unit units[PF_UNITS]; @@ -277,6 +280,10 @@ static const struct block_device_operations pf_fops = { .check_events = pf_check_events, }; +static const struct blk_mq_ops pf_mq_ops = { + .queue_rq = pf_queue_rq, +}; + static void __init pf_init_units(void) { struct pf_unit *pf; @@ -284,14 +291,22 @@ static void __init pf_init_units(void) pf_drive_count = 0; for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) { - struct gendisk *disk = alloc_disk(1); + struct gendisk *disk; + + disk = alloc_disk(1); if (!disk) continue; - disk->queue = blk_init_queue(do_pf_request, &pf_spin_lock); - if (!disk->queue) { + + disk->queue = blk_mq_init_sq_queue(&pf->tag_set, &pf_mq_ops, + 1, BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disk->queue)) { put_disk(disk); - return; + disk->queue = NULL; + continue; } + + INIT_LIST_HEAD(&pf->rq_list); + disk->queue->queuedata = pf; blk_queue_max_segments(disk->queue, cluster); blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); pf->disk = disk; @@ -784,18 +799,18 @@ static int pf_queue; static int set_next_request(void) { struct pf_unit *pf; - struct request_queue *q; int old_pos = pf_queue; do { pf = &units[pf_queue]; - q = pf->present ? 
pf->disk->queue : NULL; if (++pf_queue == PF_UNITS) pf_queue = 0; - if (q) { - pf_req = blk_fetch_request(q); - if (pf_req) - break; + if (pf->present && !list_empty(&pf->rq_list)) { + pf_req = list_first_entry(&pf->rq_list, struct request, + queuelist); + list_del_init(&pf_req->queuelist); + blk_mq_start_request(pf_req); + break; } } while (pf_queue != old_pos); @@ -804,8 +819,12 @@ static int set_next_request(void) static void pf_end_request(blk_status_t err) { - if (pf_req && !__blk_end_request_cur(pf_req, err)) + if (!pf_req) + return; + if (!blk_update_request(pf_req, err, blk_rq_cur_bytes(pf_req))) { + __blk_mq_end_request(pf_req, err); pf_req = NULL; + } } static void pf_request(void) @@ -842,9 +861,17 @@ repeat: } } -static void do_pf_request(struct request_queue *q) +static blk_status_t pf_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct pf_unit *pf = hctx->queue->queuedata; + + spin_lock_irq(&pf_spin_lock); + list_add_tail(&bd->rq->queuelist, &pf->rq_list); pf_request(); + spin_unlock_irq(&pf_spin_lock); + + return BLK_STS_OK; } static int pf_next_buf(void) @@ -1024,6 +1051,7 @@ static void __exit pf_exit(void) continue; del_gendisk(pf->disk); blk_cleanup_queue(pf->disk->queue); + blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); pi_release(pf->pi); } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 6f1d25c1eb64..9381f4e3b221 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2645,7 +2645,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, */ if (pd->refcnt == 1) pkt_lock_door(pd, 0); - /* fallthru */ + /* fall through */ /* * forward selected CDROM ioctls to CD-ROM, for UDF */ diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index afe1508d82c6..4e1d9b31f60c 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -19,7 +19,7 @@ */ #include <linux/ata.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/slab.h> #include <linux/module.h> @@ -42,6 +42,7 @@ struct ps3disk_private { spinlock_t lock; /* Request queue spinlock */ struct request_queue *queue; + struct blk_mq_tag_set tag_set; struct gendisk *gendisk; unsigned int blocking_factor; struct request *req; @@ -118,8 +119,8 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, } } -static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, - struct request *req) +static blk_status_t ps3disk_submit_request_sg(struct ps3_storage_device *dev, + struct request *req) { struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); int write = rq_data_dir(req), res; @@ -158,16 +159,15 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, __LINE__, op, res); - __blk_end_request_all(req, BLK_STS_IOERR); - return 0; + return BLK_STS_IOERR; } priv->req = req; - return 1; + return BLK_STS_OK; } -static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, - struct request *req) +static blk_status_t ps3disk_submit_flush_request(struct ps3_storage_device *dev, + struct request *req) { struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); u64 res; @@ -180,50 +180,45 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", __func__, __LINE__, res); - __blk_end_request_all(req, BLK_STS_IOERR); - return 0; + return BLK_STS_IOERR; } priv->req = req; - 
return 1; + return BLK_STS_OK; } -static void ps3disk_do_request(struct ps3_storage_device *dev, - struct request_queue *q) +static blk_status_t ps3disk_do_request(struct ps3_storage_device *dev, + struct request *req) { - struct request *req; - dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - while ((req = blk_fetch_request(q))) { - switch (req_op(req)) { - case REQ_OP_FLUSH: - if (ps3disk_submit_flush_request(dev, req)) - return; - break; - case REQ_OP_READ: - case REQ_OP_WRITE: - if (ps3disk_submit_request_sg(dev, req)) - return; - break; - default: - blk_dump_rq_flags(req, DEVICE_NAME " bad request"); - __blk_end_request_all(req, BLK_STS_IOERR); - } + switch (req_op(req)) { + case REQ_OP_FLUSH: + return ps3disk_submit_flush_request(dev, req); + case REQ_OP_READ: + case REQ_OP_WRITE: + return ps3disk_submit_request_sg(dev, req); + default: + blk_dump_rq_flags(req, DEVICE_NAME " bad request"); + return BLK_STS_IOERR; } } -static void ps3disk_request(struct request_queue *q) +static blk_status_t ps3disk_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request_queue *q = hctx->queue; struct ps3_storage_device *dev = q->queuedata; struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); + blk_status_t ret; - if (priv->req) { - dev_dbg(&dev->sbd.core, "%s:%u busy\n", __func__, __LINE__); - return; - } + blk_mq_start_request(bd->rq); + + spin_lock_irq(&priv->lock); + ret = ps3disk_do_request(dev, bd->rq); + spin_unlock_irq(&priv->lock); - ps3disk_do_request(dev, q); + return ret; } static irqreturn_t ps3disk_interrupt(int irq, void *data) @@ -280,11 +275,11 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) } spin_lock(&priv->lock); - __blk_end_request_all(req, error); priv->req = NULL; - ps3disk_do_request(dev, priv->queue); + blk_mq_end_request(req, error); spin_unlock(&priv->lock); + blk_mq_run_hw_queues(priv->queue, true); return IRQ_HANDLED; } @@ -404,6 +399,10 @@ static unsigned long ps3disk_mask; static DEFINE_MUTEX(ps3disk_mask_mutex); +static const struct blk_mq_ops ps3disk_mq_ops = { + .queue_rq = ps3disk_queue_rq, +}; + static int ps3disk_probe(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); @@ -454,11 +453,12 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) ps3disk_identify(dev); - queue = blk_init_queue(ps3disk_request, &priv->lock); - if (!queue) { - dev_err(&dev->sbd.core, "%s:%u: blk_init_queue failed\n", + queue = blk_mq_init_sq_queue(&priv->tag_set, &ps3disk_mq_ops, 1, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(queue)) { + dev_err(&dev->sbd.core, "%s:%u: blk_mq_init_queue failed\n", __func__, __LINE__); - error = -ENOMEM; + error = PTR_ERR(queue); goto fail_teardown; } @@ -500,11 +500,12 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) gendisk->disk_name, priv->model, priv->raw_capacity >> 11, get_capacity(gendisk) >> 11); - device_add_disk(&dev->sbd.core, gendisk); + device_add_disk(&dev->sbd.core, gendisk, NULL); return 0; fail_cleanup_queue: blk_cleanup_queue(queue); + blk_mq_free_tag_set(&priv->tag_set); fail_teardown: ps3stor_teardown(dev); fail_free_bounce: @@ -530,6 +531,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); blk_cleanup_queue(priv->queue); + blk_mq_free_tag_set(&priv->tag_set); put_disk(priv->gendisk); dev_notice(&dev->sbd.core, "Synchronizing disk cache\n"); ps3disk_sync_cache(dev); diff --git a/drivers/block/ps3vram.c 
b/drivers/block/ps3vram.c index 1e3d5de9d838..c0c50816a10b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -769,7 +769,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n", gendisk->disk_name, get_capacity(gendisk) >> 11); - device_add_disk(&dev->core, gendisk); + device_add_disk(&dev->core, gendisk, NULL); return 0; fail_cleanup_queue: diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index f2c631ce793c..639051502181 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -782,7 +782,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, pci_set_master(dev); pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE); - st = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + st = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); if (st) { dev_err(CARD_TO_DEV(card), "No usable DMA configuration,aborting\n"); diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index c148e83e4ed7..d9a8758682c9 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -276,7 +276,7 @@ static void creg_cmd_done(struct work_struct *work) st = -EIO; } - if ((cmd->op == CREG_OP_READ)) { + if (cmd->op == CREG_OP_READ) { unsigned int cnt8 = ioread32(card->regmap + CREG_CNT); /* Paranoid Sanity Checks */ diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 1a92f9e65937..3894aa0f350b 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -226,7 +226,7 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card) set_capacity(card->gendisk, card->size8 >> 9); else set_capacity(card->gendisk, 0); - device_add_disk(CARD_TO_DEV(card), card->gendisk); + device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL); card->bdev_attached = 1; } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 8fbc1bf6db3d..af9cf0215164 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -224,12 +224,12 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) static void rsxx_free_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma) { if (dma->cmd != HW_CMD_BLK_DISCARD) { - if (!pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) { - pci_unmap_page(ctrl->card->dev, dma->dma_addr, + if (!dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) { + dma_unmap_page(&ctrl->card->dev->dev, dma->dma_addr, get_dma_size(dma), dma->cmd == HW_CMD_BLK_WRITE ? - PCI_DMA_TODEVICE : - PCI_DMA_FROMDEVICE); + DMA_TO_DEVICE : + DMA_FROM_DEVICE); } } @@ -438,23 +438,23 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl) if (dma->cmd != HW_CMD_BLK_DISCARD) { if (dma->cmd == HW_CMD_BLK_WRITE) - dir = PCI_DMA_TODEVICE; + dir = DMA_TO_DEVICE; else - dir = PCI_DMA_FROMDEVICE; + dir = DMA_FROM_DEVICE; /* - * The function pci_map_page is placed here because we + * The function dma_map_page is placed here because we * can only, by design, issue up to 255 commands to the * hardware at one time per DMA channel. So the maximum * amount of mapped memory would be 255 * 4 channels * * 4096 Bytes which is less than 2GB, the limit of a x8 - * Non-HWWD PCIe slot. This way the pci_map_page + * Non-HWWD PCIe slot. This way the dma_map_page * function should never fail because of a lack of * mappable memory. 
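The ps3disk hunks a little further up show the completion side of the conversion: the interrupt handler ends the in-flight request with blk_mq_end_request() and then re-runs the hardware queue with blk_mq_run_hw_queues() instead of calling the old request_fn again. A rough sketch, with ex_priv and its fields standing in for the driver's private data:

#include <linux/blk-mq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>

struct ex_priv {
	spinlock_t		lock;
	struct request		*req;		/* single request in flight */
	struct request_queue	*queue;
};

static irqreturn_t ex_interrupt(int irq, void *data)
{
	struct ex_priv *priv = data;
	blk_status_t error = BLK_STS_OK;	/* derived from device status */
	struct request *req;

	spin_lock(&priv->lock);
	req = priv->req;			/* assumed non-NULL when the IRQ fires */
	priv->req = NULL;
	blk_mq_end_request(req, error);
	spin_unlock(&priv->lock);

	/* let blk-mq feed the next request through ->queue_rq() */
	blk_mq_run_hw_queues(priv->queue, true);
	return IRQ_HANDLED;
}

Also visible in several hunks here (ps3disk, ps3vram, rsxx and others): device_add_disk() has gained a third argument for optional sysfs attribute groups, which these drivers pass as NULL.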
*/ - dma->dma_addr = pci_map_page(ctrl->card->dev, dma->page, + dma->dma_addr = dma_map_page(&ctrl->card->dev->dev, dma->page, dma->pg_off, dma->sub_page.cnt << 9, dir); - if (pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) { + if (dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) { push_tracker(ctrl->trackers, tag); rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); continue; @@ -776,10 +776,10 @@ bvec_err: /*----------------- DMA Engine Initialization & Setup -------------------*/ int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) { - ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, - &ctrl->status.dma_addr); - ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, - &ctrl->cmd.dma_addr); + ctrl->status.buf = dma_alloc_coherent(&dev->dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr, GFP_KERNEL); + ctrl->cmd.buf = dma_alloc_coherent(&dev->dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr, GFP_KERNEL); if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) return -ENOMEM; @@ -962,12 +962,12 @@ failed_dma_setup: vfree(ctrl->trackers); if (ctrl->status.buf) - pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, - ctrl->status.buf, - ctrl->status.dma_addr); + dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, + ctrl->status.dma_addr); if (ctrl->cmd.buf) - pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, - ctrl->cmd.buf, ctrl->cmd.dma_addr); + dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); } return st; @@ -1023,10 +1023,10 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) vfree(ctrl->trackers); - pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, - ctrl->status.buf, ctrl->status.dma_addr); - pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, - ctrl->cmd.buf, ctrl->cmd.dma_addr); + dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, ctrl->status.dma_addr); + dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); } } @@ -1059,11 +1059,11 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) card->ctrl[i].stats.reads_issued--; if (dma->cmd != HW_CMD_BLK_DISCARD) { - pci_unmap_page(card->dev, dma->dma_addr, + dma_unmap_page(&card->dev->dev, dma->dma_addr, get_dma_size(dma), dma->cmd == HW_CMD_BLK_WRITE ? - PCI_DMA_TODEVICE : - PCI_DMA_FROMDEVICE); + DMA_TO_DEVICE : + DMA_FROM_DEVICE); } list_add_tail(&dma->list, &issued_dmas[i]); diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 87b9e7fbf062..7c5fc6942f32 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -632,7 +632,7 @@ static bool skd_preop_sg_list(struct skd_device *skdev, * Map scatterlist to PCI bus addresses. * Note PCI might change the number of entries. 
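The rsxx hunks above replace the PCI-specific DMA helpers with the generic API: dma_map_page()/dma_unmap_page() with DMA_TO_DEVICE or DMA_FROM_DEVICE around each transfer, and dma_alloc_coherent()/dma_free_coherent() for the long-lived status and command buffers. Both halves in sketch form (all names and buffer sizes are placeholders):

#include <linux/dma-mapping.h>
#include <linux/pci.h>

/* Streaming mapping around one transfer; direction follows the I/O. */
static int ex_do_transfer(struct pci_dev *pdev, struct page *page,
			  unsigned int off, unsigned int len, bool write)
{
	enum dma_data_direction dir = write ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
	dma_addr_t addr = dma_map_page(&pdev->dev, page, off, len, dir);

	if (dma_mapping_error(&pdev->dev, addr))
		return -ENOMEM;
	/* ... program the hardware with addr and wait for completion ... */
	dma_unmap_page(&pdev->dev, addr, len, dir);
	return 0;
}

/* Coherent buffer that stays mapped for the lifetime of the device. */
static void *ex_alloc_ring(struct pci_dev *pdev, size_t size, dma_addr_t *dma)
{
	return dma_alloc_coherent(&pdev->dev, size, dma, GFP_KERNEL);
}

The skd hunks in this part of the series apply the same renaming to scatterlists (dma_map_sg()/dma_unmap_sg()) and switch request retries from blk_requeue_request() to blk_mq_requeue_request().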
*/ - n_sg = pci_map_sg(skdev->pdev, sgl, n_sg, skreq->data_dir); + n_sg = dma_map_sg(&skdev->pdev->dev, sgl, n_sg, skreq->data_dir); if (n_sg <= 0) return false; @@ -682,7 +682,8 @@ static void skd_postop_sg_list(struct skd_device *skdev, skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr = skreq->sksg_dma_address + ((skreq->n_sg) * sizeof(struct fit_sg_descriptor)); - pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, skreq->data_dir); + dma_unmap_sg(&skdev->pdev->dev, &skreq->sg[0], skreq->n_sg, + skreq->data_dir); } /* @@ -1416,7 +1417,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_BUSY_IMMINENT: skd_log_skreq(skdev, skreq, "retry(busy)"); - blk_requeue_request(skdev->queue, req); + blk_mq_requeue_request(req, true); dev_info(&skdev->pdev->dev, "drive BUSY imminent\n"); skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT; skdev->timer_countdown = SKD_TIMER_MINUTES(20); @@ -1426,7 +1427,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_REQUEUE_REQUEST: if ((unsigned long) ++req->special < SKD_MAX_RETRIES) { skd_log_skreq(skdev, skreq, "retry"); - blk_requeue_request(skdev->queue, req); + blk_mq_requeue_request(req, true); break; } /* fall through */ @@ -2632,8 +2633,8 @@ static int skd_cons_skcomp(struct skd_device *skdev) "comp pci_alloc, total bytes %zd entries %d\n", SKD_SKCOMP_SIZE, SKD_N_COMPLETION_ENTRY); - skcomp = pci_zalloc_consistent(skdev->pdev, SKD_SKCOMP_SIZE, - &skdev->cq_dma_address); + skcomp = dma_zalloc_coherent(&skdev->pdev->dev, SKD_SKCOMP_SIZE, + &skdev->cq_dma_address, GFP_KERNEL); if (skcomp == NULL) { rc = -ENOMEM; @@ -2674,10 +2675,10 @@ static int skd_cons_skmsg(struct skd_device *skdev) skmsg->id = i + SKD_ID_FIT_MSG; - skmsg->msg_buf = pci_alloc_consistent(skdev->pdev, - SKD_N_FITMSG_BYTES, - &skmsg->mb_dma_address); - + skmsg->msg_buf = dma_alloc_coherent(&skdev->pdev->dev, + SKD_N_FITMSG_BYTES, + &skmsg->mb_dma_address, + GFP_KERNEL); if (skmsg->msg_buf == NULL) { rc = -ENOMEM; goto err_out; @@ -2971,8 +2972,8 @@ err_out: static void skd_free_skcomp(struct skd_device *skdev) { if (skdev->skcomp_table) - pci_free_consistent(skdev->pdev, SKD_SKCOMP_SIZE, - skdev->skcomp_table, skdev->cq_dma_address); + dma_free_coherent(&skdev->pdev->dev, SKD_SKCOMP_SIZE, + skdev->skcomp_table, skdev->cq_dma_address); skdev->skcomp_table = NULL; skdev->cq_dma_address = 0; @@ -2991,8 +2992,8 @@ static void skd_free_skmsg(struct skd_device *skdev) skmsg = &skdev->skmsg_table[i]; if (skmsg->msg_buf != NULL) { - pci_free_consistent(skdev->pdev, SKD_N_FITMSG_BYTES, - skmsg->msg_buf, + dma_free_coherent(&skdev->pdev->dev, SKD_N_FITMSG_BYTES, + skmsg->msg_buf, skmsg->mb_dma_address); } skmsg->msg_buf = NULL; @@ -3104,7 +3105,7 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo) static int skd_bdev_attach(struct device *parent, struct skd_device *skdev) { dev_dbg(&skdev->pdev->dev, "add_disk\n"); - device_add_disk(parent, skdev->disk); + device_add_disk(parent, skdev->disk, NULL); return 0; } @@ -3172,18 +3173,12 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) rc = pci_request_regions(pdev, DRV_NAME); if (rc) goto err_out; - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!rc) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { - dev_err(&pdev->dev, "consistent DMA mask error %d\n", - rc); - } - } else { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - dev_err(&pdev->dev, "DMA mask error %d\n", rc); - goto 
err_out_regions; - } + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(&pdev->dev, "DMA mask error %d\n", rc); + goto err_out_regions; } if (!skd_major) { @@ -3367,20 +3362,12 @@ static int skd_pci_resume(struct pci_dev *pdev) rc = pci_request_regions(pdev, DRV_NAME); if (rc) goto err_out; - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!rc) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { - - dev_err(&pdev->dev, "consistent DMA mask error %d\n", - rc); - } - } else { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - - dev_err(&pdev->dev, "DMA mask error %d\n", rc); - goto err_out_regions; - } + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(&pdev->dev, "DMA mask error %d\n", rc); + goto err_out_regions; } pci_set_master(pdev); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5ca56bfae63c..b54fa6726303 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -36,6 +36,10 @@ MODULE_VERSION(DRV_MODULE_VERSION); #define VDC_TX_RING_SIZE 512 #define VDC_DEFAULT_BLK_SIZE 512 +#define MAX_XFER_BLKS (128 * 1024) +#define MAX_XFER_SIZE (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE) +#define MAX_RING_COOKIES ((MAX_XFER_BLKS / PAGE_SIZE) + 2) + #define WAITING_FOR_LINK_UP 0x01 #define WAITING_FOR_TX_SPACE 0x02 #define WAITING_FOR_GEN_CMD 0x04 @@ -450,7 +454,7 @@ static int __send_request(struct request *req) { struct vdc_port *port = req->rq_disk->private_data; struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - struct scatterlist sg[port->ring_cookies]; + struct scatterlist sg[MAX_RING_COOKIES]; struct vdc_req_entry *rqe; struct vio_disk_desc *desc; unsigned int map_perm; @@ -458,6 +462,9 @@ static int __send_request(struct request *req) u64 len; u8 op; + if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES)) + return -EINVAL; + map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO; if (rq_data_dir(req) == READ) { @@ -850,7 +857,7 @@ static int probe_disk(struct vdc_port *port) port->vdisk_size, (port->vdisk_size >> (20 - 9)), port->vio.ver.major, port->vio.ver.minor); - device_add_disk(&port->vio.vdev->dev, g); + device_add_disk(&port->vio.vdev->dev, g, NULL); return 0; } @@ -984,9 +991,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) goto err_out_free_port; port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE; - port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size); - port->ring_cookies = ((port->max_xfer_size * - port->vdisk_block_size) / PAGE_SIZE) + 2; + port->max_xfer_size = MAX_XFER_SIZE; + port->ring_cookies = MAX_RING_COOKIES; err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port); if (err) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 0e31884a9519..3fa6fcc34790 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -19,7 +19,7 @@ #include <linux/module.h> #include <linux/fd.h> #include <linux/slab.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/mutex.h> #include <linux/hdreg.h> #include <linux/kernel.h> @@ -190,6 +190,7 @@ struct floppy_state { int ref_count; struct gendisk *disk; + struct blk_mq_tag_set tag_set; /* parent controller */ @@ -211,7 +212,6 @@ enum head { struct swim_priv { struct swim __iomem *base; spinlock_t lock; - int fdc_queue; int floppy_count; struct floppy_state unit[FD_MAX_UNIT]; }; @@ -525,58 +525,36 @@ 
static blk_status_t floppy_read_sectors(struct floppy_state *fs, return 0; } -static struct request *swim_next_request(struct swim_priv *swd) +static blk_status_t swim_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request_queue *q; - struct request *rq; - int old_pos = swd->fdc_queue; + struct floppy_state *fs = hctx->queue->queuedata; + struct swim_priv *swd = fs->swd; + struct request *req = bd->rq; + blk_status_t err; - do { - q = swd->unit[swd->fdc_queue].disk->queue; - if (++swd->fdc_queue == swd->floppy_count) - swd->fdc_queue = 0; - if (q) { - rq = blk_fetch_request(q); - if (rq) - return rq; - } - } while (swd->fdc_queue != old_pos); + if (!spin_trylock_irq(&swd->lock)) + return BLK_STS_DEV_RESOURCE; - return NULL; -} + blk_mq_start_request(req); -static void do_fd_request(struct request_queue *q) -{ - struct swim_priv *swd = q->queuedata; - struct request *req; - struct floppy_state *fs; + if (!fs->disk_in || rq_data_dir(req) == WRITE) { + err = BLK_STS_IOERR; + goto out; + } - req = swim_next_request(swd); - while (req) { - blk_status_t err = BLK_STS_IOERR; + do { + err = floppy_read_sectors(fs, blk_rq_pos(req), + blk_rq_cur_sectors(req), + bio_data(req->bio)); + } while (blk_update_request(req, err, blk_rq_cur_bytes(req))); + __blk_mq_end_request(req, err); - fs = req->rq_disk->private_data; - if (blk_rq_pos(req) >= fs->total_secs) - goto done; - if (!fs->disk_in) - goto done; - if (rq_data_dir(req) == WRITE && fs->write_protected) - goto done; + err = BLK_STS_OK; +out: + spin_unlock_irq(&swd->lock); + return err; - switch (rq_data_dir(req)) { - case WRITE: - /* NOT IMPLEMENTED */ - break; - case READ: - err = floppy_read_sectors(fs, blk_rq_pos(req), - blk_rq_cur_sectors(req), - bio_data(req->bio)); - break; - } - done: - if (!__blk_end_request_cur(req, err)) - req = swim_next_request(swd); - } } static struct floppy_struct floppy_type[4] = { @@ -823,6 +801,10 @@ static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) return 0; } +static const struct blk_mq_ops swim_mq_ops = { + .queue_rq = swim_queue_rq, +}; + static int swim_floppy_init(struct swim_priv *swd) { int err; @@ -852,20 +834,25 @@ static int swim_floppy_init(struct swim_priv *swd) spin_lock_init(&swd->lock); for (drive = 0; drive < swd->floppy_count; drive++) { + struct request_queue *q; + swd->unit[drive].disk = alloc_disk(1); if (swd->unit[drive].disk == NULL) { err = -ENOMEM; goto exit_put_disks; } - swd->unit[drive].disk->queue = blk_init_queue(do_fd_request, - &swd->lock); - if (!swd->unit[drive].disk->queue) { - err = -ENOMEM; + + q = blk_mq_init_sq_queue(&swd->unit[drive].tag_set, &swim_mq_ops, + 2, BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(q)) { + err = PTR_ERR(q); goto exit_put_disks; } + + swd->unit[drive].disk->queue = q; blk_queue_bounce_limit(swd->unit[drive].disk->queue, BLK_BOUNCE_HIGH); - swd->unit[drive].disk->queue->queuedata = swd; + swd->unit[drive].disk->queue->queuedata = &swd->unit[drive]; swd->unit[drive].swd = swd; } @@ -887,8 +874,18 @@ static int swim_floppy_init(struct swim_priv *swd) exit_put_disks: unregister_blkdev(FLOPPY_MAJOR, "fd"); - while (drive--) - put_disk(swd->unit[drive].disk); + do { + struct gendisk *disk = swd->unit[drive].disk; + + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + blk_mq_free_tag_set(&swd->unit[drive].tag_set); + put_disk(disk); + } + } while (drive--); return err; } @@ -961,6 +958,7 @@ static int swim_remove(struct platform_device *dev) for (drive = 0; 
drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); blk_cleanup_queue(swd->unit[drive].disk->queue); + blk_mq_free_tag_set(&swd->unit[drive].tag_set); put_disk(swd->unit[drive].disk); } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 469541c1e51e..c1c676a33e4a 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -25,7 +25,7 @@ #include <linux/delay.h> #include <linux/fd.h> #include <linux/ioctl.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/interrupt.h> #include <linux/mutex.h> #include <linux/module.h> @@ -206,6 +206,7 @@ struct floppy_state { char dbdma_cmd_space[5 * sizeof(struct dbdma_cmd)]; int index; struct request *cur_req; + struct blk_mq_tag_set tag_set; }; #define swim3_err(fmt, arg...) dev_err(&fs->mdev->ofdev.dev, "[fd%d] " fmt, fs->index, arg) @@ -260,16 +261,15 @@ static int floppy_revalidate(struct gendisk *disk); static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes) { struct request *req = fs->cur_req; - int rc; swim3_dbg(" end request, err=%d nr_bytes=%d, cur_req=%p\n", err, nr_bytes, req); if (err) nr_bytes = blk_rq_cur_bytes(req); - rc = __blk_end_request(req, err, nr_bytes); - if (rc) + if (blk_update_request(req, err, nr_bytes)) return true; + __blk_mq_end_request(req, err); fs->cur_req = NULL; return false; } @@ -309,86 +309,58 @@ static int swim3_readbit(struct floppy_state *fs, int bit) return (stat & DATA) == 0; } -static void start_request(struct floppy_state *fs) +static blk_status_t swim3_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; + struct floppy_state *fs = hctx->queue->queuedata; + struct request *req = bd->rq; unsigned long x; - swim3_dbg("start request, initial state=%d\n", fs->state); - - if (fs->state == idle && fs->wanted) { - fs->state = available; - wake_up(&fs->wait); - return; + spin_lock_irq(&swim3_lock); + if (fs->cur_req || fs->state != idle) { + spin_unlock_irq(&swim3_lock); + return BLK_STS_DEV_RESOURCE; } - while (fs->state == idle) { - swim3_dbg("start request, idle loop, cur_req=%p\n", fs->cur_req); - if (!fs->cur_req) { - fs->cur_req = blk_fetch_request(disks[fs->index]->queue); - swim3_dbg(" fetched request %p\n", fs->cur_req); - if (!fs->cur_req) - break; - } - req = fs->cur_req; - - if (fs->mdev->media_bay && - check_media_bay(fs->mdev->media_bay) != MB_FD) { - swim3_dbg("%s", " media bay absent, dropping req\n"); - swim3_end_request(fs, BLK_STS_IOERR, 0); - continue; - } - -#if 0 /* This is really too verbose */ - swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", - req->rq_disk->disk_name, req->cmd, - (long)blk_rq_pos(req), blk_rq_sectors(req), - bio_data(req->bio)); - swim3_dbg(" current_nr_sectors=%u\n", - blk_rq_cur_sectors(req)); -#endif - - if (blk_rq_pos(req) >= fs->total_secs) { - swim3_dbg(" pos out of bounds (%ld, max is %ld)\n", - (long)blk_rq_pos(req), (long)fs->total_secs); - swim3_end_request(fs, BLK_STS_IOERR, 0); - continue; - } - if (fs->ejected) { - swim3_dbg("%s", " disk ejected\n"); + blk_mq_start_request(req); + fs->cur_req = req; + if (fs->mdev->media_bay && + check_media_bay(fs->mdev->media_bay) != MB_FD) { + swim3_dbg("%s", " media bay absent, dropping req\n"); + swim3_end_request(fs, BLK_STS_IOERR, 0); + goto out; + } + if (fs->ejected) { + swim3_dbg("%s", " disk ejected\n"); + swim3_end_request(fs, BLK_STS_IOERR, 0); + goto out; + } + if (rq_data_dir(req) == WRITE) { + if (fs->write_prot < 0) + fs->write_prot = 
swim3_readbit(fs, WRITE_PROT); + if (fs->write_prot) { + swim3_dbg("%s", " try to write, disk write protected\n"); swim3_end_request(fs, BLK_STS_IOERR, 0); - continue; + goto out; } - - if (rq_data_dir(req) == WRITE) { - if (fs->write_prot < 0) - fs->write_prot = swim3_readbit(fs, WRITE_PROT); - if (fs->write_prot) { - swim3_dbg("%s", " try to write, disk write protected\n"); - swim3_end_request(fs, BLK_STS_IOERR, 0); - continue; - } - } - - /* Do not remove the cast. blk_rq_pos(req) is now a - * sector_t and can be 64 bits, but it will never go - * past 32 bits for this driver anyway, so we can - * safely cast it down and not have to do a 64/32 - * division - */ - fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl; - x = ((long)blk_rq_pos(req)) % fs->secpercyl; - fs->head = x / fs->secpertrack; - fs->req_sector = x % fs->secpertrack + 1; - fs->state = do_transfer; - fs->retries = 0; - - act(fs); } -} -static void do_fd_request(struct request_queue * q) -{ - start_request(q->queuedata); + /* + * Do not remove the cast. blk_rq_pos(req) is now a sector_t and can be + * 64 bits, but it will never go past 32 bits for this driver anyway, so + * we can safely cast it down and not have to do a 64/32 division + */ + fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl; + x = ((long)blk_rq_pos(req)) % fs->secpercyl; + fs->head = x / fs->secpertrack; + fs->req_sector = x % fs->secpertrack + 1; + fs->state = do_transfer; + fs->retries = 0; + + act(fs); + +out: + spin_unlock_irq(&swim3_lock); + return BLK_STS_OK; } static void set_timeout(struct floppy_state *fs, int nticks, @@ -585,7 +557,6 @@ static void scan_timeout(struct timer_list *t) if (fs->retries > 5) { swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); } else { fs->state = jogging; act(fs); @@ -609,7 +580,6 @@ static void seek_timeout(struct timer_list *t) swim3_err("%s", "Seek timeout\n"); swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); spin_unlock_irqrestore(&swim3_lock, flags); } @@ -638,7 +608,6 @@ static void settle_timeout(struct timer_list *t) swim3_err("%s", "Seek settle timeout\n"); swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); unlock: spin_unlock_irqrestore(&swim3_lock, flags); } @@ -667,7 +636,6 @@ static void xfer_timeout(struct timer_list *t) (long)blk_rq_pos(fs->cur_req)); swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); spin_unlock_irqrestore(&swim3_lock, flags); } @@ -704,7 +672,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) if (fs->retries > 5) { swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); } else { fs->state = jogging; act(fs); @@ -796,7 +763,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) fs->state, rq_data_dir(req), intr, err); swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); break; } fs->retries = 0; @@ -813,8 +779,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) } else fs->state = idle; } - if (fs->state == idle) - start_request(fs); break; default: swim3_err("Don't know what to do in state %d\n", fs->state); @@ -862,14 +826,19 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state, static void release_drive(struct floppy_state *fs) { + struct request_queue *q = disks[fs->index]->queue; unsigned long flags; swim3_dbg("%s", "-> release drive\n"); spin_lock_irqsave(&swim3_lock, flags); fs->state = idle; - start_request(fs); spin_unlock_irqrestore(&swim3_lock, flags); 
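swim and swim3 keep only one request in flight per floppy controller: ->queue_rq() refuses further work with BLK_STS_DEV_RESOURCE while the state machine is busy, and blk-mq re-dispatches that request later; completion then happens from the interrupt and timeout paths, which is why the old start_request() chaining disappears. A minimal sketch of that shape (the ex_* names, EX_IDLE state and ex_start_io() stub are invented for the example):

#include <linux/blk-mq.h>
#include <linux/spinlock.h>

#define EX_IDLE	0

static DEFINE_SPINLOCK(ex_lock);

struct ex_fs {
	int		state;		/* EX_IDLE or a transfer state */
	struct request	*cur_req;
};

static void ex_start_io(struct ex_fs *fs)
{
	/* program the controller; the IRQ path ends fs->cur_req later */
}

static blk_status_t ex_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct ex_fs *fs = hctx->queue->queuedata;

	spin_lock_irq(&ex_lock);
	if (fs->cur_req || fs->state != EX_IDLE) {
		spin_unlock_irq(&ex_lock);
		return BLK_STS_DEV_RESOURCE;	/* try this request again later */
	}
	blk_mq_start_request(bd->rq);
	fs->cur_req = bd->rq;
	ex_start_io(fs);
	spin_unlock_irq(&ex_lock);
	return BLK_STS_OK;
}

The release_drive() hunk just below freezes and quiesces the queue and immediately unquiesces and unfreezes it, which appears intended to flush out requests blk-mq was holding back while the drive was busy.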
+ + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); } static int fd_eject(struct floppy_state *fs) @@ -1089,6 +1058,10 @@ static const struct block_device_operations floppy_fops = { .revalidate_disk= floppy_revalidate, }; +static const struct blk_mq_ops swim3_mq_ops = { + .queue_rq = swim3_queue_rq, +}; + static void swim3_mb_event(struct macio_dev* mdev, int mb_state) { struct floppy_state *fs = macio_get_drvdata(mdev); @@ -1202,47 +1175,63 @@ static int swim3_add_device(struct macio_dev *mdev, int index) static int swim3_attach(struct macio_dev *mdev, const struct of_device_id *match) { + struct floppy_state *fs; struct gendisk *disk; - int index, rc; + int rc; - index = floppy_count++; - if (index >= MAX_FLOPPIES) + if (floppy_count >= MAX_FLOPPIES) return -ENXIO; - /* Add the drive */ - rc = swim3_add_device(mdev, index); - if (rc) - return rc; - /* Now register that disk. Same comment about failure handling */ - disk = disks[index] = alloc_disk(1); - if (disk == NULL) - return -ENOMEM; - disk->queue = blk_init_queue(do_fd_request, &swim3_lock); - if (disk->queue == NULL) { - put_disk(disk); - return -ENOMEM; + if (floppy_count == 0) { + rc = register_blkdev(FLOPPY_MAJOR, "fd"); + if (rc) + return rc; } - blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); - disk->queue->queuedata = &floppy_states[index]; - if (index == 0) { - /* If we failed, there isn't much we can do as the driver is still - * too dumb to remove the device, just bail out - */ - if (register_blkdev(FLOPPY_MAJOR, "fd")) - return 0; + fs = &floppy_states[floppy_count]; + + disk = alloc_disk(1); + if (disk == NULL) { + rc = -ENOMEM; + goto out_unregister; + } + + disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disk->queue)) { + rc = PTR_ERR(disk->queue); + disk->queue = NULL; + goto out_put_disk; } + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); + disk->queue->queuedata = fs; + + rc = swim3_add_device(mdev, floppy_count); + if (rc) + goto out_cleanup_queue; disk->major = FLOPPY_MAJOR; - disk->first_minor = index; + disk->first_minor = floppy_count; disk->fops = &floppy_fops; - disk->private_data = &floppy_states[index]; + disk->private_data = fs; disk->flags |= GENHD_FL_REMOVABLE; - sprintf(disk->disk_name, "fd%d", index); + sprintf(disk->disk_name, "fd%d", floppy_count); set_capacity(disk, 2880); add_disk(disk); + disks[floppy_count++] = disk; return 0; + +out_cleanup_queue: + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + blk_mq_free_tag_set(&fs->tag_set); +out_put_disk: + put_disk(disk); +out_unregister: + if (floppy_count == 0) + unregister_blkdev(FLOPPY_MAJOR, "fd"); + return rc; } static const struct of_device_id swim3_match[] = diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 4d90e5eba2f5..064b8c5c7a32 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -16,7 +16,7 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/compiler.h> @@ -197,7 +197,6 @@ enum { FL_NON_RAID = FW_VER_NON_RAID, FL_4PORT = FW_VER_4PORT, FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT), - FL_DAC = (1 << 16), FL_DYN_MAJOR = (1 << 17), }; @@ -244,6 +243,7 @@ struct carm_port { unsigned int port_no; struct gendisk *disk; struct carm_host *host; + struct blk_mq_tag_set tag_set; /* attached device characteristics */ u64 capacity; @@ -279,6 
+279,7 @@ struct carm_host { unsigned int state; u32 fw_ver; + struct blk_mq_tag_set tag_set; struct request_queue *oob_q; unsigned int n_oob; @@ -750,7 +751,7 @@ static inline void carm_end_request_queued(struct carm_host *host, struct request *req = crq->rq; int rc; - __blk_end_request_all(req, error); + blk_mq_end_request(req, error); rc = carm_put_request(host, crq); assert(rc == 0); @@ -760,7 +761,7 @@ static inline void carm_push_q (struct carm_host *host, struct request_queue *q) { unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q; - blk_stop_queue(q); + blk_mq_stop_hw_queues(q); VPRINTK("STOPPED QUEUE %p\n", q); host->wait_q[idx] = q; @@ -785,7 +786,7 @@ static inline void carm_round_robin(struct carm_host *host) { struct request_queue *q = carm_pop_q(host); if (q) { - blk_start_queue(q); + blk_mq_start_hw_queues(q); VPRINTK("STARTED QUEUE %p\n", q); } } @@ -802,82 +803,86 @@ static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq, } } -static void carm_oob_rq_fn(struct request_queue *q) +static blk_status_t carm_oob_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request_queue *q = hctx->queue; struct carm_host *host = q->queuedata; struct carm_request *crq; - struct request *rq; int rc; - while (1) { - DPRINTK("get req\n"); - rq = blk_fetch_request(q); - if (!rq) - break; + blk_mq_start_request(bd->rq); - crq = rq->special; - assert(crq != NULL); - assert(crq->rq == rq); + spin_lock_irq(&host->lock); - crq->n_elem = 0; + crq = bd->rq->special; + assert(crq != NULL); + assert(crq->rq == bd->rq); - DPRINTK("send req\n"); - rc = carm_send_msg(host, crq); - if (rc) { - blk_requeue_request(q, rq); - carm_push_q(host, q); - return; /* call us again later, eventually */ - } + crq->n_elem = 0; + + DPRINTK("send req\n"); + rc = carm_send_msg(host, crq); + if (rc) { + carm_push_q(host, q); + spin_unlock_irq(&host->lock); + return BLK_STS_DEV_RESOURCE; } + + spin_unlock_irq(&host->lock); + return BLK_STS_OK; } -static void carm_rq_fn(struct request_queue *q) +static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request_queue *q = hctx->queue; struct carm_port *port = q->queuedata; struct carm_host *host = port->host; struct carm_msg_rw *msg; struct carm_request *crq; - struct request *rq; + struct request *rq = bd->rq; struct scatterlist *sg; int writing = 0, pci_dir, i, n_elem, rc; u32 tmp; unsigned int msg_size; -queue_one_request: - VPRINTK("get req\n"); - rq = blk_peek_request(q); - if (!rq) - return; + blk_mq_start_request(rq); + + spin_lock_irq(&host->lock); crq = carm_get_request(host); if (!crq) { carm_push_q(host, q); - return; /* call us again later, eventually */ + spin_unlock_irq(&host->lock); + return BLK_STS_DEV_RESOURCE; } crq->rq = rq; - blk_start_request(rq); - if (rq_data_dir(rq) == WRITE) { writing = 1; - pci_dir = PCI_DMA_TODEVICE; + pci_dir = DMA_TO_DEVICE; } else { - pci_dir = PCI_DMA_FROMDEVICE; + pci_dir = DMA_FROM_DEVICE; } /* get scatterlist from block layer */ sg = &crq->sg[0]; n_elem = blk_rq_map_sg(q, rq, sg); if (n_elem <= 0) { + /* request with no s/g entries? */ carm_end_rq(host, crq, BLK_STS_IOERR); - return; /* request with no s/g entries? */ + spin_unlock_irq(&host->lock); + return BLK_STS_IOERR; } /* map scatterlist to PCI bus addresses */ - n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir); + n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, pci_dir); if (n_elem <= 0) { + /* request with no s/g entries? 
*/ carm_end_rq(host, crq, BLK_STS_IOERR); - return; /* request with no s/g entries? */ + spin_unlock_irq(&host->lock); + return BLK_STS_IOERR; } crq->n_elem = n_elem; crq->port = port; @@ -927,12 +932,13 @@ queue_one_request: rc = carm_send_msg(host, crq); if (rc) { carm_put_request(host, crq); - blk_requeue_request(q, rq); carm_push_q(host, q); - return; /* call us again later, eventually */ + spin_unlock_irq(&host->lock); + return BLK_STS_DEV_RESOURCE; } - goto queue_one_request; + spin_unlock_irq(&host->lock); + return BLK_STS_OK; } static void carm_handle_array_info(struct carm_host *host, @@ -1052,11 +1058,11 @@ static inline void carm_handle_rw(struct carm_host *host, VPRINTK("ENTER\n"); if (rq_data_dir(crq->rq) == WRITE) - pci_dir = PCI_DMA_TODEVICE; + pci_dir = DMA_TO_DEVICE; else - pci_dir = PCI_DMA_FROMDEVICE; + pci_dir = DMA_FROM_DEVICE; - pci_unmap_sg(host->pdev, &crq->sg[0], crq->n_elem, pci_dir); + dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, pci_dir); carm_end_rq(host, crq, error); } @@ -1485,6 +1491,14 @@ static int carm_init_host(struct carm_host *host) return 0; } +static const struct blk_mq_ops carm_oob_mq_ops = { + .queue_rq = carm_oob_queue_rq, +}; + +static const struct blk_mq_ops carm_mq_ops = { + .queue_rq = carm_queue_rq, +}; + static int carm_init_disks(struct carm_host *host) { unsigned int i; @@ -1513,9 +1527,10 @@ static int carm_init_disks(struct carm_host *host) disk->fops = &carm_bd_ops; disk->private_data = port; - q = blk_init_queue(carm_rq_fn, &host->lock); - if (!q) { - rc = -ENOMEM; + q = blk_mq_init_sq_queue(&port->tag_set, &carm_mq_ops, + max_queue, BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(q)) { + rc = PTR_ERR(q); break; } disk->queue = q; @@ -1533,14 +1548,18 @@ static void carm_free_disks(struct carm_host *host) unsigned int i; for (i = 0; i < CARM_MAX_PORTS; i++) { - struct gendisk *disk = host->port[i].disk; + struct carm_port *port = &host->port[i]; + struct gendisk *disk = port->disk; + if (disk) { struct request_queue *q = disk->queue; if (disk->flags & GENHD_FL_UP) del_gendisk(disk); - if (q) + if (q) { + blk_mq_free_tag_set(&port->tag_set); blk_cleanup_queue(q); + } put_disk(disk); } } @@ -1548,8 +1567,8 @@ static void carm_free_disks(struct carm_host *host) static int carm_init_shm(struct carm_host *host) { - host->shm = pci_alloc_consistent(host->pdev, CARM_SHM_SIZE, - &host->shm_dma); + host->shm = dma_alloc_coherent(&host->pdev->dev, CARM_SHM_SIZE, + &host->shm_dma, GFP_KERNEL); if (!host->shm) return -ENOMEM; @@ -1565,7 +1584,6 @@ static int carm_init_shm(struct carm_host *host) static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { struct carm_host *host; - unsigned int pci_dac; int rc; struct request_queue *q; unsigned int i; @@ -1580,28 +1598,12 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto err_out; -#ifdef IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */ - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!rc) { - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (rc) { - printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n", - pci_name(pdev)); - goto err_out_regions; - } - pci_dac = 1; - } else { -#endif - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n", - pci_name(pdev)); - goto err_out_regions; - } - pci_dac = 0; -#ifdef IF_64BIT_DMA_IS_POSSIBLE /* grrrr... 
*/ + rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) { + printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n", + pci_name(pdev)); + goto err_out_regions; } -#endif host = kzalloc(sizeof(*host), GFP_KERNEL); if (!host) { @@ -1612,7 +1614,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) } host->pdev = pdev; - host->flags = pci_dac ? FL_DAC : 0; spin_lock_init(&host->lock); INIT_WORK(&host->fsm_task, carm_fsm_task); init_completion(&host->probe_comp); @@ -1636,12 +1637,13 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_iounmap; } - q = blk_init_queue(carm_oob_rq_fn, &host->lock); - if (!q) { + q = blk_mq_init_sq_queue(&host->tag_set, &carm_oob_mq_ops, 1, + BLK_MQ_F_NO_SCHED); + if (IS_ERR(q)) { printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n", pci_name(pdev)); - rc = -ENOMEM; - goto err_out_pci_free; + rc = PTR_ERR(q); + goto err_out_dma_free; } host->oob_q = q; q->queuedata = host; @@ -1705,8 +1707,9 @@ err_out_free_majors: else if (host->major == 161) clear_bit(1, &carm_major_alloc); blk_cleanup_queue(host->oob_q); -err_out_pci_free: - pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma); + blk_mq_free_tag_set(&host->tag_set); +err_out_dma_free: + dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma); err_out_iounmap: iounmap(host->mmio); err_out_kfree: @@ -1736,7 +1739,8 @@ static void carm_remove_one (struct pci_dev *pdev) else if (host->major == 161) clear_bit(1, &carm_major_alloc); blk_cleanup_queue(host->oob_q); - pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma); + blk_mq_free_tag_set(&host->tag_set); + dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma); iounmap(host->mmio); kfree(host); pci_release_regions(pdev); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 5c7fb8cc4149..be3e3ab79950 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -363,12 +363,12 @@ static int add_bio(struct cardinfo *card) vec = bio_iter_iovec(bio, card->current_iter); - dma_handle = pci_map_page(card->dev, + dma_handle = dma_map_page(&card->dev->dev, vec.bv_page, vec.bv_offset, vec.bv_len, bio_op(bio) == REQ_OP_READ ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + DMA_FROM_DEVICE : DMA_TO_DEVICE); p = &card->mm_pages[card->Ready]; desc = &p->desc[p->cnt]; @@ -421,7 +421,7 @@ static void process_page(unsigned long data) struct cardinfo *card = (struct cardinfo *)data; unsigned int dma_status = card->dma_status; - spin_lock_bh(&card->lock); + spin_lock(&card->lock); if (card->Active < 0) goto out_unlock; page = &card->mm_pages[card->Active]; @@ -448,10 +448,10 @@ static void process_page(unsigned long data) page->iter = page->bio->bi_iter; } - pci_unmap_page(card->dev, desc->data_dma_handle, + dma_unmap_page(&card->dev->dev, desc->data_dma_handle, vec.bv_len, (control & DMASCR_TRANSFER_READ) ? 
- PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); + DMA_TO_DEVICE : DMA_FROM_DEVICE); if (control & DMASCR_HARD_ERROR) { /* error */ bio->bi_status = BLK_STS_IOERR; @@ -496,7 +496,7 @@ static void process_page(unsigned long data) mm_start_io(card); } out_unlock: - spin_unlock_bh(&card->lock); + spin_unlock(&card->lock); while (return_bio) { struct bio *bio = return_bio; @@ -817,8 +817,8 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_printk(KERN_INFO, &dev->dev, "Micro Memory(tm) controller found (PCI Mem Module (Battery Backup))\n"); - if (pci_set_dma_mask(dev, DMA_BIT_MASK(64)) && - pci_set_dma_mask(dev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask(&dev->dev, DMA_BIT_MASK(32))) { dev_printk(KERN_WARNING, &dev->dev, "NO suitable DMA found\n"); return -ENOMEM; } @@ -871,12 +871,10 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) goto failed_magic; } - card->mm_pages[0].desc = pci_alloc_consistent(card->dev, - PAGE_SIZE * 2, - &card->mm_pages[0].page_dma); - card->mm_pages[1].desc = pci_alloc_consistent(card->dev, - PAGE_SIZE * 2, - &card->mm_pages[1].page_dma); + card->mm_pages[0].desc = dma_alloc_coherent(&card->dev->dev, + PAGE_SIZE * 2, &card->mm_pages[0].page_dma, GFP_KERNEL); + card->mm_pages[1].desc = dma_alloc_coherent(&card->dev->dev, + PAGE_SIZE * 2, &card->mm_pages[1].page_dma, GFP_KERNEL); if (card->mm_pages[0].desc == NULL || card->mm_pages[1].desc == NULL) { dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n"); @@ -1002,13 +1000,13 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) failed_req_irq: failed_alloc: if (card->mm_pages[0].desc) - pci_free_consistent(card->dev, PAGE_SIZE*2, - card->mm_pages[0].desc, - card->mm_pages[0].page_dma); + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, + card->mm_pages[0].desc, + card->mm_pages[0].page_dma); if (card->mm_pages[1].desc) - pci_free_consistent(card->dev, PAGE_SIZE*2, - card->mm_pages[1].desc, - card->mm_pages[1].page_dma); + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, + card->mm_pages[1].desc, + card->mm_pages[1].page_dma); failed_magic: iounmap(card->csr_remap); failed_remap_csr: @@ -1027,11 +1025,11 @@ static void mm_pci_remove(struct pci_dev *dev) iounmap(card->csr_remap); if (card->mm_pages[0].desc) - pci_free_consistent(card->dev, PAGE_SIZE*2, + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, card->mm_pages[0].desc, card->mm_pages[0].page_dma); if (card->mm_pages[1].desc) - pci_free_consistent(card->dev, PAGE_SIZE*2, + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, card->mm_pages[1].desc, card->mm_pages[1].page_dma); blk_cleanup_queue(card->queue); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 23752dc99b00..086c6bb12baa 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -351,8 +351,8 @@ static int minor_to_index(int minor) return minor >> PART_BITS; } -static ssize_t virtblk_serial_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); int err; @@ -371,7 +371,7 @@ static ssize_t virtblk_serial_show(struct device *dev, return err; } -static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL); +static DEVICE_ATTR_RO(serial); /* The queue's logical block size must be set before calling this */ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) @@ -545,8 
+545,8 @@ static const char *const virtblk_cache_types[] = { }; static ssize_t -virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +cache_type_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; @@ -564,8 +564,7 @@ virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, } static ssize_t -virtblk_cache_type_show(struct device *dev, struct device_attribute *attr, - char *buf) +cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; @@ -575,12 +574,38 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr, return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); } -static const struct device_attribute dev_attr_cache_type_ro = - __ATTR(cache_type, 0444, - virtblk_cache_type_show, NULL); -static const struct device_attribute dev_attr_cache_type_rw = - __ATTR(cache_type, 0644, - virtblk_cache_type_show, virtblk_cache_type_store); +static DEVICE_ATTR_RW(cache_type); + +static struct attribute *virtblk_attrs[] = { + &dev_attr_serial.attr, + &dev_attr_cache_type.attr, + NULL, +}; + +static umode_t virtblk_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct gendisk *disk = dev_to_disk(dev); + struct virtio_blk *vblk = disk->private_data; + struct virtio_device *vdev = vblk->vdev; + + if (a == &dev_attr_cache_type.attr && + !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) + return S_IRUGO; + + return a->mode; +} + +static const struct attribute_group virtblk_attr_group = { + .attrs = virtblk_attrs, + .is_visible = virtblk_attrs_are_visible, +}; + +static const struct attribute_group *virtblk_attr_groups[] = { + &virtblk_attr_group, + NULL, +}; static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) @@ -780,24 +805,9 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); - device_add_disk(&vdev->dev, vblk->disk); - err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); - if (err) - goto out_del_disk; - - if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) - err = device_create_file(disk_to_dev(vblk->disk), - &dev_attr_cache_type_rw); - else - err = device_create_file(disk_to_dev(vblk->disk), - &dev_attr_cache_type_ro); - if (err) - goto out_del_disk; + device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); return 0; -out_del_disk: - del_gendisk(vblk->disk); - blk_cleanup_queue(vblk->disk->queue); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_put_disk: diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 429d20131c7e..9eea83ae01c6 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2420,7 +2420,7 @@ static void blkfront_connect(struct blkfront_info *info) for (i = 0; i < info->nr_rings; i++) kick_pending_request_queues(&info->rinfo[i]); - device_add_disk(&info->xbdev->dev, info->gd); + device_add_disk(&info->xbdev->dev, info->gd, NULL); info->is_ready = 1; return; diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index c24589414c75..87ccef4bd69e 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -88,7 +88,7 @@ 
#include <linux/kernel.h> #include <linux/delay.h> #include <linux/slab.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/mutex.h> #include <linux/ata.h> #include <linux/hdreg.h> @@ -209,6 +209,8 @@ struct ace_device { struct device *dev; struct request_queue *queue; struct gendisk *gd; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; /* Inserted CF card parameters */ u16 cf_id[ATA_ID_WORDS]; @@ -462,18 +464,26 @@ static inline void ace_fsm_yieldirq(struct ace_device *ace) ace->fsm_continue_flag = 0; } +static bool ace_has_next_request(struct request_queue *q) +{ + struct ace_device *ace = q->queuedata; + + return !list_empty(&ace->rq_list); +} + /* Get the next read/write request; ending requests that we don't handle */ static struct request *ace_get_next_request(struct request_queue *q) { - struct request *req; + struct ace_device *ace = q->queuedata; + struct request *rq; - while ((req = blk_peek_request(q)) != NULL) { - if (!blk_rq_is_passthrough(req)) - break; - blk_start_request(req); - __blk_end_request_all(req, BLK_STS_IOERR); + rq = list_first_entry_or_null(&ace->rq_list, struct request, queuelist); + if (rq) { + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); } - return req; + + return rq; } static void ace_fsm_dostate(struct ace_device *ace) @@ -499,11 +509,11 @@ static void ace_fsm_dostate(struct ace_device *ace) /* Drop all in-flight and pending requests */ if (ace->req) { - __blk_end_request_all(ace->req, BLK_STS_IOERR); + blk_mq_end_request(ace->req, BLK_STS_IOERR); ace->req = NULL; } - while ((req = blk_fetch_request(ace->queue)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); + while ((req = ace_get_next_request(ace->queue)) != NULL) + blk_mq_end_request(req, BLK_STS_IOERR); /* Drop back to IDLE state and notify waiters */ ace->fsm_state = ACE_FSM_STATE_IDLE; @@ -517,7 +527,7 @@ static void ace_fsm_dostate(struct ace_device *ace) switch (ace->fsm_state) { case ACE_FSM_STATE_IDLE: /* See if there is anything to do */ - if (ace->id_req_count || ace_get_next_request(ace->queue)) { + if (ace->id_req_count || ace_has_next_request(ace->queue)) { ace->fsm_iter_num++; ace->fsm_state = ACE_FSM_STATE_REQ_LOCK; mod_timer(&ace->stall_timer, jiffies + HZ); @@ -651,7 +661,6 @@ static void ace_fsm_dostate(struct ace_device *ace) ace->fsm_state = ACE_FSM_STATE_IDLE; break; } - blk_start_request(req); /* Okay, it's a data request, set it up for transfer */ dev_dbg(ace->dev, @@ -728,7 +737,8 @@ static void ace_fsm_dostate(struct ace_device *ace) } /* bio finished; is there another one? 
*/ - if (__blk_end_request_cur(ace->req, BLK_STS_OK)) { + if (blk_update_request(ace->req, BLK_STS_OK, + blk_rq_cur_bytes(ace->req))) { /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", * blk_rq_sectors(ace->req), * blk_rq_cur_sectors(ace->req)); @@ -854,17 +864,23 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) /* --------------------------------------------------------------------- * Block ops */ -static void ace_request(struct request_queue * q) +static blk_status_t ace_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; - struct ace_device *ace; - - req = ace_get_next_request(q); + struct ace_device *ace = hctx->queue->queuedata; + struct request *req = bd->rq; - if (req) { - ace = req->rq_disk->private_data; - tasklet_schedule(&ace->fsm_tasklet); + if (blk_rq_is_passthrough(req)) { + blk_mq_start_request(req); + return BLK_STS_IOERR; } + + spin_lock_irq(&ace->lock); + list_add_tail(&req->queuelist, &ace->rq_list); + spin_unlock_irq(&ace->lock); + + tasklet_schedule(&ace->fsm_tasklet); + return BLK_STS_OK; } static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing) @@ -957,6 +973,10 @@ static const struct block_device_operations ace_fops = { .getgeo = ace_getgeo, }; +static const struct blk_mq_ops ace_mq_ops = { + .queue_rq = ace_queue_rq, +}; + /* -------------------------------------------------------------------- * SystemACE device setup/teardown code */ @@ -972,6 +992,7 @@ static int ace_setup(struct ace_device *ace) spin_lock_init(&ace->lock); init_completion(&ace->id_completion); + INIT_LIST_HEAD(&ace->rq_list); /* * Map the device @@ -989,9 +1010,15 @@ static int ace_setup(struct ace_device *ace) /* * Initialize the request queue */ - ace->queue = blk_init_queue(ace_request, &ace->lock); - if (ace->queue == NULL) + ace->queue = blk_mq_init_sq_queue(&ace->tag_set, &ace_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(ace->queue)) { + rc = PTR_ERR(ace->queue); + ace->queue = NULL; goto err_blk_initq; + } + ace->queue->queuedata = ace; + blk_queue_logical_block_size(ace->queue, 512); blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH); @@ -1066,6 +1093,7 @@ err_read: put_disk(ace->gd); err_alloc_disk: blk_cleanup_queue(ace->queue); + blk_mq_free_tag_set(&ace->tag_set); err_blk_initq: iounmap(ace->baseaddr); err_ioremap: @@ -1081,8 +1109,10 @@ static void ace_teardown(struct ace_device *ace) put_disk(ace->gd); } - if (ace->queue) + if (ace->queue) { blk_cleanup_queue(ace->queue); + blk_mq_free_tag_set(&ace->tag_set); + } tasklet_kill(&ace->fsm_tasklet); diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index d0c5bc4e0703..1106c076fa4b 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -31,7 +31,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/bitops.h> #include <linux/mutex.h> #include <linux/slab.h> @@ -66,43 +66,44 @@ static DEFINE_SPINLOCK(z2ram_lock); static struct gendisk *z2ram_gendisk; -static void do_z2_request(struct request_queue *q) +static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; - - req = blk_fetch_request(q); - while (req) { - unsigned long start = blk_rq_pos(req) << 9; - unsigned long len = blk_rq_cur_bytes(req); - blk_status_t err = BLK_STS_OK; - - if (start + len > z2ram_size) { - pr_err(DEVICE_NAME ": bad access: block=%llu, " - "count=%u\n", - (unsigned long long)blk_rq_pos(req), - 
blk_rq_cur_sectors(req)); - err = BLK_STS_IOERR; - goto done; - } - while (len) { - unsigned long addr = start & Z2RAM_CHUNKMASK; - unsigned long size = Z2RAM_CHUNKSIZE - addr; - void *buffer = bio_data(req->bio); - - if (len < size) - size = len; - addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; - if (rq_data_dir(req) == READ) - memcpy(buffer, (char *)addr, size); - else - memcpy((char *)addr, buffer, size); - start += size; - len -= size; - } - done: - if (!__blk_end_request_cur(req, err)) - req = blk_fetch_request(q); + struct request *req = bd->rq; + unsigned long start = blk_rq_pos(req) << 9; + unsigned long len = blk_rq_cur_bytes(req); + + blk_mq_start_request(req); + + if (start + len > z2ram_size) { + pr_err(DEVICE_NAME ": bad access: block=%llu, " + "count=%u\n", + (unsigned long long)blk_rq_pos(req), + blk_rq_cur_sectors(req)); + return BLK_STS_IOERR; + } + + spin_lock_irq(&z2ram_lock); + + while (len) { + unsigned long addr = start & Z2RAM_CHUNKMASK; + unsigned long size = Z2RAM_CHUNKSIZE - addr; + void *buffer = bio_data(req->bio); + + if (len < size) + size = len; + addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; + if (rq_data_dir(req) == READ) + memcpy(buffer, (char *)addr, size); + else + memcpy((char *)addr, buffer, size); + start += size; + len -= size; } + + spin_unlock_irq(&z2ram_lock); + blk_mq_end_request(req, BLK_STS_OK); + return BLK_STS_OK; } static void @@ -337,6 +338,11 @@ static struct kobject *z2_find(dev_t dev, int *part, void *data) } static struct request_queue *z2_queue; +static struct blk_mq_tag_set tag_set; + +static const struct blk_mq_ops z2_mq_ops = { + .queue_rq = z2_queue_rq, +}; static int __init z2_init(void) @@ -355,9 +361,13 @@ z2_init(void) if (!z2ram_gendisk) goto out_disk; - z2_queue = blk_init_queue(do_z2_request, &z2ram_lock); - if (!z2_queue) + z2_queue = blk_mq_init_sq_queue(&tag_set, &z2_mq_ops, 16, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(z2_queue)) { + ret = PTR_ERR(z2_queue); + z2_queue = NULL; goto out_queue; + } z2ram_gendisk->major = Z2RAM_MAJOR; z2ram_gendisk->first_minor = 0; @@ -387,6 +397,7 @@ static void __exit z2_exit(void) del_gendisk(z2ram_gendisk); put_disk(z2ram_gendisk); blk_cleanup_queue(z2_queue); + blk_mq_free_tag_set(&tag_set); if ( current_device != -1 ) { diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 635235759a0a..fcd055457364 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -3,7 +3,6 @@ config ZRAM tristate "Compressed RAM block device support" depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO select CRYPTO_LZO - default n help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). Pages written to these disks are compressed and stored in memory @@ -18,7 +17,6 @@ config ZRAM config ZRAM_WRITEBACK bool "Write back incompressible page to backing device" depends on ZRAM - default n help With incompressible page, there is no memory saving to keep it in memory. Instead, write it out to backing device. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a1d6b5597c17..4879595200e1 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1636,6 +1636,11 @@ static const struct attribute_group zram_disk_attr_group = { .attrs = zram_disk_attrs, }; +static const struct attribute_group *zram_disk_attr_groups[] = { + &zram_disk_attr_group, + NULL, +}; + /* * Allocate and initialize new zram device. the function returns * '>= 0' device_id upon success, and negative value otherwise. 
@@ -1716,24 +1721,14 @@ static int zram_add(void) zram->disk->queue->backing_dev_info->capabilities |= (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); - add_disk(zram->disk); - - ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - if (ret < 0) { - pr_err("Error creating sysfs group for device %d\n", - device_id); - goto out_free_disk; - } + device_add_disk(NULL, zram->disk, zram_disk_attr_groups); + strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; -out_free_disk: - del_gendisk(zram->disk); - put_disk(zram->disk); out_free_queue: blk_cleanup_queue(queue); out_free_idr: @@ -1762,15 +1757,6 @@ static int zram_remove(struct zram *zram) mutex_unlock(&bdev->bd_mutex); zram_debugfs_unregister(zram); - /* - * Remove sysfs first, so no one will perform a disksize - * store while we destroy the devices. This also helps during - * hot_remove -- zram_reset_device() is the last holder of - * ->init_lock, no later/concurrent disksize_store() or any - * other sysfs handlers are possible. - */ - sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index e182f6019f68..2fee65886d50 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1322,7 +1322,7 @@ static int qca_init_regulators(struct qca_power *qca, { int i; - qca->vreg_bulk = devm_kzalloc(qca->dev, num_vregs * + qca->vreg_bulk = devm_kcalloc(qca->dev, num_vregs, sizeof(struct regulator_bulk_data), GFP_KERNEL); if (!qca->vreg_bulk) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index a5d5a96479bf..614ecdbb4ab7 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -410,10 +410,10 @@ static int cdrom_get_disc_info(struct cdrom_device_info *cdi, * hack to have the capability flags defined const, while we can still * change it here without gcc complaining at every line. 
*/ -#define ENSURE(call, bits) \ -do { \ - if (cdo->call == NULL) \ - *change_capability &= ~(bits); \ +#define ENSURE(cdo, call, bits) \ +do { \ + if (cdo->call == NULL) \ + WARN_ON_ONCE((cdo)->capability & (bits)); \ } while (0) /* @@ -589,7 +589,6 @@ int register_cdrom(struct cdrom_device_info *cdi) { static char banner_printed; const struct cdrom_device_ops *cdo = cdi->ops; - int *change_capability = (int *)&cdo->capability; /* hack */ cd_dbg(CD_OPEN, "entering register_cdrom\n"); @@ -601,16 +600,16 @@ int register_cdrom(struct cdrom_device_info *cdi) cdrom_sysctl_register(); } - ENSURE(drive_status, CDC_DRIVE_STATUS); + ENSURE(cdo, drive_status, CDC_DRIVE_STATUS); if (cdo->check_events == NULL && cdo->media_changed == NULL) - *change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC); - ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); - ENSURE(lock_door, CDC_LOCK); - ENSURE(select_speed, CDC_SELECT_SPEED); - ENSURE(get_last_session, CDC_MULTI_SESSION); - ENSURE(get_mcn, CDC_MCN); - ENSURE(reset, CDC_RESET); - ENSURE(generic_packet, CDC_GENERIC_PACKET); + WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC)); + ENSURE(cdo, tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); + ENSURE(cdo, lock_door, CDC_LOCK); + ENSURE(cdo, select_speed, CDC_SELECT_SPEED); + ENSURE(cdo, get_last_session, CDC_MULTI_SESSION); + ENSURE(cdo, get_mcn, CDC_MCN); + ENSURE(cdo, reset, CDC_RESET); + ENSURE(cdo, generic_packet, CDC_GENERIC_PACKET); cdi->mc_flags = 0; cdi->options = CDO_USE_FFLAGS; @@ -2445,7 +2444,7 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi, return -ENOSYS; if (arg != CDSL_CURRENT && arg != CDSL_NONE) { - if ((int)arg >= cdi->capacity) + if (arg >= cdi->capacity) return -EINVAL; } diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index ae3a7537cf0f..757e85b81879 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -31,12 +31,11 @@ #include <linux/cdrom.h> #include <linux/genhd.h> #include <linux/bio.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/interrupt.h> #include <linux/device.h> #include <linux/mutex.h> #include <linux/wait.h> -#include <linux/workqueue.h> #include <linux/platform_device.h> #include <scsi/scsi.h> #include <asm/io.h> @@ -102,11 +101,6 @@ static int gdrom_major; static DECLARE_WAIT_QUEUE_HEAD(command_queue); static DECLARE_WAIT_QUEUE_HEAD(request_queue); -static DEFINE_SPINLOCK(gdrom_lock); -static void gdrom_readdisk_dma(struct work_struct *work); -static DECLARE_WORK(work, gdrom_readdisk_dma); -static LIST_HEAD(gdrom_deferred); - struct gdromtoc { unsigned int entry[99]; unsigned int first, last; @@ -122,6 +116,7 @@ static struct gdrom_unit { char disk_type; struct gdromtoc *toc; struct request_queue *gdrom_rq; + struct blk_mq_tag_set tag_set; } gd; struct gdrom_id { @@ -584,103 +579,83 @@ static int gdrom_set_interrupt_handlers(void) * 9 -> sectors >> 8 * 10 -> sectors */ -static void gdrom_readdisk_dma(struct work_struct *work) +static blk_status_t gdrom_readdisk_dma(struct request *req) { int block, block_cnt; blk_status_t err; struct packet_command *read_command; - struct list_head *elem, *next; - struct request *req; unsigned long timeout; - if (list_empty(&gdrom_deferred)) - return; read_command = kzalloc(sizeof(struct packet_command), GFP_KERNEL); if (!read_command) - return; /* get more memory later? 
*/ + return BLK_STS_RESOURCE; + read_command->cmd[0] = 0x30; read_command->cmd[1] = 0x20; - spin_lock(&gdrom_lock); - list_for_each_safe(elem, next, &gdrom_deferred) { - req = list_entry(elem, struct request, queuelist); - spin_unlock(&gdrom_lock); - block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; - block_cnt = blk_rq_sectors(req)/GD_TO_BLK; - __raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG); - __raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); - __raw_writel(1, GDROM_DMA_DIRECTION_REG); - __raw_writel(1, GDROM_DMA_ENABLE_REG); - read_command->cmd[2] = (block >> 16) & 0xFF; - read_command->cmd[3] = (block >> 8) & 0xFF; - read_command->cmd[4] = block & 0xFF; - read_command->cmd[8] = (block_cnt >> 16) & 0xFF; - read_command->cmd[9] = (block_cnt >> 8) & 0xFF; - read_command->cmd[10] = block_cnt & 0xFF; - /* set for DMA */ - __raw_writeb(1, GDROM_ERROR_REG); - /* other registers */ - __raw_writeb(0, GDROM_SECNUM_REG); - __raw_writeb(0, GDROM_BCL_REG); - __raw_writeb(0, GDROM_BCH_REG); - __raw_writeb(0, GDROM_DSEL_REG); - __raw_writeb(0, GDROM_INTSEC_REG); - /* Wait for registers to reset after any previous activity */ - timeout = jiffies + HZ / 2; - while (gdrom_is_busy() && time_before(jiffies, timeout)) - cpu_relax(); - __raw_writeb(GDROM_COM_PACKET, GDROM_STATUSCOMMAND_REG); - timeout = jiffies + HZ / 2; - /* Wait for packet command to finish */ - while (gdrom_is_busy() && time_before(jiffies, timeout)) - cpu_relax(); - gd.pending = 1; - gd.transfer = 1; - outsw(GDROM_DATA_REG, &read_command->cmd, 6); - timeout = jiffies + HZ / 2; - /* Wait for any pending DMA to finish */ - while (__raw_readb(GDROM_DMA_STATUS_REG) && - time_before(jiffies, timeout)) - cpu_relax(); - /* start transfer */ - __raw_writeb(1, GDROM_DMA_STATUS_REG); - wait_event_interruptible_timeout(request_queue, - gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); - err = gd.transfer ? 
BLK_STS_IOERR : BLK_STS_OK; - gd.transfer = 0; - gd.pending = 0; - /* now seek to take the request spinlock - * before handling ending the request */ - spin_lock(&gdrom_lock); - list_del_init(&req->queuelist); - __blk_end_request_all(req, err); - } - spin_unlock(&gdrom_lock); + block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; + block_cnt = blk_rq_sectors(req)/GD_TO_BLK; + __raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG); + __raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); + __raw_writel(1, GDROM_DMA_DIRECTION_REG); + __raw_writel(1, GDROM_DMA_ENABLE_REG); + read_command->cmd[2] = (block >> 16) & 0xFF; + read_command->cmd[3] = (block >> 8) & 0xFF; + read_command->cmd[4] = block & 0xFF; + read_command->cmd[8] = (block_cnt >> 16) & 0xFF; + read_command->cmd[9] = (block_cnt >> 8) & 0xFF; + read_command->cmd[10] = block_cnt & 0xFF; + /* set for DMA */ + __raw_writeb(1, GDROM_ERROR_REG); + /* other registers */ + __raw_writeb(0, GDROM_SECNUM_REG); + __raw_writeb(0, GDROM_BCL_REG); + __raw_writeb(0, GDROM_BCH_REG); + __raw_writeb(0, GDROM_DSEL_REG); + __raw_writeb(0, GDROM_INTSEC_REG); + /* Wait for registers to reset after any previous activity */ + timeout = jiffies + HZ / 2; + while (gdrom_is_busy() && time_before(jiffies, timeout)) + cpu_relax(); + __raw_writeb(GDROM_COM_PACKET, GDROM_STATUSCOMMAND_REG); + timeout = jiffies + HZ / 2; + /* Wait for packet command to finish */ + while (gdrom_is_busy() && time_before(jiffies, timeout)) + cpu_relax(); + gd.pending = 1; + gd.transfer = 1; + outsw(GDROM_DATA_REG, &read_command->cmd, 6); + timeout = jiffies + HZ / 2; + /* Wait for any pending DMA to finish */ + while (__raw_readb(GDROM_DMA_STATUS_REG) && + time_before(jiffies, timeout)) + cpu_relax(); + /* start transfer */ + __raw_writeb(1, GDROM_DMA_STATUS_REG); + wait_event_interruptible_timeout(request_queue, + gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); + err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK; + gd.transfer = 0; + gd.pending = 0; + + blk_mq_end_request(req, err); kfree(read_command); + return BLK_STS_OK; } -static void gdrom_request(struct request_queue *rq) -{ - struct request *req; - - while ((req = blk_fetch_request(rq)) != NULL) { - switch (req_op(req)) { - case REQ_OP_READ: - /* - * Add to list of deferred work and then schedule - * workqueue. 
- */ - list_add_tail(&req->queuelist, &gdrom_deferred); - schedule_work(&work); - break; - case REQ_OP_WRITE: - pr_notice("Read only device - write request ignored\n"); - __blk_end_request_all(req, BLK_STS_IOERR); - break; - default: - printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); - __blk_end_request_all(req, BLK_STS_IOERR); - break; - } +static blk_status_t gdrom_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + blk_mq_start_request(bd->rq); + + switch (req_op(bd->rq)) { + case REQ_OP_READ: + return gdrom_readdisk_dma(bd->rq); + case REQ_OP_WRITE: + pr_notice("Read only device - write request ignored\n"); + return BLK_STS_IOERR; + default: + printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); + return BLK_STS_IOERR; } } @@ -768,6 +743,10 @@ static int probe_gdrom_setupqueue(void) return gdrom_init_dma_mode(); } +static const struct blk_mq_ops gdrom_mq_ops = { + .queue_rq = gdrom_queue_rq, +}; + /* * register this as a block device and as compliant with the * universal CD Rom driver interface @@ -811,11 +790,15 @@ static int probe_gdrom(struct platform_device *devptr) err = gdrom_set_interrupt_handlers(); if (err) goto probe_fail_cmdirq_register; - gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock); - if (!gd.gdrom_rq) { - err = -ENOMEM; + + gd.gdrom_rq = blk_mq_init_sq_queue(&gd.tag_set, &gdrom_mq_ops, 1, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + if (IS_ERR(gd.gdrom_rq)) { + rc = PTR_ERR(gd.gdrom_rq); + gd.gdrom_rq = NULL; goto probe_fail_requestq; } + blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH); err = probe_gdrom_setupqueue(); @@ -832,6 +815,7 @@ static int probe_gdrom(struct platform_device *devptr) probe_fail_toc: blk_cleanup_queue(gd.gdrom_rq); + blk_mq_free_tag_set(&gd.tag_set); probe_fail_requestq: free_irq(HW_EVENT_GDROM_DMA, &gd); free_irq(HW_EVENT_GDROM_CMD, &gd); @@ -849,8 +833,8 @@ probe_fail_no_mem: static int remove_gdrom(struct platform_device *devptr) { - flush_work(&work); blk_cleanup_queue(gd.gdrom_rq); + blk_mq_free_tag_set(&gd.tag_set); free_irq(HW_EVENT_GDROM_CMD, &gd); free_irq(HW_EVENT_GDROM_DMA, &gd); del_gendisk(gd.disk); diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c index ffa5dac221e4..129ebd2588fd 100644 --- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c +++ b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c @@ -1434,8 +1434,16 @@ static void __init sun4i_ccu_init(struct device_node *node, return; } - /* Force the PLL-Audio-1x divider to 1 */ val = readl(reg + SUN4I_PLL_AUDIO_REG); + + /* + * Force VCO and PLL bias current to lowest setting. Higher + * settings interfere with sigma-delta modulation and result + * in audible noise and distortions when using SPDIF or I2S. 
+ */ + val &= ~GENMASK(25, 16); + + /* Force the PLL-Audio-1x divider to 1 */ val &= ~GENMASK(29, 26); writel(val | (1 << 26), reg + SUN4I_PLL_AUDIO_REG); diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index d8c7f5750cdb..9a7d4dc00b6e 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -319,6 +319,13 @@ static u64 notrace arm64_858921_read_cntvct_el0(void) } #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 +static u64 notrace arm64_1188873_read_cntvct_el0(void) +{ + return read_sysreg(cntvct_el0); +} +#endif + #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); @@ -408,6 +415,14 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 + { + .match_type = ate_match_local_cap_id, + .id = (void *)ARM64_WORKAROUND_1188873, + .desc = "ARM erratum 1188873", + .read_cntvct_el0 = arm64_1188873_read_cntvct_el0, + }, +#endif }; typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 7e71043457a6..86c699c14f84 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -1044,7 +1044,8 @@ static int safexcel_probe(struct platform_device *pdev) safexcel_configure(priv); - priv->ring = devm_kzalloc(dev, priv->config.rings * sizeof(*priv->ring), + priv->ring = devm_kcalloc(dev, priv->config.rings, + sizeof(*priv->ring), GFP_KERNEL); if (!priv->ring) { ret = -ENOMEM; @@ -1063,8 +1064,9 @@ static int safexcel_probe(struct platform_device *pdev) if (ret) goto err_reg_clk; - priv->ring[i].rdr_req = devm_kzalloc(dev, - sizeof(priv->ring[i].rdr_req) * EIP197_DEFAULT_RING_SIZE, + priv->ring[i].rdr_req = devm_kcalloc(dev, + EIP197_DEFAULT_RING_SIZE, + sizeof(priv->ring[i].rdr_req), GFP_KERNEL); if (!priv->ring[i].rdr_req) { ret = -ENOMEM; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a57300c1d649..25187403e3ac 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1682,7 +1682,8 @@ static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gpiochip, irq_set_chained_handler_and_data(parent_irq, parent_handler, gpiochip); - gpiochip->irq.parents = &parent_irq; + gpiochip->irq.parent_irq = parent_irq; + gpiochip->irq.parents = &gpiochip->irq.parent_irq; gpiochip->irq.num_parents = 1; } diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 018fcdb353d2..281cf9cbb44c 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -174,6 +174,11 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->crtcs[i].state = NULL; state->crtcs[i].old_state = NULL; state->crtcs[i].new_state = NULL; + + if (state->crtcs[i].commit) { + drm_crtc_commit_put(state->crtcs[i].commit); + state->crtcs[i].commit = NULL; + } } for (i = 0; i < config->num_total_plane; i++) { diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 80be74df7ba6..1bb4c318bdd4 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1408,15 +1408,16 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks); void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev, struct drm_atomic_state 
*old_state) { - struct drm_crtc_state *new_crtc_state; struct drm_crtc *crtc; int i; - for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) { - struct drm_crtc_commit *commit = new_crtc_state->commit; + for (i = 0; i < dev->mode_config.num_crtc; i++) { + struct drm_crtc_commit *commit = old_state->crtcs[i].commit; int ret; - if (!commit) + crtc = old_state->crtcs[i].ptr; + + if (!crtc || !commit) continue; ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ); @@ -1934,6 +1935,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, drm_crtc_commit_get(commit); commit->abort_completion = true; + + state->crtcs[i].commit = commit; + drm_crtc_commit_get(commit); } for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) { diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index bae43938c8f6..9cbe8f5c9aca 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -567,9 +567,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_crtc *crtc_req = data; struct drm_crtc *crtc; struct drm_plane *plane; - struct drm_connector **connector_set = NULL, *connector; - struct drm_framebuffer *fb = NULL; - struct drm_display_mode *mode = NULL; + struct drm_connector **connector_set, *connector; + struct drm_framebuffer *fb; + struct drm_display_mode *mode; struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; @@ -598,6 +598,10 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, mutex_lock(&crtc->dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); retry: + connector_set = NULL; + fb = NULL; + mode = NULL; + ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx); if (ret) goto out; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3c9fc99648b7..ff0bfc65a8c1 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -113,6 +113,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */ + { "BOE", 0x78b, EDID_QUIRK_FORCE_6BPC }, + /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */ { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC }, @@ -4279,7 +4282,7 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector, struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; dc_mask = db[7] & DRM_EDID_YCBCR420_DC_MASK; - hdmi->y420_dc_modes |= dc_mask; + hdmi->y420_dc_modes = dc_mask; } static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector, diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 515a7aec57ac..9628dd617826 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1580,6 +1580,25 @@ unlock: } EXPORT_SYMBOL(drm_fb_helper_ioctl); +static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, + const struct fb_var_screeninfo *var_2) +{ + return var_1->bits_per_pixel == var_2->bits_per_pixel && + var_1->grayscale == var_2->grayscale && + var_1->red.offset == var_2->red.offset && + var_1->red.length == var_2->red.length && + var_1->red.msb_right == var_2->red.msb_right && + var_1->green.offset == var_2->green.offset && + var_1->green.length == var_2->green.length && + var_1->green.msb_right == var_2->green.msb_right && + var_1->blue.offset == var_2->blue.offset && + var_1->blue.length == 
var_2->blue.length && + var_1->blue.msb_right == var_2->blue.msb_right && + var_1->transp.offset == var_2->transp.offset && + var_1->transp.length == var_2->transp.length && + var_1->transp.msb_right == var_2->transp.msb_right; +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1590,7 +1609,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; - int depth; if (var->pixclock != 0 || in_dbg_master()) return -EINVAL; @@ -1610,72 +1628,15 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; - default: + /* + * drm fbdev emulation doesn't support changing the pixel format at all, + * so reject all pixel format changing requests. 
+ */ + if (!drm_fb_pixel_format_equal(var, &info->var)) { + DRM_DEBUG("fbdev emulation doesn't support changing the pixel format\n"); return -EINVAL; } + return 0; } EXPORT_SYMBOL(drm_fb_helper_check_var); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 0b976dfd04df..92ecb9bf982c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -600,7 +600,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, } mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]); - mtk_crtc->planes = devm_kzalloc(dev, mtk_crtc->layer_nr * + mtk_crtc->planes = devm_kcalloc(dev, mtk_crtc->layer_nr, sizeof(struct drm_plane), GFP_KERNEL); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c index 790d39f816dc..b557687b1964 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_io_util.c @@ -153,8 +153,8 @@ int msm_dss_parse_clock(struct platform_device *pdev, return 0; } - mp->clk_config = devm_kzalloc(&pdev->dev, - sizeof(struct dss_clk) * num_clk, + mp->clk_config = devm_kcalloc(&pdev->dev, + num_clk, sizeof(struct dss_clk), GFP_KERNEL); if (!mp->clk_config) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 5691dfa1db6f..041e7daf8a33 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -900,9 +900,22 @@ static enum drm_connector_status nv50_mstc_detect(struct drm_connector *connector, bool force) { struct nv50_mstc *mstc = nv50_mstc(connector); + enum drm_connector_status conn_status; + int ret; + if (!mstc->port) return connector_status_disconnected; - return drm_dp_mst_detect_port(connector, mstc->port->mgr, mstc->port); + + ret = pm_runtime_get_sync(connector->dev->dev); + if (ret < 0 && ret != -EACCES) + return connector_status_disconnected; + + conn_status = drm_dp_mst_detect_port(connector, mstc->port->mgr, + mstc->port); + + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + return conn_status; } static void diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c index e36004fbe453..2a15f2f9271e 100644 --- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c +++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c @@ -81,9 +81,19 @@ static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate, int i; for (i = tcon->dclk_min_div; i <= tcon->dclk_max_div; i++) { - unsigned long ideal = rate * i; + u64 ideal = (u64)rate * i; unsigned long rounded; + /* + * ideal has overflowed the max value that can be stored in an + * unsigned long, and every clk operation we might do on a + * truncated u64 value will give us incorrect results. + * Let's just stop there since bigger dividers will result in + * the same overflow issue. 
+ */ + if (ideal > ULONG_MAX) + goto out; + rounded = clk_hw_round_rate(clk_hw_get_parent(hw), ideal); diff --git a/drivers/hwmon/npcm750-pwm-fan.c b/drivers/hwmon/npcm750-pwm-fan.c index c0fab54c0094..b3b907bdfb63 100644 --- a/drivers/hwmon/npcm750-pwm-fan.c +++ b/drivers/hwmon/npcm750-pwm-fan.c @@ -908,7 +908,7 @@ static int npcm7xx_en_pwm_fan(struct device *dev, if (fan_cnt < 1) return -EINVAL; - fan_ch = devm_kzalloc(dev, sizeof(*fan_ch) * fan_cnt, GFP_KERNEL); + fan_ch = devm_kcalloc(dev, fan_cnt, sizeof(*fan_ch), GFP_KERNEL); if (!fan_ch) return -ENOMEM; diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 52cf42b32f0a..4aa7dde876f3 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -806,8 +806,12 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, time_left = wait_event_timeout(priv->wait, priv->flags & ID_DONE, num * adap->timeout); - if (!time_left) { + + /* cleanup DMA if it couldn't complete properly due to an error */ + if (priv->dma_direction != DMA_NONE) rcar_i2c_cleanup_dma(priv); + + if (!time_left) { rcar_i2c_init(priv); ret = -ETIMEDOUT; } else if (priv->flags & ID_NACK) { diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9ee9a15e7134..9200e349f29e 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2270,7 +2270,7 @@ EXPORT_SYMBOL(i2c_put_adapter); * * Return: NULL if a DMA safe buffer was not obtained. Use msg->buf with PIO. * Or a valid pointer to be used with DMA. After use, release it by - * calling i2c_release_dma_safe_msg_buf(). + * calling i2c_put_dma_safe_msg_buf(). * * This function must only be called from process context! */ diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 44a7a255ef74..f9b59d41813f 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1784,7 +1784,7 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive); g->fops = &idecd_ops; g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - device_add_disk(&drive->gendev, g); + device_add_disk(&drive->gendev, g, NULL); return 0; out_free_disk: diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index e823394ed543..04e008e8f6f9 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -416,7 +416,7 @@ static int ide_gd_probe(ide_drive_t *drive) if (drive->dev_flags & IDE_DFLAG_REMOVABLE) g->flags = GENHD_FL_REMOVABLE; g->fops = &ide_gd_ops; - device_add_disk(&drive->gendev, g); + device_add_disk(&drive->gendev, g, NULL); return 0; out_free_disk: diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index faa9e6116b2f..73332b9a25b5 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -46,6 +46,8 @@ #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/nospec.h> + #include <linux/uaccess.h> #include <rdma/ib.h> @@ -1120,6 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 21863ddde63e..01d68ed46c1b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -44,6 +44,8 @@ #include <linux/module.h> #include <linux/nsproxy.h> +#include <linux/nospec.h> + #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> @@ -1676,6 
+1678,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9fb1d9cb9401..e22314837645 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -544,6 +544,9 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int shrink = 0; int c; + if (!mr->allocated_from_cache) + return; + c = order2idx(dev, mr->order); if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); @@ -1647,18 +1650,19 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) umem = NULL; } #endif - clean_mr(dev, mr); + /* + * We should unregister the DMA address from the HCA before + * remove the DMA mapping. + */ + mlx5_mr_cache_free(dev, mr); if (umem) { ib_umem_release(umem); atomic_sub(npages, &dev->mdev->priv.reg_pages); } - if (!mr->allocated_from_cache) kfree(mr); - else - mlx5_mr_cache_free(dev, mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 370206f987f9..f48369d6f3a0 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -564,6 +564,7 @@ static ssize_t evdev_write(struct file *file, const char __user *buffer, input_inject_event(&evdev->handle, event.type, event.code, event.value); + cond_resched(); } out: diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index cd620e009bad..d4b9db487b16 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -231,6 +231,7 @@ static const struct xpad_device { { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -530,6 +531,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1), XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2), + XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index eb14ddf69346..8ec483e8688b 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -598,6 +598,7 @@ static ssize_t uinput_inject_events(struct uinput_device *udev, input_event(udev->dev, ev.type, ev.code, ev.value); bytes += input_event_size(); + cond_resched(); } return bytes; diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index f5ae24865355..b0f9d19b3410 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1346,6 +1346,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 
}, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index e08228061bcd..412fa71245af 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -707,6 +707,7 @@ static ssize_t mousedev_write(struct file *file, const char __user *buffer, mousedev_generate_response(client, c); spin_unlock_irq(&client->packet_lock); + cond_resched(); } kill_fasync(&client->fasync, SIGIO, POLL_IN); diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index b8bc71569349..95a78ccbd847 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1395,15 +1395,26 @@ static void __init i8042_register_ports(void) for (i = 0; i < I8042_NUM_PORTS; i++) { struct serio *serio = i8042_ports[i].serio; - if (serio) { - printk(KERN_INFO "serio: %s at %#lx,%#lx irq %d\n", - serio->name, - (unsigned long) I8042_DATA_REG, - (unsigned long) I8042_COMMAND_REG, - i8042_ports[i].irq); - serio_register_port(serio); - device_set_wakeup_capable(&serio->dev, true); - } + if (!serio) + continue; + + printk(KERN_INFO "serio: %s at %#lx,%#lx irq %d\n", + serio->name, + (unsigned long) I8042_DATA_REG, + (unsigned long) I8042_COMMAND_REG, + i8042_ports[i].irq); + serio_register_port(serio); + device_set_wakeup_capable(&serio->dev, true); + + /* + * On platforms using suspend-to-idle, allow the keyboard to + * wake up the system from sleep by enabling keyboard wakeups + * by default. This is consistent with keyboard wakeup + * behavior on many platforms using suspend-to-RAM (ACPI S3) + * by default. + */ + if (pm_suspend_via_s2idle() && i == I8042_KBD_PORT_NO) + device_set_wakeup_enable(&serio->dev, true); } } diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 439bf90d084d..a872cd720967 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -4,8 +4,7 @@ menuconfig NVM bool "Open-Channel SSD target support" - depends on BLOCK && PCI - select BLK_DEV_NVME + depends on BLOCK help Say Y here to get to enable Open-channel SSDs. 
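The block driver patches above (swim3, sx8, xsysace, z2ram, gdrom) all follow the same legacy request_fn to blk-mq conversion. As a rough illustrative sketch only (it is not taken from any of the patches above; "mydev" and its functions are placeholder names), the shape of such a conversion is:

	/* Hypothetical example of the single-queue blk-mq setup used above. */
	#include <linux/blk-mq.h>

	struct mydev {
		struct blk_mq_tag_set	tag_set;
		struct request_queue	*queue;
	};

	/* ->queue_rq() replaces the old request_fn: it is handed one request
	 * at a time and completes it with blk_mq_end_request(). */
	static blk_status_t mydev_queue_rq(struct blk_mq_hw_ctx *hctx,
					   const struct blk_mq_queue_data *bd)
	{
		struct request *rq = bd->rq;

		blk_mq_start_request(rq);
		/* ... program the hardware for the I/O described by rq ... */
		blk_mq_end_request(rq, BLK_STS_OK);
		return BLK_STS_OK;
	}

	static const struct blk_mq_ops mydev_mq_ops = {
		.queue_rq	= mydev_queue_rq,
	};

	static int mydev_init_queue(struct mydev *dev)
	{
		/* Sets up the tag set and a single hardware queue; returns an
		 * ERR_PTR() on failure rather than NULL. */
		dev->queue = blk_mq_init_sq_queue(&dev->tag_set, &mydev_mq_ops,
						  2, BLK_MQ_F_SHOULD_MERGE);
		if (IS_ERR(dev->queue))
			return PTR_ERR(dev->queue);
		dev->queue->queuedata = dev;
		return 0;
	}

	static void mydev_exit_queue(struct mydev *dev)
	{
		/* Teardown needs both calls: queue first, then tag set. */
		blk_cleanup_queue(dev->queue);
		blk_mq_free_tag_set(&dev->tag_set);
	}

The error paths added in the patches above mirror mydev_exit_queue(): a queue allocated with blk_mq_init_sq_queue() is torn down with blk_cleanup_queue() followed by blk_mq_free_tag_set().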
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 60aa7bc5a630..efb976a863d2 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -355,6 +355,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return -EINVAL; } + if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) { + pr_err("nvm: device is incompatible with target L2P type.\n"); + return -EINVAL; + } + if (nvm_target_exists(create->tgtname)) { pr_err("nvm: target name already exists (%s)\n", create->tgtname); @@ -598,22 +603,16 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { - if (rqd->nr_ppas == 1) { - nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1); - return; - } + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas); + nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas); } static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { - if (rqd->nr_ppas == 1) { - nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1); - return; - } + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas); + nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas); } int nvm_register_tgt_type(struct nvm_tgt_type *tt) @@ -712,45 +711,23 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); } -int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta, - struct ppa_addr ppa, int nchks) +static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd) { - struct nvm_dev *dev = tgt_dev->parent; - - nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - - return dev->ops->get_chk_meta(tgt_dev->parent, meta, - (sector_t)ppa.ppa, nchks); -} -EXPORT_SYMBOL(nvm_get_chunk_meta); - -int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_rq rqd; - int ret; + int flags = 0; - if (nr_ppas > NVM_MAX_VLBA) { - pr_err("nvm: unable to update all blocks atomically\n"); - return -EINVAL; - } + if (geo->version == NVM_OCSSD_SPEC_20) + return 0; - memset(&rqd, 0, sizeof(struct nvm_rq)); + if (rqd->is_seq) + flags |= geo->pln_mode >> 1; - nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); - nvm_rq_tgt_to_dev(tgt_dev, &rqd); + if (rqd->opcode == NVM_OP_PREAD) + flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND); + else if (rqd->opcode == NVM_OP_PWRITE) + flags |= NVM_IO_SCRAMBLE_ENABLE; - ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(tgt_dev, &rqd); - if (ret) { - pr_err("nvm: failed bb mark\n"); - return -EINVAL; - } - - return 0; + return flags; } -EXPORT_SYMBOL(nvm_set_tgt_bb_tbl); int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { @@ -763,6 +740,7 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) nvm_rq_tgt_to_dev(tgt_dev, rqd); rqd->dev = tgt_dev; + rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); /* In case of error, fail with right address format */ ret = dev->ops->submit_io(dev, rqd); @@ -783,6 +761,7 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) nvm_rq_tgt_to_dev(tgt_dev, rqd); rqd->dev = tgt_dev; + rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); /* In case of error, fail with right address format */ ret = dev->ops->submit_io_sync(dev, rqd); @@ -805,27 +784,159 @@ void 
nvm_end_io(struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_end_io); +static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd) +{ + if (!dev->ops->submit_io_sync) + return -ENODEV; + + rqd->flags = nvm_set_flags(&dev->geo, rqd); + + return dev->ops->submit_io_sync(dev, rqd); +} + +static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa) +{ + struct nvm_rq rqd = { NULL }; + struct bio bio; + struct bio_vec bio_vec; + struct page *page; + int ret; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + bio_init(&bio, &bio_vec, 1); + bio_add_page(&bio, page, PAGE_SIZE, 0); + bio_set_op_attrs(&bio, REQ_OP_READ, 0); + + rqd.bio = &bio; + rqd.opcode = NVM_OP_PREAD; + rqd.is_seq = 1; + rqd.nr_ppas = 1; + rqd.ppa_addr = generic_to_dev_addr(dev, ppa); + + ret = nvm_submit_io_sync_raw(dev, &rqd); + if (ret) + return ret; + + __free_page(page); + + return rqd.error; +} + /* - * folds a bad block list from its plane representation to its virtual - * block representation. The fold is done in place and reduced size is - * returned. - * - * If any of the planes status are bad or grown bad block, the virtual block - * is marked bad. If not bad, the first plane state acts as the block state. + * Scans a 1.2 chunk first and last page to determine if its state. + * If the chunk is found to be open, also scan it to update the write + * pointer. */ -int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) +static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa, + struct nvm_chk_meta *meta) { struct nvm_geo *geo = &dev->geo; - int blk, offset, pl, blktype; + int ret, pg, pl; - if (nr_blks != geo->num_chk * geo->pln_mode) - return -EINVAL; + /* sense first page */ + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) /* valid data */ + meta->state = NVM_CHK_ST_OPEN; + else if (ret > 0) { + /* + * If empty page, the chunk is free, else it is an + * actual io error. In that case, mark it offline. + */ + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + meta->state = NVM_CHK_ST_FREE; + return 0; + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->state = NVM_CHK_ST_OPEN; + goto scan; + default: + return -ret; /* other io error */ + } + } + + /* sense last page */ + ppa.g.pg = geo->num_pg - 1; + ppa.g.pl = geo->num_pln - 1; + + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) { /* Chunk fully written */ + meta->state = NVM_CHK_ST_CLOSED; + meta->wp = geo->clba; + return 0; + } else if (ret > 0) { + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->state = NVM_CHK_ST_OPEN; + break; + default: + return -ret; /* other io error */ + } + } + +scan: + /* + * chunk is open, we scan sequentially to update the write pointer. + * We make the assumption that targets write data across all planes + * before moving to the next page. 
+ */ + for (pg = 0; pg < geo->num_pg; pg++) { + for (pl = 0; pl < geo->num_pln; pl++) { + ppa.g.pg = pg; + ppa.g.pl = pl; + + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) { + meta->wp += geo->ws_min; + } else if (ret > 0) { + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + return 0; + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->wp += geo->ws_min; + break; + default: + return -ret; /* other io error */ + } + } + } + } + + return 0; +} + +/* + * folds a bad block list from its plane representation to its + * chunk representation. + * + * If any of the planes status are bad or grown bad, the chunk is marked + * offline. If not bad, the first plane state acts as the chunk state. + */ +static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa, + u8 *blks, int nr_blks, struct nvm_chk_meta *meta) +{ + struct nvm_geo *geo = &dev->geo; + int ret, blk, pl, offset, blktype; for (blk = 0; blk < geo->num_chk; blk++) { offset = blk * geo->pln_mode; blktype = blks[offset]; - /* Bad blocks on any planes take precedence over other types */ for (pl = 0; pl < geo->pln_mode; pl++) { if (blks[offset + pl] & (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { @@ -834,23 +945,124 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) } } - blks[blk] = blktype; + ppa.g.blk = blk; + + meta->wp = 0; + meta->type = NVM_CHK_TP_W_SEQ; + meta->wi = 0; + meta->slba = generic_to_dev_addr(dev, ppa).ppa; + meta->cnlb = dev->geo.clba; + + if (blktype == NVM_BLK_T_FREE) { + ret = nvm_bb_chunk_scan(dev, ppa, meta); + if (ret) + return ret; + } else { + meta->state = NVM_CHK_ST_OFFLINE; + } + + meta++; } - return geo->num_chk; + return 0; +} + +static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba, + int nchks, struct nvm_chk_meta *meta) +{ + struct nvm_geo *geo = &dev->geo; + struct ppa_addr ppa; + u8 *blks; + int ch, lun, nr_blks; + int ret; + + ppa.ppa = slba; + ppa = dev_to_generic_addr(dev, ppa); + + if (ppa.g.blk != 0) + return -EINVAL; + + if ((nchks % geo->num_chk) != 0) + return -EINVAL; + + nr_blks = geo->num_chk * geo->pln_mode; + + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; + + for (ch = ppa.g.ch; ch < geo->num_ch; ch++) { + for (lun = ppa.g.lun; lun < geo->num_lun; lun++) { + struct ppa_addr ppa_gen, ppa_dev; + + if (!nchks) + goto done; + + ppa_gen.ppa = 0; + ppa_gen.g.ch = ch; + ppa_gen.g.lun = lun; + ppa_dev = generic_to_dev_addr(dev, ppa_gen); + + ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks); + if (ret) + goto done; + + ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks, + meta); + if (ret) + goto done; + + meta += geo->num_chk; + nchks -= geo->num_chk; + } + } +done: + kfree(blks); + return ret; } -EXPORT_SYMBOL(nvm_bb_tbl_fold); -int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, - u8 *blks) +int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, + int nchks, struct nvm_chk_meta *meta) { struct nvm_dev *dev = tgt_dev->parent; nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - return dev->ops->get_bb_tbl(dev, ppa, blks); + if (dev->geo.version == NVM_OCSSD_SPEC_12) + return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta); + + return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta); +} +EXPORT_SYMBOL_GPL(nvm_get_chunk_meta); + +int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, + int nr_ppas, int type) +{ + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_rq rqd; + int ret; + + if (dev->geo.version 
== NVM_OCSSD_SPEC_20) + return 0; + + if (nr_ppas > NVM_MAX_VLBA) { + pr_err("nvm: unable to update all blocks atomically\n"); + return -EINVAL; + } + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); + nvm_rq_tgt_to_dev(tgt_dev, &rqd); + + ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); + nvm_free_rqd_ppalist(tgt_dev, &rqd); + if (ret) + return -EINVAL; + + return 0; } -EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); +EXPORT_SYMBOL_GPL(nvm_set_chunk_meta); static int nvm_core_init(struct nvm_dev *dev) { diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index f565a56b898a..c9fa26f95659 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 00984b486fea..6944aac43b01 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -16,7 +17,10 @@ * */ +#define CREATE_TRACE_POINTS + #include "pblk.h" +#include "pblk-trace.h" static void pblk_line_mark_bb(struct work_struct *work) { @@ -27,12 +31,12 @@ static void pblk_line_mark_bb(struct work_struct *work) struct ppa_addr *ppa = line_ws->priv; int ret; - ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); + ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); if (ret) { struct pblk_line *line; int pos; - line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + line = pblk_ppa_to_line(pblk, *ppa); pos = pblk_ppa_to_pos(&dev->geo, *ppa); pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n", @@ -80,19 +84,28 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) struct pblk_line *line; int pos; - line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)]; + line = pblk_ppa_to_line(pblk, rqd->ppa_addr); pos = pblk_ppa_to_pos(geo, rqd->ppa_addr); chunk = &line->chks[pos]; atomic_dec(&line->left_seblks); if (rqd->error) { + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED); + chunk->state = NVM_CHK_ST_OFFLINE; pblk_mark_bb(pblk, line, rqd->ppa_addr); } else { + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &rqd->ppa_addr, PBLK_CHUNK_RESET_DONE); + chunk->state = NVM_CHK_ST_FREE; } + trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr, + chunk->state); + atomic_dec(&pblk->inflight_io); } @@ -108,9 +121,9 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) /* * Get information for all chunks from the device. 
* - * The caller is responsible for freeing the returned structure + * The caller is responsible for freeing (vmalloc) the returned structure */ -struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) +struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -122,11 +135,11 @@ struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) ppa.ppa = 0; len = geo->all_chunks * sizeof(*meta); - meta = kzalloc(len, GFP_KERNEL); + meta = vzalloc(len); if (!meta) return ERR_PTR(-ENOMEM); - ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks); + ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta); if (ret) { kfree(meta); return ERR_PTR(-EIO); @@ -192,7 +205,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) { struct pblk_line *line; u64 paddr; - int line_id; #ifdef CONFIG_NVM_PBLK_DEBUG /* Callers must ensure that the ppa points to a device address */ @@ -200,8 +212,7 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) BUG_ON(pblk_ppa_empty(ppa)); #endif - line_id = pblk_ppa_to_line(ppa); - line = &pblk->lines[line_id]; + line = pblk_ppa_to_line(pblk, ppa); paddr = pblk_dev_ppa_to_line_addr(pblk, ppa); __pblk_map_invalidate(pblk, line, paddr); @@ -227,6 +238,33 @@ static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, spin_unlock(&pblk->trans_lock); } +int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd->dma_meta_list); + if (!rqd->meta_list) + return -ENOMEM; + + if (rqd->nr_ppas == 1) + return 0; + + rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; + rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + + return 0; +} + +void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + if (rqd->meta_list) + nvm_dev_dma_free(dev->parent, rqd->meta_list, + rqd->dma_meta_list); +} + /* Caller must guarantee that the request is a valid type */ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) { @@ -258,7 +296,6 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) /* Typically used on completion path. 
Cannot guarantee request consistency */ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) { - struct nvm_tgt_dev *dev = pblk->dev; mempool_t *pool; switch (type) { @@ -279,9 +316,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) return; } - if (rqd->meta_list) - nvm_dev_dma_free(dev->parent, rqd->meta_list, - rqd->dma_meta_list); + pblk_free_rqd_meta(pblk, rqd); mempool_free(rqd, pool); } @@ -409,6 +444,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) } } else { line->state = PBLK_LINESTATE_CORRUPT; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); + line->gc_group = PBLK_LINEGC_NONE; move_list = &l_mg->corrupt_list; pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", @@ -479,9 +517,30 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) return nvm_submit_io(dev, rqd); } +void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); + + int i; + + for (i = 0; i < rqd->nr_ppas; i++) { + struct ppa_addr *ppa = &ppa_list[i]; + struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa); + u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa); + + if (caddr == 0) + trace_pblk_chunk_state(pblk_disk_name(pblk), + ppa, NVM_CHK_ST_OPEN); + else if (caddr == chunk->cnlb) + trace_pblk_chunk_state(pblk_disk_name(pblk), + ppa, NVM_CHK_ST_CLOSED); + } +} + int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; + int ret; atomic_inc(&pblk->inflight_io); @@ -490,7 +549,27 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) return NVM_IO_ERR; #endif - return nvm_submit_io_sync(dev, rqd); + ret = nvm_submit_io_sync(dev, rqd); + + if (trace_pblk_chunk_state_enabled() && !ret && + rqd->opcode == NVM_OP_PWRITE) + pblk_check_chunk_state_update(pblk, rqd); + + return ret; +} + +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list; + int ret; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + pblk_down_chunk(pblk, ppa_list[0]); + ret = pblk_submit_io_sync(pblk, rqd); + pblk_up_chunk(pblk, ppa_list[0]); + + return ret; } static void pblk_bio_map_addr_endio(struct bio *bio) @@ -621,262 +700,227 @@ u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line) return paddr; } -/* - * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when - * taking the per LUN semaphore. 
- */ -static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf, u64 paddr, int dir) +u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; - void *ppa_list, *meta_list; - struct bio *bio; - struct nvm_rq rqd; - dma_addr_t dma_ppa_list, dma_meta_list; - int min = pblk->min_write_pgs; - int left_ppas = lm->emeta_sec[0]; - int id = line->id; - int rq_ppas, rq_len; - int cmd_op, bio_op; - int i, j; - int ret; + int bit; - if (dir == PBLK_WRITE) { - bio_op = REQ_OP_WRITE; - cmd_op = NVM_OP_PWRITE; - } else if (dir == PBLK_READ) { - bio_op = REQ_OP_READ; - cmd_op = NVM_OP_PREAD; - } else - return -EINVAL; + /* This usually only happens on bad lines */ + bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); + if (bit >= lm->blk_per_line) + return -1; - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &dma_meta_list); - if (!meta_list) - return -ENOMEM; + return bit * geo->ws_opt; +} - ppa_list = meta_list + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_meta *lm = &pblk->lm; + struct bio *bio; + struct nvm_rq rqd; + u64 paddr = pblk_line_smeta_start(pblk, line); + int i, ret; -next_rq: memset(&rqd, 0, sizeof(struct nvm_rq)); - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - rq_len = rq_ppas * geo->csecs; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; - bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, - l_mg->emeta_alloc_type, GFP_KERNEL); + bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto free_rqd_dma; + goto clear_rqd; } bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, bio_op, 0); + bio_set_op_attrs(bio, REQ_OP_READ, 0); rqd.bio = bio; - rqd.meta_list = meta_list; - rqd.ppa_list = ppa_list; - rqd.dma_meta_list = dma_meta_list; - rqd.dma_ppa_list = dma_ppa_list; - rqd.opcode = cmd_op; - rqd.nr_ppas = rq_ppas; - - if (dir == PBLK_WRITE) { - struct pblk_sec_meta *meta_list = rqd.meta_list; - - rqd.flags = pblk_set_progr_mode(pblk, PBLK_WRITE); - for (i = 0; i < rqd.nr_ppas; ) { - spin_lock(&line->lock); - paddr = __pblk_alloc_page(pblk, line, min); - spin_unlock(&line->lock); - for (j = 0; j < min; j++, i++, paddr++) { - meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); - rqd.ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, id); - } - } - } else { - for (i = 0; i < rqd.nr_ppas; ) { - struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id); - int pos = pblk_ppa_to_pos(geo, ppa); - int read_type = PBLK_READ_RANDOM; - - if (pblk_io_aligned(pblk, rq_ppas)) - read_type = PBLK_READ_SEQUENTIAL; - rqd.flags = pblk_set_read_mode(pblk, read_type); - - while (test_bit(pos, line->blk_bitmap)) { - paddr += min; - if (pblk_boundary_paddr_checks(pblk, paddr)) { - pblk_err(pblk, "corrupt emeta line:%d\n", - line->id); - bio_put(bio); - ret = -EINTR; - goto free_rqd_dma; - } - - ppa = addr_to_gen_ppa(pblk, paddr, id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - if (pblk_boundary_paddr_checks(pblk, paddr + min)) { - pblk_err(pblk, "corrupt emeta line:%d\n", - line->id); - bio_put(bio); - ret = -EINTR; - goto free_rqd_dma; - } + rqd.opcode = NVM_OP_PREAD; + rqd.nr_ppas = lm->smeta_sec; + rqd.is_seq = 1; - for (j = 0; j < min; j++, i++, 
paddr++) - rqd.ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, line->id); - } - } + for (i = 0; i < lm->smeta_sec; i++, paddr++) + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { - pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); + pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); bio_put(bio); - goto free_rqd_dma; + goto clear_rqd; } atomic_dec(&pblk->inflight_io); - if (rqd.error) { - if (dir == PBLK_WRITE) - pblk_log_write_err(pblk, &rqd); - else - pblk_log_read_err(pblk, &rqd); - } + if (rqd.error) + pblk_log_read_err(pblk, &rqd); - emeta_buf += rq_len; - left_ppas -= rq_ppas; - if (left_ppas) - goto next_rq; -free_rqd_dma: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); +clear_rqd: + pblk_free_rqd_meta(pblk, &rqd); return ret; } -u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int bit; - - /* This usually only happens on bad lines */ - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (bit >= lm->blk_per_line) - return -1; - - return bit * geo->ws_opt; -} - -static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, - u64 paddr, int dir) +static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, + u64 paddr) { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_meta *lm = &pblk->lm; struct bio *bio; struct nvm_rq rqd; - __le64 *lba_list = NULL; + __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); int i, ret; - int cmd_op, bio_op; - int flags; - - if (dir == PBLK_WRITE) { - bio_op = REQ_OP_WRITE; - cmd_op = NVM_OP_PWRITE; - flags = pblk_set_progr_mode(pblk, PBLK_WRITE); - lba_list = emeta_to_lbas(pblk, line->emeta->buf); - } else if (dir == PBLK_READ_RECOV || dir == PBLK_READ) { - bio_op = REQ_OP_READ; - cmd_op = NVM_OP_PREAD; - flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - } else - return -EINVAL; memset(&rqd, 0, sizeof(struct nvm_rq)); - rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd.dma_meta_list); - if (!rqd.meta_list) - return -ENOMEM; - - rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; - rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto free_ppa_list; + goto clear_rqd; } bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, bio_op, 0); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); rqd.bio = bio; - rqd.opcode = cmd_op; - rqd.flags = flags; + rqd.opcode = NVM_OP_PWRITE; rqd.nr_ppas = lm->smeta_sec; + rqd.is_seq = 1; for (i = 0; i < lm->smeta_sec; i++, paddr++) { struct pblk_sec_meta *meta_list = rqd.meta_list; rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - - if (dir == PBLK_WRITE) { - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - meta_list[i].lba = lba_list[paddr] = addr_empty; - } + meta_list[i].lba = lba_list[paddr] = addr_empty; } - /* - * This I/O is sent by the write thread when a line is replace. Since - * the write thread is the only one sending write and erase commands, - * there is no need to take the LUN semaphore. 
- */ - ret = pblk_submit_io_sync(pblk, &rqd); + ret = pblk_submit_io_sync_sem(pblk, &rqd); if (ret) { pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); bio_put(bio); - goto free_ppa_list; + goto clear_rqd; } atomic_dec(&pblk->inflight_io); if (rqd.error) { - if (dir == PBLK_WRITE) { - pblk_log_write_err(pblk, &rqd); - ret = 1; - } else if (dir == PBLK_READ) - pblk_log_read_err(pblk, &rqd); + pblk_log_write_err(pblk, &rqd); + ret = -EIO; } -free_ppa_list: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - +clear_rqd: + pblk_free_rqd_meta(pblk, &rqd); return ret; } -int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line) +int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, + void *emeta_buf) { - u64 bpaddr = pblk_line_smeta_start(pblk, line); + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + void *ppa_list, *meta_list; + struct bio *bio; + struct nvm_rq rqd; + u64 paddr = line->emeta_ssec; + dma_addr_t dma_ppa_list, dma_meta_list; + int min = pblk->min_write_pgs; + int left_ppas = lm->emeta_sec[0]; + int line_id = line->id; + int rq_ppas, rq_len; + int i, j; + int ret; - return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ_RECOV); -} + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &dma_meta_list); + if (!meta_list) + return -ENOMEM; -int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf) -{ - return pblk_line_submit_emeta_io(pblk, line, emeta_buf, - line->emeta_ssec, PBLK_READ); + ppa_list = meta_list + pblk_dma_meta_size; + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + +next_rq: + memset(&rqd, 0, sizeof(struct nvm_rq)); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_len = rq_ppas * geo->csecs; + + bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, + l_mg->emeta_alloc_type, GFP_KERNEL); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + goto free_rqd_dma; + } + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd.bio = bio; + rqd.meta_list = meta_list; + rqd.ppa_list = ppa_list; + rqd.dma_meta_list = dma_meta_list; + rqd.dma_ppa_list = dma_ppa_list; + rqd.opcode = NVM_OP_PREAD; + rqd.nr_ppas = rq_ppas; + + for (i = 0; i < rqd.nr_ppas; ) { + struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id); + int pos = pblk_ppa_to_pos(geo, ppa); + + if (pblk_io_aligned(pblk, rq_ppas)) + rqd.is_seq = 1; + + while (test_bit(pos, line->blk_bitmap)) { + paddr += min; + if (pblk_boundary_paddr_checks(pblk, paddr)) { + bio_put(bio); + ret = -EINTR; + goto free_rqd_dma; + } + + ppa = addr_to_gen_ppa(pblk, paddr, line_id); + pos = pblk_ppa_to_pos(geo, ppa); + } + + if (pblk_boundary_paddr_checks(pblk, paddr + min)) { + bio_put(bio); + ret = -EINTR; + goto free_rqd_dma; + } + + for (j = 0; j < min; j++, i++, paddr++) + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); + } + + ret = pblk_submit_io_sync(pblk, &rqd); + if (ret) { + pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); + bio_put(bio); + goto free_rqd_dma; + } + + atomic_dec(&pblk->inflight_io); + + if (rqd.error) + pblk_log_read_err(pblk, &rqd); + + emeta_buf += rq_len; + left_ppas -= rq_ppas; + if (left_ppas) + goto next_rq; + +free_rqd_dma: + nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + return ret; } static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, @@ -885,16 +929,17 @@ static void pblk_setup_e_rq(struct pblk *pblk, 
struct nvm_rq *rqd, rqd->opcode = NVM_OP_ERASE; rqd->ppa_addr = ppa; rqd->nr_ppas = 1; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_ERASE); + rqd->is_seq = 1; rqd->bio = NULL; } static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) { - struct nvm_rq rqd; - int ret = 0; + struct nvm_rq rqd = {NULL}; + int ret; - memset(&rqd, 0, sizeof(struct nvm_rq)); + trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa, + PBLK_CHUNK_RESET_START); pblk_setup_e_rq(pblk, &rqd, ppa); @@ -902,19 +947,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) * with writes. Thus, there is no need to take the LUN semaphore. */ ret = pblk_submit_io_sync(pblk, &rqd); - if (ret) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - pblk_err(pblk, "could not sync erase line:%d,blk:%d\n", - pblk_ppa_to_line(ppa), - pblk_ppa_to_pos(geo, ppa)); - - rqd.error = ret; - goto out; - } - -out: rqd.private = pblk; __pblk_end_io_erase(pblk, &rqd); @@ -1008,6 +1040,8 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, spin_lock(&l_mg->free_lock); spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1071,15 +1105,18 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; - line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); if (!line->map_bitmap) return -ENOMEM; + memset(line->map_bitmap, 0, lm->sec_bitmap_len); + /* will be initialized using bb info from map_bitmap */ - line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); + line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); if (!line->invalid_bitmap) { - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; return -ENOMEM; } @@ -1122,7 +1159,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, line->smeta_ssec = off; line->cur_sec = off + lm->smeta_sec; - if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) { + if (init && pblk_line_smeta_write(pblk, line, off)) { pblk_debug(pblk, "line smeta I/O failed. 
Retry\n"); return 0; } @@ -1152,6 +1189,8 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) { spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1204,6 +1243,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) if (line->state == PBLK_LINESTATE_NEW) { blk_to_erase = pblk_prepare_new_line(pblk, line); line->state = PBLK_LINESTATE_FREE; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } else { blk_to_erase = blk_in_line; } @@ -1221,6 +1262,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) } line->state = PBLK_LINESTATE_OPEN; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); atomic_set(&line->left_eblks, blk_to_erase); atomic_set(&line->left_seblks, blk_to_erase); @@ -1265,7 +1308,9 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) { - kfree(line->map_bitmap); + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; @@ -1283,8 +1328,11 @@ static void pblk_line_reinit(struct pblk_line *line) void pblk_line_free(struct pblk_line *line) { - kfree(line->map_bitmap); - kfree(line->invalid_bitmap); + struct pblk *pblk = line->pblk; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + + mempool_free(line->map_bitmap, l_mg->bitmap_pool); + mempool_free(line->invalid_bitmap, l_mg->bitmap_pool); pblk_line_reinit(line); } @@ -1312,6 +1360,8 @@ retry: if (unlikely(bit >= lm->blk_per_line)) { spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1446,12 +1496,32 @@ retry_setup: return line; } +void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa) +{ + struct pblk_line *line; + + line = pblk_ppa_to_line(pblk, ppa); + kref_put(&line->ref, pblk_line_put_wq); +} + +void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list; + int i; + + ppa_list = (rqd->nr_ppas > 1) ? 
rqd->ppa_list : &rqd->ppa_addr; + + for (i = 0; i < rqd->nr_ppas; i++) + pblk_ppa_to_line_put(pblk, ppa_list[i]); +} + static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) { lockdep_assert_held(&pblk->l_mg.free_lock); pblk_set_space_limit(pblk); pblk->state = PBLK_STATE_STOPPING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); } static void pblk_line_close_meta_sync(struct pblk *pblk) @@ -1501,6 +1571,7 @@ void __pblk_pipeline_flush(struct pblk *pblk) return; } pblk->state = PBLK_STATE_RECOVERING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); spin_unlock(&l_mg->free_lock); pblk_flush_writer(pblk); @@ -1522,6 +1593,7 @@ void __pblk_pipeline_stop(struct pblk *pblk) spin_lock(&l_mg->free_lock); pblk->state = PBLK_STATE_STOPPED; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); l_mg->data_line = NULL; l_mg->data_next = NULL; spin_unlock(&l_mg->free_lock); @@ -1539,13 +1611,14 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk) struct pblk_line *cur, *new = NULL; unsigned int left_seblks; - cur = l_mg->data_line; new = l_mg->data_next; if (!new) goto out; - l_mg->data_line = new; spin_lock(&l_mg->free_lock); + cur = l_mg->data_line; + l_mg->data_line = new; + pblk_line_setup_metadata(new, l_mg, &pblk->lm); spin_unlock(&l_mg->free_lock); @@ -1612,6 +1685,8 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_FREE; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); line->gc_group = PBLK_LINEGC_NONE; pblk_line_free(line); @@ -1680,6 +1755,9 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) rqd->end_io = pblk_end_io_erase; rqd->private = pblk; + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &ppa, PBLK_CHUNK_RESET_START); + /* The write thread schedules erases so that it minimizes disturbances * with writes. Thus, there is no need to take the LUN semaphore. 
*/ @@ -1689,7 +1767,7 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) struct nvm_geo *geo = &dev->geo; pblk_err(pblk, "could not async erase line:%d,blk:%d\n", - pblk_ppa_to_line(ppa), + pblk_ppa_to_line_id(ppa), pblk_ppa_to_pos(geo, ppa)); } @@ -1741,10 +1819,9 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) WARN_ON(line->state != PBLK_LINESTATE_OPEN); line->state = PBLK_LINESTATE_CLOSED; move_list = pblk_line_gc_list(pblk, line); - list_add_tail(&line->list, move_list); - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; @@ -1760,6 +1837,9 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) spin_unlock(&line->lock); spin_unlock(&l_mg->gc_lock); + + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) @@ -1778,6 +1858,17 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa)); wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa)); + if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) { + emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC); + memcpy(emeta_buf->header.uuid, pblk->instance_uuid, 16); + emeta_buf->header.id = cpu_to_le32(line->id); + emeta_buf->header.type = cpu_to_le16(line->type); + emeta_buf->header.version_major = EMETA_VERSION_MAJOR; + emeta_buf->header.version_minor = EMETA_VERSION_MINOR; + emeta_buf->header.crc = cpu_to_le32( + pblk_calc_meta_header_crc(pblk, &emeta_buf->header)); + } + emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas); emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf)); @@ -1795,8 +1886,6 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) spin_unlock(&l_mg->close_lock); pblk_line_should_sync_meta(pblk); - - } static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line) @@ -1847,8 +1936,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, queue_work(wq, &line_ws->ws); } -static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, - int nr_ppas, int pos) +static void __pblk_down_chunk(struct pblk *pblk, int pos) { struct pblk_lun *rlun = &pblk->luns[pos]; int ret; @@ -1857,13 +1945,6 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, * Only send one inflight I/O per LUN. 
Since we map at a page * granurality, all ppas in the I/O will map to the same LUN */ -#ifdef CONFIG_NVM_PBLK_DEBUG - int i; - - for (i = 1; i < nr_ppas; i++) - WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun || - ppa_list[0].a.ch != ppa_list[i].a.ch); -#endif ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); if (ret == -ETIME || ret == -EINTR) @@ -1871,21 +1952,21 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, -ret); } -void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) +void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); + int pos = pblk_ppa_to_pos(geo, ppa); - __pblk_down_page(pblk, ppa_list, nr_ppas, pos); + __pblk_down_chunk(pblk, pos); } -void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, +void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, unsigned long *lun_bitmap) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); + int pos = pblk_ppa_to_pos(geo, ppa); /* If the LUN has been locked for this same request, do no attempt to * lock it again @@ -1893,30 +1974,21 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, if (test_and_set_bit(pos, lun_bitmap)) return; - __pblk_down_page(pblk, ppa_list, nr_ppas, pos); + __pblk_down_chunk(pblk, pos); } -void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) +void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_lun *rlun; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); - -#ifdef CONFIG_NVM_PBLK_DEBUG - int i; - - for (i = 1; i < nr_ppas; i++) - WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun || - ppa_list[0].a.ch != ppa_list[i].a.ch); -#endif + int pos = pblk_ppa_to_pos(geo, ppa); rlun = &pblk->luns[pos]; up(&rlun->wr_sem); } -void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, - unsigned long *lun_bitmap) +void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -2060,8 +2132,7 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, /* If the L2P entry maps to a line, the reference is valid */ if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { - int line_id = pblk_ppa_to_line(ppa); - struct pblk_line *line = &pblk->lines[line_id]; + struct pblk_line *line = pblk_ppa_to_line(pblk, ppa); kref_get(&line->ref); } diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 157c2567c9e8..2fa118c8eb71 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -16,8 +17,10 @@ */ #include "pblk.h" +#include "pblk-trace.h" #include <linux/delay.h> + static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) { if (gc_rq->data) @@ -64,6 +67,8 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_CLOSED; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); move_list = pblk_line_gc_list(pblk, line); spin_unlock(&line->lock); @@ -144,7 +149,7 @@ static __le64 *get_lba_list_from_emeta(struct pblk *pblk, if 
(!emeta_buf) return NULL; - ret = pblk_line_read_emeta(pblk, line, emeta_buf); + ret = pblk_line_emeta_read(pblk, line, emeta_buf); if (ret) { pblk_err(pblk, "line %d read emeta failed (%d)\n", line->id, ret); @@ -405,6 +410,8 @@ void pblk_gc_free_full_lines(struct pblk *pblk) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_CLOSED); line->state = PBLK_LINESTATE_GC; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_del(&line->list); @@ -451,6 +458,8 @@ next_gc_group: spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_CLOSED); line->state = PBLK_LINESTATE_GC; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_del(&line->list); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 537e98f2b24a..13822594647c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) * Copyright (C) 2016 CNEX Labs @@ -19,15 +20,31 @@ */ #include "pblk.h" +#include "pblk-trace.h" static unsigned int write_buffer_size; module_param(write_buffer_size, uint, 0644); MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); -static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, - *pblk_w_rq_cache; -static DECLARE_RWSEM(pblk_lock); +struct pblk_global_caches { + struct kmem_cache *ws; + struct kmem_cache *rec; + struct kmem_cache *g_rq; + struct kmem_cache *w_rq; + + struct kref kref; + + struct mutex mutex; /* Ensures consistency between + * caches and kref + */ +}; + +static struct pblk_global_caches pblk_caches = { + .mutex = __MUTEX_INITIALIZER(pblk_caches.mutex), + .kref = KREF_INIT(0), +}; + struct bio_set pblk_bio_set; static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, @@ -168,36 +185,26 @@ static void pblk_rwb_free(struct pblk *pblk) if (pblk_rb_tear_down_check(&pblk->rwb)) pblk_err(pblk, "write buffer error on tear down\n"); - pblk_rb_data_free(&pblk->rwb); - vfree(pblk_rb_entries_ref(&pblk->rwb)); + pblk_rb_free(&pblk->rwb); } static int pblk_rwb_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_rb_entry *entries; - unsigned long nr_entries, buffer_size; - unsigned int power_size, power_seg_sz; - int pgs_in_buffer; + unsigned long buffer_size; + int pgs_in_buffer, threshold; - pgs_in_buffer = max(geo->mw_cunits, geo->ws_opt) * geo->all_luns; + threshold = geo->mw_cunits * geo->all_luns; + pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt) + * geo->all_luns; if (write_buffer_size && (write_buffer_size > pgs_in_buffer)) buffer_size = write_buffer_size; else buffer_size = pgs_in_buffer; - nr_entries = pblk_rb_calculate_size(buffer_size); - - entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); - if (!entries) - return -ENOMEM; - - power_size = get_count_order(nr_entries); - power_seg_sz = get_count_order(geo->csecs); - - return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); + return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); } /* Minimum pages needed within a lun */ @@ -306,53 +313,80 @@ static int pblk_set_addrf(struct pblk *pblk) return 0; } -static int pblk_init_global_caches(struct pblk *pblk) +static int pblk_create_global_caches(void) { - down_write(&pblk_lock); - pblk_ws_cache = kmem_cache_create("pblk_blk_ws", + + pblk_caches.ws = 
kmem_cache_create("pblk_blk_ws", sizeof(struct pblk_line_ws), 0, 0, NULL); - if (!pblk_ws_cache) { - up_write(&pblk_lock); + if (!pblk_caches.ws) return -ENOMEM; - } - pblk_rec_cache = kmem_cache_create("pblk_rec", + pblk_caches.rec = kmem_cache_create("pblk_rec", sizeof(struct pblk_rec_ctx), 0, 0, NULL); - if (!pblk_rec_cache) { - kmem_cache_destroy(pblk_ws_cache); - up_write(&pblk_lock); - return -ENOMEM; - } + if (!pblk_caches.rec) + goto fail_destroy_ws; - pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, + pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, 0, 0, NULL); - if (!pblk_g_rq_cache) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - up_write(&pblk_lock); - return -ENOMEM; - } + if (!pblk_caches.g_rq) + goto fail_destroy_rec; - pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, + pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, 0, 0, NULL); - if (!pblk_w_rq_cache) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - up_write(&pblk_lock); - return -ENOMEM; - } - up_write(&pblk_lock); + if (!pblk_caches.w_rq) + goto fail_destroy_g_rq; return 0; + +fail_destroy_g_rq: + kmem_cache_destroy(pblk_caches.g_rq); +fail_destroy_rec: + kmem_cache_destroy(pblk_caches.rec); +fail_destroy_ws: + kmem_cache_destroy(pblk_caches.ws); + + return -ENOMEM; } -static void pblk_free_global_caches(struct pblk *pblk) +static int pblk_get_global_caches(void) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); + int ret; + + mutex_lock(&pblk_caches.mutex); + + if (kref_read(&pblk_caches.kref) > 0) { + kref_get(&pblk_caches.kref); + mutex_unlock(&pblk_caches.mutex); + return 0; + } + + ret = pblk_create_global_caches(); + + if (!ret) + kref_get(&pblk_caches.kref); + + mutex_unlock(&pblk_caches.mutex); + + return ret; +} + +static void pblk_destroy_global_caches(struct kref *ref) +{ + struct pblk_global_caches *c; + + c = container_of(ref, struct pblk_global_caches, kref); + + kmem_cache_destroy(c->ws); + kmem_cache_destroy(c->rec); + kmem_cache_destroy(c->g_rq); + kmem_cache_destroy(c->w_rq); +} + +static void pblk_put_global_caches(void) +{ + mutex_lock(&pblk_caches.mutex); + kref_put(&pblk_caches.kref, pblk_destroy_global_caches); + mutex_unlock(&pblk_caches.mutex); } static int pblk_core_init(struct pblk *pblk) @@ -371,23 +405,19 @@ static int pblk_core_init(struct pblk *pblk) atomic64_set(&pblk->nr_flush, 0); pblk->nr_flush_rst = 0; - pblk->min_write_pgs = geo->ws_opt * (geo->csecs / PAGE_SIZE); + pblk->min_write_pgs = geo->ws_opt; max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); + pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, + queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); - if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { - pblk_err(pblk, "vector list too big(%u > %u)\n", - pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS); - return -EINVAL; - } - pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), GFP_KERNEL); if (!pblk->pad_dist) return -ENOMEM; - if (pblk_init_global_caches(pblk)) + if (pblk_get_global_caches()) goto fail_free_pad_dist; /* Internal bios can be at most the sectors signaled by the device. 
*/ @@ -396,27 +426,27 @@ static int pblk_core_init(struct pblk *pblk) goto free_global_caches; ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE, - pblk_ws_cache); + pblk_caches.ws); if (ret) goto free_page_bio_pool; ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns, - pblk_rec_cache); + pblk_caches.rec); if (ret) goto free_gen_ws_pool; ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns, - pblk_g_rq_cache); + pblk_caches.g_rq); if (ret) goto free_rec_pool; ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns, - pblk_g_rq_cache); + pblk_caches.g_rq); if (ret) goto free_r_rq_pool; ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns, - pblk_w_rq_cache); + pblk_caches.w_rq); if (ret) goto free_e_rq_pool; @@ -462,7 +492,7 @@ free_gen_ws_pool: free_page_bio_pool: mempool_exit(&pblk->page_bio_pool); free_global_caches: - pblk_free_global_caches(pblk); + pblk_put_global_caches(); fail_free_pad_dist: kfree(pblk->pad_dist); return -ENOMEM; @@ -486,7 +516,7 @@ static void pblk_core_free(struct pblk *pblk) mempool_exit(&pblk->e_rq_pool); mempool_exit(&pblk->w_rq_pool); - pblk_free_global_caches(pblk); + pblk_put_global_caches(); kfree(pblk->pad_dist); } @@ -504,6 +534,9 @@ static void pblk_line_mg_free(struct pblk *pblk) pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); kfree(l_mg->eline_meta[i]); } + + mempool_destroy(l_mg->bitmap_pool); + kmem_cache_destroy(l_mg->bitmap_cache); } static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg, @@ -540,67 +573,6 @@ static void pblk_lines_free(struct pblk *pblk) kfree(pblk->lines); } -static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun, - u8 *blks, int nr_blks) -{ - struct ppa_addr ppa; - int ret; - - ppa.ppa = 0; - ppa.g.ch = rlun->bppa.g.ch; - ppa.g.lun = rlun->bppa.g.lun; - - ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); - if (ret) - return ret; - - nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); - if (nr_blks < 0) - return -EIO; - - return 0; -} - -static void *pblk_bb_get_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - u8 *meta; - int i, nr_blks, blk_per_lun; - int ret; - - blk_per_lun = geo->num_chk * geo->pln_mode; - nr_blks = blk_per_lun * geo->all_luns; - - meta = kmalloc(nr_blks, GFP_KERNEL); - if (!meta) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < geo->all_luns; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - u8 *meta_pos = meta + i * blk_per_lun; - - ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun); - if (ret) { - kfree(meta); - return ERR_PTR(-EIO); - } - } - - return meta; -} - -static void *pblk_chunk_get_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) - return pblk_bb_get_meta(pblk); - else - return pblk_chunk_get_info(pblk); -} - static int pblk_luns_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; @@ -699,51 +671,7 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); } -static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line, - void *chunk_meta) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int i, chk_per_lun, nr_bad_chks = 0; - - chk_per_lun = geo->num_chk * geo->pln_mode; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - struct nvm_chk_meta *chunk; - int pos = 
pblk_ppa_to_pos(geo, rlun->bppa); - u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun; - - chunk = &line->chks[pos]; - - /* - * In 1.2 spec. chunk state is not persisted by the device. Thus - * some of the values are reset each time pblk is instantiated, - * so we have to assume that the block is closed. - */ - if (lun_bb_meta[line->id] == NVM_BLK_T_FREE) - chunk->state = NVM_CHK_ST_CLOSED; - else - chunk->state = NVM_CHK_ST_OFFLINE; - - chunk->type = NVM_CHK_TP_W_SEQ; - chunk->wi = 0; - chunk->slba = -1; - chunk->cnlb = geo->clba; - chunk->wp = 0; - - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) - continue; - - set_bit(pos, line->blk_bitmap); - nr_bad_chks++; - } - - return nr_bad_chks; -} - -static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, +static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, struct nvm_chk_meta *meta) { struct nvm_tgt_dev *dev = pblk->dev; @@ -772,6 +700,9 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, chunk->cnlb = chunk_meta->cnlb; chunk->wp = chunk_meta->wp; + trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa, + chunk->state); + if (chunk->type & NVM_CHK_TP_SZ_SPEC) { WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); continue; @@ -790,8 +721,6 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, void *chunk_meta, int line_id) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; long nr_bad_chks, chk_in_line; @@ -804,10 +733,7 @@ static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, line->vsc = &l_mg->vsc_list[line_id]; spin_lock_init(&line->lock); - if (geo->version == NVM_OCSSD_SPEC_12) - nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta); - else - nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta); + nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta); chk_in_line = lm->blk_per_line - nr_bad_chks; if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line || @@ -913,6 +839,17 @@ static int pblk_line_mg_init(struct pblk *pblk) goto fail_free_smeta; } + l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap", + lm->sec_bitmap_len, 0, 0, NULL); + if (!l_mg->bitmap_cache) + goto fail_free_smeta; + + /* the bitmap pool is used for both valid and map bitmaps */ + l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2, + l_mg->bitmap_cache); + if (!l_mg->bitmap_pool) + goto fail_destroy_bitmap_cache; + /* emeta allocates three different buffers for managing metadata with * in-memory and in-media layouts */ @@ -965,6 +902,10 @@ fail_free_emeta: kfree(l_mg->eline_meta[i]->buf); kfree(l_mg->eline_meta[i]); } + + mempool_destroy(l_mg->bitmap_pool); +fail_destroy_bitmap_cache: + kmem_cache_destroy(l_mg->bitmap_cache); fail_free_smeta: for (i = 0; i < PBLK_DATA_LINES; i++) kfree(l_mg->sline_meta[i]); @@ -1058,7 +999,7 @@ static int pblk_lines_init(struct pblk *pblk) if (ret) goto fail_free_meta; - chunk_meta = pblk_chunk_get_meta(pblk); + chunk_meta = pblk_get_chunk_meta(pblk); if (IS_ERR(chunk_meta)) { ret = PTR_ERR(chunk_meta); goto fail_free_luns; @@ -1079,16 +1020,20 @@ static int pblk_lines_init(struct pblk *pblk) goto fail_free_lines; nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); + + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } if (!nr_free_chks) { pblk_err(pblk, "too many 
bad blocks prevent for sane instance\n"); - return -EINTR; + ret = -EINTR; + goto fail_free_lines; } pblk_set_provision(pblk, nr_free_chks); - kfree(chunk_meta); + vfree(chunk_meta); return 0; fail_free_lines: @@ -1165,7 +1110,6 @@ static void pblk_exit(void *private, bool graceful) { struct pblk *pblk = private; - down_write(&pblk_lock); pblk_gc_exit(pblk, graceful); pblk_tear_down(pblk, graceful); @@ -1174,7 +1118,6 @@ static void pblk_exit(void *private, bool graceful) #endif pblk_free(pblk); - up_write(&pblk_lock); } static sector_t pblk_capacity(void *private) @@ -1200,6 +1143,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->dev = dev; pblk->disk = tdisk; pblk->state = PBLK_STATE_RUNNING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); pblk->gc.gc_enabled = 0; if (!(geo->version == NVM_OCSSD_SPEC_12 || @@ -1210,13 +1154,6 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, return ERR_PTR(-EINVAL); } - if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) { - pblk_err(pblk, "host-side L2P table not supported. (%x)\n", - geo->dom); - kfree(pblk); - return ERR_PTR(-EINVAL); - } - spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 953ca31dda68..6dcbd44e3acb 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -79,7 +80,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, } } - pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); + pblk_down_rq(pblk, ppa_list[0], lun_bitmap); return 0; } @@ -88,13 +89,14 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned int off) { struct pblk_sec_meta *meta_list = rqd->meta_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); unsigned int map_secs; int min = pblk->min_write_pgs; int i; for (i = off; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], lun_bitmap, &meta_list[i], map_secs)) { bio_put(rqd->bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); @@ -112,6 +114,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; struct pblk_sec_meta *meta_list = rqd->meta_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); struct pblk_line *e_line, *d_line; unsigned int map_secs; int min = pblk->min_write_pgs; @@ -119,14 +122,14 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, for (i = 0; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], lun_bitmap, &meta_list[i], map_secs)) { bio_put(rqd->bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); pblk_pipeline_stop(pblk); } - erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]); + erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]); /* line can change after page map. We might also be writing the * last line. 
@@ -141,7 +144,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, set_bit(erase_lun, e_line->erase_bitmap); atomic_dec(&e_line->left_eblks); - *erase_ppa = rqd->ppa_list[i]; + *erase_ppa = ppa_list[i]; erase_ppa->a.blk = e_line->id; spin_unlock(&e_line->lock); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index f6eec0212dfc..b1f4b51783f4 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -22,7 +23,7 @@ static DECLARE_RWSEM(pblk_rb_lock); -void pblk_rb_data_free(struct pblk_rb *rb) +static void pblk_rb_data_free(struct pblk_rb *rb) { struct pblk_rb_pages *p, *t; @@ -35,25 +36,51 @@ void pblk_rb_data_free(struct pblk_rb *rb) up_write(&pblk_rb_lock); } +void pblk_rb_free(struct pblk_rb *rb) +{ + pblk_rb_data_free(rb); + vfree(rb->entries); +} + +/* + * pblk_rb_calculate_size -- calculate the size of the write buffer + */ +static unsigned int pblk_rb_calculate_size(unsigned int nr_entries) +{ + /* Alloc a write buffer that can at least fit 128 entries */ + return (1 << max(get_count_order(nr_entries), 7)); +} + /* * Initialize ring buffer. The data and metadata buffers must be previously * allocated and their size must be a power of two * (Documentation/core-api/circular-buffers.rst) */ -int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, - unsigned int power_size, unsigned int power_seg_sz) +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, + unsigned int seg_size) { struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entries; unsigned int init_entry = 0; - unsigned int alloc_order = power_size; unsigned int max_order = MAX_ORDER - 1; - unsigned int order, iter; + unsigned int power_size, power_seg_sz; + unsigned int alloc_order, order, iter; + unsigned int nr_entries; + + nr_entries = pblk_rb_calculate_size(size); + entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); + if (!entries) + return -ENOMEM; + + power_size = get_count_order(size); + power_seg_sz = get_count_order(seg_size); down_write(&pblk_rb_lock); - rb->entries = rb_entry_base; + rb->entries = entries; rb->seg_size = (1 << power_seg_sz); rb->nr_entries = (1 << power_size); rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; + rb->back_thres = threshold; rb->flush_point = EMPTY_ENTRY; spin_lock_init(&rb->w_lock); @@ -61,6 +88,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, INIT_LIST_HEAD(&rb->pages); + alloc_order = power_size; if (alloc_order >= max_order) { order = max_order; iter = (1 << (alloc_order - max_order)); @@ -79,6 +107,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL); if (!page_set) { up_write(&pblk_rb_lock); + vfree(entries); return -ENOMEM; } @@ -88,6 +117,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, kfree(page_set); pblk_rb_data_free(rb); up_write(&pblk_rb_lock); + vfree(entries); return -ENOMEM; } kaddr = page_address(page_set->pages); @@ -124,20 +154,6 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, return 0; } -/* - * pblk_rb_calculate_size -- calculate the size of the write buffer - */ -unsigned int pblk_rb_calculate_size(unsigned int nr_entries) -{ - /* Alloc a write buffer that can at least fit 128 entries */ - return (1 << 
max(get_count_order(nr_entries), 7)); -} - -void *pblk_rb_entries_ref(struct pblk_rb *rb) -{ - return rb->entries; -} - static void clean_wctx(struct pblk_w_ctx *w_ctx) { int flags; @@ -168,6 +184,12 @@ static unsigned int pblk_rb_space(struct pblk_rb *rb) return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries); } +unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, + unsigned int nr_entries) +{ + return (p + nr_entries) & (rb->nr_entries - 1); +} + /* * Buffer count is calculated with respect to the submission entry signaling the * entries that are available to send to the media @@ -194,8 +216,7 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) subm = READ_ONCE(rb->subm); /* Commit read means updating submission pointer */ - smp_store_release(&rb->subm, - (subm + nr_entries) & (rb->nr_entries - 1)); + smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries)); return subm; } @@ -225,10 +246,10 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, entry->cacheline); - line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; + line = pblk_ppa_to_line(pblk, w_ctx->ppa); kref_put(&line->ref, pblk_line_put); clean_wctx(w_ctx); - rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1); + rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1); } pblk_rl_out(&pblk->rl, user_io, gc_io); @@ -385,11 +406,14 @@ static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, { unsigned int mem; unsigned int sync; + unsigned int threshold; sync = READ_ONCE(rb->sync); mem = READ_ONCE(rb->mem); - if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries) + threshold = nr_entries + rb->back_thres; + + if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold) return 0; if (pblk_rb_update_l2p(rb, nr_entries, mem, sync)) @@ -407,7 +431,7 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, return 0; /* Protect from read count */ - smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1)); + smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries)); return 1; } @@ -431,7 +455,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, if (!__pblk_rb_may_write(rb, nr_entries, pos)) return 0; - mem = (*pos + nr_entries) & (rb->nr_entries - 1); + mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries); *io_ret = NVM_IO_DONE; if (bio->bi_opf & REQ_PREFLUSH) { @@ -571,7 +595,7 @@ try: /* Release flags on context. Protect from writes */ smp_store_release(&entry->w_ctx.flags, flags); - pos = (pos + 1) & (rb->nr_entries - 1); + pos = pblk_rb_ptr_wrap(rb, pos, 1); } if (pad) { @@ -651,7 +675,7 @@ out: struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos) { - unsigned int entry = pos & (rb->nr_entries - 1); + unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0); return &rb->entries[entry].w_ctx; } @@ -697,7 +721,7 @@ unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) } } - sync = (sync + nr_entries) & (rb->nr_entries - 1); + sync = pblk_rb_ptr_wrap(rb, sync, nr_entries); /* Protect from counts */ smp_store_release(&rb->sync, sync); @@ -728,32 +752,6 @@ unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb) return (submitted < to_flush) ? (to_flush - submitted) : 0; } -/* - * Scan from the current position of the sync pointer to find the entry that - * corresponds to the given ppa. This is necessary since write requests can be - * completed out of order. 
The assumption is that the ppa is close to the sync - * pointer thus the search will not take long. - * - * The caller of this function must guarantee that the sync pointer will no - * reach the entry while it is using the metadata associated with it. With this - * assumption in mind, there is no need to take the sync lock. - */ -struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, - struct ppa_addr *ppa) -{ - unsigned int sync, subm, count; - unsigned int i; - - sync = READ_ONCE(rb->sync); - subm = READ_ONCE(rb->subm); - count = pblk_rb_ring_count(subm, sync, rb->nr_entries); - - for (i = 0; i < count; i++) - sync = (sync + 1) & (rb->nr_entries - 1); - - return NULL; -} - int pblk_rb_tear_down_check(struct pblk_rb *rb) { struct pblk_rb_entry *entry; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 5a46d7f9302f..9fba614adeeb 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -43,7 +44,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned long *read_bitmap) { struct pblk_sec_meta *meta_list = rqd->meta_list; - struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppas[NVM_MAX_VLBA]; int nr_secs = rqd->nr_ppas; bool advanced_bio = false; int i, j = 0; @@ -93,9 +94,7 @@ next: } if (pblk_io_aligned(pblk, nr_secs)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rqd->is_seq = 1; #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_add(nr_secs, &pblk->inflight_reads); @@ -118,10 +117,9 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, if (lba != blba + i) { #ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *p; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - p = (nr_lbas == 1) ? &rqd->ppa_list[i] : &rqd->ppa_addr; - print_ppa(pblk, p, "seq", i); + print_ppa(pblk, &ppa_list[i], "seq", i); #endif pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", lba, (u64)blba + i); @@ -150,14 +148,12 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, if (lba != meta_lba) { #ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *p; - int nr_ppas = rqd->nr_ppas; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - p = (nr_ppas == 1) ? &rqd->ppa_list[j] : &rqd->ppa_addr; - print_ppa(pblk, p, "seq", j); + print_ppa(pblk, &ppa_list[j], "rnd", j); #endif pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", - lba, meta_lba); + meta_lba, lba); WARN_ON(1); } @@ -167,22 +163,6 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); } -static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list; - int i; - - ppa_list = (rqd->nr_ppas > 1) ? 
rqd->ppa_list : &rqd->ppa_addr; - - for (i = 0; i < rqd->nr_ppas; i++) { - struct ppa_addr ppa = ppa_list[i]; - struct pblk_line *line; - - line = &pblk->lines[pblk_ppa_to_line(ppa)]; - kref_put(&line->ref, pblk_line_put_wq); - } -} - static void pblk_end_user_read(struct bio *bio) { #ifdef CONFIG_NVM_PBLK_DEBUG @@ -210,7 +190,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, bio_put(int_bio); if (put_line) - pblk_read_put_rqd_kref(pblk, rqd); + pblk_rq_to_line_put(pblk, rqd); #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); @@ -270,9 +250,9 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) i = 0; hole = find_first_zero_bit(read_bitmap, nr_secs); do { - int line_id = pblk_ppa_to_line(rqd->ppa_list[i]); - struct pblk_line *line = &pblk->lines[line_id]; + struct pblk_line *line; + line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); kref_put(&line->ref, pblk_line_put); meta_list[hole].lba = lba_list_media[i]; @@ -344,7 +324,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, rqd->bio = new_bio; rqd->nr_ppas = nr_holes; - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); pr_ctx->ppa_ptr = NULL; pr_ctx->orig_bio = bio; @@ -438,8 +417,6 @@ retry: } else { rqd->ppa_addr = ppa; } - - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); } int pblk_submit_read(struct pblk *pblk, struct bio *bio) @@ -454,13 +431,6 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA); int ret = NVM_IO_ERR; - /* logic error: lba out-of-bounds. Ignore read request */ - if (blba >= pblk->rl.nr_secs || nr_secs > PBLK_MAX_REQ_ADDRS) { - WARN(1, "pblk: read lba out of bounds (lba:%llu, nr:%d)\n", - (unsigned long long)blba, nr_secs); - return NVM_IO_ERR; - } - generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio), &pblk->disk->part0); @@ -484,21 +454,13 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) */ bio_init_idx = pblk_get_bi_idx(bio); - rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd->dma_meta_list); - if (!rqd->meta_list) { - pblk_err(pblk, "not able to allocate ppa list\n"); + if (pblk_alloc_rqd_meta(pblk, rqd)) goto fail_rqd_free; - } - - if (nr_secs > 1) { - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + if (nr_secs > 1) pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap); - } else { + else pblk_read_rq(pblk, rqd, bio, blba, read_bitmap); - } if (bitmap_full(read_bitmap, nr_secs)) { atomic_inc(&pblk->inflight_io); @@ -552,7 +514,7 @@ static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_line *line, u64 *lba_list, u64 *paddr_list_gc, unsigned int nr_secs) { - struct ppa_addr ppa_list_l2p[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA]; struct ppa_addr ppa_gc; int valid_secs = 0; int i; @@ -625,15 +587,11 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) memset(&rqd, 0, sizeof(struct nvm_rq)); - rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd.dma_meta_list); - if (!rqd.meta_list) - return -ENOMEM; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; if (gc_rq->nr_secs > 1) { - rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; - rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; - gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line, gc_rq->lba_list, gc_rq->paddr_list, @@ -654,7 +612,8 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq 
*gc_rq) PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { pblk_err(pblk, "could not allocate GC bio (%lu)\n", - PTR_ERR(bio)); + PTR_ERR(bio)); + ret = PTR_ERR(bio); goto err_free_dma; } @@ -663,7 +622,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) rqd.opcode = NVM_OP_PREAD; rqd.nr_ppas = gc_rq->secs_to_gc; - rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); rqd.bio = bio; if (pblk_submit_io_sync(pblk, &rqd)) { @@ -690,12 +648,12 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) #endif out: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + pblk_free_rqd_meta(pblk, &rqd); return ret; err_free_bio: bio_put(bio); err_free_dma: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + pblk_free_rqd_meta(pblk, &rqd); return ret; } diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index e232e47e1353..5740b7509bd8 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial: Javier Gonzalez <javier@cnexlabs.com> @@ -15,6 +16,7 @@ */ #include "pblk.h" +#include "pblk-trace.h" int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf) { @@ -85,15 +87,39 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) return 0; } -static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line) +static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line, + u64 written_secs) +{ + int i; + + for (i = 0; i < written_secs; i += pblk->min_write_pgs) + pblk_alloc_page(pblk, line, pblk->min_write_pgs); +} + +static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); + u64 written_secs = 0; + int valid_chunks = 0; + int i; + + for (i = 0; i < lm->blk_per_line; i++) { + struct nvm_chk_meta *chunk = &line->chks[i]; + + if (chunk->state & NVM_CHK_ST_OFFLINE) + continue; + + written_secs += chunk->wp; + valid_chunks++; + } + + if (lm->blk_per_line - nr_bb != valid_chunks) + pblk_err(pblk, "recovery line %d is bad\n", line->id); - return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] - - nr_bb * geo->clba; + pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec); + + return written_secs; } struct pblk_recov_alloc { @@ -105,115 +131,6 @@ struct pblk_recov_alloc { dma_addr_t dma_meta_list; }; -static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p, u64 r_ptr) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; - struct nvm_rq *rqd; - struct bio *bio; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - u64 r_ptr_int; - int left_ppas; - int rq_ppas, rq_len; - int i, j; - int ret = 0; - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; - - left_ppas = line->cur_sec - r_ptr; - if (!left_ppas) - return 0; - - r_ptr_int = r_ptr; - -next_read_rq: - memset(rqd, 0, pblk_g_rq_size); - - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->csecs; - - bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); - if (IS_ERR(bio)) 
- return PTR_ERR(bio); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_READ, 0); - - rqd->bio = bio; - rqd->opcode = NVM_OP_PREAD; - rqd->meta_list = meta_list; - rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; - - if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); - - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; - - ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - while (test_bit(pos, line->blk_bitmap)) { - r_ptr_int += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++) - rqd->ppa_list[i] = - addr_to_gen_ppa(pblk, r_ptr_int, line->id); - } - - /* If read fails, more padding is needed */ - ret = pblk_submit_io_sync(pblk, rqd); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - return ret; - } - - atomic_dec(&pblk->inflight_io); - - /* At this point, the read should not fail. If it does, it is a problem - * we cannot recover from here. Need FTL log. - */ - if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) { - pblk_err(pblk, "L2P recovery failed (%d)\n", rqd->error); - return -EINTR; - } - - for (i = 0; i < rqd->nr_ppas; i++) { - u64 lba = le64_to_cpu(meta_list[i].lba); - - if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) - continue; - - pblk_update_map(pblk, lba, rqd->ppa_list[i]); - } - - left_ppas -= rq_ppas; - if (left_ppas > 0) - goto next_read_rq; - - return 0; -} - static void pblk_recov_complete(struct kref *ref) { struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref); @@ -223,10 +140,11 @@ static void pblk_recov_complete(struct kref *ref) static void pblk_end_io_recov(struct nvm_rq *rqd) { + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); struct pblk_pad_rq *pad_rq = rqd->private; struct pblk *pblk = pad_rq->pblk; - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); @@ -234,18 +152,17 @@ static void pblk_end_io_recov(struct nvm_rq *rqd) kref_put(&pad_rq->ref, pblk_recov_complete); } -static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, - int left_ppas) +/* pad line using line bitmap. 
*/ +static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, + int left_ppas) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; struct pblk_sec_meta *meta_list; struct pblk_pad_rq *pad_rq; struct nvm_rq *rqd; struct bio *bio; void *data; - dma_addr_t dma_ppa_list, dma_meta_list; __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); u64 w_ptr = line->cur_sec; int left_line_ppas, rq_ppas, rq_len; @@ -279,20 +196,11 @@ next_pad_rq: rq_len = rq_ppas * geo->csecs; - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); - if (!meta_list) { - ret = -ENOMEM; - goto fail_free_pad; - } - - ppa_list = (void *)(meta_list) + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; - bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto fail_free_meta; + goto fail_free_pad; } bio->bi_iter.bi_sector = 0; /* internal bio */ @@ -300,17 +208,19 @@ next_pad_rq: rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); + ret = pblk_alloc_rqd_meta(pblk, rqd); + if (ret) + goto fail_free_rqd; + rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); - rqd->meta_list = meta_list; + rqd->is_seq = 1; rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; rqd->end_io = pblk_end_io_recov; rqd->private = pad_rq; + meta_list = rqd->meta_list; + for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; @@ -338,13 +248,13 @@ next_pad_rq: } kref_get(&pad_rq->ref); - pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_down_chunk(pblk, rqd->ppa_list[0]); ret = pblk_submit_io(pblk, rqd); if (ret) { pblk_err(pblk, "I/O submission failed: %d\n", ret); - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); - goto fail_free_bio; + pblk_up_chunk(pblk, rqd->ppa_list[0]); + goto fail_free_rqd; } left_line_ppas -= rq_ppas; @@ -368,157 +278,60 @@ free_rq: kfree(pad_rq); return ret; -fail_free_bio: +fail_free_rqd: + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); bio_put(bio); -fail_free_meta: - nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); fail_free_pad: kfree(pad_rq); vfree(data); return ret; } -/* When this function is called, it means that not all upper pages have been - * written in a page that contains valid data. 
In order to recover this data, we - * first find the write pointer on the device, then we pad all necessary - * sectors, and finally attempt to read the valid data - */ -static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p) +static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; - struct nvm_rq *rqd; - struct bio *bio; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - u64 w_ptr = 0, r_ptr; - int rq_ppas, rq_len; - int i, j; - int ret = 0; - int rec_round; - int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec; - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; - - /* we could recover up until the line write pointer */ - r_ptr = line->cur_sec; - rec_round = 0; - -next_rq: - memset(rqd, 0, pblk_g_rq_size); + int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt; - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->csecs; - - bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); - if (IS_ERR(bio)) - return PTR_ERR(bio); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_READ, 0); + return (distance > line->left_msecs) ? line->left_msecs : distance; +} - rqd->bio = bio; - rqd->opcode = NVM_OP_PREAD; - rqd->meta_list = meta_list; - rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; +static int pblk_line_wp_is_unbalanced(struct pblk *pblk, + struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_lun *rlun; + struct nvm_chk_meta *chunk; + struct ppa_addr ppa; + u64 line_wp; + int pos, i; - if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rlun = &pblk->luns[0]; + ppa = rlun->bppa; + pos = pblk_ppa_to_pos(geo, ppa); + chunk = &line->chks[pos]; - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; + line_wp = chunk->wp; - w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + for (i = 1; i < lm->blk_per_line; i++) { + rlun = &pblk->luns[i]; + ppa = rlun->bppa; pos = pblk_ppa_to_pos(geo, ppa); + chunk = &line->chks[pos]; - while (test_bit(pos, line->blk_bitmap)) { - w_ptr += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) - rqd->ppa_list[i] = - addr_to_gen_ppa(pblk, w_ptr, line->id); - } - - ret = pblk_submit_io_sync(pblk, rqd); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - return ret; - } - - atomic_dec(&pblk->inflight_io); - - /* This should not happen since the read failed during normal recovery, - * but the media works funny sometimes... 
- */ - if (!rec_round++ && !rqd->error) { - rec_round = 0; - for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) { - u64 lba = le64_to_cpu(meta_list[i].lba); - - if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) - continue; - - pblk_update_map(pblk, lba, rqd->ppa_list[i]); - } - } - - /* Reached the end of the written line */ - if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { - int pad_secs, nr_error_bits, bit; - int ret; - - bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); - nr_error_bits = rqd->nr_ppas - bit; - - /* Roll back failed sectors */ - line->cur_sec -= nr_error_bits; - line->left_msecs += nr_error_bits; - bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); - - pad_secs = pblk_pad_distance(pblk); - if (pad_secs > line->left_msecs) - pad_secs = line->left_msecs; - - ret = pblk_recov_pad_oob(pblk, line, pad_secs); - if (ret) - pblk_err(pblk, "OOB padding failed (err:%d)\n", ret); - - ret = pblk_recov_read_oob(pblk, line, p, r_ptr); - if (ret) - pblk_err(pblk, "OOB read failed (err:%d)\n", ret); - - left_ppas = 0; + if (chunk->wp > line_wp) + return 1; + else if (chunk->wp < line_wp) + line_wp = chunk->wp; } - left_ppas -= rq_ppas; - if (left_ppas > 0) - goto next_rq; - - return ret; + return 0; } static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p, int *done) + struct pblk_recov_alloc p) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -528,11 +341,16 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, struct bio *bio; void *data; dma_addr_t dma_ppa_list, dma_meta_list; - u64 paddr; + __le64 *lba_list; + u64 paddr = 0; + bool padded = false; int rq_ppas, rq_len; int i, j; - int ret = 0; - int left_ppas = pblk_calc_sec_in_line(pblk, line); + int ret; + u64 left_ppas = pblk_sec_in_open_line(pblk, line); + + if (pblk_line_wp_is_unbalanced(pblk, line)) + pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -541,7 +359,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, dma_ppa_list = p.dma_ppa_list; dma_meta_list = p.dma_meta_list; - *done = 1; + lba_list = emeta_to_lbas(pblk, line->emeta->buf); next_rq: memset(rqd, 0, pblk_g_rq_size); @@ -567,15 +385,13 @@ next_rq: rqd->dma_meta_list = dma_meta_list; if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rqd->is_seq = 1; +retry_rq: for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; - paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); ppa = addr_to_gen_ppa(pblk, paddr, line->id); pos = pblk_ppa_to_pos(geo, ppa); @@ -585,9 +401,9 @@ next_rq: pos = pblk_ppa_to_pos(geo, ppa); } - for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++) + for (j = 0; j < pblk->min_write_pgs; j++, i++) rqd->ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, line->id); + addr_to_gen_ppa(pblk, paddr + j, line->id); } ret = pblk_submit_io_sync(pblk, rqd); @@ -599,31 +415,33 @@ next_rq: atomic_dec(&pblk->inflight_io); - /* Reached the end of the written line */ + /* If a read fails, do a best effort by padding the line and retrying */ if (rqd->error) { - int nr_error_bits, bit; + int pad_distance, ret; - bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); - nr_error_bits = rqd->nr_ppas - bit; - - /* Roll back failed sectors */ - line->cur_sec -= nr_error_bits; - line->left_msecs += nr_error_bits; - bitmap_clear(line->map_bitmap, 
line->cur_sec, nr_error_bits); + if (padded) { + pblk_log_read_err(pblk, rqd); + return -EINTR; + } - left_ppas = 0; - rqd->nr_ppas = bit; + pad_distance = pblk_pad_distance(pblk, line); + ret = pblk_recov_pad_line(pblk, line, pad_distance); + if (ret) + return ret; - if (rqd->error != NVM_RSP_ERR_EMPTYPAGE) - *done = 0; + padded = true; + goto retry_rq; } for (i = 0; i < rqd->nr_ppas; i++) { u64 lba = le64_to_cpu(meta_list[i].lba); + lba_list[paddr++] = cpu_to_le64(lba); + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) continue; + line->nr_valid_lbas++; pblk_update_map(pblk, lba, rqd->ppa_list[i]); } @@ -631,7 +449,11 @@ next_rq: if (left_ppas > 0) goto next_rq; - return ret; +#ifdef CONFIG_NVM_PBLK_DEBUG + WARN_ON(padded && !pblk_line_is_full(line)); +#endif + + return 0; } /* Scan line for lbas on out of bound area */ @@ -645,7 +467,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) struct pblk_recov_alloc p; void *data; dma_addr_t dma_ppa_list, dma_meta_list; - int done, ret = 0; + int ret = 0; meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); if (!meta_list) @@ -660,7 +482,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) goto free_meta_list; } - rqd = pblk_alloc_rqd(pblk, PBLK_READ); + rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL); + memset(rqd, 0, pblk_g_rq_size); p.ppa_list = ppa_list; p.meta_list = meta_list; @@ -669,24 +492,17 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) p.dma_ppa_list = dma_ppa_list; p.dma_meta_list = dma_meta_list; - ret = pblk_recov_scan_oob(pblk, line, p, &done); + ret = pblk_recov_scan_oob(pblk, line, p); if (ret) { - pblk_err(pblk, "could not recover L2P from OOB\n"); + pblk_err(pblk, "could not recover L2P form OOB\n"); goto out; } - if (!done) { - ret = pblk_recov_scan_all_oob(pblk, line, p); - if (ret) { - pblk_err(pblk, "could not recover L2P from OOB\n"); - goto out; - } - } - if (pblk_line_is_full(line)) pblk_line_recov_close(pblk, line); out: + mempool_free(rqd, &pblk->r_rq_pool); kfree(data); free_meta_list: nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); @@ -775,7 +591,7 @@ static void pblk_recov_wa_counters(struct pblk *pblk, } static int pblk_line_was_written(struct pblk_line *line, - struct pblk *pblk) + struct pblk *pblk) { struct pblk_line_meta *lm = &pblk->lm; @@ -801,6 +617,18 @@ static int pblk_line_was_written(struct pblk_line *line, return 1; } +static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + int i; + + for (i = 0; i < lm->blk_per_line; i++) + if (line->chks[i].state & NVM_CHK_ST_OPEN) + return true; + + return false; +} + struct pblk_line *pblk_recov_l2p(struct pblk *pblk) { struct pblk_line_meta *lm = &pblk->lm; @@ -841,7 +669,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) continue; /* Lines that cannot be read are assumed as not written here */ - if (pblk_line_read_smeta(pblk, line)) + if (pblk_line_smeta_read(pblk, line)) continue; crc = pblk_calc_smeta_crc(pblk, smeta_buf); @@ -911,7 +739,12 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) line->emeta = emeta; memset(line->emeta->buf, 0, lm->emeta_len[0]); - if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) { + if (pblk_line_is_open(pblk, line)) { + pblk_recov_l2p_from_oob(pblk, line); + goto next; + } + + if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) { pblk_recov_l2p_from_oob(pblk, line); goto next; } @@ -935,6 +768,8 @@ next: 
spin_lock(&line->lock); line->state = PBLK_LINESTATE_CLOSED; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); move_list = pblk_line_gc_list(pblk, line); spin_unlock(&line->lock); @@ -942,26 +777,36 @@ next: list_move_tail(&line->list, move_list); spin_unlock(&l_mg->gc_lock); - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; } else { - if (open_lines > 1) - pblk_err(pblk, "failed to recover L2P\n"); + spin_lock(&line->lock); + line->state = PBLK_LINESTATE_OPEN; + spin_unlock(&line->lock); + + line->emeta->mem = 0; + atomic_set(&line->emeta->sync, 0); + + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); - open_lines++; - line->meta_line = meta_line; data_line = line; + line->meta_line = meta_line; + + open_lines++; } } - spin_lock(&l_mg->free_lock); if (!open_lines) { + spin_lock(&l_mg->free_lock); WARN_ON_ONCE(!test_and_clear_bit(meta_line, &l_mg->meta_bitmap)); + spin_unlock(&l_mg->free_lock); pblk_line_replace_data(pblk); } else { + spin_lock(&l_mg->free_lock); /* Allocate next line for preparation */ l_mg->data_next = pblk_line_get(pblk); if (l_mg->data_next) { @@ -969,8 +814,8 @@ next: l_mg->data_next->type = PBLK_LINETYPE_DATA; is_next = 1; } + spin_unlock(&l_mg->free_lock); } - spin_unlock(&l_mg->free_lock); if (is_next) pblk_line_erase(pblk, l_mg->data_next); @@ -998,7 +843,7 @@ int pblk_recov_pad(struct pblk *pblk) left_msecs = line->left_msecs; spin_unlock(&l_mg->free_lock); - ret = pblk_recov_pad_oob(pblk, line, left_msecs); + ret = pblk_recov_pad_line(pblk, line, left_msecs); if (ret) { pblk_err(pblk, "tear down padding failed (%d)\n", ret); return ret; diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 6a0616a6fcaf..db55a1c89997 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -127,7 +128,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, } else if (free_blocks < rl->high) { int shift = rl->high_pw - rl->rb_windows_pw; int user_windows = free_blocks >> shift; - int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW; + int user_max = user_windows << ilog2(NVM_MAX_VLBA); rl->rb_user_max = user_max; rl->rb_gc_max = max - user_max; @@ -228,7 +229,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) rl->rsv_blocks = min_blocks; /* This will always be a power-of-2 */ - rb_windows = budget / PBLK_MAX_REQ_ADDRS; + rb_windows = budget / NVM_MAX_VLBA; rl->rb_windows_pw = get_count_order(rb_windows); /* To start with, all buffer is available to user I/O writers */ diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 9fc3dfa168b4..2d2818155aa8 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -262,8 +263,14 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) sec_in_line = l_mg->data_line->sec_in_line; meta_weight = bitmap_weight(&l_mg->meta_bitmap, PBLK_DATA_LINES); - map_weight = bitmap_weight(l_mg->data_line->map_bitmap, + + spin_lock(&l_mg->data_line->lock); + if (l_mg->data_line->map_bitmap) + map_weight = bitmap_weight(l_mg->data_line->map_bitmap, lm->sec_per_line); + else + map_weight = 0; + spin_unlock(&l_mg->data_line->lock); } 
spin_unlock(&l_mg->free_lock); @@ -337,7 +344,6 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, { int sz; - sz = snprintf(page, PAGE_SIZE, "user:%lld gc:%lld pad:%lld WA:", user, gc, pad); @@ -349,7 +355,7 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, u32 wa_frac; wa_int = (user + gc + pad) * 100000; - wa_int = div_u64(wa_int, user); + wa_int = div64_u64(wa_int, user); wa_int = div_u64_rem(wa_int, 100000, &wa_frac); sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h new file mode 100644 index 000000000000..679e5c458ca6 --- /dev/null +++ b/drivers/lightnvm/pblk-trace.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM pblk + +#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PBLK_H + +#include <linux/tracepoint.h> + +struct ppa_addr; + +#define show_chunk_flags(state) __print_flags(state, "", \ + { NVM_CHK_ST_FREE, "FREE", }, \ + { NVM_CHK_ST_CLOSED, "CLOSED", }, \ + { NVM_CHK_ST_OPEN, "OPEN", }, \ + { NVM_CHK_ST_OFFLINE, "OFFLINE", }) + +#define show_line_state(state) __print_symbolic(state, \ + { PBLK_LINESTATE_NEW, "NEW", }, \ + { PBLK_LINESTATE_FREE, "FREE", }, \ + { PBLK_LINESTATE_OPEN, "OPEN", }, \ + { PBLK_LINESTATE_CLOSED, "CLOSED", }, \ + { PBLK_LINESTATE_GC, "GC", }, \ + { PBLK_LINESTATE_BAD, "BAD", }, \ + { PBLK_LINESTATE_CORRUPT, "CORRUPT" }) + + +#define show_pblk_state(state) __print_symbolic(state, \ + { PBLK_STATE_RUNNING, "RUNNING", }, \ + { PBLK_STATE_STOPPING, "STOPPING", }, \ + { PBLK_STATE_RECOVERING, "RECOVERING", }, \ + { PBLK_STATE_STOPPED, "STOPPED" }) + +#define show_chunk_erase_state(state) __print_symbolic(state, \ + { PBLK_CHUNK_RESET_START, "START", }, \ + { PBLK_CHUNK_RESET_DONE, "OK", }, \ + { PBLK_CHUNK_RESET_FAILED, "FAILED" }) + + +TRACE_EVENT(pblk_chunk_reset, + + TP_PROTO(const char *name, struct ppa_addr *ppa, int state), + + TP_ARGS(name, ppa, state), + + TP_STRUCT__entry( + __string(name, name) + __field(u64, ppa) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->ppa = ppa->ppa; + __entry->state = state; + ), + + TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk), + show_chunk_erase_state((int)__entry->state)) + +); + +TRACE_EVENT(pblk_chunk_state, + + TP_PROTO(const char *name, struct ppa_addr *ppa, int state), + + TP_ARGS(name, ppa, state), + + TP_STRUCT__entry( + __string(name, name) + __field(u64, ppa) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->ppa = ppa->ppa; + __entry->state = state; + ), + + TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk), + show_chunk_flags((int)__entry->state)) + +); + +TRACE_EVENT(pblk_line_state, + + TP_PROTO(const char *name, int line, int state), + + TP_ARGS(name, line, state), + + TP_STRUCT__entry( + __string(name, name) + __field(int, line) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->line = line; + __entry->state = state; + ), + + TP_printk("dev=%s line=%d state=%s", __get_str(name), + (int)__entry->line, + show_line_state((int)__entry->state)) + 
+); + +TRACE_EVENT(pblk_state, + + TP_PROTO(const char *name, int state), + + TP_ARGS(name, state), + + TP_STRUCT__entry( + __string(name, name) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + ), + + TP_printk("dev=%s state=%s", __get_str(name), + show_pblk_state((int)__entry->state)) + +); + +#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */ + +/* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../../drivers/lightnvm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE pblk-trace +#include <trace/define_trace.h> diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index ee774a86cf1e..fa8726493b39 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez <javier@cnexlabs.com> @@ -16,6 +17,7 @@ */ #include "pblk.h" +#include "pblk-trace.h" static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx) @@ -81,8 +83,7 @@ static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd, #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes); #endif - - pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + pblk_up_rq(pblk, c_ctx->lun_bitmap); pos = pblk_rb_sync_init(&pblk->rwb, &flags); if (pos == c_ctx->sentry) { @@ -106,14 +107,12 @@ retry: /* Map remaining sectors in chunk, starting from ppa */ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line *line; struct ppa_addr map_ppa = *ppa; u64 paddr; int done = 0; - line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + line = pblk_ppa_to_line(pblk, *ppa); spin_lock(&line->lock); while (!done) { @@ -125,15 +124,7 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) if (!test_and_set_bit(paddr, line->invalid_bitmap)) le32_add_cpu(line->vsc, -1); - if (geo->version == NVM_OCSSD_SPEC_12) { - map_ppa.ppa++; - if (map_ppa.g.pg == geo->num_pg) - done = 1; - } else { - map_ppa.m.sec++; - if (map_ppa.m.sec == geo->clba) - done = 1; - } + done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa); } line->w_err_gc->has_write_err = 1; @@ -149,12 +140,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, struct pblk_w_ctx *w_ctx; struct ppa_addr ppa_l2p; int flags; - unsigned int pos, i; + unsigned int i; spin_lock(&pblk->trans_lock); - pos = sentry; for (i = 0; i < nr_entries; i++) { - entry = &rb->entries[pos]; + entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)]; w_ctx = &entry->w_ctx; /* Check if the lba has been overwritten */ @@ -168,13 +158,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, /* Release flags on write context. 
Protect from writes */ smp_store_release(&w_ctx->flags, flags); - /* Decrese the reference count to the line as we will + /* Decrease the reference count to the line as we will * re-map these entries */ - line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; + line = pblk_ppa_to_line(pblk, w_ctx->ppa); kref_put(&line->ref, pblk_line_put); - - pos = (pos + 1) & (rb->nr_entries - 1); } spin_unlock(&pblk->trans_lock); } @@ -208,19 +196,14 @@ static void pblk_submit_rec(struct work_struct *work) struct pblk *pblk = recovery->pblk; struct nvm_rq *rqd = recovery->rqd; struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct ppa_addr *ppa_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); pblk_log_write_err(pblk, rqd); - if (rqd->nr_ppas == 1) - ppa_list = &rqd->ppa_addr; - else - ppa_list = rqd->ppa_list; - pblk_map_remaining(pblk, ppa_list); pblk_queue_resubmit(pblk, c_ctx); - pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + pblk_up_rq(pblk, c_ctx->lun_bitmap); if (c_ctx->nr_padded) pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, c_ctx->nr_padded); @@ -257,11 +240,13 @@ static void pblk_end_io_write(struct nvm_rq *rqd) if (rqd->error) { pblk_end_w_fail(pblk, rqd); return; - } + } else { + if (trace_pblk_chunk_state_enabled()) + pblk_check_chunk_state_update(pblk, rqd); #ifdef CONFIG_NVM_PBLK_DEBUG - else WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n"); #endif + } pblk_complete_write(pblk, rqd, c_ctx); atomic_dec(&pblk->inflight_io); @@ -273,14 +258,18 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd); struct pblk_line *line = m_ctx->private; struct pblk_emeta *emeta = line->emeta; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); int sync; - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); if (rqd->error) { pblk_log_write_err(pblk, rqd); pblk_err(pblk, "metadata I/O failed. 
Line %d\n", line->id); line->w_err_gc->has_write_err = 1; + } else { + if (trace_pblk_chunk_state_enabled()) + pblk_check_chunk_state_update(pblk, rqd); } sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); @@ -294,27 +283,16 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) } static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int nr_secs, - nvm_end_io_fn(*end_io)) + unsigned int nr_secs, nvm_end_io_fn(*end_io)) { - struct nvm_tgt_dev *dev = pblk->dev; - /* Setup write request */ rqd->opcode = NVM_OP_PWRITE; rqd->nr_ppas = nr_secs; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); + rqd->is_seq = 1; rqd->private = pblk; rqd->end_io = end_io; - rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd->dma_meta_list); - if (!rqd->meta_list) - return -ENOMEM; - - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; - - return 0; + return pblk_alloc_rqd_meta(pblk, rqd); } static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, @@ -375,6 +353,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; struct pblk_emeta *emeta = meta_line->emeta; + struct ppa_addr *ppa_list; struct pblk_g_ctx *m_ctx; struct bio *bio; struct nvm_rq *rqd; @@ -409,22 +388,22 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) if (ret) goto fail_free_bio; + ppa_list = nvm_rq_to_ppa_list(rqd); for (i = 0; i < rqd->nr_ppas; ) { spin_lock(&meta_line->lock); paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas); spin_unlock(&meta_line->lock); for (j = 0; j < rq_ppas; j++, i++, paddr++) - rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); } + spin_lock(&l_mg->close_lock); emeta->mem += rq_len; - if (emeta->mem >= lm->emeta_len[0]) { - spin_lock(&l_mg->close_lock); + if (emeta->mem >= lm->emeta_len[0]) list_del(&meta_line->list); - spin_unlock(&l_mg->close_lock); - } + spin_unlock(&l_mg->close_lock); - pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_down_chunk(pblk, ppa_list[0]); ret = pblk_submit_io(pblk, rqd); if (ret) { @@ -435,7 +414,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) return NVM_IO_OK; fail_rollback: - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); spin_lock(&l_mg->close_lock); pblk_dealloc_page(pblk, meta_line, rq_ppas); list_add(&meta_line->list, &meta_line->list); @@ -491,14 +470,15 @@ static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line; spin_lock(&l_mg->close_lock); -retry: if (list_empty(&l_mg->emeta_list)) { spin_unlock(&l_mg->close_lock); return NULL; } meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list); - if (meta_line->emeta->mem >= lm->emeta_len[0]) - goto retry; + if (meta_line->emeta->mem >= lm->emeta_len[0]) { + spin_unlock(&l_mg->close_lock); + return NULL; + } spin_unlock(&l_mg->close_lock); if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd)) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 4760af7b6499..02bb2e98f8a9 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2015 IT University of Copenhagen (rrpc.h) * Copyright (C) 2016 CNEX Labs @@ -37,8 +38,6 @@ #define PBLK_SECTOR (512) #define PBLK_EXPOSED_PAGE_SIZE (4096) -#define PBLK_MAX_REQ_ADDRS 
(64) -#define PBLK_MAX_REQ_ADDRS_PW (6) #define PBLK_NR_CLOSE_JOBS (4) @@ -81,6 +80,12 @@ enum { PBLK_BLK_ST_CLOSED = 0x2, }; +enum { + PBLK_CHUNK_RESET_START, + PBLK_CHUNK_RESET_DONE, + PBLK_CHUNK_RESET_FAILED, +}; + struct pblk_sec_meta { u64 reserved; __le64 lba; @@ -99,8 +104,8 @@ enum { PBLK_RL_LOW = 4 }; -#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) -#define pblk_dma_ppa_size (sizeof(u64) * PBLK_MAX_REQ_ADDRS) +#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA) +#define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA) /* write buffer completion context */ struct pblk_c_ctx { @@ -198,6 +203,11 @@ struct pblk_rb { * will be 4KB */ + unsigned int back_thres; /* Threshold that shall be maintained by + * the backpointer in order to respect + * geo->mw_cunits on a per chunk basis + */ + struct list_head pages; /* List of data pages */ spinlock_t w_lock; /* Write lock */ @@ -218,8 +228,8 @@ struct pblk_lun { struct pblk_gc_rq { struct pblk_line *line; void *data; - u64 paddr_list[PBLK_MAX_REQ_ADDRS]; - u64 lba_list[PBLK_MAX_REQ_ADDRS]; + u64 paddr_list[NVM_MAX_VLBA]; + u64 lba_list[NVM_MAX_VLBA]; int nr_secs; int secs_to_gc; struct list_head list; @@ -532,6 +542,10 @@ struct pblk_line_mgmt { struct pblk_emeta *eline_meta[PBLK_DATA_LINES]; unsigned long meta_bitmap; + /* Cache and mempool for map/invalid bitmaps */ + struct kmem_cache *bitmap_cache; + mempool_t *bitmap_pool; + /* Helpers for fast bitmap calculations */ unsigned long *bb_template; unsigned long *bb_aux; @@ -725,10 +739,8 @@ struct pblk_line_ws { /* * pblk ring buffer operations */ -int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, - unsigned int power_size, unsigned int power_seg_sz); -unsigned int pblk_rb_calculate_size(unsigned int nr_entries); -void *pblk_rb_entries_ref(struct pblk_rb *rb); +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, + unsigned int seg_sz); int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, unsigned int nr_entries, unsigned int *pos); int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, @@ -751,8 +763,8 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); -struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, - struct ppa_addr *ppa); +unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, + unsigned int nr_entries); void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb); @@ -762,7 +774,7 @@ unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); int pblk_rb_tear_down_check(struct pblk_rb *rb); int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos); -void pblk_rb_data_free(struct pblk_rb *rb); +void pblk_rb_free(struct pblk_rb *rb); ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); /* @@ -770,11 +782,13 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); */ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type); void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type); +int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); +void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); void 
pblk_discard(struct pblk *pblk, struct bio *bio); -struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk); +struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, struct nvm_chk_meta *lp, struct ppa_addr ppa); @@ -782,13 +796,17 @@ void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd); +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line); +void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd); struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, unsigned int nr_secs, unsigned int len, int alloc_type, gfp_t gfp_mask); struct pblk_line *pblk_line_get(struct pblk *pblk); struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); struct pblk_line *pblk_line_replace_data(struct pblk *pblk); +void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa); +void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd); int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); struct pblk_line *pblk_line_get_data(struct pblk *pblk); @@ -806,8 +824,8 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq); u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); -int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); -int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line); +int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, void *emeta_buf); int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); void pblk_line_put(struct kref *ref); @@ -819,12 +837,11 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, unsigned long secs_to_flush); -void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); -void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, +void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, unsigned long *lun_bitmap); -void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); -void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, - unsigned long *lun_bitmap); +void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa); +void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa); +void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap); int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int nr_pages); void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, @@ -976,17 +993,15 @@ static inline int pblk_line_vsc(struct pblk_line *line) return le32_to_cpu(*line->vsc); } -static inline int pblk_pad_distance(struct pblk *pblk) +static inline int pblk_ppa_to_line_id(struct ppa_addr p) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - return geo->mw_cunits * geo->all_luns * geo->ws_opt; + return p.a.blk; } -static inline int pblk_ppa_to_line(struct ppa_addr p) 
+static inline struct pblk_line *pblk_ppa_to_line(struct pblk *pblk, + struct ppa_addr p) { - return p.a.blk; + return &pblk->lines[pblk_ppa_to_line_id(p)]; } static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) @@ -1034,6 +1049,25 @@ static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, return ppa; } +static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk, + struct ppa_addr p) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *line = pblk_ppa_to_line(pblk, p); + int pos = pblk_ppa_to_pos(geo, p); + + return &line->chks[pos]; +} + +static inline u64 pblk_dev_ppa_to_chunk_addr(struct pblk *pblk, + struct ppa_addr p) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + return dev_to_chunk_addr(dev->parent, &pblk->addrf, p); +} + static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, struct ppa_addr p) { @@ -1067,86 +1101,16 @@ static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) { - struct ppa_addr ppa64; - - ppa64.ppa = 0; - - if (ppa32 == -1) { - ppa64.ppa = ADDR_EMPTY; - } else if (ppa32 & (1U << 31)) { - ppa64.c.line = ppa32 & ((~0U) >> 1); - ppa64.c.is_cached = 1; - } else { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = - (struct nvm_addrf_12 *)&pblk->addrf; - - ppa64.g.ch = (ppa32 & ppaf->ch_mask) >> - ppaf->ch_offset; - ppa64.g.lun = (ppa32 & ppaf->lun_mask) >> - ppaf->lun_offset; - ppa64.g.blk = (ppa32 & ppaf->blk_mask) >> - ppaf->blk_offset; - ppa64.g.pg = (ppa32 & ppaf->pg_mask) >> - ppaf->pg_offset; - ppa64.g.pl = (ppa32 & ppaf->pln_mask) >> - ppaf->pln_offset; - ppa64.g.sec = (ppa32 & ppaf->sec_mask) >> - ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = &pblk->addrf; - - ppa64.m.grp = (ppa32 & lbaf->ch_mask) >> - lbaf->ch_offset; - ppa64.m.pu = (ppa32 & lbaf->lun_mask) >> - lbaf->lun_offset; - ppa64.m.chk = (ppa32 & lbaf->chk_mask) >> - lbaf->chk_offset; - ppa64.m.sec = (ppa32 & lbaf->sec_mask) >> - lbaf->sec_offset; - } - } + struct nvm_tgt_dev *dev = pblk->dev; - return ppa64; + return nvm_ppa32_to_ppa64(dev->parent, &pblk->addrf, ppa32); } static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) { - u32 ppa32 = 0; - - if (ppa64.ppa == ADDR_EMPTY) { - ppa32 = ~0U; - } else if (ppa64.c.is_cached) { - ppa32 |= ppa64.c.line; - ppa32 |= 1U << 31; - } else { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = - (struct nvm_addrf_12 *)&pblk->addrf; - - ppa32 |= ppa64.g.ch << ppaf->ch_offset; - ppa32 |= ppa64.g.lun << ppaf->lun_offset; - ppa32 |= ppa64.g.blk << ppaf->blk_offset; - ppa32 |= ppa64.g.pg << ppaf->pg_offset; - ppa32 |= ppa64.g.pl << ppaf->pln_offset; - ppa32 |= ppa64.g.sec << ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = &pblk->addrf; - - ppa32 |= ppa64.m.grp << lbaf->ch_offset; - ppa32 |= ppa64.m.pu << lbaf->lun_offset; - ppa32 |= ppa64.m.chk << lbaf->chk_offset; - ppa32 |= ppa64.m.sec << lbaf->sec_offset; - } - } + struct nvm_tgt_dev *dev = pblk->dev; - return ppa32; + return nvm_ppa64_to_ppa32(dev->parent, &pblk->addrf, ppa64); } static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, @@ -1255,44 +1219,6 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk, return crc; } -static inline int pblk_set_progr_mode(struct pblk 
*pblk, int type) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int flags; - - if (geo->version == NVM_OCSSD_SPEC_20) - return 0; - - flags = geo->pln_mode >> 1; - - if (type == PBLK_WRITE) - flags |= NVM_IO_SCRAMBLE_ENABLE; - - return flags; -} - -enum { - PBLK_READ_RANDOM = 0, - PBLK_READ_SEQUENTIAL = 1, -}; - -static inline int pblk_set_read_mode(struct pblk *pblk, int type) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int flags; - - if (geo->version == NVM_OCSSD_SPEC_20) - return 0; - - flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; - if (type == PBLK_READ_SEQUENTIAL) - flags |= geo->pln_mode >> 1; - - return flags; -} - static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs) { return !(nr_secs % pblk->min_write_pgs); @@ -1375,9 +1301,7 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa_list; - - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { WARN_ON(1); @@ -1386,12 +1310,10 @@ static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) if (rqd->opcode == NVM_OP_PWRITE) { struct pblk_line *line; - struct ppa_addr ppa; int i; for (i = 0; i < rqd->nr_ppas; i++) { - ppa = ppa_list[i]; - line = &pblk->lines[pblk_ppa_to_line(ppa)]; + line = pblk_ppa_to_line(pblk, ppa_list[i]); spin_lock(&line->lock); if (line->state != PBLK_LINESTATE_OPEN) { @@ -1441,4 +1363,11 @@ static inline void pblk_setup_uuid(struct pblk *pblk) uuid_le_gen(&uuid); memcpy(pblk->instance_uuid, uuid.b, 16); } + +static inline char *pblk_disk_name(struct pblk *pblk) +{ + struct gendisk *disk = pblk->disk; + + return disk->disk_name; +} #endif /* PBLK_H_ */ diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 7a28232d868b..5002838ea476 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -484,7 +484,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, int i; lockdep_assert_held(&c->bucket_lock); - BUG_ON(!n || n > c->caches_loaded || n > 8); + BUG_ON(!n || n > c->caches_loaded || n > MAX_CACHES_PER_SET); bkey_init(k); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 954dad29e6e8..b61b83bbcfff 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -1004,7 +1004,7 @@ void bch_open_buckets_free(struct cache_set *c); int bch_cache_allocator_start(struct cache *ca); void bch_debug_exit(void); -void bch_debug_init(struct kobject *kobj); +void bch_debug_init(void); void bch_request_exit(void); int bch_request_init(void); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e7d4817681f2..3f4211b5cd33 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2434,7 +2434,7 @@ static int refill_keybuf_fn(struct btree_op *op, struct btree *b, struct keybuf *buf = refill->buf; int ret = MAP_CONTINUE; - if (bkey_cmp(k, refill->end) >= 0) { + if (bkey_cmp(k, refill->end) > 0) { ret = MAP_DONE; goto out; } diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index eca0d496b686..c88cdc4ae4ec 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -345,7 +345,8 @@ do { \ } while (0) /** - * closure_return - finish execution of a closure, with destructor + * 
closure_return_with_destructor - finish execution of a closure, + * with destructor * * Works like closure_return(), except @destructor will be called when all * outstanding refs on @cl have been dropped; @destructor may be used to safely diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 06da66b2488a..8f448b9c96a1 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -253,7 +253,7 @@ void bch_debug_exit(void) debugfs_remove_recursive(bcache_debug); } -void __init bch_debug_init(struct kobject *kobj) +void __init bch_debug_init(void) { /* * it is unnecessary to check return value of diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index c809724e6571..956004366699 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -553,7 +553,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) for (i = 0; i < KEY_PTRS(k); i++) { stale = ptr_stale(b->c, k, i); - btree_bug_on(stale > 96, b, + btree_bug_on(stale > BUCKET_GC_GEN_MAX, b, "key too stale: %i, need_gc %u", stale, b->c->need_gc); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 51be355a3309..3bf35914bb57 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -395,7 +395,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) * unless the read-ahead request is for metadata (eg, for gfs2). */ if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && - !(bio->bi_opf & REQ_META)) + !(bio->bi_opf & REQ_PRIO)) goto skip; if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || @@ -850,7 +850,7 @@ static void cached_dev_read_done_bh(struct closure *cl) bch_mark_cache_accounting(s->iop.c, s->d, !s->cache_missed, s->iop.bypass); - trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); + trace_bcache_read(s->orig_bio, !s->cache_missed, s->iop.bypass); if (s->iop.status) continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); @@ -877,7 +877,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, } if (!(bio->bi_opf & REQ_RAHEAD) && - !(bio->bi_opf & REQ_META) && + !(bio->bi_opf & REQ_PRIO) && s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA) reada = min_t(sector_t, dc->readahead >> 9, get_capacity(bio->bi_disk) - bio_end_sector(bio)); @@ -1218,6 +1218,9 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, { struct cached_dev *dc = container_of(d, struct cached_dev, disk); + if (dc->io_disable) + return -EIO; + return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); } diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index aa055cfeb099..721bf336ed1a 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -39,6 +39,6 @@ void bch_data_insert(struct closure *cl); void bch_cached_dev_request_init(struct cached_dev *dc); void bch_flash_dev_request_init(struct bcache_device *d); -extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache; +extern struct kmem_cache *bch_search_cache; #endif /* _BCACHE_REQUEST_H_ */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 30ba9aeb5ee8..7bbd670a5a84 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -418,6 +418,7 @@ static int __uuid_write(struct cache_set *c) { BKEY_PADDED(key) k; struct closure cl; + struct cache *ca; closure_init_stack(&cl); lockdep_assert_held(&bch_register_lock); @@ -429,6 +430,10 @@ static int __uuid_write(struct cache_set *c) uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl); closure_sync(&cl); + /* 
Only one bucket used for uuid write */ + ca = PTR_CACHE(c, &k.key, 0); + atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written); + bkey_copy(&c->uuid_bucket, &k.key); bkey_put(c, &k.key); return 0; @@ -643,10 +648,6 @@ static int ioctl_dev(struct block_device *b, fmode_t mode, unsigned int cmd, unsigned long arg) { struct bcache_device *d = b->bd_disk->private_data; - struct cached_dev *dc = container_of(d, struct cached_dev, disk); - - if (dc->io_disable) - return -EIO; return d->ioctl(d, mode, cmd, arg); } @@ -1008,6 +1009,7 @@ static void cached_dev_detach_finish(struct work_struct *w) bch_write_bdev_super(dc, &cl); closure_sync(&cl); + calc_cached_dev_sectors(dc->disk.c); bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); @@ -1152,11 +1154,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); bch_writeback_queue(dc); } + bch_sectors_dirty_init(&dc->disk); + bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev"); atomic_inc(&c->attached_dev_nr); @@ -2049,6 +2052,8 @@ static int cache_alloc(struct cache *ca) size_t free; size_t btree_buckets; struct bucket *b; + int ret = -ENOMEM; + const char *err = NULL; __module_get(THIS_MODULE); kobject_init(&ca->kobj, &bch_cache_ktype); @@ -2066,27 +2071,93 @@ static int cache_alloc(struct cache *ca) */ btree_buckets = ca->sb.njournal_buckets ?: 8; free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; + if (!free) { + ret = -EPERM; + err = "ca->sb.nbuckets is too small"; + goto err_free; + } - if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) || - !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || - !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || - !init_heap(&ca->heap, free << 3, GFP_KERNEL) || - !(ca->buckets = vzalloc(array_size(sizeof(struct bucket), - ca->sb.nbuckets))) || - !(ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), - prio_buckets(ca), 2), - GFP_KERNEL)) || - !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca))) - return -ENOMEM; + if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, + GFP_KERNEL)) { + err = "ca->free[RESERVE_BTREE] alloc failed"; + goto err_btree_alloc; + } + + if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), + GFP_KERNEL)) { + err = "ca->free[RESERVE_PRIO] alloc failed"; + goto err_prio_alloc; + } + + if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) { + err = "ca->free[RESERVE_MOVINGGC] alloc failed"; + goto err_movinggc_alloc; + } + + if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) { + err = "ca->free[RESERVE_NONE] alloc failed"; + goto err_none_alloc; + } + + if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) { + err = "ca->free_inc alloc failed"; + goto err_free_inc_alloc; + } + + if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) { + err = "ca->heap alloc failed"; + goto err_heap_alloc; + } + + ca->buckets = vzalloc(array_size(sizeof(struct bucket), + ca->sb.nbuckets)); + if (!ca->buckets) { + err = "ca->buckets alloc failed"; + goto err_buckets_alloc; + } + + ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), + prio_buckets(ca), 2), + GFP_KERNEL); + if (!ca->prio_buckets) { + err = "ca->prio_buckets alloc failed"; + goto err_prio_buckets_alloc; + } + + ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca); + if 
(!ca->disk_buckets) { + err = "ca->disk_buckets alloc failed"; + goto err_disk_buckets_alloc; + } ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); for_each_bucket(b, ca) atomic_set(&b->pin, 0); - return 0; + +err_disk_buckets_alloc: + kfree(ca->prio_buckets); +err_prio_buckets_alloc: + vfree(ca->buckets); +err_buckets_alloc: + free_heap(&ca->heap); +err_heap_alloc: + free_fifo(&ca->free_inc); +err_free_inc_alloc: + free_fifo(&ca->free[RESERVE_NONE]); +err_none_alloc: + free_fifo(&ca->free[RESERVE_MOVINGGC]); +err_movinggc_alloc: + free_fifo(&ca->free[RESERVE_PRIO]); +err_prio_alloc: + free_fifo(&ca->free[RESERVE_BTREE]); +err_btree_alloc: +err_free: + module_put(THIS_MODULE); + if (err) + pr_notice("error %s: %s", ca->cache_dev_name, err); + return ret; } static int register_cache(struct cache_sb *sb, struct page *sb_page, @@ -2112,6 +2183,8 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); if (ret == -ENOMEM) err = "cache_alloc(): -ENOMEM"; + else if (ret == -EPERM) + err = "cache_alloc(): cache device is too small"; else err = "cache_alloc(): unknown error"; goto err; @@ -2386,7 +2459,7 @@ static int __init bcache_init(void) sysfs_create_files(bcache_kobj, files)) goto err; - bch_debug_init(bcache_kobj); + bch_debug_init(); closure_debug_init(); return 0; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 150cf4f4cf74..26f035a0c5b9 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -285,6 +285,7 @@ STORE(__cached_dev) 1, WRITEBACK_RATE_UPDATE_SECS_MAX); d_strtoul(writeback_rate_i_term_inverse); d_strtoul_nonzero(writeback_rate_p_term_inverse); + d_strtoul_nonzero(writeback_rate_minimum); sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX); @@ -412,6 +413,7 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_writeback_rate_update_seconds, &sysfs_writeback_rate_i_term_inverse, &sysfs_writeback_rate_p_term_inverse, + &sysfs_writeback_rate_minimum, &sysfs_writeback_rate_debug, &sysfs_errors, &sysfs_io_error_limit, diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index e13d991e9fb5..b29a8327eed1 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3484,14 +3484,13 @@ static int __init dm_cache_init(void) int r; migration_cache = KMEM_CACHE(dm_cache_migration, 0); - if (!migration_cache) { - dm_unregister_target(&cache_target); + if (!migration_cache) return -ENOMEM; - } r = dm_register_target(&cache_target); if (r) { DMERR("cache target registration failed: %d", r); + kmem_cache_destroy(migration_cache); return r; } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 21d126a5078c..32aabe27b37c 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -467,7 +467,9 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", .version = {1, 5, 0}, +#ifdef CONFIG_BLK_DEV_ZONED .features = DM_TARGET_ZONED_HM, +#endif .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 89ccb64342de..e1fa6baf4e8e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3462,7 +3462,8 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } - ic->recalc_tags = kvmalloc((RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size, GFP_KERNEL); + ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> 
ic->sb->log2_sectors_per_block, + ic->tag_size, GFP_KERNEL); if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for recalculating"; r = -ENOMEM; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index d10964d41fd7..2f7c44a006c4 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -102,6 +102,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } +#ifdef CONFIG_BLK_DEV_ZONED static int linear_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) { @@ -112,6 +113,7 @@ static int linear_end_io(struct dm_target *ti, struct bio *bio, return DM_ENDIO_DONE; } +#endif static void linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) @@ -208,12 +210,16 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, +#ifdef CONFIG_BLK_DEV_ZONED + .end_io = linear_end_io, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, +#else + .features = DM_TARGET_PASSES_INTEGRITY, +#endif .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, .map = linear_map, - .end_io = linear_end_io, .status = linear_status, .prepare_ioctl = linear_prepare_ioctl, .iterate_devices = linear_iterate_devices, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 20f7e4ef5342..45abb54037fc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1155,12 +1155,14 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) EXPORT_SYMBOL_GPL(dm_accept_partial_bio); /* - * The zone descriptors obtained with a zone report indicate - * zone positions within the target device. The zone descriptors - * must be remapped to match their position within the dm device. - * A target may call dm_remap_zone_report after completion of a - * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained - * from the target device mapping to the dm device. + * The zone descriptors obtained with a zone report indicate zone positions + * within the target backing device, regardless of that device is a partition + * and regardless of the target mapping start sector on the device or partition. + * The zone descriptors start sector and write pointer position must be adjusted + * to match their relative position within the dm device. + * A target may call dm_remap_zone_report() after completion of a + * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the + * backing device. */ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) { @@ -1171,6 +1173,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) struct blk_zone *zone; unsigned int nr_rep = 0; unsigned int ofst; + sector_t part_offset; struct bio_vec bvec; struct bvec_iter iter; void *addr; @@ -1179,6 +1182,15 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) return; /* + * bio sector was incremented by the request size on completion. Taking + * into account the original request sector, the target start offset on + * the backing device and the target mapping offset (ti->begin), the + * start sector of the backing device. The partition offset is always 0 + * if the target uses a whole device. + */ + part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio)); + + /* * Remap the start sector of the reported zones. For sequential zones, * also remap the write pointer position. 
*/ @@ -1195,6 +1207,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) /* Set zones start sector */ while (hdr->nr_zones && ofst < bvec.bv_len) { zone = addr + ofst; + zone->start -= part_offset; if (zone->start >= start + ti->len) { hdr->nr_zones = 0; break; @@ -1206,7 +1219,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) else if (zone->cond == BLK_ZONE_COND_EMPTY) zone->wp = zone->start; else - zone->wp = zone->wp + ti->begin - start; + zone->wp = zone->wp + ti->begin - start - part_offset; } ofst += sizeof(struct blk_zone); hdr->nr_zones--; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ac1cffd2a09b..f3fb5bb8c82a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) !discard_bio) continue; bio_chain(discard_bio, bio); - bio_clone_blkcg_association(discard_bio, bio); + bio_clone_blkg_association(discard_bio, bio); if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rdev->bdev), discard_bio, disk_devt(mddev->gendisk), diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 716fc8ed31d3..8a02f11076f9 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2146,7 +2146,7 @@ static int msb_init_disk(struct memstick_dev *card) set_disk_ro(msb->disk, 1); msb_start(card); - device_add_disk(&card->dev, msb->disk); + device_add_disk(&card->dev, msb->disk, NULL); dbg("Disk added"); return 0; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 5ee932631fae..0cd30dcb6801 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1236,7 +1236,7 @@ static int mspro_block_init_disk(struct memstick_dev *card) set_capacity(msb->disk, capacity); dev_dbg(&card->dev, "capacity set %ld\n", capacity); - device_add_disk(&card->dev, msb->disk); + device_add_disk(&card->dev, msb->disk, NULL); msb->active = 1; return 0; diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index a0b9102c4c6e..c35b5b08bb33 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1371,6 +1371,16 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, if (brq->data.blocks > 1) { /* + * Some SD cards in SPI mode return a CRC error or even lock up + * completely when trying to read the last block using a + * multiblock read command. + */ + if (mmc_host_is_spi(card->host) && (rq_data_dir(req) == READ) && + (blk_rq_pos(req) + blk_rq_sectors(req) == + get_capacity(md->disk))) + brq->data.blocks--; + + /* * After a read error, we redo the request one sector * at a time in order to accurately determine which * sectors can be read successfully. 
@@ -2698,7 +2708,7 @@ static int mmc_add_disk(struct mmc_blk_data *md) int ret; struct mmc_card *card = md->queue.card; - device_add_disk(md->parent, md->disk); + device_add_disk(md->parent, md->disk, NULL); md->force_ro.show = force_ro_show; md->force_ro.store = force_ro_store; sysfs_attr_init(&md->force_ro.attr); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 29c0bfd74e8a..b0d44f9214b0 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -27,6 +27,7 @@ #include <linux/mtd/blktrans.h> #include <linux/mtd/mtd.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/blkpg.h> #include <linux/spinlock.h> #include <linux/hdreg.h> @@ -45,6 +46,8 @@ static void blktrans_dev_release(struct kref *kref) dev->disk->private_data = NULL; blk_cleanup_queue(dev->rq); + blk_mq_free_tag_set(dev->tag_set); + kfree(dev->tag_set); put_disk(dev->disk); list_del(&dev->list); kfree(dev); @@ -134,28 +137,39 @@ int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev) } EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background); -static void mtd_blktrans_work(struct work_struct *work) +static struct request *mtd_next_request(struct mtd_blktrans_dev *dev) +{ + struct request *rq; + + rq = list_first_entry_or_null(&dev->rq_list, struct request, queuelist); + if (rq) { + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); + return rq; + } + + return NULL; +} + +static void mtd_blktrans_work(struct mtd_blktrans_dev *dev) + __releases(&dev->queue_lock) + __acquires(&dev->queue_lock) { - struct mtd_blktrans_dev *dev = - container_of(work, struct mtd_blktrans_dev, work); struct mtd_blktrans_ops *tr = dev->tr; - struct request_queue *rq = dev->rq; struct request *req = NULL; int background_done = 0; - spin_lock_irq(rq->queue_lock); - while (1) { blk_status_t res; dev->bg_stop = false; - if (!req && !(req = blk_fetch_request(rq))) { + if (!req && !(req = mtd_next_request(dev))) { if (tr->background && !background_done) { - spin_unlock_irq(rq->queue_lock); + spin_unlock_irq(&dev->queue_lock); mutex_lock(&dev->lock); tr->background(dev); mutex_unlock(&dev->lock); - spin_lock_irq(rq->queue_lock); + spin_lock_irq(&dev->queue_lock); /* * Do background processing just once per idle * period. 
@@ -166,35 +180,39 @@ static void mtd_blktrans_work(struct work_struct *work) break; } - spin_unlock_irq(rq->queue_lock); + spin_unlock_irq(&dev->queue_lock); mutex_lock(&dev->lock); res = do_blktrans_request(dev->tr, dev, req); mutex_unlock(&dev->lock); - spin_lock_irq(rq->queue_lock); - - if (!__blk_end_request_cur(req, res)) + if (!blk_update_request(req, res, blk_rq_cur_bytes(req))) { + __blk_mq_end_request(req, res); req = NULL; + } background_done = 0; + spin_lock_irq(&dev->queue_lock); } - - spin_unlock_irq(rq->queue_lock); } -static void mtd_blktrans_request(struct request_queue *rq) +static blk_status_t mtd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { struct mtd_blktrans_dev *dev; - struct request *req = NULL; - dev = rq->queuedata; + dev = hctx->queue->queuedata; + if (!dev) { + blk_mq_start_request(bd->rq); + return BLK_STS_IOERR; + } + + spin_lock_irq(&dev->queue_lock); + list_add_tail(&bd->rq->queuelist, &dev->rq_list); + mtd_blktrans_work(dev); + spin_unlock_irq(&dev->queue_lock); - if (!dev) - while ((req = blk_fetch_request(rq)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); - else - queue_work(dev->wq, &dev->work); + return BLK_STS_OK; } static int blktrans_open(struct block_device *bdev, fmode_t mode) @@ -329,6 +347,10 @@ static const struct block_device_operations mtd_block_ops = { .getgeo = blktrans_getgeo, }; +static const struct blk_mq_ops mtd_mq_ops = { + .queue_rq = mtd_queue_rq, +}; + int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) { struct mtd_blktrans_ops *tr = new->tr; @@ -416,11 +438,20 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) /* Create the request queue */ spin_lock_init(&new->queue_lock); - new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock); + INIT_LIST_HEAD(&new->rq_list); - if (!new->rq) + new->tag_set = kzalloc(sizeof(*new->tag_set), GFP_KERNEL); + if (!new->tag_set) goto error3; + new->rq = blk_mq_init_sq_queue(new->tag_set, &mtd_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + if (IS_ERR(new->rq)) { + ret = PTR_ERR(new->rq); + new->rq = NULL; + goto error4; + } + if (tr->flush) blk_queue_write_cache(new->rq, true, false); @@ -437,17 +468,10 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) gd->queue = new->rq; - /* Create processing workqueue */ - new->wq = alloc_workqueue("%s%d", 0, 0, - tr->name, new->mtd->index); - if (!new->wq) - goto error4; - INIT_WORK(&new->work, mtd_blktrans_work); - if (new->readonly) set_disk_ro(gd, 1); - device_add_disk(&new->mtd->dev, gd); + device_add_disk(&new->mtd->dev, gd, NULL); if (new->disk_attributes) { ret = sysfs_create_group(&disk_to_dev(gd)->kobj, @@ -456,7 +480,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) } return 0; error4: - blk_cleanup_queue(new->rq); + kfree(new->tag_set); error3: put_disk(new->disk); error2: @@ -481,15 +505,17 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) /* Stop new requests to arrive */ del_gendisk(old->disk); - /* Stop workqueue. This will perform any pending request. 
*/ - destroy_workqueue(old->wq); - /* Kill current requests */ spin_lock_irqsave(&old->queue_lock, flags); old->rq->queuedata = NULL; - blk_start_queue(old->rq); spin_unlock_irqrestore(&old->queue_lock, flags); + /* freeze+quiesce queue to ensure all requests are flushed */ + blk_mq_freeze_queue(old->rq); + blk_mq_quiesce_queue(old->rq); + blk_mq_unquiesce_queue(old->rq); + blk_mq_unfreeze_queue(old->rq); + /* If the device is currently open, tell trans driver to close it, then put mtd device, and don't touch it again */ mutex_lock(&old->lock); diff --git a/drivers/mux/adgs1408.c b/drivers/mux/adgs1408.c index 0f7cf54e3234..89096f10f4c4 100644 --- a/drivers/mux/adgs1408.c +++ b/drivers/mux/adgs1408.c @@ -128,4 +128,4 @@ module_spi_driver(adgs1408_driver); MODULE_AUTHOR("Mircea Caprioru <mircea.caprioru@analog.com>"); MODULE_DESCRIPTION("Analog Devices ADGS1408 MUX driver"); -MODULE_LICENSE("GPL v2"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index e0066adcd2f3..fc8b48adf38b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -703,7 +703,6 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) static int bcm_sf2_sw_resume(struct dsa_switch *ds) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - unsigned int port; int ret; ret = bcm_sf2_sw_rst(priv); @@ -715,14 +714,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) if (priv->hw_params.num_gphy == 1) bcm_sf2_gphy_enable_set(ds, true); - for (port = 0; port < DSA_MAX_PORTS; port++) { - if (dsa_is_user_port(ds, port)) - bcm_sf2_port_setup(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) - bcm_sf2_imp_setup(ds, port); - } - - bcm_sf2_enable_acb(ds); + ds->ops->setup(ds); return 0; } @@ -1173,10 +1165,10 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); - /* Disable all ports and interrupts */ priv->wol_ports_mask = 0; - bcm_sf2_sw_suspend(priv->dev->ds); dsa_unregister_switch(priv->dev->ds); + /* Disable all ports and interrupts */ + bcm_sf2_sw_suspend(priv->dev->ds); bcm_sf2_mdio_unregister(priv); return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index 1c682b76190f..2b3ff0c20155 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -245,11 +245,11 @@ static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT; ena_rx_ctx->l3_csum_err = - (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT; + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT); ena_rx_ctx->l4_csum_err = - (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> - ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT; + !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT); ena_rx_ctx->hash = cdesc->hash; ena_rx_ctx->frag = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 25621a218f20..d906293ce07d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1575,8 +1575,6 @@ static int ena_up_complete(struct ena_adapter *adapter) if (rc) 
return rc; - ena_init_napi(adapter); - ena_change_mtu(adapter->netdev, adapter->netdev->mtu); ena_refill_all_rx_bufs(adapter); @@ -1730,6 +1728,13 @@ static int ena_up(struct ena_adapter *adapter) ena_setup_io_intr(adapter); + /* napi poll functions should be initialized before running + * request_irq(), to handle a rare condition where there is a pending + * interrupt, causing the ISR to fire immediately while the poll + * function wasn't set yet, causing a null dereference + */ + ena_init_napi(adapter); + rc = ena_request_io_irq(adapter); if (rc) goto err_req_irq; @@ -2619,7 +2624,11 @@ err_disable_msix: ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); err_device_destroy: + ena_com_abort_admin_commands(ena_dev); + ena_com_wait_for_abort_completion(ena_dev); ena_com_admin_destroy(ena_dev); + ena_com_mmio_reg_read_request_destroy(ena_dev); + ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); err: clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); @@ -3099,15 +3108,8 @@ err_rss_init: static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { - int release_bars; - - if (ena_dev->mem_bar) - devm_iounmap(&pdev->dev, ena_dev->mem_bar); - - if (ena_dev->reg_bar) - devm_iounmap(&pdev->dev, ena_dev->reg_bar); + int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; - release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 4241ae928d4a..34af5f1569c8 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -321,9 +321,12 @@ int bcmgenet_mii_probe(struct net_device *dev) phydev->advertising = phydev->supported; /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fallback to polling for + * those versions of GENET. */ - if (priv->internal_phy) + if (priv->internal_phy && !GENET_IS_V5(priv)) dev->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 4778b663653e..bf80855dd0dd 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -452,6 +452,10 @@ struct bufdesc_ex { * initialisation. */ #define FEC_QUIRK_MIB_CLEAR (1 << 15) +/* Only i.MX25/i.MX27/i.MX28 controller supports FRBR,FRSR registers, + * those FIFO receive registers are resolved in other platforms. 
+ */ +#define FEC_QUIRK_HAS_FRREG (1 << 16) struct bufdesc_prop { int qid; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index bf9b9fd6d2a0..7b98bb75ba8a 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -91,14 +91,16 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | - FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC, + FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx6q-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | @@ -2164,7 +2166,13 @@ static void fec_enet_get_regs(struct net_device *ndev, memset(buf, 0, regs->len); for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { - off = fec_enet_register_offset[i] / 4; + off = fec_enet_register_offset[i]; + + if ((off == FEC_R_BOUND || off == FEC_R_FSTART) && + !(fep->quirks & FEC_QUIRK_HAS_FRREG)) + continue; + + off >>= 2; buf[off] = readl(&theregs[off]); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d2d59444f562..6a046030e873 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -260,47 +260,34 @@ static const struct devlink_param mlx4_devlink_params[] = { NULL, NULL, NULL), }; -static void mlx4_devlink_set_init_value(struct devlink *devlink, u32 param_id, - union devlink_param_value init_val) -{ - struct mlx4_priv *priv = devlink_priv(devlink); - struct mlx4_dev *dev = &priv->dev; - int err; - - err = devlink_param_driverinit_value_set(devlink, param_id, init_val); - if (err) - mlx4_warn(dev, - "devlink set parameter %u value failed (err = %d)", - param_id, err); -} - static void mlx4_devlink_set_params_init_values(struct devlink *devlink) { union devlink_param_value value; value.vbool = !!mlx4_internal_err_reset; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, - value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, + value); value.vu32 = 1UL << log_num_mac; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_MAX_MACS, value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); value.vbool = enable_64b_cqe_eqe; - mlx4_devlink_set_init_value(devlink, - MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, - value); + devlink_param_driverinit_value_set(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE, + value); value.vbool = enable_4k_uar; - mlx4_devlink_set_init_value(devlink, - MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, - value); + devlink_param_driverinit_value_set(devlink, + MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR, + value); value.vbool = false; - mlx4_devlink_set_init_value(devlink, - DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, - value); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, + value); } static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 15d8ae28c040..00172dee5339 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -432,10 +432,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, struct mlx5_wq_cyc *wq, - u16 pi, u16 frag_pi) + u16 pi, u16 nnops) { struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi]; - u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; edge_wi = wi + nnops; @@ -454,15 +453,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); - u16 pi, frag_pi; + u16 pi, contig_wqebbs_room; int err; int i; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - - if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) { - mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { + mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ae73ea992845..6dacaeba2fbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -290,10 +290,9 @@ dma_unmap_wqe_err: static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, - u16 pi, u16 frag_pi) + u16 pi, u16 nnops) { struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; edge_wi = wi + nnops; @@ -348,8 +347,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; + u16 headlen, ihs, contig_wqebbs_room; u16 ds_cnt, ds_cnt_inl = 0; - u16 headlen, ihs, frag_pi; u8 num_wqebbs, opcode; u32 num_bytes; int num_dma; @@ -386,9 +385,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { - mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); mlx5e_sq_fetch_wqe(sq, &wqe, &pi); } @@ -636,7 +635,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; - u16 headlen, ihs, pi, frag_pi; + u16 headlen, ihs, pi, contig_wqebbs_room; u16 ds_cnt, ds_cnt_inl = 0; u8 num_wqebbs, opcode; u32 num_bytes; @@ -672,13 +671,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); } - mlx5i_sq_fetch_wqe(sq, &wqe, &pi); + mlx5i_sq_fetch_wqe(sq, &wqe, pi); /* fill wqe */ 
wi = &sq->db.wqe_info[pi]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 48864f4988a4..c1e1a16a9b07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -273,7 +273,7 @@ static void eq_pf_process(struct mlx5_eq *eq) case MLX5_PFAULT_SUBTYPE_WQE: /* WQE based event */ pfault->type = - be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24; + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; pfault->token = be32_to_cpu(pf_eqe->wqe.token); pfault->wqe.wq_num = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 5645a4facad2..b8ee9101c506 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -245,7 +245,7 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, return ERR_PTR(res); } - /* Context will be freed by wait func after completion */ + /* Context should be freed by the caller after completion. */ return context; } @@ -418,10 +418,8 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); cmd.flags = htonl(flags); context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); - if (IS_ERR(context)) { - err = PTR_ERR(context); - goto out; - } + if (IS_ERR(context)) + return PTR_ERR(context); err = mlx5_fpga_ipsec_cmd_wait(context); if (err) @@ -435,6 +433,7 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) } out: + kfree(context); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 08eac92fc26c..0982c579ec74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -109,12 +109,11 @@ struct mlx5i_tx_wqe { static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, struct mlx5i_tx_wqe **wqe, - u16 *pi) + u16 pi) { struct mlx5_wq_cyc *wq = &sq->wq; - *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + *wqe = mlx5_wq_cyc_get_wqe(wq, pi); memset(*wqe, 0, sizeof(**wqe)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 68e7f8df2a6d..ddca327e8950 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) return (u32)wq->fbc.sz_m1 + 1; } -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq) -{ - return wq->fbc.frag_sz_m1 + 1; -} - u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) { return wq->fbc.sz_m1 + 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 3a1a170bb2d7..b1293d153a58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, @@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) return ctr & wq->fbc.sz_m1; } -static inline u16 
mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr) -{ - return ctr & wq->fbc.frag_sz_m1; -} - static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq) { return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr); @@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix) return mlx5_frag_buf_get_wqe(&wq->fbc, ix); } +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1; +} + static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) { int equal = (cc1 == cc2); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 81533d7f395c..937d0ace699a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1055,6 +1055,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_driver_init: mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: + mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: if (!reload) devlink_unregister(devlink); @@ -1088,6 +1089,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); + mlxsw_hwmon_fini(mlxsw_core->hwmon); if (!reload) devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 655ddd204ab2..c35be477856f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -359,6 +359,10 @@ static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; } +static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ +} + #endif struct mlxsw_thermal; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index f6cf2896d337..e04e8162aa14 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -303,8 +303,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, struct device *hwmon_dev; int err; - mlxsw_hwmon = devm_kzalloc(mlxsw_bus_info->dev, sizeof(*mlxsw_hwmon), - GFP_KERNEL); + mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL); if (!mlxsw_hwmon) return -ENOMEM; mlxsw_hwmon->core = mlxsw_core; @@ -321,10 +320,9 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; - hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev, - "mlxsw", - mlxsw_hwmon, - mlxsw_hwmon->groups); + hwmon_dev = hwmon_device_register_with_groups(mlxsw_bus_info->dev, + "mlxsw", mlxsw_hwmon, + mlxsw_hwmon->groups); if (IS_ERR(hwmon_dev)) { err = PTR_ERR(hwmon_dev); goto err_hwmon_register; @@ -337,5 +335,12 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, err_hwmon_register: err_fans_init: err_temp_init: + kfree(mlxsw_hwmon); return err; } + +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ + hwmon_device_unregister(mlxsw_hwmon->hwmon_dev); + kfree(mlxsw_hwmon); +} diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 1a4f2bb48ead..ed4e298cd823 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -133,9 +133,9 @@ static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot) { unsigned int val, timeout 
= 10; - /* Wait for the issued mac table command to be completed, or timeout. - * When the command read from ANA_TABLES_MACACCESS is - * MACACCESS_CMD_IDLE, the issued command completed successfully. + /* Wait for the issued vlan table command to be completed, or timeout. + * When the command read from ANA_TABLES_VLANACCESS is + * VLANACCESS_CMD_IDLE, the issued command completed successfully. */ do { val = ocelot_read(ocelot, ANA_TABLES_VLANACCESS); diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 46ba0cf257c6..7a1e9cd9cc62 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -429,12 +429,14 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, switch (off) { case offsetof(struct iphdr, daddr): - set_ip_addr->ipv4_dst_mask = mask; - set_ip_addr->ipv4_dst = exact; + set_ip_addr->ipv4_dst_mask |= mask; + set_ip_addr->ipv4_dst &= ~mask; + set_ip_addr->ipv4_dst |= exact & mask; break; case offsetof(struct iphdr, saddr): - set_ip_addr->ipv4_src_mask = mask; - set_ip_addr->ipv4_src = exact; + set_ip_addr->ipv4_src_mask |= mask; + set_ip_addr->ipv4_src &= ~mask; + set_ip_addr->ipv4_src |= exact & mask; break; default: return -EOPNOTSUPP; @@ -448,11 +450,12 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, } static void -nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask, +nfp_fl_set_ip6_helper(int opcode_tag, u8 word, __be32 exact, __be32 mask, struct nfp_fl_set_ipv6_addr *ip6) { - ip6->ipv6[idx % 4].mask = mask; - ip6->ipv6[idx % 4].exact = exact; + ip6->ipv6[word].mask |= mask; + ip6->ipv6[word].exact &= ~mask; + ip6->ipv6[word].exact |= exact & mask; ip6->reserved = cpu_to_be16(0); ip6->head.jump_id = opcode_tag; @@ -465,6 +468,7 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, struct nfp_fl_set_ipv6_addr *ip_src) { __be32 exact, mask; + u8 word; /* We are expecting tcf_pedit to return a big endian value */ mask = (__force __be32)~tcf_pedit_mask(action, idx); @@ -473,17 +477,20 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, if (exact & ~mask) return -EOPNOTSUPP; - if (off < offsetof(struct ipv6hdr, saddr)) + if (off < offsetof(struct ipv6hdr, saddr)) { return -EOPNOTSUPP; - else if (off < offsetof(struct ipv6hdr, daddr)) - nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx, + } else if (off < offsetof(struct ipv6hdr, daddr)) { + word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word, exact, mask, ip_src); - else if (off < offsetof(struct ipv6hdr, daddr) + - sizeof(struct in6_addr)) - nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx, + } else if (off < offsetof(struct ipv6hdr, daddr) + + sizeof(struct in6_addr)) { + word = (off - offsetof(struct ipv6hdr, daddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word, exact, mask, ip_dst); - else + } else { return -EOPNOTSUPP; + } return 0; } @@ -541,7 +548,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, struct nfp_fl_set_eth set_eth; enum pedit_header_type htype; int idx, nkeys, err; - size_t act_size; + size_t act_size = 0; u32 offset, cmd; u8 ip_proto = 0; @@ -599,7 +606,9 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, act_size = sizeof(set_eth); memcpy(nfp_action, &set_eth, act_size); *a_len += act_size; - } else 
if (set_ip_addr.head.len_lw) { + } + if (set_ip_addr.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip_addr); memcpy(nfp_action, &set_ip_addr, act_size); *a_len += act_size; @@ -607,10 +616,12 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix IPv4 and TCP/UDP checksum. */ *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | nfp_fl_csum_l4_to_flag(ip_proto); - } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { + } + if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { /* TC compiles set src and dst IPv6 address as a single action, * the hardware requires this to be 2 separate actions. */ + nfp_action += act_size; act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; @@ -623,6 +634,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_dst.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip6_dst); memcpy(nfp_action, &set_ip6_dst, act_size); *a_len += act_size; @@ -630,13 +642,16 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_src.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); - } else if (set_tport.head.len_lw) { + } + if (set_tport.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_tport); memcpy(nfp_action, &set_tport, act_size); *a_len += act_size; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index af3a28ec04eb..0f0aba793352 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -228,7 +228,7 @@ static int qed_grc_attn_cb(struct qed_hwfn *p_hwfn) attn_master_to_str(GET_FIELD(tmp, QED_GRC_ATTENTION_MASTER)), GET_FIELD(tmp2, QED_GRC_ATTENTION_PF), (GET_FIELD(tmp2, QED_GRC_ATTENTION_PRIV) == - QED_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Ireelevant)", + QED_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Irrelevant)", GET_FIELD(tmp2, QED_GRC_ATTENTION_VF)); out: diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b48f76182049..10b075bc5959 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -380,8 +380,6 @@ static void fm93c56a_select(struct ql3_adapter *qdev) qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1; ql_write_nvram_reg(qdev, spir, ISP_NVRAM_MASK | qdev->eeprom_cmd_data); - ql_write_nvram_reg(qdev, spir, - ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); } /* diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 9a5e2969df61..2c350099b83c 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4282,8 +4282,8 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_35: + case RTL_GIGA_MAC_VER_34 ... 
RTL_GIGA_MAC_VER_36: + case RTL_GIGA_MAC_VER_38: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: @@ -6549,17 +6549,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); struct net_device *dev = tp->dev; u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow; - int work_done= 0; + int work_done; u16 status; status = rtl_get_events(tp); rtl_ack_events(tp, status & ~tp->event_slow); - if (status & RTL_EVENT_NAPI_RX) - work_done = rtl_rx(dev, tp, (u32) budget); + work_done = rtl_rx(dev, tp, (u32) budget); - if (status & RTL_EVENT_NAPI_TX) - rtl_tx(dev, tp); + rtl_tx(dev, tp); if (status & tp->event_slow) { enable_mask &= ~tp->event_slow; @@ -7093,20 +7091,12 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) { unsigned int flags; - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { RTL_W8(tp, Cfg9346, Cfg9346_Unlock); RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable); RTL_W8(tp, Cfg9346, Cfg9346_Lock); flags = PCI_IRQ_LEGACY; - break; - case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_40: - /* This version was reported to have issues with resume - * from suspend when using MSI-X - */ - flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI; - break; - default: + } else { flags = PCI_IRQ_ALL_TYPES; } diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 7aa5ebb6766c..4289ccb26e4e 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -735,8 +735,11 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) u16 idx = dring->tail; struct netsec_de *de = dring->vaddr + (DESC_SZ * idx); - if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) + if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) { + /* reading the register clears the irq */ + netsec_read(priv, NETSEC_REG_NRM_RX_PKTCNT); break; + } /* This barrier is needed to keep us from reading * any other fields out of the netsec_de until we have diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6acb6b5718b9..493cd382b8aa 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -830,12 +830,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(rt)) return PTR_ERR(rt); - if (skb_dst(skb)) { - int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN - - info->options_len; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, &rt->dst, + GENEVE_IPV4_HLEN + info->options_len); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { @@ -876,11 +872,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) return PTR_ERR(dst); - if (skb_dst(skb)) { - int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 6e13b8832bc7..fd8bb998ae52 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -163,8 +163,6 @@ static const enum gpiod_flags gpio_flags[] = { /* Give this long for the PHY to reset. 
*/ #define T_PHY_RESET_MS 50 -static DEFINE_MUTEX(sfp_mutex); - struct sff_data { unsigned int gpios; bool (*module_supported)(const struct sfp_eeprom_id *id); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 533b6fb8d923..72a55b6b4211 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1241,6 +1241,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index dab504ec5e50..ddfa3f24204c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2218,8 +2218,9 @@ static void virtnet_freeze_down(struct virtio_device *vdev) /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); + netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); - netif_tx_disable(vi->dev); + netif_tx_unlock_bh(vi->dev); cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) { @@ -2255,7 +2256,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) } } + netif_tx_lock_bh(vi->dev); netif_device_attach(vi->dev); + netif_tx_unlock_bh(vi->dev); return err; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2b8da2b7e721..27bd586b94b0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2194,11 +2194,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ndst = &rt->dst; - if (skb_dst(skb)) { - int mtu = dst_mtu(ndst) - VXLAN_HEADROOM; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM); tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); @@ -2235,11 +2231,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - if (skb_dst(skb)) { - int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM); tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? 
: ip6_dst_hoplimit(ndst); diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 43743c26c071..39bf85d0ade0 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -1317,6 +1317,10 @@ static int if_sdio_suspend(struct device *dev) if (priv->wol_criteria == EHS_REMOVE_WAKEUP) { dev_info(dev, "Suspend without wake params -- powering down card\n"); if (priv->fw_ready) { + ret = lbs_suspend(priv); + if (ret) + return ret; + priv->power_up_on_resume = true; if_sdio_power_off(card); } diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index 7780b07543bb..79e59f2379a2 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -258,7 +258,7 @@ int mt76u_buf_alloc(struct mt76_dev *dev, struct mt76u_buf *buf, if (!buf->urb) return -ENOMEM; - buf->urb->sg = devm_kzalloc(dev->dev, nsgs * sizeof(*buf->urb->sg), + buf->urb->sg = devm_kcalloc(dev->dev, nsgs, sizeof(*buf->urb->sg), gfp); if (!buf->urb->sg) return -ENOMEM; @@ -464,8 +464,8 @@ static int mt76u_alloc_rx(struct mt76_dev *dev) int i, err, nsgs; spin_lock_init(&q->lock); - q->entry = devm_kzalloc(dev->dev, - MT_NUM_RX_ENTRIES * sizeof(*q->entry), + q->entry = devm_kcalloc(dev->dev, + MT_NUM_RX_ENTRIES, sizeof(*q->entry), GFP_KERNEL); if (!q->entry) return -ENOMEM; @@ -717,8 +717,8 @@ static int mt76u_alloc_tx(struct mt76_dev *dev) INIT_LIST_HEAD(&q->swq); q->hw_idx = q2hwq(i); - q->entry = devm_kzalloc(dev->dev, - MT_NUM_TX_ENTRIES * sizeof(*q->entry), + q->entry = devm_kcalloc(dev->dev, + MT_NUM_TX_ENTRIES, sizeof(*q->entry), GFP_KERNEL); if (!q->entry) return -ENOMEM; diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 62e9cb167aad..db45c6bbb7bb 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -290,7 +290,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) } set_capacity(disk, available_disk_size >> SECTOR_SHIFT); - device_add_disk(dev, disk); + device_add_disk(dev, disk, NULL); revalidate_disk(disk); return 0; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 0360c015f658..b123b0dcf274 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1556,7 +1556,7 @@ static int btt_blk_init(struct btt *btt) } } set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); - device_add_disk(&btt->nd_btt->dev, btt->btt_disk); + device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; revalidate_disk(btt->btt_disk); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 6071e2942053..a75d10c23d80 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -474,7 +474,7 @@ static int pmem_attach_disk(struct device *dev, gendev = disk_to_dev(disk); gendev->groups = pmem_attribute_groups; - device_add_disk(dev, disk); + device_add_disk(dev, disk, NULL); if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) return -ENOMEM; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dd8ec1dd9219..9e4a30b05bd2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -971,7 +971,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, uuid_copy(&ids->uuid, data + pos + sizeof(*cur)); break; default: - /* Skip unnkown types */ + /* Skip unknown types */ len = cur->nidl; break; } @@ -1132,7 +1132,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct 
nvme_user_io __user *uio) return nvme_submit_user_cmd(ns->queue, &c, (void __user *)(uintptr_t)io.addr, length, - metadata, meta_len, io.slba, NULL, 0); + metadata, meta_len, lower_32_bits(io.slba), NULL, 0); } static u32 nvme_known_admin_effects(u8 opcode) @@ -2076,7 +2076,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); return; } @@ -2729,11 +2729,19 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, return a->mode; } -const struct attribute_group nvme_ns_id_attr_group = { +static const struct attribute_group nvme_ns_id_attr_group = { .attrs = nvme_ns_id_attrs, .is_visible = nvme_ns_id_attrs_are_visible, }; +const struct attribute_group *nvme_ns_id_attr_groups[] = { + &nvme_ns_id_attr_group, +#ifdef CONFIG_NVM + &nvme_nvm_attr_group, +#endif + NULL, +}; + #define nvme_show_str_function(field) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -2900,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_id_ns *id) { struct nvme_ns_head *head; + size_t size = sizeof(*head); int ret = -ENOMEM; - head = kzalloc(sizeof(*head), GFP_KERNEL); +#ifdef CONFIG_NVME_MULTIPATH + size += num_possible_nodes() * sizeof(struct nvme_ns *); +#endif + + head = kzalloc(size, GFP_KERNEL); if (!head) goto out; ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL); @@ -3099,14 +3112,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_get_ctrl(ctrl); - device_add_disk(ctrl->device, ns->disk); - if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group)) - pr_warn("%s: failed to create sysfs group for identification\n", - ns->disk->disk_name); - if (ns->ndev && nvme_nvm_register_sysfs(ns)) - pr_warn("%s: failed to register lightnvm sysfs group for identification\n", - ns->disk->disk_name); + device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); nvme_mpath_add_disk(ns, id); nvme_fault_inject_init(ns); @@ -3132,10 +3138,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_fault_inject_fini(ns); if (ns->disk && ns->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group); - if (ns->ndev) - nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); if (blk_get_integrity(ns->disk)) @@ -3143,8 +3145,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) } mutex_lock(&ns->ctrl->subsys->lock); - nvme_mpath_clear_current_path(ns); list_del_rcu(&ns->siblings); + nvme_mpath_clear_current_path(ns); mutex_unlock(&ns->ctrl->subsys->lock); down_write(&ns->ctrl->namespaces_rwsem); @@ -3411,16 +3413,21 @@ static void nvme_fw_act_work(struct work_struct *work) static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) { - switch ((result & 0xff00) >> 8) { + u32 aer_notice_type = (result & 0xff00) >> 8; + + switch (aer_notice_type) { case NVME_AER_NOTICE_NS_CHANGED: + trace_nvme_async_event(ctrl, aer_notice_type); set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); nvme_queue_scan(ctrl); break; case NVME_AER_NOTICE_FW_ACT_STARTING: + trace_nvme_async_event(ctrl, aer_notice_type); queue_work(nvme_wq, &ctrl->fw_act_work); break; #ifdef CONFIG_NVME_MULTIPATH case NVME_AER_NOTICE_ANA: + trace_nvme_async_event(ctrl, aer_notice_type); if 
(!ctrl->ana_log_buf) break; queue_work(nvme_wq, &ctrl->ana_work); @@ -3435,11 +3442,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, volatile union nvme_result *res) { u32 result = le32_to_cpu(res->u32); + u32 aer_type = result & 0x07; if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS) return; - switch (result & 0x7) { + switch (aer_type) { case NVME_AER_NOTICE: nvme_handle_aen_notice(ctrl, result); break; @@ -3447,6 +3455,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, case NVME_AER_SMART: case NVME_AER_CSS: case NVME_AER_VS: + trace_nvme_async_event(ctrl, aer_type); ctrl->aen_result = result; break; default: diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 206d63cb1afc..bd0969db6225 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -552,8 +552,11 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, ctrl->state != NVME_CTRL_DEAD && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; + + nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR; + blk_mq_start_request(rq); + nvme_complete_rq(rq); + return BLK_STS_OK; } EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command); @@ -865,6 +868,36 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts, return 0; } +bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts) +{ + if (!nvmf_ctlr_matches_baseopts(ctrl, opts) || + strcmp(opts->traddr, ctrl->opts->traddr) || + strcmp(opts->trsvcid, ctrl->opts->trsvcid)) + return false; + + /* + * Checking the local address is rough. In most cases, none is specified + * and the host port is selected by the stack. + * + * Assume no match if: + * - local address is specified and address is not the same + * - local address is not specified but remote is, or vice versa + * (admin using specific host_traddr when it matters). 
+ */ + if ((opts->mask & NVMF_OPT_HOST_TRADDR) && + (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { + if (strcmp(opts->host_traddr, ctrl->opts->host_traddr)) + return false; + } else if ((opts->mask & NVMF_OPT_HOST_TRADDR) || + (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(nvmf_ip_options_match); + static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, unsigned int allowed_opts) { diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index aa2fdb2a2e8f..6ea6275f332a 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -166,6 +166,8 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *rq); bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live); +bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts); static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 611e70cae754..e52b9d3c0bd6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -20,6 +20,7 @@ #include <uapi/scsi/fc/fc_fs.h> #include <uapi/scsi/fc/fc_els.h> #include <linux/delay.h> +#include <linux/overflow.h> #include "nvme.h" #include "fabrics.h" @@ -104,6 +105,12 @@ struct nvme_fc_fcp_op { struct nvme_fc_ersp_iu rsp_iu; }; +struct nvme_fcp_op_w_sgl { + struct nvme_fc_fcp_op op; + struct scatterlist sgl[SG_CHUNK_SIZE]; + uint8_t priv[0]; +}; + struct nvme_fc_lport { struct nvme_fc_local_port localport; @@ -122,6 +129,7 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; spinlock_t lock; @@ -210,7 +218,6 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt); * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. */ -static struct class *fc_class; static struct device *fc_udev_device; @@ -317,7 +324,7 @@ out_done: * @template: LLDD entrypoints and operational parameters for the port * @dev: physical hardware device node port corresponds to. Will be * used for DMA mappings - * @lport_p: pointer to a local port pointer. Upon success, the routine + * @portptr: pointer to a local port pointer. Upon success, the routine * will allocate a nvme_fc_local_port structure and place its * address in the local port pointer. Upon failure, local port * pointer will be set to 0. @@ -425,8 +432,7 @@ EXPORT_SYMBOL_GPL(nvme_fc_register_localport); * nvme_fc_unregister_localport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME host FC port. - * @localport: pointer to the (registered) local port that is to be - * deregistered. + * @portptr: pointer to the (registered) local port that is to be deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -507,6 +513,7 @@ nvme_fc_free_rport(struct kref *ref) list_del(&rport->endp_list); spin_unlock_irqrestore(&nvme_fc_lock, flags); + WARN_ON(!list_empty(&rport->disc_list)); ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); kfree(rport); @@ -631,7 +638,7 @@ __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, * @localport: pointer to the (registered) local port that the remote * subsystem port is connected to. 
* @pinfo: pointer to information about the port to be registered - * @rport_p: pointer to a remote port pointer. Upon success, the routine + * @portptr: pointer to a remote port pointer. Upon success, the routine * will allocate a nvme_fc_remote_port structure and place its * address in the remote port pointer. Upon failure, remote port * pointer will be set to 0. @@ -694,6 +701,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); INIT_LIST_HEAD(&newrec->ls_req_list); + INIT_LIST_HEAD(&newrec->disc_list); kref_init(&newrec->ref); atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); @@ -807,8 +815,8 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME subsystem FC port. - * @remoteport: pointer to the (registered) remote port that is to be - * deregistered. + * @portptr: pointer to the (registered) remote port that is to be + * deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -1385,7 +1393,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) __nvme_fc_finish_ls_req(lsop); - /* fc-nvme iniator doesn't care about success or failure of cmd */ + /* fc-nvme initiator doesn't care about success or failure of cmd */ kfree(lsop); } @@ -1685,6 +1693,8 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, struct request *rq, u32 rqno) { + struct nvme_fcp_op_w_sgl *op_w_sgl = + container_of(op, typeof(*op_w_sgl), op); struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; int ret = 0; @@ -1694,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->fcp_req.rspaddr = &op->rsp_iu; op->fcp_req.rsplen = sizeof(op->rsp_iu); op->fcp_req.done = nvme_fc_fcpio_done; - op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; op->ctrl = ctrl; op->queue = queue; @@ -1733,12 +1742,17 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { struct nvme_fc_ctrl *ctrl = set->driver_data; - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; + int res; nvme_req(rq)->ctrl = &ctrl->ctrl; - return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); + res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); + if (res) + return res; + op->op.fcp_req.first_sgl = &op->sgl[0]; + return res; } static int @@ -1768,7 +1782,6 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) } aen_op->flags = FCOP_FLAGS_AEN; - aen_op->fcp_req.first_sgl = NULL; /* no sg list */ aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); @@ -2422,10 +2435,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; + ctrl->tag_set.cmd_size = + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz); ctrl->tag_set.driver_data = ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; @@ -3027,10 +3039,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.cmd_size = + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz); ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; @@ -3159,7 +3170,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], @@ -3254,6 +3265,90 @@ static struct nvmf_transport_ops nvme_fc_transport = { .create_ctrl = nvme_fc_create_ctrl, }; +/* Arbitrary successive failures max. With lots of subsystems could be high */ +#define DISCOVERY_MAX_FAIL 20 + +static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long flags; + LIST_HEAD(local_disc_list); + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + int failcnt = 0; + + spin_lock_irqsave(&nvme_fc_lock, flags); +restart: + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + list_for_each_entry(rport, &lport->endp_list, endp_list) { + if (!nvme_fc_lport_get(lport)) + continue; + if (!nvme_fc_rport_get(rport)) { + /* + * This is a temporary condition. Upon restart + * this rport will be gone from the list. + * + * Revert the lport put and retry. Anything + * added to the list already will be skipped (as + * they are no longer list_empty). Loops should + * resume at rports that were not yet seen. 
+ */ + nvme_fc_lport_put(lport); + + if (failcnt++ < DISCOVERY_MAX_FAIL) + goto restart; + + pr_err("nvme_discovery: too many reference " + "failures\n"); + goto process_local_list; + } + if (list_empty(&rport->disc_list)) + list_add_tail(&rport->disc_list, + &local_disc_list); + } + } + +process_local_list: + while (!list_empty(&local_disc_list)) { + rport = list_first_entry(&local_disc_list, + struct nvme_fc_rport, disc_list); + list_del_init(&rport->disc_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + lport = rport->lport; + /* signal discovery. Won't hurt if it repeats */ + nvme_fc_signal_discovery_scan(lport, rport); + nvme_fc_rport_put(rport); + nvme_fc_lport_put(lport); + + spin_lock_irqsave(&nvme_fc_lock, flags); + } + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return count; +} +static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); + +static struct attribute *nvme_fc_attrs[] = { + &dev_attr_nvme_discovery.attr, + NULL +}; + +static struct attribute_group nvme_fc_attr_group = { + .attrs = nvme_fc_attrs, +}; + +static const struct attribute_group *nvme_fc_attr_groups[] = { + &nvme_fc_attr_group, + NULL +}; + +static struct class fc_class = { + .name = "fc", + .dev_groups = nvme_fc_attr_groups, + .owner = THIS_MODULE, +}; + static int __init nvme_fc_init_module(void) { int ret; @@ -3272,16 +3367,16 @@ static int __init nvme_fc_init_module(void) * put in place, this code will move to a more generic * location for the class. */ - fc_class = class_create(THIS_MODULE, "fc"); - if (IS_ERR(fc_class)) { + ret = class_register(&fc_class); + if (ret) { pr_err("couldn't register class fc\n"); - return PTR_ERR(fc_class); + return ret; } /* * Create a device for the FC-centric udev events */ - fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL, + fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL, "fc_udev_device"); if (IS_ERR(fc_udev_device)) { pr_err("couldn't create fc_udev device!\n"); @@ -3296,9 +3391,9 @@ static int __init nvme_fc_init_module(void) return 0; out_destroy_device: - device_destroy(fc_class, MKDEV(0, 0)); + device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: - class_destroy(fc_class); + class_unregister(&fc_class); return ret; } @@ -3313,8 +3408,8 @@ static void __exit nvme_fc_exit_module(void) ida_destroy(&nvme_fc_local_port_cnt); ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(fc_class, MKDEV(0, 0)); - class_destroy(fc_class); + device_destroy(&fc_class, MKDEV(0, 0)); + class_unregister(&fc_class); } module_init(nvme_fc_init_module); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 6fe5923c95d4..a4f3b263cd6c 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -567,13 +567,13 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, * Expect the lba in device format */ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, - struct nvm_chk_meta *meta, - sector_t slba, int nchks) + sector_t slba, int nchks, + struct nvm_chk_meta *meta) { struct nvm_geo *geo = &ndev->geo; struct nvme_ns *ns = ndev->q->queuedata; struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta; + struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off; struct ppa_addr ppa; size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); size_t log_pos, offset, len; @@ -585,6 +585,10 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, */ max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024); + 
dev_meta = kmalloc(max_len, GFP_KERNEL); + if (!dev_meta) + return -ENOMEM; + /* Normalize lba address space to obtain log offset */ ppa.ppa = slba; ppa = dev_to_generic_addr(ndev, ppa); @@ -598,6 +602,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, while (left) { len = min_t(unsigned int, left, max_len); + memset(dev_meta, 0, max_len); + dev_meta_off = dev_meta; + ret = nvme_get_log(ctrl, ns->head->ns_id, NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len, offset); @@ -607,21 +614,23 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, } for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { - meta->state = dev_meta->state; - meta->type = dev_meta->type; - meta->wi = dev_meta->wi; - meta->slba = le64_to_cpu(dev_meta->slba); - meta->cnlb = le64_to_cpu(dev_meta->cnlb); - meta->wp = le64_to_cpu(dev_meta->wp); + meta->state = dev_meta_off->state; + meta->type = dev_meta_off->type; + meta->wi = dev_meta_off->wi; + meta->slba = le64_to_cpu(dev_meta_off->slba); + meta->cnlb = le64_to_cpu(dev_meta_off->cnlb); + meta->wp = le64_to_cpu(dev_meta_off->wp); meta++; - dev_meta++; + dev_meta_off++; } offset += len; left -= len; } + kfree(dev_meta); + return ret; } @@ -968,6 +977,9 @@ void nvme_nvm_update_nvm_info(struct nvme_ns *ns) struct nvm_dev *ndev = ns->ndev; struct nvm_geo *geo = &ndev->geo; + if (geo->version == NVM_OCSSD_SPEC_12) + return; + geo->csecs = 1 << ns->lba_shift; geo->sos = ns->ms; } @@ -1190,10 +1202,29 @@ static NVM_DEV_ATTR_12_RO(multiplane_modes); static NVM_DEV_ATTR_12_RO(media_capabilities); static NVM_DEV_ATTR_12_RO(max_phys_secs); -static struct attribute *nvm_dev_attrs_12[] = { +/* 2.0 values */ +static NVM_DEV_ATTR_20_RO(groups); +static NVM_DEV_ATTR_20_RO(punits); +static NVM_DEV_ATTR_20_RO(chunks); +static NVM_DEV_ATTR_20_RO(clba); +static NVM_DEV_ATTR_20_RO(ws_min); +static NVM_DEV_ATTR_20_RO(ws_opt); +static NVM_DEV_ATTR_20_RO(maxoc); +static NVM_DEV_ATTR_20_RO(maxocpu); +static NVM_DEV_ATTR_20_RO(mw_cunits); +static NVM_DEV_ATTR_20_RO(write_typ); +static NVM_DEV_ATTR_20_RO(write_max); +static NVM_DEV_ATTR_20_RO(reset_typ); +static NVM_DEV_ATTR_20_RO(reset_max); + +static struct attribute *nvm_dev_attrs[] = { + /* version agnostic attrs */ &dev_attr_version.attr, &dev_attr_capabilities.attr, + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr, + /* 1.2 attrs */ &dev_attr_vendor_opcode.attr, &dev_attr_device_mode.attr, &dev_attr_media_manager.attr, @@ -1208,8 +1239,6 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_page_size.attr, &dev_attr_hw_sector_size.attr, &dev_attr_oob_sector_size.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_prog_typ.attr, &dev_attr_prog_max.attr, &dev_attr_erase_typ.attr, @@ -1218,33 +1247,7 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_media_capabilities.attr, &dev_attr_max_phys_secs.attr, - NULL, -}; - -static const struct attribute_group nvm_dev_attr_group_12 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_12, -}; - -/* 2.0 values */ -static NVM_DEV_ATTR_20_RO(groups); -static NVM_DEV_ATTR_20_RO(punits); -static NVM_DEV_ATTR_20_RO(chunks); -static NVM_DEV_ATTR_20_RO(clba); -static NVM_DEV_ATTR_20_RO(ws_min); -static NVM_DEV_ATTR_20_RO(ws_opt); -static NVM_DEV_ATTR_20_RO(maxoc); -static NVM_DEV_ATTR_20_RO(maxocpu); -static NVM_DEV_ATTR_20_RO(mw_cunits); -static NVM_DEV_ATTR_20_RO(write_typ); -static NVM_DEV_ATTR_20_RO(write_max); -static NVM_DEV_ATTR_20_RO(reset_typ); -static NVM_DEV_ATTR_20_RO(reset_max); - -static struct attribute *nvm_dev_attrs_20[] = { - 
&dev_attr_version.attr, - &dev_attr_capabilities.attr, - + /* 2.0 attrs */ &dev_attr_groups.attr, &dev_attr_punits.attr, &dev_attr_chunks.attr, @@ -1255,8 +1258,6 @@ static struct attribute *nvm_dev_attrs_20[] = { &dev_attr_maxocpu.attr, &dev_attr_mw_cunits.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_write_typ.attr, &dev_attr_write_max.attr, &dev_attr_reset_typ.attr, @@ -1265,44 +1266,38 @@ static struct attribute *nvm_dev_attrs_20[] = { NULL, }; -static const struct attribute_group nvm_dev_attr_group_20 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_20, -}; - -int nvme_nvm_register_sysfs(struct nvme_ns *ns) +static umode_t nvm_dev_attrs_visible(struct kobject *kobj, + struct attribute *attr, int index) { + struct device *dev = container_of(kobj, struct device, kobj); + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns *ns = disk->private_data; struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; + struct device_attribute *dev_attr = + container_of(attr, typeof(*dev_attr), attr); if (!ndev) - return -EINVAL; - - switch (geo->major_ver_id) { - case 1: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); - case 2: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); - } + return 0; - return -EINVAL; -} + if (dev_attr->show == nvm_dev_attr_show) + return attr->mode; -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) -{ - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - - switch (geo->major_ver_id) { + switch (ndev->geo.major_ver_id) { case 1: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); + if (dev_attr->show == nvm_dev_attr_show_12) + return attr->mode; break; case 2: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); + if (dev_attr->show == nvm_dev_attr_show_20) + return attr->mode; break; } + + return 0; } + +const struct attribute_group nvme_nvm_attr_group = { + .name = "lightnvm", + .attrs = nvm_dev_attrs, + .is_visible = nvm_dev_attrs_visible, +}; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9fe3fff818b8..5e3cc8c59a39 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -77,6 +77,13 @@ void nvme_failover_req(struct request *req) queue_work(nvme_wq, &ns->ctrl->ana_work); } break; + case NVME_SC_HOST_PATH_ERROR: + /* + * Temporary transport disruption in talking to the controller. + * Try to send on a new path. 
+ */ + nvme_mpath_clear_current_path(ns); + break; default: /* * Reset the controller for any non-ANA error as we don't know @@ -110,29 +117,55 @@ static const char *nvme_ana_state_names[] = { [NVME_ANA_CHANGE] = "change", }; -static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) +void nvme_mpath_clear_current_path(struct nvme_ns *ns) +{ + struct nvme_ns_head *head = ns->head; + int node; + + if (!head) + return; + + for_each_node(node) { + if (ns == rcu_access_pointer(head->current_path[node])) + rcu_assign_pointer(head->current_path[node], NULL); + } +} + +static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) { - struct nvme_ns *ns, *fallback = NULL; + int found_distance = INT_MAX, fallback_distance = INT_MAX, distance; + struct nvme_ns *found = NULL, *fallback = NULL, *ns; list_for_each_entry_rcu(ns, &head->list, siblings) { if (ns->ctrl->state != NVME_CTRL_LIVE || test_bit(NVME_NS_ANA_PENDING, &ns->flags)) continue; + + distance = node_distance(node, dev_to_node(ns->ctrl->dev)); + switch (ns->ana_state) { case NVME_ANA_OPTIMIZED: - rcu_assign_pointer(head->current_path, ns); - return ns; + if (distance < found_distance) { + found_distance = distance; + found = ns; + } + break; case NVME_ANA_NONOPTIMIZED: - fallback = ns; + if (distance < fallback_distance) { + fallback_distance = distance; + fallback = ns; + } break; default: break; } } - if (fallback) - rcu_assign_pointer(head->current_path, fallback); - return fallback; + if (!found) + found = fallback; + if (found) + rcu_assign_pointer(head->current_path[node], found); + return found; } static inline bool nvme_path_is_optimized(struct nvme_ns *ns) @@ -143,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns) inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) { - struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu); + int node = numa_node_id(); + struct nvme_ns *ns; + ns = srcu_dereference(head->current_path[node], &head->srcu); if (unlikely(!ns || !nvme_path_is_optimized(ns))) - ns = __nvme_find_path(head); + ns = __nvme_find_path(head, node); return ns; } @@ -193,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) int srcu_idx; srcu_idx = srcu_read_lock(&head->srcu); - ns = srcu_dereference(head->current_path, &head->srcu); + ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu); if (likely(ns && nvme_path_is_optimized(ns))) found = ns->queue->poll_fn(q, qc); srcu_read_unlock(&head->srcu, srcu_idx); @@ -282,12 +317,17 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) if (!head->disk) return; - if (!(head->disk->flags & GENHD_FL_UP)) { - device_add_disk(&head->subsys->dev, head->disk); - if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group)) - dev_warn(&head->subsys->dev, - "failed to create id group.\n"); + if (!(head->disk->flags & GENHD_FL_UP)) + device_add_disk(&head->subsys->dev, head->disk, + nvme_ns_id_attr_groups); + + if (nvme_path_is_optimized(ns)) { + int node, srcu_idx; + + srcu_idx = srcu_read_lock(&head->srcu); + for_each_node(node) + __nvme_find_path(head, node); + srcu_read_unlock(&head->srcu, srcu_idx); } kblockd_schedule_work(&ns->head->requeue_work); @@ -494,11 +534,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; - if (head->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group); + if (head->disk->flags & GENHD_FL_UP) del_gendisk(head->disk); - } 
blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bb4a2003c097..9fefba039d1e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -277,14 +277,6 @@ struct nvme_ns_ids { * only ever has a single entry for private namespaces. */ struct nvme_ns_head { -#ifdef CONFIG_NVME_MULTIPATH - struct gendisk *disk; - struct nvme_ns __rcu *current_path; - struct bio_list requeue_list; - spinlock_t requeue_lock; - struct work_struct requeue_work; - struct mutex lock; -#endif struct list_head list; struct srcu_struct srcu; struct nvme_subsystem *subsys; @@ -293,6 +285,14 @@ struct nvme_ns_head { struct list_head entry; struct kref ref; int instance; +#ifdef CONFIG_NVME_MULTIPATH + struct gendisk *disk; + struct bio_list requeue_list; + spinlock_t requeue_lock; + struct work_struct requeue_work; + struct mutex lock; + struct nvme_ns __rcu *current_path[]; +#endif }; #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS @@ -459,7 +459,7 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset); -extern const struct attribute_group nvme_ns_id_attr_group; +extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH @@ -474,14 +474,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); - -static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) -{ - struct nvme_ns_head *head = ns->head; - - if (head && ns == rcu_access_pointer(head->current_path)) - rcu_assign_pointer(head->current_path, NULL); -} +void nvme_mpath_clear_current_path(struct nvme_ns *ns); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) @@ -551,8 +544,7 @@ static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl) void nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); -int nvme_nvm_register_sysfs(struct nvme_ns *ns); -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); +extern const struct attribute_group nvme_nvm_attr_group; int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; @@ -563,11 +555,6 @@ static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, } static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; -static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns) -{ - return 0; -} -static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}; static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d668682f91df..4e023cd007e1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -772,10 +772,10 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) goto out_unmap; - } - if (blk_integrity_rq(req)) cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); + } + return BLK_STS_OK; out_unmap: @@ -1249,7 
+1249,7 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) /** * nvme_suspend_queue - put queue into suspended state - * @nvmeq - queue to suspend + * @nvmeq: queue to suspend */ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { @@ -2564,13 +2564,12 @@ static void nvme_remove(struct pci_dev *pdev) struct nvme_dev *dev = pci_get_drvdata(pdev); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); - - cancel_work_sync(&dev->ctrl.reset_work); pci_set_drvdata(pdev, NULL); if (!pci_device_is_present(pdev)) { nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); nvme_dev_disable(dev, true); + nvme_dev_remove_admin(dev); } flush_work(&dev->ctrl.reset_work); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dc042017c293..d181cafedc58 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -233,8 +233,15 @@ static void nvme_rdma_qp_event(struct ib_event *event, void *context) static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue) { - wait_for_completion_interruptible_timeout(&queue->cm_done, + int ret; + + ret = wait_for_completion_interruptible_timeout(&queue->cm_done, msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1); + if (ret < 0) + return ret; + if (ret == 0) + return -ETIMEDOUT; + WARN_ON_ONCE(queue->cm_error > 0); return queue->cm_error; } @@ -1849,54 +1856,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .stop_ctrl = nvme_rdma_stop_ctrl, }; -static inline bool -__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl, - struct nvmf_ctrl_options *opts) -{ - char *stdport = __stringify(NVME_RDMA_IP_PORT); - - - if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) || - strcmp(opts->traddr, ctrl->ctrl.opts->traddr)) - return false; - - if (opts->mask & NVMF_OPT_TRSVCID && - ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) - return false; - } else if (opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(opts->trsvcid, stdport)) - return false; - } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(stdport, ctrl->ctrl.opts->trsvcid)) - return false; - } - /* else, it's a match as both have stdport. Fall to next checks */ - - /* - * checking the local address is rough. In most cases, one - * is not specified and the host port is selected by the stack. - * - * Assume no match if: - * local address is specified and address is not the same - * local address is not specified but remote is, or vice versa - * (admin using specific host_traddr when it matters). - */ - if (opts->mask & NVMF_OPT_HOST_TRADDR && - ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) { - if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr)) - return false; - } else if (opts->mask & NVMF_OPT_HOST_TRADDR || - ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) - return false; - /* - * if neither controller had an host port specified, assume it's - * a match as everything else matched. 
- */ - - return true; -} - /* * Fails a connection request if it matches an existing controller * (association) with the same tuple: @@ -1917,7 +1876,7 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts) mutex_lock(&nvme_rdma_ctrl_mutex); list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { - found = __nvme_rdma_options_match(ctrl, opts); + found = nvmf_ip_options_match(&ctrl->ctrl, opts); if (found) break; } @@ -1932,7 +1891,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvme_rdma_ctrl *ctrl; int ret; bool changed; - char *port; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -1940,15 +1898,21 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - if (opts->mask & NVMF_OPT_TRSVCID) - port = opts->trsvcid; - else - port = __stringify(NVME_RDMA_IP_PORT); + if (!(opts->mask & NVMF_OPT_TRSVCID)) { + opts->trsvcid = + kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL); + if (!opts->trsvcid) { + ret = -ENOMEM; + goto out_free_ctrl; + } + opts->mask |= NVMF_OPT_TRSVCID; + } ret = inet_pton_with_scope(&init_net, AF_UNSPEC, - opts->traddr, port, &ctrl->addr); + opts->traddr, opts->trsvcid, &ctrl->addr); if (ret) { - pr_err("malformed address passed: %s:%s\n", opts->traddr, port); + pr_err("malformed address passed: %s:%s\n", + opts->traddr, opts->trsvcid); goto out_free_ctrl; } diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index a490790d6691..196d5bd56718 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -156,6 +156,34 @@ TRACE_EVENT(nvme_complete_rq, ); +#define aer_name(aer) { aer, #aer } + +TRACE_EVENT(nvme_async_event, + TP_PROTO(struct nvme_ctrl *ctrl, u32 result), + TP_ARGS(ctrl, result), + TP_STRUCT__entry( + __field(int, ctrl_id) + __field(u32, result) + ), + TP_fast_assign( + __entry->ctrl_id = ctrl->instance; + __entry->result = result; + ), + TP_printk("nvme%d: NVME_AEN=%#08x [%s]", + __entry->ctrl_id, __entry->result, + __print_symbolic(__entry->result, + aer_name(NVME_AER_NOTICE_NS_CHANGED), + aer_name(NVME_AER_NOTICE_ANA), + aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_ERROR), + aer_name(NVME_AER_SMART), + aer_name(NVME_AER_CSS), + aer_name(NVME_AER_VS)) + ) +); + +#undef aer_name + #endif /* _TRACE_NVME_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 2008fa62a373..1179f6314323 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -58,7 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); if (!ns) { - pr_err("nvmet : Could not find namespace id : %d\n", + pr_err("Could not find namespace id : %d\n", le32_to_cpu(req->cmd->get_log_page.nsid)); return NVME_SC_INVALID_NS; } @@ -353,7 +353,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strcpy(id->subnqn, ctrl->subsys->subsysnqn); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); /* Max command capsule size is sqe + single page of in-capsule data */ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b5ec96abd048..0acdff9e6842 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1105,8 +1105,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port 
*port, if (!port) return NULL; - if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn, - NVMF_NQN_SIZE)) { + if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) return NULL; return nvmet_disc_subsys; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index eae29f493a07..bc0aa0bf1543 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -174,7 +174,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strcpy(id->subnqn, ctrl->subsys->subsysnqn); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); @@ -219,12 +219,10 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: - pr_err("unsupported cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - pr_err("unhandled cmd %d\n", cmd->common.opcode); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } int __init nvmet_init_discovery(void) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 29b4b236afd8..409081a03b24 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -110,11 +110,19 @@ struct nvmet_fc_tgtport { struct list_head ls_busylist; struct list_head assoc_list; struct ida assoc_cnt; - struct nvmet_port *port; + struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; }; +struct nvmet_fc_port_entry { + struct nvmet_fc_tgtport *tgtport; + struct nvmet_port *port; + u64 node_name; + u64 port_name; + struct list_head pe_list; +}; + struct nvmet_fc_defer_fcp_req { struct list_head req_list; struct nvmefc_tgt_fcp_req *fcp_req; @@ -132,7 +140,6 @@ struct nvmet_fc_tgt_queue { atomic_t zrspcnt; atomic_t rsn; spinlock_t qlock; - struct nvmet_port *port; struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvmet_fc_tgt_assoc *assoc; @@ -221,6 +228,7 @@ static DEFINE_SPINLOCK(nvmet_fc_tgtlock); static LIST_HEAD(nvmet_fc_target_list); static DEFINE_IDA(nvmet_fc_tgtport_cnt); +static LIST_HEAD(nvmet_fc_portentry_list); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); @@ -645,7 +653,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->qid = qid; queue->sqsize = sqsize; queue->assoc = assoc; - queue->port = assoc->tgtport->port; queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); INIT_LIST_HEAD(&queue->avail_defer_list); @@ -957,6 +964,83 @@ nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport, return ret; } +static void +nvmet_fc_portentry_bind(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_port_entry *pe, + struct nvmet_port *port) +{ + lockdep_assert_held(&nvmet_fc_tgtlock); + + pe->tgtport = tgtport; + tgtport->pe = pe; + + pe->port = port; + port->priv = pe; + + pe->node_name = tgtport->fc_target_port.node_name; + pe->port_name = tgtport->fc_target_port.port_name; + INIT_LIST_HEAD(&pe->pe_list); + + list_add_tail(&pe->pe_list, &nvmet_fc_portentry_list); +} + +static void +nvmet_fc_portentry_unbind(struct nvmet_fc_port_entry *pe) +{ + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + if (pe->tgtport) + pe->tgtport->pe = NULL; + list_del(&pe->pe_list); + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/* + * called when a targetport deregisters. 
Breaks the relationship + * with the nvmet port, but leaves the port_entry in place so that + * re-registration can resume operation. + */ +static void +nvmet_fc_portentry_unbind_tgt(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_port_entry *pe; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + pe = tgtport->pe; + if (pe) + pe->tgtport = NULL; + tgtport->pe = NULL; + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/* + * called when a new targetport is registered. Looks in the + * existing nvmet port_entries to see if the nvmet layer is + * configured for the targetport's wwn's. (the targetport existed, + * nvmet configured, the lldd unregistered the tgtport, and is now + * reregistering the same targetport). If so, set the nvmet port + * port entry on the targetport. + */ +static void +nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_port_entry *pe; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) { + if (tgtport->fc_target_port.node_name == pe->node_name && + tgtport->fc_target_port.port_name == pe->port_name) { + WARN_ON(pe->tgtport); + tgtport->pe = pe; + pe->tgtport = tgtport; + break; + } + } + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} /** * nvme_fc_register_targetport - transport entry point called by an @@ -1034,6 +1118,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, goto out_free_newrec; } + nvmet_fc_portentry_rebind_tgt(newrec); + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); list_add_tail(&newrec->tgt_list, &nvmet_fc_target_list); spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); @@ -1159,8 +1245,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) * nvme_fc_unregister_targetport - transport entry point called by an * LLDD to deregister/remove a previously * registered a local NVME subsystem FC port. - * @tgtport: pointer to the (registered) target port that is to be - * deregistered. + * @target_port: pointer to the (registered) target port that is to be + * deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -1171,6 +1257,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + nvmet_fc_portentry_unbind_tgt(tgtport); + /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); @@ -1661,7 +1749,7 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work) * * If this routine returns error, the LLDD should abort the exchange. * - * @tgtport: pointer to the (registered) target port the LS was + * @target_port: pointer to the (registered) target port the LS was * received on. * @lsreq: pointer to a lsreq request structure to be used to reference * the exchange corresponding to the LS. @@ -2147,7 +2235,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) /* - * Actual processing routine for received FC-NVME LS Requests from the LLD + * Actual processing routine for received FC-NVME I/O Requests from the LLD */ static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, @@ -2158,6 +2246,13 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, int ret; /* + * if there is no nvmet mapping to the targetport there + * shouldn't be requests. just terminate them. + */ + if (!tgtport->pe) + goto transport_error; + + /* * Fused commands are currently not supported in the linux * implementation. 
* @@ -2184,7 +2279,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.rsp = &fod->rspiubuf.cqe; - fod->req.port = fod->queue->port; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); @@ -2468,7 +2563,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], @@ -2508,6 +2603,7 @@ static int nvmet_fc_add_port(struct nvmet_port *port) { struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_port_entry *pe; struct nvmet_fc_traddr traddr = { 0L, 0L }; unsigned long flags; int ret; @@ -2524,24 +2620,40 @@ nvmet_fc_add_port(struct nvmet_port *port) if (ret) return ret; + pe = kzalloc(sizeof(*pe), GFP_KERNEL); + if (!pe) + return -ENOMEM; + ret = -ENXIO; spin_lock_irqsave(&nvmet_fc_tgtlock, flags); list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { if ((tgtport->fc_target_port.node_name == traddr.nn) && (tgtport->fc_target_port.port_name == traddr.pn)) { - tgtport->port = port; - ret = 0; + /* a FC port can only be 1 nvmet port id */ + if (!tgtport->pe) { + nvmet_fc_portentry_bind(tgtport, pe, port); + ret = 0; + } else + ret = -EALREADY; break; } } spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + + if (ret) + kfree(pe); + return ret; } static void nvmet_fc_remove_port(struct nvmet_port *port) { - /* nothing to do */ + struct nvmet_fc_port_entry *pe = port->priv; + + nvmet_fc_portentry_unbind(pe); + + kfree(pe); } static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 5251689a1d9a..291f4121f516 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -648,6 +648,7 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, break; /* Fall-Thru to RSP handling */ + /* FALLTHRU */ case NVMET_FCOP_RSP: if (fcpreq) { diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 7bc9f6240432..f93fb5711142 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -58,7 +58,7 @@ static void nvmet_bio_done(struct bio *bio) static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; - struct bio *bio = &req->b.inline_bio; + struct bio *bio; struct scatterlist *sg; sector_t sector; blk_qc_t cookie; @@ -81,7 +81,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector = le64_to_cpu(req->cmd->rw.slba); sector <<= (req->ns->blksize_shift - 9); - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) { + bio = &req->b.inline_bio; + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + } else { + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + } bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; bio->bi_private = req; diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 81a9dc5290a8..39d972e2595f 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -246,7 +246,8 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) break; offset = 
le64_to_cpu(range.slba) << req->ns->blksize_shift; - len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; + len = le32_to_cpu(range.nlb); + len <<= req->ns->blksize_shift; if (offset + len > req->ns->size) { ret = NVME_SC_LBA_RANGE | NVME_SC_DNR; break; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index ec9af4ee03b6..08f7b57a1203 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -264,6 +264,7 @@ struct nvmet_fabrics_ops { }; #define NVMET_MAX_INLINE_BIOVEC 8 +#define NVMET_MAX_INLINE_DATA_LEN NVMET_MAX_INLINE_BIOVEC * PAGE_SIZE struct nvmet_req { struct nvme_command *cmd; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index bfc4da660bb4..bd265aceb90c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -122,6 +122,7 @@ struct nvmet_rdma_device { int inline_page_count; }; +static struct workqueue_struct *nvmet_rdma_delete_wq; static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); @@ -1267,12 +1268,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_rdma_delete_wq); } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) { - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); /* Destroying rdma_cm id is not needed here */ return 0; } @@ -1337,7 +1338,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); } } @@ -1367,7 +1368,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); } /** @@ -1649,8 +1650,17 @@ static int __init nvmet_rdma_init(void) if (ret) goto err_ib_client; + nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvmet_rdma_delete_wq) { + ret = -ENOMEM; + goto err_unreg_transport; + } + return 0; +err_unreg_transport: + nvmet_unregister_transport(&nvmet_rdma_ops); err_ib_client: ib_unregister_client(&nvmet_rdma_ib_client); return ret; @@ -1658,6 +1668,7 @@ err_ib_client: static void __exit nvmet_rdma_exit(void) { + destroy_workqueue(nvmet_rdma_delete_wq); nvmet_unregister_transport(&nvmet_rdma_ops); ib_unregister_client(&nvmet_rdma_ib_client); WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); diff --git a/drivers/of/device.c b/drivers/of/device.c index 5957cd4fa262..c7fa5a9697c9 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -170,18 +170,6 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma) } EXPORT_SYMBOL_GPL(of_dma_configure); -/** - * of_dma_deconfigure - Clean up DMA configuration - * @dev: Device for which to clean up DMA configuration - * - * Clean up all configuration performed by of_dma_configure_ops() and free all - * resources that have been allocated. 
- */ -void of_dma_deconfigure(struct device *dev) -{ - arch_teardown_dma_ops(dev); -} - int of_device_register(struct platform_device *pdev) { device_initialize(&pdev->dev); diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 722537e14848..41b49716ac75 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -771,6 +771,9 @@ static void __init of_unittest_parse_interrupts(void) struct of_phandle_args args; int i, rc; + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return; + np = of_find_node_by_path("/testcase-data/interrupts/interrupts0"); if (!np) { pr_err("missing testcase data\n"); @@ -845,6 +848,9 @@ static void __init of_unittest_parse_interrupts_extended(void) struct of_phandle_args args; int i, rc; + if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) + return; + np = of_find_node_by_path("/testcase-data/interrupts/interrupts-extended0"); if (!np) { pr_err("missing testcase data\n"); @@ -1001,15 +1007,19 @@ static void __init of_unittest_platform_populate(void) pdev = of_find_device_by_node(np); unittest(pdev, "device 1 creation failed\n"); - irq = platform_get_irq(pdev, 0); - unittest(irq == -EPROBE_DEFER, "device deferred probe failed - %d\n", irq); + if (!(of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)) { + irq = platform_get_irq(pdev, 0); + unittest(irq == -EPROBE_DEFER, + "device deferred probe failed - %d\n", irq); - /* Test that a parsing failure does not return -EPROBE_DEFER */ - np = of_find_node_by_path("/testcase-data/testcase-device2"); - pdev = of_find_device_by_node(np); - unittest(pdev, "device 2 creation failed\n"); - irq = platform_get_irq(pdev, 0); - unittest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); + /* Test that a parsing failure does not return -EPROBE_DEFER */ + np = of_find_node_by_path("/testcase-data/testcase-device2"); + pdev = of_find_device_by_node(np); + unittest(pdev, "device 2 creation failed\n"); + irq = platform_get_irq(pdev, 0); + unittest(irq < 0 && irq != -EPROBE_DEFER, + "device parsing error failed - %d\n", irq); + } np = of_find_node_by_path("/testcase-data/platform-tests"); unittest(np, "No testcase data in device tree\n"); diff --git a/drivers/pci/controller/pcie-cadence.c b/drivers/pci/controller/pcie-cadence.c index 86f1b002c846..975bcdd6b5c0 100644 --- a/drivers/pci/controller/pcie-cadence.c +++ b/drivers/pci/controller/pcie-cadence.c @@ -180,11 +180,11 @@ int cdns_pcie_init_phy(struct device *dev, struct cdns_pcie *pcie) return 0; } - phy = devm_kzalloc(dev, sizeof(*phy) * phy_count, GFP_KERNEL); + phy = devm_kcalloc(dev, phy_count, sizeof(*phy), GFP_KERNEL); if (!phy) return -ENOMEM; - link = devm_kzalloc(dev, sizeof(*link) * phy_count, GFP_KERNEL); + link = devm_kcalloc(dev, phy_count, sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index fd2dbd7eed7b..f31ed62d518c 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -404,12 +404,10 @@ static int vmd_dma_supported(struct device *dev, u64 mask) return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); } -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK static u64 vmd_get_required_mask(struct device *dev) { return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); } -#endif static void vmd_teardown_dma_ops(struct vmd_dev *vmd) { @@ -450,9 +448,7 @@ static void vmd_setup_dma_ops(struct vmd_dev *vmd) ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); ASSIGN_VMD_DMA_OPS(source, dest, 
dma_supported); -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); -#endif add_dma_domain(domain); } #undef ASSIGN_VMD_DMA_OPS diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 7f01f6f60b87..d0b7dd8fb184 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -485,7 +485,13 @@ static int armpmu_filter_match(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(cpu, &armpmu->supported_cpus); + int ret; + + ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus); + if (ret && armpmu->filter_match) + return armpmu->filter_match(event); + + return ret; } static ssize_t armpmu_cpumask_show(struct device *dev, diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 96075cecb0ae..933bd8410fc2 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -77,14 +77,14 @@ static int pmu_parse_irq_affinity(struct device_node *node, int i) dn = of_parse_phandle(node, "interrupt-affinity", i); if (!dn) { - pr_warn("failed to parse interrupt-affinity[%d] for %s\n", - i, node->name); + pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n", + i, node); return -EINVAL; } cpu = of_cpu_node_to_id(dn); if (cpu < 0) { - pr_warn("failed to find logical CPU for %s\n", dn->name); + pr_warn("failed to find logical CPU for %pOFn\n", dn); cpu = nr_cpu_ids; } diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 4a8a8efadefa..cf73a403d22d 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -636,6 +636,14 @@ static int mcp23s08_irq_setup(struct mcp23s08 *mcp) return err; } + return 0; +} + +static int mcp23s08_irqchip_setup(struct mcp23s08 *mcp) +{ + struct gpio_chip *chip = &mcp->chip; + int err; + err = gpiochip_irqchip_add_nested(chip, &mcp23s08_irq_chip, 0, @@ -912,7 +920,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, } if (mcp->irq && mcp->irq_controller) { - ret = mcp23s08_irq_setup(mcp); + ret = mcp23s08_irqchip_setup(mcp); if (ret) goto fail; } @@ -944,6 +952,9 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, goto fail; } + if (mcp->irq) + ret = mcp23s08_irq_setup(mcp); + fail: if (ret < 0) dev_dbg(dev, "can't setup chip %d, --> %d\n", addr, ret); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 398393ab5df8..b6fd4838f60f 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -520,7 +520,7 @@ static int get_next_event_xfer(struct cros_ec_device *ec_dev, ret = cros_ec_cmd_xfer(ec_dev, msg); if (ret > 0) { ec_dev->event_size = ret - 1; - memcpy(&ec_dev->event_data, msg->data, ec_dev->event_size); + memcpy(&ec_dev->event_data, msg->data, ret); } return ret; diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 01b0e2bb3319..2012551d93e0 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -24,6 +24,8 @@ #include <linux/slab.h> #include <linux/timekeeping.h> +#include <linux/nospec.h> + #include "ptp_private.h" static int ptp_disable_pinfunc(struct ptp_clock_info *ops, @@ -248,6 +250,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; pd = 
ops->pin_config[pin_index]; @@ -266,6 +269,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 7036a6c6f86f..5542d9eadfe0 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -76,7 +76,7 @@ int dasd_gendisk_alloc(struct dasd_block *block) gdp->queue = block->request_queue; block->gdp = gdp; set_capacity(block->gdp, 0); - device_add_disk(&base->cdev->dev, block->gdp); + device_add_disk(&base->cdev->dev, block->gdp, NULL); return 0; } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 23e526cda5c1..4e8aedd50cb0 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -685,7 +685,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char } get_device(&dev_info->dev); - device_add_disk(&dev_info->dev, dev_info->gd); + device_add_disk(&dev_info->dev, dev_info->gd, NULL); switch (dev_info->segment_type) { case SEG_TYPE_SR: diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 98f66b7b6794..e01889394c84 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -500,7 +500,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) /* 512 byte sectors */ set_capacity(bdev->gendisk, scmdev->size >> 9); - device_add_disk(&scmdev->dev, bdev->gendisk); + device_add_disk(&scmdev->dev, bdev->gendisk, NULL); return 0; out_queue: diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index eceba3858cef..2f61f5579aa5 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -210,11 +210,11 @@ static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220) * Output one or more lines of text on the SCLP console (VT220 and / * or line-mode). 
*/ -void __sclp_early_printk(const char *str, unsigned int len) +void __sclp_early_printk(const char *str, unsigned int len, unsigned int force) { int have_linemode, have_vt220; - if (sclp_init_state != sclp_init_state_uninitialized) + if (!force && sclp_init_state != sclp_init_state_uninitialized) return; if (sclp_early_setup(0, &have_linemode, &have_vt220) != 0) return; @@ -227,5 +227,10 @@ void __sclp_early_printk(const char *str, unsigned int len) void sclp_early_printk(const char *str) { - __sclp_early_printk(str, strlen(str)); + __sclp_early_printk(str, strlen(str), 0); +} + +void sclp_early_printk_force(const char *str) +{ + __sclp_early_printk(str, strlen(str), 1); } diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index dbe7c7ac9ac8..fd77e46eb3b2 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -163,7 +163,7 @@ static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat, for (i = 0; i < pat->pat_nr; i++, pa++) for (j = 0; j < pa->pa_nr; j++) - if (pa->pa_iova_pfn[i] == iova_pfn) + if (pa->pa_iova_pfn[j] == iova_pfn) return true; return false; diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 770fa9cfc310..f47d16b5810b 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -22,6 +22,7 @@ #include "vfio_ccw_private.h" struct workqueue_struct *vfio_ccw_work_q; +struct kmem_cache *vfio_ccw_io_region; /* * Helpers @@ -79,7 +80,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) cp_update_scsw(&private->cp, &irb->scsw); cp_free(&private->cp); } - memcpy(private->io_region.irb_area, irb, sizeof(*irb)); + memcpy(private->io_region->irb_area, irb, sizeof(*irb)); if (private->io_trigger) eventfd_signal(private->io_trigger, 1); @@ -114,6 +115,14 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); if (!private) return -ENOMEM; + + private->io_region = kmem_cache_zalloc(vfio_ccw_io_region, + GFP_KERNEL | GFP_DMA); + if (!private->io_region) { + kfree(private); + return -ENOMEM; + } + private->sch = sch; dev_set_drvdata(&sch->dev, private); @@ -139,6 +148,7 @@ out_disable: cio_disable_subchannel(sch); out_free: dev_set_drvdata(&sch->dev, NULL); + kmem_cache_free(vfio_ccw_io_region, private->io_region); kfree(private); return ret; } @@ -153,6 +163,7 @@ static int vfio_ccw_sch_remove(struct subchannel *sch) dev_set_drvdata(&sch->dev, NULL); + kmem_cache_free(vfio_ccw_io_region, private->io_region); kfree(private); return 0; @@ -232,10 +243,20 @@ static int __init vfio_ccw_sch_init(void) if (!vfio_ccw_work_q) return -ENOMEM; + vfio_ccw_io_region = kmem_cache_create_usercopy("vfio_ccw_io_region", + sizeof(struct ccw_io_region), 0, + SLAB_ACCOUNT, 0, + sizeof(struct ccw_io_region), NULL); + if (!vfio_ccw_io_region) { + destroy_workqueue(vfio_ccw_work_q); + return -ENOMEM; + } + isc_register(VFIO_CCW_ISC); ret = css_driver_register(&vfio_ccw_sch_driver); if (ret) { isc_unregister(VFIO_CCW_ISC); + kmem_cache_destroy(vfio_ccw_io_region); destroy_workqueue(vfio_ccw_work_q); } @@ -246,6 +267,7 @@ static void __exit vfio_ccw_sch_exit(void) { css_driver_unregister(&vfio_ccw_sch_driver); isc_unregister(VFIO_CCW_ISC); + kmem_cache_destroy(vfio_ccw_io_region); destroy_workqueue(vfio_ccw_work_q); } module_init(vfio_ccw_sch_init); diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 797a82731159..f94aa01f9c36 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ 
b/drivers/s390/cio/vfio_ccw_fsm.c @@ -93,13 +93,13 @@ static void fsm_io_error(struct vfio_ccw_private *private, enum vfio_ccw_event event) { pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state); - private->io_region.ret_code = -EIO; + private->io_region->ret_code = -EIO; } static void fsm_io_busy(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - private->io_region.ret_code = -EBUSY; + private->io_region->ret_code = -EBUSY; } static void fsm_disabled_irq(struct vfio_ccw_private *private, @@ -126,7 +126,7 @@ static void fsm_io_request(struct vfio_ccw_private *private, { union orb *orb; union scsw *scsw = &private->scsw; - struct ccw_io_region *io_region = &private->io_region; + struct ccw_io_region *io_region = private->io_region; struct mdev_device *mdev = private->mdev; char *errstr = "request"; diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 41eeb57d68a3..f673e106c041 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -174,7 +174,7 @@ static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev, return -EINVAL; private = dev_get_drvdata(mdev_parent_dev(mdev)); - region = &private->io_region; + region = private->io_region; if (copy_to_user(buf, (void *)region + *ppos, count)) return -EFAULT; @@ -196,7 +196,7 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev, if (private->state != VFIO_CCW_STATE_IDLE) return -EACCES; - region = &private->io_region; + region = private->io_region; if (copy_from_user((void *)region + *ppos, buf, count)) return -EFAULT; diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 78a66d96756b..078e46f9623d 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -41,7 +41,7 @@ struct vfio_ccw_private { atomic_t avail; struct mdev_device *mdev; struct notifier_block nb; - struct ccw_io_region io_region; + struct ccw_io_region *io_region; struct channel_program cp; struct irb irb; diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c index 7b31f19ade83..050879a2ddef 100644 --- a/drivers/sbus/char/openprom.c +++ b/drivers/sbus/char/openprom.c @@ -715,22 +715,13 @@ static struct miscdevice openprom_dev = { static int __init openprom_init(void) { - struct device_node *dp; int err; err = misc_register(&openprom_dev); if (err) return err; - dp = of_find_node_by_path("/"); - dp = dp->child; - while (dp) { - if (!strcmp(dp->name, "options")) - break; - dp = dp->sibling; - } - options_node = dp; - + options_node = of_get_child_by_name(of_find_node_by_path("/"), "options"); if (!options_node) { misc_deregister(&openprom_dev); return -EIO; diff --git a/drivers/sbus/char/oradax.c b/drivers/sbus/char/oradax.c index 524f9ea62e52..6516bc3cb58b 100644 --- a/drivers/sbus/char/oradax.c +++ b/drivers/sbus/char/oradax.c @@ -689,8 +689,7 @@ static int dax_open(struct inode *inode, struct file *f) alloc_error: kfree(ctx->ccb_buf); done: - if (ctx != NULL) - kfree(ctx); + kfree(ctx); return -ENOMEM; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eb97d2dd3651..62348412ed1b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -3046,11 +3046,14 @@ scsi_device_quiesce(struct scsi_device *sdev) */ WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); - blk_set_preempt_only(q); + if (sdev->quiesced_by == current) + return 0; + + blk_set_pm_only(q); blk_mq_freeze_queue(q); /* - * Ensure that the effect of blk_set_preempt_only() will be 
visible + * Ensure that the effect of blk_set_pm_only() will be visible * for percpu_ref_tryget() callers that occur after the queue * unfreeze even if the queue was already frozen before this function * was called. See also https://lwn.net/Articles/573497/. @@ -3063,7 +3066,7 @@ scsi_device_quiesce(struct scsi_device *sdev) if (err == 0) sdev->quiesced_by = current; else - blk_clear_preempt_only(q); + blk_clear_pm_only(q); mutex_unlock(&sdev->state_mutex); return err; @@ -3088,7 +3091,7 @@ void scsi_device_resume(struct scsi_device *sdev) mutex_lock(&sdev->state_mutex); WARN_ON_ONCE(!sdev->quiesced_by); sdev->quiesced_by = NULL; - blk_clear_preempt_only(sdev->request_queue); + blk_clear_pm_only(sdev->request_queue); if (sdev->sdev_state == SDEV_QUIESCE) scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index b44c1bb687a2..a2b4179bfdf7 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -8,6 +8,7 @@ #include <linux/pm_runtime.h> #include <linux/export.h> #include <linux/async.h> +#include <linux/blk-pm.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 4a57ffecc7e6..b762d0fd773c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -45,6 +45,7 @@ #include <linux/init.h> #include <linux/blkdev.h> #include <linux/blkpg.h> +#include <linux/blk-pm.h> #include <linux/delay.h> #include <linux/mutex.h> #include <linux/string_helpers.h> @@ -3275,7 +3276,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie) } blk_pm_runtime_init(sdp->request_queue, dev); - device_add_disk(dev, gd); + device_add_disk(dev, gd, NULL); if (sdkp->capacity) sd_dif_config_host(sdkp); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index d0389b20574d..54dd70ae9731 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -43,6 +43,7 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/blkdev.h> +#include <linux/blk-pm.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/pm_runtime.h> @@ -758,7 +759,7 @@ static int sr_probe(struct device *dev) dev_set_drvdata(dev, cd); disk->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&sdev->sdev_gendev, disk); + device_add_disk(&sdev->sdev_gendev, disk, NULL); sdev_printk(KERN_DEBUG, sdev, "Attached scsi CD-ROM %s\n", cd->cdi.name); diff --git a/drivers/soc/fsl/qbman/bman_ccsr.c b/drivers/soc/fsl/qbman/bman_ccsr.c index 05c42235dd41..7c3cc968053c 100644 --- a/drivers/soc/fsl/qbman/bman_ccsr.c +++ b/drivers/soc/fsl/qbman/bman_ccsr.c @@ -120,6 +120,7 @@ static void bm_set_memory(u64 ba, u32 size) */ static dma_addr_t fbpr_a; static size_t fbpr_sz; +static int __bman_probed; static int bman_fbpr(struct reserved_mem *rmem) { @@ -166,6 +167,12 @@ static irqreturn_t bman_isr(int irq, void *ptr) return IRQ_HANDLED; } +int bman_is_probed(void) +{ + return __bman_probed; +} +EXPORT_SYMBOL_GPL(bman_is_probed); + static int fsl_bman_probe(struct platform_device *pdev) { int ret, err_irq; @@ -175,6 +182,8 @@ static int fsl_bman_probe(struct platform_device *pdev) u16 id, bm_pool_cnt; u8 major, minor; + __bman_probed = -1; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(dev, "Can't get %pOF property 'IORESOURCE_MEM'\n", @@ -255,6 +264,8 @@ static int fsl_bman_probe(struct platform_device *pdev) return ret; } + __bman_probed = 1; + return 0; }; diff --git a/drivers/soc/fsl/qbman/qman_ccsr.c b/drivers/soc/fsl/qbman/qman_ccsr.c index 
79cba58387a5..6fd5fef5f39b 100644 --- a/drivers/soc/fsl/qbman/qman_ccsr.c +++ b/drivers/soc/fsl/qbman/qman_ccsr.c @@ -273,6 +273,7 @@ static const struct qman_error_info_mdata error_mdata[] = { static u32 __iomem *qm_ccsr_start; /* A SDQCR mask comprising all the available/visible pool channels */ static u32 qm_pools_sdqcr; +static int __qman_probed; static inline u32 qm_ccsr_in(u32 offset) { @@ -686,6 +687,12 @@ static int qman_resource_init(struct device *dev) return 0; } +int qman_is_probed(void) +{ + return __qman_probed; +} +EXPORT_SYMBOL_GPL(qman_is_probed); + static int fsl_qman_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -695,6 +702,8 @@ static int fsl_qman_probe(struct platform_device *pdev) u16 id; u8 major, minor; + __qman_probed = -1; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(dev, "Can't get %pOF property 'IORESOURCE_MEM'\n", @@ -828,6 +837,8 @@ static int fsl_qman_probe(struct platform_device *pdev) if (ret) return ret; + __qman_probed = 1; + return 0; } diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c index a120002b630e..3e9391d117c5 100644 --- a/drivers/soc/fsl/qbman/qman_portal.c +++ b/drivers/soc/fsl/qbman/qman_portal.c @@ -227,6 +227,14 @@ static int qman_portal_probe(struct platform_device *pdev) int irq, cpu, err; u32 val; + err = qman_is_probed(); + if (!err) + return -EPROBE_DEFER; + if (err < 0) { + dev_err(&pdev->dev, "failing probe due to qman probe error\n"); + return -ENODEV; + } + pcfg = devm_kmalloc(dev, sizeof(*pcfg), GFP_KERNEL); if (!pcfg) return -ENOMEM; diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index cb0461a10808..f459118bc11b 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -636,9 +636,9 @@ spc_emulate_evpd_b2(struct se_cmd *cmd, unsigned char *buf) /* * The unmap_zeroes_data set means that the underlying device supports - * REQ_DISCARD and has the discard_zeroes_data bit set. This satisfies - * the SBC requirements for LBPRZ, meaning that a subsequent read - * will return zeroes after an UNMAP or WRITE SAME (16) to an LBA + * REQ_OP_DISCARD and has the discard_zeroes_data bit set. This + * satisfies the SBC requirements for LBPRZ, meaning that a subsequent + * read will return zeroes after an UNMAP or WRITE SAME (16) to an LBA * See sbc4r36 6.6.4. 
*/ if (((dev->dev_attrib.emulate_tpu != 0) || diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 29ec34387246..1515074e18fb 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -868,8 +868,8 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport) geni_se_init(&port->se, port->rx_wm, port->rx_rfr); geni_se_select_mode(&port->se, port->xfer_mode); if (!uart_console(uport)) { - port->rx_fifo = devm_kzalloc(uport->dev, - port->rx_fifo_depth * sizeof(u32), GFP_KERNEL); + port->rx_fifo = devm_kcalloc(uport->dev, + port->rx_fifo_depth, sizeof(u32), GFP_KERNEL); if (!port->rx_fifo) return -ENOMEM; } diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index bc03b0a690b4..9ede35cecb12 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -310,17 +310,17 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) if (difference & ACM_CTRL_DSR) acm->iocount.dsr++; - if (difference & ACM_CTRL_BRK) - acm->iocount.brk++; - if (difference & ACM_CTRL_RI) - acm->iocount.rng++; if (difference & ACM_CTRL_DCD) acm->iocount.dcd++; - if (difference & ACM_CTRL_FRAMING) + if (newctrl & ACM_CTRL_BRK) + acm->iocount.brk++; + if (newctrl & ACM_CTRL_RI) + acm->iocount.rng++; + if (newctrl & ACM_CTRL_FRAMING) acm->iocount.frame++; - if (difference & ACM_CTRL_PARITY) + if (newctrl & ACM_CTRL_PARITY) acm->iocount.parity++; - if (difference & ACM_CTRL_OVERRUN) + if (newctrl & ACM_CTRL_OVERRUN) acm->iocount.overrun++; spin_unlock_irqrestore(&acm->read_lock, flags); @@ -355,7 +355,6 @@ static void acm_ctrl_irq(struct urb *urb) case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - acm->nb_index = 0; dev_dbg(&acm->control->dev, "%s - urb shutting down with status: %d\n", __func__, status); @@ -1642,6 +1641,7 @@ static int acm_pre_reset(struct usb_interface *intf) struct acm *acm = usb_get_intfdata(intf); clear_bit(EVENT_RX_STALL, &acm->flags); + acm->nb_index = 0; /* pending control transfers are lost */ return 0; } diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 244417d0dfd1..ffccd40ea67d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1474,8 +1474,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: - if (is_in) - allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1505,6 +1503,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb is_in = 0; uurb->endpoint &= ~USB_DIR_IN; } + if (is_in) + allow_short = true; snoop(&ps->dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index ca8a4b53c59f..1074cb82ec17 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -221,6 +221,8 @@ #include <linux/usb/gadget.h> #include <linux/usb/composite.h> +#include <linux/nospec.h> + #include "configfs.h" @@ -3152,6 +3154,7 @@ static struct config_group *fsg_lun_make(struct config_group *group, fsg_opts = to_fsg_opts(&group->cg_item); if (num >= FSG_MAX_LUNS) return ERR_PTR(-ERANGE); + num = array_index_nospec(num, FSG_MAX_LUNS); mutex_lock(&fsg_opts->lock); if (fsg_opts->refcnt || fsg_opts->common->luns[num]) { diff --git 
a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 722860eb5a91..51dd8e00c4f8 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -179,10 +179,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && - pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) xhci->quirks |= XHCI_SSIC_PORT_UNUSED; + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) xhci->quirks |= XHCI_INTEL_USB_ROLE_SW; - } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || diff --git a/drivers/usb/roles/intel-xhci-usb-role-switch.c b/drivers/usb/roles/intel-xhci-usb-role-switch.c index 1fb3dd0f1dfa..277de96181f9 100644 --- a/drivers/usb/roles/intel-xhci-usb-role-switch.c +++ b/drivers/usb/roles/intel-xhci-usb-role-switch.c @@ -161,6 +161,8 @@ static int intel_xhci_usb_remove(struct platform_device *pdev) { struct intel_xhci_usb_data *data = platform_get_drvdata(pdev); + pm_runtime_disable(&pdev->dev); + usb_role_switch_unregister(data->role_sw); return 0; } diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index d11f3f8dad40..1e592ec94ba4 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -318,8 +318,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, struct vhci_hcd *vhci_hcd; struct vhci *vhci; int retval = 0; - int rhport; + int rhport = -1; unsigned long flags; + bool invalid_rhport = false; u32 prev_port_status[VHCI_HC_PORTS]; @@ -334,9 +335,19 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue, wIndex); - if (wIndex > VHCI_HC_PORTS) - pr_err("invalid port number %d\n", wIndex); - rhport = wIndex - 1; + /* + * wIndex can be 0 for some request types (typeReq). rhport is + * in valid range when wIndex >= 1 and < VHCI_HC_PORTS. + * + * Reference port_status[] only with valid rhport when + * invalid_rhport is false. 
+ */ + if (wIndex < 1 || wIndex > VHCI_HC_PORTS) { + invalid_rhport = true; + if (wIndex > VHCI_HC_PORTS) + pr_err("invalid port number %d\n", wIndex); + } else + rhport = wIndex - 1; vhci_hcd = hcd_to_vhci_hcd(hcd); vhci = vhci_hcd->vhci; @@ -345,8 +356,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, /* store old status and compare now and old later */ if (usbip_dbg_flag_vhci_rh) { - memcpy(prev_port_status, vhci_hcd->port_status, - sizeof(prev_port_status)); + if (!invalid_rhport) + memcpy(prev_port_status, vhci_hcd->port_status, + sizeof(prev_port_status)); } switch (typeReq) { @@ -354,8 +366,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh(" ClearHubFeature\n"); break; case ClearPortFeature: - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { @@ -415,9 +429,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, break; case GetPortStatus: usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex); - if (wIndex < 1) { + if (invalid_rhport) { pr_err("invalid port number %d\n", wIndex); retval = -EPIPE; + goto error; } /* we do not care about resume. */ @@ -513,16 +528,20 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; } - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } vhci_hcd->port_status[rhport] |= USB_PORT_STAT_SUSPEND; break; case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_POWER\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) vhci_hcd->port_status[rhport] |= USB_SS_PORT_STAT_POWER; else @@ -531,8 +550,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_BH_PORT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_BH_PORT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* Applicable only for USB3.0 hub */ if (hcd->speed != HCD_USB3) { pr_err("USB_PORT_FEAT_BH_PORT_RESET req not " @@ -543,8 +564,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { vhci_hcd->port_status[rhport] = 0; @@ -565,8 +588,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" SetPortFeature: default %d\n", wValue); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) { if ((vhci_hcd->port_status[rhport] & USB_SS_PORT_STAT_POWER) != 0) { @@ -608,7 +633,7 @@ error: if (usbip_dbg_flag_vhci_rh) { pr_debug("port %d\n", rhport); /* Only dump valid port status */ - if (rhport >= 0) { + if (!invalid_rhport) { dump_port_status_diff(prev_port_status[rhport], vhci_hcd->port_status[rhport], hcd->speed == HCD_USB3); @@ -618,8 +643,10 @@ error: spin_unlock_irqrestore(&vhci->lock, flags); - if ((vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) + if (!invalid_rhport && + (vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) { usb_hcd_poll_rh_status(hcd); + } return 
retval; } diff --git a/drivers/video/fbdev/aty/atyfb.h b/drivers/video/fbdev/aty/atyfb.h index 8235b285dbb2..d09bab3bf224 100644 --- a/drivers/video/fbdev/aty/atyfb.h +++ b/drivers/video/fbdev/aty/atyfb.h @@ -333,6 +333,8 @@ extern const struct aty_pll_ops aty_pll_ct; /* Integrated */ extern void aty_set_pll_ct(const struct fb_info *info, const union aty_pll *pll); extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par); +extern const u8 aty_postdividers[8]; + /* * Hardware cursor support @@ -359,7 +361,6 @@ static inline void wait_for_idle(struct atyfb_par *par) extern void aty_reset_engine(const struct atyfb_par *par); extern void aty_init_engine(struct atyfb_par *par, struct fb_info *info); -extern u8 aty_ld_pll_ct(int offset, const struct atyfb_par *par); void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area); void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index a9a8272f7a6e..05111e90f168 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3087,17 +3087,18 @@ static int atyfb_setup_sparc(struct pci_dev *pdev, struct fb_info *info, /* * PLL Reference Divider M: */ - M = pll_regs[2]; + M = pll_regs[PLL_REF_DIV]; /* * PLL Feedback Divider N (Dependent on CLOCK_CNTL): */ - N = pll_regs[7 + (clock_cntl & 3)]; + N = pll_regs[VCLK0_FB_DIV + (clock_cntl & 3)]; /* * PLL Post Divider P (Dependent on CLOCK_CNTL): */ - P = 1 << (pll_regs[6] >> ((clock_cntl & 3) << 1)); + P = aty_postdividers[((pll_regs[VCLK_POST_DIV] >> ((clock_cntl & 3) << 1)) & 3) | + ((pll_regs[PLL_EXT_CNTL] >> (2 + (clock_cntl & 3))) & 4)]; /* * PLL Divider Q: diff --git a/drivers/video/fbdev/aty/mach64_ct.c b/drivers/video/fbdev/aty/mach64_ct.c index 74a62aa193c0..f87cc81f4fa2 100644 --- a/drivers/video/fbdev/aty/mach64_ct.c +++ b/drivers/video/fbdev/aty/mach64_ct.c @@ -115,7 +115,7 @@ static void aty_st_pll_ct(int offset, u8 val, const struct atyfb_par *par) */ #define Maximum_DSP_PRECISION 7 -static u8 postdividers[] = {1,2,4,8,3}; +const u8 aty_postdividers[8] = {1,2,4,8,3,5,6,12}; static int aty_dsp_gt(const struct fb_info *info, u32 bpp, struct pll_ct *pll) { @@ -222,7 +222,7 @@ static int aty_valid_pll_ct(const struct fb_info *info, u32 vclk_per, struct pll pll->vclk_post_div += (q < 64*8); pll->vclk_post_div += (q < 32*8); } - pll->vclk_post_div_real = postdividers[pll->vclk_post_div]; + pll->vclk_post_div_real = aty_postdividers[pll->vclk_post_div]; // pll->vclk_post_div <<= 6; pll->vclk_fb_div = q * pll->vclk_post_div_real / 8; pllvclk = (1000000 * 2 * pll->vclk_fb_div) / @@ -513,7 +513,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) u8 mclk_fb_div, pll_ext_cntl; pll->ct.pll_ref_div = aty_ld_pll_ct(PLL_REF_DIV, par); pll_ext_cntl = aty_ld_pll_ct(PLL_EXT_CNTL, par); - pll->ct.xclk_post_div_real = postdividers[pll_ext_cntl & 0x07]; + pll->ct.xclk_post_div_real = aty_postdividers[pll_ext_cntl & 0x07]; mclk_fb_div = aty_ld_pll_ct(MCLK_FB_DIV, par); if (pll_ext_cntl & PLL_MFB_TIMES_4_2B) mclk_fb_div <<= 1; @@ -535,7 +535,7 @@ static int aty_init_pll_ct(const struct fb_info *info, union aty_pll *pll) xpost_div += (q < 64*8); xpost_div += (q < 32*8); } - pll->ct.xclk_post_div_real = postdividers[xpost_div]; + pll->ct.xclk_post_div_real = aty_postdividers[xpost_div]; pll->ct.mclk_fb_div = q * pll->ct.xclk_post_div_real / 8; #ifdef CONFIG_PPC @@ -584,7 +584,7 @@ static int aty_init_pll_ct(const struct 
fb_info *info, union aty_pll *pll) mpost_div += (q < 64*8); mpost_div += (q < 32*8); } - sclk_post_div_real = postdividers[mpost_div]; + sclk_post_div_real = aty_postdividers[mpost_div]; pll->ct.sclk_fb_div = q * sclk_post_div_real / 8; pll->ct.spll_cntl2 = mpost_div << 4; #ifdef DEBUG diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index 55ed80c3a17c..f3fbb700f569 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bio.h> -#include <linux/io.h> #include <linux/export.h> +#include <xen/xen.h> #include <xen/page.h> bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, @@ -20,4 +20,3 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, return false; #endif } -EXPORT_SYMBOL(xen_biovec_phys_mergeable); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index a6f9ba85dc4b..28819a0e61d0 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -662,7 +662,7 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); #endif - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } /* @@ -689,7 +689,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, handle, size, attrs); } #endif - return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size); + return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs); } static int xen_swiotlb_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c index 23d1808fe027..e25ab76b9c99 100644 --- a/drivers/xen/xen-acpi-pad.c +++ b/drivers/xen/xen-acpi-pad.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/acpi.h> +#include <xen/xen.h> #include <xen/interface/version.h> #include <xen/xen-ops.h> #include <asm/xen/hypercall.h> diff --git a/fs/afs/cell.c b/fs/afs/cell.c index f3d0bef16d78..6127f0fcd62c 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -514,6 +514,8 @@ static int afs_alloc_anon_key(struct afs_cell *cell) */ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) { + struct hlist_node **p; + struct afs_cell *pcell; int ret; if (!cell->anonymous_key) { @@ -534,7 +536,18 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) return ret; mutex_lock(&net->proc_cells_lock); - list_add_tail(&cell->proc_link, &net->proc_cells); + for (p = &net->proc_cells.first; *p; p = &(*p)->next) { + pcell = hlist_entry(*p, struct afs_cell, proc_link); + if (strcmp(cell->name, pcell->name) < 0) + break; + } + + cell->proc_link.pprev = p; + cell->proc_link.next = *p; + rcu_assign_pointer(*p, &cell->proc_link.next); + if (cell->proc_link.next) + cell->proc_link.next->pprev = &cell->proc_link.next; + afs_dynroot_mkdir(net, cell); mutex_unlock(&net->proc_cells_lock); return 0; @@ -550,7 +563,7 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) afs_proc_cell_remove(cell); mutex_lock(&net->proc_cells_lock); - list_del_init(&cell->proc_link); + hlist_del_rcu(&cell->proc_link); afs_dynroot_rmdir(net, cell); mutex_unlock(&net->proc_cells_lock); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 1cde710a8013..f29c6dade7f6 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -265,7 +265,7 @@ int afs_dynroot_populate(struct super_block *sb) return -ERESTARTSYS; net->dynroot_sb = sb; 
- list_for_each_entry(cell, &net->proc_cells, proc_link) { + hlist_for_each_entry(cell, &net->proc_cells, proc_link) { ret = afs_dynroot_mkdir(net, cell); if (ret < 0) goto error; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 871a228d7f37..34c02fdcc25f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -242,7 +242,7 @@ struct afs_net { seqlock_t cells_lock; struct mutex proc_cells_lock; - struct list_head proc_cells; + struct hlist_head proc_cells; /* Known servers. Theoretically each fileserver can only be in one * cell, but in practice, people create aliases and subsets and there's @@ -320,7 +320,7 @@ struct afs_cell { struct afs_net *net; struct key *anonymous_key; /* anonymous user key for this cell */ struct work_struct manager; /* Manager for init/deinit/dns */ - struct list_head proc_link; /* /proc cell list link */ + struct hlist_node proc_link; /* /proc cell list link */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif diff --git a/fs/afs/main.c b/fs/afs/main.c index e84fe822a960..107427688edd 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -87,7 +87,7 @@ static int __net_init afs_net_init(struct net *net_ns) timer_setup(&net->cells_timer, afs_cells_timer, 0); mutex_init(&net->proc_cells_lock); - INIT_LIST_HEAD(&net->proc_cells); + INIT_HLIST_HEAD(&net->proc_cells); seqlock_init(&net->fs_lock); net->fs_servers = RB_ROOT; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 476dcbb79713..9101f62707af 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -33,9 +33,8 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m) static int afs_proc_cells_show(struct seq_file *m, void *v) { struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); - struct afs_net *net = afs_seq2net(m); - if (v == &net->proc_cells) { + if (v == SEQ_START_TOKEN) { /* display header on line 1 */ seq_puts(m, "USE NAME\n"); return 0; @@ -50,12 +49,12 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { rcu_read_lock(); - return seq_list_start_head(&afs_seq2net(m)->proc_cells, *_pos); + return seq_hlist_start_head_rcu(&afs_seq2net(m)->proc_cells, *_pos); } static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &afs_seq2net(m)->proc_cells, pos); + return seq_hlist_next_rcu(v, &afs_seq2net(m)->proc_cells, pos); } static void afs_proc_cells_stop(struct seq_file *m, void *v) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 35f2ae30f31f..77a83790a31f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -690,8 +690,6 @@ static void afs_process_async_call(struct work_struct *work) } if (call->state == AFS_CALL_COMPLETE) { - call->reply[0] = NULL; - /* We have two refs to release - one from the alloc and one * queued with the work item - and we can't just deallocate the * call because the work item may be queued again. 
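Note on the fs/afs hunks above: the proc_cells list moves from a plain list_head to an RCU-friendly hlist kept sorted by cell name, so the /proc readers can walk it under rcu_read_lock() via seq_hlist_start_head_rcu()/seq_hlist_next_rcu() while insertions, serialised by net->proc_cells_lock, preserve name order. The following is a minimal sketch of that insertion pattern using the stock rculist helpers rather than the open-coded pointer splice in the diff; the names mirror the diff, but the helper itself is illustrative, not upstream code.

	#include <linux/rculist.h>
	#include <linux/string.h>

	/*
	 * Illustrative only: sorted insert into an RCU-protected hlist,
	 * the pattern the fs/afs/cell.c hunk above applies to
	 * net->proc_cells. Caller is assumed to hold the writers' mutex
	 * (net->proc_cells_lock in the diff).
	 */
	static void afs_proc_cells_insert_sorted(struct hlist_head *cells,
						 struct afs_cell *cell)
	{
		struct afs_cell *pcell;
		struct hlist_node *prev = NULL;

		/* Find the last cell whose name sorts before the new one. */
		hlist_for_each_entry(pcell, cells, proc_link) {
			if (strcmp(cell->name, pcell->name) < 0)
				break;
			prev = &pcell->proc_link;
		}

		/* Publish the node so rcu_read_lock() walkers see it safely. */
		if (prev)
			hlist_add_behind_rcu(&cell->proc_link, prev);
		else
			hlist_add_head_rcu(&cell->proc_link, cells);
	}

Removal pairs with hlist_del_rcu(), as in afs_deactivate_cell() above, which is what lets the seq_file iteration in fs/afs/proc.c run without taking proc_cells_lock.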
diff --git a/fs/buffer.c b/fs/buffer.c index 6f1ae3ac9789..109f55196866 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3060,11 +3060,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, */ bio = bio_alloc(GFP_NOIO, 1); - if (wbc) { - wbc_init_bio(wbc, bio); - wbc_account_io(wbc, bh->b_page, bh->b_size); - } - bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); bio->bi_write_hint = write_hint; @@ -3084,6 +3079,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, op_flags |= REQ_PRIO; bio_set_op_attrs(bio, op, op_flags); + if (wbc) { + wbc_init_bio(wbc, bio); + wbc_account_io(wbc, bh->b_page, bh->b_size); + } + submit_bio(bio); return 0; } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index af2b17b21b94..95983c744164 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -343,7 +343,7 @@ try_again: trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); @@ -666,6 +666,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping) while (index < end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { + pgoff_t nr_pages = 1; + for (i = 0; i < pagevec_count(&pvec); i++) { struct page *pvec_ent = pvec.pages[i]; void *entry; @@ -680,8 +682,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping) xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, NULL); - if (entry) + if (entry) { page = dax_busy_page(entry); + /* + * Account for multi-order entries at + * the end of the pagevec. 
+ */ + if (i + 1 >= pagevec_count(&pvec)) + nr_pages = 1UL << dax_radix_order(entry); + } put_unlocked_mapping_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); if (page) @@ -696,7 +705,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) */ pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - index++; + index += nr_pages; if (page) break; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index db7590178dfc..2aa62d58d8dd 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io, bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); if (!bio) return -ENOMEM; - wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); bio->bi_end_io = ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; io->io_next_block = bh->b_blocknr; + wbc_init_bio(io->io_wbc, bio); return 0; } diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index defc2168de91..f58c0cacc531 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -682,6 +682,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 83bfe04456b6..c550512ce335 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -70,20 +70,7 @@ void fscache_free_cookie(struct fscache_cookie *cookie) } /* - * initialise an cookie jar slab element prior to any use - */ -void fscache_cookie_init_once(void *_cookie) -{ - struct fscache_cookie *cookie = _cookie; - - memset(cookie, 0, sizeof(*cookie)); - spin_lock_init(&cookie->lock); - spin_lock_init(&cookie->stores_lock); - INIT_HLIST_HEAD(&cookie->backing_objects); -} - -/* - * Set the index key in a cookie. The cookie struct has space for a 12-byte + * Set the index key in a cookie. The cookie struct has space for a 16-byte * key plus length and hash, but if that's not big enough, it's instead a * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then * the key data. 
@@ -93,20 +80,18 @@ static int fscache_set_key(struct fscache_cookie *cookie, { unsigned long long h; u32 *buf; + int bufs; int i; - cookie->key_len = index_key_len; + bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf)); if (index_key_len > sizeof(cookie->inline_key)) { - buf = kzalloc(index_key_len, GFP_KERNEL); + buf = kcalloc(bufs, sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; cookie->key = buf; } else { buf = (u32 *)cookie->inline_key; - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; } memcpy(buf, index_key, index_key_len); @@ -116,7 +101,8 @@ static int fscache_set_key(struct fscache_cookie *cookie, */ h = (unsigned long)cookie->parent; h += index_key_len + cookie->type; - for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++) + + for (i = 0; i < bufs; i++) h += buf[i]; cookie->key_hash = h ^ (h >> 32); @@ -161,7 +147,7 @@ struct fscache_cookie *fscache_alloc_cookie( struct fscache_cookie *cookie; /* allocate and initialise a cookie */ - cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL); + cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); if (!cookie) return NULL; @@ -192,6 +178,9 @@ struct fscache_cookie *fscache_alloc_cookie( cookie->netfs_data = netfs_data; cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET); cookie->type = def->type; + spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); + INIT_HLIST_HEAD(&cookie->backing_objects); /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f83328a7f048..d6209022e965 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -51,7 +51,6 @@ extern struct fscache_cache *fscache_select_cache_for_object( extern struct kmem_cache *fscache_cookie_jar; extern void fscache_free_cookie(struct fscache_cookie *); -extern void fscache_cookie_init_once(void *); extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *, const struct fscache_cookie_def *, const void *, size_t, diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 7dce110bf17d..30ad89db1efc 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -143,9 +143,7 @@ static int __init fscache_init(void) fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", sizeof(struct fscache_cookie), - 0, - 0, - fscache_cookie_init_once); + 0, 0, NULL); if (!fscache_cookie_jar) { pr_notice("Failed to allocate a cookie jar\n"); ret = -ENOMEM; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 03128ed1f34e..84544a4f012d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1057,7 +1057,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, } } release_metapath(&mp); - if (gfs2_is_jdata(ip)) + if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip)) iomap->page_done = gfs2_iomap_journaled_page_done; return 0; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8e712b614e6e..933aac5da193 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -96,7 +96,9 @@ struct ocfs2_unblock_ctl { }; /* Lockdep class keys */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; +#endif static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index bf000c8aeffb..fec62e9dfbe6 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2337,8 +2337,8 @@ late_initcall(ubifs_init); static void __exit ubifs_exit(void) { - WARN_ON(list_empty(&ubifs_infos)); - WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) == 
0); + WARN_ON(!list_empty(&ubifs_infos)); + WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0); dbg_debugfs_exit(); ubifs_compressors_exit(); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 5289e22cb081..42ea7bab9144 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1220,35 +1220,92 @@ retry: return 0; } +/* Unlock both inodes after they've been prepped for a range clone. */ +STATIC void +xfs_reflink_remap_unlock( + struct file *file_in, + struct file *file_out) +{ + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + bool same_inode = (inode_in == inode_out); + + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + if (!same_inode) + xfs_iunlock(src, XFS_MMAPLOCK_SHARED); + inode_unlock(inode_out); + if (!same_inode) + inode_unlock_shared(inode_in); +} + /* - * Link a range of blocks from one file to another. + * If we're reflinking to a point past the destination file's EOF, we must + * zero any speculative post-EOF preallocations that sit between the old EOF + * and the destination file offset. */ -int -xfs_reflink_remap_range( +static int +xfs_reflink_zero_posteof( + struct xfs_inode *ip, + loff_t pos) +{ + loff_t isize = i_size_read(VFS_I(ip)); + + if (pos <= isize) + return 0; + + trace_xfs_zero_eof(ip, isize, pos - isize); + return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, + &xfs_iomap_ops); +} + +/* + * Prepare two files for range cloning. Upon a successful return both inodes + * will have the iolock and mmaplock held, the page cache of the out file will + * be truncated, and any leases on the out file will have been broken. This + * function borrows heavily from xfs_file_aio_write_checks. + * + * The VFS allows partial EOF blocks to "match" for dedupe even though it hasn't + * checked that the bytes beyond EOF physically match. Hence we cannot use the + * EOF block in the source dedupe range because it's not a complete block match, + * hence can introduce a corruption into the file that has it's block replaced. + * + * In similar fashion, the VFS file cloning also allows partial EOF blocks to be + * "block aligned" for the purposes of cloning entire files. However, if the + * source file range includes the EOF block and it lands within the existing EOF + * of the destination file, then we can expose stale data from beyond the source + * file EOF in the destination file. + * + * XFS doesn't support partial block sharing, so in both cases we have check + * these cases ourselves. For dedupe, we can simply round the length to dedupe + * down to the previous whole block and ignore the partial EOF block. While this + * means we can't dedupe the last block of a file, this is an acceptible + * tradeoff for simplicity on implementation. + * + * For cloning, we want to share the partial EOF block if it is also the new EOF + * block of the destination file. If the partial EOF block lies inside the + * existing destination EOF, then we have to abort the clone to avoid exposing + * stale data in the destination file. Hence we reject these clone attempts with + * -EINVAL in this case. 
+ */ +STATIC int +xfs_reflink_remap_prep( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + u64 *len, bool is_dedupe) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); - struct xfs_mount *mp = src->i_mount; bool same_inode = (inode_in == inode_out); - xfs_fileoff_t sfsbno, dfsbno; - xfs_filblks_t fsblen; - xfs_extlen_t cowextsize; + u64 blkmask = i_blocksize(inode_in) - 1; ssize_t ret; - if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return -EOPNOTSUPP; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - /* Lock both files against IO */ ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); if (ret) @@ -1270,33 +1327,115 @@ xfs_reflink_remap_range( goto out_unlock; ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, - &len, is_dedupe); + len, is_dedupe); if (ret <= 0) goto out_unlock; + /* + * If the dedupe data matches, chop off the partial EOF block + * from the source file so we don't try to dedupe the partial + * EOF block. + */ + if (is_dedupe) { + *len &= ~blkmask; + } else if (*len & blkmask) { + /* + * The user is attempting to share a partial EOF block, + * if it's inside the destination EOF then reject it. + */ + if (pos_out + *len < i_size_read(inode_out)) { + ret = -EINVAL; + goto out_unlock; + } + } + /* Attach dquots to dest inode before changing block map */ ret = xfs_qm_dqattach(dest); if (ret) goto out_unlock; - trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - /* - * Clear out post-eof preallocations because we don't have page cache - * backing the delayed allocations and they'll never get freed on - * their own. + * Zero existing post-eof speculative preallocations in the destination + * file. */ - if (xfs_can_free_eofblocks(dest, true)) { - ret = xfs_free_eofblocks(dest); - if (ret) - goto out_unlock; - } + ret = xfs_reflink_zero_posteof(dest, pos_out); + if (ret) + goto out_unlock; /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) goto out_unlock; + /* Zap any page cache for the destination file's range. */ + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + *len) - 1); + + /* If we're altering the file contents... */ + if (!is_dedupe) { + /* + * ...update the timestamps (which will grab the ilock again + * from xfs_fs_dirty_inode, so we have to call it before we + * take the ilock). + */ + if (!(file_out->f_mode & FMODE_NOCMTIME)) { + ret = file_update_time(file_out); + if (ret) + goto out_unlock; + } + + /* + * ...clear the security bits if the process is not being run + * by root. This keeps people from modifying setuid and setgid + * binaries. + */ + ret = file_remove_privs(file_out); + if (ret) + goto out_unlock; + } + + return 1; +out_unlock: + xfs_reflink_remap_unlock(file_in, file_out); + return ret; +} + +/* + * Link a range of blocks from one file to another. 
+ */ +int +xfs_reflink_remap_range( + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) +{ + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + struct xfs_mount *mp = src->i_mount; + xfs_fileoff_t sfsbno, dfsbno; + xfs_filblks_t fsblen; + xfs_extlen_t cowextsize; + ssize_t ret; + + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return -EOPNOTSUPP; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + /* Prepare and then clone file data. */ + ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, + &len, is_dedupe); + if (ret <= 0) + return ret; + + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + dfsbno = XFS_B_TO_FSBT(mp, pos_out); sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); @@ -1305,10 +1444,6 @@ xfs_reflink_remap_range( if (ret) goto out_unlock; - /* Zap any page cache for the destination file's range. */ - truncate_inode_pages_range(&inode_out->i_data, pos_out, - PAGE_ALIGN(pos_out + len) - 1); - /* * Carry the cowextsize hint from src to dest if we're sharing the * entire source file to the entire destination file, the source file @@ -1325,12 +1460,7 @@ xfs_reflink_remap_range( is_dedupe); out_unlock: - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - if (!same_inode) - xfs_iunlock(src, XFS_MMAPLOCK_SHARED); - inode_unlock(inode_out); - if (!same_inode) - inode_unlock_shared(inode_in); + xfs_reflink_remap_unlock(file_in, file_out); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return ret; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index ba4dd54f2c82..53600f527a70 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -595,7 +595,6 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, u64 *size); int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); -void acpi_dma_deconfigure(struct device *dev); struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children); diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index ad2868263867..880a292d792f 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -4,16 +4,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - /* - * Use the non-coherent ops if available. If an architecture wants a - * more fine-grained selection of operations it will have to implement - * get_arch_dma_ops itself or use the per-device dma_ops. - */ -#ifdef CONFIG_DMA_NONCOHERENT_OPS - return &dma_noncoherent_ops; -#else return &dma_direct_ops; -#endif } #endif /* _ASM_GENERIC_DMA_MAPPING_H */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index b3353e21f3b3..6be86c1c5c58 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -20,6 +20,8 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> +#ifdef CONFIG_MMU + #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* * Semi RCU freeing of the page directories. 
@@ -97,12 +99,30 @@ struct mmu_gather { #endif unsigned long start; unsigned long end; - /* we are in the middle of an operation to clear - * a full mm and can make some optimizations */ - unsigned int fullmm : 1, - /* we have performed an operation which - * requires a complete flush of the tlb */ - need_flush_all : 1; + /* + * we are in the middle of an operation to clear + * a full mm and can make some optimizations + */ + unsigned int fullmm : 1; + + /* + * we have performed an operation which + * requires a complete flush of the tlb + */ + unsigned int need_flush_all : 1; + + /* + * we have removed page directories + */ + unsigned int freed_tables : 1; + + /* + * at which levels have we cleared entries? + */ + unsigned int cleared_ptes : 1; + unsigned int cleared_pmds : 1; + unsigned int cleared_puds : 1; + unsigned int cleared_p4ds : 1; struct mmu_gather_batch *active; struct mmu_gather_batch local; @@ -118,6 +138,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); +void tlb_flush_mmu_free(struct mmu_gather *tlb); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); @@ -137,6 +158,11 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->start = TASK_SIZE; tlb->end = 0; } + tlb->freed_tables = 0; + tlb->cleared_ptes = 0; + tlb->cleared_pmds = 0; + tlb->cleared_puds = 0; + tlb->cleared_p4ds = 0; } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -186,6 +212,25 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } #endif +static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) +{ + if (tlb->cleared_ptes) + return PAGE_SHIFT; + if (tlb->cleared_pmds) + return PMD_SHIFT; + if (tlb->cleared_puds) + return PUD_SHIFT; + if (tlb->cleared_p4ds) + return P4D_SHIFT; + + return PAGE_SHIFT; +} + +static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) +{ + return 1UL << tlb_get_unmap_shift(tlb); +} + /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. 
When we're doing a munmap, @@ -219,13 +264,19 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->cleared_ptes = 1; \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - do { \ - __tlb_adjust_range(tlb, address, huge_page_size(h)); \ - __tlb_remove_tlb_entry(tlb, ptep, address); \ +#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ + do { \ + unsigned long _sz = huge_page_size(h); \ + __tlb_adjust_range(tlb, address, _sz); \ + if (_sz == PMD_SIZE) \ + tlb->cleared_pmds = 1; \ + else if (_sz == PUD_SIZE) \ + tlb->cleared_puds = 1; \ + __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** @@ -239,6 +290,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ + tlb->cleared_pmds = 1; \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) @@ -253,6 +305,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \ + tlb->cleared_puds = 1; \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) @@ -278,6 +331,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pte_free_tlb(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ + tlb->cleared_pmds = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif @@ -285,7 +340,9 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ + tlb->cleared_puds = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif @@ -295,6 +352,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pud_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ + tlb->cleared_p4ds = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif @@ -304,12 +363,15 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif #endif +#endif /* CONFIG_MMU */ + #define tlb_migrate_finish(mm) do {} while (0) #endif /* _ASM_GENERIC__TLB_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7b75ff6e2fce..d7701d466b60 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -68,7 +68,7 @@ */ #ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* @@ -613,8 +613,8 @@ #define EXIT_DATA \ *(.exit.data .exit.data.*) \ - *(.fini_array) \ - *(.dtors) \ + *(.fini_array 
.fini_array.*) \ + *(.dtors .dtors.*) \ MEM_DISCARD(exit.data*) \ MEM_DISCARD(exit.rodata*) diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index da9d95a19580..1e713154f00e 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -153,6 +153,17 @@ struct __drm_planes_state { struct __drm_crtcs_state { struct drm_crtc *ptr; struct drm_crtc_state *state, *old_state, *new_state; + + /** + * @commit: + * + * A reference to the CRTC commit object that is kept for use by + * drm_atomic_helper_wait_for_flip_done() after + * drm_atomic_helper_commit_hw_done() is called. This ensures that a + * concurrent commit won't free a commit object that is still in use. + */ + struct drm_crtc_commit *commit; + s32 __user *out_fence_ptr; u64 last_vblank_count; }; diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index b25d12ef120a..e3c404833115 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -214,9 +214,9 @@ struct detailed_timing { #define DRM_EDID_HDMI_DC_Y444 (1 << 3) /* YCBCR 420 deep color modes */ -#define DRM_EDID_YCBCR420_DC_48 (1 << 6) -#define DRM_EDID_YCBCR420_DC_36 (1 << 5) -#define DRM_EDID_YCBCR420_DC_30 (1 << 4) +#define DRM_EDID_YCBCR420_DC_48 (1 << 2) +#define DRM_EDID_YCBCR420_DC_36 (1 << 1) +#define DRM_EDID_YCBCR420_DC_30 (1 << 0) #define DRM_EDID_YCBCR420_DC_MASK (DRM_EDID_YCBCR420_DC_48 | \ DRM_EDID_YCBCR420_DC_36 | \ DRM_EDID_YCBCR420_DC_30) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index de8d3d3fa651..af4628979d13 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -831,8 +831,6 @@ static inline int acpi_dma_configure(struct device *dev, return 0; } -static inline void acpi_dma_deconfigure(struct device *dev) { } - #define ACPI_PTR(_ptr) (NULL) static inline void acpi_device_set_enumerated(struct acpi_device *adev) diff --git a/include/linux/amifd.h b/include/linux/amifd.h deleted file mode 100644 index 202a77dbe46d..000000000000 --- a/include/linux/amifd.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _AMIFD_H -#define _AMIFD_H - -/* Definitions for the Amiga floppy driver */ - -#include <linux/fd.h> - -#define FD_MAX_UNITS 4 /* Max. Number of drives */ -#define FLOPPY_MAX_SECTORS 22 /* Max. 
Number of sectors per track */ - -#ifndef ASSEMBLER - -struct fd_data_type { - char *name; /* description of data type */ - int sects; /* sectors per track */ -#ifdef __STDC__ - int (*read_fkt)(int); - void (*write_fkt)(int); -#else - int (*read_fkt)(); /* read whole track */ - void (*write_fkt)(); /* write whole track */ -#endif -}; - -/* -** Floppy type descriptions -*/ - -struct fd_drive_type { - unsigned long code; /* code returned from drive */ - char *name; /* description of drive */ - unsigned int tracks; /* number of tracks */ - unsigned int heads; /* number of heads */ - unsigned int read_size; /* raw read size for one track */ - unsigned int write_size; /* raw write size for one track */ - unsigned int sect_mult; /* sectors and gap multiplier (HD = 2) */ - unsigned int precomp1; /* start track for precomp 1 */ - unsigned int precomp2; /* start track for precomp 2 */ - unsigned int step_delay; /* time (in ms) for delay after step */ - unsigned int settle_time; /* time to settle after dir change */ - unsigned int side_time; /* time needed to change sides */ -}; - -struct amiga_floppy_struct { - struct fd_drive_type *type; /* type of floppy for this unit */ - struct fd_data_type *dtype; /* type of floppy for this unit */ - int track; /* current track (-1 == unknown) */ - unsigned char *trackbuf; /* current track (kmaloc()'d */ - - int blocks; /* total # blocks on disk */ - - int changed; /* true when not known */ - int disk; /* disk in drive (-1 == unknown) */ - int motor; /* true when motor is at speed */ - int busy; /* true when drive is active */ - int dirty; /* true when trackbuf is not on disk */ - int status; /* current error code for unit */ - struct gendisk *gendisk; -}; -#endif - -#endif diff --git a/include/linux/amifdreg.h b/include/linux/amifdreg.h deleted file mode 100644 index 9b514d05ec70..000000000000 --- a/include/linux/amifdreg.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_AMIFDREG_H -#define _LINUX_AMIFDREG_H - -/* -** CIAAPRA bits (read only) -*/ - -#define DSKRDY (0x1<<5) /* disk ready when low */ -#define DSKTRACK0 (0x1<<4) /* head at track zero when low */ -#define DSKPROT (0x1<<3) /* disk protected when low */ -#define DSKCHANGE (0x1<<2) /* low when disk removed */ - -/* -** CIAAPRB bits (read/write) -*/ - -#define DSKMOTOR (0x1<<7) /* motor on when low */ -#define DSKSEL3 (0x1<<6) /* select drive 3 when low */ -#define DSKSEL2 (0x1<<5) /* select drive 2 when low */ -#define DSKSEL1 (0x1<<4) /* select drive 1 when low */ -#define DSKSEL0 (0x1<<3) /* select drive 0 when low */ -#define DSKSIDE (0x1<<2) /* side selection: 0 = upper, 1 = lower */ -#define DSKDIREC (0x1<<1) /* step direction: 0=in, 1=out (to trk 0) */ -#define DSKSTEP (0x1) /* pulse low to step head 1 track */ - -/* -** DSKBYTR bits (read only) -*/ - -#define DSKBYT (1<<15) /* register contains valid byte when set */ -#define DMAON (1<<14) /* disk DMA enabled */ -#define DISKWRITE (1<<13) /* disk write bit in DSKLEN enabled */ -#define WORDEQUAL (1<<12) /* DSKSYNC register match when true */ -/* bits 7-0 are data */ - -/* -** ADKCON/ADKCONR bits -*/ - -#ifndef SETCLR -#define ADK_SETCLR (1<<15) /* control bit */ -#endif -#define ADK_PRECOMP1 (1<<14) /* precompensation selection */ -#define ADK_PRECOMP0 (1<<13) /* 00=none, 01=140ns, 10=280ns, 11=500ns */ -#define ADK_MFMPREC (1<<12) /* 0=GCR precomp., 1=MFM precomp. 
*/ -#define ADK_WORDSYNC (1<<10) /* enable DSKSYNC auto DMA */ -#define ADK_MSBSYNC (1<<9) /* when 1, enable sync on MSbit (for GCR) */ -#define ADK_FAST (1<<8) /* bit cell: 0=2us (GCR), 1=1us (MFM) */ - -/* -** DSKLEN bits -*/ - -#define DSKLEN_DMAEN (1<<15) -#define DSKLEN_WRITE (1<<14) - -/* -** INTENA/INTREQ bits -*/ - -#define DSKINDEX (0x1<<4) /* DSKINDEX bit */ - -/* -** Misc -*/ - -#define MFM_SYNC 0x4489 /* standard MFM sync value */ - -/* Values for FD_COMMAND */ -#define FD_RECALIBRATE 0x07 /* move to track 0 */ -#define FD_SEEK 0x0F /* seek track */ -#define FD_READ 0xE6 /* read with MT, MFM, SKip deleted */ -#define FD_WRITE 0xC5 /* write with MT, MFM */ -#define FD_SENSEI 0x08 /* Sense Interrupt Status */ -#define FD_SPECIFY 0x03 /* specify HUT etc */ -#define FD_FORMAT 0x4D /* format one track */ -#define FD_VERSION 0x10 /* get version code */ -#define FD_CONFIGURE 0x13 /* configure FIFO operation */ -#define FD_PERPENDICULAR 0x12 /* perpendicular r/w mode */ - -#endif /* _LINUX_AMIFDREG_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 51371740d2a8..b47c7f716731 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -21,12 +21,8 @@ #include <linux/highmem.h> #include <linux/mempool.h> #include <linux/ioprio.h> -#include <linux/bug.h> #ifdef CONFIG_BLOCK - -#include <asm/io.h> - /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include <linux/blk_types.h> @@ -133,32 +129,6 @@ static inline bool bio_full(struct bio *bio) } /* - * will die - */ -#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) - -/* - * merge helpers etc - */ - -/* Default implementation of BIOVEC_PHYS_MERGEABLE */ -#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) - -/* - * allow arch override, for eg virtualized architectures (put in asm/io.h) - */ -#ifndef BIOVEC_PHYS_MERGEABLE -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - __BIOVEC_PHYS_MERGEABLE(vec1, vec2) -#endif - -#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ - (((addr1) | (mask)) == (((addr2) - 1) | (mask))) -#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ - __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q))) - -/* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ @@ -170,27 +140,11 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, { iter->bi_sector += bytes >> 9; - if (bio_no_advance_iter(bio)) { + if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; - iter->bi_done += bytes; - } else { + else bvec_iter_advance(bio->bi_io_vec, iter, bytes); /* TODO: It is reasonable to complete bio with error here. 
*/ - } -} - -static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter, - unsigned int bytes) -{ - iter->bi_sector -= bytes >> 9; - - if (bio_no_advance_iter(bio)) { - iter->bi_size += bytes; - iter->bi_done -= bytes; - return true; - } - - return bvec_iter_rewind(bio->bi_io_vec, iter, bytes); } #define __bio_for_each_segment(bvl, bio, iter, start) \ @@ -353,6 +307,8 @@ struct bio_integrity_payload { unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ + struct bvec_iter bio_iter; /* for rewinding parent bio */ + struct work_struct bip_work; /* I/O completion */ struct bio_vec *bip_vec; @@ -547,23 +503,31 @@ do { \ disk_devt((bio)->bi_disk) #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); +int bio_associate_blkg_from_page(struct bio *bio, struct page *page); #else -static inline int bio_associate_blkcg_from_page(struct bio *bio, - struct page *page) { return 0; } +static inline int bio_associate_blkg_from_page(struct bio *bio, + struct page *page) { return 0; } #endif #ifdef CONFIG_BLK_CGROUP -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); +int bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css); +int bio_associate_create_blkg(struct request_queue *q, struct bio *bio); +int bio_reassociate_blkg(struct request_queue *q, struct bio *bio); void bio_disassociate_task(struct bio *bio); -void bio_clone_blkcg_association(struct bio *dst, struct bio *src); +void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline int bio_associate_blkcg(struct bio *bio, - struct cgroup_subsys_state *blkcg_css) { return 0; } +static inline int bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ return 0; } +static inline int bio_associate_create_blkg(struct request_queue *q, + struct bio *bio) { return 0; } +static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) +{ return 0; } static inline void bio_disassociate_task(struct bio *bio) { } -static inline void bio_clone_blkcg_association(struct bio *dst, - struct bio *src) { } +static inline void bio_clone_blkg_association(struct bio *dst, + struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6d766a19f2bb..1e76ceebeb5d 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -126,7 +126,7 @@ struct blkcg_gq { struct request_list rl; /* reference count */ - atomic_t refcnt; + struct percpu_ref refcnt; /* is this blkg online? protected by both blkcg and q locks */ bool online; @@ -184,6 +184,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -230,22 +232,59 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. 
+ * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static inline struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -static inline struct blkcg *bio_blkcg(struct bio *bio) +/** + * __bio_blkcg - internal version of bio_blkcg for bfq and cfq + * + * DO NOT USE. + * There is a flaw using this version of the function. In particular, this was + * used in a broken paradigm where association was called on the given css. It + * is possible though that the returned css from task_css() is in the process + * of dying due to migration of the current task. So it is improper to assume + * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will + * take additional work to handle more gracefully. + */ +static inline struct blkcg *__bio_blkcg(struct bio *bio) { - struct cgroup_subsys_state *css; + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; + return css_to_blkcg(blkcg_css()); +} - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); - css = kthread_blkcg(); - if (css) - return css_to_blkcg(css); - return css_to_blkcg(task_css(current, io_cgrp_id)); +/** + * bio_blkcg - grab the blkcg associated with a bio + * @bio: target bio + * + * This returns the blkcg associated with a bio, NULL if not associated. + * Callers are expected to either handle NULL or know association has been + * done prior to calling this. + */ +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; + return NULL; } static inline bool blk_cgroup_congested(void) @@ -451,26 +490,35 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - atomic_inc(&blkg->refcnt); + percpu_ref_get(&blkg->refcnt); } /** - * blkg_try_get - try and get a blkg reference + * blkg_tryget - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. */ -static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) +static inline bool blkg_tryget(struct blkcg_gq *blkg) { - if (atomic_inc_not_zero(&blkg->refcnt)) - return blkg; - return NULL; + return percpu_ref_tryget(&blkg->refcnt); } +/** + * blkg_tryget_closest - try and get a blkg ref on the closet blkg + * @blkg: blkg to get + * + * This walks up the blkg tree to find the closest non-dying blkg and returns + * the blkg that it did association with as it may not be the passed in blkg. 
+ */ +static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) +{ + while (!percpu_ref_tryget(&blkg->refcnt)) + blkg = blkg->parent; -void __blkg_release_rcu(struct rcu_head *rcu); + return blkg; +} /** * blkg_put - put a blkg reference @@ -478,9 +526,7 @@ void __blkg_release_rcu(struct rcu_head *rcu); */ static inline void blkg_put(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - if (atomic_dec_and_test(&blkg->refcnt)) - call_rcu(&blkg->rcu_head, __blkg_release_rcu); + percpu_ref_put(&blkg->refcnt); } /** @@ -533,25 +579,36 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, rcu_read_lock(); - blkcg = bio_blkcg(bio); + if (bio && bio->bi_blkg) { + blkcg = bio->bi_blkg->blkcg; + if (blkcg == &blkcg_root) + goto rl_use_root; + + blkg_get(bio->bi_blkg); + rcu_read_unlock(); + return &bio->bi_blkg->rl; + } - /* bypass blkg lookup and use @q->root_rl directly for root */ + blkcg = css_to_blkcg(blkcg_css()); if (blkcg == &blkcg_root) - goto root_rl; + goto rl_use_root; - /* - * Try to use blkg->rl. blkg lookup may fail under memory pressure - * or if either the blkcg or queue is going away. Fall back to - * root_rl in such cases. - */ blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) - goto root_rl; + blkg = __blkg_lookup_create(blkcg, q); + + if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg)) + goto rl_use_root; - blkg_get(blkg); rcu_read_unlock(); return &blkg->rl; -root_rl: + + /* + * Each blkg has its own request_list, however, the root blkcg + * uses the request_queue's root_rl. This is to avoid most + * overhead for the root blkcg. + */ +rl_use_root: rcu_read_unlock(); return &q->root_rl; } @@ -797,32 +854,26 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg struct bio *bio) { return false; } #endif + +static inline void blkcg_bio_issue_init(struct bio *bio) +{ + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +} + static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { - struct blkcg *blkcg; struct blkcg_gq *blkg; bool throtl = false; rcu_read_lock(); - blkcg = bio_blkcg(bio); - - /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, &blkcg->css); - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - spin_unlock_irq(q->queue_lock); - } + bio_associate_create_blkg(q, bio); + blkg = bio->bi_blkg; throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { - blkg = blkg ?: q->root_blkg; /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the @@ -834,6 +885,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } + blkcg_bio_issue_init(bio); + rcu_read_unlock(); return !throtl; } @@ -930,6 +983,7 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, @@ -945,6 +999,7 @@ static inline void blk_put_rl(struct request_list *rl) { } static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } static inline struct request_list *blk_rq_rl(struct request *rq) 
{ return &rq->q->root_rl; } +static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1da59c16f637..2286dc12c6bc 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -203,6 +203,10 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); +struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, + unsigned int queue_depth, + unsigned int set_flags); int blk_mq_register_dev(struct device *, struct request_queue *); void blk_mq_unregister_dev(struct device *, struct request_queue *); diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h new file mode 100644 index 000000000000..b80c65aba249 --- /dev/null +++ b/include/linux/blk-pm.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BLK_PM_H_ +#define _BLK_PM_H_ + +struct device; +struct request_queue; + +/* + * block layer runtime pm functions + */ +#ifdef CONFIG_PM +extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); +extern int blk_pre_runtime_suspend(struct request_queue *q); +extern void blk_post_runtime_suspend(struct request_queue *q, int err); +extern void blk_pre_runtime_resume(struct request_queue *q); +extern void blk_post_runtime_resume(struct request_queue *q, int err); +extern void blk_set_runtime_active(struct request_queue *q); +#else +static inline void blk_pm_runtime_init(struct request_queue *q, + struct device *dev) {} +#endif + +#endif /* _BLK_PM_H_ */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f6dfb30737d8..9578c7ab1eb6 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -178,7 +178,6 @@ struct bio { * release. Read comment on top of bio_associate_current(). */ struct io_context *bi_ioc; - struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6980014357d4..61207560e826 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -108,7 +108,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_QUIET ((__force req_flags_t)(1 << 11)) /* elevator private data attached */ #define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) -/* account I/O stat */ +/* account into disk and partition IO statistics */ #define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) /* request came from our alloc pool */ #define RQF_ALLOCED ((__force req_flags_t)(1 << 14)) @@ -116,7 +116,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ #define RQF_HASHED ((__force req_flags_t)(1 << 16)) -/* IO stats tracking on */ +/* track IO completion time */ #define RQF_STATS ((__force req_flags_t)(1 << 17)) /* Look at ->special_vec for the actual data payload instead of the bio chain. */ @@ -504,6 +504,12 @@ struct request_queue { * various queue flags, see QUEUE_* below */ unsigned long queue_flags; + /* + * Number of contexts that have called blk_set_pm_only(). If this + * counter is above zero then only RQF_PM and RQF_PREEMPT requests are + * processed. + */ + atomic_t pm_only; /* * ida allocated id for this queue. 
Used to index queues from @@ -679,7 +685,7 @@ struct request_queue { #define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ #define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 10 /* do IO stats */ +#define QUEUE_FLAG_IO_STAT 10 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_DISCARD 11 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 12 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 13 /* Contributes to random pool */ @@ -693,12 +699,11 @@ struct request_queue { #define QUEUE_FLAG_FUA 21 /* device supports FUA writes */ #define QUEUE_FLAG_FLUSH_NQ 22 /* flush not queueuable */ #define QUEUE_FLAG_DAX 23 /* device supports DAX */ -#define QUEUE_FLAG_STATS 24 /* track rq completion times */ +#define QUEUE_FLAG_STATS 24 /* track IO start and completion times */ #define QUEUE_FLAG_POLL_STATS 25 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ -#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ @@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) -#define blk_queue_preempt_only(q) \ - test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) +#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) -extern int blk_set_preempt_only(struct request_queue *q); -extern void blk_clear_preempt_only(struct request_queue *q); +extern void blk_set_pm_only(struct request_queue *q); +extern void blk_clear_pm_only(struct request_queue *q); static inline int queue_in_flight(struct request_queue *q) { @@ -1281,29 +1285,6 @@ extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); /* - * block layer runtime pm functions - */ -#ifdef CONFIG_PM -extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); -extern int blk_pre_runtime_suspend(struct request_queue *q); -extern void blk_post_runtime_suspend(struct request_queue *q, int err); -extern void blk_pre_runtime_resume(struct request_queue *q); -extern void blk_post_runtime_resume(struct request_queue *q, int err); -extern void blk_set_runtime_active(struct request_queue *q); -#else -static inline void blk_pm_runtime_init(struct request_queue *q, - struct device *dev) {} -static inline int blk_pre_runtime_suspend(struct request_queue *q) -{ - return -ENOSYS; -} -static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} -static inline void blk_pre_runtime_resume(struct request_queue *q) {} -static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} -static inline void blk_set_runtime_active(struct request_queue *q) {} -#endif - -/* * blk_plug permits building a queue of related requests by holding the I/O * fragments for a short period. This allows merging of sequential requests * into single larger request. 
As the requests are moved from a per-task list to @@ -1676,94 +1657,6 @@ static inline void put_dev_sector(Sector p) put_page(p.v); } -static inline bool __bvec_gap_to_prev(struct request_queue *q, - struct bio_vec *bprv, unsigned int offset) -{ - return offset || - ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); -} - -/* - * Check if adding a bio_vec after bprv with offset would create a gap in - * the SG list. Most drivers don't care about this, but some do. - */ -static inline bool bvec_gap_to_prev(struct request_queue *q, - struct bio_vec *bprv, unsigned int offset) -{ - if (!queue_virt_boundary(q)) - return false; - return __bvec_gap_to_prev(q, bprv, offset); -} - -/* - * Check if the two bvecs from two bios can be merged to one segment. - * If yes, no need to check gap between the two bios since the 1st bio - * and the 1st bvec in the 2nd bio can be handled in one segment. - */ -static inline bool bios_segs_mergeable(struct request_queue *q, - struct bio *prev, struct bio_vec *prev_last_bv, - struct bio_vec *next_first_bv) -{ - if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) - return false; - if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) - return false; - if (prev->bi_seg_back_size + next_first_bv->bv_len > - queue_max_segment_size(q)) - return false; - return true; -} - -static inline bool bio_will_gap(struct request_queue *q, - struct request *prev_rq, - struct bio *prev, - struct bio *next) -{ - if (bio_has_data(prev) && queue_virt_boundary(q)) { - struct bio_vec pb, nb; - - /* - * don't merge if the 1st bio starts with non-zero - * offset, otherwise it is quite difficult to respect - * sg gap limit. We work hard to merge a huge number of small - * single bios in case of mkfs. - */ - if (prev_rq) - bio_get_first_bvec(prev_rq->bio, &pb); - else - bio_get_first_bvec(prev, &pb); - if (pb.bv_offset) - return true; - - /* - * We don't need to worry about the situation that the - * merged segment ends in unaligned virt boundary: - * - * - if 'pb' ends aligned, the merged segment ends aligned - * - if 'pb' ends unaligned, the next bio must include - * one single bvec of 'nb', otherwise the 'nb' can't - * merge with 'pb' - */ - bio_get_last_bvec(prev, &pb); - bio_get_first_bvec(next, &nb); - - if (!bios_segs_mergeable(q, prev, &pb, &nb)) - return __bvec_gap_to_prev(q, &pb, nb.bv_offset); - } - - return false; -} - -static inline bool req_gap_back_merge(struct request *req, struct bio *bio) -{ - return bio_will_gap(req->q, req, req->biotail, bio); -} - -static inline bool req_gap_front_merge(struct request *req, struct bio *bio) -{ - return bio_will_gap(req->q, NULL, bio, req->bio); -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); @@ -1843,26 +1736,6 @@ queue_max_integrity_segments(struct request_queue *q) return q->limits.max_integrity_segments; } -static inline bool integrity_req_gap_back_merge(struct request *req, - struct bio *next) -{ - struct bio_integrity_payload *bip = bio_integrity(req->bio); - struct bio_integrity_payload *bip_next = bio_integrity(next); - - return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], - bip_next->bip_vec[0].bv_offset); -} - -static inline bool integrity_req_gap_front_merge(struct request *req, - struct bio *bio) -{ - struct bio_integrity_payload *bip = bio_integrity(bio); - struct bio_integrity_payload *bip_next = bio_integrity(req->bio); - - 
return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], - bip_next->bip_vec[0].bv_offset); -} - /** * bio_integrity_intervals - Return number of integrity intervals for a bio * @bi: blk_integrity profile for device @@ -1947,17 +1820,6 @@ static inline bool blk_integrity_merge_bio(struct request_queue *rq, return true; } -static inline bool integrity_req_gap_back_merge(struct request *req, - struct bio *next) -{ - return false; -} -static inline bool integrity_req_gap_front_merge(struct request *req, - struct bio *bio) -{ - return false; -} - static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, unsigned int sectors) { diff --git a/include/linux/bvec.h b/include/linux/bvec.h index fe7a22dd133b..02c73c6aa805 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -40,8 +40,6 @@ struct bvec_iter { unsigned int bi_idx; /* current index into bvl_vec */ - unsigned int bi_done; /* number of bytes completed */ - unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ }; @@ -85,7 +83,6 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, bytes -= len; iter->bi_size -= len; iter->bi_bvec_done += len; - iter->bi_done += len; if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { iter->bi_bvec_done = 0; diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ff20b677fb9f..22254c1fe1c5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -412,6 +412,7 @@ struct cgroup { * specific task are charged to the dom_cgrp. */ struct cgroup *dom_cgrp; + struct cgroup *old_dom_cgrp; /* used while enabling threaded */ /* per-cpu recursive resource statistics */ struct cgroup_rstat_cpu __percpu *rstat_cpu; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 32c553556bbd..b8bcbdeb2eac 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -93,6 +93,8 @@ extern struct css_set init_css_set; bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, diff --git a/include/linux/compat.h b/include/linux/compat.h index 1a3c4f37e908..de0c13bdcd2c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -103,6 +103,9 @@ typedef struct compat_sigaltstack { compat_size_t ss_size; } compat_stack_t; #endif +#ifndef COMPAT_MINSIGSTKSZ +#define COMPAT_MINSIGSTKSZ MINSIGSTKSZ +#endif #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) diff --git a/include/linux/device.h b/include/linux/device.h index 8f882549edee..983506789402 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -927,6 +927,8 @@ struct dev_links_info { * @offline: Set after successful invocation of bus type's .offline(). * @of_node_reused: Set if the device-tree node is shared with an ancestor * device. + * @dma_coherent: this particular device is dma coherent, even if the + * architecture supports non-coherent devices. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. 
The device structure contains the information @@ -1016,6 +1018,11 @@ struct device { bool offline_disabled:1; bool offline:1; bool of_node_reused:1; +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + bool dma_coherent:1; +#endif }; static inline struct device *kobj_to_dev(struct kobject *kobj) diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index a785f2507159..30213adbb6b9 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -32,6 +32,9 @@ extern void dma_debug_add_bus(struct bus_type *bus); extern int dma_debug_resize_entries(u32 num_entries); +extern void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len); + extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, @@ -103,6 +106,11 @@ static inline int dma_debug_resize_entries(u32 num_entries) return 0; } +static inline void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len) +{ +} + static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 8d9f33febde5..fbca184ff5a0 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -27,7 +27,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) if (!dev->dma_mask) return false; - return addr + size - 1 <= *dev->dma_mask; + return addr + size - 1 <= + min_not_zero(*dev->dma_mask, dev->bus_dma_mask); } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ @@ -55,10 +56,15 @@ static inline void dma_mark_clean(void *addr, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */ +u64 dma_direct_get_required_mask(struct device *dev); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +void *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); +void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs); dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 1db6a6b46d0d..15bd41447025 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -130,13 +130,10 @@ struct dma_map_ops { enum dma_data_direction direction); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK u64 (*get_required_mask)(struct device *dev); -#endif }; extern const struct dma_map_ops dma_direct_ops; -extern const struct dma_map_ops dma_noncoherent_ops; extern const struct dma_map_ops dma_virt_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? 
~0ULL : ((1ULL<<(n))-1)) @@ -232,6 +229,7 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); + debug_dma_map_single(dev, ptr, size); addr = ops->map_page(dev, virt_to_page(ptr), offset_in_page(ptr), size, dir, attrs); @@ -445,7 +443,8 @@ dma_cache_sync(struct device *dev, void *vaddr, size_t size, } extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size); + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); void *dma_common_contiguous_remap(struct page *page, size_t size, unsigned long vm_flags, @@ -477,14 +476,14 @@ dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, BUG_ON(!ops); if (ops->mmap) return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) int -dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size); +dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, + dma_addr_t dma_addr, size_t size, unsigned long attrs); static inline int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, @@ -496,7 +495,8 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, if (ops->get_sgtable) return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); - return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size); + return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, + attrs); } #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) @@ -558,9 +558,11 @@ static inline void dma_free_attrs(struct device *dev, size_t size, } static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) + dma_addr_t *dma_handle, gfp_t gfp) { - return dma_alloc_attrs(dev, size, dma_handle, flag, 0); + + return dma_alloc_attrs(dev, size, dma_handle, gfp, + (gfp & __GFP_NOWARN) ? 
DMA_ATTR_NO_WARN : 0); } static inline void dma_free_coherent(struct device *dev, size_t size, @@ -753,18 +755,6 @@ dma_mark_declared_memory_occupied(struct device *dev, } #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ -#ifdef CONFIG_HAS_DMA -int dma_configure(struct device *dev); -void dma_deconfigure(struct device *dev); -#else -static inline int dma_configure(struct device *dev) -{ - return 0; -} - -static inline void dma_deconfigure(struct device *dev) {} -#endif - /* * Managed DMA API */ @@ -806,8 +796,12 @@ static inline void dmam_release_declared_memory(struct device *dev) static inline void *dma_alloc_wc(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t gfp) { - return dma_alloc_attrs(dev, size, dma_addr, gfp, - DMA_ATTR_WRITE_COMBINE); + unsigned long attrs = DMA_ATTR_WRITE_COMBINE; + + if (gfp & __GFP_NOWARN) + attrs |= DMA_ATTR_NO_WARN; + + return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs); } #ifndef dma_alloc_writecombine #define dma_alloc_writecombine dma_alloc_wc diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index a0aa00cc909d..9051b055beec 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -4,18 +4,35 @@ #include <linux/dma-mapping.h> +#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H +#include <asm/dma-coherence.h> +#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return dev->dma_coherent; +} +#else +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return true; +} +#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ + void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr); -#ifdef CONFIG_DMA_NONCOHERENT_MMAP -int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, +#ifdef CONFIG_ARCH_HAS_DMA_MMAP_PGPROT +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); #else -#define arch_dma_mmap NULL -#endif /* CONFIG_DMA_NONCOHERENT_MMAP */ +# define arch_dma_mmap_pgprot(dev, prot, attrs) pgprot_noncached(prot) +#endif #ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index a02deea30185..015bb59c0331 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -111,7 +111,7 @@ struct elevator_mq_ops { void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); - void (*completed_request)(struct request *); + void (*completed_request)(struct request *, u64); void (*started_request)(struct request *); void (*requeue_request)(struct request *); struct request *(*former_request)(struct request_queue *, struct request *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 25c08c6c7f99..70fc838e6773 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -402,10 +402,11 @@ static inline void free_part_info(struct hd_struct *part) extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part); /*
block/genhd.c */ -extern void device_add_disk(struct device *parent, struct gendisk *disk); +extern void device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); static inline void add_disk(struct gendisk *disk) { - device_add_disk(NULL, disk); + device_add_disk(NULL, disk, NULL); } extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk); static inline void add_disk_no_queue_reg(struct gendisk *disk) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 0ea328e71ec9..a4d5eb37744a 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -95,6 +95,13 @@ struct gpio_irq_chip { unsigned int num_parents; /** + * @parent_irq: + * + * For use by gpiochip_set_cascaded_irqchip() + */ + unsigned int parent_irq; + + /** * @parents: * * A list of interrupt parents of a GPIO chip. This is owned by the diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 99c19b06d9a4..fdcb45999b26 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); + pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e9e0d1c7eaf5..2fdeac1a420d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -86,8 +86,8 @@ struct nvm_chk_meta; typedef int (nvm_id_fn)(struct nvm_dev *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); -typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, struct nvm_chk_meta *, - sector_t, int); +typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int, + struct nvm_chk_meta *); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); @@ -305,6 +305,8 @@ struct nvm_rq { u64 ppa_status; /* ppa media status */ int error; + int is_seq; /* Sequential hint flag. 1.2 only */ + void *private; }; @@ -318,6 +320,11 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) return rqdata + 1; } +static inline struct ppa_addr *nvm_rq_to_ppa_list(struct nvm_rq *rqd) +{ + return (rqd->nr_ppas > 1) ? 
rqd->ppa_list : &rqd->ppa_addr; +} + enum { NVM_BLK_ST_FREE = 0x1, /* Free block */ NVM_BLK_ST_TGT = 0x2, /* Block in use by target */ @@ -485,6 +492,144 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, return l; } +static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf, + struct ppa_addr p) +{ + struct nvm_geo *geo = &dev->geo; + u64 caddr; + + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)addrf; + + caddr = (u64)p.g.pg << ppaf->pg_offset; + caddr |= (u64)p.g.pl << ppaf->pln_offset; + caddr |= (u64)p.g.sec << ppaf->sec_offset; + } else { + caddr = p.m.sec; + } + + return caddr; +} + +static inline struct ppa_addr nvm_ppa32_to_ppa64(struct nvm_dev *dev, + void *addrf, u32 ppa32) +{ + struct ppa_addr ppa64; + + ppa64.ppa = 0; + + if (ppa32 == -1) { + ppa64.ppa = ADDR_EMPTY; + } else if (ppa32 & (1U << 31)) { + ppa64.c.line = ppa32 & ((~0U) >> 1); + ppa64.c.is_cached = 1; + } else { + struct nvm_geo *geo = &dev->geo; + + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = addrf; + + ppa64.g.ch = (ppa32 & ppaf->ch_mask) >> + ppaf->ch_offset; + ppa64.g.lun = (ppa32 & ppaf->lun_mask) >> + ppaf->lun_offset; + ppa64.g.blk = (ppa32 & ppaf->blk_mask) >> + ppaf->blk_offset; + ppa64.g.pg = (ppa32 & ppaf->pg_mask) >> + ppaf->pg_offset; + ppa64.g.pl = (ppa32 & ppaf->pln_mask) >> + ppaf->pln_offset; + ppa64.g.sec = (ppa32 & ppaf->sec_mask) >> + ppaf->sec_offset; + } else { + struct nvm_addrf *lbaf = addrf; + + ppa64.m.grp = (ppa32 & lbaf->ch_mask) >> + lbaf->ch_offset; + ppa64.m.pu = (ppa32 & lbaf->lun_mask) >> + lbaf->lun_offset; + ppa64.m.chk = (ppa32 & lbaf->chk_mask) >> + lbaf->chk_offset; + ppa64.m.sec = (ppa32 & lbaf->sec_mask) >> + lbaf->sec_offset; + } + } + + return ppa64; +} + +static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev, + void *addrf, struct ppa_addr ppa64) +{ + u32 ppa32 = 0; + + if (ppa64.ppa == ADDR_EMPTY) { + ppa32 = ~0U; + } else if (ppa64.c.is_cached) { + ppa32 |= ppa64.c.line; + ppa32 |= 1U << 31; + } else { + struct nvm_geo *geo = &dev->geo; + + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = addrf; + + ppa32 |= ppa64.g.ch << ppaf->ch_offset; + ppa32 |= ppa64.g.lun << ppaf->lun_offset; + ppa32 |= ppa64.g.blk << ppaf->blk_offset; + ppa32 |= ppa64.g.pg << ppaf->pg_offset; + ppa32 |= ppa64.g.pl << ppaf->pln_offset; + ppa32 |= ppa64.g.sec << ppaf->sec_offset; + } else { + struct nvm_addrf *lbaf = addrf; + + ppa32 |= ppa64.m.grp << lbaf->ch_offset; + ppa32 |= ppa64.m.pu << lbaf->lun_offset; + ppa32 |= ppa64.m.chk << lbaf->chk_offset; + ppa32 |= ppa64.m.sec << lbaf->sec_offset; + } + } + + return ppa32; +} + +static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev, + struct ppa_addr *ppa) +{ + struct nvm_geo *geo = &dev->geo; + int last = 0; + + if (geo->version == NVM_OCSSD_SPEC_12) { + int sec = ppa->g.sec; + + sec++; + if (sec == geo->ws_min) { + int pg = ppa->g.pg; + + sec = 0; + pg++; + if (pg == geo->num_pg) { + int pl = ppa->g.pl; + + pg = 0; + pl++; + if (pl == geo->num_pln) + last = 1; + + ppa->g.pl = pl; + } + ppa->g.pg = pg; + } + ppa->g.sec = sec; + } else { + ppa->m.sec++; + if (ppa->m.sec == geo->clba) + last = 1; + } + + return last; +} + typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, @@ -493,9 +638,15 @@ typedef void (nvm_tgt_exit_fn)(void *, bool); typedef int 
(nvm_tgt_sysfs_init_fn)(struct gendisk *); typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); +enum { + NVM_TGT_F_DEV_L2P = 0, + NVM_TGT_F_HOST_L2P = 1 << 0, +}; + struct nvm_tgt_type { const char *name; unsigned int version[3]; + int flags; /* target entry points */ nvm_tgt_make_rq_fn *make_rq; @@ -524,18 +675,13 @@ extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); - -extern int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, - struct nvm_chk_meta *meta, struct ppa_addr ppa, - int nchks); - -extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, +extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr, + int, struct nvm_chk_meta *); +extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *); extern void nvm_end_io(struct nvm_rq *); -extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); -extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); #else /* CONFIG_NVM */ struct nvm_dev_ops; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 66d94b4557cf..88a041b73abf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1032,6 +1032,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, ((fbc->frag_sz_m1 & ix) << fbc->log_stride); } +static inline u32 +mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) +{ + u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1; + + return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); +} + int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3f4c0b167333..d4b0c79d2924 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -668,10 +668,6 @@ typedef struct pglist_data { wait_queue_head_t kcompactd_wait; struct task_struct *kcompactd; #endif -#ifdef CONFIG_NUMA_BALANCING - /* Lock serializing the migrate rate limiting window */ - spinlock_t numabalancing_migrate_lock; -#endif /* * This is a per-node reserve of pages that are not available * to userspace allocations. 
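Illustrative sketch, not part of the patch: the nvm_next_ppa_in_chk() helper added to lightnvm.h above advances a physical address to the next sector of the same chunk and returns 1 once the chunk is exhausted, hiding the 1.2 (sec/pg/pln) versus 2.0 (m.sec) geometry difference from the caller. A minimal caller could walk every sector of a chunk as below; tgt_dev and chunk_first_ppa are assumed to be supplied by the target (e.g. pblk), and the loop body is left empty:

/* Sketch: visit each sector of one chunk, first to last. */
static void visit_chunk(struct nvm_tgt_dev *tgt_dev,
			struct ppa_addr chunk_first_ppa)
{
	struct ppa_addr ppa = chunk_first_ppa;
	int done;

	do {
		/* issue or account I/O for 'ppa' here */
		done = nvm_next_ppa_in_chk(tgt_dev, &ppa);
	} while (!done);
}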
diff --git a/include/linux/module.h b/include/linux/module.h index f807f15bebbe..e19ae08c7fb8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -20,6 +20,7 @@ #include <linux/export.h> #include <linux/rbtree_latch.h> #include <linux/error-injection.h> +#include <linux/tracepoint-defs.h> #include <linux/percpu.h> #include <asm/module.h> @@ -430,7 +431,7 @@ struct module { #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; - struct tracepoint * const *tracepoints_ptrs; + tracepoint_ptr_t *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index e93837f647de..1d3ade69d39a 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -23,7 +23,6 @@ #include <linux/mutex.h> #include <linux/kref.h> #include <linux/sysfs.h> -#include <linux/workqueue.h> struct hd_geometry; struct mtd_info; @@ -44,9 +43,9 @@ struct mtd_blktrans_dev { struct kref ref; struct gendisk *disk; struct attribute_group *disk_attributes; - struct workqueue_struct *wq; - struct work_struct work; struct request_queue *rq; + struct list_head rq_list; + struct blk_mq_tag_set *tag_set; spinlock_t queue_lock; void *priv; fmode_t file_mode; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c7861e4b402c..d837dad24b4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2458,6 +2458,13 @@ struct netdev_notifier_info { struct netlink_ext_ack *extack; }; +struct netdev_notifier_info_ext { + struct netdev_notifier_info info; /* must be first */ + union { + u32 mtu; + } ext; +}; + struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsigned int flags_changed; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 68e91ef5494c..818dbe9331be 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1241,6 +1241,7 @@ enum { NVME_SC_ANA_PERSISTENT_LOSS = 0x301, NVME_SC_ANA_INACCESSIBLE = 0x302, NVME_SC_ANA_TRANSITION = 0x303, + NVME_SC_HOST_PATH_ERROR = 0x370, NVME_SC_DNR = 0x4000, }; diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 165fd302b442..8d31e39dd564 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -58,7 +58,6 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma); -void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -113,8 +112,6 @@ static inline int of_dma_configure(struct device *dev, { return 0; } -static inline void of_dma_deconfigure(struct device *dev) -{} #endif /* CONFIG_OF */ #endif /* _LINUX_OF_DEVICE_H */ diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 009cdf3d65b6..b297cd1cd4f1 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -108,6 +108,7 @@ void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref); void percpu_ref_switch_to_percpu(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); +void percpu_ref_resurrect(struct percpu_ref *ref); void percpu_ref_reinit(struct percpu_ref *ref); /** diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 10f92e1d8e7b..bf309ff6f244 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -99,6 +99,7 @@ struct arm_pmu { void (*stop)(struct 
arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + int (*filter_match)(struct perf_event *event); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5a28ac9284f0..3f529ad9a9d2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -251,6 +251,7 @@ static inline bool idle_should_enter_s2idle(void) return unlikely(s2idle_state == S2IDLE_STATE_ENTER); } +extern bool pm_suspend_via_s2idle(void); extern void __init pm_states_init(void); extern void s2idle_set_ops(const struct platform_s2idle_ops *ops); extern void s2idle_wake(void); @@ -282,6 +283,7 @@ static inline void pm_set_suspend_via_firmware(void) {} static inline void pm_set_resume_via_firmware(void) {} static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } +static inline bool pm_suspend_via_s2idle(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 22c5a46e9693..49ba9cde7e4b 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -35,6 +35,12 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +typedef const int tracepoint_ptr_t; +#else +typedef struct tracepoint * const tracepoint_ptr_t; +#endif + struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 041f7e56a289..538ba1a58f5b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -99,6 +99,29 @@ extern void syscall_unregfunc(void); #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return offset_to_ptr(p); +} + +#define __TRACEPOINT_ENTRY(name) \ + asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ + " .balign 4 \n" \ + " .long __tracepoint_" #name " - . \n" \ + " .previous \n") +#else +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return *p; +} + +#define __TRACEPOINT_ENTRY(name) \ + static tracepoint_ptr_t __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name +#endif + #endif /* _LINUX_TRACEPOINT_H */ /* @@ -253,19 +276,6 @@ extern void syscall_unregfunc(void); return static_key_false(&__tracepoint_##name.key); \ } -#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS -#define __TRACEPOINT_ENTRY(name) \ - asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ - " .balign 4 \n" \ - " .long __tracepoint_" #name " - . 
\n" \ - " .previous \n") -#else -#define __TRACEPOINT_ENTRY(name) \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ - &__tracepoint_##name -#endif - /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fdfd04e348f6..738a0c24874f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup - * writeback context. + * writeback context. Must be called after the bio has been associated with + * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { @@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) * regular writeback instead of writing things out itself. */ if (wbc->wb) - bio_associate_blkcg(bio, wbc->wb->blkcg_css); + bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ diff --git a/include/net/devlink.h b/include/net/devlink.h index b9b89d6604d4..99efc156a309 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -298,7 +298,7 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 -#define DEVLINK_PARAM_MAX_STRING_VALUE 32 +#define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, DEVLINK_PARAM_TYPE_U16, @@ -311,7 +311,7 @@ union devlink_param_value { u8 vu8; u16 vu16; u32 vu32; - const char *vstr; + char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE]; bool vbool; }; @@ -553,6 +553,8 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); +void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src); struct devlink_region *devlink_region_create(struct devlink *devlink, const char *region_name, u32 region_max_snapshots, @@ -789,6 +791,12 @@ devlink_param_value_changed(struct devlink *devlink, u32 param_id) { } +static inline void +devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) +{ +} + static inline struct devlink_region * devlink_region_create(struct devlink *devlink, const char *region_name, diff --git a/include/net/dst.h b/include/net/dst.h index 7f735e76ca73..6cf0870414c7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu); } +static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, + struct dst_entry *encap_dst, + int headroom) +{ + u32 encap_mtu = dst_mtu(encap_dst); + + if (skb->len > encap_mtu - headroom) + skb_dst_update_pmtu(skb, encap_mtu - headroom); +} + #endif /* _NET_DST_H */ diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3d4930528db0..2d31e22babd8 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -159,6 +159,10 @@ struct fib6_info { struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; +#ifdef 
CONFIG_IPV6_ROUTER_PREF + unsigned long last_probe; +#endif + + u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 69c91d1934c1..c9b7b136939d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 5ef1bad81ef5..9e3d32746430 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sctp_datahdr_len(&chunk->asoc->stream); + size -= sctp_datachk_len(&chunk->asoc->stream); return size; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 28a7c8e44636..a11f93790476 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -876,6 +876,8 @@ struct sctp_transport { unsigned long sackdelay; __u32 sackfreq; + atomic_t mtu_info; + /* When was the last time that we heard from this transport? We use * this to pick new active and retran paths. */ diff --git a/include/soc/fsl/bman.h b/include/soc/fsl/bman.h index eaaf56df4086..5b99cb2ea5ef 100644 --- a/include/soc/fsl/bman.h +++ b/include/soc/fsl/bman.h @@ -126,4 +126,12 @@ int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num); */ int bman_acquire(struct bman_pool *pool, struct bm_buffer *bufs, u8 num); +/** + * bman_is_probed - Check if bman is probed + * + * Returns 1 if the bman driver successfully probed, -1 if the bman driver + * failed to probe or 0 if the bman driver has not been probed yet. + */ +int bman_is_probed(void); + #endif /* __FSL_BMAN_H */ diff --git a/include/soc/fsl/qman.h b/include/soc/fsl/qman.h index d4dfefdee6c1..597783b8a3a0 100644 --- a/include/soc/fsl/qman.h +++ b/include/soc/fsl/qman.h @@ -1186,4 +1186,12 @@ int qman_alloc_cgrid_range(u32 *result, u32 count); */ int qman_release_cgrid(u32 id); +/** + * qman_is_probed - Check if qman is probed + * + * Returns 1 if the qman driver successfully probed, -1 if the qman driver + * failed to probe or 0 if the qman driver has not been probed yet. 
+ */ +int qman_is_probed(void); + #endif /* __FSL_QMAN_H */ diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h new file mode 100644 index 000000000000..a9834c37ac40 --- /dev/null +++ b/include/trace/events/kyber.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kyber + +#if !defined(_TRACE_KYBER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KYBER_H + +#include <linux/blkdev.h> +#include <linux/tracepoint.h> + +#define DOMAIN_LEN 16 +#define LATENCY_TYPE_LEN 8 + +TRACE_EVENT(kyber_latency, + + TP_PROTO(struct request_queue *q, const char *domain, const char *type, + unsigned int percentile, unsigned int numerator, + unsigned int denominator, unsigned int samples), + + TP_ARGS(q, domain, type, percentile, numerator, denominator, samples), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __array( char, domain, DOMAIN_LEN ) + __array( char, type, LATENCY_TYPE_LEN ) + __field( u8, percentile ) + __field( u8, numerator ) + __field( u8, denominator ) + __field( unsigned int, samples ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + strlcpy(__entry->domain, domain, DOMAIN_LEN); + strlcpy(__entry->type, type, DOMAIN_LEN); + __entry->percentile = percentile; + __entry->numerator = numerator; + __entry->denominator = denominator; + __entry->samples = samples; + ), + + TP_printk("%d,%d %s %s p%u %u/%u samples=%u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->domain, + __entry->type, __entry->percentile, __entry->numerator, + __entry->denominator, __entry->samples) +); + +TRACE_EVENT(kyber_adjust, + + TP_PROTO(struct request_queue *q, const char *domain, + unsigned int depth), + + TP_ARGS(q, domain, depth), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __array( char, domain, DOMAIN_LEN ) + __field( unsigned int, depth ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + strlcpy(__entry->domain, domain, DOMAIN_LEN); + __entry->depth = depth; + ), + + TP_printk("%d,%d %s %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->domain, + __entry->depth) +); + +TRACE_EVENT(kyber_throttled, + + TP_PROTO(struct request_queue *q, const char *domain), + + TP_ARGS(q, domain), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __array( char, domain, DOMAIN_LEN ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + strlcpy(__entry->domain, domain, DOMAIN_LEN); + ), + + TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->domain) +); + +#define _TRACE_KYBER_H +#endif /* _TRACE_KYBER_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 837393fa897b..573d5b901fb1 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -931,6 +931,7 @@ TRACE_EVENT(rxrpc_tx_packet, TP_fast_assign( __entry->call = call_id; memcpy(&__entry->whdr, whdr, sizeof(__entry->whdr)); + __entry->where = where; ), TP_printk("c=%08x %08x:%08x:%08x:%04x %08x %08x %02x %02x %s %s", diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index b479db5c71d9..34dd3d497f2c 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -301,6 +301,7 @@ enum sctp_sinfo_flags { SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. 
*/ /* 2 bits here have been used by SCTP_PR_SCTP_MASK */ SCTP_SENDALL = (1 << 6), + SCTP_PR_SCTP_ALL = (1 << 7), SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */ SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */ }; diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index ac9e8c96d9bd..8cb3a6fef553 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -18,14 +18,17 @@ struct smc_diag_req { * on the internal clcsock, and more SMC-related socket data */ struct smc_diag_msg { - __u8 diag_family; - __u8 diag_state; - __u8 diag_mode; - __u8 diag_shutdown; + __u8 diag_family; + __u8 diag_state; + union { + __u8 diag_mode; + __u8 diag_fallback; /* the old name of the field */ + }; + __u8 diag_shutdown; struct inet_diag_sockid id; - __u32 diag_uid; - __u64 diag_inode; + __u32 diag_uid; + __aligned_u64 diag_inode; }; /* Mode of a connection */ @@ -99,11 +102,11 @@ struct smc_diag_fallback { }; struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ - __u32 linkid; /* Link identifier */ - __u64 peer_gid; /* Peer GID */ - __u64 my_gid; /* My GID */ - __u64 token; /* Token of DMB */ - __u64 peer_token; /* Token of remote DMBE */ + __u32 linkid; /* Link identifier */ + __aligned_u64 peer_gid; /* Peer GID */ + __aligned_u64 my_gid; /* My GID */ + __aligned_u64 token; /* Token of DMB */ + __aligned_u64 peer_token; /* Token of remote DMBE */ }; #endif /* _UAPI_SMC_DIAG_H_ */ diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09d00f8c442b..09502de447f5 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -40,5 +40,6 @@ struct udphdr { #define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */ #define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ +#define UDP_ENCAP_RXRPC 6 #endif /* _UAPI_LINUX_UDP_H */ diff --git a/include/xen/xen.h b/include/xen/xen.h index 1e1d9bd0bd37..d7a2678da77f 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -39,4 +39,8 @@ extern uint32_t xen_start_flags; #define xen_initial_domain() (0) #endif /* CONFIG_XEN_DOM0 */ +struct bio_vec; +bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2); + #endif /* _XEN_XEN_H */ diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 9f8463afda9c..47147c9e184d 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -192,11 +192,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, sock_hold(sock->sk); old_xs = xchg(&m->xsk_map[i], xs); - if (old_xs) { - /* Make sure we've flushed everything. */ - synchronize_net(); + if (old_xs) sock_put((struct sock *)old_xs); - } sockfd_put(sock); return 0; @@ -212,11 +209,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) return -EINVAL; old_xs = xchg(&m->xsk_map[k], NULL); - if (old_xs) { - /* Make sure we've flushed everything. 
*/ - synchronize_net(); + if (old_xs) sock_put((struct sock *)old_xs); - } return 0; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index aae10baf1902..4c1cf0969a80 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -492,7 +492,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, } /** - * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem + * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss * @cgrp: the cgroup of interest * @ss: the subsystem of interest (%NULL returns @cgrp->self) * @@ -501,8 +501,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, * enabled. If @ss is associated with the hierarchy @cgrp is on, this * function is guaranteed to return non-NULL css. */ -static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, - struct cgroup_subsys *ss) +static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, + struct cgroup_subsys *ss) { lockdep_assert_held(&cgroup_mutex); @@ -523,6 +523,35 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, } /** + * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem + * @cgrp: the cgroup of interest + * @ss: the subsystem of interest + * + * Find and get the effective css of @cgrp for @ss. The effective css is + * defined as the matching css of the nearest ancestor including self which + * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, + * the root css is returned, so this function always returns a valid css. + * + * The returned css is not guaranteed to be online, and therefore it is the + * caller's responsibility to tryget a reference for it. + */ +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) +{ + struct cgroup_subsys_state *css; + + do { + css = cgroup_css(cgrp, ss); + + if (css) + return css; + cgrp = cgroup_parent(cgrp); + } while (cgrp); + + return init_css_set.subsys[ss->id]; +} + +/** * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest * @ss: the subsystem of interest @@ -604,10 +633,11 @@ EXPORT_SYMBOL_GPL(of_css); * * Should be called under cgroup_[tree_]mutex. */ -#define for_each_e_css(css, ssid, cgrp) \ - for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ - if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \ - ; \ +#define for_each_e_css(css, ssid, cgrp) \ + for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ + if (!((css) = cgroup_e_css_by_mask(cgrp, \ + cgroup_subsys[(ssid)]))) \ + ; \ else /** @@ -1006,7 +1036,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, * @ss is in this hierarchy, so we want the * effective css from @cgrp. */ - template[i] = cgroup_e_css(cgrp, ss); + template[i] = cgroup_e_css_by_mask(cgrp, ss); } else { /* * @ss is not in this hierarchy, so we don't want @@ -2836,11 +2866,12 @@ restart: } /** - * cgroup_save_control - save control masks of a subtree + * cgroup_save_control - save control masks and dom_cgrp of a subtree * @cgrp: root of the target subtree * - * Save ->subtree_control and ->subtree_ss_mask to the respective old_ - * prefixed fields for @cgrp's subtree including @cgrp itself. + * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the + * respective old_ prefixed fields for @cgrp's subtree including @cgrp + * itself. 
*/ static void cgroup_save_control(struct cgroup *cgrp) { @@ -2850,6 +2881,7 @@ static void cgroup_save_control(struct cgroup *cgrp) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { dsct->old_subtree_control = dsct->subtree_control; dsct->old_subtree_ss_mask = dsct->subtree_ss_mask; + dsct->old_dom_cgrp = dsct->dom_cgrp; } } @@ -2875,11 +2907,12 @@ static void cgroup_propagate_control(struct cgroup *cgrp) } /** - * cgroup_restore_control - restore control masks of a subtree + * cgroup_restore_control - restore control masks and dom_cgrp of a subtree * @cgrp: root of the target subtree * - * Restore ->subtree_control and ->subtree_ss_mask from the respective old_ - * prefixed fields for @cgrp's subtree including @cgrp itself. + * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the + * respective old_ prefixed fields for @cgrp's subtree including @cgrp + * itself. */ static void cgroup_restore_control(struct cgroup *cgrp) { @@ -2889,6 +2922,7 @@ static void cgroup_restore_control(struct cgroup *cgrp) cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { dsct->subtree_control = dsct->old_subtree_control; dsct->subtree_ss_mask = dsct->old_subtree_ss_mask; + dsct->dom_cgrp = dsct->old_dom_cgrp; } } @@ -3019,7 +3053,7 @@ static int cgroup_apply_control(struct cgroup *cgrp) return ret; /* - * At this point, cgroup_e_css() results reflect the new csses + * At this point, cgroup_e_css_by_mask() results reflect the new csses * making the following cgroup_update_dfl_csses() properly update * css associations of all tasks in the subtree. */ @@ -3196,6 +3230,8 @@ static int cgroup_enable_threaded(struct cgroup *cgrp) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup *dom_cgrp = parent->dom_cgrp; + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; int ret; lockdep_assert_held(&cgroup_mutex); @@ -3225,12 +3261,13 @@ static int cgroup_enable_threaded(struct cgroup *cgrp) */ cgroup_save_control(cgrp); - cgrp->dom_cgrp = dom_cgrp; + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) + if (dsct == cgrp || cgroup_is_threaded(dsct)) + dsct->dom_cgrp = dom_cgrp; + ret = cgroup_apply_control(cgrp); if (!ret) parent->nr_threaded_children++; - else - cgrp->dom_cgrp = cgrp; cgroup_finalize_control(cgrp, ret); return ret; diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 1b1d63b3634b..645c7a2ecde8 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -13,6 +13,9 @@ config NEED_DMA_MAP_STATE config ARCH_DMA_ADDR_T_64BIT def_bool 64BIT || PHYS_ADDR_T_64BIT +config ARCH_HAS_DMA_COHERENCE_H + bool + config HAVE_GENERIC_DMA_COHERENT bool @@ -26,22 +29,19 @@ config ARCH_HAS_SYNC_DMA_FOR_CPU config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL bool -config DMA_DIRECT_OPS +config ARCH_HAS_DMA_COHERENT_TO_PFN bool - depends on HAS_DMA -config DMA_NONCOHERENT_OPS +config ARCH_HAS_DMA_MMAP_PGPROT bool - depends on HAS_DMA - select DMA_DIRECT_OPS -config DMA_NONCOHERENT_MMAP +config DMA_DIRECT_OPS bool - depends on DMA_NONCOHERENT_OPS + depends on HAS_DMA config DMA_NONCOHERENT_CACHE_SYNC bool - depends on DMA_NONCOHERENT_OPS + depends on DMA_DIRECT_OPS config DMA_VIRT_OPS bool diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 6de44e4eb454..7d581e4eea4a 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -4,7 +4,6 @@ obj-$(CONFIG_HAS_DMA) += mapping.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o -obj-$(CONFIG_DMA_NONCOHERENT_OPS) += noncoherent.o 
obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 286d82329eb0..b2a87905846d 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -49,7 +49,11 @@ static phys_addr_t limit_cmdline; static int __init early_cma(char *p) { - pr_debug("%s(%s)\n", __func__, p); + if (!p) { + pr_err("Config string not provided\n"); + return -EINVAL; + } + size_cmdline = memparse(p, &p); if (*p != '@') return 0; diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index c007d25bee09..231ca4628062 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1312,6 +1312,22 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg) #endif } +void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len) +{ + if (unlikely(dma_debug_disabled())) + return; + + if (!virt_addr_valid(addr)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from invalid area [addr=%p] [len=%lu]\n", + addr, len); + + if (is_vmalloc_addr(addr)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from vmalloc area [addr=%p] [len=%lu]\n", + addr, len); +} +EXPORT_SYMBOL(debug_dma_map_single); + void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, bool map_single) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index de87b0282e74..87a6bc2a96c0 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -1,13 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* - * DMA operations that map physical memory directly without using an IOMMU or - * flushing caches. + * Copyright (C) 2018 Christoph Hellwig. + * + * DMA operations that map physical memory directly without using an IOMMU. */ +#include <linux/bootmem.h> /* for max_pfn */ #include <linux/export.h> #include <linux/mm.h> #include <linux/dma-direct.h> #include <linux/scatterlist.h> #include <linux/dma-contiguous.h> +#include <linux/dma-noncoherent.h> #include <linux/pfn.h> #include <linux/set_memory.h> @@ -41,40 +44,83 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, return false; } - if (*dev->dma_mask >= DMA_BIT_MASK(32)) { + if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) { dev_err(dev, - "%s: overflow %pad+%zu of device mask %llx\n", - caller, &dma_addr, size, *dev->dma_mask); + "%s: overflow %pad+%zu of device mask %llx bus mask %llx\n", + caller, &dma_addr, size, + *dev->dma_mask, dev->bus_dma_mask); } return false; } return true; } +static inline dma_addr_t phys_to_dma_direct(struct device *dev, + phys_addr_t phys) +{ + if (force_dma_unencrypted()) + return __phys_to_dma(dev, phys); + return phys_to_dma(dev, phys); +} + +u64 dma_direct_get_required_mask(struct device *dev) +{ + u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT); + + if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma) + max_dma = dev->bus_dma_mask; + + return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; +} + +static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, + u64 *phys_mask) +{ + if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask) + dma_mask = dev->bus_dma_mask; + + if (force_dma_unencrypted()) + *phys_mask = __dma_to_phys(dev, dma_mask); + else + *phys_mask = dma_to_phys(dev, dma_mask); + + /* + * Optimistically try the zone that the physical address mask falls + * into first. 
If that returns memory that isn't actually addressable + * we will fallback to the next lower zone and try again. + * + * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding + * zones. + */ + if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + return GFP_DMA; + if (*phys_mask <= DMA_BIT_MASK(32)) + return GFP_DMA32; + return 0; +} + static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { - dma_addr_t addr = force_dma_unencrypted() ? - __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); - return addr + size - 1 <= dev->coherent_dma_mask; + return phys_to_dma_direct(dev, phys) + size - 1 <= + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask); } -void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) +void *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; int page_order = get_order(size); struct page *page = NULL; + u64 phys_mask; void *ret; + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; + /* we always manually zero the memory once we are done: */ gfp &= ~__GFP_ZERO; - - /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ - if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - gfp |= GFP_DMA; - if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; - + gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, + &phys_mask); again: /* CMA can be used only in the context which permits sleeping */ if (gfpflags_allow_blocking(gfp)) { @@ -93,15 +139,14 @@ again: page = NULL; if (IS_ENABLED(CONFIG_ZONE_DMA32) && - dev->coherent_dma_mask < DMA_BIT_MASK(64) && + phys_mask < DMA_BIT_MASK(64) && !(gfp & (GFP_DMA32 | GFP_DMA))) { gfp |= GFP_DMA32; goto again; } if (IS_ENABLED(CONFIG_ZONE_DMA) && - dev->coherent_dma_mask < DMA_BIT_MASK(32) && - !(gfp & GFP_DMA)) { + phys_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) { gfp = (gfp & ~GFP_DMA32) | GFP_DMA; goto again; } @@ -124,7 +169,7 @@ again: * NOTE: this function must never look at the dma_addr argument, because we want * to be able to use it as a helper for iommu implementations as well. 
*/ -void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, +void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -136,14 +181,96 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, free_pages((unsigned long)cpu_addr, page_order); } +void *dma_direct_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +{ + if (!dev_is_dma_coherent(dev)) + return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); + return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); +} + +void dma_direct_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) +{ + if (!dev_is_dma_coherent(dev)) + arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); + else + dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); +} + +static void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + if (dev_is_dma_coherent(dev)) + return; + arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); +} + +static void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (dev_is_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); +} + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +static void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + if (dev_is_dma_coherent(dev)) + return; + arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); + arch_sync_dma_for_cpu_all(dev); +} + +static void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (dev_is_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); + arch_sync_dma_for_cpu_all(dev); +} + +static void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); +} + +static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); +} +#endif + dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { - dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dma_addr = phys_to_dma(dev, phys); if (!check_addr(dev, dma_addr, size, __func__)) return DIRECT_MAPPING_ERROR; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_single_for_device(dev, dma_addr, size, dir); return dma_addr; } @@ -162,31 +289,29 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, sg_dma_len(sg) = sg->length; } + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_sg_for_device(dev, sgl, nents, dir); return nents; } +/* + * Because 32-bit DMA masks are so common we expect 
every architecture to be + * able to satisfy them - either by not supporting more physical memory, or by + * providing a ZONE_DMA32. If neither is the case, the architecture needs to + * use an IOMMU instead of the direct mapping. + */ int dma_direct_supported(struct device *dev, u64 mask) { -#ifdef CONFIG_ZONE_DMA - if (mask < phys_to_dma(dev, DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))) - return 0; -#else - /* - * Because 32-bit DMA masks are so common we expect every architecture - * to be able to satisfy them - either by not supporting more physical - * memory, or by providing a ZONE_DMA32. If neither is the case, the - * architecture needs to use an IOMMU instead of the direct mapping. - */ - if (mask < phys_to_dma(dev, DMA_BIT_MASK(32))) - return 0; -#endif - /* - * Upstream PCI/PCIe bridges or SoC interconnects may not carry - * as many DMA address bits as the device itself supports. - */ - if (dev->bus_dma_mask && mask > dev->bus_dma_mask) - return 0; - return 1; + u64 min_mask; + + if (IS_ENABLED(CONFIG_ZONE_DMA)) + min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS); + else + min_mask = DMA_BIT_MASK(32); + + min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT); + + return mask >= phys_to_dma(dev, min_mask); } int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -199,7 +324,20 @@ const struct dma_map_ops dma_direct_ops = { .free = dma_direct_free, .map_page = dma_direct_map_page, .map_sg = dma_direct_map_sg, +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) + .sync_single_for_device = dma_direct_sync_single_for_device, + .sync_sg_for_device = dma_direct_sync_sg_for_device, +#endif +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + .sync_single_for_cpu = dma_direct_sync_single_for_cpu, + .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, + .unmap_page = dma_direct_unmap_page, + .unmap_sg = dma_direct_unmap_sg, +#endif + .get_required_mask = dma_direct_get_required_mask, .dma_supported = dma_direct_supported, .mapping_error = dma_direct_mapping_error, + .cache_sync = arch_dma_cache_sync, }; EXPORT_SYMBOL(dma_direct_ops); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index d2a92ddaac4d..58dec7a92b7b 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -7,7 +7,7 @@ */ #include <linux/acpi.h> -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/export.h> #include <linux/gfp.h> #include <linux/of_device.h> @@ -202,17 +202,26 @@ EXPORT_SYMBOL(dmam_release_declared_memory); * Create scatter-list for the already allocated DMA buffer. */ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t handle, size_t size) + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) { - struct page *page = virt_to_page(cpu_addr); + struct page *page; int ret; - ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (unlikely(ret)) - return ret; + if (!dev_is_dma_coherent(dev)) { + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) + return -ENXIO; - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - return 0; + page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr, + dma_addr)); + } else { + page = virt_to_page(cpu_addr); + } + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return ret; } EXPORT_SYMBOL(dma_common_get_sgtable); @@ -220,27 +229,37 @@ EXPORT_SYMBOL(dma_common_get_sgtable); * Create userspace mapping for the DMA-coherent memory. 
*/ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) { - int ret = -ENXIO; #ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; + unsigned long pfn; + int ret = -ENXIO; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; - if (off < count && user_count <= (count - off)) - ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + off, - user_count << PAGE_SHIFT, - vma->vm_page_prot); -#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ + if (off >= count || user_count > count - off) + return -ENXIO; - return ret; + if (!dev_is_dma_coherent(dev)) { + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN)) + return -ENXIO; + pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr); + } else { + pfn = page_to_pfn(virt_to_page(cpu_addr)); + } + + return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + user_count << PAGE_SHIFT, vma->vm_page_prot); +#else + return -ENXIO; +#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ } EXPORT_SYMBOL(dma_common_mmap); @@ -327,19 +346,3 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) vunmap(cpu_addr); } #endif - -/* - * enables DMA API use for a device - */ -int dma_configure(struct device *dev) -{ - if (dev->bus->dma_configure) - return dev->bus->dma_configure(dev); - return 0; -} - -void dma_deconfigure(struct device *dev) -{ - of_dma_deconfigure(dev); - acpi_dma_deconfigure(dev); -} diff --git a/kernel/dma/noncoherent.c b/kernel/dma/noncoherent.c deleted file mode 100644 index 031fe235d958..000000000000 --- a/kernel/dma/noncoherent.c +++ /dev/null @@ -1,106 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018 Christoph Hellwig. - * - * DMA operations that map physical memory directly without providing cache - * coherence. 
- */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> -#include <linux/scatterlist.h> - -static void dma_noncoherent_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); -} - -static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t addr; - - addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); - if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(dev, page_to_phys(page) + offset, - size, dir); - return addr; -} - -static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); - if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); - return nents; -} - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) -static void dma_noncoherent_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); - arch_sync_dma_for_cpu_all(dev); -} - -static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); - arch_sync_dma_for_cpu_all(dev); -} - -static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); -} - -static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); -} -#endif - -const struct dma_map_ops dma_noncoherent_ops = { - .alloc = arch_dma_alloc, - .free = arch_dma_free, - .mmap = arch_dma_mmap, - .sync_single_for_device = dma_noncoherent_sync_single_for_device, - .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, - .map_page = dma_noncoherent_map_page, - .map_sg = dma_noncoherent_map_sg, -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) - .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, - .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, - .unmap_page = dma_noncoherent_unmap_page, - .unmap_sg = dma_noncoherent_unmap_sg, -#endif - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, - .cache_sync = arch_dma_cache_sync, -}; -EXPORT_SYMBOL(dma_noncoherent_ops); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 5342f6fc022e..0bd595a0b610 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -63,6 +63,12 @@ static 
DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head); enum s2idle_states __read_mostly s2idle_state; static DEFINE_RAW_SPINLOCK(s2idle_lock); +bool pm_suspend_via_s2idle(void) +{ + return mem_sleep_current == PM_SUSPEND_TO_IDLE; +} +EXPORT_SYMBOL_GPL(pm_suspend_via_s2idle); + void s2idle_set_ops(const struct platform_s2idle_ops *ops) { lock_system_sleep(); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7fc4a371bdd2..908c9cdae2f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4001,7 +4001,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * put back on, and if we advance min_vruntime, we'll be placed back * further than we started -- ie. we'll be penalized. */ - if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE) + if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) update_min_vruntime(cfs_rq); } @@ -4476,9 +4476,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) /* * Add to the _head_ of the list, so that an already-started - * distribute_cfs_runtime will not see us + * distribute_cfs_runtime will not see us. If distribute_cfs_runtime is + * not running, add to the tail so that later runqueues don't get starved. */ - list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (cfs_b->distribute_running) + list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + else + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); /* * If we're the first throttled task, make sure the bandwidth @@ -4622,14 +4626,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) * in us over-using our runtime if it is all used during this loop, but * only by limited amounts in that extreme case. */ - while (throttled && cfs_b->runtime > 0) { + while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { runtime = cfs_b->runtime; + cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ runtime = distribute_cfs_runtime(cfs_b, runtime, runtime_expires); raw_spin_lock(&cfs_b->lock); + cfs_b->distribute_running = 0; throttled = !list_empty(&cfs_b->throttled_cfs_rq); cfs_b->runtime -= min(runtime, cfs_b->runtime); @@ -4740,6 +4746,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); + if (cfs_b->distribute_running) { + raw_spin_unlock(&cfs_b->lock); + return; + } + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { raw_spin_unlock(&cfs_b->lock); return; @@ -4749,6 +4760,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) runtime = cfs_b->runtime; expires = cfs_b->runtime_expires; + if (runtime) + cfs_b->distribute_running = 1; + raw_spin_unlock(&cfs_b->lock); if (!runtime) @@ -4759,6 +4773,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) raw_spin_lock(&cfs_b->lock); if (expires == cfs_b->runtime_expires) cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } @@ -4867,6 +4882,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period_timer.function = sched_cfs_period_timer; hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->slack_timer.function = sched_cfs_slack_timer; + cfs_b->distribute_running = 0; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 455fa330de04..9683f458aec7 100644 ---
a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -346,6 +346,8 @@ struct cfs_bandwidth { int nr_periods; int nr_throttled; u64 throttled_time; + + bool distribute_running; #endif }; diff --git a/kernel/signal.c b/kernel/signal.c index 5843c541fda9..e4aad0e90882 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3460,7 +3460,8 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) } static int -do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) +do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, + size_t min_ss_size) { struct task_struct *t = current; @@ -3490,7 +3491,7 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp) ss_size = 0; ss_sp = NULL; } else { - if (unlikely(ss_size < MINSIGSTKSZ)) + if (unlikely(ss_size < min_ss_size)) return -ENOMEM; } @@ -3508,7 +3509,8 @@ SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) if (uss && copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, - current_user_stack_pointer()); + current_user_stack_pointer(), + MINSIGSTKSZ); if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) err = -EFAULT; return err; @@ -3519,7 +3521,8 @@ int restore_altstack(const stack_t __user *uss) stack_t new; if (copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; - (void)do_sigaltstack(&new, NULL, current_user_stack_pointer()); + (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), + MINSIGSTKSZ); /* squash all but EFAULT for now */ return 0; } @@ -3553,7 +3556,8 @@ static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr, uss.ss_size = uss32.ss_size; } ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, - compat_user_stack_pointer()); + compat_user_stack_pointer(), + COMPAT_MINSIGSTKSZ); if (ret >= 0 && uoss_ptr) { compat_stack_t old; memset(&old, 0, sizeof(old)); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2868d85f1fb1..fac0ddf8a8e2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return NULL; - if (!bio->bi_css) + if (!bio->bi_blkg) return NULL; - return cgroup_get_kernfs_id(bio->bi_css->cgroup); + return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); } #else static union kernfs_node_id * diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index f704390db9fc..d8765c952fab 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -5,12 +5,12 @@ * Copyright (C) 2018 Joel Fernandes (Google) <joel@joelfernandes.org> */ +#include <linux/trace_clock.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kernel.h> #include <linux/kthread.h> -#include <linux/ktime.h> #include <linux/module.h> #include <linux/printk.h> #include <linux/string.h> @@ -25,13 +25,13 @@ MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt or irq (default ir static void busy_wait(ulong time) { - ktime_t start, end; - start = ktime_get(); + u64 start, end; + start = trace_clock_local(); do { - end = ktime_get(); + end = trace_clock_local(); if (kthread_should_stop()) break; - } while (ktime_to_ns(ktime_sub(end, start)) < (time * 1000)); + } while ((end - start) < (time * 1000)); } static int preemptirq_delay_run(void *data) diff --git a/kernel/trace/trace_events_hist.c 
b/kernel/trace/trace_events_hist.c index 85f6b01431c7..d239004aaf29 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -738,16 +738,30 @@ static void free_synth_field(struct synth_field *field) kfree(field); } -static struct synth_field *parse_synth_field(char *field_type, - char *field_name) +static struct synth_field *parse_synth_field(int argc, char **argv, + int *consumed) { struct synth_field *field; + const char *prefix = NULL; + char *field_type = argv[0], *field_name; int len, ret = 0; char *array; if (field_type[0] == ';') field_type++; + if (!strcmp(field_type, "unsigned")) { + if (argc < 3) + return ERR_PTR(-EINVAL); + prefix = "unsigned "; + field_type = argv[1]; + field_name = argv[2]; + *consumed = 3; + } else { + field_name = argv[1]; + *consumed = 2; + } + len = strlen(field_name); if (field_name[len - 1] == ';') field_name[len - 1] = '\0'; @@ -760,11 +774,15 @@ static struct synth_field *parse_synth_field(char *field_type, array = strchr(field_name, '['); if (array) len += strlen(array); + if (prefix) + len += strlen(prefix); field->type = kzalloc(len, GFP_KERNEL); if (!field->type) { ret = -ENOMEM; goto free; } + if (prefix) + strcat(field->type, prefix); strcat(field->type, field_type); if (array) { strcat(field->type, array); @@ -1009,7 +1027,7 @@ static int create_synth_event(int argc, char **argv) struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; struct synth_event *event = NULL; bool delete_event = false; - int i, n_fields = 0, ret = 0; + int i, consumed = 0, n_fields = 0, ret = 0; char *name; mutex_lock(&synth_event_mutex); @@ -1061,16 +1079,16 @@ static int create_synth_event(int argc, char **argv) goto err; } - field = parse_synth_field(argv[i], argv[i + 1]); + field = parse_synth_field(argc - i, &argv[i], &consumed); if (IS_ERR(field)) { ret = PTR_ERR(field); goto err; } - fields[n_fields] = field; - i++; n_fields++; + fields[n_fields++] = field; + i += consumed - 1; } - if (i < argc) { + if (i < argc && strcmp(argv[i], ";") != 0) { ret = -EINVAL; goto err; } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index bf2c06ef9afc..a3be42304485 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -28,8 +28,8 @@ #include <linux/sched/task.h> #include <linux/static_key.h> -extern struct tracepoint * const __start___tracepoints_ptrs[]; -extern struct tracepoint * const __stop___tracepoints_ptrs[]; +extern tracepoint_ptr_t __start___tracepoints_ptrs[]; +extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); @@ -371,25 +371,17 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); -static void for_each_tracepoint_range(struct tracepoint * const *begin, - struct tracepoint * const *end, +static void for_each_tracepoint_range( + tracepoint_ptr_t *begin, tracepoint_ptr_t *end, void (*fct)(struct tracepoint *tp, void *priv), void *priv) { + tracepoint_ptr_t *iter; + if (!begin) return; - - if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) { - const int *iter; - - for (iter = (const int *)begin; iter < (const int *)end; iter++) - fct(offset_to_ptr(iter), priv); - } else { - struct tracepoint * const *iter; - - for (iter = begin; iter < end; iter++) - fct(*iter, priv); - } + for (iter = begin; iter < end; iter++) + fct(tracepoint_ptr_deref(iter), priv); } #ifdef CONFIG_MODULES diff --git a/lib/Makefile b/lib/Makefile index ca3f7ebb900d..423876446810 100644 --- a/lib/Makefile +++ 
b/lib/Makefile @@ -119,7 +119,6 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o -CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500) obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ diff --git a/lib/bch.c b/lib/bch.c index 7b0f2006698b..5db6d3a4c8a6 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -79,20 +79,19 @@ #define GF_T(_p) (CONFIG_BCH_CONST_T) #define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1) #define BCH_MAX_M (CONFIG_BCH_CONST_M) +#define BCH_MAX_T (CONFIG_BCH_CONST_T) #else #define GF_M(_p) ((_p)->m) #define GF_T(_p) ((_p)->t) #define GF_N(_p) ((_p)->n) -#define BCH_MAX_M 15 +#define BCH_MAX_M 15 /* 2KB */ +#define BCH_MAX_T 64 /* 64 bit correction */ #endif -#define BCH_MAX_T (((1 << BCH_MAX_M) - 1) / BCH_MAX_M) - #define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32) #define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8) #define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32) -#define BCH_ECC_MAX_BYTES DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8) #ifndef dbg #define dbg(_fmt, args...) do {} while (0) @@ -202,6 +201,9 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, const uint32_t * const tab3 = tab2 + 256*(l+1); const uint32_t *pdata, *p0, *p1, *p2, *p3; + if (WARN_ON(r_bytes > sizeof(r))) + return; + if (ecc) { /* load ecc parity bytes into internal 32-bit buffer */ load_ecc8(bch, bch->ecc_buf, ecc); @@ -1285,6 +1287,13 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) */ goto fail; + if (t > BCH_MAX_T) + /* + * we can support larger than 64 bits if necessary, at the + * cost of higher stack usage. + */ + goto fail; + /* sanity checks */ if ((t < 1) || (m*t >= ((1 << m)-1))) /* invalid t value */ diff --git a/lib/crc32.c b/lib/crc32.c index a6c9afafc8c8..45b1d67a1767 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -183,21 +183,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, } #if CRC_LE_BITS == 1 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32table_le, CRC32_POLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); @@ -206,6 +206,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); +u32 crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +u32 __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); + /* * This multiplies the polynomials x and y modulo the given modulus. 
* This follows the "little-endian" CRC convention that the lsbit diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 9f96fa7bc000..de10b8c0bff6 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -356,11 +356,35 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); */ void percpu_ref_reinit(struct percpu_ref *ref) { + WARN_ON_ONCE(!percpu_ref_is_zero(ref)); + + percpu_ref_resurrect(ref); +} +EXPORT_SYMBOL_GPL(percpu_ref_reinit); + +/** + * percpu_ref_resurrect - modify a percpu refcount from dead to live + * @ref: percpu_ref to resurrect + * + * Modify @ref so that it's in the same state as before percpu_ref_kill() was + * called. @ref must be dead but must not yet have exited. + * + * If @ref->release() frees @ref then the caller is responsible for + * guaranteeing that @ref->release() does not get called while this + * function is in progress. + * + * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while + * this function is in progress. + */ +void percpu_ref_resurrect(struct percpu_ref *ref) +{ + unsigned long __percpu *percpu_count; unsigned long flags; spin_lock_irqsave(&percpu_ref_switch_lock, flags); - WARN_ON_ONCE(!percpu_ref_is_zero(ref)); + WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)); + WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count)); ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; percpu_ref_get(ref); @@ -368,4 +392,4 @@ void percpu_ref_reinit(struct percpu_ref *ref) spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); } -EXPORT_SYMBOL_GPL(percpu_ref_reinit); +EXPORT_SYMBOL_GPL(percpu_ref_resurrect); diff --git a/lib/test_ida.c b/lib/test_ida.c index 2d1637d8136b..b06880625961 100644 --- a/lib/test_ida.c +++ b/lib/test_ida.c @@ -150,10 +150,10 @@ static void ida_check_conv(struct ida *ida) IDA_BUG_ON(ida, !ida_is_empty(ida)); } +static DEFINE_IDA(ida); + static int ida_checks(void) { - DEFINE_IDA(ida); - IDA_BUG_ON(&ida, !ida_is_empty(&ida)); ida_check_alloc(&ida); ida_check_destroy(&ida); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d5b3a3f95c01..812e59e13fe6 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2794,7 +2794,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) copy = end - str; memcpy(str, args, copy); str += len; - args += len; + args += len + 1; } } if (process) diff --git a/mm/Makefile b/mm/Makefile index 26ef77a3883b..6485d5745dd7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -23,9 +23,9 @@ KCOV_INSTRUMENT_vmstat.o := n mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ - mlock.o mmap.o mprotect.o mremap.o msync.o \ - page_vma_mapped.o pagewalk.o pgtable-generic.o \ - rmap.o vmalloc.o + mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ + msync.o page_vma_mapped.o pagewalk.o \ + pgtable-generic.o rmap.o vmalloc.o ifdef CONFIG_CROSS_MEMORY_ATTACH diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 00704060b7f7..deed97fba979 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1780,7 +1780,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd) bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) + pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; @@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); - if (pmd_present(pmd) && pmd_dirty(pmd)) +
if (pmd_present(pmd)) force_flush = true; VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, } pmd = move_soft_dirty_pmd(pmd); set_pmd_at(mm, new_addr, new_pmd, pmd); - if (new_ptl != old_ptl) - spin_unlock(new_ptl); if (force_flush) flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); - else - *need_flush = true; + if (new_ptl != old_ptl) + spin_unlock(new_ptl); spin_unlock(old_ptl); return true; } @@ -2885,9 +2883,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, if (!(pvmw->pmd && !pvmw->pte)) return; - mmu_notifier_invalidate_range_start(mm, address, - address + HPAGE_PMD_SIZE); - flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); pmdval = *pvmw->pmd; pmdp_invalidate(vma, address, pvmw->pmd); @@ -2900,9 +2895,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, set_pmd_at(mm, address, pvmw->pmd, pmdswp); page_remove_rmap(page, true); put_page(page); - - mmu_notifier_invalidate_range_end(mm, address, - address + HPAGE_PMD_SIZE); } void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) diff --git a/mm/memory.c b/mm/memory.c index c467102a5cbc..21a5e6e4758b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -186,253 +186,6 @@ static void check_sync_rss_stat(struct task_struct *task) #endif /* SPLIT_RSS_COUNTING */ -#ifdef HAVE_GENERIC_MMU_GATHER - -static bool tlb_next_batch(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - - batch = tlb->active; - if (batch->next) { - tlb->active = batch->next; - return true; - } - - if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) - return false; - - batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); - if (!batch) - return false; - - tlb->batch_count++; - batch->next = NULL; - batch->nr = 0; - batch->max = MAX_GATHER_BATCH; - - tlb->active->next = batch; - tlb->active = batch; - - return true; -} - -void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - - /* Is it from 0 to ~0? */ - tlb->fullmm = !(start | (end+1)); - tlb->need_flush_all = 0; - tlb->local.next = NULL; - tlb->local.nr = 0; - tlb->local.max = ARRAY_SIZE(tlb->__pages); - tlb->active = &tlb->local; - tlb->batch_count = 0; - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb->batch = NULL; -#endif - tlb->page_size = 0; - - __tlb_reset_range(tlb); -} - -static void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb_table_flush(tlb); -#endif - for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { - free_pages_and_swap_cache(batch->pages, batch->nr); - batch->nr = 0; - } - tlb->active = &tlb->local; -} - -void tlb_flush_mmu(struct mmu_gather *tlb) -{ - tlb_flush_mmu_tlbonly(tlb); - tlb_flush_mmu_free(tlb); -} - -/* tlb_finish_mmu - * Called at the end of the shootdown operation to free up any resources - * that were required. 
- */ -void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - struct mmu_gather_batch *batch, *next; - - if (force) - __tlb_adjust_range(tlb, start, end - start); - - tlb_flush_mmu(tlb); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - for (batch = tlb->local.next; batch; batch = next) { - next = batch->next; - free_pages((unsigned long)batch, 0); - } - tlb->local.next = NULL; -} - -/* __tlb_remove_page - * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while - * handling the additional races in SMP caused by other CPUs caching valid - * mappings in their TLBs. Returns the number of free page slots left. - * When out of page slots we must call tlb_flush_mmu(). - *returns true if the caller should flush. - */ -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) -{ - struct mmu_gather_batch *batch; - - VM_BUG_ON(!tlb->end); - VM_WARN_ON(tlb->page_size != page_size); - - batch = tlb->active; - /* - * Add the page and check if we are full. If so - * force a flush. - */ - batch->pages[batch->nr++] = page; - if (batch->nr == batch->max) { - if (!tlb_next_batch(tlb)) - return true; - batch = tlb->active; - } - VM_BUG_ON_PAGE(batch->nr > batch->max, page); - - return false; -} - -#endif /* HAVE_GENERIC_MMU_GATHER */ - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -/* - * See the comment near struct mmu_table_batch. - */ - -/* - * If we want tlb_remove_table() to imply TLB invalidates. - */ -static inline void tlb_table_invalidate(struct mmu_gather *tlb) -{ -#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE - /* - * Invalidate page-table caches used by hardware walkers. Then we still - * need to RCU-sched wait while freeing the pages because software - * walkers can still be in-flight. - */ - tlb_flush_mmu_tlbonly(tlb); -#endif -} - -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely on - * IRQ disabling. See the comment near struct mmu_table_batch. 
- */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - tlb_table_invalidate(tlb); - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch == NULL) { - *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - tlb_table_invalidate(tlb); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_table_flush(tlb); -} - -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ - -/** - * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down - * @tlb: the mmu_gather structure to initialize - * @mm: the mm_struct of the target address space - * @start: start of the region that will be removed from the page-table - * @end: end of the region that will be removed from the page-table - * - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @start and @end are set to 0 and -1 - * respectively when @mm is without users and we're going to destroy - * the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - arch_tlb_gather_mmu(tlb, mm, start, end); - inc_tlb_flush_pending(tlb->mm); -} - -void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) -{ - /* - * If there are parallel threads are doing PTE changes on same range - * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB - * flush by batching, a thread has stable TLB entry can fail to flush - * the TLB by observing pte_none|!pte_dirty, for example so flush TLB - * forcefully if we detect parallel PTE batching threads. - */ - bool force = mm_tlb_flush_nested(tlb->mm); - - arch_tlb_finish_mmu(tlb, start, end, force); - dec_tlb_flush_pending(tlb->mm); -} - /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. 
diff --git a/mm/mmap.c b/mm/mmap.c index 5f2b2b184c60..f7cd9cb966c0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1410,7 +1410,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (flags & MAP_FIXED_NOREPLACE) { struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && vma->vm_start <= addr) + if (vma && vma->vm_start < addr + len) return -EEXIST; } diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c new file mode 100644 index 000000000000..2a9fbc4a37d5 --- /dev/null +++ b/mm/mmu_gather.c @@ -0,0 +1,261 @@ +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/kernel.h> +#include <linux/mmdebug.h> +#include <linux/mm_types.h> +#include <linux/pagemap.h> +#include <linux/rcupdate.h> +#include <linux/smp.h> +#include <linux/swap.h> + +#include <asm/pgalloc.h> +#include <asm/tlb.h> + +#ifdef HAVE_GENERIC_MMU_GATHER + +static bool tlb_next_batch(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + + batch = tlb->active; + if (batch->next) { + tlb->active = batch->next; + return true; + } + + if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) + return false; + + batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + if (!batch) + return false; + + tlb->batch_count++; + batch->next = NULL; + batch->nr = 0; + batch->max = MAX_GATHER_BATCH; + + tlb->active->next = batch; + tlb->active = batch; + + return true; +} + +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + tlb->mm = mm; + + /* Is it from 0 to ~0? */ + tlb->fullmm = !(start | (end+1)); + tlb->need_flush_all = 0; + tlb->local.next = NULL; + tlb->local.nr = 0; + tlb->local.max = ARRAY_SIZE(tlb->__pages); + tlb->active = &tlb->local; + tlb->batch_count = 0; + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb->batch = NULL; +#endif + tlb->page_size = 0; + + __tlb_reset_range(tlb); +} + +void tlb_flush_mmu_free(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb_table_flush(tlb); +#endif + for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { + free_pages_and_swap_cache(batch->pages, batch->nr); + batch->nr = 0; + } + tlb->active = &tlb->local; +} + +void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + tlb_flush_mmu_free(tlb); +} + +/* tlb_finish_mmu + * Called at the end of the shootdown operation to free up any resources + * that were required. + */ +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) +{ + struct mmu_gather_batch *batch, *next; + + if (force) { + __tlb_reset_range(tlb); + __tlb_adjust_range(tlb, start, end - start); + } + + tlb_flush_mmu(tlb); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + for (batch = tlb->local.next; batch; batch = next) { + next = batch->next; + free_pages((unsigned long)batch, 0); + } + tlb->local.next = NULL; +} + +/* __tlb_remove_page + * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while + * handling the additional races in SMP caused by other CPUs caching valid + * mappings in their TLBs. Returns the number of free page slots left. + * When out of page slots we must call tlb_flush_mmu(). + *returns true if the caller should flush. + */ +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) +{ + struct mmu_gather_batch *batch; + + VM_BUG_ON(!tlb->end); + VM_WARN_ON(tlb->page_size != page_size); + + batch = tlb->active; + /* + * Add the page and check if we are full. 
If so + * force a flush. + */ + batch->pages[batch->nr++] = page; + if (batch->nr == batch->max) { + if (!tlb_next_batch(tlb)) + return true; + batch = tlb->active; + } + VM_BUG_ON_PAGE(batch->nr > batch->max, page); + + return false; +} + +#endif /* HAVE_GENERIC_MMU_GATHER */ + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + +/* + * See the comment near struct mmu_table_batch. + */ + +/* + * If we want tlb_remove_table() to imply TLB invalidates. + */ +static inline void tlb_table_invalidate(struct mmu_gather *tlb) +{ +#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE + /* + * Invalidate page-table caches used by hardware walkers. Then we still + * need to RCU-sched wait while freeing the pages because software + * walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); +#endif +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely on + * IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + tlb_table_invalidate(tlb); + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + tlb_table_invalidate(tlb); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_table_flush(tlb); +} + +#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ + +/** + * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * @start: start of the region that will be removed from the page-table + * @end: end of the region that will be removed from the page-table + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @start and @end are set to 0 and -1 + * respectively when @mm is without users and we're going to destroy + * the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. 
+ */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); +} diff --git a/mm/mremap.c b/mm/mremap.c index 5c2e18505f75..a9617e72e6b7 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks, bool *need_flush) + unsigned long new_addr, bool need_rmap_locks) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; @@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte = ptep_get_and_clear(mm, old_addr, old_pte); /* - * If we are remapping a dirty PTE, make sure + * If we are remapping a valid PTE, make sure * to flush TLB before we drop the PTL for the - * old PTE or we may race with page_mkclean(). + * PTE. * - * This check has to be done after we removed the - * old PTE from page tables or another thread may - * dirty it after the check and before the removal. + * NOTE! Both old and new PTL matter: the old one + * for racing with page_mkclean(), the new one to + * make sure the physical page stays valid until + * the TLB entry for the old mapping has been + * flushed. */ - if (pte_present(pte) && pte_dirty(pte)) + if (pte_present(pte)) force_flush = true; pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, } arch_leave_lazy_mmu_mode(); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); - if (force_flush) - flush_tlb_range(vma, old_end - len, old_end); - else - *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; - bool need_flush = false; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd, - &need_flush); + old_end, old_pmd, new_pmd); if (need_rmap_locks) drop_rmap_locks(vma); if (moved) @@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (extent > next - new_addr) extent = next - new_addr; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, - new_pmd, new_addr, need_rmap_locks, &need_flush); + new_pmd, new_addr, need_rmap_locks); } - if (need_flush) - flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 706a738c0aee..e2ef1c17942f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6193,15 +6193,6 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages, return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT; } -#ifdef CONFIG_NUMA_BALANCING -static void pgdat_init_numabalancing(struct pglist_data *pgdat) -{ - spin_lock_init(&pgdat->numabalancing_migrate_lock); -} -#else -static void pgdat_init_numabalancing(struct pglist_data *pgdat) {} -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void 
pgdat_init_split_queue(struct pglist_data *pgdat) { @@ -6226,7 +6217,6 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) { pgdat_resize_init(pgdat); - pgdat_init_numabalancing(pgdat); pgdat_init_split_queue(pgdat); pgdat_init_kcompactd(pgdat); diff --git a/mm/page_io.c b/mm/page_io.c index aafd19ec1db4..573d3663d846 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, goto out; } bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); - bio_associate_blkcg_from_page(bio, page); + bio_associate_blkg_from_page(bio, page); count_swpout_vm_event(page); set_page_writeback(page); unlock_page(page); diff --git a/mm/percpu.c b/mm/percpu.c index a749d4d96e3e..4b90682623e9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1212,6 +1212,7 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; + pcpu_mem_free(chunk->md_blocks); pcpu_mem_free(chunk->bound_map); pcpu_mem_free(chunk->alloc_map); pcpu_mem_free(chunk); diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index b64e1649993b..94e88f510c5b 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -23,9 +23,11 @@ static void shutdown_umh(struct umh_info *info) if (!info->pid) return; - tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID); - if (tsk) + tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID); + if (tsk) { force_sig(SIGKILL, tsk); + put_task_struct(tsk); + } fput(info->pipe_to_umh); fput(info->pipe_from_umh); info->pid = 0; diff --git a/net/core/dev.c b/net/core/dev.c index 82114e1111e6..93243479085f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1752,6 +1752,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +/** + * call_netdevice_notifiers_mtu - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @arg: additional u32 argument passed to the notifier function + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ +static int call_netdevice_notifiers_mtu(unsigned long val, + struct net_device *dev, u32 arg) +{ + struct netdev_notifier_info_ext info = { + .info.dev = dev, + .ext.mtu = arg, + }; + + BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); + + return call_netdevice_notifiers_info(val, &info.info); +} + #ifdef CONFIG_NET_INGRESS static DEFINE_STATIC_KEY_FALSE(ingress_needed_key); @@ -7574,14 +7596,16 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, err = __dev_set_mtu(dev, new_mtu); if (!err) { - err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + orig_mtu); err = notifier_to_errno(err); if (err) { /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. 
*/ __dev_set_mtu(dev, orig_mtu); - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + new_mtu); } } return err; diff --git a/net/core/devlink.c b/net/core/devlink.c index 8c0ed225e280..6bc42933be4a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2995,6 +2995,8 @@ devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { + int len; + if (param->type != DEVLINK_PARAM_TYPE_BOOL && !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) return -EINVAL; @@ -3010,10 +3012,13 @@ devlink_param_value_get_from_info(const struct devlink_param *param, value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); break; case DEVLINK_PARAM_TYPE_STRING: - if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) > - DEVLINK_PARAM_MAX_STRING_VALUE) + len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), + nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + strcpy(value->vstr, + nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); break; case DEVLINK_PARAM_TYPE_BOOL: value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? @@ -3100,7 +3105,10 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, return -EOPNOTSUPP; if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) { - param_item->driverinit_value = value; + if (param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, value.vstr); + else + param_item->driverinit_value = value; param_item->driverinit_value_valid = true; } else { if (!param->set) @@ -4540,7 +4548,10 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - *init_val = param_item->driverinit_value; + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(init_val->vstr, param_item->driverinit_value.vstr); + else + *init_val = param_item->driverinit_value; return 0; } @@ -4571,7 +4582,10 @@ int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - param_item->driverinit_value = init_val; + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, init_val.vstr); + else + param_item->driverinit_value = init_val; param_item->driverinit_value_valid = true; devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); @@ -4604,6 +4618,23 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) EXPORT_SYMBOL_GPL(devlink_param_value_changed); /** + * devlink_param_value_str_fill - Safely fill-up the string preventing + * from overflow of the preallocated buffer + * + * @dst_val: destination devlink_param_value + * @src: source buffer + */ +void devlink_param_value_str_fill(union devlink_param_value *dst_val, + const char *src) +{ + size_t len; + + len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE); + WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE); +} +EXPORT_SYMBOL_GPL(devlink_param_value_str_fill); + +/** * devlink_region_create - create a new address region * * @devlink: devlink diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0762aaf8e964..aeabc4831fca 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1015,6 +1015,9 @@ static noinline_for_stack int 
ethtool_get_rxnfc(struct net_device *dev, return -EINVAL; } + if (info.cmd != cmd) + return -EINVAL; + if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) @@ -2469,13 +2472,17 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) +static int ethtool_set_per_queue(struct net_device *dev, + void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) return -EFAULT; + if (per_queue_opt.sub_command != sub_cmd) + return -EINVAL; + switch (per_queue_opt.sub_command) { case ETHTOOL_GCOALESCE: return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); @@ -2846,7 +2853,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_get_phy_stats(dev, useraddr); break; case ETHTOOL_PERQUEUE: - rc = ethtool_set_per_queue(dev, useraddr); + rc = ethtool_set_per_queue(dev, useraddr, sub_cmd); break; case ETHTOOL_GLINKSETTINGS: rc = ethtool_get_link_ksettings(dev, useraddr); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 91592fceeaad..4e07824eec5e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1148,8 +1148,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->nud_state = new; err = 0; notify = old & NUD_VALID; - if (((old & (NUD_INCOMPLETE | NUD_PROBE)) || - (flags & NEIGH_UPDATE_F_ADMIN)) && + if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && (new & NUD_FAILED)) { neigh_invalidate(neigh); notify = 1; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index de1d1ba92f2d..3ae899805f8b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -312,7 +312,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. 
*/ struct netpoll_info *npinfo; - rcu_read_lock_bh(); lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); @@ -357,7 +356,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } - rcu_read_unlock_bh(); } EXPORT_SYMBOL(netpoll_send_skb_on_dev); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2c807f67aba..f817f336595d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1846,8 +1846,9 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) if (skb->ip_summed == CHECKSUM_COMPLETE) { int delta = skb->len - len; - skb->csum = csum_sub(skb->csum, - skb_checksum(skb, len, delta, 0)); + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); } return __pskb_trim(skb, len); } @@ -4452,14 +4453,16 @@ EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); */ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { - if (unlikely(start > skb_headlen(skb)) || - unlikely((int)start + off > skb_headlen(skb) - 2)) { - net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n", - start, off, skb_headlen(skb)); + u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); + u32 csum_start = skb_headroom(skb) + (u32)start; + + if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) { + net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", + start, off, skb_headroom(skb), skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + start; + skb->csum_start = csum_start; skb->csum_offset = off; skb_set_transport_header(skb, start); return true; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2998b0e47d4b..0113993e9b2c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *upper_info = ptr; + struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); - /* fall through */ + rt_cache_flush(net); + break; case NETDEV_CHANGEMTU: + fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: - info = ptr; + upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ - if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + if (upper_info->upper_dev && + netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f3c89ccf14c5..446204ca7406 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, return NOTIFY_DONE; } +/* Update the PMTU of exceptions when: + * - the new MTU of the first hop becomes smaller than the PMTU + * - the old MTU was the same as the PMTU, and it limited discovery of + * larger MTUs on the path. 
With that limit raised, we can now + * discover larger MTUs + * A special case is locked exceptions, for which the PMTU is smaller + * than the minimal accepted PMTU: + * - if the new MTU is greater than the PMTU, don't make any change + * - otherwise, unlock and set PMTU + */ +static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +{ + struct fnhe_hash_bucket *bucket; + int i; + + bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!bucket) + return; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); + fnhe; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { + if (fnhe->fnhe_mtu_locked) { + if (new <= fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + fnhe->fnhe_mtu_locked = false; + } + } else if (new < fnhe->fnhe_pmtu || + orig == fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + } + } + } +} + +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) +{ + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->nh_dev == dev) + nh_update_mtu(nh, dev->mtu, orig_mtu); + } +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 1ad9aa62a97b..eab8cd5ec2f5 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -296,8 +296,6 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, next_entry: e++; } - e = 0; - s_e = 0; spin_lock_bh(lock); list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b678466da451..8501554e96a4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1001,21 +1001,22 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; if (ip_mtu_locked(dst)) return; - if (ipv4_mtu(dst) < mtu) + if (old_mtu < mtu) return; if (mtu < ip_rt_min_pmtu) { lock = true; - mtu = ip_rt_min_pmtu; + mtu = min(old_mtu, ip_rt_min_pmtu); } - if (rt->rt_pmtu == mtu && + if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) return; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d69dd6fa7e8..c32a4c16b7ff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1627,7 +1627,7 @@ busy_check: *err = error; return NULL; } -EXPORT_SYMBOL_GPL(__skb_recv_udp); +EXPORT_SYMBOL(__skb_recv_udp); /* * This should be easy, if there is something there we diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c63ccce6425f..4e81ff2f4588 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4928,8 +4928,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. 
temp addr */ list_for_each_entry(ifa, &idev->addr_list, if_list) { - if (++ip_idx < s_ip_idx) - continue; + if (ip_idx < s_ip_idx) + goto next; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -4938,6 +4938,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +next: + ip_idx++; } break; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5516f55e214b..cbe46175bb59 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -196,6 +196,8 @@ void fib6_info_destroy_rcu(struct rcu_head *head) *ppcpu_rt = NULL; } } + + free_percpu(f6i->rt6i_pcpu); } lwtstate_put(f6i->fib6_nh.nh_lwtstate); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a0b6932c3afd..a9d06d4dd057 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1184,11 +1184,6 @@ route_lookup: } skb_dst_set(skb, dst); - if (encap_limit >= 0) { - init_tel_txopt(&opt, encap_limit); - ipv6_push_frag_opts(skb, &opt.ops, &proto); - } - if (hop_limit == 0) { if (skb->protocol == htons(ETH_P_IP)) hop_limit = ip_hdr(skb)->ttl; @@ -1210,6 +1205,11 @@ route_lookup: if (err) return err; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_frag_opts(skb, &opt.ops, &proto); + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 4ae54aaca373..dbab62e3f0d7 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2436,17 +2436,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and rtnl lock - * so no other readers or writers of iml or its sflist - */ + write_lock_bh(&iml->sflock); if (!iml->sflist) { /* any-source empty exclude case */ - return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + } else { + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, + iml->sflist->sl_count, iml->sflist->sl_addr, 0); + sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); + iml->sflist = NULL; } - err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + write_unlock_bh(&iml->sflock); return err; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a366c05a239d..abcb5ae77319 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -520,10 +520,11 @@ static void rt6_probe_deferred(struct work_struct *w) static void rt6_probe(struct fib6_info *rt) { - struct __rt6_probe_work *work; + struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; struct neighbour *neigh; struct net_device *dev; + struct inet6_dev *idev; /* * Okay, this does not seem to be appropriate @@ -539,15 +540,12 @@ static void rt6_probe(struct fib6_info *rt) nh_gw = &rt->fib6_nh.nh_gw; dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); + idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - struct inet6_dev *idev; - if (neigh->nud_state & NUD_VALID) goto out; - idev = __in6_dev_get(dev); - work = NULL; write_lock(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, @@ -557,11 +555,13 @@ static void rt6_probe(struct fib6_info *rt) __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else { + } else if (time_after(jiffies, rt->last_probe + + 
idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } if (work) { + rt->last_probe = jiffies; INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 28c4aa5078fc..b36694b6716e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -766,11 +766,9 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, ret = udpv6_queue_rcv_skb(sk, skb); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef3defaf43b9..d35bcf92969c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -146,8 +146,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; - while (nh + offset + 1 < skb->data || - pskb_may_pull(skb, nh + offset + 1 - skb->data)) { + while (nh + offset + sizeof(*exthdr) < skb->data || + pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { nh = skb_network_header(skb); exthdr = (struct ipv6_opt_hdr *)(nh + offset); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c0ac522b48a1..4ff89cb7c86f 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) llc_sk(sk)->sap = sap; spin_lock_bh(&sap->sk_lock); + sock_set_flag(sk, SOCK_RCU_FREE); sap->sk_count++; sk_nulls_add_node_rcu(sk, laddr_hb); hlist_add_head(&llc->dev_hash_node, dev_hb); diff --git a/net/rds/send.c b/net/rds/send.c index 57b3d5a8b2db..fe785ee819dd 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1007,7 +1007,8 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) +static int rds_send_mprds_hash(struct rds_sock *rs, + struct rds_connection *conn, int nonblock) { int hash; @@ -1023,10 +1024,16 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) * used. But if we are interrupted, we have to use the zero * c_path in case the connection ends up being non-MP capable. */ - if (conn->c_npaths == 0) + if (conn->c_npaths == 0) { + /* Cannot wait for the connection be made, so just use + * the base c_path. 
+ */ + if (nonblock) + return 0; if (wait_event_interruptible(conn->c_hs_waitq, conn->c_npaths != 0)) hash = 0; + } if (conn->c_npaths == 1) hash = 0; } @@ -1256,7 +1263,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) } if (conn->c_trans->t_mp_capable) - cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)]; + cpath = &conn->c_path[rds_send_mprds_hash(rs, conn, nonblock)]; else cpath = &conn->c_path[0]; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ef9554131434..a6e6cae82c30 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -302,6 +302,7 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 + spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ u64 rtt; /* Current RTT estimate (in nS) */ u64 rtt_sum; /* Sum of cache contents */ @@ -442,17 +443,17 @@ struct rxrpc_connection { spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_cache_state cache_state; enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ + u32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 security_nonce; /* response re-use preventer */ - u16 service_id; /* Service ID, possibly upgraded */ + u32 service_id; /* Service ID, possibly upgraded */ u8 size_align; /* data size alignment (for security) */ u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + short error; /* Local error code */ }; static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) @@ -635,6 +636,8 @@ struct rxrpc_call { bool tx_phase; /* T if transmission phase, F if receive phase */ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ + spinlock_t input_lock; /* Lock for packet input to this call */ + /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ @@ -720,8 +723,6 @@ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_sock *, - struct rxrpc_peer *, - struct rxrpc_connection *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, @@ -891,8 +892,9 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); -int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, gfp_t); +int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, + struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, + gfp_t); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); @@ -965,7 +967,7 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ -void rxrpc_data_ready(struct sock *); +int rxrpc_input_packet(struct sock *, struct sk_buff *); /* * insecure.c @@ -1045,10 +1047,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *); */ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct 
rxrpc_local *, const struct sockaddr_rxrpc *); -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); -void rxrpc_new_incoming_peer(struct rxrpc_local *, struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, + struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9c7f26d06a52..8079aacaecac 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -287,7 +287,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - rxrpc_new_incoming_peer(local, peer); + rxrpc_new_incoming_peer(rx, local, peer); } /* Now allocate and set up the connection */ @@ -333,11 +333,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, */ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_sock *rx, - struct rxrpc_peer *peer, - struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_connection *conn; + struct rxrpc_peer *peer = NULL; struct rxrpc_call *call; _enter(""); @@ -354,6 +354,13 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, goto out; } + /* The peer, connection and call may all have sprung into existence due + * to a duplicate packet being handled on another CPU in parallel, so + * we have to recheck the routing. However, we're now holding + * rx->incoming_lock, so the values should remain stable. + */ + conn = rxrpc_find_connection_rcu(local, skb, &peer); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; @@ -396,20 +403,22 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, case RXRPC_CONN_SERVICE: write_lock(&call->state_lock); - if (rx->discard_new_call) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - else - call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (call->state < RXRPC_CALL_COMPLETE) { + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + } write_unlock(&call->state_lock); break; case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, -ECONNABORTED); + conn->abort_code, conn->error); break; default: BUG(); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 799f75b6900d..8f1a8f85b1f9 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -138,6 +138,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); spin_lock_init(&call->notify_lock); + spin_lock_init(&call->input_lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); call->debug_id = debug_id; @@ -287,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. 
*/ - ret = rxrpc_connect_call(call, cp, srx, gfp); + ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) goto error; @@ -339,7 +340,7 @@ int rxrpc_retry_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. */ - ret = rxrpc_connect_call(call, cp, srx, gfp); + ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) goto error; diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 8acf74fe24c0..521189f4b666 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -276,7 +276,8 @@ dont_reuse: * If we return with a connection, the call will be on its waiting list. It's * left to the caller to assign a channel and wake up the call. */ -static int rxrpc_get_client_conn(struct rxrpc_call *call, +static int rxrpc_get_client_conn(struct rxrpc_sock *rx, + struct rxrpc_call *call, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, gfp_t gfp) @@ -289,7 +290,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); + cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); if (!cp->peer) goto error; @@ -683,7 +684,8 @@ out: * find a connection for a call * - called in process context with IRQs enabled */ -int rxrpc_connect_call(struct rxrpc_call *call, +int rxrpc_connect_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, gfp_t gfp) @@ -696,7 +698,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); rxrpc_cull_active_client_conns(rxnet); - ret = rxrpc_get_client_conn(call, cp, srx, gfp); + ret = rxrpc_get_client_conn(rx, call, cp, srx, gfp); if (ret < 0) goto out; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 6df56ce68861..b6fca8ebb117 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -126,7 +126,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code); break; case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(chan->call_debug_id, serial, @@ -153,13 +153,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl, - u32 abort_code, int error) + enum rxrpc_call_completion compl) { struct rxrpc_call *call; int i; - _enter("{%d},%x", conn->debug_id, abort_code); + _enter("{%d},%x", conn->debug_id, conn->abort_code); spin_lock(&conn->channel_lock); @@ -172,9 +171,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, trace_rxrpc_abort(call->debug_id, "CON", call->cid, call->call_id, 0, - abort_code, error); + conn->abort_code, + conn->error); if (rxrpc_set_call_completion(call, compl, - abort_code, error)) + conn->abort_code, + conn->error)) rxrpc_notify_socket(call); } } @@ -207,10 +208,12 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, return 0; } + conn->error = error; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); + rxrpc_abort_calls(conn, 
RXRPC_CALL_LOCALLY_ABORTED); msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; @@ -229,7 +232,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, whdr._rsvd = 0; whdr.serviceId = htons(conn->service_id); - word = htonl(conn->local_abort); + word = htonl(conn->abort_code); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -240,7 +243,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort); + _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ret < 0) { @@ -315,9 +318,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); + conn->error = -ECONNABORTED; + conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 800f5b8a1baa..570b49d2da42 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, /* * Apply a hard ACK by advancing the Tx window. */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, +static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { struct sk_buff *skb, *list = NULL; + bool rot_last = false; int ix; u8 annotation; @@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = list; list = skb; - if (annotation & RXRPC_TX_ANNO_LAST) + if (annotation & RXRPC_TX_ANNO_LAST) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; + } if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) summary->nr_rot_new_acks++; } spin_unlock(&call->lock); - trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + trace_rxrpc_transmit(call, (rot_last ? 
rxrpc_transmit_rotate_last : rxrpc_transmit_rotate)); wake_up(&call->waitq); @@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb->next = NULL; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } + + return rot_last; } /* @@ -273,23 +278,26 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, const char *abort_why) { + unsigned int state; ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); write_lock(&call->state_lock); - switch (call->state) { + state = call->state; + switch (state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: if (reply_begun) - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; else - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; break; case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); rxrpc_notify_socket(call); + state = call->state; break; default: @@ -297,11 +305,10 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, } write_unlock(&call->state_lock); - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); - } else { + else trace_rxrpc_transmit(call, rxrpc_transmit_end); - } _leave(" = ok"); return true; @@ -332,11 +339,11 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } - if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) - rxrpc_rotate_tx_window(call, top, &summary); if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_proto_abort("TXL", call, top); - return false; + if (!rxrpc_rotate_tx_window(call, top, &summary)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } } if (!rxrpc_end_tx_phase(call, true, "ETD")) return false; @@ -452,13 +459,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, } } + spin_lock(&call->input_lock); + /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. 
*/ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) - return; + goto unlock; call->ackr_prev_seq = seq; @@ -488,12 +497,16 @@ next_subpacket: if (flags & RXRPC_LAST_PACKET) { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq != call->rx_top) - return rxrpc_proto_abort("LSN", call, seq); + seq != call->rx_top) { + rxrpc_proto_abort("LSN", call, seq); + goto unlock; + } } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - after_eq(seq, call->rx_top)) - return rxrpc_proto_abort("LSA", call, seq); + after_eq(seq, call->rx_top)) { + rxrpc_proto_abort("LSA", call, seq); + goto unlock; + } } trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); @@ -560,8 +573,10 @@ next_subpacket: skip: offset += len; if (flags & RXRPC_JUMBO_PACKET) { - if (skb_copy_bits(skb, offset, &flags, 1) < 0) - return rxrpc_proto_abort("XJF", call, seq); + if (skb_copy_bits(skb, offset, &flags, 1) < 0) { + rxrpc_proto_abort("XJF", call, seq); + goto unlock; + } offset += sizeof(struct rxrpc_jumbo_header); seq++; serial++; @@ -601,6 +616,9 @@ ack: trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); } + +unlock: + spin_unlock(&call->input_lock); _leave(" [queued]"); } @@ -687,15 +705,14 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, ping_time = call->ping_time; smp_rmb(); - ping_serial = call->ping_serial; + ping_serial = READ_ONCE(call->ping_serial); if (orig_serial == call->acks_lost_ping) rxrpc_input_check_for_lost_ack(call); - if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || - before(orig_serial, ping_serial)) + if (before(orig_serial, ping_serial) || + !test_and_clear_bit(RXRPC_CALL_PINGING, &call->flags)) return; - clear_bit(RXRPC_CALL_PINGING, &call->flags); if (after(orig_serial, ping_serial)) return; @@ -861,15 +878,32 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + return; + + buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; - if (skb->len >= ioffset + sizeof(buf.info)) { - if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) - return rxrpc_proto_abort("XAI", call, 0); + if (skb->len >= ioffset + sizeof(buf.info) && + skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + + spin_lock(&call->input_lock); + + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) + goto out; + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + /* Parse rwind and mtu sizes if provided. */ + if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); - } - if (first_soft_ack == 0) - return rxrpc_proto_abort("AK0", call, 0); + if (first_soft_ack == 0) { + rxrpc_proto_abort("AK0", call, 0); + goto out; + } /* Ignore ACKs unless we are or have just been transmitting. */ switch (READ_ONCE(call->state)) { @@ -879,39 +913,35 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - return; - } - - /* Discard any out-of-order or duplicate ACKs. 
*/ - if (before_eq(sp->hdr.serial, call->acks_latest)) { - _debug("discard ACK %d <= %d", - sp->hdr.serial, call->acks_latest); - return; + goto out; } - call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; if (before(hard_ack, call->tx_hard_ack) || - after(hard_ack, call->tx_top)) - return rxrpc_proto_abort("AKW", call, 0); - if (nr_acks > call->tx_top - hard_ack) - return rxrpc_proto_abort("AKN", call, 0); + after(hard_ack, call->tx_top)) { + rxrpc_proto_abort("AKW", call, 0); + goto out; + } + if (nr_acks > call->tx_top - hard_ack) { + rxrpc_proto_abort("AKN", call, 0); + goto out; + } - if (after(hard_ack, call->tx_hard_ack)) - rxrpc_rotate_tx_window(call, hard_ack, &summary); + if (after(hard_ack, call->tx_hard_ack)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + rxrpc_end_tx_phase(call, false, "ETA"); + goto out; + } + } if (nr_acks > 0) { - if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) - return rxrpc_proto_abort("XSA", call, 0); + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) { + rxrpc_proto_abort("XSA", call, 0); + goto out; + } rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, &summary); } - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - rxrpc_end_tx_phase(call, false, "ETA"); - return; - } - if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && @@ -920,7 +950,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, false, true, rxrpc_propose_ack_ping_for_lost_reply); - return rxrpc_congestion_management(call, skb, &summary, acked_serial); + rxrpc_congestion_management(call, skb, &summary, acked_serial); +out: + spin_unlock(&call->input_lock); } /* @@ -933,9 +965,12 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ACKALL %%%u", sp->hdr.serial); - rxrpc_rotate_tx_window(call, call->tx_top, &summary); - if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + spin_lock(&call->input_lock); + + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); + + spin_unlock(&call->input_lock); } /* @@ -1018,18 +1053,19 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, } /* - * Handle a new call on a channel implicitly completing the preceding call on - * that channel. + * Handle a new service call on a channel implicitly completing the preceding + * call on that channel. This does not apply to client conns. * * TODO: If callNumber > call_id + 1, renegotiate security. 
*/ -static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, +static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, struct rxrpc_call *call) { switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); - break; + /* Fall through */ case RXRPC_CALL_COMPLETE: break; default: @@ -1037,11 +1073,13 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } + trace_rxrpc_improper_term(call); break; } - trace_rxrpc_improper_term(call); + spin_lock(&rx->incoming_lock); __rxrpc_disconnect_call(conn, call); + spin_unlock(&rx->incoming_lock); rxrpc_notify_socket(call); } @@ -1120,8 +1158,10 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * The socket is locked by the caller and this prevents the socket from being * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. + * + * Called with the RCU read lock held from the IP layer via UDP. */ -void rxrpc_data_ready(struct sock *udp_sk) +int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; @@ -1130,38 +1170,17 @@ void rxrpc_data_ready(struct sock *udp_sk) struct rxrpc_local *local = udp_sk->sk_user_data; struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; - struct sk_buff *skb; unsigned int channel; - int ret, skew = 0; + int skew = 0; _enter("%p", udp_sk); - ASSERT(!irqs_disabled()); - - skb = skb_recv_udp(udp_sk, 0, 1, &ret); - if (!skb) { - if (ret == -EAGAIN) - return; - _debug("UDP socket error %d", ret); - return; - } - if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); rxrpc_new_skb(skb, rxrpc_skb_rx_received); - _net("recv skb %p", skb); - - /* we'll probably need to checksum it (didn't call sock_recvmsg) */ - if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); - _leave(" [CSUM failed]"); - return; - } - - __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); + skb_pull(skb, sizeof(struct udphdr)); /* The UDP protocol already released all skb resources; * we are free to add our own data there. @@ -1176,11 +1195,13 @@ void rxrpc_data_ready(struct sock *udp_sk) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); - return; + rxrpc_free_skb(skb, rxrpc_skb_rx_lost); + return 0; } } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); trace_rxrpc_rx_packet(sp); switch (sp->hdr.type) { @@ -1234,8 +1255,6 @@ void rxrpc_data_ready(struct sock *udp_sk) if (sp->hdr.serviceId == 0) goto bad_message; - rcu_read_lock(); - if (rxrpc_to_server(sp)) { /* Weed out packets to services we're not offering. 
Packets * that would begin a call are explicitly rejected and the rest @@ -1247,7 +1266,7 @@ void rxrpc_data_ready(struct sock *udp_sk) if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.seq == 1) goto unsupported_service; - goto discard_unlock; + goto discard; } } @@ -1257,17 +1276,23 @@ void rxrpc_data_ready(struct sock *udp_sk) goto wrong_security; if (sp->hdr.serviceId != conn->service_id) { - if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) || - conn->service_id != conn->params.service_id) + int old_id; + + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) + goto reupgrade; + old_id = cmpxchg(&conn->service_id, conn->params.service_id, + sp->hdr.serviceId); + + if (old_id != conn->params.service_id && + old_id != sp->hdr.serviceId) goto reupgrade; - conn->service_id = sp->hdr.serviceId; } if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } /* Note the serial number skew here */ @@ -1286,19 +1311,19 @@ void rxrpc_data_ready(struct sock *udp_sk) /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) - goto discard_unlock; + goto discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - goto discard_unlock; + goto discard; /* For the previous service call, if completed * successfully, we discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) - goto discard_unlock; + goto discard; /* But otherwise we need to retransmit the final packet * from data cached in the connection record. @@ -1309,18 +1334,16 @@ void rxrpc_data_ready(struct sock *udp_sk) sp->hdr.serial, sp->hdr.flags, 0); rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; + goto out; } call = rcu_dereference(chan->call); if (sp->hdr.callNumber > chan->call_id) { - if (rxrpc_to_client(sp)) { - rcu_read_unlock(); + if (rxrpc_to_client(sp)) goto reject_packet; - } if (call) - rxrpc_input_implicit_end_call(conn, call); + rxrpc_input_implicit_end_call(rx, conn, call); call = NULL; } @@ -1337,55 +1360,42 @@ void rxrpc_data_ready(struct sock *udp_sk) if (!call || atomic_read(&call->usage) == 0) { if (rxrpc_to_client(sp) || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - goto bad_message_unlock; + goto bad_message; if (sp->hdr.seq != 1) - goto discard_unlock; - call = rxrpc_new_incoming_call(local, rx, peer, conn, skb); - if (!call) { - rcu_read_unlock(); + goto discard; + call = rxrpc_new_incoming_call(local, rx, skb); + if (!call) goto reject_packet; - } rxrpc_send_ping(call, skb, skew); mutex_unlock(&call->user_mutex); } rxrpc_input_call_packet(call, skb, skew); - goto discard_unlock; + goto discard; -discard_unlock: - rcu_read_unlock(); discard: rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: trace_rxrpc_rx_done(0, 0); - return; - -out_unlock: - rcu_read_unlock(); - goto out; + return 0; wrong_security: - rcu_read_unlock(); trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RXKADINCONSISTENCY, EBADMSG); skb->priority = RXKADINCONSISTENCY; goto post_abort; unsupported_service: - rcu_read_unlock(); trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); skb->priority = RX_INVALID_OPERATION; goto post_abort; reupgrade: - rcu_read_unlock(); trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); goto protocol_error; -bad_message_unlock: - 
rcu_read_unlock(); bad_message: trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); @@ -1397,4 +1407,5 @@ reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); + return 0; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 94d234e9c685..0906e51d3cfb 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -19,6 +19,7 @@ #include <linux/ip.h> #include <linux/hashtable.h> #include <net/sock.h> +#include <net/udp.h> #include <net/af_rxrpc.h> #include "ar-internal.h" @@ -108,7 +109,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, */ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { - struct sock *sock; + struct sock *usk; int ret, opt; _enter("%p{%d,%d}", @@ -122,6 +123,28 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) return ret; } + /* set the socket up */ + usk = local->socket->sk; + inet_sk(usk)->mc_loop = 0; + + /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ + inet_inc_convert_csum(usk); + + rcu_assign_sk_user_data(usk, local); + + udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC; + udp_sk(usk)->encap_rcv = rxrpc_input_packet; + udp_sk(usk)->encap_destroy = NULL; + udp_sk(usk)->gro_receive = NULL; + udp_sk(usk)->gro_complete = NULL; + + udp_encap_enable(); +#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) + if (local->srx.transport.family == AF_INET6) + udpv6_encap_enable(); +#endif + usk->sk_error_report = rxrpc_error_report; + /* if a local address was supplied then bind it */ if (local->srx.transport_len > sizeof(sa_family_t)) { _debug("bind"); @@ -191,11 +214,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) BUG(); } - /* set the socket up */ - sock = local->socket->sk; - sock->sk_user_data = local; - sock->sk_data_ready = rxrpc_data_ready; - sock->sk_error_report = rxrpc_error_report; _leave(" = 0"); return 0; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index e8fb8922bca8..a141ee3ab812 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -572,7 +572,8 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.flags ^= RXRPC_CLIENT_INITIATED; whdr.flags &= RXRPC_CLIENT_INITIATED; - ret = kernel_sendmsg(local->socket, &msg, iov, 2, size); + ret = kernel_sendmsg(local->socket, &msg, + iov, ioc, size); if (ret < 0) trace_rxrpc_tx_fail(local->debug_id, 0, ret, rxrpc_tx_point_reject); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index f3e6fc670da2..bd2fa3b7caa7 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -195,6 +195,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_put_peer(peer); _leave(""); } @@ -301,6 +302,8 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, if (rtt < 0) return; + spin_lock(&peer->rtt_input_lock); + /* Replace the oldest datum in the RTT buffer */ sum -= peer->rtt_cache[cursor]; sum += rtt; @@ -312,6 +315,8 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, peer->rtt_usage = usage; } + spin_unlock(&peer->rtt_input_lock); + /* Now recalculate the average */ if (usage == RXRPC_RTT_CACHE_SIZE) { avg = sum / RXRPC_RTT_CACHE_SIZE; @@ -320,6 +325,7 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, do_div(avg, usage); } + /* Don't need to update this under lock */ peer->rtt = avg; 
trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, usage, avg); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 01a9febfa367..5691b7d266ca 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -153,8 +153,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, * assess the MTU size for the network interface through which this peer is * reached */ -static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) +static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, + struct rxrpc_peer *peer) { + struct net *net = sock_net(&rx->sk); struct dst_entry *dst; struct rtable *rt; struct flowi fl; @@ -169,7 +171,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) switch (peer->srx.transport.family) { case AF_INET: rt = ip_route_output_ports( - &init_net, fl4, NULL, + net, fl4, NULL, peer->srx.transport.sin.sin_addr.s_addr, 0, htons(7000), htons(7001), IPPROTO_UDP, 0, 0); if (IS_ERR(rt)) { @@ -188,7 +190,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) sizeof(struct in6_addr)); fl6->fl6_dport = htons(7001); fl6->fl6_sport = htons(7000); - dst = ip6_route_output(&init_net, NULL, fl6); + dst = ip6_route_output(net, NULL, fl6); if (dst->error) { _leave(" [route err %d]", dst->error); return; @@ -223,6 +225,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); + spin_lock_init(&peer->rtt_input_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); if (RXRPC_TX_SMSS > 2190) @@ -240,10 +243,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) /* * Initialise peer record. */ -static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) +static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, + unsigned long hash_key) { peer->hash_key = hash_key; - rxrpc_assess_MTU_size(peer); + rxrpc_assess_MTU_size(rx, peer); peer->mtu = peer->if_mtu; peer->rtt_last_req = ktime_get_real(); @@ -275,7 +279,8 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) /* * Set up a new peer. */ -static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, +static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, + struct rxrpc_local *local, struct sockaddr_rxrpc *srx, unsigned long hash_key, gfp_t gfp) @@ -287,7 +292,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, peer = rxrpc_alloc_peer(local, gfp); if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); - rxrpc_init_peer(peer, hash_key); + rxrpc_init_peer(rx, peer, hash_key); } _leave(" = %p", peer); @@ -299,14 +304,15 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, * since we've already done a search in the list from the non-reentrant context * (the data_ready handler) that is the only place we can add new peers. 
*/ -void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) +void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, + struct rxrpc_peer *peer) { struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; hash_key = rxrpc_peer_hash_key(local, &peer->srx); peer->local = local; - rxrpc_init_peer(peer, hash_key); + rxrpc_init_peer(rx, peer, hash_key); spin_lock(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); @@ -317,7 +323,8 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) /* * obtain a remote transport endpoint for the specified address */ -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, + struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_peer *peer, *candidate; @@ -337,7 +344,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, /* The peer is not yet present in hash - create a candidate * for a new record and then redo the search. */ - candidate = rxrpc_create_peer(local, srx, hash_key, gfp); + candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp); if (!candidate) { _leave(" = NULL [nomem]"); return NULL; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0a75cb2e5e7b..70f144ac5e1d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,6 +31,8 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> +extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; + /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -1211,7 +1213,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1360,7 +1362,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1475,7 +1477,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh = NULL; int err; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1838,7 +1840,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1949,7 +1951,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + NULL); if (err) return err; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index f218ccf1e2d9..b2c3406a2cf2 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -398,6 +398,7 @@ static int u32_init(struct tcf_proto *tp) rcu_assign_pointer(tp_c->hlist, root_ht); root_ht->tp_c = tp_c; + root_ht->refcnt++; rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; @@ -610,7 +611,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, 
struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(ht->refcnt); + WARN_ON(--ht->refcnt); u32_clear_hnode(tp, ht, extack); @@ -649,7 +650,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 0) + if (root_ht && --root_ht->refcnt == 1) u32_destroy_hnode(tp, root_ht, extack); if (--tp_c->refcnt == 0) { @@ -698,7 +699,6 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } if (ht->refcnt == 1) { - ht->refcnt--; u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -708,11 +708,11 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, out: *last = true; if (root_ht) { - if (root_ht->refcnt > 1) { + if (root_ht->refcnt > 2) { *last = false; goto ret; } - if (root_ht->refcnt == 1) { + if (root_ht->refcnt == 2) { if (!ht_empty(root_ht)) { *last = false; goto ret; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 85e73f48e48f..3dc0acf54245 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1307,10 +1307,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) return 0; } -/* - * Delete/get qdisc. - */ - const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, [TCA_OPTIONS] = { .type = NLA_NESTED }, @@ -1323,6 +1319,10 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, }; +/* + * Delete/get qdisc. + */ + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -2059,7 +2059,8 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tcm->tcm_parent) { q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); - if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + if (q && q != root && + tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; return 0; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index c07c30b916d5..793016d722ec 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2644,7 +2644,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, for (i = 1; i <= CAKE_QUEUES; i++) quantum_div[i] = 65535 / i; - q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data), + q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) goto nomem; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 297d9cf960b9..a827a1f562bf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1450,7 +1450,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. 
*/ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst)); + sctp_transport_update_pmtu(t, + atomic_read(&t->mtu_info)); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 9bbc5f92c941..5c36a99882ed 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -395,6 +395,7 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; if (sock_owned_by_user(sk)) { + atomic_set(&t->mtu_info, pmtu); asoc->pmtu_pending = 1; t->pmtu_pending = 1; return; diff --git a/net/sctp/output.c b/net/sctp/output.c index 7f849b01ec8e..67939ad99c01 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -120,6 +120,12 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, sctp_assoc_sync_pmtu(asoc); } + if (asoc->pmtu_pending) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); + asoc->pmtu_pending = 0; + } + /* If there a is a prepend chunk stick it on the list before * any other chunks get appended. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f73e9d38d5ba..c1c1bda334a4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -271,11 +271,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) spin_lock_bh(&sctp_assocs_id_lock); asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); + if (asoc && (asoc->base.sk != sk || asoc->base.dead)) + asoc = NULL; spin_unlock_bh(&sctp_assocs_id_lock); - if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) - return NULL; - return asoc; } @@ -1946,8 +1945,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (sp->strm_interleave) { timeo = sock_sndtimeo(sk, 0); err = sctp_wait_for_connect(asoc, &timeo); - if (err) + if (err) { + err = -ESRCH; goto err; + } } else { wait_connect = true; } @@ -7100,14 +7101,14 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (policy & ~SCTP_PR_SCTP_MASK) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); if (!asoc) goto out; - if (policy == SCTP_PR_SCTP_NONE) { + if (policy & SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { @@ -7159,7 +7160,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (policy & ~SCTP_PR_SCTP_MASK) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); @@ -7175,7 +7176,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, goto out; } - if (policy == SCTP_PR_SCTP_NONE) { + if (policy == SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { diff --git a/net/socket.c b/net/socket.c index 01f3f8f32d6f..390a8ecef4bf 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2875,9 +2875,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) + (void __user *)&rxnfc->fs.ring_cookie)) + return 
-EFAULT; + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + if (put_user(rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + } else if (copy_in_user(&rxnfc->rule_cnt, + &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) return -EFAULT; } diff --git a/net/tipc/group.c b/net/tipc/group.c index e82f13cb2dc5..06fee142f09f 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -666,6 +666,7 @@ static void tipc_group_create_event(struct tipc_group *grp, struct sk_buff *skb; struct tipc_msg *hdr; + memset(&evt, 0, sizeof(evt)); evt.event = event; evt.found_lower = m->instance; evt.found_upper = m->instance; diff --git a/net/tipc/link.c b/net/tipc/link.c index fb886b525d95..201c3b5bc96b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -477,6 +477,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, l->in_session = false; l->bearer_id = bearer_id; l->tolerance = tolerance; + if (bc_rcvlink) + bc_rcvlink->tolerance = tolerance; l->net_plane = net_plane; l->advertised_mtu = mtu; l->mtu = mtu; @@ -843,14 +845,21 @@ static void link_prepare_wakeup(struct tipc_link *l) void tipc_link_reset(struct tipc_link *l) { + struct sk_buff_head list; + + __skb_queue_head_init(&list); + l->in_session = false; l->session++; l->mtu = l->advertised_mtu; + spin_lock_bh(&l->wakeupq.lock); + skb_queue_splice_init(&l->wakeupq, &list); + spin_unlock_bh(&l->wakeupq.lock); + spin_lock_bh(&l->inputq->lock); - skb_queue_splice_init(&l->wakeupq, l->inputq); + skb_queue_splice_init(&list, l->inputq); spin_unlock_bh(&l->inputq->lock); - spin_unlock_bh(&l->wakeupq.lock); __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); @@ -1031,7 +1040,8 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r, /* Detect repeated retransmit failures on same packet */ if (r->last_retransm != buf_seqno(skb)) { r->last_retransm = buf_seqno(skb); - r->stale_limit = jiffies + msecs_to_jiffies(l->tolerance); + r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); + r->stale_cnt = 0; } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) { link_retransmit_failure(l, skb); if (link_is_bc_sndlink(l)) @@ -1576,9 +1586,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, strncpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; - + l->bc_rcvlink->tolerance = peers_tol; + } /* Update own priority if peer's priority is higher */ if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; @@ -1604,9 +1615,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; - + l->bc_rcvlink->tolerance = peers_tol; + } /* Update own prio if peer indicates a different value */ if ((peers_prio != l->priority) && in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { @@ -2223,6 +2235,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; + if (l->bc_rcvlink) + l->bc_rcvlink->tolerance = tol; if (link_is_up(l)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } diff --git a/net/tipc/name_distr.c 
b/net/tipc/name_distr.c index 51b4b96f89db..3cfeb9df64b0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) struct sk_buff *buf; struct distr_item *item; - list_del(&publ->binding_node); + list_del_rcu(&publ->binding_node); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; - list_for_each_entry(publ, pls, binding_node) { + list_for_each_entry_rcu(publ, pls, binding_node) { /* Prepare next buffer: */ if (!skb) { skb = named_prepare_buf(net, PUBLICATION, msg_rem, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b6f99b021d09..49810fdff4c5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1196,6 +1196,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, * @skb: pointer to message buffer. */ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); @@ -1213,7 +1214,16 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), tsk_peer_port(tsk)); sk->sk_state_change(sk); - goto exit; + + /* State change is ignored if socket already awake, + * - convert msg to abort msg and add to inqueue + */ + msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); + msg_set_type(hdr, TIPC_CONN_MSG); + msg_set_size(hdr, BASIC_H_SIZE); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + __skb_queue_tail(inputq, skb); + return; } tsk->probe_unacked = false; @@ -1936,7 +1946,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, switch (msg_user(hdr)) { case CONN_MANAGER: - tipc_sk_conn_proto_rcv(tsk, skb, xmitq); + tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq); return; case SOCK_WAKEUP: tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 4e937cd7c17d..661504042d30 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -744,6 +744,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, sk->sk_destruct = xsk_destruct; sk_refcnt_debug_inc(sk); + sock_set_flag(sk, SOCK_RCU_FREE); + xs = xdp_sk(sk); mutex_init(&xs->mutex); spin_lock_init(&xs->tx_completion_lock); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 31acc6f33d98..6f05e831a73e 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -116,6 +116,9 @@ static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi) static void xfrmi_dev_free(struct net_device *dev) { + struct xfrm_if *xi = netdev_priv(dev); + + gro_cells_destroy(&xi->gro_cells); free_percpu(dev->tstats); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f094d4b3520d..119a427d9b2b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -632,9 +632,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); @@ -774,9 +774,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); 
+ hlist_add_head_rcu(&policy->bydst, chain); __xfrm_policy_link(policy, dir); /* After previous checking, family can either be AF_INET or AF_INET6 */ diff --git a/samples/Kconfig b/samples/Kconfig index bd133efc1a56..ad1ec7016d4c 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -1,5 +1,6 @@ menuconfig SAMPLES bool "Sample kernel code" + depends on !UML help You can build and test sample kernel code here. diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 5a2d1c9578a0..54da4b070db3 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -219,7 +219,7 @@ else sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ "$(if $(CONFIG_64BIT),64,32)" \ - "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ + "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)" \ "$(LD) $(KBUILD_LDFLAGS)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..fd23d5778ea1 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -377,6 +377,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 07548de5c988..251be353f950 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -952,6 +952,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 120037496f77..5afb11b30fca 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -36,7 +36,7 @@ static const char *tracing_path_tracefs_mount(void) __tracing_path_set("", mnt); - return mnt; + return tracing_path; } static const char *tracing_path_debugfs_mount(void) @@ -49,7 +49,7 @@ static const char *tracing_path_debugfs_mount(void) __tracing_path_set("tracing/", mnt); - return mnt; + return tracing_path; } const char *tracing_path_mount(void) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f6d1a03c7523..e30d20fb482d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -833,7 +833,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') endif endif ifndef JDIR diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5224ade3d5af..0be411695379 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -635,7 +635,7 @@ $(LIBPERF_IN): prepare FORCE $(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' 
$(LIBTRACEEVENT): FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 76e12bcd1765..b2188e623e22 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -981,6 +981,7 @@ int cmd_report(int argc, const char **argv) .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, + .event_update = perf_event__process_event_update, .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index d40498f2cb1e..635c09fda1d9 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -188,7 +188,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -199,7 +199,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -210,7 +210,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -221,7 +221,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -232,7 +232,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -243,7 +243,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -254,7 +254,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -265,7 +265,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json 
b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index 16034bfd06dd..8755693d86c6 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -187,7 +187,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -198,7 +198,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -209,7 +209,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -220,7 +220,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -231,7 +231,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -242,7 +242,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -253,7 +253,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -264,7 +264,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index efcaf6cac2eb..e46f51b17513 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -204,14 +204,23 @@ from ctypes import * libpq = CDLL("libpq.so.5") PQconnectdb = libpq.PQconnectdb PQconnectdb.restype = c_void_p +PQconnectdb.argtypes = [ c_char_p ] PQfinish = libpq.PQfinish +PQfinish.argtypes = [ c_void_p ] PQstatus = libpq.PQstatus +PQstatus.restype = c_int +PQstatus.argtypes = [ c_void_p ] PQexec = libpq.PQexec PQexec.restype = c_void_p +PQexec.argtypes = [ c_void_p, c_char_p ] PQresultStatus = libpq.PQresultStatus +PQresultStatus.restype = c_int +PQresultStatus.argtypes = [ c_void_p ] PQputCopyData = libpq.PQputCopyData 
+PQputCopyData.restype = c_int PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ] PQputCopyEnd = libpq.PQputCopyEnd +PQputCopyEnd.restype = c_int PQputCopyEnd.argtypes = [ c_void_p, c_void_p ] sys.path.append(os.environ['PERF_EXEC_PATH'] + \ diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index f827bf77e9d2..e4bb82c8aba9 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -440,7 +440,11 @@ def branch_type_table(*x): def sample_table(*x): if branches: - bind_exec(sample_query, 18, x) + for xx in x[0:15]: + sample_query.addBindValue(str(xx)) + for xx in x[19:22]: + sample_query.addBindValue(str(xx)) + do_query_(sample_query) else: bind_exec(sample_query, 22, x) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0cd42150f712..bc646185f8d9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1081,6 +1081,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max } *size += sizeof(struct cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); return zalloc(*size); } @@ -1560,26 +1561,9 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return NULL; } -try_again: + al->map = map_groups__find(mg, al->addr); - if (al->map == NULL) { - /* - * If this is outside of all known maps, and is a negative - * address, try to look it up in the kernel dso, as it might be - * a vsyscall or vdso (which executes in user-mode). - * - * XXX This is nasty, we should have a symbol list in the - * "[vdso]" dso, but for now lets use the old trick of looking - * in the whole kernel symbol list. - */ - if (cpumode == PERF_RECORD_MISC_USER && machine && - mg != &machine->kmaps && - machine__kernel_ip(machine, al->addr)) { - mg = &machine->kmaps; - load_map = true; - goto try_again; - } - } else { + if (al->map != NULL) { /* * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1a61628a1c12..e596ae358c4d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1089,6 +1089,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } + if (evsel->own_cpus) + evsel->attr.read_format |= PERF_FORMAT_ID; + /* * Apply event specific term settings, * it overloads any global configuration. diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c4acd2001db0..111ae858cbcb 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2286,7 +2286,8 @@ static int append_inlines(struct callchain_cursor *cursor, if (!symbol_conf.inline_name || !map || !sym) return ret; - addr = map__rip_2objdump(map, ip); + addr = map__map_ip(map, ip); + addr = map__rip_2objdump(map, addr); inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr); if (!inline_node) { @@ -2312,7 +2313,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) { struct callchain_cursor *cursor = arg; const char *srcline = NULL; - u64 addr; + u64 addr = entry->ip; if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; @@ -2324,7 +2325,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) * Convert entry->ip from a virtual address to an offset in * its corresponding binary. 
*/ - addr = map__map_ip(entry->map, entry->ip); + if (entry->map) + addr = map__map_ip(entry->map, entry->ip); srcline = callchain_srcline(entry->map, entry->sym, addr); return callchain_cursor_append(cursor, entry->ip, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index afd68524ffa9..7799788f662f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -930,13 +930,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - __u64 w = 0; - int fbit; - - for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) - w |= (1ULL << fbit); + int w; - return w; + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; } /* diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 97efbcad076e..1942f6dd24f6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -35,7 +35,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) -cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] if cc != "clang": cflags += ['-Wno-cast-function-type' ] diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 09d6746e6ec8..e767c4a9d4d2 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -85,6 +85,9 @@ static struct symbol *new_inline_sym(struct dso *dso, struct symbol *inline_sym; char *demangled = NULL; + if (!funcname) + funcname = "??"; + if (dso) { demangled = dso__demangle_sym(dso, 0, funcname); if (demangled) diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh index a72df93cf1f8..128f0ab24307 100755 --- a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh +++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh @@ -141,6 +141,10 @@ echo "Import devices from localhost - should work" src/usbip attach -r localhost -b $busid; echo "==============================================================" +# Wait for sysfs file to be updated. Without this sleep, usbip port +# shows no imported devices. +sleep 3; + echo "List imported devices - expect to see imported devices"; src/usbip port; echo "==============================================================" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc new file mode 100644 index 000000000000..88e6c3f43006 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test synthetic_events syntax parser" + +echo > synthetic_events + +# synthetic event must have a field +! 
echo "myevent" >> synthetic_events +echo "myevent u64 var1" >> synthetic_events + +# synthetic event must be found in synthetic_events +grep "myevent[[:space:]]u64 var1" synthetic_events + +# it is not possible to add same name event +! echo "myevent u64 var2" >> synthetic_events + +# Non-append open will cleanup all events and add new one +echo "myevent u64 var2" > synthetic_events + +# multiple fields with different spaces +echo "myevent u64 var1; u64 var2;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ;u64 var2" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events + +# test field types +echo "myevent u32 var" > synthetic_events +echo "myevent u16 var" > synthetic_events +echo "myevent u8 var" > synthetic_events +echo "myevent s64 var" > synthetic_events +echo "myevent s32 var" > synthetic_events +echo "myevent s16 var" > synthetic_events +echo "myevent s8 var" > synthetic_events + +echo "myevent char var" > synthetic_events +echo "myevent int var" > synthetic_events +echo "myevent long var" > synthetic_events +echo "myevent pid_t var" > synthetic_events + +echo "myevent unsigned char var" > synthetic_events +echo "myevent unsigned int var" > synthetic_events +echo "myevent unsigned long var" > synthetic_events +grep "myevent[[:space:]]unsigned long var" synthetic_events + +# test string type +echo "myevent char var[10]" > synthetic_events +grep "myevent[[:space:]]char\[10\] var" synthetic_events + +do_reset + +exit 0 diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index cad14cd0ea92..b5277106df1f 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -437,14 +437,19 @@ void enable_fastopen(void) } } -static struct rlimit rlim_old, rlim_new; +static struct rlimit rlim_old; static __attribute__((constructor)) void main_ctor(void) { getrlimit(RLIMIT_MEMLOCK, &rlim_old); - rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); - rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); - setrlimit(RLIMIT_MEMLOCK, &rlim_new); + + if (rlim_old.rlim_cur != RLIM_INFINITY) { + struct rlimit rlim_new; + + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); + } } static __attribute__((destructor)) void main_dtor(void) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 08c341b49760..e101af52d1d6 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # This test is for checking rtnetlink callpaths, and get as much coverage as possible. 
# diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 850767befa47..99e537ab5ad9 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Run a series of udpgso benchmarks diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c92053bc3f96..150c8a69cdaf 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -496,7 +496,7 @@ static bool need_new_vmid_gen(struct kvm *kvm) static void update_vttbr(struct kvm *kvm) { phys_addr_t pgd_phys; - u64 vmid; + u64 vmid, cnp = kvm_cpu_has_cnp() ? VTTBR_CNP_BIT : 0; bool new_gen; read_lock(&kvm_vmid_lock); @@ -546,7 +546,7 @@ static void update_vttbr(struct kvm *kvm) pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); - kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; + kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp; write_unlock(&kvm_vmid_lock); } |
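Note on the tools/perf pmu.c hunk above: pmu_format_max_value() is reworked to derive the largest encodable value from the population count of the format mask instead of OR-ing bits in a loop, taking care not to shift a 64-bit value by 64 bits. As a rough userspace sketch of that pattern (the helper name and driver below are illustrative only, not taken from the kernel sources):

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the popcount-based calculation:
 * return the largest value representable in `width` bits, avoiding
 * the undefined behaviour of a 64-bit shift by 64.
 */
static uint64_t max_value_for_width(unsigned int width)
{
	if (width == 0)
		return 0;
	if (width < 64)
		return (UINT64_C(1) << width) - 1;
	return UINT64_MAX;	/* all 64 bits usable */
}

int main(void)
{
	/* 13-bit field -> 8191; full 64-bit field -> 2^64 - 1 */
	printf("%llu\n", (unsigned long long)max_value_for_width(13));
	printf("%llu\n", (unsigned long long)max_value_for_width(64));
	return 0;
}
```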