diff options
405 files changed, 11018 insertions, 4624 deletions
diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt new file mode 100644 index 0000000..f4faec0 --- /dev/null +++ b/Documentation/assoc_array.txt @@ -0,0 +1,574 @@ + ======================================== + GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION + ======================================== + +Contents: + + - Overview. + + - The public API. + - Edit script. + - Operations table. + - Manipulation functions. + - Access functions. + - Index key form. + + - Internal workings. + - Basic internal tree layout. + - Shortcuts. + - Splitting and collapsing nodes. + - Non-recursive iteration. + - Simultaneous alteration and iteration. + + +======== +OVERVIEW +======== + +This associative array implementation is an object container with the following +properties: + + (1) Objects are opaque pointers. The implementation does not care where they + point (if anywhere) or what they point to (if anything). + + [!] NOTE: Pointers to objects _must_ be zero in the least significant bit. + + (2) Objects do not need to contain linkage blocks for use by the array. This + permits an object to be located in multiple arrays simultaneously. + Rather, the array is made up of metadata blocks that point to objects. + + (3) Objects require index keys to locate them within the array. + + (4) Index keys must be unique. Inserting an object with the same key as one + already in the array will replace the old object. + + (5) Index keys can be of any length and can be of different lengths. + + (6) Index keys should encode the length early on, before any variation due to + length is seen. + + (7) Index keys can include a hash to scatter objects throughout the array. + + (8) The array can iterated over. The objects will not necessarily come out in + key order. + + (9) The array can be iterated over whilst it is being modified, provided the + RCU readlock is being held by the iterator. Note, however, under these + circumstances, some objects may be seen more than once. If this is a + problem, the iterator should lock against modification. Objects will not + be missed, however, unless deleted. + +(10) Objects in the array can be looked up by means of their index key. + +(11) Objects can be looked up whilst the array is being modified, provided the + RCU readlock is being held by the thread doing the look up. + +The implementation uses a tree of 16-pointer nodes internally that are indexed +on each level by nibbles from the index key in the same manner as in a radix +tree. To improve memory efficiency, shortcuts can be emplaced to skip over +what would otherwise be a series of single-occupancy nodes. Further, nodes +pack leaf object pointers into spare space in the node rather than making an +extra branch until as such time an object needs to be added to a full node. + + +============== +THE PUBLIC API +============== + +The public API can be found in <linux/assoc_array.h>. The associative array is +rooted on the following structure: + + struct assoc_array { + ... + }; + +The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY. + + +EDIT SCRIPT +----------- + +The insertion and deletion functions produce an 'edit script' that can later be +applied to effect the changes without risking ENOMEM. This retains the +preallocated metadata blocks that will be installed in the internal tree and +keeps track of the metadata blocks that will be removed from the tree when the +script is applied. + +This is also used to keep track of dead blocks and dead objects after the +script has been applied so that they can be freed later. The freeing is done +after an RCU grace period has passed - thus allowing access functions to +proceed under the RCU read lock. + +The script appears as outside of the API as a pointer of the type: + + struct assoc_array_edit; + +There are two functions for dealing with the script: + + (1) Apply an edit script. + + void assoc_array_apply_edit(struct assoc_array_edit *edit); + + This will perform the edit functions, interpolating various write barriers + to permit accesses under the RCU read lock to continue. The edit script + will then be passed to call_rcu() to free it and any dead stuff it points + to. + + (2) Cancel an edit script. + + void assoc_array_cancel_edit(struct assoc_array_edit *edit); + + This frees the edit script and all preallocated memory immediately. If + this was for insertion, the new object is _not_ released by this function, + but must rather be released by the caller. + +These functions are guaranteed not to fail. + + +OPERATIONS TABLE +---------------- + +Various functions take a table of operations: + + struct assoc_array_ops { + ... + }; + +This points to a number of methods, all of which need to be provided: + + (1) Get a chunk of index key from caller data: + + unsigned long (*get_key_chunk)(const void *index_key, int level); + + This should return a chunk of caller-supplied index key starting at the + *bit* position given by the level argument. The level argument will be a + multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return + ASSOC_ARRAY_KEY_CHUNK_SIZE bits. No error is possible. + + + (2) Get a chunk of an object's index key. + + unsigned long (*get_object_key_chunk)(const void *object, int level); + + As the previous function, but gets its data from an object in the array + rather than from a caller-supplied index key. + + + (3) See if this is the object we're looking for. + + bool (*compare_object)(const void *object, const void *index_key); + + Compare the object against an index key and return true if it matches and + false if it doesn't. + + + (4) Diff the index keys of two objects. + + int (*diff_objects)(const void *a, const void *b); + + Return the bit position at which the index keys of two objects differ or + -1 if they are the same. + + + (5) Free an object. + + void (*free_object)(void *object); + + Free the specified object. Note that this may be called an RCU grace + period after assoc_array_apply_edit() was called, so synchronize_rcu() may + be necessary on module unloading. + + +MANIPULATION FUNCTIONS +---------------------- + +There are a number of functions for manipulating an associative array: + + (1) Initialise an associative array. + + void assoc_array_init(struct assoc_array *array); + + This initialises the base structure for an associative array. It can't + fail. + + + (2) Insert/replace an object in an associative array. + + struct assoc_array_edit * + assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object); + + This inserts the given object into the array. Note that the least + significant bit of the pointer must be zero as it's used to type-mark + pointers internally. + + If an object already exists for that key then it will be replaced with the + new object and the old one will be freed automatically. + + The index_key argument should hold index key information and is + passed to the methods in the ops table when they are called. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. + + The caller should lock exclusively against other modifiers of the array. + + + (3) Delete an object from an associative array. + + struct assoc_array_edit * + assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); + + This deletes an object that matches the specified data from the array. + + The index_key argument should hold index key information and is + passed to the methods in the ops table when they are called. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. NULL will be returned if the specified object is + not found within the array. + + The caller should lock exclusively against other modifiers of the array. + + + (4) Delete all objects from an associative array. + + struct assoc_array_edit * + assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops); + + This deletes all the objects from an associative array and leaves it + completely empty. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. + + The caller should lock exclusively against other modifiers of the array. + + + (5) Destroy an associative array, deleting all objects. + + void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops); + + This destroys the contents of the associative array and leaves it + completely empty. It is not permitted for another thread to be traversing + the array under the RCU read lock at the same time as this function is + destroying it as no RCU deferral is performed on memory release - + something that would require memory to be allocated. + + The caller should lock exclusively against other modifiers and accessors + of the array. + + + (6) Garbage collect an associative array. + + int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data); + + This iterates over the objects in an associative array and passes each one + to iterator(). If iterator() returns true, the object is kept. If it + returns false, the object will be freed. If the iterator() function + returns true, it must perform any appropriate refcount incrementing on the + object before returning. + + The internal tree will be packed down if possible as part of the iteration + to reduce the number of nodes in it. + + The iterator_data is passed directly to iterator() and is otherwise + ignored by the function. + + The function will return 0 if successful and -ENOMEM if there wasn't + enough memory. + + It is possible for other threads to iterate over or search the array under + the RCU read lock whilst this function is in progress. The caller should + lock exclusively against other modifiers of the array. + + +ACCESS FUNCTIONS +---------------- + +There are two functions for accessing an associative array: + + (1) Iterate over all the objects in an associative array. + + int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data); + + This passes each object in the array to the iterator callback function. + iterator_data is private data for that function. + + This may be used on an array at the same time as the array is being + modified, provided the RCU read lock is held. Under such circumstances, + it is possible for the iteration function to see some objects twice. If + this is a problem, then modification should be locked against. The + iteration algorithm should not, however, miss any objects. + + The function will return 0 if no objects were in the array or else it will + return the result of the last iterator function called. Iteration stops + immediately if any call to the iteration function results in a non-zero + return. + + + (2) Find an object in an associative array. + + void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); + + This walks through the array's internal tree directly to the object + specified by the index key.. + + This may be used on an array at the same time as the array is being + modified, provided the RCU read lock is held. + + The function will return the object if found (and set *_type to the object + type) or will return NULL if the object was not found. + + +INDEX KEY FORM +-------------- + +The index key can be of any form, but since the algorithms aren't told how long +the key is, it is strongly recommended that the index key includes its length +very early on before any variation due to the length would have an effect on +comparisons. + +This will cause leaves with different length keys to scatter away from each +other - and those with the same length keys to cluster together. + +It is also recommended that the index key begin with a hash of the rest of the +key to maximise scattering throughout keyspace. + +The better the scattering, the wider and lower the internal tree will be. + +Poor scattering isn't too much of a problem as there are shortcuts and nodes +can contain mixtures of leaves and metadata pointers. + +The index key is read in chunks of machine word. Each chunk is subdivided into +one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and +on a 64-bit CPU, 16 levels. Unless the scattering is really poor, it is +unlikely that more than one word of any particular index key will have to be +used. + + +================= +INTERNAL WORKINGS +================= + +The associative array data structure has an internal tree. This tree is +constructed of two types of metadata blocks: nodes and shortcuts. + +A node is an array of slots. Each slot can contain one of four things: + + (*) A NULL pointer, indicating that the slot is empty. + + (*) A pointer to an object (a leaf). + + (*) A pointer to a node at the next level. + + (*) A pointer to a shortcut. + + +BASIC INTERNAL TREE LAYOUT +-------------------------- + +Ignoring shortcuts for the moment, the nodes form a multilevel tree. The index +key space is strictly subdivided by the nodes in the tree and nodes occur on +fixed levels. For example: + + Level: 0 1 2 3 + =============== =============== =============== =============== + NODE D + NODE B NODE C +------>+---+ + +------>+---+ +------>+---+ | | 0 | + NODE A | | 0 | | | 0 | | +---+ + +---+ | +---+ | +---+ | : : + | 0 | | : : | : : | +---+ + +---+ | +---+ | +---+ | | f | + | 1 |---+ | 3 |---+ | 7 |---+ +---+ + +---+ +---+ +---+ + : : : : | 8 |---+ + +---+ +---+ +---+ | NODE E + | e |---+ | f | : : +------>+---+ + +---+ | +---+ +---+ | 0 | + | f | | | f | +---+ + +---+ | +---+ : : + | NODE F +---+ + +------>+---+ | f | + | 0 | NODE G +---+ + +---+ +------>+---+ + : : | | 0 | + +---+ | +---+ + | 6 |---+ : : + +---+ +---+ + : : | f | + +---+ +---+ + | f | + +---+ + +In the above example, there are 7 nodes (A-G), each with 16 slots (0-f). +Assuming no other meta data nodes in the tree, the key space is divided thusly: + + KEY PREFIX NODE + ========== ==== + 137* D + 138* E + 13[0-69-f]* C + 1[0-24-f]* B + e6* G + e[0-57-f]* F + [02-df]* A + +So, for instance, keys with the following example index keys will be found in +the appropriate nodes: + + INDEX KEY PREFIX NODE + =============== ======= ==== + 13694892892489 13 C + 13795289025897 137 D + 13889dde88793 138 E + 138bbb89003093 138 E + 1394879524789 12 C + 1458952489 1 B + 9431809de993ba - A + b4542910809cd - A + e5284310def98 e F + e68428974237 e6 G + e7fffcbd443 e F + f3842239082 - A + +To save memory, if a node can hold all the leaves in its portion of keyspace, +then the node will have all those leaves in it and will not have any metadata +pointers - even if some of those leaves would like to be in the same slot. + +A node can contain a heterogeneous mix of leaves and metadata pointers. +Metadata pointers must be in the slots that match their subdivisions of key +space. The leaves can be in any slot not occupied by a metadata pointer. It +is guaranteed that none of the leaves in a node will match a slot occupied by a +metadata pointer. If the metadata pointer is there, any leaf whose key matches +the metadata key prefix must be in the subtree that the metadata pointer points +to. + +In the above example list of index keys, node A will contain: + + SLOT CONTENT INDEX KEY (PREFIX) + ==== =============== ================== + 1 PTR TO NODE B 1* + any LEAF 9431809de993ba + any LEAF b4542910809cd + e PTR TO NODE F e* + any LEAF f3842239082 + +and node B: + + 3 PTR TO NODE C 13* + any LEAF 1458952489 + + +SHORTCUTS +--------- + +Shortcuts are metadata records that jump over a piece of keyspace. A shortcut +is a replacement for a series of single-occupancy nodes ascending through the +levels. Shortcuts exist to save memory and to speed up traversal. + +It is possible for the root of the tree to be a shortcut - say, for example, +the tree contains at least 17 nodes all with key prefix '1111'. The insertion +algorithm will insert a shortcut to skip over the '1111' keyspace in a single +bound and get to the fourth level where these actually become different. + + +SPLITTING AND COLLAPSING NODES +------------------------------ + +Each node has a maximum capacity of 16 leaves and metadata pointers. If the +insertion algorithm finds that it is trying to insert a 17th object into a +node, that node will be split such that at least two leaves that have a common +key segment at that level end up in a separate node rooted on that slot for +that common key segment. + +If the leaves in a full node and the leaf that is being inserted are +sufficiently similar, then a shortcut will be inserted into the tree. + +When the number of objects in the subtree rooted at a node falls to 16 or +fewer, then the subtree will be collapsed down to a single node - and this will +ripple towards the root if possible. + + +NON-RECURSIVE ITERATION +----------------------- + +Each node and shortcut contains a back pointer to its parent and the number of +slot in that parent that points to it. None-recursive iteration uses these to +proceed rootwards through the tree, going to the parent node, slot N + 1 to +make sure progress is made without the need for a stack. + +The backpointers, however, make simultaneous alteration and iteration tricky. + + +SIMULTANEOUS ALTERATION AND ITERATION +------------------------------------- + +There are a number of cases to consider: + + (1) Simple insert/replace. This involves simply replacing a NULL or old + matching leaf pointer with the pointer to the new leaf after a barrier. + The metadata blocks don't change otherwise. An old leaf won't be freed + until after the RCU grace period. + + (2) Simple delete. This involves just clearing an old matching leaf. The + metadata blocks don't change otherwise. The old leaf won't be freed until + after the RCU grace period. + + (3) Insertion replacing part of a subtree that we haven't yet entered. This + may involve replacement of part of that subtree - but that won't affect + the iteration as we won't have reached the pointer to it yet and the + ancestry blocks are not replaced (the layout of those does not change). + + (4) Insertion replacing nodes that we're actively processing. This isn't a + problem as we've passed the anchoring pointer and won't switch onto the + new layout until we follow the back pointers - at which point we've + already examined the leaves in the replaced node (we iterate over all the + leaves in a node before following any of its metadata pointers). + + We might, however, re-see some leaves that have been split out into a new + branch that's in a slot further along than we were at. + + (5) Insertion replacing nodes that we're processing a dependent branch of. + This won't affect us until we follow the back pointers. Similar to (4). + + (6) Deletion collapsing a branch under us. This doesn't affect us because the + back pointers will get us back to the parent of the new node before we + could see the new node. The entire collapsed subtree is thrown away + unchanged - and will still be rooted on the same slot, so we shouldn't + process it a second time as we'll go back to slot + 1. + +Note: + + (*) Under some circumstances, we need to simultaneously change the parent + pointer and the parent slot pointer on a node (say, for example, we + inserted another node before it and moved it up a level). We cannot do + this without locking against a read - so we have to replace that node too. + + However, when we're changing a shortcut into a node this isn't a problem + as shortcuts only have one slot and so the parent slot number isn't used + when traversing backwards over one. This means that it's okay to change + the slot number first - provided suitable barriers are used to make sure + the parent slot number is read after the back pointer. + +Obsolete blocks and leaves are freed up after an RCU grace period has passed, +so as long as anyone doing walking or iteration holds the RCU read lock, the +old superstructure should not go away on them. diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index ad6a738..b1cb341 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -15,6 +15,7 @@ adi,adt7461 +/-1C TDM Extended Temp Range I.C adt7461 +/-1C TDM Extended Temp Range I.C at,24c08 i2c serial eeprom (24cxx) atmel,24c02 i2c serial eeprom (24cxx) +atmel,at97sc3204t i2c trusted platform module (TPM) catalyst,24c32 i2c serial eeprom dallas,ds1307 64 x 8, Serial, I2C Real-Time Clock dallas,ds1338 I2C RTC with 56-Byte NV RAM @@ -35,6 +36,7 @@ fsl,mc13892 MC13892: Power Management Integrated Circuit (PMIC) for i.MX35/51 fsl,mma8450 MMA8450Q: Xtrinsic Low-power, 3-axis Xtrinsic Accelerometer fsl,mpr121 MPR121: Proximity Capacitive Touch Sensor Controller fsl,sgtl5000 SGTL5000: Ultra Low-Power Audio Codec +gmt,g751 G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire Interface infineon,slb9635tt Infineon SLB9635 (Soft-) I2C TPM (old protocol, max 100khz) infineon,slb9645tt Infineon SLB9645 I2C TPM (new protocol, max 400khz) maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator @@ -44,6 +46,7 @@ mc,rv3029c2 Real Time Clock Module with I2C-Bus national,lm75 I2C TEMP SENSOR national,lm80 Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor national,lm92 ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface +nuvoton,npct501 i2c trusted platform module (TPM) nxp,pca9556 Octal SMBus and I2C registered interface nxp,pca9557 8-bit I2C-bus and SMBus I/O port with reset nxp,pcf8563 Real-time clock/calendar @@ -61,3 +64,4 @@ taos,tsl2550 Ambient Light Sensor with SMBUS/Two Wire Serial Interface ti,tsc2003 I2C Touch-Screen Controller ti,tmp102 Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface ti,tmp275 Digital Temperature Sensor +winbond,wpct301 i2c trusted platform module (TPM) diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt index 9dae594..5dd282d 100644 --- a/Documentation/filesystems/btrfs.txt +++ b/Documentation/filesystems/btrfs.txt @@ -70,6 +70,12 @@ Unless otherwise specified, all options default to off. See comments at the top of fs/btrfs/check-integrity.c for more info. + commit=<seconds> + Set the interval of periodic commit, 30 seconds by default. Higher + values defer data being synced to permanent storage with obvious + consequences when the system crashes. The upper bound is not forced, + but a warning is printed if it's more than 300 seconds (5 minutes). + compress compress=<type> compress-force @@ -154,7 +160,11 @@ Unless otherwise specified, all options default to off. Currently this scans a list of several previous tree roots and tries to use the first readable. - skip_balance + rescan_uuid_tree + Force check and rebuild procedure of the UUID tree. This should not + normally be needed. + + skip_balance Skip automatic resume of interrupted balance operation after mount. May be resumed with "btrfs balance resume." @@ -234,24 +244,14 @@ available from the git repository at the following location: These include the following tools: -mkfs.btrfs: create a filesystem - -btrfsctl: control program to create snapshots and subvolumes: +* mkfs.btrfs: create a filesystem - mount /dev/sda2 /mnt - btrfsctl -s new_subvol_name /mnt - btrfsctl -s snapshot_of_default /mnt/default - btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name - btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol - ls /mnt - default snapshot_of_a_snapshot snapshot_of_new_subvol - new_subvol_name snapshot_of_default +* btrfs: a single tool to manage the filesystems, refer to the manpage for more details - Snapshots and subvolumes cannot be deleted right now, but you can - rm -rf all the files and directories inside them. +* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem -btrfsck: do a limited check of the FS extent trees. +Other tools for specific tasks: -btrfs-debug-tree: print all of the FS metadata in text form. Example: +* btrfs-convert: in-place conversion from ext2/3/4 filesystems - btrfs-debug-tree /dev/sda2 >& big_output_file +* btrfs-image: dump filesystem metadata for debugging diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9ca3e74..50680a5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1190,15 +1190,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted. owned by uid=0. ima_hash= [IMA] - Format: { "sha1" | "md5" } + Format: { md5 | sha1 | rmd160 | sha256 | sha384 + | sha512 | ... } default: "sha1" + The list of supported hash algorithms is defined + in crypto/hash_info.h. + ima_tcb [IMA] Load a policy which meets the needs of the Trusted Computing Base. This means IMA will measure all programs exec'd, files mmap'd for exec, and all files opened for read by uid=0. + ima_template= [IMA] + Select one of defined IMA measurements template formats. + Formats: { "ima" | "ima-ng" } + Default: "ima-ng" + init= [KNL] Format: <full_path> Run specified binary instead of /sbin/init as init diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX index 414235c..45c82fd 100644 --- a/Documentation/security/00-INDEX +++ b/Documentation/security/00-INDEX @@ -22,3 +22,5 @@ keys.txt - description of the kernel key retention service. tomoyo.txt - documentation on the TOMOYO Linux Security Module. +IMA-templates.txt + - documentation on the template management mechanism for IMA. diff --git a/Documentation/security/IMA-templates.txt b/Documentation/security/IMA-templates.txt new file mode 100644 index 0000000..a777e5f --- /dev/null +++ b/Documentation/security/IMA-templates.txt @@ -0,0 +1,87 @@ + IMA Template Management Mechanism + + +==== INTRODUCTION ==== + +The original 'ima' template is fixed length, containing the filedata hash +and pathname. The filedata hash is limited to 20 bytes (md5/sha1). +The pathname is a null terminated string, limited to 255 characters. +To overcome these limitations and to add additional file metadata, it is +necessary to extend the current version of IMA by defining additional +templates. For example, information that could be possibly reported are +the inode UID/GID or the LSM labels either of the inode and of the process +that is accessing it. + +However, the main problem to introduce this feature is that, each time +a new template is defined, the functions that generate and display +the measurements list would include the code for handling a new format +and, thus, would significantly grow over the time. + +The proposed solution solves this problem by separating the template +management from the remaining IMA code. The core of this solution is the +definition of two new data structures: a template descriptor, to determine +which information should be included in the measurement list; a template +field, to generate and display data of a given type. + +Managing templates with these structures is very simple. To support +a new data type, developers define the field identifier and implement +two functions, init() and show(), respectively to generate and display +measurement entries. Defining a new template descriptor requires +specifying the template format, a string of field identifiers separated +by the '|' character. While in the current implementation it is possible +to define new template descriptors only by adding their definition in the +template specific code (ima_template.c), in a future version it will be +possible to register a new template on a running kernel by supplying to IMA +the desired format string. In this version, IMA initializes at boot time +all defined template descriptors by translating the format into an array +of template fields structures taken from the set of the supported ones. + +After the initialization step, IMA will call ima_alloc_init_template() +(new function defined within the patches for the new template management +mechanism) to generate a new measurement entry by using the template +descriptor chosen through the kernel configuration or through the newly +introduced 'ima_template=' kernel command line parameter. It is during this +phase that the advantages of the new architecture are clearly shown: +the latter function will not contain specific code to handle a given template +but, instead, it simply calls the init() method of the template fields +associated to the chosen template descriptor and store the result (pointer +to allocated data and data length) in the measurement entry structure. + +The same mechanism is employed to display measurements entries. +The functions ima[_ascii]_measurements_show() retrieve, for each entry, +the template descriptor used to produce that entry and call the show() +method for each item of the array of template fields structures. + + + +==== SUPPORTED TEMPLATE FIELDS AND DESCRIPTORS ==== + +In the following, there is the list of supported template fields +('<identifier>': description), that can be used to define new template +descriptors by adding their identifier to the format string +(support for more data types will be added later): + + - 'd': the digest of the event (i.e. the digest of a measured file), + calculated with the SHA1 or MD5 hash algorithm; + - 'n': the name of the event (i.e. the file name), with size up to 255 bytes; + - 'd-ng': the digest of the event, calculated with an arbitrary hash + algorithm (field format: [<hash algo>:]digest, where the digest + prefix is shown only if the hash algorithm is not SHA1 or MD5); + - 'n-ng': the name of the event, without size limitations. + + +Below, there is the list of defined template descriptors: + - "ima": its format is 'd|n'; + - "ima-ng" (default): its format is 'd-ng|n-ng'. + + + +==== USE ==== + +To specify the template descriptor to be used to generate measurement entries, +currently the following methods are supported: + + - select a template descriptor among those supported in the kernel + configuration ('ima-ng' is the default choice); + - specify a template descriptor name from the kernel command line through + the 'ima_template=' parameter. diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 7b4145d..a4c33f1 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -865,15 +865,14 @@ encountered: calling processes has a searchable link to the key from one of its keyrings. There are three functions for dealing with these: - key_ref_t make_key_ref(const struct key *key, - unsigned long possession); + key_ref_t make_key_ref(const struct key *key, bool possession); struct key *key_ref_to_ptr(const key_ref_t key_ref); - unsigned long is_key_possessed(const key_ref_t key_ref); + bool is_key_possessed(const key_ref_t key_ref); The first function constructs a key reference from a key pointer and - possession information (which must be 0 or 1 and not any other value). + possession information (which must be true or false). The second function retrieves the key pointer from a reference and the third retrieves the possession flag. @@ -961,14 +960,17 @@ payload contents" for more information. the argument will not be parsed. -(*) Extra references can be made to a key by calling the following function: +(*) Extra references can be made to a key by calling one of the following + functions: + struct key *__key_get(struct key *key); struct key *key_get(struct key *key); - These need to be disposed of by calling key_put() when they've been - finished with. The key pointer passed in will be returned. If the pointer - is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and - no increment will take place. + Keys so references will need to be disposed of by calling key_put() when + they've been finished with. The key pointer passed in will be returned. + + In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set + then the key will not be dereferenced and no increment will take place. (*) A key's serial number can be obtained by calling: diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 54d29c1..230ce71 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -440,15 +440,15 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " /*\n" buf += " * Setup default attribute lists for various fabric->tf_cit_tmpl\n" buf += " */\n" - buf += " TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = " + fabric_mod_name + "_wwn_attrs;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = " + fabric_mod_name + "_wwn_attrs;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;\n" buf += " /*\n" buf += " * Register the fabric for use within TCM\n" buf += " */\n" diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock index 7521d36..6dea4fd 100644 --- a/Documentation/vm/split_page_table_lock +++ b/Documentation/vm/split_page_table_lock @@ -63,9 +63,9 @@ levels. PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table allocation and pgtable_pmd_page_dtor() on freeing. -Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but -make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE -preallocate few PMDs on pgd_alloc(). +Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and +pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing +paths: i.e X86_PAE preallocate few PMDs on pgd_alloc(). With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. diff --git a/MAINTAINERS b/MAINTAINERS index 63f3048..8285ed4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4065,6 +4065,7 @@ F: arch/x86/include/uapi/asm/hyperv.h F: arch/x86/kernel/cpu/mshyperv.c F: drivers/hid/hid-hyperv.c F: drivers/hv/ +F: drivers/input/serio/hyperv-keyboard.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/video/hyperv_fb.c @@ -7515,9 +7516,10 @@ SELINUX SECURITY MODULE M: Stephen Smalley <sds@tycho.nsa.gov> M: James Morris <james.l.morris@oracle.com> M: Eric Paris <eparis@parisplace.org> +M: Paul Moore <paul@paul-moore.com> L: selinux@tycho.nsa.gov (subscribers-only, general discussion) W: http://selinuxproject.org -T: git git://git.infradead.org/users/eparis/selinux.git +T: git git://git.infradead.org/users/pcmoore/selinux S: Supported F: include/linux/selinux* F: security/selinux/ @@ -8664,6 +8666,7 @@ F: drivers/media/usb/tm6000/ TPM DEVICE DRIVER M: Leonidas Da Silva Barbosa <leosilva@linux.vnet.ibm.com> M: Ashley Lai <ashley@ashleylai.com> +M: Peter Huewe <peterhuewe@gmx.de> M: Rajiv Andrade <mail@srajiv.net> W: http://tpmdd.sourceforge.net M: Marcel Selhorst <tpmdd@selhorst.net> @@ -9522,8 +9525,8 @@ F: drivers/xen/*swiotlb* XFS FILESYSTEM P: Silicon Graphics Inc +M: Dave Chinner <dchinner@fromorbit.com> M: Ben Myers <bpm@sgi.com> -M: Alex Elder <elder@kernel.org> M: xfs@oss.sgi.com L: xfs@oss.sgi.com W: http://oss.sgi.com/projects/xfs diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 214b698..c1f1a7e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -25,7 +25,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB - select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_CONTEXT_TRACKING @@ -1496,6 +1496,7 @@ config HAVE_ARM_ARCH_TIMER bool "Architected timer support" depends on CPU_V7 select ARM_ARCH_TIMER + select GENERIC_CLOCKEVENTS help This option enables support for the ARM architected timer @@ -1719,7 +1720,6 @@ config AEABI config OABI_COMPAT bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)" depends on AEABI && !THUMB2_KERNEL - default y help This option preserves the old syscall interface along with the new (ARM EABI) one. It also provides a compatibility layer to @@ -1727,11 +1727,16 @@ config OABI_COMPAT in memory differs between the legacy ABI and the new ARM EABI (only for non "thumb" binaries). This option adds a tiny overhead to all syscalls and produces a slightly larger kernel. + + The seccomp filter system will not be available when this is + selected, since there is no way yet to sensibly distinguish + between calling conventions during filtering. + If you know you'll be using only pure EABI user space then you can say N here. If this option is not selected and you attempt to execute a legacy ABI binary then the result will be UNPREDICTABLE (in fact it can be predicted that it won't work - at all). If in doubt say Y. + at all). If in doubt say N. config ARCH_HAS_HOLES_MEMORYMODEL bool diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 4dd2145..9ecccc8 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -226,7 +226,14 @@ static inline phys_addr_t __virt_to_phys(unsigned long x) static inline unsigned long __phys_to_virt(phys_addr_t x) { unsigned long t; - __pv_stub(x, t, "sub", __PV_BITS_31_24); + + /* + * 'unsigned long' cast discard upper word when + * phys_addr_t is 64 bit, and makes sure that inline + * assembler expression receives 32 bit argument + * in place where 'r' 32 bit operand is expected. + */ + __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24); return t; } diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 7801866..11d59b3 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -508,6 +508,7 @@ __fixup_smp: teq r0, #0x0 @ '0' on actual UP A9 hardware beq __fixup_smp_on_up @ So its an A9 UP ldr r0, [r0, #4] @ read SCU Config +ARM_BE8(rev r0, r0) @ byteswap if big endian and r0, r0, #0x3 @ number of CPUs teq r0, #0x0 @ is 1? movne pc, lr @@ -644,7 +645,11 @@ ARM_BE8(rev16 ip, ip) bcc 1b bx lr #else +#ifdef CONFIG_CPU_ENDIAN_BE8 + moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction +#else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction +#endif b 2f 1: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @@ -653,7 +658,7 @@ ARM_BE8(rev16 ip, ip) tst ip, #0x000f0000 @ check the rotation field orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, lsl #24 @ mask in offset bits 7-0 + orreq ip, ip, r0 @ mask in offset bits 7-0 #else bic ip, ip, #0x000000ff tst ip, #0xf00 @ check the rotation field diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 6125f25..dbf0923 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -856,7 +856,7 @@ static void __init kuser_init(void *vectors) memcpy(vectors + 0xfe0, vectors + 0xfe8, 4); } #else -static void __init kuser_init(void *vectors) +static inline void __init kuser_init(void *vectors) { } #endif diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 3719583..5809069 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -334,6 +334,17 @@ out: return err; } +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -345,16 +356,27 @@ out: */ int create_hyp_mappings(void *from, void *to) { - unsigned long phys_addr = virt_to_phys(from); + phys_addr_t phys_addr; + unsigned long virt_addr; unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); - /* Check for a valid kernel memory mapping */ - if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) - return -EINVAL; + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); - return __create_hyp_mappings(hyp_pgd, start, end, - __phys_to_pfn(phys_addr), PAGE_HYP); + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, virt_addr, + virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + PAGE_HYP); + if (err) + return err; + } + + return 0; } /** diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index e0c68d5..52886b8 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -10,7 +10,7 @@ UNWIND( .fnstart ) and r3, r0, #31 @ Get bit offset mov r0, r0, lsr #5 add r1, r1, r0, lsl #2 @ Get word offset -#if __LINUX_ARM_ARCH__ >= 7 +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) .arch_extension mp ALT_SMP(W(pldw) [r1]) ALT_UP(W(nop)) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 78eeeca..580ef2d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -558,8 +558,8 @@ static void __init build_mem_type_table(void) mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; break; } - printk("Memory policy: ECC %sabled, Data cache %s\n", - ecc_mask ? "en" : "dis", cp->policy); + pr_info("Memory policy: %sData cache %s\n", + ecc_mask ? "ECC enabled, " : "", cp->policy); for (i = 0; i < ARRAY_SIZE(mem_types); i++) { struct mem_type *t = &mem_types[i]; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 5c668b7..55764a7e 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -18,6 +18,7 @@ #include <asm/mach/arch.h> #include <asm/cputype.h> #include <asm/mpu.h> +#include <asm/procinfo.h> #include "mm.h" diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 60920f6..bd17819 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -92,7 +92,7 @@ ENDPROC(cpu_v7_dcache_clean_area) /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ .globl cpu_v7_suspend_size -.equ cpu_v7_suspend_size, 4 * 8 +.equ cpu_v7_suspend_size, 4 * 9 #ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_v7_do_suspend) stmfd sp!, {r4 - r10, lr} @@ -101,13 +101,17 @@ ENTRY(cpu_v7_do_suspend) stmia r0!, {r4 - r5} #ifdef CONFIG_MMU mrc p15, 0, r6, c3, c0, 0 @ Domain ID +#ifdef CONFIG_ARM_LPAE + mrrc p15, 1, r5, r7, c2 @ TTB 1 +#else mrc p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif mrc p15, 0, r11, c2, c0, 2 @ TTB control register #endif mrc p15, 0, r8, c1, c0, 0 @ Control register mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control - stmia r0, {r6 - r11} + stmia r0, {r5 - r11} ldmfd sp!, {r4 - r10, pc} ENDPROC(cpu_v7_do_suspend) @@ -118,16 +122,19 @@ ENTRY(cpu_v7_do_resume) ldmia r0!, {r4 - r5} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID - ldmia r0, {r6 - r11} + ldmia r0, {r5 - r11} #ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r6, c3, c0, 0 @ Domain ID -#ifndef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r1, ip, c2 @ TTB 0 + mcrr p15, 1, r5, r7, c2 @ TTB 1 +#else ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) ALT_UP(orr r1, r1, #TTB_FLAGS_UP) -#endif mcr p15, 0, r1, c2, c0, 0 @ TTB 0 mcr p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif mcr p15, 0, r11, c2, c0, 2 @ TTB control register ldr r4, =PRRR @ PRRR ldr r5, =NMRR @ NMRR diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 98da78e..084e080 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -33,6 +33,7 @@ extern int boot_cpuid; extern int spinning_secondaries; extern void cpu_die(void); +extern int cpu_to_chip_id(int cpu); #ifdef CONFIG_SMP @@ -112,7 +113,6 @@ static inline struct cpumask *cpu_core_mask(int cpu) } extern int cpu_to_core_id(int cpu); -extern int cpu_to_chip_id(int cpu); /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. * diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 6713020..4bd687d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -686,6 +686,15 @@ void eeh_save_bars(struct eeh_dev *edev) for (i = 0; i < 16; i++) eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); + + /* + * For PCI bridges including root port, we need enable bus + * master explicitly. Otherwise, it can't fetch IODA table + * entries correctly. So we cache the bit in advance so that + * we can restore it after reset, either PHB range or PE range. + */ + if (edev->mode & EEH_DEV_BRIDGE) + edev->config_space[1] |= PCI_COMMAND_MASTER; } /** diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index d27c5af..72d748b 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -74,8 +74,13 @@ static int eeh_event_handler(void * dummy) pe = event->pe; if (pe) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); - pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", - pe->phb->global_number, pe->addr); + if (pe->type & EEH_PE_PHB) + pr_info("EEH: Detected error on PHB#%d\n", + pe->phb->global_number); + else + pr_info("EEH: Detected PCI bus error on " + "PHB#%d-PE#%x\n", + pe->phb->global_number, pe->addr); eeh_handle_event(pe); eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0650e18..3386d8a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -858,17 +858,21 @@ void show_regs(struct pt_regs * regs) printk("MSR: "REG" ", regs->msr); printbits(regs->msr, msr_bits); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); -#ifdef CONFIG_PPC64 - printk("SOFTE: %ld\n", regs->softe); -#endif trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) - printk("CFAR: "REG"\n", regs->orig_gpr3); - if (trap == 0x300 || trap == 0x600) + printk("CFAR: "REG" ", regs->orig_gpr3); + if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); + printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); #else - printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); + printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); +#endif +#ifdef CONFIG_PPC64 + printk("SOFTE: %ld ", regs->softe); +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) + printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); #endif for (i = 0; i < 32; i++) { @@ -887,9 +891,6 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch); -#endif show_stack(current, (unsigned long *) regs->gpr[1]); if (!user_mode(regs)) show_instructions(regs); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f3a4709..fa0ad8a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -777,6 +777,26 @@ int of_get_ibm_chip_id(struct device_node *np) return -1; } +/** + * cpu_to_chip_id - Return the cpus chip-id + * @cpu: The logical cpu number. + * + * Return the value of the ibm,chip-id property corresponding to the given + * logical cpu number. If the chip-id can not be found, returns -1. + */ +int cpu_to_chip_id(int cpu) +{ + struct device_node *np; + + np = of_get_cpu_node(cpu, NULL); + if (!np) + return -1; + + of_node_put(np); + return of_get_ibm_chip_id(np); +} +EXPORT_SYMBOL(cpu_to_chip_id); + #ifdef CONFIG_PPC_PSERIES /* * Fix up the uninitialized fields in a new device node: diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 749778e..1844298 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -457,7 +457,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (copy_vsx_to_user(&frame->mc_vsregs, current)) return 1; msr |= MSR_VSX; - } + } else if (!ctx_has_vsx_region) + /* + * With a small context structure we can't hold the VSX + * registers, hence clear the MSR value to indicate the state + * was not saved. + */ + msr &= ~MSR_VSX; + + #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* save spe registers */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 930cd8a..a3b64f3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -597,22 +597,6 @@ out: return id; } -/* Return the value of the chip-id property corresponding - * to the given logical cpu. - */ -int cpu_to_chip_id(int cpu) -{ - struct device_node *np; - - np = of_get_cpu_node(cpu, NULL); - if (!np) - return -1; - - of_node_put(np); - return of_get_ibm_chip_id(np); -} -EXPORT_SYMBOL(cpu_to_chip_id); - /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 192b051..b3b1441 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -213,8 +213,6 @@ static u64 scan_dispatch_log(u64 stop_tb) if (i == be64_to_cpu(vpa->dtl_idx)) return 0; while (i < be64_to_cpu(vpa->dtl_idx)) { - if (dtl_consumer) - dtl_consumer(dtl, i); dtb = be64_to_cpu(dtl->timebase); tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + be32_to_cpu(dtl->ready_to_enqueue_time); @@ -227,6 +225,8 @@ static u64 scan_dispatch_log(u64 stop_tb) } if (dtb > stop_tb) break; + if (dtl_consumer) + dtl_consumer(dtl, i); stolen += tb_delta; ++i; ++dtl; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index e7d0c88f..76a6482 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1419,7 +1419,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) /* needed to ensure proper operation of coherent allocations * later, in case driver doesn't set it explicitly */ - dma_set_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64)); + dma_coerce_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64)); } /* register with generic device framework */ diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index 6936547..c5f734e 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -123,6 +123,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct mm_struct *mm = current->mm; unsigned long addr, len, end; unsigned long next; + unsigned long flags; pgd_t *pgdp; int nr = 0; @@ -156,7 +157,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * So long as we atomically load page table pointers versus teardown, * we can follow the address down to the the page and take a ref on it. */ - local_irq_disable(); + local_irq_save(flags); pgdp = pgd_offset(mm, addr); do { @@ -179,7 +180,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, break; } while (pgdp++, addr = next, addr != end); - local_irq_enable(); + local_irq_restore(flags); return nr; } diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 3e99c14..7ce9cf3 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -258,7 +258,7 @@ static bool slice_scan_available(unsigned long addr, slice = GET_HIGH_SLICE_INDEX(addr); *boundary_addr = (slice + end) ? ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; - return !!(available.high_slices & (1u << slice)); + return !!(available.high_slices & (1ul << slice)); } } diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 8844628..1cb160d 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -19,6 +19,7 @@ #include <asm/io.h> #include <asm/prom.h> #include <asm/machdep.h> +#include <asm/smp.h> struct powernv_rng { diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index a702f1c..72a1027 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -13,6 +13,7 @@ #include <linux/of.h> #include <asm/archrandom.h> #include <asm/machdep.h> +#include <asm/plpar_wrappers.h> static int pseries_get_random_long(unsigned long *v) diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c index 8ef53bc..aaa46b3 100644 --- a/arch/powerpc/platforms/wsp/chroma.c +++ b/arch/powerpc/platforms/wsp/chroma.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/smp.h> #include <linux/time.h> +#include <linux/of_fdt.h> #include <asm/machdep.h> #include <asm/udbg.h> diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c index d18e6cc..a3c87f3 100644 --- a/arch/powerpc/platforms/wsp/h8.c +++ b/arch/powerpc/platforms/wsp/h8.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/of.h> #include <linux/io.h> +#include <linux/of_address.h> #include "wsp.h" diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c index 2d3b1dd..9cd92e6 100644 --- a/arch/powerpc/platforms/wsp/ics.c +++ b/arch/powerpc/platforms/wsp/ics.c @@ -18,6 +18,8 @@ #include <linux/smp.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <asm/io.h> #include <asm/irq.h> diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c index cb565bf..3f67298 100644 --- a/arch/powerpc/platforms/wsp/opb_pic.c +++ b/arch/powerpc/platforms/wsp/opb_pic.c @@ -15,6 +15,8 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <asm/reg_a2.h> #include <asm/irq.h> diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c index 508ec82..a87b414 100644 --- a/arch/powerpc/platforms/wsp/psr2.c +++ b/arch/powerpc/platforms/wsp/psr2.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/smp.h> #include <linux/time.h> +#include <linux/of_fdt.h> #include <asm/machdep.h> #include <asm/udbg.h> diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c index 8928507..6538b4d 100644 --- a/arch/powerpc/platforms/wsp/scom_wsp.c +++ b/arch/powerpc/platforms/wsp/scom_wsp.c @@ -14,6 +14,7 @@ #include <linux/of.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/of_address.h> #include <asm/cputhreads.h> #include <asm/reg_a2.h> diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c index ddb6efe..58cd1f0 100644 --- a/arch/powerpc/platforms/wsp/wsp.c +++ b/arch/powerpc/platforms/wsp/wsp.c @@ -13,6 +13,7 @@ #include <linux/smp.h> #include <linux/delay.h> #include <linux/time.h> +#include <linux/of_address.h> #include <asm/scom.h> diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index daff69e..1185fe7 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -296,4 +296,4 @@ static struct kernel_param_ops audit_param_ops = { .get = param_get_bool, }; -module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644); +arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 36aa999..c96314a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -61,6 +61,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) #if PAGETABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { + struct page *page = virt_to_page(pmd); paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); /* * NOTE! For PAE, any changes to the top page-directory-pointer-table @@ -69,7 +70,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif - tlb_remove_page(tlb, virt_to_page(pmd)); + pgtable_pmd_page_dtor(page); + tlb_remove_page(tlb, page); } #if PAGETABLE_LEVELS > 3 diff --git a/block/partitions/efi.c b/block/partitions/efi.c index a8287b4..dc51f46 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -96,6 +96,7 @@ * - Code works, detects all the partitions. * ************************************************************/ +#include <linux/kernel.h> #include <linux/crc32.h> #include <linux/ctype.h> #include <linux/math64.h> @@ -715,8 +716,8 @@ int efi_partition(struct parsed_partitions *state) efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid); /* Naively convert UTF16-LE to 7 bits. */ - label_max = min(sizeof(info->volname) - 1, - sizeof(ptes[i].partition_name)); + label_max = min(ARRAY_SIZE(info->volname) - 1, + ARRAY_SIZE(ptes[i].partition_name)); info->volname[label_max] = 0; while (label_count < label_max) { u8 c = ptes[i].partition_name[label_count] & 0xff; diff --git a/crypto/Kconfig b/crypto/Kconfig index 71f337a..4ae5734 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1402,6 +1402,9 @@ config CRYPTO_USER_API_SKCIPHER This option enables the user-spaces interface for symmetric key cipher algorithms. +config CRYPTO_HASH_INFO + bool + source "drivers/crypto/Kconfig" source crypto/asymmetric_keys/Kconfig diff --git a/crypto/Makefile b/crypto/Makefile index 80019ba..b3a7e80 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o obj-$(CONFIG_XOR_BLOCKS) += xor.o obj-$(CONFIG_ASYNC_CORE) += async_tx/ obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/ +obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 0262210..ef5356c 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -161,8 +161,6 @@ static int hash_recvmsg(struct kiocb *unused, struct socket *sock, else if (len < ds) msg->msg_flags |= MSG_TRUNC; - msg->msg_namelen = 0; - lock_sock(sk); if (ctx->more) { ctx->more = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a1c4f0a..6a6dfc0 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -432,7 +432,6 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, long copied = 0; lock_sock(sk); - msg->msg_namelen = 0; for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; iovlen--, iov++) { unsigned long seglen = iov->iov_len; diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig index 6d2c2ea..03a6eb9 100644 --- a/crypto/asymmetric_keys/Kconfig +++ b/crypto/asymmetric_keys/Kconfig @@ -12,6 +12,8 @@ if ASYMMETRIC_KEY_TYPE config ASYMMETRIC_PUBLIC_KEY_SUBTYPE tristate "Asymmetric public-key crypto algorithm subtype" select MPILIB + select PUBLIC_KEY_ALGO_RSA + select CRYPTO_HASH_INFO help This option provides support for asymmetric public key type handling. If signature generation and/or verification are to be used, @@ -20,8 +22,8 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE config PUBLIC_KEY_ALGO_RSA tristate "RSA public-key algorithm" - depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE select MPILIB_EXTRA + select MPILIB help This option enables support for the RSA algorithm (PKCS#1, RFC3447). diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index cf80765..b77eb53 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -209,6 +209,7 @@ struct key_type key_type_asymmetric = { .match = asymmetric_key_match, .destroy = asymmetric_key_destroy, .describe = asymmetric_key_describe, + .def_lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE, }; EXPORT_SYMBOL_GPL(key_type_asymmetric); diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index cb2e291..97eb0019 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -22,29 +22,25 @@ MODULE_LICENSE("GPL"); -const char *const pkey_algo[PKEY_ALGO__LAST] = { +const char *const pkey_algo_name[PKEY_ALGO__LAST] = { [PKEY_ALGO_DSA] = "DSA", [PKEY_ALGO_RSA] = "RSA", }; -EXPORT_SYMBOL_GPL(pkey_algo); +EXPORT_SYMBOL_GPL(pkey_algo_name); -const char *const pkey_hash_algo[PKEY_HASH__LAST] = { - [PKEY_HASH_MD4] = "md4", - [PKEY_HASH_MD5] = "md5", - [PKEY_HASH_SHA1] = "sha1", - [PKEY_HASH_RIPE_MD_160] = "rmd160", - [PKEY_HASH_SHA256] = "sha256", - [PKEY_HASH_SHA384] = "sha384", - [PKEY_HASH_SHA512] = "sha512", - [PKEY_HASH_SHA224] = "sha224", +const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST] = { +#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \ + defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE) + [PKEY_ALGO_RSA] = &RSA_public_key_algorithm, +#endif }; -EXPORT_SYMBOL_GPL(pkey_hash_algo); +EXPORT_SYMBOL_GPL(pkey_algo); -const char *const pkey_id_type[PKEY_ID_TYPE__LAST] = { +const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST] = { [PKEY_ID_PGP] = "PGP", [PKEY_ID_X509] = "X509", }; -EXPORT_SYMBOL_GPL(pkey_id_type); +EXPORT_SYMBOL_GPL(pkey_id_type_name); /* * Provide a part of a description of the key for /proc/keys. @@ -56,7 +52,7 @@ static void public_key_describe(const struct key *asymmetric_key, if (key) seq_printf(m, "%s.%s", - pkey_id_type[key->id_type], key->algo->name); + pkey_id_type_name[key->id_type], key->algo->name); } /* @@ -78,21 +74,45 @@ EXPORT_SYMBOL_GPL(public_key_destroy); /* * Verify a signature using a public key. */ -static int public_key_verify_signature(const struct key *key, - const struct public_key_signature *sig) +int public_key_verify_signature(const struct public_key *pk, + const struct public_key_signature *sig) { - const struct public_key *pk = key->payload.data; + const struct public_key_algorithm *algo; + + BUG_ON(!pk); + BUG_ON(!pk->mpi[0]); + BUG_ON(!pk->mpi[1]); + BUG_ON(!sig); + BUG_ON(!sig->digest); + BUG_ON(!sig->mpi[0]); + + algo = pk->algo; + if (!algo) { + if (pk->pkey_algo >= PKEY_ALGO__LAST) + return -ENOPKG; + algo = pkey_algo[pk->pkey_algo]; + if (!algo) + return -ENOPKG; + } - if (!pk->algo->verify_signature) + if (!algo->verify_signature) return -ENOTSUPP; - if (sig->nr_mpi != pk->algo->n_sig_mpi) { + if (sig->nr_mpi != algo->n_sig_mpi) { pr_debug("Signature has %u MPI not %u\n", - sig->nr_mpi, pk->algo->n_sig_mpi); + sig->nr_mpi, algo->n_sig_mpi); return -EINVAL; } - return pk->algo->verify_signature(pk, sig); + return algo->verify_signature(pk, sig); +} +EXPORT_SYMBOL_GPL(public_key_verify_signature); + +static int public_key_verify_signature_2(const struct key *key, + const struct public_key_signature *sig) +{ + const struct public_key *pk = key->payload.data; + return public_key_verify_signature(pk, sig); } /* @@ -103,6 +123,6 @@ struct asymmetric_key_subtype public_key_subtype = { .name = "public_key", .describe = public_key_describe, .destroy = public_key_destroy, - .verify_signature = public_key_verify_signature, + .verify_signature = public_key_verify_signature_2, }; EXPORT_SYMBOL_GPL(public_key_subtype); diff --git a/crypto/asymmetric_keys/public_key.h b/crypto/asymmetric_keys/public_key.h index 5e5e356..5c37a22 100644 --- a/crypto/asymmetric_keys/public_key.h +++ b/crypto/asymmetric_keys/public_key.h @@ -28,3 +28,9 @@ struct public_key_algorithm { }; extern const struct public_key_algorithm RSA_public_key_algorithm; + +/* + * public_key.c + */ +extern int public_key_verify_signature(const struct public_key *pk, + const struct public_key_signature *sig); diff --git a/crypto/asymmetric_keys/rsa.c b/crypto/asymmetric_keys/rsa.c index 4a6a069..90a17f5 100644 --- a/crypto/asymmetric_keys/rsa.c +++ b/crypto/asymmetric_keys/rsa.c @@ -73,13 +73,13 @@ static const struct { size_t size; } RSA_ASN1_templates[PKEY_HASH__LAST] = { #define _(X) { RSA_digest_info_##X, sizeof(RSA_digest_info_##X) } - [PKEY_HASH_MD5] = _(MD5), - [PKEY_HASH_SHA1] = _(SHA1), - [PKEY_HASH_RIPE_MD_160] = _(RIPE_MD_160), - [PKEY_HASH_SHA256] = _(SHA256), - [PKEY_HASH_SHA384] = _(SHA384), - [PKEY_HASH_SHA512] = _(SHA512), - [PKEY_HASH_SHA224] = _(SHA224), + [HASH_ALGO_MD5] = _(MD5), + [HASH_ALGO_SHA1] = _(SHA1), + [HASH_ALGO_RIPE_MD_160] = _(RIPE_MD_160), + [HASH_ALGO_SHA256] = _(SHA256), + [HASH_ALGO_SHA384] = _(SHA384), + [HASH_ALGO_SHA512] = _(SHA512), + [HASH_ALGO_SHA224] = _(SHA224), #undef _ }; diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index facbf26..2989316 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -47,6 +47,8 @@ void x509_free_certificate(struct x509_certificate *cert) kfree(cert->subject); kfree(cert->fingerprint); kfree(cert->authority); + kfree(cert->sig.digest); + mpi_free(cert->sig.rsa.s); kfree(cert); } } @@ -152,33 +154,33 @@ int x509_note_pkey_algo(void *context, size_t hdrlen, return -ENOPKG; /* Unsupported combination */ case OID_md4WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_MD5; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_MD5; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha1WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA1; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA1; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha256WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA256; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA256; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha384WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA384; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA384; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha512WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA512; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA512; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha224WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA224; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA224; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; } @@ -203,8 +205,8 @@ int x509_note_signature(void *context, size_t hdrlen, return -EINVAL; } - ctx->cert->sig = value; - ctx->cert->sig_size = vlen; + ctx->cert->raw_sig = value; + ctx->cert->raw_sig_size = vlen; return 0; } @@ -343,8 +345,9 @@ int x509_extract_key_data(void *context, size_t hdrlen, if (ctx->last_oid != OID_rsaEncryption) return -ENOPKG; - /* There seems to be an extraneous 0 byte on the front of the data */ - ctx->cert->pkey_algo = PKEY_ALGO_RSA; + ctx->cert->pub->pkey_algo = PKEY_ALGO_RSA; + + /* Discard the BIT STRING metadata */ ctx->key = value + 1; ctx->key_size = vlen - 1; return 0; diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h index f86dc5f..87d9cc2 100644 --- a/crypto/asymmetric_keys/x509_parser.h +++ b/crypto/asymmetric_keys/x509_parser.h @@ -9,6 +9,7 @@ * 2 of the Licence, or (at your option) any later version. */ +#include <linux/time.h> #include <crypto/public_key.h> struct x509_certificate { @@ -20,13 +21,11 @@ struct x509_certificate { char *authority; /* Authority key fingerprint as hex */ struct tm valid_from; struct tm valid_to; - enum pkey_algo pkey_algo : 8; /* Public key algorithm */ - enum pkey_algo sig_pkey_algo : 8; /* Signature public key algorithm */ - enum pkey_hash_algo sig_hash_algo : 8; /* Signature hash algorithm */ const void *tbs; /* Signed data */ - size_t tbs_size; /* Size of signed data */ - const void *sig; /* Signature data */ - size_t sig_size; /* Size of sigature */ + unsigned tbs_size; /* Size of signed data */ + unsigned raw_sig_size; /* Size of sigature */ + const void *raw_sig; /* Signature data */ + struct public_key_signature sig; /* Signature parameters */ }; /* @@ -34,3 +33,10 @@ struct x509_certificate { */ extern void x509_free_certificate(struct x509_certificate *cert); extern struct x509_certificate *x509_cert_parse(const void *data, size_t datalen); + +/* + * x509_public_key.c + */ +extern int x509_get_sig_params(struct x509_certificate *cert); +extern int x509_check_signature(const struct public_key *pub, + struct x509_certificate *cert); diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 06007f0..f83300b 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -18,85 +18,162 @@ #include <linux/asn1_decoder.h> #include <keys/asymmetric-subtype.h> #include <keys/asymmetric-parser.h> +#include <keys/system_keyring.h> #include <crypto/hash.h> #include "asymmetric_keys.h" #include "public_key.h" #include "x509_parser.h" -static const -struct public_key_algorithm *x509_public_key_algorithms[PKEY_ALGO__LAST] = { - [PKEY_ALGO_DSA] = NULL, -#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \ - defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE) - [PKEY_ALGO_RSA] = &RSA_public_key_algorithm, -#endif -}; +/* + * Find a key in the given keyring by issuer and authority. + */ +static struct key *x509_request_asymmetric_key( + struct key *keyring, + const char *signer, size_t signer_len, + const char *authority, size_t auth_len) +{ + key_ref_t key; + char *id; + + /* Construct an identifier. */ + id = kmalloc(signer_len + 2 + auth_len + 1, GFP_KERNEL); + if (!id) + return ERR_PTR(-ENOMEM); + + memcpy(id, signer, signer_len); + id[signer_len + 0] = ':'; + id[signer_len + 1] = ' '; + memcpy(id + signer_len + 2, authority, auth_len); + id[signer_len + 2 + auth_len] = 0; + + pr_debug("Look up: \"%s\"\n", id); + + key = keyring_search(make_key_ref(keyring, 1), + &key_type_asymmetric, id); + if (IS_ERR(key)) + pr_debug("Request for module key '%s' err %ld\n", + id, PTR_ERR(key)); + kfree(id); + + if (IS_ERR(key)) { + switch (PTR_ERR(key)) { + /* Hide some search errors */ + case -EACCES: + case -ENOTDIR: + case -EAGAIN: + return ERR_PTR(-ENOKEY); + default: + return ERR_CAST(key); + } + } + + pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key))); + return key_ref_to_ptr(key); +} /* - * Check the signature on a certificate using the provided public key + * Set up the signature parameters in an X.509 certificate. This involves + * digesting the signed data and extracting the signature. */ -static int x509_check_signature(const struct public_key *pub, - const struct x509_certificate *cert) +int x509_get_sig_params(struct x509_certificate *cert) { - struct public_key_signature *sig; struct crypto_shash *tfm; struct shash_desc *desc; size_t digest_size, desc_size; + void *digest; int ret; pr_devel("==>%s()\n", __func__); - + + if (cert->sig.rsa.s) + return 0; + + cert->sig.rsa.s = mpi_read_raw_data(cert->raw_sig, cert->raw_sig_size); + if (!cert->sig.rsa.s) + return -ENOMEM; + cert->sig.nr_mpi = 1; + /* Allocate the hashing algorithm we're going to need and find out how * big the hash operational data will be. */ - tfm = crypto_alloc_shash(pkey_hash_algo[cert->sig_hash_algo], 0, 0); + tfm = crypto_alloc_shash(hash_algo_name[cert->sig.pkey_hash_algo], 0, 0); if (IS_ERR(tfm)) return (PTR_ERR(tfm) == -ENOENT) ? -ENOPKG : PTR_ERR(tfm); desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); digest_size = crypto_shash_digestsize(tfm); - /* We allocate the hash operational data storage on the end of our - * context data. + /* We allocate the hash operational data storage on the end of the + * digest storage space. */ ret = -ENOMEM; - sig = kzalloc(sizeof(*sig) + desc_size + digest_size, GFP_KERNEL); - if (!sig) - goto error_no_sig; + digest = kzalloc(digest_size + desc_size, GFP_KERNEL); + if (!digest) + goto error; - sig->pkey_hash_algo = cert->sig_hash_algo; - sig->digest = (u8 *)sig + sizeof(*sig) + desc_size; - sig->digest_size = digest_size; + cert->sig.digest = digest; + cert->sig.digest_size = digest_size; - desc = (void *)sig + sizeof(*sig); - desc->tfm = tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc = digest + digest_size; + desc->tfm = tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; ret = crypto_shash_init(desc); if (ret < 0) goto error; + might_sleep(); + ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, digest); +error: + crypto_free_shash(tfm); + pr_devel("<==%s() = %d\n", __func__, ret); + return ret; +} +EXPORT_SYMBOL_GPL(x509_get_sig_params); - ret = -ENOMEM; - sig->rsa.s = mpi_read_raw_data(cert->sig, cert->sig_size); - if (!sig->rsa.s) - goto error; +/* + * Check the signature on a certificate using the provided public key + */ +int x509_check_signature(const struct public_key *pub, + struct x509_certificate *cert) +{ + int ret; - ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest); - if (ret < 0) - goto error_mpi; + pr_devel("==>%s()\n", __func__); - ret = pub->algo->verify_signature(pub, sig); + ret = x509_get_sig_params(cert); + if (ret < 0) + return ret; + ret = public_key_verify_signature(pub, &cert->sig); pr_debug("Cert Verification: %d\n", ret); + return ret; +} +EXPORT_SYMBOL_GPL(x509_check_signature); -error_mpi: - mpi_free(sig->rsa.s); -error: - kfree(sig); -error_no_sig: - crypto_free_shash(tfm); +/* + * Check the new certificate against the ones in the trust keyring. If one of + * those is the signing key and validates the new certificate, then mark the + * new certificate as being trusted. + * + * Return 0 if the new certificate was successfully validated, 1 if we couldn't + * find a matching parent certificate in the trusted list and an error if there + * is a matching certificate but the signature check fails. + */ +static int x509_validate_trust(struct x509_certificate *cert, + struct key *trust_keyring) +{ + const struct public_key *pk; + struct key *key; + int ret = 1; - pr_devel("<==%s() = %d\n", __func__, ret); + key = x509_request_asymmetric_key(trust_keyring, + cert->issuer, strlen(cert->issuer), + cert->authority, + strlen(cert->authority)); + if (!IS_ERR(key)) { + pk = key->payload.data; + ret = x509_check_signature(pk, cert); + } return ret; } @@ -106,7 +183,6 @@ error_no_sig: static int x509_key_preparse(struct key_preparsed_payload *prep) { struct x509_certificate *cert; - struct tm now; size_t srlen, sulen; char *desc = NULL; int ret; @@ -117,7 +193,18 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) pr_devel("Cert Issuer: %s\n", cert->issuer); pr_devel("Cert Subject: %s\n", cert->subject); - pr_devel("Cert Key Algo: %s\n", pkey_algo[cert->pkey_algo]); + + if (cert->pub->pkey_algo >= PKEY_ALGO__LAST || + cert->sig.pkey_algo >= PKEY_ALGO__LAST || + cert->sig.pkey_hash_algo >= PKEY_HASH__LAST || + !pkey_algo[cert->pub->pkey_algo] || + !pkey_algo[cert->sig.pkey_algo] || + !hash_algo_name[cert->sig.pkey_hash_algo]) { + ret = -ENOPKG; + goto error_free_cert; + } + + pr_devel("Cert Key Algo: %s\n", pkey_algo_name[cert->pub->pkey_algo]); pr_devel("Cert Valid From: %04ld-%02d-%02d %02d:%02d:%02d\n", cert->valid_from.tm_year + 1900, cert->valid_from.tm_mon + 1, cert->valid_from.tm_mday, cert->valid_from.tm_hour, @@ -127,61 +214,29 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) cert->valid_to.tm_mday, cert->valid_to.tm_hour, cert->valid_to.tm_min, cert->valid_to.tm_sec); pr_devel("Cert Signature: %s + %s\n", - pkey_algo[cert->sig_pkey_algo], - pkey_hash_algo[cert->sig_hash_algo]); + pkey_algo_name[cert->sig.pkey_algo], + hash_algo_name[cert->sig.pkey_hash_algo]); - if (!cert->fingerprint || !cert->authority) { - pr_warn("Cert for '%s' must have SubjKeyId and AuthKeyId extensions\n", + if (!cert->fingerprint) { + pr_warn("Cert for '%s' must have a SubjKeyId extension\n", cert->subject); ret = -EKEYREJECTED; goto error_free_cert; } - time_to_tm(CURRENT_TIME.tv_sec, 0, &now); - pr_devel("Now: %04ld-%02d-%02d %02d:%02d:%02d\n", - now.tm_year + 1900, now.tm_mon + 1, now.tm_mday, - now.tm_hour, now.tm_min, now.tm_sec); - if (now.tm_year < cert->valid_from.tm_year || - (now.tm_year == cert->valid_from.tm_year && - (now.tm_mon < cert->valid_from.tm_mon || - (now.tm_mon == cert->valid_from.tm_mon && - (now.tm_mday < cert->valid_from.tm_mday || - (now.tm_mday == cert->valid_from.tm_mday && - (now.tm_hour < cert->valid_from.tm_hour || - (now.tm_hour == cert->valid_from.tm_hour && - (now.tm_min < cert->valid_from.tm_min || - (now.tm_min == cert->valid_from.tm_min && - (now.tm_sec < cert->valid_from.tm_sec - ))))))))))) { - pr_warn("Cert %s is not yet valid\n", cert->fingerprint); - ret = -EKEYREJECTED; - goto error_free_cert; - } - if (now.tm_year > cert->valid_to.tm_year || - (now.tm_year == cert->valid_to.tm_year && - (now.tm_mon > cert->valid_to.tm_mon || - (now.tm_mon == cert->valid_to.tm_mon && - (now.tm_mday > cert->valid_to.tm_mday || - (now.tm_mday == cert->valid_to.tm_mday && - (now.tm_hour > cert->valid_to.tm_hour || - (now.tm_hour == cert->valid_to.tm_hour && - (now.tm_min > cert->valid_to.tm_min || - (now.tm_min == cert->valid_to.tm_min && - (now.tm_sec > cert->valid_to.tm_sec - ))))))))))) { - pr_warn("Cert %s has expired\n", cert->fingerprint); - ret = -EKEYEXPIRED; - goto error_free_cert; - } - - cert->pub->algo = x509_public_key_algorithms[cert->pkey_algo]; + cert->pub->algo = pkey_algo[cert->pub->pkey_algo]; cert->pub->id_type = PKEY_ID_X509; - /* Check the signature on the key */ - if (strcmp(cert->fingerprint, cert->authority) == 0) { - ret = x509_check_signature(cert->pub, cert); + /* Check the signature on the key if it appears to be self-signed */ + if (!cert->authority || + strcmp(cert->fingerprint, cert->authority) == 0) { + ret = x509_check_signature(cert->pub, cert); /* self-signed */ if (ret < 0) goto error_free_cert; + } else { + ret = x509_validate_trust(cert, system_trusted_keyring); + if (!ret) + prep->trusted = 1; } /* Propose a description */ @@ -237,3 +292,6 @@ static void __exit x509_key_exit(void) module_init(x509_key_init); module_exit(x509_key_exit); + +MODULE_DESCRIPTION("X.509 certificate parser"); +MODULE_LICENSE("GPL"); diff --git a/crypto/hash_info.c b/crypto/hash_info.c new file mode 100644 index 0000000..3e7ff46 --- /dev/null +++ b/crypto/hash_info.c @@ -0,0 +1,56 @@ +/* + * Hash Info: Hash algorithms information + * + * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <linux/export.h> +#include <crypto/hash_info.h> + +const char *const hash_algo_name[HASH_ALGO__LAST] = { + [HASH_ALGO_MD4] = "md4", + [HASH_ALGO_MD5] = "md5", + [HASH_ALGO_SHA1] = "sha1", + [HASH_ALGO_RIPE_MD_160] = "rmd160", + [HASH_ALGO_SHA256] = "sha256", + [HASH_ALGO_SHA384] = "sha384", + [HASH_ALGO_SHA512] = "sha512", + [HASH_ALGO_SHA224] = "sha224", + [HASH_ALGO_RIPE_MD_128] = "rmd128", + [HASH_ALGO_RIPE_MD_256] = "rmd256", + [HASH_ALGO_RIPE_MD_320] = "rmd320", + [HASH_ALGO_WP_256] = "wp256", + [HASH_ALGO_WP_384] = "wp384", + [HASH_ALGO_WP_512] = "wp512", + [HASH_ALGO_TGR_128] = "tgr128", + [HASH_ALGO_TGR_160] = "tgr160", + [HASH_ALGO_TGR_192] = "tgr192", +}; +EXPORT_SYMBOL_GPL(hash_algo_name); + +const int hash_digest_size[HASH_ALGO__LAST] = { + [HASH_ALGO_MD4] = MD5_DIGEST_SIZE, + [HASH_ALGO_MD5] = MD5_DIGEST_SIZE, + [HASH_ALGO_SHA1] = SHA1_DIGEST_SIZE, + [HASH_ALGO_RIPE_MD_160] = RMD160_DIGEST_SIZE, + [HASH_ALGO_SHA256] = SHA256_DIGEST_SIZE, + [HASH_ALGO_SHA384] = SHA384_DIGEST_SIZE, + [HASH_ALGO_SHA512] = SHA512_DIGEST_SIZE, + [HASH_ALGO_SHA224] = SHA224_DIGEST_SIZE, + [HASH_ALGO_RIPE_MD_128] = RMD128_DIGEST_SIZE, + [HASH_ALGO_RIPE_MD_256] = RMD256_DIGEST_SIZE, + [HASH_ALGO_RIPE_MD_320] = RMD320_DIGEST_SIZE, + [HASH_ALGO_WP_256] = WP256_DIGEST_SIZE, + [HASH_ALGO_WP_384] = WP384_DIGEST_SIZE, + [HASH_ALGO_WP_512] = WP512_DIGEST_SIZE, + [HASH_ALGO_TGR_128] = TGR128_DIGEST_SIZE, + [HASH_ALGO_TGR_160] = TGR160_DIGEST_SIZE, + [HASH_ALGO_TGR_192] = TGR192_DIGEST_SIZE, +}; +EXPORT_SYMBOL_GPL(hash_digest_size); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index b5d8423..ea192ec 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -223,7 +223,7 @@ static void null_softirq_done_fn(struct request *rq) blk_end_request_all(rq, 0); } -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP static void null_ipi_cmd_end_io(void *data) { @@ -260,7 +260,7 @@ static void null_cmd_end_ipi(struct nullb_cmd *cmd) put_cpu(); } -#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +#endif /* CONFIG_SMP */ static inline void null_handle_cmd(struct nullb_cmd *cmd) { @@ -270,7 +270,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) end_cmd(cmd); break; case NULL_IRQ_SOFTIRQ: -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP null_cmd_end_ipi(cmd); #else end_cmd(cmd); @@ -571,7 +571,7 @@ static int __init null_init(void) { unsigned int i; -#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#if !defined(CONFIG_SMP) if (irqmode == NULL_IRQ_SOFTIRQ) { pr_warn("null_blk: softirq completions not available.\n"); pr_warn("null_blk: using direct completions.\n"); diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index 94c0c74..1a65838 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -33,6 +33,15 @@ config TCG_TIS from within Linux. To compile this driver as a module, choose M here; the module will be called tpm_tis. +config TCG_TIS_I2C_ATMEL + tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)" + depends on I2C + ---help--- + If you have an Atmel I2C TPM security chip say Yes and it will be + accessible from within Linux. + To compile this driver as a module, choose M here; the module will + be called tpm_tis_i2c_atmel. + config TCG_TIS_I2C_INFINEON tristate "TPM Interface Specification 1.2 Interface (I2C - Infineon)" depends on I2C @@ -42,7 +51,17 @@ config TCG_TIS_I2C_INFINEON Specification 0.20 say Yes and it will be accessible from within Linux. To compile this driver as a module, choose M here; the module - will be called tpm_tis_i2c_infineon. + will be called tpm_i2c_infineon. + +config TCG_TIS_I2C_NUVOTON + tristate "TPM Interface Specification 1.2 Interface (I2C - Nuvoton)" + depends on I2C + ---help--- + If you have a TPM security chip with an I2C interface from + Nuvoton Technology Corp. say Yes and it will be accessible + from within Linux. + To compile this driver as a module, choose M here; the module + will be called tpm_i2c_nuvoton. config TCG_NSC tristate "National Semiconductor TPM Interface" @@ -82,14 +101,14 @@ config TCG_IBMVTPM as a module, choose M here; the module will be called tpm_ibmvtpm. config TCG_ST33_I2C - tristate "STMicroelectronics ST33 I2C TPM" - depends on I2C - depends on GPIOLIB - ---help--- - If you have a TPM security chip from STMicroelectronics working with - an I2C bus say Yes and it will be accessible from within Linux. - To compile this driver as a module, choose M here; the module will be - called tpm_stm_st33_i2c. + tristate "STMicroelectronics ST33 I2C TPM" + depends on I2C + depends on GPIOLIB + ---help--- + If you have a TPM security chip from STMicroelectronics working with + an I2C bus say Yes and it will be accessible from within Linux. + To compile this driver as a module, choose M here; the module will be + called tpm_stm_st33_i2c. config TCG_XEN tristate "XEN TPM Interface" diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index eb41ff9..b80a400 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -2,17 +2,20 @@ # Makefile for the kernel tpm device drivers. # obj-$(CONFIG_TCG_TPM) += tpm.o +tpm-y := tpm-interface.o +tpm-$(CONFIG_ACPI) += tpm_ppi.o + ifdef CONFIG_ACPI - obj-$(CONFIG_TCG_TPM) += tpm_bios.o - tpm_bios-objs += tpm_eventlog.o tpm_acpi.o tpm_ppi.o + tpm-y += tpm_eventlog.o tpm_acpi.o else ifdef CONFIG_TCG_IBMVTPM - obj-$(CONFIG_TCG_TPM) += tpm_bios.o - tpm_bios-objs += tpm_eventlog.o tpm_of.o + tpm-y += tpm_eventlog.o tpm_of.o endif endif obj-$(CONFIG_TCG_TIS) += tpm_tis.o +obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o +obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o obj-$(CONFIG_TCG_NSC) += tpm_nsc.o obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm-interface.c index e3c974a..6ae41d3 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm-interface.c @@ -10,13 +10,13 @@ * Maintained by: <tpmdd-devel@lists.sourceforge.net> * * Device driver for TCG/TCPA TPM (trusted platform module). - * Specifications at www.trustedcomputinggroup.org + * Specifications at www.trustedcomputinggroup.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation, version 2 of the * License. - * + * * Note, the TPM chip is not interrupt driven (only polling) * and can have very long timeouts (minutes!). Hence the unusual * calls to msleep. @@ -371,13 +371,14 @@ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, return -ENODATA; if (count > bufsiz) { dev_err(chip->dev, - "invalid count value %x %zx \n", count, bufsiz); + "invalid count value %x %zx\n", count, bufsiz); return -E2BIG; } mutex_lock(&chip->tpm_mutex); - if ((rc = chip->vendor.send(chip, (u8 *) buf, count)) < 0) { + rc = chip->vendor.send(chip, (u8 *) buf, count); + if (rc < 0) { dev_err(chip->dev, "tpm_transmit: tpm_send: error %zd\n", rc); goto out; @@ -444,7 +445,7 @@ static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd, { int err; - len = tpm_transmit(chip,(u8 *) cmd, len); + len = tpm_transmit(chip, (u8 *) cmd, len); if (len < 0) return len; else if (len < TPM_HEADER_SIZE) @@ -658,7 +659,7 @@ static int tpm_continue_selftest(struct tpm_chip *chip) return rc; } -ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr, +ssize_t tpm_show_enabled(struct device *dev, struct device_attribute *attr, char *buf) { cap_t cap; @@ -674,7 +675,7 @@ ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr, } EXPORT_SYMBOL_GPL(tpm_show_enabled); -ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr, +ssize_t tpm_show_active(struct device *dev, struct device_attribute *attr, char *buf) { cap_t cap; @@ -690,7 +691,7 @@ ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr, } EXPORT_SYMBOL_GPL(tpm_show_active); -ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr, +ssize_t tpm_show_owned(struct device *dev, struct device_attribute *attr, char *buf) { cap_t cap; @@ -706,8 +707,8 @@ ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr, } EXPORT_SYMBOL_GPL(tpm_show_owned); -ssize_t tpm_show_temp_deactivated(struct device * dev, - struct device_attribute * attr, char *buf) +ssize_t tpm_show_temp_deactivated(struct device *dev, + struct device_attribute *attr, char *buf) { cap_t cap; ssize_t rc; @@ -769,10 +770,10 @@ static int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) /** * tpm_pcr_read - read a pcr value - * @chip_num: tpm idx # or ANY + * @chip_num: tpm idx # or ANY * @pcr_idx: pcr idx to retrieve - * @res_buf: TPM_PCR value - * size of res_buf is 20 bytes (or NULL if you don't care) + * @res_buf: TPM_PCR value + * size of res_buf is 20 bytes (or NULL if you don't care) * * The TPM driver should be built-in, but for whatever reason it * isn't, protect against the chip disappearing, by incrementing @@ -794,9 +795,9 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read); /** * tpm_pcr_extend - extend pcr value with hash - * @chip_num: tpm idx # or AN& + * @chip_num: tpm idx # or AN& * @pcr_idx: pcr idx to extend - * @hash: hash value used to extend pcr value + * @hash: hash value used to extend pcr value * * The TPM driver should be built-in, but for whatever reason it * isn't, protect against the chip disappearing, by incrementing @@ -847,8 +848,7 @@ int tpm_do_selftest(struct tpm_chip *chip) unsigned long duration; struct tpm_cmd_t cmd; - duration = tpm_calc_ordinal_duration(chip, - TPM_ORD_CONTINUE_SELFTEST); + duration = tpm_calc_ordinal_duration(chip, TPM_ORD_CONTINUE_SELFTEST); loops = jiffies_to_msecs(duration) / delay_msec; @@ -965,12 +965,12 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr, if (err) goto out; - /* + /* ignore header 10 bytes algorithm 32 bits (1 == RSA ) encscheme 16 bits sigscheme 16 bits - parameters (RSA 12->bytes: keybit, #primes, expbit) + parameters (RSA 12->bytes: keybit, #primes, expbit) keylenbytes 32 bits 256 byte modulus ignore checksum 20 bytes @@ -1020,43 +1020,33 @@ ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr, str += sprintf(str, "Manufacturer: 0x%x\n", be32_to_cpu(cap.manufacturer_id)); - rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap, - "attempting to determine the 1.1 version"); - if (rc) - return 0; - str += sprintf(str, - "TCG version: %d.%d\nFirmware version: %d.%d\n", - cap.tpm_version.Major, cap.tpm_version.Minor, - cap.tpm_version.revMajor, cap.tpm_version.revMinor); - return str - buf; -} -EXPORT_SYMBOL_GPL(tpm_show_caps); - -ssize_t tpm_show_caps_1_2(struct device * dev, - struct device_attribute * attr, char *buf) -{ - cap_t cap; - ssize_t rc; - char *str = buf; - - rc = tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap, - "attempting to determine the manufacturer"); - if (rc) - return 0; - str += sprintf(str, "Manufacturer: 0x%x\n", - be32_to_cpu(cap.manufacturer_id)); + /* Try to get a TPM version 1.2 TPM_CAP_VERSION_INFO */ rc = tpm_getcap(dev, CAP_VERSION_1_2, &cap, "attempting to determine the 1.2 version"); - if (rc) - return 0; - str += sprintf(str, - "TCG version: %d.%d\nFirmware version: %d.%d\n", - cap.tpm_version_1_2.Major, cap.tpm_version_1_2.Minor, - cap.tpm_version_1_2.revMajor, - cap.tpm_version_1_2.revMinor); + if (!rc) { + str += sprintf(str, + "TCG version: %d.%d\nFirmware version: %d.%d\n", + cap.tpm_version_1_2.Major, + cap.tpm_version_1_2.Minor, + cap.tpm_version_1_2.revMajor, + cap.tpm_version_1_2.revMinor); + } else { + /* Otherwise just use TPM_STRUCT_VER */ + rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap, + "attempting to determine the 1.1 version"); + if (rc) + return 0; + str += sprintf(str, + "TCG version: %d.%d\nFirmware version: %d.%d\n", + cap.tpm_version.Major, + cap.tpm_version.Minor, + cap.tpm_version.revMajor, + cap.tpm_version.revMinor); + } + return str - buf; } -EXPORT_SYMBOL_GPL(tpm_show_caps_1_2); +EXPORT_SYMBOL_GPL(tpm_show_caps); ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr, char *buf) @@ -1102,8 +1092,8 @@ ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr, } EXPORT_SYMBOL_GPL(tpm_store_cancel); -static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, bool check_cancel, - bool *canceled) +static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, + bool check_cancel, bool *canceled) { u8 status = chip->vendor.status(chip); @@ -1170,38 +1160,25 @@ EXPORT_SYMBOL_GPL(wait_for_tpm_stat); */ int tpm_open(struct inode *inode, struct file *file) { - int minor = iminor(inode); - struct tpm_chip *chip = NULL, *pos; - - rcu_read_lock(); - list_for_each_entry_rcu(pos, &tpm_chip_list, list) { - if (pos->vendor.miscdev.minor == minor) { - chip = pos; - get_device(chip->dev); - break; - } - } - rcu_read_unlock(); - - if (!chip) - return -ENODEV; + struct miscdevice *misc = file->private_data; + struct tpm_chip *chip = container_of(misc, struct tpm_chip, + vendor.miscdev); if (test_and_set_bit(0, &chip->is_open)) { dev_dbg(chip->dev, "Another process owns this TPM\n"); - put_device(chip->dev); return -EBUSY; } chip->data_buffer = kzalloc(TPM_BUFSIZE, GFP_KERNEL); if (chip->data_buffer == NULL) { clear_bit(0, &chip->is_open); - put_device(chip->dev); return -ENOMEM; } atomic_set(&chip->data_pending, 0); file->private_data = chip; + get_device(chip->dev); return 0; } EXPORT_SYMBOL_GPL(tpm_open); @@ -1463,7 +1440,6 @@ void tpm_dev_vendor_release(struct tpm_chip *chip) chip->vendor.release(chip->dev); clear_bit(chip->dev_num, dev_mask); - kfree(chip->vendor.miscdev.name); } EXPORT_SYMBOL_GPL(tpm_dev_vendor_release); @@ -1487,7 +1463,7 @@ void tpm_dev_release(struct device *dev) EXPORT_SYMBOL_GPL(tpm_dev_release); /* - * Called from tpm_<specific>.c probe function only for devices + * Called from tpm_<specific>.c probe function only for devices * the driver has determined it should claim. Prior to calling * this function the specific probe function has called pci_enable_device * upon errant exit from this function specific probe function should call @@ -1496,17 +1472,13 @@ EXPORT_SYMBOL_GPL(tpm_dev_release); struct tpm_chip *tpm_register_hardware(struct device *dev, const struct tpm_vendor_specific *entry) { -#define DEVNAME_SIZE 7 - - char *devname; struct tpm_chip *chip; /* Driver specific per-device data */ chip = kzalloc(sizeof(*chip), GFP_KERNEL); - devname = kmalloc(DEVNAME_SIZE, GFP_KERNEL); - if (chip == NULL || devname == NULL) - goto out_free; + if (chip == NULL) + return NULL; mutex_init(&chip->buffer_mutex); mutex_init(&chip->tpm_mutex); @@ -1531,8 +1503,9 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, set_bit(chip->dev_num, dev_mask); - scnprintf(devname, DEVNAME_SIZE, "%s%d", "tpm", chip->dev_num); - chip->vendor.miscdev.name = devname; + scnprintf(chip->devname, sizeof(chip->devname), "%s%d", "tpm", + chip->dev_num); + chip->vendor.miscdev.name = chip->devname; chip->vendor.miscdev.parent = dev; chip->dev = get_device(dev); @@ -1558,7 +1531,7 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, goto put_device; } - chip->bios_dir = tpm_bios_log_setup(devname); + chip->bios_dir = tpm_bios_log_setup(chip->devname); /* Make chip available */ spin_lock(&driver_lock); @@ -1571,7 +1544,6 @@ put_device: put_device(chip->dev); out_free: kfree(chip); - kfree(devname); return NULL; } EXPORT_SYMBOL_GPL(tpm_register_hardware); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index a7bfc17..f328478 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -59,8 +59,6 @@ extern ssize_t tpm_show_pcrs(struct device *, struct device_attribute *attr, char *); extern ssize_t tpm_show_caps(struct device *, struct device_attribute *attr, char *); -extern ssize_t tpm_show_caps_1_2(struct device *, struct device_attribute *attr, - char *); extern ssize_t tpm_store_cancel(struct device *, struct device_attribute *attr, const char *, size_t); extern ssize_t tpm_show_enabled(struct device *, struct device_attribute *attr, @@ -122,6 +120,7 @@ struct tpm_chip { struct device *dev; /* Device stuff */ int dev_num; /* /dev/tpm# */ + char devname[7]; unsigned long is_open; /* only one allowed */ int time_expired; diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index 99d6820..c9a528d 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -202,7 +202,7 @@ static int __init init_atmel(void) have_region = (atmel_request_region - (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1; + (base, region_size, "tpm_atmel0") == NULL) ? 0 : 1; pdev = platform_device_register_simple("tpm_atmel", -1, NULL, 0); if (IS_ERR(pdev)) { diff --git a/drivers/char/tpm/tpm_eventlog.c b/drivers/char/tpm/tpm_eventlog.c index 84ddc55..59f7cb2 100644 --- a/drivers/char/tpm/tpm_eventlog.c +++ b/drivers/char/tpm/tpm_eventlog.c @@ -406,7 +406,6 @@ out_tpm: out: return NULL; } -EXPORT_SYMBOL_GPL(tpm_bios_log_setup); void tpm_bios_log_teardown(struct dentry **lst) { @@ -415,5 +414,3 @@ void tpm_bios_log_teardown(struct dentry **lst) for (i = 0; i < 3; i++) securityfs_remove(lst[i]); } -EXPORT_SYMBOL_GPL(tpm_bios_log_teardown); -MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c new file mode 100644 index 0000000..c3cd7fe --- /dev/null +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -0,0 +1,284 @@ +/* + * ATMEL I2C TPM AT97SC3204T + * + * Copyright (C) 2012 V Lab Technologies + * Teddy Reed <teddy@prosauce.org> + * Copyright (C) 2013, Obsidian Research Corp. + * Jason Gunthorpe <jgunthorpe@obsidianresearch.com> + * Device driver for ATMEL I2C TPMs. + * + * Teddy Reed determined the basic I2C command flow, unlike other I2C TPM + * devices the raw TCG formatted TPM command data is written via I2C and then + * raw TCG formatted TPM command data is returned via I2C. + * + * TGC status/locality/etc functions seen in the LPC implementation do not + * seem to be present. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/>. + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include "tpm.h" + +#define I2C_DRIVER_NAME "tpm_i2c_atmel" + +#define TPM_I2C_SHORT_TIMEOUT 750 /* ms */ +#define TPM_I2C_LONG_TIMEOUT 2000 /* 2 sec */ + +#define ATMEL_STS_OK 1 + +struct priv_data { + size_t len; + /* This is the amount we read on the first try. 25 was chosen to fit a + * fair number of read responses in the buffer so a 2nd retry can be + * avoided in small message cases. */ + u8 buffer[sizeof(struct tpm_output_header) + 25]; +}; + +static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + struct priv_data *priv = chip->vendor.priv; + struct i2c_client *client = to_i2c_client(chip->dev); + s32 status; + + priv->len = 0; + + if (len <= 2) + return -EIO; + + status = i2c_master_send(client, buf, len); + + dev_dbg(chip->dev, + "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__, + (int)min_t(size_t, 64, len), buf, len, status); + return status; +} + +static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) +{ + struct priv_data *priv = chip->vendor.priv; + struct i2c_client *client = to_i2c_client(chip->dev); + struct tpm_output_header *hdr = + (struct tpm_output_header *)priv->buffer; + u32 expected_len; + int rc; + + if (priv->len == 0) + return -EIO; + + /* Get the message size from the message header, if we didn't get the + * whole message in read_status then we need to re-read the + * message. */ + expected_len = be32_to_cpu(hdr->length); + if (expected_len > count) + return -ENOMEM; + + if (priv->len >= expected_len) { + dev_dbg(chip->dev, + "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__, + (int)min_t(size_t, 64, expected_len), buf, count, + expected_len); + memcpy(buf, priv->buffer, expected_len); + return expected_len; + } + + rc = i2c_master_recv(client, buf, expected_len); + dev_dbg(chip->dev, + "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__, + (int)min_t(size_t, 64, expected_len), buf, count, + expected_len); + return rc; +} + +static void i2c_atmel_cancel(struct tpm_chip *chip) +{ + dev_err(chip->dev, "TPM operation cancellation was requested, but is not supported"); +} + +static u8 i2c_atmel_read_status(struct tpm_chip *chip) +{ + struct priv_data *priv = chip->vendor.priv; + struct i2c_client *client = to_i2c_client(chip->dev); + int rc; + + /* The TPM fails the I2C read until it is ready, so we do the entire + * transfer here and buffer it locally. This way the common code can + * properly handle the timeouts. */ + priv->len = 0; + memset(priv->buffer, 0, sizeof(priv->buffer)); + + + /* Once the TPM has completed the command the command remains readable + * until another command is issued. */ + rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer)); + dev_dbg(chip->dev, + "%s: sts=%d", __func__, rc); + if (rc <= 0) + return 0; + + priv->len = rc; + + return ATMEL_STS_OK; +} + +static const struct file_operations i2c_atmel_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); +static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); +static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); + +static struct attribute *i2c_atmel_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_enabled.attr, + &dev_attr_active.attr, + &dev_attr_owned.attr, + &dev_attr_temp_deactivated.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + &dev_attr_durations.attr, + &dev_attr_timeouts.attr, + NULL, +}; + +static struct attribute_group i2c_atmel_attr_grp = { + .attrs = i2c_atmel_attrs +}; + +static bool i2c_atmel_req_canceled(struct tpm_chip *chip, u8 status) +{ + return 0; +} + +static const struct tpm_vendor_specific i2c_atmel = { + .status = i2c_atmel_read_status, + .recv = i2c_atmel_recv, + .send = i2c_atmel_send, + .cancel = i2c_atmel_cancel, + .req_complete_mask = ATMEL_STS_OK, + .req_complete_val = ATMEL_STS_OK, + .req_canceled = i2c_atmel_req_canceled, + .attr_group = &i2c_atmel_attr_grp, + .miscdev.fops = &i2c_atmel_ops, +}; + +static int i2c_atmel_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int rc; + struct tpm_chip *chip; + struct device *dev = &client->dev; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENODEV; + + chip = tpm_register_hardware(dev, &i2c_atmel); + if (!chip) { + dev_err(dev, "%s() error in tpm_register_hardware\n", __func__); + return -ENODEV; + } + + chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data), + GFP_KERNEL); + + /* Default timeouts */ + chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT); + chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.irq = 0; + + /* There is no known way to probe for this device, and all version + * information seems to be read via TPM commands. Thus we rely on the + * TPM startup process in the common code to detect the device. */ + if (tpm_get_timeouts(chip)) { + rc = -ENODEV; + goto out_err; + } + + if (tpm_do_selftest(chip)) { + rc = -ENODEV; + goto out_err; + } + + return 0; + +out_err: + tpm_dev_vendor_release(chip); + tpm_remove_hardware(chip->dev); + return rc; +} + +static int i2c_atmel_remove(struct i2c_client *client) +{ + struct device *dev = &(client->dev); + struct tpm_chip *chip = dev_get_drvdata(dev); + + if (chip) + tpm_dev_vendor_release(chip); + tpm_remove_hardware(dev); + kfree(chip); + return 0; +} + +static const struct i2c_device_id i2c_atmel_id[] = { + {I2C_DRIVER_NAME, 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, i2c_atmel_id); + +#ifdef CONFIG_OF +static const struct of_device_id i2c_atmel_of_match[] = { + {.compatible = "atmel,at97sc3204t"}, + {}, +}; +MODULE_DEVICE_TABLE(of, i2c_atmel_of_match); +#endif + +static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume); + +static struct i2c_driver i2c_atmel_driver = { + .id_table = i2c_atmel_id, + .probe = i2c_atmel_probe, + .remove = i2c_atmel_remove, + .driver = { + .name = I2C_DRIVER_NAME, + .owner = THIS_MODULE, + .pm = &i2c_atmel_pm_ops, + .of_match_table = of_match_ptr(i2c_atmel_of_match), + }, +}; + +module_i2c_driver(i2c_atmel_driver); + +MODULE_AUTHOR("Jason Gunthorpe <jgunthorpe@obsidianresearch.com>"); +MODULE_DESCRIPTION("Atmel TPM I2C Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index b8735de..fefd2aa 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -581,7 +581,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); -static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); @@ -685,7 +685,6 @@ out_vendor: chip->dev->release = NULL; chip->release = NULL; tpm_dev.client = NULL; - dev_set_drvdata(chip->dev, chip); out_err: return rc; } @@ -766,7 +765,6 @@ static int tpm_tis_i2c_remove(struct i2c_client *client) chip->dev->release = NULL; chip->release = NULL; tpm_dev.client = NULL; - dev_set_drvdata(chip->dev, chip); return 0; } diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c new file mode 100644 index 0000000..6276fea --- /dev/null +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -0,0 +1,710 @@ +/****************************************************************************** + * Nuvoton TPM I2C Device Driver Interface for WPCT301/NPCT501, + * based on the TCG TPM Interface Spec version 1.2. + * Specifications at www.trustedcomputinggroup.org + * + * Copyright (C) 2011, Nuvoton Technology Corporation. + * Dan Morav <dan.morav@nuvoton.com> + * Copyright (C) 2013, Obsidian Research Corp. + * Jason Gunthorpe <jgunthorpe@obsidianresearch.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/>. + * + * Nuvoton contact information: APC.Support@nuvoton.com + *****************************************************************************/ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/wait.h> +#include <linux/i2c.h> +#include "tpm.h" + +/* I2C interface offsets */ +#define TPM_STS 0x00 +#define TPM_BURST_COUNT 0x01 +#define TPM_DATA_FIFO_W 0x20 +#define TPM_DATA_FIFO_R 0x40 +#define TPM_VID_DID_RID 0x60 +/* TPM command header size */ +#define TPM_HEADER_SIZE 10 +#define TPM_RETRY 5 +/* + * I2C bus device maximum buffer size w/o counting I2C address or command + * i.e. max size required for I2C write is 34 = addr, command, 32 bytes data + */ +#define TPM_I2C_MAX_BUF_SIZE 32 +#define TPM_I2C_RETRY_COUNT 32 +#define TPM_I2C_BUS_DELAY 1 /* msec */ +#define TPM_I2C_RETRY_DELAY_SHORT 2 /* msec */ +#define TPM_I2C_RETRY_DELAY_LONG 10 /* msec */ + +#define I2C_DRIVER_NAME "tpm_i2c_nuvoton" + +struct priv_data { + unsigned int intrs; +}; + +static s32 i2c_nuvoton_read_buf(struct i2c_client *client, u8 offset, u8 size, + u8 *data) +{ + s32 status; + + status = i2c_smbus_read_i2c_block_data(client, offset, size, data); + dev_dbg(&client->dev, + "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__, + offset, size, (int)size, data, status); + return status; +} + +static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size, + u8 *data) +{ + s32 status; + + status = i2c_smbus_write_i2c_block_data(client, offset, size, data); + dev_dbg(&client->dev, + "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__, + offset, size, (int)size, data, status); + return status; +} + +#define TPM_STS_VALID 0x80 +#define TPM_STS_COMMAND_READY 0x40 +#define TPM_STS_GO 0x20 +#define TPM_STS_DATA_AVAIL 0x10 +#define TPM_STS_EXPECT 0x08 +#define TPM_STS_RESPONSE_RETRY 0x02 +#define TPM_STS_ERR_VAL 0x07 /* bit2...bit0 reads always 0 */ + +#define TPM_I2C_SHORT_TIMEOUT 750 /* ms */ +#define TPM_I2C_LONG_TIMEOUT 2000 /* 2 sec */ + +/* read TPM_STS register */ +static u8 i2c_nuvoton_read_status(struct tpm_chip *chip) +{ + struct i2c_client *client = to_i2c_client(chip->dev); + s32 status; + u8 data; + + status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data); + if (status <= 0) { + dev_err(chip->dev, "%s() error return %d\n", __func__, + status); + data = TPM_STS_ERR_VAL; + } + + return data; +} + +/* write byte to TPM_STS register */ +static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data) +{ + s32 status; + int i; + + /* this causes the current command to be aborted */ + for (i = 0, status = -1; i < TPM_I2C_RETRY_COUNT && status < 0; i++) { + status = i2c_nuvoton_write_buf(client, TPM_STS, 1, &data); + msleep(TPM_I2C_BUS_DELAY); + } + return status; +} + +/* write commandReady to TPM_STS register */ +static void i2c_nuvoton_ready(struct tpm_chip *chip) +{ + struct i2c_client *client = to_i2c_client(chip->dev); + s32 status; + + /* this causes the current command to be aborted */ + status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY); + if (status < 0) + dev_err(chip->dev, + "%s() fail to write TPM_STS.commandReady\n", __func__); +} + +/* read burstCount field from TPM_STS register + * return -1 on fail to read */ +static int i2c_nuvoton_get_burstcount(struct i2c_client *client, + struct tpm_chip *chip) +{ + unsigned long stop = jiffies + chip->vendor.timeout_d; + s32 status; + int burst_count = -1; + u8 data; + + /* wait for burstcount to be non-zero */ + do { + /* in I2C burstCount is 1 byte */ + status = i2c_nuvoton_read_buf(client, TPM_BURST_COUNT, 1, + &data); + if (status > 0 && data > 0) { + burst_count = min_t(u8, TPM_I2C_MAX_BUF_SIZE, data); + break; + } + msleep(TPM_I2C_BUS_DELAY); + } while (time_before(jiffies, stop)); + + return burst_count; +} + +/* + * WPCT301/NPCT501 SINT# supports only dataAvail + * any call to this function which is not waiting for dataAvail will + * set queue to NULL to avoid waiting for interrupt + */ +static bool i2c_nuvoton_check_status(struct tpm_chip *chip, u8 mask, u8 value) +{ + u8 status = i2c_nuvoton_read_status(chip); + return (status != TPM_STS_ERR_VAL) && ((status & mask) == value); +} + +static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value, + u32 timeout, wait_queue_head_t *queue) +{ + if (chip->vendor.irq && queue) { + s32 rc; + DEFINE_WAIT(wait); + struct priv_data *priv = chip->vendor.priv; + unsigned int cur_intrs = priv->intrs; + + enable_irq(chip->vendor.irq); + rc = wait_event_interruptible_timeout(*queue, + cur_intrs != priv->intrs, + timeout); + if (rc > 0) + return 0; + /* At this point we know that the SINT pin is asserted, so we + * do not need to do i2c_nuvoton_check_status */ + } else { + unsigned long ten_msec, stop; + bool status_valid; + + /* check current status */ + status_valid = i2c_nuvoton_check_status(chip, mask, value); + if (status_valid) + return 0; + + /* use polling to wait for the event */ + ten_msec = jiffies + msecs_to_jiffies(TPM_I2C_RETRY_DELAY_LONG); + stop = jiffies + timeout; + do { + if (time_before(jiffies, ten_msec)) + msleep(TPM_I2C_RETRY_DELAY_SHORT); + else + msleep(TPM_I2C_RETRY_DELAY_LONG); + status_valid = i2c_nuvoton_check_status(chip, mask, + value); + if (status_valid) + return 0; + } while (time_before(jiffies, stop)); + } + dev_err(chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask, + value); + return -ETIMEDOUT; +} + +/* wait for dataAvail field to be set in the TPM_STS register */ +static int i2c_nuvoton_wait_for_data_avail(struct tpm_chip *chip, u32 timeout, + wait_queue_head_t *queue) +{ + return i2c_nuvoton_wait_for_stat(chip, + TPM_STS_DATA_AVAIL | TPM_STS_VALID, + TPM_STS_DATA_AVAIL | TPM_STS_VALID, + timeout, queue); +} + +/* Read @count bytes into @buf from TPM_RD_FIFO register */ +static int i2c_nuvoton_recv_data(struct i2c_client *client, + struct tpm_chip *chip, u8 *buf, size_t count) +{ + s32 rc; + int burst_count, bytes2read, size = 0; + + while (size < count && + i2c_nuvoton_wait_for_data_avail(chip, + chip->vendor.timeout_c, + &chip->vendor.read_queue) == 0) { + burst_count = i2c_nuvoton_get_burstcount(client, chip); + if (burst_count < 0) { + dev_err(chip->dev, + "%s() fail to read burstCount=%d\n", __func__, + burst_count); + return -EIO; + } + bytes2read = min_t(size_t, burst_count, count - size); + rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R, + bytes2read, &buf[size]); + if (rc < 0) { + dev_err(chip->dev, + "%s() fail on i2c_nuvoton_read_buf()=%d\n", + __func__, rc); + return -EIO; + } + dev_dbg(chip->dev, "%s(%d):", __func__, bytes2read); + size += bytes2read; + } + + return size; +} + +/* Read TPM command results */ +static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) +{ + struct device *dev = chip->dev; + struct i2c_client *client = to_i2c_client(dev); + s32 rc; + int expected, status, burst_count, retries, size = 0; + + if (count < TPM_HEADER_SIZE) { + i2c_nuvoton_ready(chip); /* return to idle */ + dev_err(dev, "%s() count < header size\n", __func__); + return -EIO; + } + for (retries = 0; retries < TPM_RETRY; retries++) { + if (retries > 0) { + /* if this is not the first trial, set responseRetry */ + i2c_nuvoton_write_status(client, + TPM_STS_RESPONSE_RETRY); + } + /* + * read first available (> 10 bytes), including: + * tag, paramsize, and result + */ + status = i2c_nuvoton_wait_for_data_avail( + chip, chip->vendor.timeout_c, &chip->vendor.read_queue); + if (status != 0) { + dev_err(dev, "%s() timeout on dataAvail\n", __func__); + size = -ETIMEDOUT; + continue; + } + burst_count = i2c_nuvoton_get_burstcount(client, chip); + if (burst_count < 0) { + dev_err(dev, "%s() fail to get burstCount\n", __func__); + size = -EIO; + continue; + } + size = i2c_nuvoton_recv_data(client, chip, buf, + burst_count); + if (size < TPM_HEADER_SIZE) { + dev_err(dev, "%s() fail to read header\n", __func__); + size = -EIO; + continue; + } + /* + * convert number of expected bytes field from big endian 32 bit + * to machine native + */ + expected = be32_to_cpu(*(__be32 *) (buf + 2)); + if (expected > count) { + dev_err(dev, "%s() expected > count\n", __func__); + size = -EIO; + continue; + } + rc = i2c_nuvoton_recv_data(client, chip, &buf[size], + expected - size); + size += rc; + if (rc < 0 || size < expected) { + dev_err(dev, "%s() fail to read remainder of result\n", + __func__); + size = -EIO; + continue; + } + if (i2c_nuvoton_wait_for_stat( + chip, TPM_STS_VALID | TPM_STS_DATA_AVAIL, + TPM_STS_VALID, chip->vendor.timeout_c, + NULL)) { + dev_err(dev, "%s() error left over data\n", __func__); + size = -ETIMEDOUT; + continue; + } + break; + } + i2c_nuvoton_ready(chip); + dev_dbg(chip->dev, "%s() -> %d\n", __func__, size); + return size; +} + +/* + * Send TPM command. + * + * If interrupts are used (signaled by an irq set in the vendor structure) + * tpm.c can skip polling for the data to be available as the interrupt is + * waited for here + */ +static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + struct device *dev = chip->dev; + struct i2c_client *client = to_i2c_client(dev); + u32 ordinal; + size_t count = 0; + int burst_count, bytes2write, retries, rc = -EIO; + + for (retries = 0; retries < TPM_RETRY; retries++) { + i2c_nuvoton_ready(chip); + if (i2c_nuvoton_wait_for_stat(chip, TPM_STS_COMMAND_READY, + TPM_STS_COMMAND_READY, + chip->vendor.timeout_b, NULL)) { + dev_err(dev, "%s() timeout on commandReady\n", + __func__); + rc = -EIO; + continue; + } + rc = 0; + while (count < len - 1) { + burst_count = i2c_nuvoton_get_burstcount(client, + chip); + if (burst_count < 0) { + dev_err(dev, "%s() fail get burstCount\n", + __func__); + rc = -EIO; + break; + } + bytes2write = min_t(size_t, burst_count, + len - 1 - count); + rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W, + bytes2write, &buf[count]); + if (rc < 0) { + dev_err(dev, "%s() fail i2cWriteBuf\n", + __func__); + break; + } + dev_dbg(dev, "%s(%d):", __func__, bytes2write); + count += bytes2write; + rc = i2c_nuvoton_wait_for_stat(chip, + TPM_STS_VALID | + TPM_STS_EXPECT, + TPM_STS_VALID | + TPM_STS_EXPECT, + chip->vendor.timeout_c, + NULL); + if (rc < 0) { + dev_err(dev, "%s() timeout on Expect\n", + __func__); + rc = -ETIMEDOUT; + break; + } + } + if (rc < 0) + continue; + + /* write last byte */ + rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W, 1, + &buf[count]); + if (rc < 0) { + dev_err(dev, "%s() fail to write last byte\n", + __func__); + rc = -EIO; + continue; + } + dev_dbg(dev, "%s(last): %02x", __func__, buf[count]); + rc = i2c_nuvoton_wait_for_stat(chip, + TPM_STS_VALID | TPM_STS_EXPECT, + TPM_STS_VALID, + chip->vendor.timeout_c, NULL); + if (rc) { + dev_err(dev, "%s() timeout on Expect to clear\n", + __func__); + rc = -ETIMEDOUT; + continue; + } + break; + } + if (rc < 0) { + /* retries == TPM_RETRY */ + i2c_nuvoton_ready(chip); + return rc; + } + /* execute the TPM command */ + rc = i2c_nuvoton_write_status(client, TPM_STS_GO); + if (rc < 0) { + dev_err(dev, "%s() fail to write Go\n", __func__); + i2c_nuvoton_ready(chip); + return rc; + } + ordinal = be32_to_cpu(*((__be32 *) (buf + 6))); + rc = i2c_nuvoton_wait_for_data_avail(chip, + tpm_calc_ordinal_duration(chip, + ordinal), + &chip->vendor.read_queue); + if (rc) { + dev_err(dev, "%s() timeout command duration\n", __func__); + i2c_nuvoton_ready(chip); + return rc; + } + + dev_dbg(dev, "%s() -> %zd\n", __func__, len); + return len; +} + +static bool i2c_nuvoton_req_canceled(struct tpm_chip *chip, u8 status) +{ + return (status == TPM_STS_COMMAND_READY); +} + +static const struct file_operations i2c_nuvoton_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); +static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); +static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); + +static struct attribute *i2c_nuvoton_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_enabled.attr, + &dev_attr_active.attr, + &dev_attr_owned.attr, + &dev_attr_temp_deactivated.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + &dev_attr_durations.attr, + &dev_attr_timeouts.attr, + NULL, +}; + +static struct attribute_group i2c_nuvoton_attr_grp = { + .attrs = i2c_nuvoton_attrs +}; + +static const struct tpm_vendor_specific tpm_i2c = { + .status = i2c_nuvoton_read_status, + .recv = i2c_nuvoton_recv, + .send = i2c_nuvoton_send, + .cancel = i2c_nuvoton_ready, + .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID, + .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID, + .req_canceled = i2c_nuvoton_req_canceled, + .attr_group = &i2c_nuvoton_attr_grp, + .miscdev.fops = &i2c_nuvoton_ops, +}; + +/* The only purpose for the handler is to signal to any waiting threads that + * the interrupt is currently being asserted. The driver does not do any + * processing triggered by interrupts, and the chip provides no way to mask at + * the source (plus that would be slow over I2C). Run the IRQ as a one-shot, + * this means it cannot be shared. */ +static irqreturn_t i2c_nuvoton_int_handler(int dummy, void *dev_id) +{ + struct tpm_chip *chip = dev_id; + struct priv_data *priv = chip->vendor.priv; + + priv->intrs++; + wake_up(&chip->vendor.read_queue); + disable_irq_nosync(chip->vendor.irq); + return IRQ_HANDLED; +} + +static int get_vid(struct i2c_client *client, u32 *res) +{ + static const u8 vid_did_rid_value[] = { 0x50, 0x10, 0xfe }; + u32 temp; + s32 rc; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + rc = i2c_nuvoton_read_buf(client, TPM_VID_DID_RID, 4, (u8 *)&temp); + if (rc < 0) + return rc; + + /* check WPCT301 values - ignore RID */ + if (memcmp(&temp, vid_did_rid_value, sizeof(vid_did_rid_value))) { + /* + * f/w rev 2.81 has an issue where the VID_DID_RID is not + * reporting the right value. so give it another chance at + * offset 0x20 (FIFO_W). + */ + rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_W, 4, + (u8 *) (&temp)); + if (rc < 0) + return rc; + + /* check WPCT301 values - ignore RID */ + if (memcmp(&temp, vid_did_rid_value, + sizeof(vid_did_rid_value))) + return -ENODEV; + } + + *res = temp; + return 0; +} + +static int i2c_nuvoton_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int rc; + struct tpm_chip *chip; + struct device *dev = &client->dev; + u32 vid = 0; + + rc = get_vid(client, &vid); + if (rc) + return rc; + + dev_info(dev, "VID: %04X DID: %02X RID: %02X\n", (u16) vid, + (u8) (vid >> 16), (u8) (vid >> 24)); + + chip = tpm_register_hardware(dev, &tpm_i2c); + if (!chip) { + dev_err(dev, "%s() error in tpm_register_hardware\n", __func__); + return -ENODEV; + } + + chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data), + GFP_KERNEL); + init_waitqueue_head(&chip->vendor.read_queue); + init_waitqueue_head(&chip->vendor.int_queue); + + /* Default timeouts */ + chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT); + chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + + /* + * I2C intfcaps (interrupt capabilitieis) in the chip are hard coded to: + * TPM_INTF_INT_LEVEL_LOW | TPM_INTF_DATA_AVAIL_INT + * The IRQ should be set in the i2c_board_info (which is done + * automatically in of_i2c_register_devices, for device tree users */ + chip->vendor.irq = client->irq; + + if (chip->vendor.irq) { + dev_dbg(dev, "%s() chip-vendor.irq\n", __func__); + rc = devm_request_irq(dev, chip->vendor.irq, + i2c_nuvoton_int_handler, + IRQF_TRIGGER_LOW, + chip->vendor.miscdev.name, + chip); + if (rc) { + dev_err(dev, "%s() Unable to request irq: %d for use\n", + __func__, chip->vendor.irq); + chip->vendor.irq = 0; + } else { + /* Clear any pending interrupt */ + i2c_nuvoton_ready(chip); + /* - wait for TPM_STS==0xA0 (stsValid, commandReady) */ + rc = i2c_nuvoton_wait_for_stat(chip, + TPM_STS_COMMAND_READY, + TPM_STS_COMMAND_READY, + chip->vendor.timeout_b, + NULL); + if (rc == 0) { + /* + * TIS is in ready state + * write dummy byte to enter reception state + * TPM_DATA_FIFO_W <- rc (0) + */ + rc = i2c_nuvoton_write_buf(client, + TPM_DATA_FIFO_W, + 1, (u8 *) (&rc)); + if (rc < 0) + goto out_err; + /* TPM_STS <- 0x40 (commandReady) */ + i2c_nuvoton_ready(chip); + } else { + /* + * timeout_b reached - command was + * aborted. TIS should now be in idle state - + * only TPM_STS_VALID should be set + */ + if (i2c_nuvoton_read_status(chip) != + TPM_STS_VALID) { + rc = -EIO; + goto out_err; + } + } + } + } + + if (tpm_get_timeouts(chip)) { + rc = -ENODEV; + goto out_err; + } + + if (tpm_do_selftest(chip)) { + rc = -ENODEV; + goto out_err; + } + + return 0; + +out_err: + tpm_dev_vendor_release(chip); + tpm_remove_hardware(chip->dev); + return rc; +} + +static int i2c_nuvoton_remove(struct i2c_client *client) +{ + struct device *dev = &(client->dev); + struct tpm_chip *chip = dev_get_drvdata(dev); + + if (chip) + tpm_dev_vendor_release(chip); + tpm_remove_hardware(dev); + kfree(chip); + return 0; +} + + +static const struct i2c_device_id i2c_nuvoton_id[] = { + {I2C_DRIVER_NAME, 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, i2c_nuvoton_id); + +#ifdef CONFIG_OF +static const struct of_device_id i2c_nuvoton_of_match[] = { + {.compatible = "nuvoton,npct501"}, + {.compatible = "winbond,wpct301"}, + {}, +}; +MODULE_DEVICE_TABLE(of, i2c_nuvoton_of_match); +#endif + +static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume); + +static struct i2c_driver i2c_nuvoton_driver = { + .id_table = i2c_nuvoton_id, + .probe = i2c_nuvoton_probe, + .remove = i2c_nuvoton_remove, + .driver = { + .name = I2C_DRIVER_NAME, + .owner = THIS_MODULE, + .pm = &i2c_nuvoton_pm_ops, + .of_match_table = of_match_ptr(i2c_nuvoton_of_match), + }, +}; + +module_i2c_driver(i2c_nuvoton_driver); + +MODULE_AUTHOR("Dan Morav (dan.morav@nuvoton.com)"); +MODULE_DESCRIPTION("Nuvoton TPM I2C Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 5bb8e2d..a0d6ceb5 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -584,7 +584,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); -static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static struct attribute *stm_tpm_attrs[] = { @@ -746,8 +746,6 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) tpm_get_timeouts(chip); - i2c_set_clientdata(client, chip); - dev_info(chip->dev, "TPM I2C Initialized\n"); return 0; _irq_set: @@ -807,24 +805,18 @@ static int tpm_st33_i2c_remove(struct i2c_client *client) #ifdef CONFIG_PM_SLEEP /* * tpm_st33_i2c_pm_suspend suspend the TPM device - * Added: Work around when suspend and no tpm application is running, suspend - * may fail because chip->data_buffer is not set (only set in tpm_open in Linux - * TPM core) * @param: client, the i2c_client drescription (TPM I2C description). * @param: mesg, the power management message. * @return: 0 in case of success. */ static int tpm_st33_i2c_pm_suspend(struct device *dev) { - struct tpm_chip *chip = dev_get_drvdata(dev); struct st33zp24_platform_data *pin_infos = dev->platform_data; int ret = 0; if (power_mgt) { gpio_set_value(pin_infos->io_lpcpd, 0); } else { - if (chip->data_buffer == NULL) - chip->data_buffer = pin_infos->tpm_i2c_buffer[0]; ret = tpm_pm_suspend(dev); } return ret; @@ -849,8 +841,6 @@ static int tpm_st33_i2c_pm_resume(struct device *dev) TPM_STS_VALID) == TPM_STS_VALID, chip->vendor.timeout_b); } else { - if (chip->data_buffer == NULL) - chip->data_buffer = pin_infos->tpm_i2c_buffer[0]; ret = tpm_pm_resume(dev); if (!ret) tpm_do_selftest(chip); diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 56b07c3..2783a42 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -98,7 +98,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) if (count < len) { dev_err(ibmvtpm->dev, - "Invalid size in recv: count=%ld, crq_size=%d\n", + "Invalid size in recv: count=%zd, crq_size=%d\n", count, len); return -EIO; } @@ -136,7 +136,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) if (count > ibmvtpm->rtce_size) { dev_err(ibmvtpm->dev, - "Invalid size in send: count=%ld, rtce_size=%d\n", + "Invalid size in send: count=%zd, rtce_size=%d\n", count, ibmvtpm->rtce_size); return -EIO; } @@ -419,7 +419,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); -static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c index 2168d15..8e562dc 100644 --- a/drivers/char/tpm/tpm_ppi.c +++ b/drivers/char/tpm/tpm_ppi.c @@ -452,12 +452,8 @@ int tpm_add_ppi(struct kobject *parent) { return sysfs_create_group(parent, &ppi_attr_grp); } -EXPORT_SYMBOL_GPL(tpm_add_ppi); void tpm_remove_ppi(struct kobject *parent) { sysfs_remove_group(parent, &ppi_attr_grp); } -EXPORT_SYMBOL_GPL(tpm_remove_ppi); - -MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 5796d01..1b74459 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -448,7 +448,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); -static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 94c280d..c8ff4df 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -351,8 +351,6 @@ static int tpmfront_probe(struct xenbus_device *dev, tpm_get_timeouts(priv->chip); - dev_set_drvdata(&dev->dev, priv->chip); - return rv; } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index b3ab9d4..52d548f 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -656,6 +656,7 @@ config SENSORS_LM75 - Analog Devices ADT75 - Dallas Semiconductor DS75, DS1775 and DS7505 + - Global Mixed-mode Technology (GMT) G751 - Maxim MAX6625 and MAX6626 - Microchip MCP980x - National Semiconductor LM75, LM75A diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 8d40da3..6a34f7f 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -602,9 +602,8 @@ static int read_domain_devices(struct acpi_power_meter_resource *resource) /* Create a symlink to domain objects */ resource->domain_devices[i] = NULL; - status = acpi_bus_get_device(element->reference.handle, - &resource->domain_devices[i]); - if (ACPI_FAILURE(status)) + if (acpi_bus_get_device(element->reference.handle, + &resource->domain_devices[i])) continue; obj = resource->domain_devices[i]; diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index c03b490..7e3ef13 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -39,6 +39,7 @@ enum lm75_type { /* keep sorted in alphabetical order */ ds1775, ds75, ds7505, + g751, lm75, lm75a, max6625, @@ -208,6 +209,7 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) data->resolution = 12; data->sample_time = HZ / 4; break; + case g751: case lm75: case lm75a: data->resolution = 9; @@ -296,6 +298,7 @@ static const struct i2c_device_id lm75_ids[] = { { "ds1775", ds1775, }, { "ds75", ds75, }, { "ds7505", ds7505, }, + { "g751", g751, }, { "lm75", lm75, }, { "lm75a", lm75a, }, { "max6625", max6625, }, diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index d17325d..cf811c1 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -274,6 +274,8 @@ static const u16 NCT6775_FAN_PULSE_SHIFT[] = { 0, 0, 0, 0, 0, 0 }; static const u16 NCT6775_REG_TEMP[] = { 0x27, 0x150, 0x250, 0x62b, 0x62c, 0x62d }; +static const u16 NCT6775_REG_TEMP_MON[] = { 0x73, 0x75, 0x77 }; + static const u16 NCT6775_REG_TEMP_CONFIG[ARRAY_SIZE(NCT6775_REG_TEMP)] = { 0, 0x152, 0x252, 0x628, 0x629, 0x62A }; static const u16 NCT6775_REG_TEMP_HYST[ARRAY_SIZE(NCT6775_REG_TEMP)] = { @@ -454,6 +456,7 @@ static const u16 NCT6779_REG_CRITICAL_PWM[] = { 0x137, 0x237, 0x337, 0x837, 0x937, 0xa37 }; static const u16 NCT6779_REG_TEMP[] = { 0x27, 0x150 }; +static const u16 NCT6779_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b }; static const u16 NCT6779_REG_TEMP_CONFIG[ARRAY_SIZE(NCT6779_REG_TEMP)] = { 0x18, 0x152 }; static const u16 NCT6779_REG_TEMP_HYST[ARRAY_SIZE(NCT6779_REG_TEMP)] = { @@ -507,6 +510,13 @@ static const u16 NCT6779_REG_TEMP_CRIT[ARRAY_SIZE(nct6779_temp_label) - 1] #define NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE 0x28 +static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[6] = { 0, 0x239 }; +static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[6] = { 0, 0x23a }; +static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[6] = { 0, 0x23b }; +static const u16 NCT6791_REG_WEIGHT_DUTY_STEP[6] = { 0, 0x23c }; +static const u16 NCT6791_REG_WEIGHT_TEMP_BASE[6] = { 0, 0x23d }; +static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[6] = { 0, 0x23e }; + static const u16 NCT6791_REG_ALARM[NUM_REG_ALARM] = { 0x459, 0x45A, 0x45B, 0x568, 0x45D }; @@ -534,6 +544,7 @@ static const u16 NCT6106_REG_IN[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x07, 0x08, 0x09 }; static const u16 NCT6106_REG_TEMP[] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15 }; +static const u16 NCT6106_REG_TEMP_MON[] = { 0x18, 0x19, 0x1a }; static const u16 NCT6106_REG_TEMP_HYST[] = { 0xc3, 0xc7, 0xcb, 0xcf, 0xd3, 0xd7 }; static const u16 NCT6106_REG_TEMP_OVER[] = { @@ -1307,6 +1318,9 @@ static void nct6775_update_pwm(struct device *dev) if (reg & 0x80) data->pwm[2][i] = 0; + if (!data->REG_WEIGHT_TEMP_SEL[i]) + continue; + reg = nct6775_read_value(data, data->REG_WEIGHT_TEMP_SEL[i]); data->pwm_weight_temp_sel[i] = reg & 0x1f; /* If weight is disabled, report weight source as 0 */ @@ -2852,6 +2866,9 @@ static umode_t nct6775_pwm_is_visible(struct kobject *kobj, if (!(data->has_pwm & (1 << pwm))) return 0; + if ((nr >= 14 && nr <= 18) || nr == 21) /* weight */ + if (!data->REG_WEIGHT_TEMP_SEL[pwm]) + return 0; if (nr == 19 && data->REG_PWM[3] == NULL) /* pwm_max */ return 0; if (nr == 20 && data->REG_PWM[4] == NULL) /* pwm_step */ @@ -2945,11 +2962,11 @@ static struct sensor_device_template *nct6775_attributes_pwm_template[] = { &sensor_dev_template_pwm_step_down_time, &sensor_dev_template_pwm_start, &sensor_dev_template_pwm_floor, - &sensor_dev_template_pwm_weight_temp_sel, + &sensor_dev_template_pwm_weight_temp_sel, /* 14 */ &sensor_dev_template_pwm_weight_temp_step, &sensor_dev_template_pwm_weight_temp_step_tol, &sensor_dev_template_pwm_weight_temp_step_base, - &sensor_dev_template_pwm_weight_duty_step, + &sensor_dev_template_pwm_weight_duty_step, /* 18 */ &sensor_dev_template_pwm_max, /* 19 */ &sensor_dev_template_pwm_step, /* 20 */ &sensor_dev_template_pwm_weight_duty_base, /* 21 */ @@ -3253,9 +3270,9 @@ static int nct6775_probe(struct platform_device *pdev) int i, s, err = 0; int src, mask, available; const u16 *reg_temp, *reg_temp_over, *reg_temp_hyst, *reg_temp_config; - const u16 *reg_temp_alternate, *reg_temp_crit; + const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit; const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL; - int num_reg_temp; + int num_reg_temp, num_reg_temp_mon; u8 cr2a; struct attribute_group *group; struct device *hwmon_dev; @@ -3338,7 +3355,9 @@ static int nct6775_probe(struct platform_device *pdev) data->BEEP_BITS = NCT6106_BEEP_BITS; reg_temp = NCT6106_REG_TEMP; + reg_temp_mon = NCT6106_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON); reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; @@ -3410,7 +3429,9 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6775_REG_BEEP; reg_temp = NCT6775_REG_TEMP; + reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6775_REG_TEMP_CONFIG; @@ -3480,7 +3501,9 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6776_REG_BEEP; reg_temp = NCT6775_REG_TEMP; + reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6776_REG_TEMP_CONFIG; @@ -3554,7 +3577,9 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6776_REG_BEEP; reg_temp = NCT6779_REG_TEMP; + reg_temp_mon = NCT6779_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON); reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; @@ -3603,8 +3628,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; data->REG_PWM[2] = NCT6775_REG_FAN_STOP_OUTPUT; - data->REG_PWM[5] = NCT6775_REG_WEIGHT_DUTY_STEP; - data->REG_PWM[6] = NCT6776_REG_WEIGHT_DUTY_BASE; + data->REG_PWM[5] = NCT6791_REG_WEIGHT_DUTY_STEP; + data->REG_PWM[6] = NCT6791_REG_WEIGHT_DUTY_BASE; data->REG_PWM_READ = NCT6775_REG_PWM_READ; data->REG_PWM_MODE = NCT6776_REG_PWM_MODE; data->PWM_MODE_MASK = NCT6776_PWM_MODE_MASK; @@ -3620,15 +3645,17 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_TEMP_OFFSET = NCT6779_REG_TEMP_OFFSET; data->REG_TEMP_SOURCE = NCT6775_REG_TEMP_SOURCE; data->REG_TEMP_SEL = NCT6775_REG_TEMP_SEL; - data->REG_WEIGHT_TEMP_SEL = NCT6775_REG_WEIGHT_TEMP_SEL; - data->REG_WEIGHT_TEMP[0] = NCT6775_REG_WEIGHT_TEMP_STEP; - data->REG_WEIGHT_TEMP[1] = NCT6775_REG_WEIGHT_TEMP_STEP_TOL; - data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; + data->REG_WEIGHT_TEMP_SEL = NCT6791_REG_WEIGHT_TEMP_SEL; + data->REG_WEIGHT_TEMP[0] = NCT6791_REG_WEIGHT_TEMP_STEP; + data->REG_WEIGHT_TEMP[1] = NCT6791_REG_WEIGHT_TEMP_STEP_TOL; + data->REG_WEIGHT_TEMP[2] = NCT6791_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6791_REG_ALARM; data->REG_BEEP = NCT6776_REG_BEEP; reg_temp = NCT6779_REG_TEMP; + reg_temp_mon = NCT6779_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON); reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; @@ -3729,6 +3756,50 @@ static int nct6775_probe(struct platform_device *pdev) s++; } + /* + * Repeat with temperatures used for fan control. + * This set of registers does not support limits. + */ + for (i = 0; i < num_reg_temp_mon; i++) { + if (reg_temp_mon[i] == 0) + continue; + + src = nct6775_read_value(data, data->REG_TEMP_SEL[i]) & 0x1f; + if (!src || (mask & (1 << src))) + continue; + + if (src >= data->temp_label_num || + !strlen(data->temp_label[src])) { + dev_info(dev, + "Invalid temperature source %d at index %d, source register 0x%x, temp register 0x%x\n", + src, i, data->REG_TEMP_SEL[i], + reg_temp_mon[i]); + continue; + } + + mask |= 1 << src; + + /* Use fixed index for SYSTIN(1), CPUTIN(2), AUXTIN(3) */ + if (src <= data->temp_fixed_num) { + if (data->have_temp & (1 << (src - 1))) + continue; + data->have_temp |= 1 << (src - 1); + data->have_temp_fixed |= 1 << (src - 1); + data->reg_temp[0][src - 1] = reg_temp_mon[i]; + data->temp_src[src - 1] = src; + continue; + } + + if (s >= NUM_TEMP) + continue; + + /* Use dynamic index for other sources */ + data->have_temp |= 1 << s; + data->reg_temp[0][s] = reg_temp_mon[i]; + data->temp_src[s] = src; + s++; + } + #ifdef USE_ALTERNATE /* * Go through the list of alternate temp registers and enable diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6df2350..6be57c3 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -22,6 +22,7 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/in6.h> +#include <linux/llist.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include <target/target_core_base.h> @@ -489,6 +490,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); + mutex_init(&isert_conn->conn_comp_mutex); spin_lock_init(&isert_conn->conn_lock); cma_id->context = isert_conn; @@ -843,14 +845,32 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn, } static void -isert_init_send_wr(struct isert_cmd *isert_cmd, struct ib_send_wr *send_wr) +isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, + struct ib_send_wr *send_wr, bool coalesce) { + struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc; + isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND; send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; send_wr->opcode = IB_WR_SEND; - send_wr->send_flags = IB_SEND_SIGNALED; - send_wr->sg_list = &isert_cmd->tx_desc.tx_sg[0]; + send_wr->sg_list = &tx_desc->tx_sg[0]; send_wr->num_sge = isert_cmd->tx_desc.num_sge; + /* + * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED + * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls. + */ + mutex_lock(&isert_conn->conn_comp_mutex); + if (coalesce && + ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) { + llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist); + mutex_unlock(&isert_conn->conn_comp_mutex); + return; + } + isert_conn->conn_comp_batch = 0; + tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist); + mutex_unlock(&isert_conn->conn_comp_mutex); + + send_wr->send_flags = IB_SEND_SIGNALED; } static int @@ -1582,8 +1602,8 @@ isert_response_completion(struct iser_tx_desc *tx_desc, } static void -isert_send_completion(struct iser_tx_desc *tx_desc, - struct isert_conn *isert_conn) +__isert_send_completion(struct iser_tx_desc *tx_desc, + struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct isert_cmd *isert_cmd = tx_desc->isert_cmd; @@ -1624,6 +1644,24 @@ isert_send_completion(struct iser_tx_desc *tx_desc, } static void +isert_send_completion(struct iser_tx_desc *tx_desc, + struct isert_conn *isert_conn) +{ + struct llist_node *llnode = tx_desc->comp_llnode_batch; + struct iser_tx_desc *t; + /* + * Drain coalesced completion llist starting from comp_llnode_batch + * setup in isert_init_send_wr(), and then complete trailing tx_desc. + */ + while (llnode) { + t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); + llnode = llist_next(llnode); + __isert_send_completion(t, isert_conn); + } + __isert_send_completion(tx_desc, isert_conn); +} + +static void isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; @@ -1793,7 +1831,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_cmd->tx_desc.num_sge = 2; } - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, true); pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1813,7 +1851,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, &isert_cmd->tx_desc.iscsi_header, nopout_response); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1831,7 +1869,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1849,7 +1887,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting Task Management Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1881,7 +1919,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) tx_dsg->lkey = isert_conn->conn_mr->lkey; isert_cmd->tx_desc.num_sge = 2; - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1921,7 +1959,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) tx_dsg->lkey = isert_conn->conn_mr->lkey; isert_cmd->tx_desc.num_sge = 2; } - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1991,8 +2029,6 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { data_left = se_cmd->data_length; - iscsit_increment_maxcmdsn(cmd, conn->sess); - cmd->stat_sn = conn->stat_sn++; } else { sg_off = cmd->write_data_done / PAGE_SIZE; data_left = se_cmd->data_length - cmd->write_data_done; @@ -2204,8 +2240,6 @@ isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { data_left = se_cmd->data_length; - iscsit_increment_maxcmdsn(cmd, conn->sess); - cmd->stat_sn = conn->stat_sn++; } else { sg_off = cmd->write_data_done / PAGE_SIZE; data_left = se_cmd->data_length - cmd->write_data_done; @@ -2259,18 +2293,26 @@ isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, data_len = min(data_left, rdma_write_max); wr->cur_rdma_length = data_len; - spin_lock_irqsave(&isert_conn->conn_lock, flags); - fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, - struct fast_reg_descriptor, list); - list_del(&fr_desc->list); - spin_unlock_irqrestore(&isert_conn->conn_lock, flags); - wr->fr_desc = fr_desc; + /* if there is a single dma entry, dma mr is sufficient */ + if (count == 1) { + ib_sge->addr = ib_sg_dma_address(ib_dev, &sg_start[0]); + ib_sge->length = ib_sg_dma_len(ib_dev, &sg_start[0]); + ib_sge->lkey = isert_conn->conn_mr->lkey; + wr->fr_desc = NULL; + } else { + spin_lock_irqsave(&isert_conn->conn_lock, flags); + fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, + struct fast_reg_descriptor, list); + list_del(&fr_desc->list); + spin_unlock_irqrestore(&isert_conn->conn_lock, flags); + wr->fr_desc = fr_desc; - ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, - ib_sge, offset, data_len); - if (ret) { - list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); - goto unmap_sg; + ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, + ib_sge, offset, data_len); + if (ret) { + list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); + goto unmap_sg; + } } return 0; @@ -2306,10 +2348,11 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) * Build isert_conn->tx_desc for iSCSI response PDU and attach */ isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) + iscsit_build_rsp_pdu(cmd, conn, true, (struct iscsi_scsi_rsp *) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); + isert_init_send_wr(isert_conn, isert_cmd, + &isert_cmd->tx_desc.send_wr, true); atomic_inc(&isert_conn->post_send_buf_count); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 631f209..691f90f 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -43,6 +43,8 @@ struct iser_tx_desc { struct ib_sge tx_sg[2]; int num_sge; struct isert_cmd *isert_cmd; + struct llist_node *comp_llnode_batch; + struct llist_node comp_llnode; struct ib_send_wr send_wr; } __packed; @@ -121,6 +123,10 @@ struct isert_conn { int conn_frwr_pool_size; /* lock to protect frwr_pool */ spinlock_t conn_lock; +#define ISERT_COMP_BATCH_COUNT 8 + int conn_comp_batch; + struct llist_head conn_comp_llist; + struct mutex conn_comp_mutex; }; #define ISERT_MAX_CQ 64 diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 6c923c7..520a7e5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1352,11 +1352,8 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) /* XXX(hch): this is a horrible layering violation.. */ spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state |= CMD_T_LUN_STOP; ioctx->cmd.transport_state &= ~CMD_T_ACTIVE; spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); - - complete(&ioctx->cmd.transport_lun_stop_comp); break; case SRPT_STATE_CMD_RSP_SENT: /* @@ -1364,9 +1361,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. */ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state |= CMD_T_LUN_STOP; - spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); break; case SRPT_STATE_MGMT_RSP_SENT: @@ -1476,7 +1470,6 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, { struct se_cmd *cmd; enum srpt_command_state state; - unsigned long flags; cmd = &ioctx->cmd; state = srpt_get_cmd_state(ioctx); @@ -1496,9 +1489,6 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, __func__, __LINE__, state); break; case SRPT_RDMA_WRITE_LAST: - spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state |= CMD_T_LUN_STOP; - spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); break; default: printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__, diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index e47dcb9..5cefb47 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -117,7 +117,6 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sk_buff *skb; struct sock *sk = sock->sk; - struct sockaddr_mISDN *maddr; int copied, err; @@ -135,9 +134,9 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) { - msg->msg_namelen = sizeof(struct sockaddr_mISDN); - maddr = (struct sockaddr_mISDN *)msg->msg_name; + if (msg->msg_name) { + struct sockaddr_mISDN *maddr = msg->msg_name; + maddr->family = AF_ISDN; maddr->dev = _pms(sk)->dev->id; if ((sk->sk_protocol == ISDN_P_LAPD_TE) || @@ -150,11 +149,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, maddr->sapi = _pms(sk)->ch.addr & 0xFF; maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xFF; } - } else { - if (msg->msg_namelen) - printk(KERN_WARNING "%s: too small namelen %d\n", - __func__, msg->msg_namelen); - msg->msg_namelen = 0; + msg->msg_namelen = sizeof(*maddr); } copied = skb->len + MISDN_HEADER_LEN; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 74630e9..d6447b3 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -697,7 +697,7 @@ static int genphy_config_advert(struct phy_device *phydev) * to the values in phydev. Assumes that the values are valid. * Please see phy_sanitize_settings(). */ -static int genphy_setup_forced(struct phy_device *phydev) +int genphy_setup_forced(struct phy_device *phydev) { int err; int ctl = 0; @@ -716,7 +716,7 @@ static int genphy_setup_forced(struct phy_device *phydev) return err; } - +EXPORT_SYMBOL(genphy_setup_forced); /** * genphy_restart_aneg - Enable and Restart Autonegotiation diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 69b482b..508e435 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -3,7 +3,7 @@ * * Author: Kriston Carson * - * Copyright (c) 2005, 2009 Freescale Semiconductor, Inc. + * Copyright (c) 2005, 2009, 2011 Freescale Semiconductor, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -18,6 +18,11 @@ #include <linux/ethtool.h> #include <linux/phy.h> +/* Vitesse Extended Page Magic Register(s) */ +#define MII_VSC82X4_EXT_PAGE_16E 0x10 +#define MII_VSC82X4_EXT_PAGE_17E 0x11 +#define MII_VSC82X4_EXT_PAGE_18E 0x12 + /* Vitesse Extended Control Register 1 */ #define MII_VSC8244_EXT_CON1 0x17 #define MII_VSC8244_EXTCON1_INIT 0x0000 @@ -54,7 +59,13 @@ #define MII_VSC8221_AUXCONSTAT_INIT 0x0004 /* need to set this bit? */ #define MII_VSC8221_AUXCONSTAT_RESERVED 0x0004 +/* Vitesse Extended Page Access Register */ +#define MII_VSC82X4_EXT_PAGE_ACCESS 0x1f + +#define PHY_ID_VSC8234 0x000fc620 #define PHY_ID_VSC8244 0x000fc6c0 +#define PHY_ID_VSC8574 0x000704a0 +#define PHY_ID_VSC8662 0x00070660 #define PHY_ID_VSC8221 0x000fc550 #define PHY_ID_VSC8211 0x000fc4b0 @@ -118,7 +129,9 @@ static int vsc82xx_config_intr(struct phy_device *phydev) if (phydev->interrupts == PHY_INTERRUPT_ENABLED) err = phy_write(phydev, MII_VSC8244_IMASK, - phydev->drv->phy_id == PHY_ID_VSC8244 ? + (phydev->drv->phy_id == PHY_ID_VSC8234 || + phydev->drv->phy_id == PHY_ID_VSC8244 || + phydev->drv->phy_id == PHY_ID_VSC8574) ? MII_VSC8244_IMASK_MASK : MII_VSC8221_IMASK_MASK); else { @@ -149,21 +162,114 @@ static int vsc8221_config_init(struct phy_device *phydev) */ } -/* Vitesse 824x */ +/* vsc82x4_config_autocross_enable - Enable auto MDI/MDI-X for forced links + * @phydev: target phy_device struct + * + * Enable auto MDI/MDI-X when in 10/100 forced link speeds by writing + * special values in the VSC8234/VSC8244 extended reserved registers + */ +static int vsc82x4_config_autocross_enable(struct phy_device *phydev) +{ + int ret; + + if (phydev->autoneg == AUTONEG_ENABLE || phydev->speed > SPEED_100) + return 0; + + /* map extended registers set 0x10 - 0x1e */ + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x52b5); + if (ret >= 0) + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_18E, 0x0012); + if (ret >= 0) + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_17E, 0x2803); + if (ret >= 0) + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_16E, 0x87fa); + /* map standard registers set 0x10 - 0x1e */ + if (ret >= 0) + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x0000); + else + phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x0000); + + return ret; +} + +/* vsc82x4_config_aneg - restart auto-negotiation or write BMCR + * @phydev: target phy_device struct + * + * Description: If auto-negotiation is enabled, we configure the + * advertising, and then restart auto-negotiation. If it is not + * enabled, then we write the BMCR and also start the auto + * MDI/MDI-X feature + */ +static int vsc82x4_config_aneg(struct phy_device *phydev) +{ + int ret; + + /* Enable auto MDI/MDI-X when in 10/100 forced link speeds by + * writing special values in the VSC8234 extended reserved registers + */ + if (phydev->autoneg != AUTONEG_ENABLE && phydev->speed <= SPEED_100) { + ret = genphy_setup_forced(phydev); + + if (ret < 0) /* error */ + return ret; + + return vsc82x4_config_autocross_enable(phydev); + } + + return genphy_config_aneg(phydev); +} + +/* Vitesse 82xx */ static struct phy_driver vsc82xx_driver[] = { { + .phy_id = PHY_ID_VSC8234, + .name = "Vitesse VSC8234", + .phy_id_mask = 0x000ffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &vsc824x_config_init, + .config_aneg = &vsc82x4_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc824x_ack_interrupt, + .config_intr = &vsc82xx_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { .phy_id = PHY_ID_VSC8244, .name = "Vitesse VSC8244", .phy_id_mask = 0x000fffc0, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, - .config_aneg = &genphy_config_aneg, + .config_aneg = &vsc82x4_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, .driver = { .owner = THIS_MODULE,}, }, { + .phy_id = PHY_ID_VSC8574, + .name = "Vitesse VSC8574", + .phy_id_mask = 0x000ffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &vsc824x_config_init, + .config_aneg = &vsc82x4_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc824x_ack_interrupt, + .config_intr = &vsc82xx_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_VSC8662, + .name = "Vitesse VSC8662", + .phy_id_mask = 0x000ffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &vsc824x_config_init, + .config_aneg = &vsc82x4_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc824x_ack_interrupt, + .config_intr = &vsc82xx_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { /* Vitesse 8221 */ .phy_id = PHY_ID_VSC8221, .phy_id_mask = 0x000ffff0, @@ -207,7 +313,10 @@ module_init(vsc82xx_init); module_exit(vsc82xx_exit); static struct mdio_device_id __maybe_unused vitesse_tbl[] = { + { PHY_ID_VSC8234, 0x000ffff0 }, { PHY_ID_VSC8244, 0x000fffc0 }, + { PHY_ID_VSC8574, 0x000ffff0 }, + { PHY_ID_VSC8662, 0x000ffff0 }, { PHY_ID_VSC8221, 0x000ffff0 }, { PHY_ID_VSC8211, 0x000ffff0 }, { } diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 5f66e30..82ee6ed 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -979,8 +979,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, if (error < 0) goto end; - m->msg_namelen = 0; - if (skb) { total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f3fce41..5107372 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -24,7 +24,7 @@ #include <linux/ipv6.h> /* Version Information */ -#define DRIVER_VERSION "v1.01.0 (2013/08/12)" +#define DRIVER_VERSION "v1.02.0 (2013/10/28)" #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>" #define DRIVER_DESC "Realtek RTL8152 Based USB 2.0 Ethernet Adapters" #define MODULENAME "r8152" @@ -307,22 +307,22 @@ enum rtl8152_flags { #define MCU_TYPE_USB 0x0000 struct rx_desc { - u32 opts1; + __le32 opts1; #define RX_LEN_MASK 0x7fff - u32 opts2; - u32 opts3; - u32 opts4; - u32 opts5; - u32 opts6; + __le32 opts2; + __le32 opts3; + __le32 opts4; + __le32 opts5; + __le32 opts6; }; struct tx_desc { - u32 opts1; + __le32 opts1; #define TX_FS (1 << 31) /* First segment of a packet */ #define TX_LS (1 << 30) /* Final segment of a packet */ #define TX_LEN_MASK 0x3ffff - u32 opts2; + __le32 opts2; #define UDP_CS (1 << 31) /* Calculate UDP/IP checksum */ #define TCP_CS (1 << 30) /* Calculate TCP/IP checksum */ #define IPV4_CS (1 << 29) /* Calculate IPv4 checksum */ @@ -365,6 +365,7 @@ struct r8152 { struct mii_if_info mii; int intr_interval; u32 msg_enable; + u32 tx_qlen; u16 ocp_base; u8 *intr_buff; u8 version; @@ -876,7 +877,7 @@ static void write_bulk_callback(struct urb *urb) static void intr_callback(struct urb *urb) { struct r8152 *tp; - __u16 *d; + __le16 *d; int status = urb->status; int res; @@ -1136,14 +1137,14 @@ r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb) static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) { - u32 remain; + int remain; u8 *tx_data; tx_data = agg->head; agg->skb_num = agg->skb_len = 0; - remain = rx_buf_sz - sizeof(struct tx_desc); + remain = rx_buf_sz; - while (remain >= ETH_ZLEN) { + while (remain >= ETH_ZLEN + sizeof(struct tx_desc)) { struct tx_desc *tx_desc; struct sk_buff *skb; unsigned int len; @@ -1152,12 +1153,14 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) if (!skb) break; + remain -= sizeof(*tx_desc); len = skb->len; if (remain < len) { skb_queue_head(&tp->tx_queue, skb); break; } + tx_data = tx_agg_align(tx_data); tx_desc = (struct tx_desc *)tx_data; tx_data += sizeof(*tx_desc); @@ -1167,11 +1170,18 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) agg->skb_len += len; dev_kfree_skb_any(skb); - tx_data = tx_agg_align(tx_data + len); - remain = rx_buf_sz - sizeof(*tx_desc) - - (u32)((void *)tx_data - agg->head); + tx_data += len; + remain = rx_buf_sz - (int)(tx_agg_align(tx_data) - agg->head); } + netif_tx_lock(tp->netdev); + + if (netif_queue_stopped(tp->netdev) && + skb_queue_len(&tp->tx_queue) < tp->tx_qlen) + netif_wake_queue(tp->netdev); + + netif_tx_unlock(tp->netdev); + usb_fill_bulk_urb(agg->urb, tp->udev, usb_sndbulkpipe(tp->udev, 2), agg->head, (int)(tx_data - (u8 *)agg->head), (usb_complete_t)write_bulk_callback, agg); @@ -1188,7 +1198,6 @@ static void rx_bottom(struct r8152 *tp) list_for_each_safe(cursor, next, &tp->rx_done) { struct rx_desc *rx_desc; struct rx_agg *agg; - unsigned pkt_len; int len_used = 0; struct urb *urb; u8 *rx_data; @@ -1204,17 +1213,22 @@ static void rx_bottom(struct r8152 *tp) rx_desc = agg->head; rx_data = agg->head; - pkt_len = le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK; - len_used += sizeof(struct rx_desc) + pkt_len; + len_used += sizeof(struct rx_desc); - while (urb->actual_length >= len_used) { + while (urb->actual_length > len_used) { struct net_device *netdev = tp->netdev; struct net_device_stats *stats; + unsigned int pkt_len; struct sk_buff *skb; + pkt_len = le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK; if (pkt_len < ETH_ZLEN) break; + len_used += pkt_len; + if (urb->actual_length < len_used) + break; + stats = rtl8152_get_stats(netdev); pkt_len -= 4; /* CRC */ @@ -1234,9 +1248,8 @@ static void rx_bottom(struct r8152 *tp) rx_data = rx_agg_align(rx_data + pkt_len + 4); rx_desc = (struct rx_desc *)rx_data; - pkt_len = le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK; len_used = (int)(rx_data - (u8 *)agg->head); - len_used += sizeof(struct rx_desc) + pkt_len; + len_used += sizeof(struct rx_desc); } submit: @@ -1384,53 +1397,17 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct r8152 *tp = netdev_priv(netdev); - struct net_device_stats *stats = rtl8152_get_stats(netdev); - unsigned long flags; - struct tx_agg *agg = NULL; - struct tx_desc *tx_desc; - unsigned int len; - u8 *tx_data; - int res; skb_tx_timestamp(skb); - /* If tx_queue is not empty, it means at least one previous packt */ - /* is waiting for sending. Don't send current one before it. */ - if (skb_queue_empty(&tp->tx_queue)) - agg = r8152_get_tx_agg(tp); - - if (!agg) { - skb_queue_tail(&tp->tx_queue, skb); - return NETDEV_TX_OK; - } + skb_queue_tail(&tp->tx_queue, skb); - tx_desc = (struct tx_desc *)agg->head; - tx_data = agg->head + sizeof(*tx_desc); - agg->skb_num = agg->skb_len = 0; + if (list_empty(&tp->tx_free) && + skb_queue_len(&tp->tx_queue) > tp->tx_qlen) + netif_stop_queue(netdev); - len = skb->len; - r8152_tx_csum(tp, tx_desc, skb); - memcpy(tx_data, skb->data, len); - dev_kfree_skb_any(skb); - agg->skb_num++; - agg->skb_len += len; - usb_fill_bulk_urb(agg->urb, tp->udev, usb_sndbulkpipe(tp->udev, 2), - agg->head, len + sizeof(*tx_desc), - (usb_complete_t)write_bulk_callback, agg); - res = usb_submit_urb(agg->urb, GFP_ATOMIC); - if (res) { - /* Can we get/handle EPIPE here? */ - if (res == -ENODEV) { - netif_device_detach(tp->netdev); - } else { - netif_warn(tp, tx_err, netdev, - "failed tx_urb %d\n", res); - stats->tx_dropped++; - spin_lock_irqsave(&tp->tx_lock, flags); - list_add_tail(&agg->list, &tp->tx_free); - spin_unlock_irqrestore(&tp->tx_lock, flags); - } - } + if (!list_empty(&tp->tx_free)) + tasklet_schedule(&tp->tl); return NETDEV_TX_OK; } @@ -1459,6 +1436,14 @@ static void rtl8152_nic_reset(struct r8152 *tp) } } +static void set_tx_qlen(struct r8152 *tp) +{ + struct net_device *netdev = tp->netdev; + + tp->tx_qlen = rx_buf_sz / (netdev->mtu + VLAN_ETH_HLEN + VLAN_HLEN + + sizeof(struct tx_desc)); +} + static inline u8 rtl8152_get_speed(struct r8152 *tp) { return ocp_read_byte(tp, MCU_TYPE_PLA, PLA_PHYSTATUS); @@ -1470,6 +1455,7 @@ static int rtl8152_enable(struct r8152 *tp) int i, ret; u8 speed; + set_tx_qlen(tp); speed = rtl8152_get_speed(tp); if (speed & _10bps) { ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR); diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 11f5358..d39b79f 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -701,6 +701,54 @@ static int ar9550_hw_get_modes_txgain_index(struct ath_hw *ah, return ret; } +static void ar9003_doubler_fix(struct ath_hw *ah) +{ + if (AR_SREV_9300(ah) || AR_SREV_9580(ah) || AR_SREV_9550(ah)) { + REG_RMW(ah, AR_PHY_65NM_CH0_RXTX2, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); + REG_RMW(ah, AR_PHY_65NM_CH1_RXTX2, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); + REG_RMW(ah, AR_PHY_65NM_CH2_RXTX2, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); + + udelay(200); + + REG_CLR_BIT(ah, AR_PHY_65NM_CH0_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); + REG_CLR_BIT(ah, AR_PHY_65NM_CH1_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); + REG_CLR_BIT(ah, AR_PHY_65NM_CH2_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); + + udelay(1); + + REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); + REG_RMW_FIELD(ah, AR_PHY_65NM_CH1_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); + REG_RMW_FIELD(ah, AR_PHY_65NM_CH2_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); + + udelay(200); + + REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_SYNTH12, + AR_PHY_65NM_CH0_SYNTH12_VREFMUL3, 0xf); + + REG_RMW(ah, AR_PHY_65NM_CH0_RXTX2, 0, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); + REG_RMW(ah, AR_PHY_65NM_CH1_RXTX2, 0, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); + REG_RMW(ah, AR_PHY_65NM_CH2_RXTX2, 0, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); + } +} + static int ar9003_hw_process_ini(struct ath_hw *ah, struct ath9k_channel *chan) { @@ -726,6 +774,8 @@ static int ar9003_hw_process_ini(struct ath_hw *ah, modesIndex); } + ar9003_doubler_fix(ah); + /* * RXGAIN initvals. */ diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h index fca6243..2af667b 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h @@ -656,13 +656,24 @@ #define AR_PHY_SYNTH4_LONG_SHIFT_SELECT ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x00000001 : 0x00000002) #define AR_PHY_SYNTH4_LONG_SHIFT_SELECT_S ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0 : 1) #define AR_PHY_65NM_CH0_SYNTH7 0x16098 +#define AR_PHY_65NM_CH0_SYNTH12 0x160ac #define AR_PHY_65NM_CH0_BIAS1 0x160c0 #define AR_PHY_65NM_CH0_BIAS2 0x160c4 #define AR_PHY_65NM_CH0_BIAS4 0x160cc +#define AR_PHY_65NM_CH0_RXTX2 0x16104 +#define AR_PHY_65NM_CH1_RXTX2 0x16504 +#define AR_PHY_65NM_CH2_RXTX2 0x16904 #define AR_PHY_65NM_CH0_RXTX4 0x1610c #define AR_PHY_65NM_CH1_RXTX4 0x1650c #define AR_PHY_65NM_CH2_RXTX4 0x1690c +#define AR_PHY_65NM_CH0_SYNTH12_VREFMUL3 0x00780000 +#define AR_PHY_65NM_CH0_SYNTH12_VREFMUL3_S 19 +#define AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK 0x00000004 +#define AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S 2 +#define AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK 0x00000008 +#define AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S 3 + #define AR_CH0_TOP (AR_SREV_9300(ah) ? 0x16288 : \ (((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x1628c : 0x16280))) #define AR_CH0_TOP_XPABIASLVL (AR_SREV_9550(ah) ? 0x3c0 : 0x300) diff --git a/drivers/net/wireless/ath/ath9k/ar9462_2p1_initvals.h b/drivers/net/wireless/ath/ath9k/ar9462_2p1_initvals.h index 4dbc294..57fc5f4 100644 --- a/drivers/net/wireless/ath/ath9k/ar9462_2p1_initvals.h +++ b/drivers/net/wireless/ath/ath9k/ar9462_2p1_initvals.h @@ -361,7 +361,7 @@ static const u32 ar9462_2p1_baseband_postamble[][5] = { {0x00009e14, 0x37b95d5e, 0x37b9605e, 0x3236605e, 0x32365a5e}, {0x00009e18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, {0x00009e1c, 0x0001cf9c, 0x0001cf9c, 0x00021f9c, 0x00021f9c}, - {0x00009e20, 0x000003b5, 0x000003b5, 0x000003ce, 0x000003ce}, + {0x00009e20, 0x000003a5, 0x000003a5, 0x000003a5, 0x000003a5}, {0x00009e2c, 0x0000001c, 0x0000001c, 0x00000021, 0x00000021}, {0x00009e3c, 0xcf946220, 0xcf946220, 0xcfd5c782, 0xcfd5c282}, {0x00009e44, 0x62321e27, 0x62321e27, 0xfe291e27, 0xfe291e27}, @@ -400,7 +400,7 @@ static const u32 ar9462_2p1_baseband_postamble[][5] = { {0x0000ae04, 0x001c0000, 0x001c0000, 0x001c0000, 0x00100000}, {0x0000ae18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, {0x0000ae1c, 0x0000019c, 0x0000019c, 0x0000019c, 0x0000019c}, - {0x0000ae20, 0x000001b5, 0x000001b5, 0x000001ce, 0x000001ce}, + {0x0000ae20, 0x000001a6, 0x000001a6, 0x000001aa, 0x000001aa}, {0x0000b284, 0x00000000, 0x00000000, 0x00000550, 0x00000550}, }; @@ -472,7 +472,7 @@ static const u32 ar9462_2p1_radio_postamble[][5] = { static const u32 ar9462_2p1_soc_preamble[][2] = { /* Addr allmodes */ - {0x000040a4, 0x00a0c1c9}, + {0x000040a4, 0x00a0c9c9}, {0x00007020, 0x00000000}, {0x00007034, 0x00000002}, {0x00007038, 0x000004c2}, diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index c00687e..1217c52 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -362,7 +362,8 @@ static int __ath_reg_dyn_country(struct wiphy *wiphy, { u16 country_code; - if (!ath_is_world_regd(reg)) + if (request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + !ath_is_world_regd(reg)) return -EINVAL; country_code = ath_regd_find_country_by_name(request->alpha2); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c index 5b5b952..4a22930 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c @@ -823,6 +823,7 @@ static s32 brcmf_p2p_run_escan(struct brcmf_cfg80211_info *cfg, } err = brcmf_p2p_escan(p2p, num_nodfs, chanspecs, search_state, action, P2PAPI_BSSCFG_DEVICE); + kfree(chanspecs); } exit: if (err) diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index fbad00a..aeaea0e 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -2210,8 +2210,10 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, priv->bss_started = 0; priv->bss_num = 0; - if (mwifiex_cfg80211_init_p2p_client(priv)) - return ERR_PTR(-EFAULT); + if (mwifiex_cfg80211_init_p2p_client(priv)) { + wdev = ERR_PTR(-EFAULT); + goto done; + } break; default: @@ -2224,7 +2226,8 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, if (!dev) { wiphy_err(wiphy, "no memory available for netdevice\n"); priv->bss_mode = NL80211_IFTYPE_UNSPECIFIED; - return ERR_PTR(-ENOMEM); + wdev = ERR_PTR(-ENOMEM); + goto done; } mwifiex_init_priv_params(priv, dev); @@ -2264,7 +2267,9 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, wiphy_err(wiphy, "cannot register virtual network device\n"); free_netdev(dev); priv->bss_mode = NL80211_IFTYPE_UNSPECIFIED; - return ERR_PTR(-EFAULT); + priv->netdev = NULL; + wdev = ERR_PTR(-EFAULT); + goto done; } sema_init(&priv->async_sem, 1); @@ -2274,6 +2279,13 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, #ifdef CONFIG_DEBUG_FS mwifiex_dev_debugfs_init(priv); #endif + +done: + if (IS_ERR(wdev)) { + kfree(priv->wdev); + priv->wdev = NULL; + } + return wdev; } EXPORT_SYMBOL_GPL(mwifiex_add_virtual_intf); @@ -2298,7 +2310,10 @@ int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) unregister_netdevice(wdev->netdev); /* Clear the priv in adapter */ + priv->netdev->ieee80211_ptr = NULL; priv->netdev = NULL; + kfree(wdev); + priv->wdev = NULL; priv->media_connected = false; diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index 9d7c9d3..78e8a66 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -411,13 +411,14 @@ static void mwifiex_terminate_workqueue(struct mwifiex_adapter *adapter) */ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) { - int ret, i; + int ret; char fmt[64]; struct mwifiex_private *priv; struct mwifiex_adapter *adapter = context; struct mwifiex_fw_image fw; struct semaphore *sem = adapter->card_sem; bool init_failed = false; + struct wireless_dev *wdev; if (!firmware) { dev_err(adapter->dev, @@ -469,14 +470,16 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) priv = adapter->priv[MWIFIEX_BSS_ROLE_STA]; if (mwifiex_register_cfg80211(adapter)) { dev_err(adapter->dev, "cannot register with cfg80211\n"); - goto err_register_cfg80211; + goto err_init_fw; } rtnl_lock(); /* Create station interface by default */ - if (!mwifiex_add_virtual_intf(adapter->wiphy, "mlan%d", - NL80211_IFTYPE_STATION, NULL, NULL)) { + wdev = mwifiex_add_virtual_intf(adapter->wiphy, "mlan%d", + NL80211_IFTYPE_STATION, NULL, NULL); + if (IS_ERR(wdev)) { dev_err(adapter->dev, "cannot create default STA interface\n"); + rtnl_unlock(); goto err_add_intf; } rtnl_unlock(); @@ -486,17 +489,6 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; err_add_intf: - for (i = 0; i < adapter->priv_num; i++) { - priv = adapter->priv[i]; - - if (!priv) - continue; - - if (priv->wdev && priv->netdev) - mwifiex_del_virtual_intf(adapter->wiphy, priv->wdev); - } - rtnl_unlock(); -err_register_cfg80211: wiphy_unregister(adapter->wiphy); wiphy_free(adapter->wiphy); err_init_fw: @@ -1006,12 +998,6 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter, struct semaphore *sem) wiphy_unregister(priv->wdev->wiphy); wiphy_free(priv->wdev->wiphy); - for (i = 0; i < adapter->priv_num; i++) { - priv = adapter->priv[i]; - if (priv) - kfree(priv->wdev); - } - mwifiex_terminate_workqueue(adapter); /* Unregister device */ diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c index 33fa943..03688aa 100644 --- a/drivers/net/wireless/mwifiex/pcie.c +++ b/drivers/net/wireless/mwifiex/pcie.c @@ -232,7 +232,6 @@ static void mwifiex_pcie_remove(struct pci_dev *pdev) } mwifiex_remove_card(card->adapter, &add_remove_card_sem); - kfree(card); } static void mwifiex_pcie_shutdown(struct pci_dev *pdev) @@ -2313,6 +2312,7 @@ static void mwifiex_pcie_cleanup(struct mwifiex_adapter *adapter) pci_release_region(pdev, 0); pci_set_drvdata(pdev, NULL); } + kfree(card); } /* diff --git a/drivers/net/wireless/mwifiex/sdio.c b/drivers/net/wireless/mwifiex/sdio.c index 9bf8898..b44a315 100644 --- a/drivers/net/wireless/mwifiex/sdio.c +++ b/drivers/net/wireless/mwifiex/sdio.c @@ -196,7 +196,6 @@ mwifiex_sdio_remove(struct sdio_func *func) } mwifiex_remove_card(card->adapter, &add_remove_card_sem); - kfree(card); } /* @@ -1745,7 +1744,6 @@ mwifiex_unregister_dev(struct mwifiex_adapter *adapter) sdio_claim_host(card->func); sdio_disable_func(card->func); sdio_release_host(card->func); - sdio_set_drvdata(card->func, NULL); } } @@ -1773,7 +1771,6 @@ static int mwifiex_register_dev(struct mwifiex_adapter *adapter) return ret; } - sdio_set_drvdata(func, card); adapter->dev = &func->dev; @@ -1801,6 +1798,8 @@ static int mwifiex_init_sdio(struct mwifiex_adapter *adapter) int ret; u8 sdio_ireg; + sdio_set_drvdata(card->func, card); + /* * Read the HOST_INT_STATUS_REG for ACK the first interrupt got * from the bootloader. If we don't do this we get a interrupt @@ -1883,6 +1882,8 @@ static void mwifiex_cleanup_sdio(struct mwifiex_adapter *adapter) kfree(card->mpa_rx.len_arr); kfree(card->mpa_tx.buf); kfree(card->mpa_rx.buf); + sdio_set_drvdata(card->func, NULL); + kfree(card); } /* diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c index 1c70b8d..edf5b7a 100644 --- a/drivers/net/wireless/mwifiex/usb.c +++ b/drivers/net/wireless/mwifiex/usb.c @@ -350,7 +350,6 @@ static int mwifiex_usb_probe(struct usb_interface *intf, card->udev = udev; card->intf = intf; - usb_card = card; pr_debug("info: bcdUSB=%#x Device Class=%#x SubClass=%#x Protocol=%#x\n", udev->descriptor.bcdUSB, udev->descriptor.bDeviceClass, @@ -525,25 +524,28 @@ static int mwifiex_usb_resume(struct usb_interface *intf) static void mwifiex_usb_disconnect(struct usb_interface *intf) { struct usb_card_rec *card = usb_get_intfdata(intf); - struct mwifiex_adapter *adapter; - if (!card || !card->adapter) { - pr_err("%s: card or card->adapter is NULL\n", __func__); + if (!card) { + pr_err("%s: card is NULL\n", __func__); return; } - adapter = card->adapter; - if (!adapter->priv_num) - return; - mwifiex_usb_free(card); - dev_dbg(adapter->dev, "%s: removing card\n", __func__); - mwifiex_remove_card(adapter, &add_remove_card_sem); + if (card->adapter) { + struct mwifiex_adapter *adapter = card->adapter; + + if (!adapter->priv_num) + return; + + dev_dbg(adapter->dev, "%s: removing card\n", __func__); + mwifiex_remove_card(adapter, &add_remove_card_sem); + } usb_set_intfdata(intf, NULL); usb_put_dev(interface_to_usbdev(intf)); kfree(card); + usb_card = NULL; return; } @@ -754,6 +756,7 @@ static int mwifiex_register_dev(struct mwifiex_adapter *adapter) card->adapter = adapter; adapter->dev = &card->udev->dev; strcpy(adapter->fw_name, USB8797_DEFAULT_FW_NAME); + usb_card = card; return 0; } @@ -762,7 +765,7 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) { struct usb_card_rec *card = (struct usb_card_rec *)adapter->card; - usb_set_intfdata(card->intf, NULL); + card->adapter = NULL; } static int mwifiex_prog_fw_w_helper(struct mwifiex_adapter *adapter, @@ -1004,7 +1007,7 @@ static void mwifiex_usb_cleanup_module(void) if (!down_interruptible(&add_remove_card_sem)) up(&add_remove_card_sem); - if (usb_card) { + if (usb_card && usb_card->adapter) { struct mwifiex_adapter *adapter = usb_card->adapter; int i; diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 080b1fc..9dd92a7 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -181,6 +181,7 @@ static void rt2x00lib_autowakeup(struct work_struct *work) static void rt2x00lib_bc_buffer_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { + struct ieee80211_tx_control control = {}; struct rt2x00_dev *rt2x00dev = data; struct sk_buff *skb; @@ -195,7 +196,7 @@ static void rt2x00lib_bc_buffer_iter(void *data, u8 *mac, */ skb = ieee80211_get_buffered_bc(rt2x00dev->hw, vif); while (skb) { - rt2x00mac_tx(rt2x00dev->hw, NULL, skb); + rt2x00mac_tx(rt2x00dev->hw, &control, skb); skb = ieee80211_get_buffered_bc(rt2x00dev->hw, vif); } } diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/mac.c b/drivers/net/wireless/rtlwifi/rtl8192cu/mac.c index 3936853..e26312f 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/mac.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/mac.c @@ -769,7 +769,7 @@ static long _rtl92c_signal_scale_mapping(struct ieee80211_hw *hw, static void _rtl92c_query_rxphystatus(struct ieee80211_hw *hw, struct rtl_stats *pstats, - struct rx_desc_92c *pdesc, + struct rx_desc_92c *p_desc, struct rx_fwinfo_92c *p_drvinfo, bool packet_match_bssid, bool packet_toself, @@ -784,11 +784,11 @@ static void _rtl92c_query_rxphystatus(struct ieee80211_hw *hw, u32 rssi, total_rssi = 0; bool in_powersavemode = false; bool is_cck_rate; + u8 *pdesc = (u8 *)p_desc; - is_cck_rate = RX_HAL_IS_CCK_RATE(pdesc); + is_cck_rate = RX_HAL_IS_CCK_RATE(p_desc); pstats->packet_matchbssid = packet_match_bssid; pstats->packet_toself = packet_toself; - pstats->is_cck = is_cck_rate; pstats->packet_beacon = packet_beacon; pstats->is_cck = is_cck_rate; pstats->RX_SIGQ[0] = -1; diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c index b0c346a..1bc21cc 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c @@ -303,10 +303,10 @@ out: bool rtl92cu_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, struct ieee80211_rx_status *rx_status, - u8 *p_desc, struct sk_buff *skb) + u8 *pdesc, struct sk_buff *skb) { struct rx_fwinfo_92c *p_drvinfo; - struct rx_desc_92c *pdesc = (struct rx_desc_92c *)p_desc; + struct rx_desc_92c *p_desc = (struct rx_desc_92c *)pdesc; u32 phystatus = GET_RX_DESC_PHY_STATUS(pdesc); stats->length = (u16) GET_RX_DESC_PKT_LEN(pdesc); @@ -345,7 +345,7 @@ bool rtl92cu_rx_query_desc(struct ieee80211_hw *hw, if (phystatus) { p_drvinfo = (struct rx_fwinfo_92c *)(skb->data + stats->rx_bufshift); - rtl92c_translate_rx_signal_stuff(hw, skb, stats, pdesc, + rtl92c_translate_rx_signal_stuff(hw, skb, stats, p_desc, p_drvinfo); } /*rx_status->qual = stats->signal; */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index b78ee10..2329ccc 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -461,6 +461,9 @@ void xenvif_disconnect(struct xenvif *vif) if (netif_carrier_ok(vif->dev)) xenvif_carrier_off(vif); + if (vif->task) + kthread_stop(vif->task); + if (vif->tx_irq) { if (vif->tx_irq == vif->rx_irq) unbind_from_irqhandler(vif->tx_irq, vif); @@ -471,9 +474,6 @@ void xenvif_disconnect(struct xenvif *vif) vif->tx_irq = 0; } - if (vif->task) - kthread_stop(vif->task); - xenvif_unmap_frontend_rings(vif); } diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 95655d7..e52d7ff 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -410,7 +410,7 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid); * Otherwise is returns a bitmask with supported features. Current * features reported are: * PCI_PASID_CAP_EXEC - Execute permission supported - * PCI_PASID_CAP_PRIV - Priviledged mode supported + * PCI_PASID_CAP_PRIV - Privileged mode supported */ int pci_pasid_features(struct pci_dev *pdev) { diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 7c4f38d..0afbbbc 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -249,7 +249,7 @@ struct tegra_pcie { void __iomem *afi; int irq; - struct list_head busses; + struct list_head buses; struct resource *cs; struct resource io; @@ -399,14 +399,14 @@ free: /* * Look up a virtual address mapping for the specified bus number. If no such - * mapping existis, try to create one. + * mapping exists, try to create one. */ static void __iomem *tegra_pcie_bus_map(struct tegra_pcie *pcie, unsigned int busnr) { struct tegra_pcie_bus *bus; - list_for_each_entry(bus, &pcie->busses, list) + list_for_each_entry(bus, &pcie->buses, list) if (bus->nr == busnr) return (void __iomem *)bus->area->addr; @@ -414,7 +414,7 @@ static void __iomem *tegra_pcie_bus_map(struct tegra_pcie *pcie, if (IS_ERR(bus)) return NULL; - list_add_tail(&bus->list, &pcie->busses); + list_add_tail(&bus->list, &pcie->buses); return (void __iomem *)bus->area->addr; } @@ -808,7 +808,7 @@ static int tegra_pcie_enable_controller(struct tegra_pcie *pcie) value &= ~AFI_FUSE_PCIE_T0_GEN2_DIS; afi_writel(pcie, value, AFI_FUSE); - /* initialze internal PHY, enable up to 16 PCIE lanes */ + /* initialize internal PHY, enable up to 16 PCIE lanes */ pads_writel(pcie, 0x0, PADS_CTL_SEL); /* override IDDQ to 1 on all 4 lanes */ @@ -1624,7 +1624,7 @@ static int tegra_pcie_probe(struct platform_device *pdev) if (!pcie) return -ENOMEM; - INIT_LIST_HEAD(&pcie->busses); + INIT_LIST_HEAD(&pcie->buses); INIT_LIST_HEAD(&pcie->ports); pcie->soc_data = match->data; pcie->dev = &pdev->dev; diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index 1e1fea4..e33b68b 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -197,7 +197,7 @@ static int find_valid_pos0(struct pcie_port *pp, int msgvec, int pos, int *pos0) return -ENOSPC; /* * Check if this position is at correct offset.nvec is always a - * power of two. pos0 must be nvec bit alligned. + * power of two. pos0 must be nvec bit aligned. */ if (pos % msgvec) pos += msgvec - (pos % msgvec); diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index 0a648af..df8caec 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -133,8 +133,8 @@ config HOTPLUG_PCI_RPA_DLPAR To compile this driver as a module, choose M here: the module will be called rpadlpar_io. - - When in doubt, say N. + + When in doubt, say N. config HOTPLUG_PCI_SGI tristate "SGI PCI Hotplug Support" diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 47ec8c8..3e6532b 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -31,7 +31,7 @@ pci_hotplug-objs += cpci_hotplug_core.o \ cpci_hotplug_pci.o endif ifdef CONFIG_ACPI -pci_hotplug-objs += acpi_pcihp.o +pci_hotplug-objs += acpi_pcihp.o endif cpqphp-objs := cpqphp_core.o \ diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index 8650d39..dca66bc 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -111,7 +111,7 @@ int acpiphp_register_attention(struct acpiphp_attention_info *info) * @info: must match the pointer used to register * * Description: This is used to un-register a hardware specific acpi - * driver that manipulates the attention LED. The pointer to the + * driver that manipulates the attention LED. The pointer to the * info struct must be the same as the one used to set it. */ int acpiphp_unregister_attention(struct acpiphp_attention_info *info) @@ -169,8 +169,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) * was registered with us. This allows hardware specific * ACPI implementations to blink the light for us. */ - static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status) - { +static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status) +{ int retval = -ENODEV; pr_debug("%s - physical_slot = %s\n", __func__, @@ -182,8 +182,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) } else attention_info = NULL; return retval; - } - +} + /** * get_power_status - get power status of a slot @@ -323,7 +323,7 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot, if (retval) { pr_err("pci_hp_register failed with error %d\n", retval); goto error_hpslot; - } + } pr_info("Slot [%s] registered\n", slot_name(slot)); diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 5b4e9eb0..1cf605f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -325,7 +325,7 @@ static acpi_status register_slot(acpi_handle handle, u32 lvl, void *data, list_add_tail(&slot->node, &bridge->slots); - /* Register slots for ejectable funtions only. */ + /* Register slots for ejectable functions only. */ if (acpi_pci_check_ejectable(pbus, handle) || is_dock_device(handle)) { unsigned long long sun; int retval; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 0d64c41..ecfac7e 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -116,7 +116,7 @@ static struct bin_attribute ibm_apci_table_attr = { .read = ibm_read_apci_table, .write = NULL, }; -static struct acpiphp_attention_info ibm_attention_info = +static struct acpiphp_attention_info ibm_attention_info = { .set_attn = ibm_set_attention_status, .get_attn = ibm_get_attention_status, @@ -171,9 +171,9 @@ ibm_slot_done: */ static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status) { - union acpi_object args[2]; + union acpi_object args[2]; struct acpi_object_list params = { .pointer = args, .count = 2 }; - acpi_status stat; + acpi_status stat; unsigned long long rc; union apci_descriptor *ibm_slot; @@ -208,7 +208,7 @@ static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status) * * Description: This method is registered with the acpiphp module as a * callback to do the device specific task of getting the LED status. - * + * * Because there is no direct method of getting the LED status directly * from an ACPI call, we read the aPCI table and parse out our * slot descriptor to read the status from that. @@ -259,7 +259,7 @@ static void ibm_handle_events(acpi_handle handle, u32 event, void *context) pr_debug("%s: Received notification %02x\n", __func__, event); if (subevent == 0x80) { - pr_debug("%s: generationg bus event\n", __func__); + pr_debug("%s: generating bus event\n", __func__); acpi_bus_generate_netlink_event(note->device->pnp.device_class, dev_name(¬e->device->dev), note->event, detail); @@ -387,7 +387,7 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle, u32 lvl, void *context, void **rv) { acpi_handle *phandle = (acpi_handle *)context; - acpi_status status; + acpi_status status; struct acpi_device_info *info; int retval = 0; @@ -405,7 +405,7 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle, info->hardware_id.string, handle); *phandle = handle; /* returning non-zero causes the search to stop - * and returns this value to the caller of + * and returns this value to the caller of * acpi_walk_namespace, but it also causes some warnings * in the acpi debug code to print... */ diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index 2b4c412..00c81a3 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -46,7 +46,7 @@ do { \ if (cpci_debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c index d8add34..d3add98 100644 --- a/drivers/pci/hotplug/cpci_hotplug_pci.c +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c @@ -39,7 +39,7 @@ extern int cpci_debug; do { \ if (cpci_debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) diff --git a/drivers/pci/hotplug/cpcihp_generic.c b/drivers/pci/hotplug/cpcihp_generic.c index a6a71c4..7536eef 100644 --- a/drivers/pci/hotplug/cpcihp_generic.c +++ b/drivers/pci/hotplug/cpcihp_generic.c @@ -13,14 +13,14 @@ * option) any later version. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You should have received a copy of the GNU General Public License along @@ -53,9 +53,9 @@ #define dbg(format, arg...) \ do { \ - if(debug) \ + if (debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while(0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c index 449b4bb..e8c4a7c 100644 --- a/drivers/pci/hotplug/cpcihp_zt5550.c +++ b/drivers/pci/hotplug/cpcihp_zt5550.c @@ -13,14 +13,14 @@ * option) any later version. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You should have received a copy of the GNU General Public License along @@ -48,9 +48,9 @@ #define dbg(format, arg...) \ do { \ - if(debug) \ + if (debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while(0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) @@ -285,7 +285,7 @@ static struct pci_device_id zt5550_hc_pci_tbl[] = { { 0, } }; MODULE_DEVICE_TABLE(pci, zt5550_hc_pci_tbl); - + static struct pci_driver zt5550_hc_driver = { .name = "zt5550_hc", .id_table = zt5550_hc_pci_tbl, diff --git a/drivers/pci/hotplug/cpcihp_zt5550.h b/drivers/pci/hotplug/cpcihp_zt5550.h index bebc606..9a57fda 100644 --- a/drivers/pci/hotplug/cpcihp_zt5550.h +++ b/drivers/pci/hotplug/cpcihp_zt5550.h @@ -13,14 +13,14 @@ * option) any later version. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You should have received a copy of the GNU General Public License along @@ -55,7 +55,7 @@ #define HC_CMD_REG 0x0C #define ARB_CONFIG_GNT_REG 0x10 #define ARB_CONFIG_CFG_REG 0x12 -#define ARB_CONFIG_REG 0x10 +#define ARB_CONFIG_REG 0x10 #define ISOL_CONFIG_REG 0x18 #define FAULT_STATUS_REG 0x20 #define FAULT_CONFIG_REG 0x24 diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index c8eaeb4..31273e1 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -862,10 +862,10 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } - /* Check for the proper subsystem ID's + /* Check for the proper subsystem IDs * Intel uses a different SSID programming model than Compaq. * For Intel, each SSID bit identifies a PHP capability. - * Also Intel HPC's may have RID=0. + * Also Intel HPCs may have RID=0. */ if ((pdev->revision <= 2) && (vendor_id != PCI_VENDOR_ID_INTEL)) { err(msg_HPC_not_supported); diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index d282019..11845b7 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -1231,7 +1231,7 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_ /* Only if mode change...*/ if (((bus->cur_bus_speed == PCI_SPEED_66MHz) && (adapter_speed == PCI_SPEED_66MHz_PCIX)) || - ((bus->cur_bus_speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz))) + ((bus->cur_bus_speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz))) set_SOGO(ctrl); wait_for_ctrl_irq(ctrl); @@ -1828,7 +1828,7 @@ static void interrupt_event_handler(struct controller *ctrl) if (ctrl->event_queue[loop].event_type == INT_BUTTON_PRESS) { dbg("button pressed\n"); - } else if (ctrl->event_queue[loop].event_type == + } else if (ctrl->event_queue[loop].event_type == INT_BUTTON_CANCEL) { dbg("button cancel\n"); del_timer(&p_slot->task_event); @@ -2411,11 +2411,11 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func if (rc) return rc; - /* find range of busses to use */ + /* find range of buses to use */ dbg("find ranges of buses to use\n"); bus_node = get_max_resource(&(resources->bus_head), 1); - /* If we don't have any busses to allocate, we can't continue */ + /* If we don't have any buses to allocate, we can't continue */ if (!bus_node) return -ENOMEM; @@ -2900,7 +2900,7 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func /* If this function needs an interrupt and we are behind * a bridge and the pin is tied to something that's - * alread mapped, set this one the same */ + * already mapped, set this one the same */ if (temp_byte && resources->irqs && (resources->irqs->valid_INT & (0x01 << ((temp_byte + resources->irqs->barber_pole - 1) & 0x03)))) { diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 09801c6..6e4a12c 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -291,7 +291,7 @@ int cpqhp_get_bus_dev (struct controller *ctrl, u8 * bus_num, u8 * dev_num, u8 s * * Reads configuration for all slots in a PCI bus and saves info. * - * Note: For non-hot plug busses, the slot # saved is the device # + * Note: For non-hot plug buses, the slot # saved is the device # * * returns 0 if success */ @@ -455,7 +455,7 @@ int cpqhp_save_config(struct controller *ctrl, int busnumber, int is_hot_plug) * cpqhp_save_slot_config * * Saves configuration info for all PCI devices in a given slot - * including subordinate busses. + * including subordinate buses. * * returns 0 if success */ @@ -1556,4 +1556,3 @@ void cpqhp_destroy_board_resources (struct pci_func * func) kfree(tres); } } - diff --git a/drivers/pci/hotplug/ibmphp.h b/drivers/pci/hotplug/ibmphp.h index 8c5b258..e3e46a7 100644 --- a/drivers/pci/hotplug/ibmphp.h +++ b/drivers/pci/hotplug/ibmphp.h @@ -59,7 +59,7 @@ extern int ibmphp_debug; /************************************************************ -* RESOURE TYPE * +* RESOURCE TYPE * ************************************************************/ #define EBDA_RSRC_TYPE_MASK 0x03 @@ -103,7 +103,7 @@ extern int ibmphp_debug; //-------------------------------------------------------------- struct rio_table_hdr { - u8 ver_num; + u8 ver_num; u8 scal_count; u8 riodev_count; u16 offset; @@ -127,7 +127,7 @@ struct scal_detail { }; //-------------------------------------------------------------- -// RIO DETAIL +// RIO DETAIL //-------------------------------------------------------------- struct rio_detail { @@ -152,7 +152,7 @@ struct opt_rio { u8 first_slot_num; u8 middle_num; struct list_head opt_rio_list; -}; +}; struct opt_rio_lo { u8 rio_type; @@ -161,7 +161,7 @@ struct opt_rio_lo { u8 middle_num; u8 pack_count; struct list_head opt_rio_lo_list; -}; +}; /**************************************************************** * HPC DESCRIPTOR NODE * @@ -574,7 +574,7 @@ void ibmphp_hpc_stop_poll_thread(void); #define HPC_CTLR_IRQ_PENDG 0x80 //---------------------------------------------------------------------------- -// HPC_CTLR_WROKING status return codes +// HPC_CTLR_WORKING status return codes //---------------------------------------------------------------------------- #define HPC_CTLR_WORKING_NO 0x00 #define HPC_CTLR_WORKING_YES 0x01 diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index cbd72d8..efdc13a 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -58,7 +58,7 @@ MODULE_DESCRIPTION (DRIVER_DESC); struct pci_bus *ibmphp_pci_bus; static int max_slots; -static int irqs[16]; /* PIC mode IRQ's we're using so far (in case MPS +static int irqs[16]; /* PIC mode IRQs we're using so far (in case MPS * tables don't provide default info for empty slots */ static int init_flag; @@ -71,20 +71,20 @@ static inline int get_max_adapter_speed (struct hotplug_slot *hs, u8 *value) return get_max_adapter_speed_1 (hs, value, 1); } */ -static inline int get_cur_bus_info(struct slot **sl) +static inline int get_cur_bus_info(struct slot **sl) { int rc = 1; struct slot * slot_cur = *sl; debug("options = %x\n", slot_cur->ctrl->options); - debug("revision = %x\n", slot_cur->ctrl->revision); + debug("revision = %x\n", slot_cur->ctrl->revision); - if (READ_BUS_STATUS(slot_cur->ctrl)) + if (READ_BUS_STATUS(slot_cur->ctrl)) rc = ibmphp_hpc_readslot(slot_cur, READ_BUSSTATUS, NULL); - - if (rc) + + if (rc) return rc; - + slot_cur->bus_on->current_speed = CURRENT_BUS_SPEED(slot_cur->busstatus); if (READ_BUS_MODE(slot_cur->ctrl)) slot_cur->bus_on->current_bus_mode = @@ -96,7 +96,7 @@ static inline int get_cur_bus_info(struct slot **sl) slot_cur->busstatus, slot_cur->bus_on->current_speed, slot_cur->bus_on->current_bus_mode); - + *sl = slot_cur; return 0; } @@ -104,8 +104,8 @@ static inline int get_cur_bus_info(struct slot **sl) static inline int slot_update(struct slot **sl) { int rc; - rc = ibmphp_hpc_readslot(*sl, READ_ALLSTAT, NULL); - if (rc) + rc = ibmphp_hpc_readslot(*sl, READ_ALLSTAT, NULL); + if (rc) return rc; if (!init_flag) rc = get_cur_bus_info(sl); @@ -172,7 +172,7 @@ int ibmphp_init_devno(struct slot **cur_slot) debug("(*cur_slot)->irq[3] = %x\n", (*cur_slot)->irq[3]); - debug("rtable->exlusive_irqs = %x\n", + debug("rtable->exclusive_irqs = %x\n", rtable->exclusive_irqs); debug("rtable->slots[loop].irq[0].bitmap = %x\n", rtable->slots[loop].irq[0].bitmap); @@ -271,7 +271,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value) else rc = -ENODEV; } - } else + } else rc = -ENODEV; ibmphp_unlock_operations(); @@ -288,7 +288,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 * value) debug("get_attention_status - Entry hotplug_slot[%lx] pvalue[%lx]\n", (ulong) hotplug_slot, (ulong) value); - + ibmphp_lock_operations(); if (hotplug_slot) { pslot = hotplug_slot->private; @@ -406,14 +406,14 @@ static int get_max_bus_speed(struct slot *slot) ibmphp_lock_operations(); mode = slot->supported_bus_mode; - speed = slot->supported_speed; + speed = slot->supported_speed; ibmphp_unlock_operations(); switch (speed) { case BUS_SPEED_33: break; case BUS_SPEED_66: - if (mode == BUS_MODE_PCIX) + if (mode == BUS_MODE_PCIX) speed += 0x01; break; case BUS_SPEED_100: @@ -515,13 +515,13 @@ static int __init init_ops(void) debug("BEFORE GETTING SLOT STATUS, slot # %x\n", slot_cur->number); - if (slot_cur->ctrl->revision == 0xFF) + if (slot_cur->ctrl->revision == 0xFF) if (get_ctrl_revision(slot_cur, &slot_cur->ctrl->revision)) return -1; - if (slot_cur->bus_on->current_speed == 0xFF) - if (get_cur_bus_info(&slot_cur)) + if (slot_cur->bus_on->current_speed == 0xFF) + if (get_cur_bus_info(&slot_cur)) return -1; get_max_bus_speed(slot_cur); @@ -539,8 +539,8 @@ static int __init init_ops(void) debug("SLOT_PRESENT = %x\n", SLOT_PRESENT(slot_cur->status)); debug("SLOT_LATCH = %x\n", SLOT_LATCH(slot_cur->status)); - if ((SLOT_PWRGD(slot_cur->status)) && - !(SLOT_PRESENT(slot_cur->status)) && + if ((SLOT_PWRGD(slot_cur->status)) && + !(SLOT_PRESENT(slot_cur->status)) && !(SLOT_LATCH(slot_cur->status))) { debug("BEFORE POWER OFF COMMAND\n"); rc = power_off(slot_cur); @@ -581,13 +581,13 @@ static int validate(struct slot *slot_cur, int opn) switch (opn) { case ENABLE: - if (!(SLOT_PWRGD(slot_cur->status)) && - (SLOT_PRESENT(slot_cur->status)) && + if (!(SLOT_PWRGD(slot_cur->status)) && + (SLOT_PRESENT(slot_cur->status)) && !(SLOT_LATCH(slot_cur->status))) return 0; break; case DISABLE: - if ((SLOT_PWRGD(slot_cur->status)) && + if ((SLOT_PWRGD(slot_cur->status)) && (SLOT_PRESENT(slot_cur->status)) && !(SLOT_LATCH(slot_cur->status))) return 0; @@ -617,7 +617,7 @@ int ibmphp_update_slot_info(struct slot *slot_cur) err("out of system memory\n"); return -ENOMEM; } - + info->power_status = SLOT_PWRGD(slot_cur->status); info->attention_status = SLOT_ATTN(slot_cur->status, slot_cur->ext_status); @@ -638,7 +638,7 @@ int ibmphp_update_slot_info(struct slot *slot_cur) case BUS_SPEED_33: break; case BUS_SPEED_66: - if (mode == BUS_MODE_PCIX) + if (mode == BUS_MODE_PCIX) bus_speed += 0x01; else if (mode == BUS_MODE_PCI) ; @@ -654,8 +654,8 @@ int ibmphp_update_slot_info(struct slot *slot_cur) } bus->cur_bus_speed = bus_speed; - // To do: bus_names - + // To do: bus_names + rc = pci_hp_change_slot_info(slot_cur->hotplug_slot, info); kfree(info); return rc; @@ -729,8 +729,8 @@ static void ibm_unconfigure_device(struct pci_func *func) } /* - * The following function is to fix kernel bug regarding - * getting bus entries, here we manually add those primary + * The following function is to fix kernel bug regarding + * getting bus entries, here we manually add those primary * bus entries to kernel bus structure whenever apply */ static u8 bus_structure_fixup(u8 busno) @@ -814,7 +814,7 @@ static int ibm_configure_device(struct pci_func *func) } /******************************************************* - * Returns whether the bus is empty or not + * Returns whether the bus is empty or not *******************************************************/ static int is_bus_empty(struct slot * slot_cur) { @@ -842,7 +842,7 @@ static int is_bus_empty(struct slot * slot_cur) } /*********************************************************** - * If the HPC permits and the bus currently empty, tries to set the + * If the HPC permits and the bus currently empty, tries to set the * bus speed and mode at the maximum card and bus capability * Parameters: slot * Returns: bus is set (0) or error code @@ -856,7 +856,7 @@ static int set_bus(struct slot * slot_cur) static struct pci_device_id ciobx[] = { { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, 0x0101) }, { }, - }; + }; debug("%s - entry slot # %d\n", __func__, slot_cur->number); if (SET_BUS_STATUS(slot_cur->ctrl) && is_bus_empty(slot_cur)) { @@ -877,7 +877,7 @@ static int set_bus(struct slot * slot_cur) else if (!SLOT_BUS_MODE(slot_cur->ext_status)) /* if max slot/bus capability is 66 pci and there's no bus mode mismatch, then - the adapter supports 66 pci */ + the adapter supports 66 pci */ cmd = HPC_BUS_66CONVMODE; else cmd = HPC_BUS_33CONVMODE; @@ -930,7 +930,7 @@ static int set_bus(struct slot * slot_cur) return -EIO; } } - /* This is for x440, once Brandon fixes the firmware, + /* This is for x440, once Brandon fixes the firmware, will not need this delay */ msleep(1000); debug("%s -Exit\n", __func__); @@ -938,9 +938,9 @@ static int set_bus(struct slot * slot_cur) } /* This routine checks the bus limitations that the slot is on from the BIOS. - * This is used in deciding whether or not to power up the slot. + * This is used in deciding whether or not to power up the slot. * (electrical/spec limitations. For example, >1 133 MHz or >2 66 PCI cards on - * same bus) + * same bus) * Parameters: slot * Returns: 0 = no limitations, -EINVAL = exceeded limitations on the bus */ @@ -986,7 +986,7 @@ static int check_limitations(struct slot *slot_cur) static inline void print_card_capability(struct slot *slot_cur) { info("capability of the card is "); - if ((slot_cur->ext_status & CARD_INFO) == PCIX133) + if ((slot_cur->ext_status & CARD_INFO) == PCIX133) info(" 133 MHz PCI-X\n"); else if ((slot_cur->ext_status & CARD_INFO) == PCIX66) info(" 66 MHz PCI-X\n"); @@ -1020,7 +1020,7 @@ static int enable_slot(struct hotplug_slot *hs) } attn_LED_blink(slot_cur); - + rc = set_bus(slot_cur); if (rc) { err("was not able to set the bus\n"); @@ -1082,7 +1082,7 @@ static int enable_slot(struct hotplug_slot *hs) rc = slot_update(&slot_cur); if (rc) goto error_power; - + rc = -EINVAL; if (SLOT_POWER(slot_cur->status) && !(SLOT_PWRGD(slot_cur->status))) { err("power fault occurred trying to power up...\n"); @@ -1093,7 +1093,7 @@ static int enable_slot(struct hotplug_slot *hs) "speed and card capability\n"); print_card_capability(slot_cur); goto error_power; - } + } /* Don't think this case will happen after above checks... * but just in case, for paranoia sake */ if (!(SLOT_POWER(slot_cur->status))) { @@ -1144,7 +1144,7 @@ static int enable_slot(struct hotplug_slot *hs) ibmphp_print_test(); rc = ibmphp_update_slot_info(slot_cur); exit: - ibmphp_unlock_operations(); + ibmphp_unlock_operations(); return rc; error_nopower: @@ -1180,7 +1180,7 @@ static int ibmphp_disable_slot(struct hotplug_slot *hotplug_slot) { struct slot *slot = hotplug_slot->private; int rc; - + ibmphp_lock_operations(); rc = ibmphp_do_disable_slot(slot); ibmphp_unlock_operations(); @@ -1192,12 +1192,12 @@ int ibmphp_do_disable_slot(struct slot *slot_cur) int rc; u8 flag; - debug("DISABLING SLOT...\n"); - + debug("DISABLING SLOT...\n"); + if ((slot_cur == NULL) || (slot_cur->ctrl == NULL)) { return -ENODEV; } - + flag = slot_cur->flag; slot_cur->flag = 1; @@ -1210,7 +1210,7 @@ int ibmphp_do_disable_slot(struct slot *slot_cur) attn_LED_blink(slot_cur); if (slot_cur->func == NULL) { - /* We need this for fncs's that were there on bootup */ + /* We need this for functions that were there on bootup */ slot_cur->func = kzalloc(sizeof(struct pci_func), GFP_KERNEL); if (!slot_cur->func) { err("out of system memory\n"); @@ -1222,12 +1222,13 @@ int ibmphp_do_disable_slot(struct slot *slot_cur) } ibm_unconfigure_device(slot_cur->func); - - /* If we got here from latch suddenly opening on operating card or - a power fault, there's no power to the card, so cannot - read from it to determine what resources it occupied. This operation - is forbidden anyhow. The best we can do is remove it from kernel - lists at least */ + + /* + * If we got here from latch suddenly opening on operating card or + * a power fault, there's no power to the card, so cannot + * read from it to determine what resources it occupied. This operation + * is forbidden anyhow. The best we can do is remove it from kernel + * lists at least */ if (!flag) { attn_off(slot_cur); @@ -1264,7 +1265,7 @@ error: rc = -EFAULT; goto exit; } - if (flag) + if (flag) ibmphp_update_slot_info(slot_cur); goto exit; } @@ -1339,7 +1340,7 @@ static int __init ibmphp_init(void) debug("AFTER Resource & EBDA INITIALIZATIONS\n"); max_slots = get_max_slots(); - + if ((rc = ibmphp_register_pci())) goto error; diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index 9df78bc..bd04415 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -123,7 +123,7 @@ static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc (void) static void __init print_bus_info (void) { struct bus_info *ptr; - + list_for_each_entry(ptr, &bus_info_head, bus_info_list) { debug ("%s - slot_min = %x\n", __func__, ptr->slot_min); debug ("%s - slot_max = %x\n", __func__, ptr->slot_max); @@ -131,7 +131,7 @@ static void __init print_bus_info (void) debug ("%s - bus# = %x\n", __func__, ptr->busno); debug ("%s - current_speed = %x\n", __func__, ptr->current_speed); debug ("%s - controller_id = %x\n", __func__, ptr->controller_id); - + debug ("%s - slots_at_33_conv = %x\n", __func__, ptr->slots_at_33_conv); debug ("%s - slots_at_66_conv = %x\n", __func__, ptr->slots_at_66_conv); debug ("%s - slots_at_66_pcix = %x\n", __func__, ptr->slots_at_66_pcix); @@ -144,7 +144,7 @@ static void __init print_bus_info (void) static void print_lo_info (void) { struct rio_detail *ptr; - debug ("print_lo_info ----\n"); + debug ("print_lo_info ----\n"); list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) { debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id); debug ("%s - rio_type = %x\n", __func__, ptr->rio_type); @@ -176,7 +176,7 @@ static void __init print_ebda_pci_rsrc (void) struct ebda_pci_rsrc *ptr; list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) { - debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", + debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", __func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr); } } @@ -259,7 +259,7 @@ int __init ibmphp_access_ebda (void) ebda_seg = readw (io_mem); iounmap (io_mem); debug ("returned ebda segment: %x\n", ebda_seg); - + io_mem = ioremap(ebda_seg<<4, 1); if (!io_mem) return -ENOMEM; @@ -310,7 +310,7 @@ int __init ibmphp_access_ebda (void) re = readw (io_mem + sub_addr); /* next sub blk */ sub_addr += 2; - rc_id = readw (io_mem + sub_addr); /* sub blk id */ + rc_id = readw (io_mem + sub_addr); /* sub blk id */ sub_addr += 2; if (rc_id != 0x5243) @@ -330,7 +330,7 @@ int __init ibmphp_access_ebda (void) debug ("info about hpc descriptor---\n"); debug ("hot blk format: %x\n", format); debug ("num of controller: %x\n", num_ctlrs); - debug ("offset of hpc data structure enteries: %x\n ", sub_addr); + debug ("offset of hpc data structure entries: %x\n ", sub_addr); sub_addr = base + re; /* re sub blk */ /* FIXME: rc is never used/checked */ @@ -359,7 +359,7 @@ int __init ibmphp_access_ebda (void) debug ("info about rsrc descriptor---\n"); debug ("format: %x\n", format); debug ("num of rsrc: %x\n", num_entries); - debug ("offset of rsrc data structure enteries: %x\n ", sub_addr); + debug ("offset of rsrc data structure entries: %x\n ", sub_addr); hs_complete = 1; } else { @@ -376,7 +376,7 @@ int __init ibmphp_access_ebda (void) rio_table_ptr->scal_count = readb (io_mem + offset + 1); rio_table_ptr->riodev_count = readb (io_mem + offset + 2); rio_table_ptr->offset = offset +3 ; - + debug("info about rio table hdr ---\n"); debug("ver_num: %x\nscal_count: %x\nriodev_count: %x\noffset of rio table: %x\n ", rio_table_ptr->ver_num, rio_table_ptr->scal_count, @@ -440,12 +440,12 @@ static int __init ebda_rio_table (void) rio_detail_ptr->chassis_num = readb (io_mem + offset + 14); // debug ("rio_node_id: %x\nbbar: %x\nrio_type: %x\nowner_id: %x\nport0_node: %x\nport0_port: %x\nport1_node: %x\nport1_port: %x\nfirst_slot_num: %x\nstatus: %x\n", rio_detail_ptr->rio_node_id, rio_detail_ptr->bbar, rio_detail_ptr->rio_type, rio_detail_ptr->owner_id, rio_detail_ptr->port0_node_connect, rio_detail_ptr->port0_port_connect, rio_detail_ptr->port1_node_connect, rio_detail_ptr->port1_port_connect, rio_detail_ptr->first_slot_num, rio_detail_ptr->status); //create linked list of chassis - if (rio_detail_ptr->rio_type == 4 || rio_detail_ptr->rio_type == 5) + if (rio_detail_ptr->rio_type == 4 || rio_detail_ptr->rio_type == 5) list_add (&rio_detail_ptr->rio_detail_list, &rio_vg_head); - //create linked list of expansion box - else if (rio_detail_ptr->rio_type == 6 || rio_detail_ptr->rio_type == 7) + //create linked list of expansion box + else if (rio_detail_ptr->rio_type == 6 || rio_detail_ptr->rio_type == 7) list_add (&rio_detail_ptr->rio_detail_list, &rio_lo_head); - else + else // not in my concern kfree (rio_detail_ptr); offset += 15; @@ -456,7 +456,7 @@ static int __init ebda_rio_table (void) } /* - * reorganizing linked list of chassis + * reorganizing linked list of chassis */ static struct opt_rio *search_opt_vg (u8 chassis_num) { @@ -464,7 +464,7 @@ static struct opt_rio *search_opt_vg (u8 chassis_num) list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) { if (ptr->chassis_num == chassis_num) return ptr; - } + } return NULL; } @@ -472,7 +472,7 @@ static int __init combine_wpg_for_chassis (void) { struct opt_rio *opt_rio_ptr = NULL; struct rio_detail *rio_detail_ptr = NULL; - + list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) { opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num); if (!opt_rio_ptr) { @@ -484,14 +484,14 @@ static int __init combine_wpg_for_chassis (void) opt_rio_ptr->first_slot_num = rio_detail_ptr->first_slot_num; opt_rio_ptr->middle_num = rio_detail_ptr->first_slot_num; list_add (&opt_rio_ptr->opt_rio_list, &opt_vg_head); - } else { + } else { opt_rio_ptr->first_slot_num = min (opt_rio_ptr->first_slot_num, rio_detail_ptr->first_slot_num); opt_rio_ptr->middle_num = max (opt_rio_ptr->middle_num, rio_detail_ptr->first_slot_num); - } + } } print_opt_vg (); - return 0; -} + return 0; +} /* * reorganizing linked list of expansion box @@ -502,7 +502,7 @@ static struct opt_rio_lo *search_opt_lo (u8 chassis_num) list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) { if (ptr->chassis_num == chassis_num) return ptr; - } + } return NULL; } @@ -510,7 +510,7 @@ static int combine_wpg_for_expansion (void) { struct opt_rio_lo *opt_rio_lo_ptr = NULL; struct rio_detail *rio_detail_ptr = NULL; - + list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) { opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num); if (!opt_rio_lo_ptr) { @@ -522,22 +522,22 @@ static int combine_wpg_for_expansion (void) opt_rio_lo_ptr->first_slot_num = rio_detail_ptr->first_slot_num; opt_rio_lo_ptr->middle_num = rio_detail_ptr->first_slot_num; opt_rio_lo_ptr->pack_count = 1; - + list_add (&opt_rio_lo_ptr->opt_rio_lo_list, &opt_lo_head); - } else { + } else { opt_rio_lo_ptr->first_slot_num = min (opt_rio_lo_ptr->first_slot_num, rio_detail_ptr->first_slot_num); opt_rio_lo_ptr->middle_num = max (opt_rio_lo_ptr->middle_num, rio_detail_ptr->first_slot_num); opt_rio_lo_ptr->pack_count = 2; - } + } } - return 0; + return 0; } - + /* Since we don't know the max slot number per each chassis, hence go * through the list of all chassis to find out the range - * Arguments: slot_num, 1st slot number of the chassis we think we are on, - * var (0 = chassis, 1 = expansion box) + * Arguments: slot_num, 1st slot number of the chassis we think we are on, + * var (0 = chassis, 1 = expansion box) */ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var) { @@ -547,7 +547,7 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var) if (!var) { list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) { - if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { + if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { rc = -ENODEV; break; } @@ -569,7 +569,7 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num) list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) { //check to see if this slot_num belongs to expansion box - if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) + if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) return opt_lo_ptr; } return NULL; @@ -580,8 +580,8 @@ static struct opt_rio * find_chassis_num (u8 slot_num) struct opt_rio *opt_vg_ptr; list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) { - //check to see if this slot_num belongs to chassis - if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) + //check to see if this slot_num belongs to chassis + if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) return opt_vg_ptr; } return NULL; @@ -594,13 +594,13 @@ static u8 calculate_first_slot (u8 slot_num) { u8 first_slot = 1; struct slot * slot_cur; - + list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) { if (slot_cur->ctrl) { - if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) + if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) first_slot = slot_cur->ctrl->ending_slot_num; } - } + } return first_slot + 1; } @@ -622,11 +622,11 @@ static char *create_file_name (struct slot * slot_cur) err ("Structure passed is empty\n"); return NULL; } - + slot_num = slot_cur->number; memset (str, 0, sizeof(str)); - + if (rio_table_ptr) { if (rio_table_ptr->ver_num == 3) { opt_vg_ptr = find_chassis_num (slot_num); @@ -660,7 +660,7 @@ static char *create_file_name (struct slot * slot_cur) /* if both NULL and we DO have correct RIO table in BIOS */ return NULL; } - } + } if (!flag) { if (slot_cur->ctrl->ctlr_type == 4) { first_slot = calculate_first_slot (slot_num); @@ -798,7 +798,7 @@ static int __init ebda_rsrc_controller (void) slot_ptr->ctl_index = readb (io_mem + addr_slot + 2*slot_num); slot_ptr->slot_cap = readb (io_mem + addr_slot + 3*slot_num); - // create bus_info lined list --- if only one slot per bus: slot_min = slot_max + // create bus_info lined list --- if only one slot per bus: slot_min = slot_max bus_info_ptr2 = ibmphp_find_same_bus_num (slot_ptr->slot_bus_num); if (!bus_info_ptr2) { @@ -814,9 +814,9 @@ static int __init ebda_rsrc_controller (void) bus_info_ptr1->index = bus_index++; bus_info_ptr1->current_speed = 0xff; bus_info_ptr1->current_bus_mode = 0xff; - + bus_info_ptr1->controller_id = hpc_ptr->ctlr_id; - + list_add_tail (&bus_info_ptr1->bus_info_list, &bus_info_head); } else { @@ -851,7 +851,7 @@ static int __init ebda_rsrc_controller (void) bus_info_ptr2->slots_at_66_conv = bus_ptr->slots_at_66_conv; bus_info_ptr2->slots_at_66_pcix = bus_ptr->slots_at_66_pcix; bus_info_ptr2->slots_at_100_pcix = bus_ptr->slots_at_100_pcix; - bus_info_ptr2->slots_at_133_pcix = bus_ptr->slots_at_133_pcix; + bus_info_ptr2->slots_at_133_pcix = bus_ptr->slots_at_133_pcix; } bus_ptr++; } @@ -864,7 +864,7 @@ static int __init ebda_rsrc_controller (void) hpc_ptr->u.pci_ctlr.dev_fun = readb (io_mem + addr + 1); hpc_ptr->irq = readb (io_mem + addr + 2); addr += 3; - debug ("ctrl bus = %x, ctlr devfun = %x, irq = %x\n", + debug ("ctrl bus = %x, ctlr devfun = %x, irq = %x\n", hpc_ptr->u.pci_ctlr.bus, hpc_ptr->u.pci_ctlr.dev_fun, hpc_ptr->irq); break; @@ -932,7 +932,7 @@ static int __init ebda_rsrc_controller (void) tmp_slot->supported_speed = 2; else if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_66_MAX) == EBDA_SLOT_66_MAX) tmp_slot->supported_speed = 1; - + if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_PCIX_CAP) == EBDA_SLOT_PCIX_CAP) tmp_slot->supported_bus_mode = 1; else @@ -1000,7 +1000,7 @@ error_no_hpc: return rc; } -/* +/* * map info (bus, devfun, start addr, end addr..) of i/o, memory, * pfm from the physical addr to a list of resource. */ @@ -1057,7 +1057,7 @@ static int __init ebda_rsrc_rsrc (void) addr += 10; debug ("rsrc from mem or pfm ---\n"); - debug ("rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", + debug ("rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", rsrc_ptr->rsrc_type, rsrc_ptr->bus_num, rsrc_ptr->dev_fun, rsrc_ptr->start_addr, rsrc_ptr->end_addr); list_add (&rsrc_ptr->ebda_pci_rsrc_list, &ibmphp_ebda_pci_rsrc_head); @@ -1096,7 +1096,7 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num) struct bus_info *ptr; list_for_each_entry(ptr, &bus_info_head, bus_info_list) { - if (ptr->busno == num) + if (ptr->busno == num) return ptr; } return NULL; @@ -1110,7 +1110,7 @@ int ibmphp_get_bus_index (u8 num) struct bus_info *ptr; list_for_each_entry(ptr, &bus_info_head, bus_info_list) { - if (ptr->busno == num) + if (ptr->busno == num) return ptr->index; } return -ENODEV; @@ -1168,7 +1168,7 @@ static struct pci_device_id id_table[] = { .subdevice = HPC_SUBSYSTEM_ID, .class = ((PCI_CLASS_SYSTEM_PCI_HOTPLUG << 8) | 0x00), }, {} -}; +}; MODULE_DEVICE_TABLE(pci, id_table); @@ -1197,7 +1197,7 @@ static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids) struct controller *ctrl; debug ("inside ibmphp_probe\n"); - + list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) { if (ctrl->ctlr_type == 1) { if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) { @@ -1210,4 +1210,3 @@ static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids) } return -ENODEV; } - diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c index f59ed30..5fc7a08 100644 --- a/drivers/pci/hotplug/ibmphp_hpc.c +++ b/drivers/pci/hotplug/ibmphp_hpc.c @@ -258,7 +258,7 @@ static u8 i2c_ctrl_write (struct controller *ctlr_ptr, void __iomem *WPGBbar, u8 { u8 rc; void __iomem *wpg_addr; // base addr + offset - unsigned long wpg_data; // data to/from WPG LOHI format + unsigned long wpg_data; // data to/from WPG LOHI format unsigned long ultemp; unsigned long data; // actual data HILO format int i; @@ -351,7 +351,7 @@ static u8 i2c_ctrl_write (struct controller *ctlr_ptr, void __iomem *WPGBbar, u8 } //------------------------------------------------------------ -// Read from ISA type HPC +// Read from ISA type HPC //------------------------------------------------------------ static u8 isa_ctrl_read (struct controller *ctlr_ptr, u8 offset) { @@ -372,7 +372,7 @@ static void isa_ctrl_write (struct controller *ctlr_ptr, u8 offset, u8 data) { u16 start_address; u16 port_address; - + start_address = ctlr_ptr->u.isa_ctlr.io_start; port_address = start_address + (u16) offset; outb (data, port_address); @@ -656,11 +656,11 @@ int ibmphp_hpc_readslot (struct slot * pslot, u8 cmd, u8 * pstatus) //-------------------------------------------------------------------- // cleanup //-------------------------------------------------------------------- - + // remove physical to logical address mapping if ((ctlr_ptr->ctlr_type == 2) || (ctlr_ptr->ctlr_type == 4)) iounmap (wpg_bbar); - + free_hpc_access (); debug_polling ("%s - Exit rc[%d]\n", __func__, rc); @@ -835,7 +835,7 @@ static int poll_hpc(void *data) down (&semOperations); switch (poll_state) { - case POLL_LATCH_REGISTER: + case POLL_LATCH_REGISTER: oldlatchlow = curlatchlow; ctrl_count = 0x00; list_for_each (pslotlist, &ibmphp_slot_head) { @@ -892,16 +892,16 @@ static int poll_hpc(void *data) if (kthread_should_stop()) goto out_sleep; - + down (&semOperations); - + if (poll_count >= POLL_LATCH_CNT) { poll_count = 0; poll_state = POLL_SLOTS; } else poll_state = POLL_LATCH_REGISTER; break; - } + } /* give up the hardware semaphore */ up (&semOperations); /* sleep for a short time just for good measure */ @@ -958,7 +958,7 @@ static int process_changeinstatus (struct slot *pslot, struct slot *poldslot) // bit 5 - HPC_SLOT_PWRGD if ((pslot->status & 0x20) != (poldslot->status & 0x20)) // OFF -> ON: ignore, ON -> OFF: disable slot - if ((poldslot->status & 0x20) && (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status))) + if ((poldslot->status & 0x20) && (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status))) disable = 1; // bit 6 - HPC_SLOT_BUS_SPEED @@ -980,7 +980,7 @@ static int process_changeinstatus (struct slot *pslot, struct slot *poldslot) pslot->status &= ~HPC_SLOT_POWER; } } - // CLOSE -> OPEN + // CLOSE -> OPEN else if ((SLOT_PWRGD (poldslot->status) == HPC_SLOT_PWRGD_GOOD) && (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status))) { disable = 1; @@ -1075,7 +1075,7 @@ void __exit ibmphp_hpc_stop_poll_thread (void) debug ("before locking operations \n"); ibmphp_lock_operations (); debug ("after locking operations \n"); - + // wait for poll thread to exit debug ("before sem_exit down \n"); down (&sem_exit); diff --git a/drivers/pci/hotplug/ibmphp_pci.c b/drivers/pci/hotplug/ibmphp_pci.c index c60f5f3..639ea3a 100644 --- a/drivers/pci/hotplug/ibmphp_pci.c +++ b/drivers/pci/hotplug/ibmphp_pci.c @@ -1,8 +1,8 @@ /* * IBM Hot Plug Controller Driver - * + * * Written By: Irene Zubarev, IBM Corporation - * + * * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2001,2002 IBM Corp. * @@ -42,7 +42,7 @@ static u8 find_sec_number (u8 primary_busno, u8 slotno); /* * NOTE..... If BIOS doesn't provide default routing, we assign: - * 9 for SCSI, 10 for LAN adapters, and 11 for everything else. + * 9 for SCSI, 10 for LAN adapters, and 11 for everything else. * If adapter is bridged, then we assign 11 to it and devices behind it. * We also assign the same irq numbers for multi function devices. * These are PIC mode, so shouldn't matter n.e.ways (hopefully) @@ -71,11 +71,11 @@ static void assign_alt_irq (struct pci_func * cur_func, u8 class_code) * Configures the device to be added (will allocate needed resources if it * can), the device can be a bridge or a regular pci device, can also be * multi-functional - * + * * Input: function to be added - * + * * TO DO: The error case with Multifunction device or multi function bridge, - * if there is an error, will need to go through all previous functions and + * if there is an error, will need to go through all previous functions and * unconfigure....or can add some code into unconfigure_card.... */ int ibmphp_configure_card (struct pci_func *func, u8 slotno) @@ -98,7 +98,7 @@ int ibmphp_configure_card (struct pci_func *func, u8 slotno) cur_func = func; /* We only get bus and device from IRQ routing table. So at this point, - * func->busno is correct, and func->device contains only device (at the 5 + * func->busno is correct, and func->device contains only device (at the 5 * highest bits) */ @@ -151,7 +151,7 @@ int ibmphp_configure_card (struct pci_func *func, u8 slotno) cur_func->device, cur_func->busno); cleanup_count = 6; goto error; - } + } cur_func->next = NULL; function = 0x8; break; @@ -339,7 +339,7 @@ error: } /* - * This function configures the pci BARs of a single device. + * This function configures the pci BARs of a single device. * Input: pointer to the pci_func * Output: configured PCI, 0, or error */ @@ -371,17 +371,17 @@ static int configure_device (struct pci_func *func) for (count = 0; address[count]; count++) { /* for 6 BARs */ - /* not sure if i need this. per scott, said maybe need smth like this + /* not sure if i need this. per scott, said maybe need * something like this if devices don't adhere 100% to the spec, so don't want to write to the reserved bits - pcibios_read_config_byte(cur_func->busno, cur_func->device, + pcibios_read_config_byte(cur_func->busno, cur_func->device, PCI_BASE_ADDRESS_0 + 4 * count, &tmp); if (tmp & 0x01) // IO - pcibios_write_config_dword(cur_func->busno, cur_func->device, + pcibios_write_config_dword(cur_func->busno, cur_func->device, PCI_BASE_ADDRESS_0 + 4 * count, 0xFFFFFFFD); else // Memory - pcibios_write_config_dword(cur_func->busno, cur_func->device, + pcibios_write_config_dword(cur_func->busno, cur_func->device, PCI_BASE_ADDRESS_0 + 4 * count, 0xFFFFFFFF); */ pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFF); @@ -421,8 +421,8 @@ static int configure_device (struct pci_func *func) return -EIO; } pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], func->io[count]->start); - - /* _______________This is for debugging purposes only_____________________ */ + + /* _______________This is for debugging purposes only_____________________ */ debug ("b4 writing, the IO address is %x\n", func->io[count]->start); pci_bus_read_config_dword (ibmphp_pci_bus, devfn, address[count], &bar[count]); debug ("after writing.... the start address is %x\n", bar[count]); @@ -484,7 +484,7 @@ static int configure_device (struct pci_func *func) pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], func->pfmem[count]->start); - /*_______________This is for debugging purposes only______________________________*/ + /*_______________This is for debugging purposes only______________________________*/ debug ("b4 writing, start address is %x\n", func->pfmem[count]->start); pci_bus_read_config_dword (ibmphp_pci_bus, devfn, address[count], &bar[count]); debug ("after writing, start address is %x\n", bar[count]); @@ -559,7 +559,7 @@ static int configure_device (struct pci_func *func) /****************************************************************************** * This routine configures a PCI-2-PCI bridge and the functions behind it * Parameters: pci_func - * Returns: + * Returns: ******************************************************************************/ static int configure_bridge (struct pci_func **func_passed, u8 slotno) { @@ -622,7 +622,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) debug ("AFTER FIND_SEC_NUMBER, func->busno IS %x\n", func->busno); pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, sec_number); - + /* __________________For debugging purposes only __________________________________ pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_number); debug ("sec_number after write/read is %x\n", sec_number); @@ -644,7 +644,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!NEED TO ADD!!! FAST BACK-TO-BACK ENABLE!!!!!!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!NEED TO ADD!!! FAST BACK-TO-BACK ENABLE!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ @@ -670,7 +670,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) debug ("len[count] in IO = %x\n", len[count]); bus_io[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL); - + if (!bus_io[count]) { err ("out of system memory\n"); retval = -ENOMEM; @@ -735,7 +735,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) ibmphp_add_pfmem_from_mem (bus_pfmem[count]); func->pfmem[count] = bus_pfmem[count]; } else { - err ("cannot allocate requested pfmem for bus %x, device %x, len %x\n", + err ("cannot allocate requested pfmem for bus %x, device %x, len %x\n", func->busno, func->device, len[count]); kfree (mem_tmp); kfree (bus_pfmem[count]); @@ -805,7 +805,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) debug ("amount_needed->mem = %x\n", amount_needed->mem); debug ("amount_needed->pfmem = %x\n", amount_needed->pfmem); - if (amount_needed->not_correct) { + if (amount_needed->not_correct) { debug ("amount_needed is not correct\n"); for (count = 0; address[count]; count++) { /* for 2 BARs */ @@ -830,7 +830,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) } else { debug ("it wants %x IO behind the bridge\n", amount_needed->io); io = kzalloc(sizeof(*io), GFP_KERNEL); - + if (!io) { err ("out of system memory\n"); retval = -ENOMEM; @@ -959,7 +959,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) if (bus->noIORanges) { pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_BASE, 0x00 | bus->rangeIO->start >> 8); - pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_LIMIT, 0x00 | bus->rangeIO->end >> 8); + pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_LIMIT, 0x00 | bus->rangeIO->end >> 8); /* _______________This is for debugging purposes only ____________________ pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_IO_BASE, &temp); @@ -980,7 +980,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) if (bus->noMemRanges) { pci_bus_write_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, 0x0000 | bus->rangeMem->start >> 16); pci_bus_write_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, 0x0000 | bus->rangeMem->end >> 16); - + /* ____________________This is for debugging purposes only ________________________ pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, &temp); debug ("mem_base = %x\n", (temp & PCI_MEMORY_RANGE_TYPE_MASK) << 16); @@ -1017,7 +1017,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_INTERRUPT_PIN, &irq); if ((irq > 0x00) && (irq < 0x05)) pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_INTERRUPT_LINE, func->irq[irq - 1]); - /* + /* pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, ctrl); pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_PARITY); pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_SERR); @@ -1071,7 +1071,7 @@ error: * This function adds up the amount of resources needed behind the PPB bridge * and passes it to the configure_bridge function * Input: bridge function - * Ouput: amount of resources needed + * Output: amount of resources needed *****************************************************************************/ static struct res_needed *scan_behind_bridge (struct pci_func * func, u8 busno) { @@ -1204,9 +1204,9 @@ static struct res_needed *scan_behind_bridge (struct pci_func * func, u8 busno) return amount; } -/* The following 3 unconfigure_boot_ routines deal with the case when we had the card - * upon bootup in the system, since we don't allocate func to such case, we need to read - * the start addresses from pci config space and then find the corresponding entries in +/* The following 3 unconfigure_boot_ routines deal with the case when we had the card + * upon bootup in the system, since we don't allocate func to such case, we need to read + * the start addresses from pci config space and then find the corresponding entries in * our resource lists. The functions return either 0, -ENODEV, or -1 (general failure) * Change: we also call these functions even if we configured the card ourselves (i.e., not * the bootup case), since it should work same way @@ -1561,8 +1561,8 @@ static int unconfigure_boot_card (struct slot *slot_cur) * unconfiguring the device * TO DO: will probably need to add some code in case there was some resource, * to remove it... this is from when we have errors in the configure_card... - * !!!!!!!!!!!!!!!!!!!!!!!!!FOR BUSES!!!!!!!!!!!! - * Returns: 0, -1, -ENODEV + * !!!!!!!!!!!!!!!!!!!!!!!!!FOR BUSES!!!!!!!!!!!! + * Returns: 0, -1, -ENODEV */ int ibmphp_unconfigure_card (struct slot **slot_cur, int the_end) { @@ -1634,7 +1634,7 @@ int ibmphp_unconfigure_card (struct slot **slot_cur, int the_end) * Input: bus and the amount of resources needed (we know we can assign those, * since they've been checked already * Output: bus added to the correct spot - * 0, -1, error + * 0, -1, error */ static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct resource_node *mem, struct resource_node *pfmem, u8 parent_busno) { @@ -1650,7 +1650,7 @@ static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct r err ("strange, cannot find bus which is supposed to be at the system... something is terribly wrong...\n"); return -ENODEV; } - + list_add (&bus->bus_list, &cur_bus->bus_list); } if (io) { @@ -1679,7 +1679,7 @@ static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct r } if (pfmem) { pfmem_range = kzalloc(sizeof(*pfmem_range), GFP_KERNEL); - if (!pfmem_range) { + if (!pfmem_range) { err ("out of system memory\n"); return -ENOMEM; } @@ -1726,4 +1726,3 @@ static u8 find_sec_number (u8 primary_busno, u8 slotno) return busno; return 0xff; } - diff --git a/drivers/pci/hotplug/ibmphp_res.c b/drivers/pci/hotplug/ibmphp_res.c index e2dc289..a265acb 100644 --- a/drivers/pci/hotplug/ibmphp_res.c +++ b/drivers/pci/hotplug/ibmphp_res.c @@ -72,7 +72,7 @@ static struct bus_node * __init alloc_error_bus (struct ebda_pci_rsrc * curr, u8 static struct resource_node * __init alloc_resources (struct ebda_pci_rsrc * curr) { struct resource_node *rs; - + if (!curr) { err ("NULL passed to allocate\n"); return NULL; @@ -128,7 +128,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node } newrange->start = curr->start_addr; newrange->end = curr->end_addr; - + if (first_bus || (!num_ranges)) newrange->rangeno = 1; else { @@ -162,7 +162,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node newbus->rangePFMem = newrange; if (first_bus) newbus->noPFMemRanges = 1; - else { + else { debug ("1st PFMemory Primary on Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end); ++newbus->noPFMemRanges; fix_resources (newbus); @@ -190,7 +190,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node * This is the Resource Management initialization function. It will go through * the Resource list taken from EBDA and fill in this module's data structures * - * THIS IS NOT TAKING INTO CONSIDERATION IO RESTRICTIONS OF PRIMARY BUSES, + * THIS IS NOT TAKING INTO CONSIDERATION IO RESTRICTIONS OF PRIMARY BUSES, * SINCE WE'RE GOING TO ASSUME FOR NOW WE DON'T HAVE THOSE ON OUR BUSES FOR NOW * * Input: ptr to the head of the resource list from EBDA @@ -382,7 +382,7 @@ int __init ibmphp_rsrc_init (void) * pci devices' resources for the appropriate resource * * Input: type of the resource, range to add, current bus - * Output: 0 or -1, bus and range ptrs + * Output: 0 or -1, bus and range ptrs ********************************************************************************/ static int add_bus_range (int type, struct range_node *range, struct bus_node *bus_cur) { @@ -466,7 +466,7 @@ static void update_resources (struct bus_node *bus_cur, int type, int rangeno) switch (type) { case MEM: - if (bus_cur->firstMem) + if (bus_cur->firstMem) res = bus_cur->firstMem; break; case PFMEM: @@ -583,7 +583,7 @@ static void fix_resources (struct bus_node *bus_cur) } /******************************************************************************* - * This routine adds a resource to the list of resources to the appropriate bus + * This routine adds a resource to the list of resources to the appropriate bus * based on their resource type and sorted by their starting addresses. It assigns * the ptrs to next and nextRange if needed. * @@ -605,11 +605,11 @@ int ibmphp_add_resource (struct resource_node *res) err ("NULL passed to add\n"); return -ENODEV; } - + bus_cur = find_bus_wprev (res->busno, NULL, 0); - + if (!bus_cur) { - /* didn't find a bus, smth's wrong!!! */ + /* didn't find a bus, something's wrong!!! */ debug ("no bus in the system, either pci_dev's wrong or allocation failed\n"); return -ENODEV; } @@ -648,7 +648,7 @@ int ibmphp_add_resource (struct resource_node *res) if (!range_cur) { switch (res->type) { case IO: - ++bus_cur->needIOUpdate; + ++bus_cur->needIOUpdate; break; case MEM: ++bus_cur->needMemUpdate; @@ -659,13 +659,13 @@ int ibmphp_add_resource (struct resource_node *res) } res->rangeno = -1; } - + debug ("The range is %d\n", res->rangeno); if (!res_start) { /* no first{IO,Mem,Pfmem} on the bus, 1st IO/Mem/Pfmem resource ever */ switch (res->type) { case IO: - bus_cur->firstIO = res; + bus_cur->firstIO = res; break; case MEM: bus_cur->firstMem = res; @@ -673,7 +673,7 @@ int ibmphp_add_resource (struct resource_node *res) case PFMEM: bus_cur->firstPFMem = res; break; - } + } res->next = NULL; res->nextRange = NULL; } else { @@ -770,7 +770,7 @@ int ibmphp_add_resource (struct resource_node *res) * This routine will remove the resource from the list of resources * * Input: io, mem, and/or pfmem resource to be deleted - * Ouput: modified resource list + * Output: modified resource list * 0 or error code ****************************************************************************/ int ibmphp_remove_resource (struct resource_node *res) @@ -825,7 +825,7 @@ int ibmphp_remove_resource (struct resource_node *res) if (!res_cur) { if (res->type == PFMEM) { - /* + /* * case where pfmem might be in the PFMemFromMem list * so will also need to remove the corresponding mem * entry @@ -961,12 +961,12 @@ static struct range_node * find_range (struct bus_node *bus_cur, struct resource } /***************************************************************************** - * This routine will check to make sure the io/mem/pfmem->len that the device asked for + * This routine will check to make sure the io/mem/pfmem->len that the device asked for * can fit w/i our list of available IO/MEM/PFMEM resources. If cannot, returns -EINVAL, * otherwise, returns 0 * * Input: resource - * Ouput: the correct start and end address are inputted into the resource node, + * Output: the correct start and end address are inputted into the resource node, * 0 or -EINVAL *****************************************************************************/ int ibmphp_check_resource (struct resource_node *res, u8 bridge) @@ -996,7 +996,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge) bus_cur = find_bus_wprev (res->busno, NULL, 0); if (!bus_cur) { - /* didn't find a bus, smth's wrong!!! */ + /* didn't find a bus, something's wrong!!! */ debug ("no bus in the system, either pci_dev's wrong or allocation failed\n"); return -EINVAL; } @@ -1066,7 +1066,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge) break; } } - + if (flag && len_cur == res->len) { debug ("but we are not here, right?\n"); res->start = start_cur; @@ -1118,10 +1118,10 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge) if (res_prev) { if (res_prev->rangeno != res_cur->rangeno) { /* 1st device on this range */ - if ((res_cur->start != range->start) && + if ((res_cur->start != range->start) && ((len_tmp = res_cur->start - 1 - range->start) >= res->len)) { if ((len_tmp < len_cur) || (len_cur == 0)) { - if ((range->start % tmp_divide) == 0) { + if ((range->start % tmp_divide) == 0) { /* just perfect, starting address is divisible by length */ flag = 1; len_cur = len_tmp; @@ -1344,7 +1344,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge) * This routine is called from remove_card if the card contained PPB. * It will remove all the resources on the bus as well as the bus itself * Input: Bus - * Ouput: 0, -ENODEV + * Output: 0, -ENODEV ********************************************************************************/ int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno) { @@ -1353,7 +1353,7 @@ int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno) struct bus_node *prev_bus; int rc; - prev_bus = find_bus_wprev (parent_busno, NULL, 0); + prev_bus = find_bus_wprev (parent_busno, NULL, 0); if (!prev_bus) { debug ("something terribly wrong. Cannot find parent bus to the one to remove\n"); @@ -1424,7 +1424,7 @@ int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno) } /****************************************************************************** - * This routine deletes the ranges from a given bus, and the entries from the + * This routine deletes the ranges from a given bus, and the entries from the * parent's bus in the resources * Input: current bus, previous bus * Output: 0, -EINVAL @@ -1453,7 +1453,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev) if (bus_cur->noMemRanges) { range_cur = bus_cur->rangeMem; for (i = 0; i < bus_cur->noMemRanges; i++) { - if (ibmphp_find_resource (bus_prev, range_cur->start, &res, MEM) < 0) + if (ibmphp_find_resource (bus_prev, range_cur->start, &res, MEM) < 0) return -EINVAL; ibmphp_remove_resource (res); @@ -1467,7 +1467,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev) if (bus_cur->noPFMemRanges) { range_cur = bus_cur->rangePFMem; for (i = 0; i < bus_cur->noPFMemRanges; i++) { - if (ibmphp_find_resource (bus_prev, range_cur->start, &res, PFMEM) < 0) + if (ibmphp_find_resource (bus_prev, range_cur->start, &res, PFMEM) < 0) return -EINVAL; ibmphp_remove_resource (res); @@ -1482,7 +1482,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev) } /* - * find the resource node in the bus + * find the resource node in the bus * Input: Resource needed, start address of the resource, type of resource */ int ibmphp_find_resource (struct bus_node *bus, u32 start_address, struct resource_node **res, int flag) @@ -1512,7 +1512,7 @@ int ibmphp_find_resource (struct bus_node *bus, u32 start_address, struct resour err ("wrong type of flag\n"); return -EINVAL; } - + while (res_cur) { if (res_cur->start == start_address) { *res = res_cur; @@ -1718,7 +1718,7 @@ static int __init once_over (void) } /* end for pfmem */ } /* end if */ } /* end list_for_each bus */ - return 0; + return 0; } int ibmphp_add_pfmem_from_mem (struct resource_node *pfmem) @@ -1760,9 +1760,9 @@ static struct bus_node *find_bus_wprev (u8 bus_number, struct bus_node **prev, u list_for_each (tmp, &gbuses) { tmp_prev = tmp->prev; bus_cur = list_entry (tmp, struct bus_node, bus_list); - if (flag) + if (flag) *prev = list_entry (tmp_prev, struct bus_node, bus_list); - if (bus_cur->busno == bus_number) + if (bus_cur->busno == bus_number) return bus_cur; } @@ -1776,7 +1776,7 @@ void ibmphp_print_test (void) struct range_node *range; struct resource_node *res; struct list_head *tmp; - + debug_pci ("*****************START**********************\n"); if ((!list_empty(&gbuses)) && flags) { @@ -1906,7 +1906,7 @@ static int range_exists_already (struct range_node * range, struct bus_node * bu return 1; range_cur = range_cur->next; } - + return 0; } @@ -1920,7 +1920,7 @@ static int range_exists_already (struct range_node * range, struct bus_node * bu * Returns: none * Note: this function doesn't take into account IO restrictions etc, * so will only work for bridges with no video/ISA devices behind them It - * also will not work for onboard PPB's that can have more than 1 *bus + * also will not work for onboard PPBs that can have more than 1 *bus * behind them All these are TO DO. * Also need to add more error checkings... (from fnc returns etc) */ @@ -1963,7 +1963,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) case PCI_HEADER_TYPE_BRIDGE: function = 0x8; case PCI_HEADER_TYPE_MULTIBRIDGE: - /* We assume here that only 1 bus behind the bridge + /* We assume here that only 1 bus behind the bridge TO DO: add functionality for several: temp = secondary; while (temp < subordinate) { @@ -1972,7 +1972,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) } */ pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_busno); - bus_sec = find_bus_wprev (sec_busno, NULL, 0); + bus_sec = find_bus_wprev (sec_busno, NULL, 0); /* this bus structure doesn't exist yet, PPB was configured during previous loading of ibmphp */ if (!bus_sec) { bus_sec = alloc_error_bus (NULL, sec_busno, 1); @@ -2028,7 +2028,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) io->len = io->end - io->start + 1; ibmphp_add_resource (io); } - } + } pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, &start_mem_address); pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, &end_mem_address); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index ec20f74..cfa92a9 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -131,7 +131,7 @@ static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf, } module_put(slot->ops->owner); -exit: +exit: if (retval) return retval; return count; @@ -177,7 +177,7 @@ static ssize_t attention_write_file(struct pci_slot *slot, const char *buf, retval = ops->set_attention_status(slot->hotplug, attention); module_put(ops->owner); -exit: +exit: if (retval) return retval; return count; @@ -247,7 +247,7 @@ static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf, retval = slot->ops->hardware_test(slot, test); module_put(slot->ops->owner); -exit: +exit: if (retval) return retval; return count; @@ -512,7 +512,7 @@ int pci_hp_deregister(struct hotplug_slot *hotplug) * @hotplug: pointer to the slot whose info has changed * @info: pointer to the info copy into the slot's info structure * - * @slot must have been registered with the pci + * @slot must have been registered with the pci * hotplug subsystem previously with a call to pci_hp_register(). * * Returns 0 if successful, anything else for an error. diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 541bbe6..21e865d 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -180,5 +180,5 @@ static inline int pciehp_acpi_slot_detection_check(struct pci_dev *dev) { return 0; } -#endif /* CONFIG_ACPI */ +#endif /* CONFIG_ACPI */ #endif /* _PCIEHP_H */ diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index cff7cad..eddddd4 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -78,7 +78,7 @@ static int __initdata dup_slot_id; static int __initdata acpi_slot_detected; static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); -/* Dummy driver for dumplicate name detection */ +/* Dummy driver for duplicate name detection */ static int __init dummy_probe(struct pcie_device *dev) { u32 slot_cap; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index f4a18f5..bbd48bb 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -351,8 +351,8 @@ static int __init pcied_init(void) pciehp_firmware_init(); retval = pcie_port_service_register(&hpdriver_portdrv); - dbg("pcie_port_service_register = %d\n", retval); - info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); + dbg("pcie_port_service_register = %d\n", retval); + info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); if (retval) dbg("Failure to register service\n"); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 51f56ef..3eea3fd 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -92,7 +92,7 @@ static void start_int_poll_timer(struct controller *ctrl, int sec) { /* Clamp to sane value */ if ((sec <= 0) || (sec > 60)) - sec = 2; + sec = 2; ctrl->poll_timer.function = &int_poll_timeout; ctrl->poll_timer.data = (unsigned long)ctrl; @@ -194,7 +194,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) ctrl_dbg(ctrl, "CMD_COMPLETED not clear after 1 sec\n"); } else if (!NO_CMD_CMPL(ctrl)) { /* - * This controller semms to notify of command completed + * This controller seems to notify of command completed * event even though it supports none of power * controller, attention led, power led and EMI. */ @@ -926,7 +926,7 @@ struct controller *pcie_init(struct pcie_device *dev) if (pciehp_writew(ctrl, PCI_EXP_SLTSTA, 0x1f)) goto abort_ctrl; - /* Disable sotfware notification */ + /* Disable software notification */ pcie_disable_notification(ctrl); ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n", diff --git a/drivers/pci/hotplug/pcihp_skeleton.c b/drivers/pci/hotplug/pcihp_skeleton.c index 1f00b93..ac69094 100644 --- a/drivers/pci/hotplug/pcihp_skeleton.c +++ b/drivers/pci/hotplug/pcihp_skeleton.c @@ -52,7 +52,7 @@ static LIST_HEAD(slot_list); do { \ if (debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) @@ -287,7 +287,7 @@ static int __init init_slots(void) hotplug_slot->release = &release_slot; make_slot_name(slot); hotplug_slot->ops = &skel_hotplug_slot_ops; - + /* * Initialize the slot info structure with some known * good values. @@ -296,7 +296,7 @@ static int __init init_slots(void) get_attention_status(hotplug_slot, &info->attention_status); get_latch_status(hotplug_slot, &info->latch_status); get_adapter_status(hotplug_slot, &info->adapter_status); - + dbg("registering slot %d\n", i); retval = pci_hp_register(slot->hotplug_slot); if (retval) { @@ -336,7 +336,7 @@ static void __exit cleanup_slots(void) pci_hp_deregister(slot->hotplug_slot); } } - + static int __init pcihp_skel_init(void) { int retval; diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index bb7af78..e9c044d 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -217,7 +217,7 @@ static int dlpar_remove_phb(char *drc_name, struct device_node *dn) if (!pcibios_find_pci_bus(dn)) return -EINVAL; - /* If pci slot is hotplugable, use hotplug to remove it */ + /* If pci slot is hotpluggable, use hotplug to remove it */ slot = find_php_slot(dn); if (slot && rpaphp_deregister_slot(slot)) { printk(KERN_ERR "%s: unable to remove hotplug slot %s\n", diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h index 3135856..b2593e8 100644 --- a/drivers/pci/hotplug/rpaphp.h +++ b/drivers/pci/hotplug/rpaphp.h @@ -49,9 +49,9 @@ extern bool rpaphp_debug; #define dbg(format, arg...) \ do { \ - if (rpaphp_debug) \ + if (rpaphp_debug) \ printk(KERN_DEBUG "%s: " format, \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg) @@ -99,5 +99,5 @@ void dealloc_slot_struct(struct slot *slot); struct slot *alloc_slot_struct(struct device_node *dn, int drc_index, char *drc_name, int power_domain); int rpaphp_register_slot(struct slot *slot); int rpaphp_deregister_slot(struct slot *slot); - + #endif /* _PPC64PHP_H */ diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 127d6e6..b7fc5c9 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -226,7 +226,7 @@ int rpaphp_get_drc_props(struct device_node *dn, int *drc_index, for (i = 0; i < indexes[0]; i++) { if ((unsigned int) indexes[i + 1] == *my_index) { if (drc_name) - *drc_name = name_tmp; + *drc_name = name_tmp; if (drc_type) *drc_type = type_tmp; if (drc_index) @@ -289,7 +289,7 @@ static int is_php_dn(struct device_node *dn, const int **indexes, * rpaphp_add_slot -- declare a hotplug slot to the hotplug subsystem. * @dn: device node of slot * - * This subroutine will register a hotplugable slot with the + * This subroutine will register a hotpluggable slot with the * PCI hotplug infrastructure. This routine is typically called * during boot time, if the hotplug slots are present at boot time, * or is called later, by the dlpar add code, if the slot is @@ -328,7 +328,7 @@ int rpaphp_add_slot(struct device_node *dn) return -ENOMEM; slot->type = simple_strtoul(type, NULL, 10); - + dbg("Found drc-index:0x%x drc-name:%s drc-type:%s\n", indexes[i + 1], name, type); @@ -356,7 +356,7 @@ static void __exit cleanup_slots(void) /* * Unregister all of our slots with the pci_hotplug subsystem, * and free up all memory that we had allocated. - * memory will be freed in release_slot callback. + * memory will be freed in release_slot callback. */ list_for_each_safe(tmp, n, &rpaphp_slot_head) { diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c index 513e1e2..9243f3e7 100644 --- a/drivers/pci/hotplug/rpaphp_pci.c +++ b/drivers/pci/hotplug/rpaphp_pci.c @@ -44,7 +44,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state) dbg("%s: slot must be power up to get sensor-state\n", __func__); - /* some slots have to be powered up + /* some slots have to be powered up * before get-sensor will succeed. */ rc = rtas_set_power_level(slot->power_domain, POWER_ON, @@ -133,4 +133,3 @@ int rpaphp_enable_slot(struct slot *slot) return 0; } - diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c index b283bbe..a6082cc 100644 --- a/drivers/pci/hotplug/rpaphp_slot.c +++ b/drivers/pci/hotplug/rpaphp_slot.c @@ -1,5 +1,5 @@ /* - * RPA Virtual I/O device functions + * RPA Virtual I/O device functions * Copyright (C) 2004 Linda Xie <lxie@us.ibm.com> * * All rights reserved. @@ -51,27 +51,27 @@ struct slot *alloc_slot_struct(struct device_node *dn, int drc_index, char *drc_name, int power_domain) { struct slot *slot; - + slot = kzalloc(sizeof(struct slot), GFP_KERNEL); if (!slot) goto error_nomem; slot->hotplug_slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL); if (!slot->hotplug_slot) - goto error_slot; + goto error_slot; slot->hotplug_slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL); if (!slot->hotplug_slot->info) goto error_hpslot; slot->name = kstrdup(drc_name, GFP_KERNEL); if (!slot->name) - goto error_info; + goto error_info; slot->dn = dn; slot->index = drc_index; slot->power_domain = power_domain; slot->hotplug_slot->private = slot; slot->hotplug_slot->ops = &rpaphp_hotplug_slot_ops; slot->hotplug_slot->release = &rpaphp_release_slot; - + return (slot); error_info: @@ -91,7 +91,7 @@ static int is_registered(struct slot *slot) list_for_each_entry(tmp_slot, &rpaphp_slot_head, rpaphp_slot_list) { if (!strcmp(tmp_slot->name, slot->name)) return 1; - } + } return 0; } @@ -104,7 +104,7 @@ int rpaphp_deregister_slot(struct slot *slot) __func__, slot->name); list_del(&slot->rpaphp_slot_list); - + retval = pci_hp_deregister(php_slot); if (retval) err("Problem unregistering a slot %s\n", slot->name); @@ -120,7 +120,7 @@ int rpaphp_register_slot(struct slot *slot) int retval; int slotno; - dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", + dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", __func__, slot->dn->full_name, slot->index, slot->name, slot->power_domain, slot->type); @@ -128,7 +128,7 @@ int rpaphp_register_slot(struct slot *slot) if (is_registered(slot)) { err("rpaphp_register_slot: slot[%s] is already registered\n", slot->name); return -EAGAIN; - } + } if (slot->dn->child) slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn); @@ -145,4 +145,3 @@ int rpaphp_register_slot(struct slot *slot) info("Slot [%s] registered\n", slot->name); return 0; } - diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index d876e4b..6152909 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -216,13 +216,13 @@ struct ctrl_reg { /* offsets to the controller registers based on the above structure layout */ enum ctrl_offsets { - BASE_OFFSET = offsetof(struct ctrl_reg, base_offset), - SLOT_AVAIL1 = offsetof(struct ctrl_reg, slot_avail1), + BASE_OFFSET = offsetof(struct ctrl_reg, base_offset), + SLOT_AVAIL1 = offsetof(struct ctrl_reg, slot_avail1), SLOT_AVAIL2 = offsetof(struct ctrl_reg, slot_avail2), - SLOT_CONFIG = offsetof(struct ctrl_reg, slot_config), + SLOT_CONFIG = offsetof(struct ctrl_reg, slot_config), SEC_BUS_CONFIG = offsetof(struct ctrl_reg, sec_bus_config), MSI_CTRL = offsetof(struct ctrl_reg, msi_ctrl), - PROG_INTERFACE = offsetof(struct ctrl_reg, prog_interface), + PROG_INTERFACE = offsetof(struct ctrl_reg, prog_interface), CMD = offsetof(struct ctrl_reg, cmd), CMD_STATUS = offsetof(struct ctrl_reg, cmd_status), INTR_LOC = offsetof(struct ctrl_reg, intr_loc), diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index d3f757d..faf13ab 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -143,11 +143,11 @@ static int init_slots(struct controller *ctrl) snprintf(name, SLOT_NAME_SIZE, "%d", slot->number); hotplug_slot->ops = &shpchp_hotplug_slot_ops; - ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x " - "hp_slot=%x sun=%x slot_device_offset=%x\n", - pci_domain_nr(ctrl->pci_dev->subordinate), - slot->bus, slot->device, slot->hp_slot, slot->number, - ctrl->slot_device_offset); + ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x " + "hp_slot=%x sun=%x slot_device_offset=%x\n", + pci_domain_nr(ctrl->pci_dev->subordinate), + slot->bus, slot->device, slot->hp_slot, slot->number, + ctrl->slot_device_offset); retval = pci_hp_register(slot->hotplug_slot, ctrl->pci_dev->subordinate, slot->device, name); if (retval) { diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index 75ba231..2d7f474 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -116,7 +116,7 @@ #define SLOT_REG_RSVDZ_MASK ((1 << 15) | (7 << 21)) /* - * SHPC Command Code definitnions + * SHPC Command Code definitions * * Slot Operation 00h - 3Fh * Set Bus Segment Speed/Mode A 40h - 47h diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 21a7182..1fe2d6f 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -610,7 +610,7 @@ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) struct resource tmp; enum pci_bar_type type; int reg = pci_iov_resource_bar(dev, resno, &type); - + if (!reg) return 0; diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c index b008cf8..6684f15 100644 --- a/drivers/pci/irq.c +++ b/drivers/pci/irq.c @@ -25,7 +25,7 @@ static void pci_note_irq_problem(struct pci_dev *pdev, const char *reason) /** * pci_lost_interrupt - reports a lost PCI interrupt * @pdev: device whose interrupt is lost - * + * * The primary function of this routine is to report a lost interrupt * in a standard way which users can recognise (instead of blaming the * driver). diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5e63645..3fcd67a 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -784,7 +784,7 @@ error: * @nvec: how many MSIs have been requested ? * @type: are we checking for MSI or MSI-X ? * - * Look at global flags, the device itself, and its parent busses + * Look at global flags, the device itself, and its parent buses * to determine if MSI/-X are supported for the device. If MSI/-X is * supported return 0, else return an error code. **/ diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index f166126..577074e 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -141,7 +141,7 @@ phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle) * if (_PRW at S-state x) * choose from highest power _SxD to lowest power _SxW * else // no _PRW at S-state x - * choose highest power _SxD or any lower power + * choose highest power _SxD or any lower power */ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 4548535..9042fdb 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -312,7 +312,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, * __pci_device_probe - check if a driver wants to claim a specific PCI device * @drv: driver to call to check if it wants the PCI device * @pci_dev: PCI device being probed - * + * * returns 0 on success, else error. * side-effect: pci_dev->driver is set to drv when drv claims pci_dev. */ @@ -378,7 +378,7 @@ static int pci_device_remove(struct device * dev) * We would love to complain here if pci_dev->is_enabled is set, that * the driver should have called pci_disable_device(), but the * unfortunate fact is there are too many odd BIOS and bridge setups - * that don't like drivers doing that all of the time. + * that don't like drivers doing that all of the time. * Oh well, we can dream of sane hardware when we sleep, no matter how * horrible the crap we have to deal with is when we are awake... */ @@ -1156,10 +1156,10 @@ static const struct dev_pm_ops pci_dev_pm_ops = { * @drv: the driver structure to register * @owner: owner module of drv * @mod_name: module name string - * + * * Adds the driver structure to the list of registered drivers. - * Returns a negative value on error, otherwise 0. - * If no error occurred, the driver remains registered even if + * Returns a negative value on error, otherwise 0. + * If no error occurred, the driver remains registered even if * no device was claimed during registration. */ int __pci_register_driver(struct pci_driver *drv, struct module *owner, @@ -1181,7 +1181,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, /** * pci_unregister_driver - unregister a pci driver * @drv: the driver structure to unregister - * + * * Deletes the driver structure from the list of registered PCI drivers, * gives it a chance to clean up by calling its remove() function for * each device it was responsible for, and marks those devices as @@ -1203,7 +1203,7 @@ static struct pci_driver pci_compat_driver = { * pci_dev_driver - get the pci_driver of a device * @dev: the device to query * - * Returns the appropriate pci_driver structure or %NULL if there is no + * Returns the appropriate pci_driver structure or %NULL if there is no * registered driver for the device. */ struct pci_driver * @@ -1224,7 +1224,7 @@ pci_dev_driver(const struct pci_dev *dev) * pci_bus_match - Tell if a PCI device structure has a matching PCI device id structure * @dev: the PCI device structure to match against * @drv: the device driver to search for matching PCI device id structures - * + * * Used by a driver to check whether a PCI device present in the * system is in its list of supported devices. Returns the matching * pci_device_id structure or %NULL if there is no match. diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c index 6e47c51..2ff7750 100644 --- a/drivers/pci/pci-stub.c +++ b/drivers/pci/pci-stub.c @@ -2,13 +2,13 @@ * * Copyright (C) 2008 Red Hat, Inc. * Author: - * Chris Wright + * Chris Wright * * This work is licensed under the terms of the GNU GPL, version 2. * * Usage is simple, allocate a new id to the stub driver and bind the * device to it. For example: - * + * * # echo "8086 10f5" > /sys/bus/pci/drivers/pci-stub/new_id * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/pci-stub/bind diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 2aaa83c..c91e6c1 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -10,7 +10,7 @@ * * File attributes for PCI devices * - * Modeled after usb's driverfs.c + * Modeled after usb's driverfs.c * */ @@ -270,13 +270,17 @@ msi_bus_store(struct device *dev, struct device_attribute *attr, if (kstrtoul(buf, 0, &val) < 0) return -EINVAL; - /* bad things may happen if the no_msi flag is changed - * while some drivers are loaded */ + /* + * Bad things may happen if the no_msi flag is changed + * while drivers are loaded. + */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - /* Maybe pci devices without subordinate busses shouldn't even have this - * attribute in the first place? */ + /* + * Maybe devices without subordinate buses shouldn't have this + * attribute in the first place? + */ if (!pdev->subordinate) return count; @@ -670,7 +674,7 @@ pci_write_config(struct file* filp, struct kobject *kobj, size = dev->cfg_size - off; count = size; } - + pci_config_pm_runtime_get(dev); if ((off & 1) && size) { @@ -678,7 +682,7 @@ pci_write_config(struct file* filp, struct kobject *kobj, off++; size--; } - + if ((off & 3) && size > 2) { u16 val = data[off - init_off]; val |= (u16) data[off - init_off + 1] << 8; @@ -696,7 +700,7 @@ pci_write_config(struct file* filp, struct kobject *kobj, off += 4; size -= 4; } - + if (size >= 2) { u16 val = data[off - init_off]; val |= (u16) data[off - init_off + 1] << 8; @@ -1229,21 +1233,21 @@ pci_read_rom(struct file *filp, struct kobject *kobj, if (!pdev->rom_attr_enabled) return -EINVAL; - + rom = pci_map_rom(pdev, &size); /* size starts out as PCI window size */ if (!rom || !size) return -EIO; - + if (off >= size) count = 0; else { if (off + count > size) count = size - off; - + memcpy_fromio(buf, rom + off, count); } pci_unmap_rom(pdev, rom); - + return count; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b127fbda..33120d1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -198,7 +198,7 @@ static int __pci_bus_find_cap_start(struct pci_bus *bus, } /** - * pci_find_capability - query for devices' capabilities + * pci_find_capability - query for devices' capabilities * @dev: PCI device to query * @cap: capability code * @@ -207,12 +207,12 @@ static int __pci_bus_find_cap_start(struct pci_bus *bus, * device's PCI configuration space or 0 in case the device does not * support it. Possible values for @cap: * - * %PCI_CAP_ID_PM Power Management - * %PCI_CAP_ID_AGP Accelerated Graphics Port - * %PCI_CAP_ID_VPD Vital Product Data - * %PCI_CAP_ID_SLOTID Slot Identification + * %PCI_CAP_ID_PM Power Management + * %PCI_CAP_ID_AGP Accelerated Graphics Port + * %PCI_CAP_ID_VPD Vital Product Data + * %PCI_CAP_ID_SLOTID Slot Identification * %PCI_CAP_ID_MSI Message Signalled Interrupts - * %PCI_CAP_ID_CHSWP CompactPCI HotSwap + * %PCI_CAP_ID_CHSWP CompactPCI HotSwap * %PCI_CAP_ID_PCIX PCI-X * %PCI_CAP_ID_EXP PCI Express */ @@ -228,13 +228,13 @@ int pci_find_capability(struct pci_dev *dev, int cap) } /** - * pci_bus_find_capability - query for devices' capabilities + * pci_bus_find_capability - query for devices' capabilities * @bus: the PCI bus to query * @devfn: PCI device to query * @cap: capability code * * Like pci_find_capability() but works for pci devices that do not have a - * pci_dev structure set up yet. + * pci_dev structure set up yet. * * Returns the address of the requested capability structure within the * device's PCI configuration space or 0 in case the device does not @@ -515,7 +515,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) return -EINVAL; /* Validate current state: - * Can enter D0 from any state, but if we can only go deeper + * Can enter D0 from any state, but if we can only go deeper * to sleep if we're already in a low power state */ if (state != PCI_D0 && dev->current_state <= PCI_D3cold @@ -998,7 +998,7 @@ static void pci_restore_config_space(struct pci_dev *pdev) } } -/** +/** * pci_restore_state - Restore the saved state of a PCI device * @dev: - PCI device that we're dealing with */ @@ -1030,7 +1030,7 @@ struct pci_saved_state { * the device saved state. * @dev: PCI device that we're dealing with * - * Rerturn NULL if no state or error. + * Return NULL if no state or error. */ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) { @@ -1880,7 +1880,7 @@ int pci_finish_runtime_suspend(struct pci_dev *dev) * pci_dev_run_wake - Check if device can generate run-time wake-up events. * @dev: Device to check. * - * Return true if the device itself is cabable of generating wake-up events + * Return true if the device itself is capable of generating wake-up events * (through the platform or using the native PCIe PME) or if the device supports * PME and one of its upstream bridges can generate wake-up events. */ @@ -2447,7 +2447,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) switch (pci_pcie_type(pdev)) { /* * PCI/X-to-PCIe bridges are not specifically mentioned by the spec, - * but since their primary inteface is PCI/X, we conservatively + * but since their primary interface is PCI/X, we conservatively * handle them as we would a non-PCIe device. */ case PCI_EXP_TYPE_PCIE_BRIDGE: @@ -2471,7 +2471,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) /* * PCIe 3.0, 6.12.1.2 specifies ACS capabilities that should be * implemented by the remaining PCIe types to indicate peer-to-peer - * capabilities, but only when they are part of a multifunciton + * capabilities, but only when they are part of a multifunction * device. The footnote for section 6.12 indicates the specific * PCIe types included here. */ @@ -2486,7 +2486,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) } /* - * PCIe 3.0, 6.12.1.3 specifies no ACS capabilties are applicable + * PCIe 3.0, 6.12.1.3 specifies no ACS capabilities are applicable * to single function devices with the exception of downstream ports. */ return true; @@ -2622,7 +2622,7 @@ void pci_release_region(struct pci_dev *pdev, int bar) * * If @exclusive is set, then the region is marked so that userspace * is explicitly not allowed to map the resource via /dev/mem or - * sysfs MMIO access. + * sysfs MMIO access. * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. @@ -2634,7 +2634,7 @@ static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_n if (pci_resource_len(pdev, bar) == 0) return 0; - + if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) { if (!request_region(pci_resource_start(pdev, bar), pci_resource_len(pdev, bar), res_name)) @@ -2694,7 +2694,7 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) * * The key difference that _exclusive makes it that userspace is * explicitly not allowed to map the resource via /dev/mem or - * sysfs. + * sysfs. */ int pci_request_region_exclusive(struct pci_dev *pdev, int bar, const char *res_name) { @@ -2799,7 +2799,7 @@ int pci_request_regions(struct pci_dev *pdev, const char *res_name) * successfully. * * pci_request_regions_exclusive() will mark the region so that - * /dev/mem and the sysfs MMIO access will not be allowed. + * /dev/mem and the sysfs MMIO access will not be allowed. * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. @@ -2967,7 +2967,7 @@ pci_set_mwi(struct pci_dev *dev) cmd |= PCI_COMMAND_INVALIDATE; pci_write_config_word(dev, PCI_COMMAND, cmd); } - + return 0; } @@ -3292,7 +3292,7 @@ clear: * * NOTE: This causes the caller to sleep for twice the device power transition * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms - * by devault (i.e. unless the @dev's d3_delay field has a different value). + * by default (i.e. unless the @dev's d3_delay field has a different value). * Moreover, only devices in D0 can be reset by this function. */ static int pci_pm_reset(struct pci_dev *dev, int probe) @@ -3341,7 +3341,7 @@ void pci_reset_bridge_secondary_bus(struct pci_dev *dev) pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); /* * PCI spec v3.0 7.6.4.2 requires minimum Trst of 1ms. Double - * this to 2ms to ensure that we meet the minium requirement. + * this to 2ms to ensure that we meet the minimum requirement. */ msleep(2); @@ -3998,7 +3998,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps) return -EINVAL; v = ffs(mps) - 8; - if (v > dev->pcie_mpss) + if (v > dev->pcie_mpss) return -EINVAL; v <<= 5; diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 6b3a958..b2c8881 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -525,7 +525,7 @@ static void handle_error_source(struct pcie_device *aerdev, if (info->severity == AER_CORRECTABLE) { /* - * Correctable error does not need software intevention. + * Correctable error does not need software intervention. * No need to go through error recovery process. */ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 403a443..f1272dc 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -548,7 +548,7 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) /* * pcie_aspm_init_link_state: Initiate PCI express link state. - * It is called after the pcie and its children devices are scaned. + * It is called after the pcie and its children devices are scanned. * @pdev: the root port or switch downstream port */ void pcie_aspm_init_link_state(struct pci_dev *pdev) diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index e56e594..bbc3bdd 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -419,8 +419,8 @@ static void pcie_pme_remove(struct pcie_device *srv) static struct pcie_port_service_driver pcie_pme_driver = { .name = "pcie_pme", - .port_type = PCI_EXP_TYPE_ROOT_PORT, - .service = PCIE_PORT_SERVICE_PME, + .port_type = PCI_EXP_TYPE_ROOT_PORT, + .service = PCIE_PORT_SERVICE_PME, .probe = pcie_pme_probe, .suspend = pcie_pme_suspend, diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index d2eb80a..d525548 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -14,7 +14,7 @@ #define PCIE_PORT_DEVICE_MAXSERVICES 4 /* * According to the PCI Express Base Specification 2.0, the indices of - * the MSI-X table entires used by port services must not exceed 31 + * the MSI-X table entries used by port services must not exceed 31 */ #define PCIE_PORT_MAX_MSIX_ENTRIES 32 diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index 67be55a..87e79a6 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -18,8 +18,8 @@ static int pcie_port_bus_match(struct device *dev, struct device_driver *drv); struct bus_type pcie_port_bus_type = { - .name = "pci_express", - .match = pcie_port_bus_match, + .name = "pci_express", + .match = pcie_port_bus_match, }; EXPORT_SYMBOL_GPL(pcie_port_bus_type); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 08d131f..0b6e766 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -46,7 +46,7 @@ static void release_pcie_device(struct device *dev) * pcie_port_msix_add_entry - add entry to given array of MSI-X entries * @entries: Array of MSI-X entries * @new_entry: Index of the entry to add to the array - * @nr_entries: Number of entries aleady in the array + * @nr_entries: Number of entries already in the array * * Return value: Position of the added entry in the array */ diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 696caed..0d8fdc4 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -223,7 +223,6 @@ static int pcie_portdrv_probe(struct pci_dev *dev, static void pcie_portdrv_remove(struct pci_dev *dev) { pcie_port_device_remove(dev); - pci_disable_device(dev); } static int error_detected_iter(struct device *device, void *data) @@ -390,9 +389,9 @@ static struct pci_driver pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, - .err_handler = &pcie_portdrv_err_handler, + .err_handler = &pcie_portdrv_err_handler, - .driver.pm = PCIE_PORTDRV_PM_OPS, + .driver.pm = PCIE_PORTDRV_PM_OPS, }; static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) @@ -412,7 +411,7 @@ static struct dmi_system_id __initdata pcie_portdrv_dmi_table[] = { .ident = "MSI Wind U-100", .matches = { DMI_MATCH(DMI_SYS_VENDOR, - "MICRO-STAR INTERNATIONAL CO., LTD"), + "MICRO-STAR INTERNATIONAL CO., LTD"), DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), }, }, diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5e14f5a..38e403d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -582,7 +582,7 @@ static enum pci_bus_speed agp_speed(int agp3, int agpstat) index = 1; else goto out; - + if (agp3) { index += 2; if (index == 5) @@ -789,7 +789,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) } /* Disable MasterAbortMode during probing to avoid reporting - of bus errors (in some architectures) */ + of bus errors (in some architectures) */ pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl); pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT); @@ -1005,7 +1005,7 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev) * pci_setup_device - fill in class and map information of a device * @dev: the device structure to fill * - * Initialize the device structure with information about the device's + * Initialize the device structure with information about the device's * vendor,class,memory and IO-space addresses,IRQ lines etc. * Called at initialisation of the PCI subsystem and by CardBus services. * Returns 0 on success and negative if unknown type of device (not normal, @@ -1111,7 +1111,7 @@ int pci_setup_device(struct pci_dev *dev) goto bad; /* The PCI-to-PCI bridge spec requires that subtractive decoding (i.e. transparent) bridge must have programming - interface code of 0x01. */ + interface code of 0x01. */ pci_read_irq(dev); dev->transparent = ((dev->class & 0xff) == 1); pci_read_bases(dev, 2, PCI_ROM_ADDRESS1); @@ -1570,7 +1570,7 @@ static void pcie_write_mrrs(struct pci_dev *dev) * subsequent read will verify if the value is acceptable or not. * If the MRRS value provided is not acceptable (e.g., too large), * shrink the value until it is acceptable to the HW. - */ + */ while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { rc = pcie_set_readrq(dev, mrrs); if (!rc) diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index cdc7836..46d1378 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -222,7 +222,7 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, default: ret = -EINVAL; break; - }; + } return ret; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 9149045..b3b1b9a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -55,7 +55,7 @@ static void quirk_mellanox_tavor(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR,quirk_mellanox_tavor); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,quirk_mellanox_tavor); -/* Deal with broken BIOS'es that neglect to enable passive release, +/* Deal with broken BIOSes that neglect to enable passive release, which can cause problems in combination with the 82441FX/PPro MTRRs */ static void quirk_passive_release(struct pci_dev *dev) { @@ -78,11 +78,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_p /* The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a workaround but VIA don't answer queries. If you happen to have good contacts at VIA - ask them for me please -- Alan - - This appears to be BIOS not version dependent. So presumably there is a + ask them for me please -- Alan + + This appears to be BIOS not version dependent. So presumably there is a chipset level fix */ - + static void quirk_isa_dma_hangs(struct pci_dev *dev) { if (!isa_dma_bridge_buggy) { @@ -97,7 +97,7 @@ static void quirk_isa_dma_hangs(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, quirk_isa_dma_hangs); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, quirk_isa_dma_hangs); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_1, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_2, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_3, quirk_isa_dma_hangs); @@ -157,10 +157,10 @@ static void quirk_triton(struct pci_dev *dev) pci_pci_problems |= PCIPCI_TRITON; } } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437, quirk_triton); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437VX, quirk_triton); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439, quirk_triton); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quirk_triton); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437, quirk_triton); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437VX, quirk_triton); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439, quirk_triton); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quirk_triton); /* * VIA Apollo KT133 needs PCI latency patch @@ -171,7 +171,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quir * the info on which Mr Breese based his work. * * Updated based on further information from the site and also on - * information provided by VIA + * information provided by VIA */ static void quirk_vialatency(struct pci_dev *dev) { @@ -179,7 +179,7 @@ static void quirk_vialatency(struct pci_dev *dev) u8 busarb; /* Ok we have a potential problem chipset here. Now see if we have a buggy southbridge */ - + p = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL); if (p!=NULL) { /* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */ @@ -194,9 +194,9 @@ static void quirk_vialatency(struct pci_dev *dev) if (p->revision < 0x10 || p->revision > 0x12) goto exit; } - + /* - * Ok we have the problem. Now set the PCI master grant to + * Ok we have the problem. Now set the PCI master grant to * occur every master grant. The apparent bug is that under high * PCI load (quite common in Linux of course) you can get data * loss when the CPU is held off the bus for 3 bus master requests @@ -209,7 +209,7 @@ static void quirk_vialatency(struct pci_dev *dev) */ pci_read_config_byte(dev, 0x76, &busarb); - /* Set bit 4 and bi 5 of byte 76 to 0x01 + /* Set bit 4 and bi 5 of byte 76 to 0x01 "Master priority rotation on every PCI master grant */ busarb &= ~(1<<5); busarb |= (1<<4); @@ -252,7 +252,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576, quirk_vsfx) * that DMA to AGP space. Latency must be set to 0xA and triton * workaround applied too * [Info kindly provided by ALi] - */ + */ static void quirk_alimagik(struct pci_dev *dev) { if ((pci_pci_problems&PCIPCI_ALIMAGIK)==0) { @@ -260,8 +260,8 @@ static void quirk_alimagik(struct pci_dev *dev) pci_pci_problems |= PCIPCI_ALIMAGIK|PCIPCI_TRITON; } } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1647, quirk_alimagik); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1651, quirk_alimagik); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1647, quirk_alimagik); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1651, quirk_alimagik); /* * Natoma has some interesting boundary conditions with Zoran stuff @@ -274,12 +274,12 @@ static void quirk_natoma(struct pci_dev *dev) pci_pci_problems |= PCIPCI_NATOMA; } } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_0, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_1, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_1, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_0, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_1, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_1, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2, quirk_natoma); /* * This chip can cause PCI parity errors if config register 0xA0 is read @@ -400,7 +400,7 @@ static void piix4_io_quirk(struct pci_dev *dev, const char *name, unsigned int p /* * For now we only print it out. Eventually we'll want to * reserve it (at least if it's in the 0x1000+ range), but - * let's get enough confirmation reports first. + * let's get enough confirmation reports first. */ base &= -size; dev_info(&dev->dev, "%s PIO at %04x-%04x\n", name, base, base + size - 1); @@ -425,7 +425,7 @@ static void piix4_mem_quirk(struct pci_dev *dev, const char *name, unsigned int } /* * For now we only print it out. Eventually we'll want to - * reserve it, but let's get enough confirmation reports first. + * reserve it, but let's get enough confirmation reports first. */ base &= -size; dev_info(&dev->dev, "%s MMIO at %04x-%04x\n", name, base, base + size - 1); @@ -682,7 +682,7 @@ static void quirk_xio2000a(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XIO2000A, quirk_xio2000a); -#ifdef CONFIG_X86_IO_APIC +#ifdef CONFIG_X86_IO_APIC #include <asm/io_apic.h> @@ -696,12 +696,12 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XIO2000A, static void quirk_via_ioapic(struct pci_dev *dev) { u8 tmp; - + if (nr_ioapics < 1) tmp = 0; /* nothing routed to external APIC */ else tmp = 0x1f; /* all known bits (4-0) routed to external APIC */ - + dev_info(&dev->dev, "%sbling VIA external APIC routing\n", tmp == 0 ? "Disa" : "Ena"); @@ -712,7 +712,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_i DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic); /* - * VIA 8237: Some BIOSs don't set the 'Bypass APIC De-Assert Message' Bit. + * VIA 8237: Some BIOSes don't set the 'Bypass APIC De-Assert Message' Bit. * This leads to doubled level interrupt rates. * Set this bit to get rid of cycle wastage. * Otherwise uncritical. @@ -986,7 +986,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_PCI_MASTER, qu static void quirk_disable_pxb(struct pci_dev *pdev) { u16 config; - + if (pdev->revision != 0x04) /* Only C0 requires this */ return; pci_read_config_word(pdev, 0x40, &config); @@ -1094,11 +1094,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82375, quirk_e * On ASUS P4B boards, the SMBus PCI Device within the ICH2/4 southbridge * is not activated. The myth is that Asus said that they do not want the * users to be irritated by just another PCI Device in the Win98 device - * manager. (see the file prog/hotplug/README.p4b in the lm_sensors + * manager. (see the file prog/hotplug/README.p4b in the lm_sensors * package 2.7.0 for details) * - * The SMBus PCI Device can be activated by setting a bit in the ICH LPC - * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it + * The SMBus PCI Device can be activated by setting a bit in the ICH LPC + * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it * becomes necessary to do this tweak in two steps -- the chosen trigger * is either the Host bridge (preferred) or on-board VGA controller. * @@ -1253,7 +1253,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asu static void asus_hides_smbus_lpc(struct pci_dev *dev) { u16 val; - + if (likely(!asus_hides_smbus)) return; @@ -1640,8 +1640,8 @@ static void quirk_disable_intel_boot_interrupt(struct pci_dev *dev) dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); /* * disable boot interrupts on HT-1000 @@ -1673,8 +1673,8 @@ static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev) dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); /* * disable boot interrupts on AMD and ATI chipsets @@ -1730,8 +1730,8 @@ static void quirk_disable_amd_8111_boot_interrupt(struct pci_dev *dev) dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); #endif /* CONFIG_X86_IO_APIC */ /* @@ -2127,8 +2127,8 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1); #ifdef CONFIG_PCI_MSI /* Some chipsets do not support MSI. We cannot easily rely on setting * PCI_BUS_FLAGS_NO_MSI in its bus flags because there are actually - * some other busses controlled by the chipset even if Linux is not - * aware of it. Instead of setting the flag on all busses in the + * some other buses controlled by the chipset even if Linux is not + * aware of it. Instead of setting the flag on all buses in the * machine, simply disable MSI globally. */ static void quirk_disable_all_msi(struct pci_dev *dev) @@ -2288,14 +2288,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, nvenet_msi_disable); /* - * Some versions of the MCP55 bridge from nvidia have a legacy irq routing - * config register. This register controls the routing of legacy interrupts - * from devices that route through the MCP55. If this register is misprogramed - * interrupts are only sent to the bsp, unlike conventional systems where the - * irq is broadxast to all online cpus. Not having this register set - * properly prevents kdump from booting up properly, so lets make sure that - * we have it set correctly. - * Note this is an undocumented register. + * Some versions of the MCP55 bridge from Nvidia have a legacy IRQ routing + * config register. This register controls the routing of legacy + * interrupts from devices that route through the MCP55. If this register + * is misprogrammed, interrupts are only sent to the BSP, unlike + * conventional systems where the IRQ is broadcast to all online CPUs. Not + * having this register set properly prevents kdump from booting up + * properly, so let's make sure that we have it set correctly. + * Note that this is an undocumented register. */ static void nvbridge_check_legacy_irq_routing(struct pci_dev *dev) { @@ -2626,7 +2626,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0xe091, /* Allow manual resource allocation for PCI hotplug bridges * via pci=hpmemsize=nnM and pci=hpiosize=nnM parameters. For * some PCI-PCI hotplug bridges, like PLX 6254 (former HINT HB6), - * kernel fails to allocate resources when hotplug device is + * kernel fails to allocate resources when hotplug device is * inserted and PCI bus is rescanned. */ static void quirk_hotplug_bridge(struct pci_dev *dev) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 8fc54b7..1576851 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -7,7 +7,7 @@ static void pci_free_resources(struct pci_dev *dev) { int i; - msi_remove_pci_irq_vectors(dev); + msi_remove_pci_irq_vectors(dev); pci_cleanup_rom(dev); for (i = 0; i < PCI_NUM_RESOURCES; i++) { diff --git a/drivers/pci/search.c b/drivers/pci/search.c index d0627fa..3ff2ac7 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -1,5 +1,5 @@ /* - * PCI searching functions. + * PCI searching functions. * * Copyright (C) 1993 -- 1997 Drew Eckhardt, Frederic Potter, * David Mosberger-Tang @@ -96,12 +96,12 @@ struct pci_bus * pci_find_bus(int domain, int busnr) * pci_find_next_bus - begin or continue searching for a PCI bus * @from: Previous PCI bus found, or %NULL for new search. * - * Iterates through the list of known PCI busses. A new search is + * Iterates through the list of known PCI buses. A new search is * initiated by passing %NULL as the @from argument. Otherwise if * @from is not %NULL, searches continue from next device on the * global list. */ -struct pci_bus * +struct pci_bus * pci_find_next_bus(const struct pci_bus *from) { struct list_head *n; @@ -119,11 +119,11 @@ pci_find_next_bus(const struct pci_bus *from) /** * pci_get_slot - locate PCI device for a given PCI slot * @bus: PCI bus on which desired PCI device resides - * @devfn: encodes number of PCI slot in which the desired PCI - * device resides and the logical device number within that slot + * @devfn: encodes number of PCI slot in which the desired PCI + * device resides and the logical device number within that slot * in case of multi-function devices. * - * Given a PCI bus and slot/function number, the desired PCI device + * Given a PCI bus and slot/function number, the desired PCI device * is located in the list of PCI devices. * If the device is found, its reference count is increased and this * function returns a pointer to its data structure. The caller must diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4ce83b2..219a410 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -292,8 +292,8 @@ static void assign_requested_resources_sorted(struct list_head *head, (!(res->flags & IORESOURCE_ROM_ENABLE)))) add_to_list(fail_head, dev_res->dev, res, - 0 /* dont care */, - 0 /* dont care */); + 0 /* don't care */, + 0 /* don't care */); } reset_resource(res); } @@ -667,9 +667,9 @@ static void pci_bridge_check_ranges(struct pci_bus *bus) if (!io) { pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0); pci_read_config_word(bridge, PCI_IO_BASE, &io); - pci_write_config_word(bridge, PCI_IO_BASE, 0x0); - } - if (io) + pci_write_config_word(bridge, PCI_IO_BASE, 0x0); + } + if (io) b_res[0].flags |= IORESOURCE_IO; /* DECchip 21050 pass 2 errata: the bridge may miss an address disconnect boundary by one PCI data phase. @@ -819,7 +819,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, resource_size_t min_align, align; if (!b_res) - return; + return; min_align = window_alignment(bus, IORESOURCE_IO); list_for_each_entry(dev, &bus->devices, bus_list) { @@ -950,7 +950,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (realloc_head && i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END) { r->end = r->start - 1; - add_to_list(realloc_head, dev, r, r_size, 0/* dont' care */); + add_to_list(realloc_head, dev, r, r_size, 0/* don't care */); children_add_size += r_size; continue; } @@ -1456,8 +1456,8 @@ static enum enable_type pci_realloc_detect(struct pci_bus *bus, /* * first try will not touch pci bridge res - * second and later try will clear small leaf bridge res - * will stop till to the max deepth if can not find good one + * second and later try will clear small leaf bridge res + * will stop till to the max depth if can not find good one */ void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus) { diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 07f2edd..83c4d3b 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -159,7 +159,7 @@ resource_size_t __weak pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) return 0; } -static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, +static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, int resno, resource_size_t size) { struct resource *root, *conflict; diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index c1e9284..448ca56 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -53,7 +53,7 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf) static const char *pci_bus_speed_strings[] = { "33 MHz PCI", /* 0x00 */ "66 MHz PCI", /* 0x01 */ - "66 MHz PCI-X", /* 0x02 */ + "66 MHz PCI-X", /* 0x02 */ "100 MHz PCI-X", /* 0x03 */ "133 MHz PCI-X", /* 0x04 */ NULL, /* 0x05 */ diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index e1c1ec5..24750a1 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -44,7 +44,7 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn, default: err = -EINVAL; goto error; - }; + } err = -EIO; if (cfg_ret != PCIBIOS_SUCCESSFUL) diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 8b2cd8a..c0da95e 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -428,6 +428,14 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) return 0; } +static void at91_rtc_shutdown(struct platform_device *pdev) +{ + /* Disable all interrupts */ + at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | + AT91_RTC_SECEV | AT91_RTC_TIMEV | + AT91_RTC_CALEV); +} + #ifdef CONFIG_PM_SLEEP /* AT91RM9200 RTC Power management control */ @@ -466,6 +474,7 @@ static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume); static struct platform_driver at91_rtc_driver = { .remove = __exit_p(at91_rtc_remove), + .shutdown = at91_rtc_shutdown, .driver = { .name = "at91_rtc", .owner = THIS_MODULE, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index f85b9e5..7eb19be 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -330,7 +330,7 @@ static int tcm_qla2xxx_check_demo_mode(struct se_portal_group *se_tpg) struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->generate_node_acls; + return tpg->tpg_attrib.generate_node_acls; } static int tcm_qla2xxx_check_demo_mode_cache(struct se_portal_group *se_tpg) @@ -338,7 +338,7 @@ static int tcm_qla2xxx_check_demo_mode_cache(struct se_portal_group *se_tpg) struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->cache_dynamic_acls; + return tpg->tpg_attrib.cache_dynamic_acls; } static int tcm_qla2xxx_check_demo_write_protect(struct se_portal_group *se_tpg) @@ -346,7 +346,7 @@ static int tcm_qla2xxx_check_demo_write_protect(struct se_portal_group *se_tpg) struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->demo_mode_write_protect; + return tpg->tpg_attrib.demo_mode_write_protect; } static int tcm_qla2xxx_check_prod_write_protect(struct se_portal_group *se_tpg) @@ -354,7 +354,7 @@ static int tcm_qla2xxx_check_prod_write_protect(struct se_portal_group *se_tpg) struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->prod_mode_write_protect; + return tpg->tpg_attrib.prod_mode_write_protect; } static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg) @@ -362,7 +362,7 @@ static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->demo_mode_login_only; + return tpg->tpg_attrib.demo_mode_login_only; } static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl( @@ -847,7 +847,7 @@ static ssize_t tcm_qla2xxx_tpg_attrib_show_##name( \ struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, \ struct tcm_qla2xxx_tpg, se_tpg); \ \ - return sprintf(page, "%u\n", QLA_TPG_ATTRIB(tpg)->name); \ + return sprintf(page, "%u\n", tpg->tpg_attrib.name); \ } \ \ static ssize_t tcm_qla2xxx_tpg_attrib_store_##name( \ @@ -1027,10 +1027,10 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( * By default allow READ-ONLY TPG demo-mode access w/ cached dynamic * NodeACLs */ - QLA_TPG_ATTRIB(tpg)->generate_node_acls = 1; - QLA_TPG_ATTRIB(tpg)->demo_mode_write_protect = 1; - QLA_TPG_ATTRIB(tpg)->cache_dynamic_acls = 1; - QLA_TPG_ATTRIB(tpg)->demo_mode_login_only = 1; + tpg->tpg_attrib.generate_node_acls = 1; + tpg->tpg_attrib.demo_mode_write_protect = 1; + tpg->tpg_attrib.cache_dynamic_acls = 1; + tpg->tpg_attrib.demo_mode_login_only = 1; ret = core_tpg_register(&tcm_qla2xxx_fabric_configfs->tf_ops, wwn, &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); @@ -1830,16 +1830,16 @@ static int tcm_qla2xxx_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = tcm_qla2xxx_tpg_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_qla2xxx_tpg_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = tcm_qla2xxx_tpg_attrib_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; /* * Register the fabric for use within TCM */ @@ -1870,15 +1870,15 @@ static int tcm_qla2xxx_register_configfs(void) /* * Setup default attribute lists for various npiv_fabric->tf_cit_tmpl */ - TF_CIT_TMPL(npiv_fabric)->tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; + npiv_fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; /* * Register the npiv_fabric for use within TCM */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 3293275..771f7b8 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -45,8 +45,6 @@ struct tcm_qla2xxx_tpg { struct se_portal_group se_tpg; }; -#define QLA_TPG_ATTRIB(tpg) (&(tpg)->tpg_attrib) - struct tcm_qla2xxx_fc_loopid { struct se_node_acl *se_nacl; }; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 38e44b9..d70e911 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -805,14 +805,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int iscsi_task_attr; int sam_task_attr; - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->cmd_pdus++; - if (conn->sess->se_sess->se_node_acl) { - spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); - conn->sess->se_sess->se_node_acl->num_cmds++; - spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); - } - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_inc(&conn->sess->cmd_pdus); hdr = (struct iscsi_scsi_req *) buf; payload_length = ntoh24(hdr->dlength); @@ -1254,20 +1247,12 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, int rc; if (!payload_length) { - pr_err("DataOUT payload is ZERO, protocol error.\n"); - return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, - buf); + pr_warn("DataOUT payload is ZERO, ignoring.\n"); + return 0; } /* iSCSI write */ - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->rx_data_octets += payload_length; - if (conn->sess->se_sess->se_node_acl) { - spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); - conn->sess->se_sess->se_node_acl->write_bytes += payload_length; - spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); - } - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_add(payload_length, &conn->sess->rx_data_octets); if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { pr_err("DataSegmentLength: %u is greater than" @@ -1486,7 +1471,7 @@ EXPORT_SYMBOL(iscsit_check_dataout_payload); static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf) { - struct iscsi_cmd *cmd; + struct iscsi_cmd *cmd = NULL; struct iscsi_data *hdr = (struct iscsi_data *)buf; int rc; bool data_crc_failed = false; @@ -1954,6 +1939,13 @@ iscsit_setup_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, (unsigned char *)hdr); } + if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL) || + (hdr->flags & ISCSI_FLAG_TEXT_CONTINUE)) { + pr_err("Multi sequence text commands currently not supported\n"); + return iscsit_reject_cmd(cmd, ISCSI_REASON_CMD_NOT_SUPPORTED, + (unsigned char *)hdr); + } + pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x," " ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn, hdr->exp_statsn, payload_length); @@ -2630,14 +2622,7 @@ static int iscsit_send_datain(struct iscsi_cmd *cmd, struct iscsi_conn *conn) return -1; } - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->tx_data_octets += datain.length; - if (conn->sess->se_sess->se_node_acl) { - spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); - conn->sess->se_sess->se_node_acl->read_bytes += datain.length; - spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); - } - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_add(datain.length, &conn->sess->tx_data_octets); /* * Special case for successfully execution w/ both DATAIN * and Sense Data. @@ -3162,9 +3147,7 @@ void iscsit_build_rsp_pdu(struct iscsi_cmd *cmd, struct iscsi_conn *conn, if (inc_stat_sn) cmd->stat_sn = conn->stat_sn++; - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->rsp_pdus++; - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_inc(&conn->sess->rsp_pdus); memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_SCSI_CMD_RSP; @@ -3374,6 +3357,7 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) struct iscsi_tiqn *tiqn; struct iscsi_tpg_np *tpg_np; int buffer_len, end_of_buf = 0, len = 0, payload_len = 0; + int target_name_printed; unsigned char buf[ISCSI_IQN_LEN+12]; /* iqn + "TargetName=" + \0 */ unsigned char *text_in = cmd->text_in_ptr, *text_ptr = NULL; @@ -3411,19 +3395,23 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) continue; } - len = sprintf(buf, "TargetName=%s", tiqn->tiqn); - len += 1; - - if ((len + payload_len) > buffer_len) { - end_of_buf = 1; - goto eob; - } - memcpy(payload + payload_len, buf, len); - payload_len += len; + target_name_printed = 0; spin_lock(&tiqn->tiqn_tpg_lock); list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { + /* If demo_mode_discovery=0 and generate_node_acls=0 + * (demo mode dislabed) do not return + * TargetName+TargetAddress unless a NodeACL exists. + */ + + if ((tpg->tpg_attrib.generate_node_acls == 0) && + (tpg->tpg_attrib.demo_mode_discovery == 0) && + (!core_tpg_get_initiator_node_acl(&tpg->tpg_se_tpg, + cmd->conn->sess->sess_ops->InitiatorName))) { + continue; + } + spin_lock(&tpg->tpg_state_lock); if ((tpg->tpg_state == TPG_STATE_FREE) || (tpg->tpg_state == TPG_STATE_INACTIVE)) { @@ -3438,6 +3426,22 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); + if (!target_name_printed) { + len = sprintf(buf, "TargetName=%s", + tiqn->tiqn); + len += 1; + + if ((len + payload_len) > buffer_len) { + spin_unlock(&tpg->tpg_np_lock); + spin_unlock(&tiqn->tiqn_tpg_lock); + end_of_buf = 1; + goto eob; + } + memcpy(payload + payload_len, buf, len); + payload_len += len; + target_name_printed = 1; + } + len = sprintf(buf, "TargetAddress=" "%s:%hu,%hu", (inaddr_any == false) ? @@ -4092,9 +4096,7 @@ restart: * hit default in the switch below. */ memset(buffer, 0xff, ISCSI_HDR_LEN); - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->conn_digest_errors++; - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_inc(&conn->sess->conn_digest_errors); } else { pr_debug("Got HeaderDigest CRC32C" " 0x%08x\n", checksum); @@ -4381,7 +4383,7 @@ int iscsit_close_connection( int iscsit_close_session(struct iscsi_session *sess) { - struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; if (atomic_read(&sess->nconn)) { diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index 7505fdd..de77d9a 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -111,7 +111,7 @@ static struct iscsi_chap *chap_server_open( /* * Set Identifier. */ - chap->id = ISCSI_TPG_C(conn)->tpg_chap_id++; + chap->id = conn->tpg->tpg_chap_id++; *aic_len += sprintf(aic_str + *aic_len, "CHAP_I=%d", chap->id); *aic_len += 1; pr_debug("[server] Sending CHAP_I=%d\n", chap->id); @@ -146,6 +146,7 @@ static int chap_server_compute_md5( unsigned char client_digest[MD5_SIGNATURE_SIZE]; unsigned char server_digest[MD5_SIGNATURE_SIZE]; unsigned char chap_n[MAX_CHAP_N_SIZE], chap_r[MAX_RESPONSE_LENGTH]; + size_t compare_len; struct iscsi_chap *chap = conn->auth_protocol; struct crypto_hash *tfm; struct hash_desc desc; @@ -184,7 +185,9 @@ static int chap_server_compute_md5( goto out; } - if (memcmp(chap_n, auth->userid, strlen(auth->userid)) != 0) { + /* Include the terminating NULL in the compare */ + compare_len = strlen(auth->userid) + 1; + if (strncmp(chap_n, auth->userid, compare_len) != 0) { pr_err("CHAP_N values do not match!\n"); goto out; } diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index fd14525..e3318ed 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -372,7 +372,7 @@ static ssize_t iscsi_nacl_attrib_show_##name( \ struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \ se_node_acl); \ \ - return sprintf(page, "%u\n", ISCSI_NODE_ATTRIB(nacl)->name); \ + return sprintf(page, "%u\n", nacl->node_attrib.name); \ } \ \ static ssize_t iscsi_nacl_attrib_store_##name( \ @@ -897,7 +897,7 @@ static struct se_node_acl *lio_target_make_nodeacl( if (!se_nacl_new) return ERR_PTR(-ENOMEM); - cmdsn_depth = ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth; + cmdsn_depth = tpg->tpg_attrib.default_cmdsn_depth; /* * se_nacl_new may be released by core_tpg_add_initiator_node_acl() * when converting a NdoeACL from demo mode -> explict @@ -920,9 +920,9 @@ static struct se_node_acl *lio_target_make_nodeacl( return ERR_PTR(-ENOMEM); } - stats_cg->default_groups[0] = &NODE_STAT_GRPS(acl)->iscsi_sess_stats_group; + stats_cg->default_groups[0] = &acl->node_stat_grps.iscsi_sess_stats_group; stats_cg->default_groups[1] = NULL; - config_group_init_type_name(&NODE_STAT_GRPS(acl)->iscsi_sess_stats_group, + config_group_init_type_name(&acl->node_stat_grps.iscsi_sess_stats_group, "iscsi_sess_stats", &iscsi_stat_sess_cit); return se_nacl; @@ -967,7 +967,7 @@ static ssize_t iscsi_tpg_attrib_show_##name( \ if (iscsit_get_tpg(tpg) < 0) \ return -EINVAL; \ \ - rb = sprintf(page, "%u\n", ISCSI_TPG_ATTRIB(tpg)->name); \ + rb = sprintf(page, "%u\n", tpg->tpg_attrib.name); \ iscsit_put_tpg(tpg); \ return rb; \ } \ @@ -1041,6 +1041,16 @@ TPG_ATTR(demo_mode_write_protect, S_IRUGO | S_IWUSR); */ DEF_TPG_ATTRIB(prod_mode_write_protect); TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_demo_mode_discovery, + */ +DEF_TPG_ATTRIB(demo_mode_discovery); +TPG_ATTR(demo_mode_discovery, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_default_erl + */ +DEF_TPG_ATTRIB(default_erl); +TPG_ATTR(default_erl, S_IRUGO | S_IWUSR); static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_authentication.attr, @@ -1051,6 +1061,8 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_cache_dynamic_acls.attr, &iscsi_tpg_attrib_demo_mode_write_protect.attr, &iscsi_tpg_attrib_prod_mode_write_protect.attr, + &iscsi_tpg_attrib_demo_mode_discovery.attr, + &iscsi_tpg_attrib_default_erl.attr, NULL, }; @@ -1514,21 +1526,21 @@ static struct se_wwn *lio_target_call_coreaddtiqn( return ERR_PTR(-ENOMEM); } - stats_cg->default_groups[0] = &WWN_STAT_GRPS(tiqn)->iscsi_instance_group; - stats_cg->default_groups[1] = &WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group; - stats_cg->default_groups[2] = &WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group; - stats_cg->default_groups[3] = &WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group; - stats_cg->default_groups[4] = &WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group; + stats_cg->default_groups[0] = &tiqn->tiqn_stat_grps.iscsi_instance_group; + stats_cg->default_groups[1] = &tiqn->tiqn_stat_grps.iscsi_sess_err_group; + stats_cg->default_groups[2] = &tiqn->tiqn_stat_grps.iscsi_tgt_attr_group; + stats_cg->default_groups[3] = &tiqn->tiqn_stat_grps.iscsi_login_stats_group; + stats_cg->default_groups[4] = &tiqn->tiqn_stat_grps.iscsi_logout_stats_group; stats_cg->default_groups[5] = NULL; - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_instance_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_instance_group, "iscsi_instance", &iscsi_stat_instance_cit); - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_sess_err_group, "iscsi_sess_err", &iscsi_stat_sess_err_cit); - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_tgt_attr_group, "iscsi_tgt_attr", &iscsi_stat_tgt_attr_cit); - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_login_stats_group, "iscsi_login_stats", &iscsi_stat_login_cit); - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_logout_stats_group, "iscsi_logout_stats", &iscsi_stat_logout_cit); pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn); @@ -1784,6 +1796,11 @@ static int lio_queue_status(struct se_cmd *se_cmd) struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); cmd->i_state = ISTATE_SEND_STATUS; + + if (cmd->se_cmd.scsi_status || cmd->sense_reason) { + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + return 0; + } cmd->conn->conn_transport->iscsit_queue_status(cmd->conn, cmd); return 0; @@ -1815,21 +1832,21 @@ static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg) { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth; + return tpg->tpg_attrib.default_cmdsn_depth; } static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg) { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->generate_node_acls; + return tpg->tpg_attrib.generate_node_acls; } static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg) { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->cache_dynamic_acls; + return tpg->tpg_attrib.cache_dynamic_acls; } static int lio_tpg_check_demo_mode_write_protect( @@ -1837,7 +1854,7 @@ static int lio_tpg_check_demo_mode_write_protect( { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->demo_mode_write_protect; + return tpg->tpg_attrib.demo_mode_write_protect; } static int lio_tpg_check_prod_mode_write_protect( @@ -1845,7 +1862,7 @@ static int lio_tpg_check_prod_mode_write_protect( { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->prod_mode_write_protect; + return tpg->tpg_attrib.prod_mode_write_protect; } static void lio_tpg_release_fabric_acl( @@ -1908,9 +1925,12 @@ static void lio_set_default_node_attributes(struct se_node_acl *se_acl) { struct iscsi_node_acl *acl = container_of(se_acl, struct iscsi_node_acl, se_node_acl); + struct se_portal_group *se_tpg = se_acl->se_tpg; + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); - ISCSI_NODE_ATTRIB(acl)->nacl = acl; - iscsit_set_default_node_attribues(acl); + acl->node_attrib.nacl = acl; + iscsit_set_default_node_attribues(acl, tpg); } static int lio_check_stop_free(struct se_cmd *se_cmd) @@ -1995,17 +2015,17 @@ int iscsi_target_register_configfs(void) * Setup default attribute lists for various fabric->tf_cit_tmpl * sturct config_item_type's */ - TF_CIT_TMPL(fabric)->tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs; - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_auth_cit.ct_attrs = lio_target_tpg_auth_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs; + fabric->tf_cit_tmpl.tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs; + fabric->tf_cit_tmpl.tfc_tpg_auth_cit.ct_attrs = lio_target_tpg_auth_attrs; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs; ret = target_fabric_configfs_register(fabric); if (ret < 0) { diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 9a5721b..48f7b3b 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -37,9 +37,6 @@ #define NA_RANDOM_DATAIN_PDU_OFFSETS 0 #define NA_RANDOM_DATAIN_SEQ_OFFSETS 0 #define NA_RANDOM_R2T_OFFSETS 0 -#define NA_DEFAULT_ERL 0 -#define NA_DEFAULT_ERL_MAX 2 -#define NA_DEFAULT_ERL_MIN 0 /* struct iscsi_tpg_attrib sanity values */ #define TA_AUTHENTICATION 1 @@ -58,6 +55,8 @@ #define TA_DEMO_MODE_WRITE_PROTECT 1 /* Disabled by default in production mode w/ explict ACLs */ #define TA_PROD_MODE_WRITE_PROTECT 0 +#define TA_DEMO_MODE_DISCOVERY 1 +#define TA_DEFAULT_ERL 0 #define TA_CACHE_CORE_NPS 0 @@ -192,6 +191,7 @@ enum recover_cmdsn_ret_table { CMDSN_NORMAL_OPERATION = 0, CMDSN_LOWER_THAN_EXP = 1, CMDSN_HIGHER_THAN_EXP = 2, + CMDSN_MAXCMDSN_OVERRUN = 3, }; /* Used for iscsi_handle_immediate_data() return values */ @@ -650,14 +650,13 @@ struct iscsi_session { /* Used for session reference counting */ int session_usage_count; int session_waiting_on_uc; - u32 cmd_pdus; - u32 rsp_pdus; - u64 tx_data_octets; - u64 rx_data_octets; - u32 conn_digest_errors; - u32 conn_timeout_errors; + atomic_long_t cmd_pdus; + atomic_long_t rsp_pdus; + atomic_long_t tx_data_octets; + atomic_long_t rx_data_octets; + atomic_long_t conn_digest_errors; + atomic_long_t conn_timeout_errors; u64 creation_time; - spinlock_t session_stats_lock; /* Number of active connections */ atomic_t nconn; atomic_t session_continuation; @@ -755,11 +754,6 @@ struct iscsi_node_acl { struct se_node_acl se_node_acl; }; -#define NODE_STAT_GRPS(nacl) (&(nacl)->node_stat_grps) - -#define ISCSI_NODE_ATTRIB(t) (&(t)->node_attrib) -#define ISCSI_NODE_AUTH(t) (&(t)->node_auth) - struct iscsi_tpg_attrib { u32 authentication; u32 login_timeout; @@ -769,6 +763,8 @@ struct iscsi_tpg_attrib { u32 default_cmdsn_depth; u32 demo_mode_write_protect; u32 prod_mode_write_protect; + u32 demo_mode_discovery; + u32 default_erl; struct iscsi_portal_group *tpg; }; @@ -835,12 +831,6 @@ struct iscsi_portal_group { struct list_head tpg_list; } ____cacheline_aligned; -#define ISCSI_TPG_C(c) ((struct iscsi_portal_group *)(c)->tpg) -#define ISCSI_TPG_LUN(c, l) ((iscsi_tpg_list_t *)(c)->tpg->tpg_lun_list_t[l]) -#define ISCSI_TPG_S(s) ((struct iscsi_portal_group *)(s)->tpg) -#define ISCSI_TPG_ATTRIB(t) (&(t)->tpg_attrib) -#define SE_TPG(tpg) (&(tpg)->tpg_se_tpg) - struct iscsi_wwn_stat_grps { struct config_group iscsi_stat_group; struct config_group iscsi_instance_group; @@ -871,8 +861,6 @@ struct iscsi_tiqn { struct iscsi_logout_stats logout_stats; } ____cacheline_aligned; -#define WWN_STAT_GRPS(tiqn) (&(tiqn)->tiqn_stat_grps) - struct iscsit_global { /* In core shutdown */ u32 in_shutdown; diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c index 6c7a510..7087c73 100644 --- a/drivers/target/iscsi/iscsi_target_device.c +++ b/drivers/target/iscsi/iscsi_target_device.c @@ -58,11 +58,7 @@ void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess cmd->maxcmdsn_inc = 1; - if (!mutex_trylock(&sess->cmdsn_mutex)) { - sess->max_cmd_sn += 1; - pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn); - return; - } + mutex_lock(&sess->cmdsn_mutex); sess->max_cmd_sn += 1; pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn); mutex_unlock(&sess->cmdsn_mutex); diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 41052e5..0d1e6ee 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -757,7 +757,7 @@ int iscsit_check_post_dataout( static void iscsit_handle_time2retain_timeout(unsigned long data) { struct iscsi_session *sess = (struct iscsi_session *) data; - struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; spin_lock_bh(&se_tpg->session_lock); @@ -785,7 +785,7 @@ static void iscsit_handle_time2retain_timeout(unsigned long data) tiqn->sess_err_stats.last_sess_failure_type = ISCSI_SESS_ERR_CXN_TIMEOUT; tiqn->sess_err_stats.cxn_timeout_errors++; - sess->conn_timeout_errors++; + atomic_long_inc(&sess->conn_timeout_errors); spin_unlock(&tiqn->sess_err_stats.lock); } } @@ -801,9 +801,9 @@ void iscsit_start_time2retain_handler(struct iscsi_session *sess) * Only start Time2Retain timer when the associated TPG is still in * an ACTIVE (eg: not disabled or shutdown) state. */ - spin_lock(&ISCSI_TPG_S(sess)->tpg_state_lock); - tpg_active = (ISCSI_TPG_S(sess)->tpg_state == TPG_STATE_ACTIVE); - spin_unlock(&ISCSI_TPG_S(sess)->tpg_state_lock); + spin_lock(&sess->tpg->tpg_state_lock); + tpg_active = (sess->tpg->tpg_state == TPG_STATE_ACTIVE); + spin_unlock(&sess->tpg->tpg_state_lock); if (!tpg_active) return; @@ -829,7 +829,7 @@ void iscsit_start_time2retain_handler(struct iscsi_session *sess) */ int iscsit_stop_time2retain_timer(struct iscsi_session *sess) { - struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; if (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 1794c75..4eb93b2 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -305,7 +305,6 @@ static int iscsi_login_zero_tsih_s1( } sess->creation_time = get_jiffies_64(); - spin_lock_init(&sess->session_stats_lock); /* * The FFP CmdSN window values will be allocated from the TPG's * Initiator Node's ACL once the login has been successfully completed. @@ -347,15 +346,15 @@ static int iscsi_login_zero_tsih_s2( * Assign a new TPG Session Handle. Note this is protected with * struct iscsi_portal_group->np_login_sem from iscsit_access_np(). */ - sess->tsih = ++ISCSI_TPG_S(sess)->ntsih; + sess->tsih = ++sess->tpg->ntsih; if (!sess->tsih) - sess->tsih = ++ISCSI_TPG_S(sess)->ntsih; + sess->tsih = ++sess->tpg->ntsih; /* * Create the default params from user defined values.. */ if (iscsi_copy_param_list(&conn->param_list, - ISCSI_TPG_C(conn)->param_list, 1) < 0) { + conn->tpg->param_list, 1) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); return -1; @@ -380,7 +379,7 @@ static int iscsi_login_zero_tsih_s2( * In our case, we have already located the struct iscsi_tiqn at this point. */ memset(buf, 0, 32); - sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt); + sprintf(buf, "TargetPortalGroupTag=%hu", sess->tpg->tpgt); if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); @@ -575,7 +574,7 @@ static int iscsi_login_non_zero_tsih_s2( iscsi_login_set_conn_values(sess, conn, pdu->cid); if (iscsi_copy_param_list(&conn->param_list, - ISCSI_TPG_C(conn)->param_list, 0) < 0) { + conn->tpg->param_list, 0) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); return -1; @@ -593,7 +592,7 @@ static int iscsi_login_non_zero_tsih_s2( * In our case, we have already located the struct iscsi_tiqn at this point. */ memset(buf, 0, 32); - sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt); + sprintf(buf, "TargetPortalGroupTag=%hu", sess->tpg->tpgt); if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); @@ -691,7 +690,7 @@ int iscsi_post_login_handler( int stop_timer = 0; struct iscsi_session *sess = conn->sess; struct se_session *se_sess = sess->se_sess; - struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; struct iscsi_thread_set *ts; @@ -1154,7 +1153,7 @@ old_sess_out: spin_lock_bh(&conn->sess->conn_lock); if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { struct se_portal_group *se_tpg = - &ISCSI_TPG_C(conn)->tpg_se_tpg; + &conn->tpg->tpg_se_tpg; atomic_set(&conn->sess->session_continuation, 0); spin_unlock_bh(&conn->sess->conn_lock); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index ef6d836..83c965c 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -88,7 +88,7 @@ int extract_param( if (len < 0) return -1; - if (len > max_length) { + if (len >= max_length) { pr_err("Length of input: %d exceeds max_length:" " %d\n", len, max_length); return -1; @@ -140,7 +140,7 @@ static u32 iscsi_handle_authentication( iscsi_nacl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl); - auth = ISCSI_NODE_AUTH(iscsi_nacl); + auth = &iscsi_nacl->node_auth; } } else { /* @@ -789,7 +789,7 @@ static int iscsi_target_handle_csg_zero( return -1; if (!iscsi_check_negotiated_keys(conn->param_list)) { - if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication && + if (conn->tpg->tpg_attrib.authentication && !strncmp(param->value, NONE, 4)) { pr_err("Initiator sent AuthMethod=None but" " Target is enforcing iSCSI Authentication," @@ -799,7 +799,7 @@ static int iscsi_target_handle_csg_zero( return -1; } - if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication && + if (conn->tpg->tpg_attrib.authentication && !login->auth_complete) return 0; @@ -862,7 +862,7 @@ static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_log } if (!login->auth_complete && - ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication) { + conn->tpg->tpg_attrib.authentication) { pr_err("Initiator is requesting CSG: 1, has not been" " successfully authenticated, and the Target is" " enforcing iSCSI Authentication, login failed.\n"); diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c index 93bdc47..16454a9 100644 --- a/drivers/target/iscsi/iscsi_target_nodeattrib.c +++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c @@ -33,7 +33,8 @@ static inline char *iscsit_na_get_initiatorname( } void iscsit_set_default_node_attribues( - struct iscsi_node_acl *acl) + struct iscsi_node_acl *acl, + struct iscsi_portal_group *tpg) { struct iscsi_node_attrib *a = &acl->node_attrib; @@ -44,7 +45,7 @@ void iscsit_set_default_node_attribues( a->random_datain_pdu_offsets = NA_RANDOM_DATAIN_PDU_OFFSETS; a->random_datain_seq_offsets = NA_RANDOM_DATAIN_SEQ_OFFSETS; a->random_r2t_offsets = NA_RANDOM_R2T_OFFSETS; - a->default_erl = NA_DEFAULT_ERL; + a->default_erl = tpg->tpg_attrib.default_erl; } int iscsit_na_dataout_timeout( diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.h b/drivers/target/iscsi/iscsi_target_nodeattrib.h index c970b326..0c69a46 100644 --- a/drivers/target/iscsi/iscsi_target_nodeattrib.h +++ b/drivers/target/iscsi/iscsi_target_nodeattrib.h @@ -1,7 +1,8 @@ #ifndef ISCSI_TARGET_NODEATTRIB_H #define ISCSI_TARGET_NODEATTRIB_H -extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *); +extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *, + struct iscsi_portal_group *); extern int iscsit_na_dataout_timeout(struct iscsi_node_acl *, u32); extern int iscsit_na_dataout_timeout_retries(struct iscsi_node_acl *, u32); extern int iscsit_na_nopin_timeout(struct iscsi_node_acl *, u32); diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c index f788e8b..1033955 100644 --- a/drivers/target/iscsi/iscsi_target_stat.c +++ b/drivers/target/iscsi/iscsi_target_stat.c @@ -792,7 +792,8 @@ static ssize_t iscsi_stat_sess_show_attr_cmd_pdus( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%u\n", sess->cmd_pdus); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->cmd_pdus)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -815,7 +816,8 @@ static ssize_t iscsi_stat_sess_show_attr_rsp_pdus( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%u\n", sess->rsp_pdus); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->rsp_pdus)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -838,8 +840,8 @@ static ssize_t iscsi_stat_sess_show_attr_txdata_octs( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%llu\n", - (unsigned long long)sess->tx_data_octets); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->tx_data_octets)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -862,8 +864,8 @@ static ssize_t iscsi_stat_sess_show_attr_rxdata_octs( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%llu\n", - (unsigned long long)sess->rx_data_octets); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->rx_data_octets)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -886,8 +888,8 @@ static ssize_t iscsi_stat_sess_show_attr_conn_digest_errors( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%u\n", - sess->conn_digest_errors); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->conn_digest_errors)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -910,8 +912,8 @@ static ssize_t iscsi_stat_sess_show_attr_conn_timeout_errors( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%u\n", - sess->conn_timeout_errors); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->conn_timeout_errors)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 4faeb47..3976183 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -223,6 +223,8 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) a->cache_dynamic_acls = TA_CACHE_DYNAMIC_ACLS; a->demo_mode_write_protect = TA_DEMO_MODE_WRITE_PROTECT; a->prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT; + a->demo_mode_discovery = TA_DEMO_MODE_DISCOVERY; + a->default_erl = TA_DEFAULT_ERL; } int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) @@ -237,7 +239,7 @@ int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_gro if (iscsi_create_default_params(&tpg->param_list) < 0) goto err_out; - ISCSI_TPG_ATTRIB(tpg)->tpg = tpg; + tpg->tpg_attrib.tpg = tpg; spin_lock(&tpg->tpg_state_lock); tpg->tpg_state = TPG_STATE_INACTIVE; @@ -330,7 +332,7 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg) return -EINVAL; } - if (ISCSI_TPG_ATTRIB(tpg)->authentication) { + if (tpg->tpg_attrib.authentication) { if (!strcmp(param->value, NONE)) { ret = iscsi_update_param_value(param, CHAP); if (ret) @@ -820,3 +822,39 @@ int iscsit_ta_prod_mode_write_protect( return 0; } + +int iscsit_ta_demo_mode_discovery( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->demo_mode_discovery = flag; + pr_debug("iSCSI_TPG[%hu] - Demo Mode Discovery bit:" + " %s\n", tpg->tpgt, (a->demo_mode_discovery) ? + "ON" : "OFF"); + + return 0; +} + +int iscsit_ta_default_erl( + struct iscsi_portal_group *tpg, + u32 default_erl) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((default_erl != 0) && (default_erl != 1) && (default_erl != 2)) { + pr_err("Illegal value for default_erl: %u\n", default_erl); + return -EINVAL; + } + + a->default_erl = default_erl; + pr_debug("iSCSI_TPG[%hu] - DefaultERL: %u\n", tpg->tpgt, a->default_erl); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index b77693e..213c0fc 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -37,5 +37,7 @@ extern int iscsit_ta_default_cmdsn_depth(struct iscsi_portal_group *, u32); extern int iscsit_ta_cache_dynamic_acls(struct iscsi_portal_group *, u32); extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32); extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32); +extern int iscsit_ta_demo_mode_discovery(struct iscsi_portal_group *, u32); +extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32); #endif /* ISCSI_TARGET_TPG_H */ diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b0cac0c..0819e68 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -242,9 +242,9 @@ static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cm */ if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) { pr_err("Received CmdSN: 0x%08x is greater than" - " MaxCmdSN: 0x%08x, protocol error.\n", cmdsn, + " MaxCmdSN: 0x%08x, ignoring.\n", cmdsn, sess->max_cmd_sn); - ret = CMDSN_ERROR_CANNOT_RECOVER; + ret = CMDSN_MAXCMDSN_OVERRUN; } else if (cmdsn == sess->exp_cmd_sn) { sess->exp_cmd_sn++; @@ -303,14 +303,16 @@ int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, ret = CMDSN_HIGHER_THAN_EXP; break; case CMDSN_LOWER_THAN_EXP: + case CMDSN_MAXCMDSN_OVERRUN: + default: cmd->i_state = ISTATE_REMOVE; iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); - ret = cmdsn_ret; - break; - default: - reason = ISCSI_REASON_PROTOCOL_ERROR; - reject = true; - ret = cmdsn_ret; + /* + * Existing callers for iscsit_sequence_cmd() will silently + * ignore commands with CMDSN_LOWER_THAN_EXP, so force this + * return for CMDSN_MAXCMDSN_OVERRUN as well.. + */ + ret = CMDSN_LOWER_THAN_EXP; break; } mutex_unlock(&conn->sess->cmdsn_mutex); @@ -980,7 +982,7 @@ static void iscsit_handle_nopin_response_timeout(unsigned long data) tiqn->sess_err_stats.last_sess_failure_type = ISCSI_SESS_ERR_CXN_TIMEOUT; tiqn->sess_err_stats.cxn_timeout_errors++; - conn->sess->conn_timeout_errors++; + atomic_long_inc(&conn->sess->conn_timeout_errors); spin_unlock_bh(&tiqn->sess_err_stats.lock); } } diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 0f6d69d..1b41e67 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -135,6 +135,21 @@ static int tcm_loop_change_queue_depth( return sdev->queue_depth; } +static int tcm_loop_change_queue_type(struct scsi_device *sdev, int tag) +{ + if (sdev->tagged_supported) { + scsi_set_tag_type(sdev, tag); + + if (tag) + scsi_activate_tcq(sdev, sdev->queue_depth); + else + scsi_deactivate_tcq(sdev, sdev->queue_depth); + } else + tag = 0; + + return tag; +} + /* * Locate the SAM Task Attr from struct scsi_cmnd * */ @@ -178,7 +193,10 @@ static void tcm_loop_submission_work(struct work_struct *work) set_host_byte(sc, DID_NO_CONNECT); goto out_done; } - + if (tl_tpg->tl_transport_status == TCM_TRANSPORT_OFFLINE) { + set_host_byte(sc, DID_TRANSPORT_DISRUPTED); + goto out_done; + } tl_nexus = tl_hba->tl_nexus; if (!tl_nexus) { scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus" @@ -233,6 +251,7 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) } tl_cmd->sc = sc; + tl_cmd->sc_cmd_tag = sc->tag; INIT_WORK(&tl_cmd->work, tcm_loop_submission_work); queue_work(tcm_loop_workqueue, &tl_cmd->work); return 0; @@ -242,41 +261,21 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) * Called from SCSI EH process context to issue a LUN_RESET TMR * to struct scsi_device */ -static int tcm_loop_device_reset(struct scsi_cmnd *sc) +static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, + struct tcm_loop_nexus *tl_nexus, + int lun, int task, enum tcm_tmreq_table tmr) { struct se_cmd *se_cmd = NULL; - struct se_portal_group *se_tpg; struct se_session *se_sess; + struct se_portal_group *se_tpg; struct tcm_loop_cmd *tl_cmd = NULL; - struct tcm_loop_hba *tl_hba; - struct tcm_loop_nexus *tl_nexus; struct tcm_loop_tmr *tl_tmr = NULL; - struct tcm_loop_tpg *tl_tpg; - int ret = FAILED, rc; - /* - * Locate the tcm_loop_hba_t pointer - */ - tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); - /* - * Locate the tl_nexus and se_sess pointers - */ - tl_nexus = tl_hba->tl_nexus; - if (!tl_nexus) { - pr_err("Unable to perform device reset without" - " active I_T Nexus\n"); - return FAILED; - } - se_sess = tl_nexus->se_sess; - /* - * Locate the tl_tpg and se_tpg pointers from TargetID in sc->device->id - */ - tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - se_tpg = &tl_tpg->tl_se_tpg; + int ret = TMR_FUNCTION_FAILED, rc; tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL); if (!tl_cmd) { pr_err("Unable to allocate memory for tl_cmd\n"); - return FAILED; + return ret; } tl_tmr = kzalloc(sizeof(struct tcm_loop_tmr), GFP_KERNEL); @@ -287,6 +286,8 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) init_waitqueue_head(&tl_tmr->tl_tmr_wait); se_cmd = &tl_cmd->tl_se_cmd; + se_tpg = &tl_tpg->tl_se_tpg; + se_sess = tl_nexus->se_sess; /* * Initialize struct se_cmd descriptor from target_core_mod infrastructure */ @@ -294,17 +295,23 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) DMA_NONE, MSG_SIMPLE_TAG, &tl_cmd->tl_sense_buf[0]); - rc = core_tmr_alloc_req(se_cmd, tl_tmr, TMR_LUN_RESET, GFP_KERNEL); + rc = core_tmr_alloc_req(se_cmd, tl_tmr, tmr, GFP_KERNEL); if (rc < 0) goto release; + + if (tmr == TMR_ABORT_TASK) + se_cmd->se_tmr_req->ref_task_tag = task; + /* - * Locate the underlying TCM struct se_lun from sc->device->lun + * Locate the underlying TCM struct se_lun */ - if (transport_lookup_tmr_lun(se_cmd, sc->device->lun) < 0) + if (transport_lookup_tmr_lun(se_cmd, lun) < 0) { + ret = TMR_LUN_DOES_NOT_EXIST; goto release; + } /* - * Queue the TMR to TCM Core and sleep waiting for tcm_loop_queue_tm_rsp() - * to wake us up. + * Queue the TMR to TCM Core and sleep waiting for + * tcm_loop_queue_tm_rsp() to wake us up. */ transport_generic_handle_tmr(se_cmd); wait_event(tl_tmr->tl_tmr_wait, atomic_read(&tl_tmr->tmr_complete)); @@ -312,8 +319,7 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) * The TMR LUN_RESET has completed, check the response status and * then release allocations. */ - ret = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ? - SUCCESS : FAILED; + ret = se_cmd->se_tmr_req->response; release: if (se_cmd) transport_generic_free_cmd(se_cmd, 1); @@ -323,6 +329,94 @@ release: return ret; } +static int tcm_loop_abort_task(struct scsi_cmnd *sc) +{ + struct tcm_loop_hba *tl_hba; + struct tcm_loop_nexus *tl_nexus; + struct tcm_loop_tpg *tl_tpg; + int ret = FAILED; + + /* + * Locate the tcm_loop_hba_t pointer + */ + tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + /* + * Locate the tl_nexus and se_sess pointers + */ + tl_nexus = tl_hba->tl_nexus; + if (!tl_nexus) { + pr_err("Unable to perform device reset without" + " active I_T Nexus\n"); + return FAILED; + } + + /* + * Locate the tl_tpg pointer from TargetID in sc->device->id + */ + tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; + ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, + sc->tag, TMR_ABORT_TASK); + return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; +} + +/* + * Called from SCSI EH process context to issue a LUN_RESET TMR + * to struct scsi_device + */ +static int tcm_loop_device_reset(struct scsi_cmnd *sc) +{ + struct tcm_loop_hba *tl_hba; + struct tcm_loop_nexus *tl_nexus; + struct tcm_loop_tpg *tl_tpg; + int ret = FAILED; + + /* + * Locate the tcm_loop_hba_t pointer + */ + tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + /* + * Locate the tl_nexus and se_sess pointers + */ + tl_nexus = tl_hba->tl_nexus; + if (!tl_nexus) { + pr_err("Unable to perform device reset without" + " active I_T Nexus\n"); + return FAILED; + } + /* + * Locate the tl_tpg pointer from TargetID in sc->device->id + */ + tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; + ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, + 0, TMR_LUN_RESET); + return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; +} + +static int tcm_loop_target_reset(struct scsi_cmnd *sc) +{ + struct tcm_loop_hba *tl_hba; + struct tcm_loop_tpg *tl_tpg; + + /* + * Locate the tcm_loop_hba_t pointer + */ + tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + if (!tl_hba) { + pr_err("Unable to perform device reset without" + " active I_T Nexus\n"); + return FAILED; + } + /* + * Locate the tl_tpg pointer from TargetID in sc->device->id + */ + tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; + if (tl_tpg) { + tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; + return SUCCESS; + } + return FAILED; +} + static int tcm_loop_slave_alloc(struct scsi_device *sd) { set_bit(QUEUE_FLAG_BIDI, &sd->request_queue->queue_flags); @@ -331,6 +425,15 @@ static int tcm_loop_slave_alloc(struct scsi_device *sd) static int tcm_loop_slave_configure(struct scsi_device *sd) { + if (sd->tagged_supported) { + scsi_activate_tcq(sd, sd->queue_depth); + scsi_adjust_queue_depth(sd, MSG_SIMPLE_TAG, + sd->host->cmd_per_lun); + } else { + scsi_adjust_queue_depth(sd, 0, + sd->host->cmd_per_lun); + } + return 0; } @@ -340,7 +443,10 @@ static struct scsi_host_template tcm_loop_driver_template = { .name = "TCM_Loopback", .queuecommand = tcm_loop_queuecommand, .change_queue_depth = tcm_loop_change_queue_depth, + .change_queue_type = tcm_loop_change_queue_type, + .eh_abort_handler = tcm_loop_abort_task, .eh_device_reset_handler = tcm_loop_device_reset, + .eh_target_reset_handler = tcm_loop_target_reset, .can_queue = 1024, .this_id = -1, .sg_tablesize = 256, @@ -699,7 +805,10 @@ static void tcm_loop_set_default_node_attributes(struct se_node_acl *se_acl) static u32 tcm_loop_get_task_tag(struct se_cmd *se_cmd) { - return 1; + struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, + struct tcm_loop_cmd, tl_se_cmd); + + return tl_cmd->sc_cmd_tag; } static int tcm_loop_get_cmd_state(struct se_cmd *se_cmd) @@ -932,7 +1041,10 @@ static int tcm_loop_drop_nexus( struct tcm_loop_nexus *tl_nexus; struct tcm_loop_hba *tl_hba = tpg->tl_hba; - tl_nexus = tpg->tl_hba->tl_nexus; + if (!tl_hba) + return -ENODEV; + + tl_nexus = tl_hba->tl_nexus; if (!tl_nexus) return -ENODEV; @@ -1061,8 +1173,56 @@ check_newline: TF_TPG_BASE_ATTR(tcm_loop, nexus, S_IRUGO | S_IWUSR); +static ssize_t tcm_loop_tpg_show_transport_status( + struct se_portal_group *se_tpg, + char *page) +{ + struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, + struct tcm_loop_tpg, tl_se_tpg); + const char *status = NULL; + ssize_t ret = -EINVAL; + + switch (tl_tpg->tl_transport_status) { + case TCM_TRANSPORT_ONLINE: + status = "online"; + break; + case TCM_TRANSPORT_OFFLINE: + status = "offline"; + break; + default: + break; + } + + if (status) + ret = snprintf(page, PAGE_SIZE, "%s\n", status); + + return ret; +} + +static ssize_t tcm_loop_tpg_store_transport_status( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, + struct tcm_loop_tpg, tl_se_tpg); + + if (!strncmp(page, "online", 6)) { + tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; + return count; + } + if (!strncmp(page, "offline", 7)) { + tl_tpg->tl_transport_status = TCM_TRANSPORT_OFFLINE; + return count; + } + return -EINVAL; +} + +TF_TPG_BASE_ATTR(tcm_loop, transport_status, S_IRUGO | S_IWUSR); + static struct configfs_attribute *tcm_loop_tpg_attrs[] = { &tcm_loop_tpg_nexus.attr, + &tcm_loop_tpg_transport_status.attr, NULL, }; @@ -1334,11 +1494,11 @@ static int tcm_loop_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = tcm_loop_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = tcm_loop_tpg_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_loop_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_loop_tpg_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; /* * Once fabric->tf_ops has been setup, now register the fabric for * use within TCM diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h index dd7a84e..54c59d0 100644 --- a/drivers/target/loopback/tcm_loop.h +++ b/drivers/target/loopback/tcm_loop.h @@ -10,6 +10,8 @@ struct tcm_loop_cmd { /* State of Linux/SCSI CDB+Data descriptor */ u32 sc_cmd_state; + /* Tagged command queueing */ + u32 sc_cmd_tag; /* Pointer to the CDB+Data descriptor from Linux/SCSI subsystem */ struct scsi_cmnd *sc; /* The TCM I/O descriptor that is accessed via container_of() */ @@ -40,8 +42,12 @@ struct tcm_loop_nacl { struct se_node_acl se_node_acl; }; +#define TCM_TRANSPORT_ONLINE 0 +#define TCM_TRANSPORT_OFFLINE 1 + struct tcm_loop_tpg { unsigned short tl_tpgt; + unsigned short tl_transport_status; atomic_t tl_tpg_port_count; struct se_portal_group tl_se_tpg; struct tcm_loop_hba *tl_hba; diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index e51b09a..24884ca 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -2556,15 +2556,15 @@ static int sbp_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = sbp_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = sbp_tpg_base_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = sbp_tpg_attrib_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = sbp_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = sbp_tpg_base_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = sbp_tpg_attrib_attrs; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; ret = target_fabric_configfs_register(fabric); if (ret < 0) { diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 47244102..fdcee32 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -44,7 +44,7 @@ static sense_reason_t core_alua_check_transition(int state, int *primary); static int core_alua_set_tg_pt_secondary_state( struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct se_port *port, int explict, int offline); + struct se_port *port, int explicit, int offline); static u16 alua_lu_gps_counter; static u32 alua_lu_gps_count; @@ -117,12 +117,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) /* * Set supported ASYMMETRIC ACCESS State bits */ - buf[off] = 0x80; /* T_SUP */ - buf[off] |= 0x40; /* O_SUP */ - buf[off] |= 0x8; /* U_SUP */ - buf[off] |= 0x4; /* S_SUP */ - buf[off] |= 0x2; /* AN_SUP */ - buf[off++] |= 0x1; /* AO_SUP */ + buf[off++] |= tg_pt_gp->tg_pt_gp_alua_supported_states; /* * TARGET PORT GROUP */ @@ -175,7 +170,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) if (ext_hdr != 0) { buf[4] = 0x10; /* - * Set the implict transition time (in seconds) for the application + * Set the implicit transition time (in seconds) for the application * client to use as a base for it's transition timeout value. * * Use the current tg_pt_gp_mem -> tg_pt_gp membership from the LUN @@ -188,7 +183,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; if (tg_pt_gp) - buf[5] = tg_pt_gp->tg_pt_gp_implict_trans_secs; + buf[5] = tg_pt_gp->tg_pt_gp_implicit_trans_secs; spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); } } @@ -199,7 +194,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) } /* - * SET_TARGET_PORT_GROUPS for explict ALUA operation. + * SET_TARGET_PORT_GROUPS for explicit ALUA operation. * * See spc4r17 section 6.35 */ @@ -232,7 +227,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; /* - * Determine if explict ALUA via SET_TARGET_PORT_GROUPS is allowed + * Determine if explicit ALUA via SET_TARGET_PORT_GROUPS is allowed * for the local tg_pt_gp. */ l_tg_pt_gp_mem = l_port->sep_alua_tg_pt_gp_mem; @@ -251,9 +246,9 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) } spin_unlock(&l_tg_pt_gp_mem->tg_pt_gp_mem_lock); - if (!(l_tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICT_ALUA)) { + if (!(l_tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA)) { pr_debug("Unable to process SET_TARGET_PORT_GROUPS" - " while TPGS_EXPLICT_ALUA is disabled\n"); + " while TPGS_EXPLICIT_ALUA is disabled\n"); rc = TCM_UNSUPPORTED_SCSI_OPCODE; goto out; } @@ -330,7 +325,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) spin_unlock(&dev->t10_alua.tg_pt_gps_lock); } else { /* - * Extact the RELATIVE TARGET PORT IDENTIFIER to identify + * Extract the RELATIVE TARGET PORT IDENTIFIER to identify * the Target Port in question for the the incoming * SET_TARGET_PORT_GROUPS op. */ @@ -487,7 +482,7 @@ static inline int core_alua_state_transition( u8 *alua_ascq) { /* - * Allowed CDBs for ALUA_ACCESS_STATE_TRANSITIO as defined by + * Allowed CDBs for ALUA_ACCESS_STATE_TRANSITION as defined by * spc4r17 section 5.9.2.5 */ switch (cdb[0]) { @@ -515,9 +510,9 @@ static inline int core_alua_state_transition( } /* - * return 1: Is used to signal LUN not accecsable, and check condition/not ready + * return 1: Is used to signal LUN not accessible, and check condition/not ready * return 0: Used to signal success - * reutrn -1: Used to signal failure, and invalid cdb field + * return -1: Used to signal failure, and invalid cdb field */ sense_reason_t target_alua_state_check(struct se_cmd *cmd) @@ -566,12 +561,12 @@ target_alua_state_check(struct se_cmd *cmd) nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs; spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); /* - * Process ALUA_ACCESS_STATE_ACTIVE_OPTMIZED in a separate conditional + * Process ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED in a separate conditional * statement so the compiler knows explicitly to check this case first. * For the Optimized ALUA access state case, we want to process the * incoming fabric cmd ASAP.. */ - if (out_alua_state == ALUA_ACCESS_STATE_ACTIVE_OPTMIZED) + if (out_alua_state == ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED) return 0; switch (out_alua_state) { @@ -620,13 +615,13 @@ out: } /* - * Check implict and explict ALUA state change request. + * Check implicit and explicit ALUA state change request. */ static sense_reason_t core_alua_check_transition(int state, int *primary) { switch (state) { - case ALUA_ACCESS_STATE_ACTIVE_OPTMIZED: + case ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED: case ALUA_ACCESS_STATE_ACTIVE_NON_OPTIMIZED: case ALUA_ACCESS_STATE_STANDBY: case ALUA_ACCESS_STATE_UNAVAILABLE: @@ -654,7 +649,7 @@ core_alua_check_transition(int state, int *primary) static char *core_alua_dump_state(int state) { switch (state) { - case ALUA_ACCESS_STATE_ACTIVE_OPTMIZED: + case ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED: return "Active/Optimized"; case ALUA_ACCESS_STATE_ACTIVE_NON_OPTIMIZED: return "Active/NonOptimized"; @@ -676,10 +671,10 @@ char *core_alua_dump_status(int status) switch (status) { case ALUA_STATUS_NONE: return "None"; - case ALUA_STATUS_ALTERED_BY_EXPLICT_STPG: - return "Altered by Explict STPG"; - case ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA: - return "Altered by Implict ALUA"; + case ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG: + return "Altered by Explicit STPG"; + case ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA: + return "Altered by Implicit ALUA"; default: return "Unknown"; } @@ -770,7 +765,7 @@ static int core_alua_do_transition_tg_pt( struct se_node_acl *nacl, unsigned char *md_buf, int new_state, - int explict) + int explicit) { struct se_dev_entry *se_deve; struct se_lun_acl *lacl; @@ -784,9 +779,9 @@ static int core_alua_do_transition_tg_pt( old_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_TRANSITION); - tg_pt_gp->tg_pt_gp_alua_access_status = (explict) ? - ALUA_STATUS_ALTERED_BY_EXPLICT_STPG : - ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA; + tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ? + ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : + ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; /* * Check for the optional ALUA primary state transition delay */ @@ -802,7 +797,7 @@ static int core_alua_do_transition_tg_pt( * change, a device server shall establish a unit attention * condition for the initiator port associated with every I_T * nexus with the additional sense code set to ASYMMETRIC - * ACCESS STATE CHAGED. + * ACCESS STATE CHANGED. * * After an explicit target port asymmetric access state * change, a device server shall establish a unit attention @@ -821,12 +816,12 @@ static int core_alua_do_transition_tg_pt( lacl = se_deve->se_lun_acl; /* * se_deve->se_lun_acl pointer may be NULL for a - * entry created without explict Node+MappedLUN ACLs + * entry created without explicit Node+MappedLUN ACLs */ if (!lacl) continue; - if (explict && + if (explicit && (nacl != NULL) && (nacl == lacl->se_lun_nacl) && (l_port != NULL) && (l_port == port)) continue; @@ -866,8 +861,8 @@ static int core_alua_do_transition_tg_pt( atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, new_state); pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu" - " from primary access state %s to %s\n", (explict) ? "explict" : - "implict", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), + " from primary access state %s to %s\n", (explicit) ? "explicit" : + "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), tg_pt_gp->tg_pt_gp_id, core_alua_dump_state(old_state), core_alua_dump_state(new_state)); @@ -880,7 +875,7 @@ int core_alua_do_port_transition( struct se_port *l_port, struct se_node_acl *l_nacl, int new_state, - int explict) + int explicit) { struct se_device *dev; struct se_port *port; @@ -917,7 +912,7 @@ int core_alua_do_port_transition( * success. */ core_alua_do_transition_tg_pt(l_tg_pt_gp, l_port, l_nacl, - md_buf, new_state, explict); + md_buf, new_state, explicit); atomic_dec(&lu_gp->lu_gp_ref_cnt); smp_mb__after_atomic_dec(); kfree(md_buf); @@ -946,7 +941,7 @@ int core_alua_do_port_transition( continue; /* * If the target behavior port asymmetric access state - * is changed for any target port group accessiable via + * is changed for any target port group accessible via * a logical unit within a LU group, the target port * behavior group asymmetric access states for the same * target port group accessible via other logical units @@ -970,7 +965,7 @@ int core_alua_do_port_transition( * success. */ core_alua_do_transition_tg_pt(tg_pt_gp, port, - nacl, md_buf, new_state, explict); + nacl, md_buf, new_state, explicit); spin_lock(&dev->t10_alua.tg_pt_gps_lock); atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); @@ -987,7 +982,7 @@ int core_alua_do_port_transition( pr_debug("Successfully processed LU Group: %s all ALUA TG PT" " Group IDs: %hu %s transition to primary state: %s\n", config_item_name(&lu_gp->lu_gp_group.cg_item), - l_tg_pt_gp->tg_pt_gp_id, (explict) ? "explict" : "implict", + l_tg_pt_gp->tg_pt_gp_id, (explicit) ? "explicit" : "implicit", core_alua_dump_state(new_state)); atomic_dec(&lu_gp->lu_gp_ref_cnt); @@ -1034,7 +1029,7 @@ static int core_alua_update_tpg_secondary_metadata( static int core_alua_set_tg_pt_secondary_state( struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, struct se_port *port, - int explict, + int explicit, int offline) { struct t10_alua_tg_pt_gp *tg_pt_gp; @@ -1061,13 +1056,13 @@ static int core_alua_set_tg_pt_secondary_state( atomic_set(&port->sep_tg_pt_secondary_offline, 0); md_buf_len = tg_pt_gp->tg_pt_gp_md_buf_len; - port->sep_tg_pt_secondary_stat = (explict) ? - ALUA_STATUS_ALTERED_BY_EXPLICT_STPG : - ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA; + port->sep_tg_pt_secondary_stat = (explicit) ? + ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : + ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu" - " to secondary access state: %s\n", (explict) ? "explict" : - "implict", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), + " to secondary access state: %s\n", (explicit) ? "explicit" : + "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), tg_pt_gp->tg_pt_gp_id, (offline) ? "OFFLINE" : "ONLINE"); spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); @@ -1232,7 +1227,7 @@ void core_alua_free_lu_gp(struct t10_alua_lu_gp *lu_gp) * struct se_device is released via core_alua_free_lu_gp_mem(). * * If the passed lu_gp does NOT match the default_lu_gp, assume - * we want to re-assocate a given lu_gp_mem with default_lu_gp. + * we want to re-associate a given lu_gp_mem with default_lu_gp. */ spin_lock(&lu_gp_mem->lu_gp_mem_lock); if (lu_gp != default_lu_gp) @@ -1354,18 +1349,25 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, tg_pt_gp->tg_pt_gp_dev = dev; tg_pt_gp->tg_pt_gp_md_buf_len = ALUA_MD_BUF_LEN; atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, - ALUA_ACCESS_STATE_ACTIVE_OPTMIZED); + ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); /* - * Enable both explict and implict ALUA support by default + * Enable both explicit and implicit ALUA support by default */ tg_pt_gp->tg_pt_gp_alua_access_type = - TPGS_EXPLICT_ALUA | TPGS_IMPLICT_ALUA; + TPGS_EXPLICIT_ALUA | TPGS_IMPLICIT_ALUA; /* * Set the default Active/NonOptimized Delay in milliseconds */ tg_pt_gp->tg_pt_gp_nonop_delay_msecs = ALUA_DEFAULT_NONOP_DELAY_MSECS; tg_pt_gp->tg_pt_gp_trans_delay_msecs = ALUA_DEFAULT_TRANS_DELAY_MSECS; - tg_pt_gp->tg_pt_gp_implict_trans_secs = ALUA_DEFAULT_IMPLICT_TRANS_SECS; + tg_pt_gp->tg_pt_gp_implicit_trans_secs = ALUA_DEFAULT_IMPLICIT_TRANS_SECS; + + /* + * Enable all supported states + */ + tg_pt_gp->tg_pt_gp_alua_supported_states = + ALUA_T_SUP | ALUA_O_SUP | + ALUA_U_SUP | ALUA_S_SUP | ALUA_AN_SUP | ALUA_AO_SUP; if (def_group) { spin_lock(&dev->t10_alua.tg_pt_gps_lock); @@ -1465,7 +1467,7 @@ void core_alua_free_tg_pt_gp( * been called from target_core_alua_drop_tg_pt_gp(). * * Here we remove *tg_pt_gp from the global list so that - * no assications *OR* explict ALUA via SET_TARGET_PORT_GROUPS + * no associations *OR* explicit ALUA via SET_TARGET_PORT_GROUPS * can be made while we are releasing struct t10_alua_tg_pt_gp. */ spin_lock(&dev->t10_alua.tg_pt_gps_lock); @@ -1501,7 +1503,7 @@ void core_alua_free_tg_pt_gp( * core_alua_free_tg_pt_gp_mem(). * * If the passed tg_pt_gp does NOT match the default_tg_pt_gp, - * assume we want to re-assocate a given tg_pt_gp_mem with + * assume we want to re-associate a given tg_pt_gp_mem with * default_tg_pt_gp. */ spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); @@ -1740,13 +1742,13 @@ ssize_t core_alua_show_access_type( struct t10_alua_tg_pt_gp *tg_pt_gp, char *page) { - if ((tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICT_ALUA) && - (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICT_ALUA)) - return sprintf(page, "Implict and Explict\n"); - else if (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICT_ALUA) - return sprintf(page, "Implict\n"); - else if (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICT_ALUA) - return sprintf(page, "Explict\n"); + if ((tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA) && + (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICIT_ALUA)) + return sprintf(page, "Implicit and Explicit\n"); + else if (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICIT_ALUA) + return sprintf(page, "Implicit\n"); + else if (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA) + return sprintf(page, "Explicit\n"); else return sprintf(page, "None\n"); } @@ -1771,11 +1773,11 @@ ssize_t core_alua_store_access_type( } if (tmp == 3) tg_pt_gp->tg_pt_gp_alua_access_type = - TPGS_IMPLICT_ALUA | TPGS_EXPLICT_ALUA; + TPGS_IMPLICIT_ALUA | TPGS_EXPLICIT_ALUA; else if (tmp == 2) - tg_pt_gp->tg_pt_gp_alua_access_type = TPGS_EXPLICT_ALUA; + tg_pt_gp->tg_pt_gp_alua_access_type = TPGS_EXPLICIT_ALUA; else if (tmp == 1) - tg_pt_gp->tg_pt_gp_alua_access_type = TPGS_IMPLICT_ALUA; + tg_pt_gp->tg_pt_gp_alua_access_type = TPGS_IMPLICIT_ALUA; else tg_pt_gp->tg_pt_gp_alua_access_type = 0; @@ -1844,14 +1846,14 @@ ssize_t core_alua_store_trans_delay_msecs( return count; } -ssize_t core_alua_show_implict_trans_secs( +ssize_t core_alua_show_implicit_trans_secs( struct t10_alua_tg_pt_gp *tg_pt_gp, char *page) { - return sprintf(page, "%d\n", tg_pt_gp->tg_pt_gp_implict_trans_secs); + return sprintf(page, "%d\n", tg_pt_gp->tg_pt_gp_implicit_trans_secs); } -ssize_t core_alua_store_implict_trans_secs( +ssize_t core_alua_store_implicit_trans_secs( struct t10_alua_tg_pt_gp *tg_pt_gp, const char *page, size_t count) @@ -1861,16 +1863,16 @@ ssize_t core_alua_store_implict_trans_secs( ret = kstrtoul(page, 0, &tmp); if (ret < 0) { - pr_err("Unable to extract implict_trans_secs\n"); + pr_err("Unable to extract implicit_trans_secs\n"); return ret; } - if (tmp > ALUA_MAX_IMPLICT_TRANS_SECS) { - pr_err("Passed implict_trans_secs: %lu, exceeds" - " ALUA_MAX_IMPLICT_TRANS_SECS: %d\n", tmp, - ALUA_MAX_IMPLICT_TRANS_SECS); + if (tmp > ALUA_MAX_IMPLICIT_TRANS_SECS) { + pr_err("Passed implicit_trans_secs: %lu, exceeds" + " ALUA_MAX_IMPLICIT_TRANS_SECS: %d\n", tmp, + ALUA_MAX_IMPLICIT_TRANS_SECS); return -EINVAL; } - tg_pt_gp->tg_pt_gp_implict_trans_secs = (int)tmp; + tg_pt_gp->tg_pt_gp_implicit_trans_secs = (int)tmp; return count; } @@ -1970,8 +1972,8 @@ ssize_t core_alua_store_secondary_status( return ret; } if ((tmp != ALUA_STATUS_NONE) && - (tmp != ALUA_STATUS_ALTERED_BY_EXPLICT_STPG) && - (tmp != ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA)) { + (tmp != ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG) && + (tmp != ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA)) { pr_err("Illegal value for alua_tg_pt_status: %lu\n", tmp); return -EINVAL; diff --git a/drivers/target/target_core_alua.h b/drivers/target/target_core_alua.h index e539c3e..88e2e83 100644 --- a/drivers/target/target_core_alua.h +++ b/drivers/target/target_core_alua.h @@ -7,15 +7,15 @@ * from spc4r17 section 6.4.2 Table 135 */ #define TPGS_NO_ALUA 0x00 -#define TPGS_IMPLICT_ALUA 0x10 -#define TPGS_EXPLICT_ALUA 0x20 +#define TPGS_IMPLICIT_ALUA 0x10 +#define TPGS_EXPLICIT_ALUA 0x20 /* * ASYMMETRIC ACCESS STATE field * * from spc4r17 section 6.27 Table 245 */ -#define ALUA_ACCESS_STATE_ACTIVE_OPTMIZED 0x0 +#define ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED 0x0 #define ALUA_ACCESS_STATE_ACTIVE_NON_OPTIMIZED 0x1 #define ALUA_ACCESS_STATE_STANDBY 0x2 #define ALUA_ACCESS_STATE_UNAVAILABLE 0x3 @@ -23,13 +23,24 @@ #define ALUA_ACCESS_STATE_TRANSITION 0xf /* + * from spc4r36j section 6.37 Table 306 + */ +#define ALUA_T_SUP 0x80 +#define ALUA_O_SUP 0x40 +#define ALUA_LBD_SUP 0x10 +#define ALUA_U_SUP 0x08 +#define ALUA_S_SUP 0x04 +#define ALUA_AN_SUP 0x02 +#define ALUA_AO_SUP 0x01 + +/* * REPORT_TARGET_PORT_GROUP STATUS CODE * * from spc4r17 section 6.27 Table 246 */ #define ALUA_STATUS_NONE 0x00 -#define ALUA_STATUS_ALTERED_BY_EXPLICT_STPG 0x01 -#define ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA 0x02 +#define ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG 0x01 +#define ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA 0x02 /* * From spc4r17, Table D.1: ASC and ASCQ Assignement @@ -46,17 +57,17 @@ #define ALUA_DEFAULT_NONOP_DELAY_MSECS 100 #define ALUA_MAX_NONOP_DELAY_MSECS 10000 /* 10 seconds */ /* - * Used for implict and explict ALUA transitional delay, that is disabled + * Used for implicit and explicit ALUA transitional delay, that is disabled * by default, and is intended to be used for debugging client side ALUA code. */ #define ALUA_DEFAULT_TRANS_DELAY_MSECS 0 #define ALUA_MAX_TRANS_DELAY_MSECS 30000 /* 30 seconds */ /* - * Used for the recommended application client implict transition timeout + * Used for the recommended application client implicit transition timeout * in seconds, returned by the REPORT_TARGET_PORT_GROUPS w/ extended header. */ -#define ALUA_DEFAULT_IMPLICT_TRANS_SECS 0 -#define ALUA_MAX_IMPLICT_TRANS_SECS 255 +#define ALUA_DEFAULT_IMPLICIT_TRANS_SECS 0 +#define ALUA_MAX_IMPLICIT_TRANS_SECS 255 /* * Used by core_alua_update_tpg_primary_metadata() and * core_alua_update_tpg_secondary_metadata() @@ -113,9 +124,9 @@ extern ssize_t core_alua_show_trans_delay_msecs(struct t10_alua_tg_pt_gp *, char *); extern ssize_t core_alua_store_trans_delay_msecs(struct t10_alua_tg_pt_gp *, const char *, size_t); -extern ssize_t core_alua_show_implict_trans_secs(struct t10_alua_tg_pt_gp *, +extern ssize_t core_alua_show_implicit_trans_secs(struct t10_alua_tg_pt_gp *, char *); -extern ssize_t core_alua_store_implict_trans_secs(struct t10_alua_tg_pt_gp *, +extern ssize_t core_alua_store_implicit_trans_secs(struct t10_alua_tg_pt_gp *, const char *, size_t); extern ssize_t core_alua_show_preferred_bit(struct t10_alua_tg_pt_gp *, char *); diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 82e81c5..272755d 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -177,16 +177,16 @@ static struct config_group *target_core_register_fabric( * struct target_fabric_configfs *tf will contain a usage reference. */ pr_debug("Target_Core_ConfigFS: REGISTER tfc_wwn_cit -> %p\n", - &TF_CIT_TMPL(tf)->tfc_wwn_cit); + &tf->tf_cit_tmpl.tfc_wwn_cit); tf->tf_group.default_groups = tf->tf_default_groups; tf->tf_group.default_groups[0] = &tf->tf_disc_group; tf->tf_group.default_groups[1] = NULL; config_group_init_type_name(&tf->tf_group, name, - &TF_CIT_TMPL(tf)->tfc_wwn_cit); + &tf->tf_cit_tmpl.tfc_wwn_cit); config_group_init_type_name(&tf->tf_disc_group, "discovery_auth", - &TF_CIT_TMPL(tf)->tfc_discovery_cit); + &tf->tf_cit_tmpl.tfc_discovery_cit); pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); @@ -2036,7 +2036,7 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state( int new_state, ret; if (!tg_pt_gp->tg_pt_gp_valid_id) { - pr_err("Unable to do implict ALUA on non valid" + pr_err("Unable to do implicit ALUA on non valid" " tg_pt_gp ID: %hu\n", tg_pt_gp->tg_pt_gp_valid_id); return -EINVAL; } @@ -2049,9 +2049,9 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state( } new_state = (int)tmp; - if (!(tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICT_ALUA)) { - pr_err("Unable to process implict configfs ALUA" - " transition while TPGS_IMPLICT_ALUA is disabled\n"); + if (!(tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICIT_ALUA)) { + pr_err("Unable to process implicit configfs ALUA" + " transition while TPGS_IMPLICIT_ALUA is disabled\n"); return -EINVAL; } @@ -2097,8 +2097,8 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_status( new_status = (int)tmp; if ((new_status != ALUA_STATUS_NONE) && - (new_status != ALUA_STATUS_ALTERED_BY_EXPLICT_STPG) && - (new_status != ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA)) { + (new_status != ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG) && + (new_status != ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA)) { pr_err("Illegal ALUA access status: 0x%02x\n", new_status); return -EINVAL; @@ -2131,6 +2131,90 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_type( SE_DEV_ALUA_TG_PT_ATTR(alua_access_type, S_IRUGO | S_IWUSR); /* + * alua_supported_states + */ + +#define SE_DEV_ALUA_SUPPORT_STATE_SHOW(_name, _var, _bit) \ +static ssize_t target_core_alua_tg_pt_gp_show_attr_alua_support_##_name( \ + struct t10_alua_tg_pt_gp *t, char *p) \ +{ \ + return sprintf(p, "%d\n", !!(t->_var & _bit)); \ +} + +#define SE_DEV_ALUA_SUPPORT_STATE_STORE(_name, _var, _bit) \ +static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_support_##_name(\ + struct t10_alua_tg_pt_gp *t, const char *p, size_t c) \ +{ \ + unsigned long tmp; \ + int ret; \ + \ + if (!t->tg_pt_gp_valid_id) { \ + pr_err("Unable to do set ##_name ALUA state on non" \ + " valid tg_pt_gp ID: %hu\n", \ + t->tg_pt_gp_valid_id); \ + return -EINVAL; \ + } \ + \ + ret = kstrtoul(p, 0, &tmp); \ + if (ret < 0) { \ + pr_err("Invalid value '%s', must be '0' or '1'\n", p); \ + return -EINVAL; \ + } \ + if (tmp > 1) { \ + pr_err("Invalid value '%ld', must be '0' or '1'\n", tmp); \ + return -EINVAL; \ + } \ + if (!tmp) \ + t->_var |= _bit; \ + else \ + t->_var &= ~_bit; \ + \ + return c; \ +} + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(transitioning, + tg_pt_gp_alua_supported_states, ALUA_T_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(transitioning, + tg_pt_gp_alua_supported_states, ALUA_T_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_transitioning, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(offline, + tg_pt_gp_alua_supported_states, ALUA_O_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(offline, + tg_pt_gp_alua_supported_states, ALUA_O_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_offline, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(lba_dependent, + tg_pt_gp_alua_supported_states, ALUA_LBD_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(lba_dependent, + tg_pt_gp_alua_supported_states, ALUA_LBD_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_lba_dependent, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(unavailable, + tg_pt_gp_alua_supported_states, ALUA_U_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(unavailable, + tg_pt_gp_alua_supported_states, ALUA_U_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_unavailable, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(standby, + tg_pt_gp_alua_supported_states, ALUA_S_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(standby, + tg_pt_gp_alua_supported_states, ALUA_S_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_standby, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(active_optimized, + tg_pt_gp_alua_supported_states, ALUA_AO_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(active_optimized, + tg_pt_gp_alua_supported_states, ALUA_AO_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_active_optimized, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(active_nonoptimized, + tg_pt_gp_alua_supported_states, ALUA_AN_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(active_nonoptimized, + tg_pt_gp_alua_supported_states, ALUA_AN_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_active_nonoptimized, S_IRUGO | S_IWUSR); + +/* * alua_write_metadata */ static ssize_t target_core_alua_tg_pt_gp_show_attr_alua_write_metadata( @@ -2210,24 +2294,24 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_trans_delay_msecs( SE_DEV_ALUA_TG_PT_ATTR(trans_delay_msecs, S_IRUGO | S_IWUSR); /* - * implict_trans_secs + * implicit_trans_secs */ -static ssize_t target_core_alua_tg_pt_gp_show_attr_implict_trans_secs( +static ssize_t target_core_alua_tg_pt_gp_show_attr_implicit_trans_secs( struct t10_alua_tg_pt_gp *tg_pt_gp, char *page) { - return core_alua_show_implict_trans_secs(tg_pt_gp, page); + return core_alua_show_implicit_trans_secs(tg_pt_gp, page); } -static ssize_t target_core_alua_tg_pt_gp_store_attr_implict_trans_secs( +static ssize_t target_core_alua_tg_pt_gp_store_attr_implicit_trans_secs( struct t10_alua_tg_pt_gp *tg_pt_gp, const char *page, size_t count) { - return core_alua_store_implict_trans_secs(tg_pt_gp, page, count); + return core_alua_store_implicit_trans_secs(tg_pt_gp, page, count); } -SE_DEV_ALUA_TG_PT_ATTR(implict_trans_secs, S_IRUGO | S_IWUSR); +SE_DEV_ALUA_TG_PT_ATTR(implicit_trans_secs, S_IRUGO | S_IWUSR); /* * preferred @@ -2350,10 +2434,17 @@ static struct configfs_attribute *target_core_alua_tg_pt_gp_attrs[] = { &target_core_alua_tg_pt_gp_alua_access_state.attr, &target_core_alua_tg_pt_gp_alua_access_status.attr, &target_core_alua_tg_pt_gp_alua_access_type.attr, + &target_core_alua_tg_pt_gp_alua_support_transitioning.attr, + &target_core_alua_tg_pt_gp_alua_support_offline.attr, + &target_core_alua_tg_pt_gp_alua_support_lba_dependent.attr, + &target_core_alua_tg_pt_gp_alua_support_unavailable.attr, + &target_core_alua_tg_pt_gp_alua_support_standby.attr, + &target_core_alua_tg_pt_gp_alua_support_active_nonoptimized.attr, + &target_core_alua_tg_pt_gp_alua_support_active_optimized.attr, &target_core_alua_tg_pt_gp_alua_write_metadata.attr, &target_core_alua_tg_pt_gp_nonop_delay_msecs.attr, &target_core_alua_tg_pt_gp_trans_delay_msecs.attr, - &target_core_alua_tg_pt_gp_implict_trans_secs.attr, + &target_core_alua_tg_pt_gp_implicit_trans_secs.attr, &target_core_alua_tg_pt_gp_preferred.attr, &target_core_alua_tg_pt_gp_tg_pt_gp_id.attr, &target_core_alua_tg_pt_gp_members.attr, diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index d90dbb0..207b340 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -92,6 +92,9 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; + + percpu_ref_get(&se_lun->lun_ref); + se_cmd->lun_ref_active = true; } spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags); @@ -119,24 +122,20 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) se_cmd->se_lun = &se_sess->se_tpg->tpg_virt_lun0; se_cmd->orig_fe_lun = 0; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; + + percpu_ref_get(&se_lun->lun_ref); + se_cmd->lun_ref_active = true; } /* Directly associate cmd with se_dev */ se_cmd->se_dev = se_lun->lun_se_dev; - /* TODO: get rid of this and use atomics for stats */ dev = se_lun->lun_se_dev; - spin_lock_irqsave(&dev->stats_lock, flags); - dev->num_cmds++; + atomic_long_inc(&dev->num_cmds); if (se_cmd->data_direction == DMA_TO_DEVICE) - dev->write_bytes += se_cmd->data_length; + atomic_long_add(se_cmd->data_length, &dev->write_bytes); else if (se_cmd->data_direction == DMA_FROM_DEVICE) - dev->read_bytes += se_cmd->data_length; - spin_unlock_irqrestore(&dev->stats_lock, flags); - - spin_lock_irqsave(&se_lun->lun_cmd_lock, flags); - list_add_tail(&se_cmd->se_lun_node, &se_lun->lun_cmd_list); - spin_unlock_irqrestore(&se_lun->lun_cmd_lock, flags); + atomic_long_add(se_cmd->data_length, &dev->read_bytes); return 0; } @@ -314,14 +313,14 @@ int core_enable_device_list_for_node( deve = nacl->device_list[mapped_lun]; /* - * Check if the call is handling demo mode -> explict LUN ACL + * Check if the call is handling demo mode -> explicit LUN ACL * transition. This transition must be for the same struct se_lun * + mapped_lun that was setup in demo mode.. */ if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { if (deve->se_lun_acl != NULL) { pr_err("struct se_dev_entry->se_lun_acl" - " already set for demo mode -> explict" + " already set for demo mode -> explicit" " LUN ACL transition\n"); spin_unlock_irq(&nacl->device_list_lock); return -EINVAL; @@ -329,7 +328,7 @@ int core_enable_device_list_for_node( if (deve->se_lun != lun) { pr_err("struct se_dev_entry->se_lun does" " match passed struct se_lun for demo mode" - " -> explict LUN ACL transition\n"); + " -> explicit LUN ACL transition\n"); spin_unlock_irq(&nacl->device_list_lock); return -EINVAL; } @@ -1407,6 +1406,7 @@ static void scsi_dump_inquiry(struct se_device *dev) struct se_device *target_alloc_device(struct se_hba *hba, const char *name) { struct se_device *dev; + struct se_lun *xcopy_lun; dev = hba->transport->alloc_device(hba, name); if (!dev) @@ -1423,7 +1423,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&dev->state_list); INIT_LIST_HEAD(&dev->qf_cmd_list); INIT_LIST_HEAD(&dev->g_dev_node); - spin_lock_init(&dev->stats_lock); spin_lock_init(&dev->execute_task_lock); spin_lock_init(&dev->delayed_cmd_lock); spin_lock_init(&dev->dev_reservation_lock); @@ -1469,6 +1468,14 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.fabric_max_sectors = DA_FABRIC_MAX_SECTORS; dev->dev_attrib.optimal_sectors = DA_FABRIC_MAX_SECTORS; + xcopy_lun = &dev->xcopy_lun; + xcopy_lun->lun_se_dev = dev; + init_completion(&xcopy_lun->lun_shutdown_comp); + INIT_LIST_HEAD(&xcopy_lun->lun_acl_list); + spin_lock_init(&xcopy_lun->lun_acl_lock); + spin_lock_init(&xcopy_lun->lun_sep_lock); + init_completion(&xcopy_lun->lun_ref_comp); + return dev; } diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 3503996..dae2ad6 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -385,9 +385,9 @@ static struct config_group *target_fabric_make_mappedlun( } config_group_init_type_name(&lacl->se_lun_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_mappedlun_cit); + &tf->tf_cit_tmpl.tfc_tpg_mappedlun_cit); config_group_init_type_name(&lacl->ml_stat_grps.stat_group, - "statistics", &TF_CIT_TMPL(tf)->tfc_tpg_mappedlun_stat_cit); + "statistics", &tf->tf_cit_tmpl.tfc_tpg_mappedlun_stat_cit); lacl_cg->default_groups[0] = &lacl->ml_stat_grps.stat_group; lacl_cg->default_groups[1] = NULL; @@ -504,16 +504,16 @@ static struct config_group *target_fabric_make_nodeacl( nacl_cg->default_groups[4] = NULL; config_group_init_type_name(&se_nacl->acl_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_base_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_base_cit); config_group_init_type_name(&se_nacl->acl_attrib_group, "attrib", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_attrib_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit); config_group_init_type_name(&se_nacl->acl_auth_group, "auth", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_auth_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_auth_cit); config_group_init_type_name(&se_nacl->acl_param_group, "param", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_param_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_param_cit); config_group_init_type_name(&se_nacl->acl_fabric_stat_group, "fabric_statistics", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_stat_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_stat_cit); return &se_nacl->acl_group; } @@ -595,7 +595,7 @@ static struct config_group *target_fabric_make_np( se_tpg_np->tpg_np_parent = se_tpg; config_group_init_type_name(&se_tpg_np->tpg_np_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_np_base_cit); + &tf->tf_cit_tmpl.tfc_tpg_np_base_cit); return &se_tpg_np->tpg_np_group; } @@ -899,9 +899,9 @@ static struct config_group *target_fabric_make_lun( } config_group_init_type_name(&lun->lun_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_port_cit); + &tf->tf_cit_tmpl.tfc_tpg_port_cit); config_group_init_type_name(&lun->port_stat_grps.stat_group, - "statistics", &TF_CIT_TMPL(tf)->tfc_tpg_port_stat_cit); + "statistics", &tf->tf_cit_tmpl.tfc_tpg_port_stat_cit); lun_cg->default_groups[0] = &lun->port_stat_grps.stat_group; lun_cg->default_groups[1] = NULL; @@ -1056,19 +1056,19 @@ static struct config_group *target_fabric_make_tpg( se_tpg->tpg_group.default_groups[6] = NULL; config_group_init_type_name(&se_tpg->tpg_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_base_cit); + &tf->tf_cit_tmpl.tfc_tpg_base_cit); config_group_init_type_name(&se_tpg->tpg_lun_group, "lun", - &TF_CIT_TMPL(tf)->tfc_tpg_lun_cit); + &tf->tf_cit_tmpl.tfc_tpg_lun_cit); config_group_init_type_name(&se_tpg->tpg_np_group, "np", - &TF_CIT_TMPL(tf)->tfc_tpg_np_cit); + &tf->tf_cit_tmpl.tfc_tpg_np_cit); config_group_init_type_name(&se_tpg->tpg_acl_group, "acls", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_cit); config_group_init_type_name(&se_tpg->tpg_attrib_group, "attrib", - &TF_CIT_TMPL(tf)->tfc_tpg_attrib_cit); + &tf->tf_cit_tmpl.tfc_tpg_attrib_cit); config_group_init_type_name(&se_tpg->tpg_auth_group, "auth", - &TF_CIT_TMPL(tf)->tfc_tpg_auth_cit); + &tf->tf_cit_tmpl.tfc_tpg_auth_cit); config_group_init_type_name(&se_tpg->tpg_param_group, "param", - &TF_CIT_TMPL(tf)->tfc_tpg_param_cit); + &tf->tf_cit_tmpl.tfc_tpg_param_cit); return &se_tpg->tpg_group; } @@ -1155,9 +1155,9 @@ static struct config_group *target_fabric_make_wwn( wwn->wwn_group.default_groups[1] = NULL; config_group_init_type_name(&wwn->wwn_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_cit); + &tf->tf_cit_tmpl.tfc_tpg_cit); config_group_init_type_name(&wwn->fabric_stat_group, "fabric_statistics", - &TF_CIT_TMPL(tf)->tfc_wwn_fabric_stats_cit); + &tf->tf_cit_tmpl.tfc_wwn_fabric_stats_cit); return &wwn->wwn_group; } diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index b662f89..0e34cda 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -562,7 +562,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } else { ret = fd_do_rw(cmd, sgl, sgl_nents, 1); /* - * Perform implict vfs_fsync_range() for fd_do_writev() ops + * Perform implicit vfs_fsync_range() for fd_do_writev() ops * for SCSI WRITEs with Forced Unit Access (FUA) set. * Allow this to happen independent of WCE=0 setting. */ diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index b9a3394..c87959f 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -710,6 +710,45 @@ static sector_t iblock_get_blocks(struct se_device *dev) return iblock_emulate_read_cap_with_block_size(dev, bd, q); } +static sector_t iblock_get_alignment_offset_lbas(struct se_device *dev) +{ + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct block_device *bd = ib_dev->ibd_bd; + int ret; + + ret = bdev_alignment_offset(bd); + if (ret == -1) + return 0; + + /* convert offset-bytes to offset-lbas */ + return ret / bdev_logical_block_size(bd); +} + +static unsigned int iblock_get_lbppbe(struct se_device *dev) +{ + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct block_device *bd = ib_dev->ibd_bd; + int logs_per_phys = bdev_physical_block_size(bd) / bdev_logical_block_size(bd); + + return ilog2(logs_per_phys); +} + +static unsigned int iblock_get_io_min(struct se_device *dev) +{ + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct block_device *bd = ib_dev->ibd_bd; + + return bdev_io_min(bd); +} + +static unsigned int iblock_get_io_opt(struct se_device *dev) +{ + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct block_device *bd = ib_dev->ibd_bd; + + return bdev_io_opt(bd); +} + static struct sbc_ops iblock_sbc_ops = { .execute_rw = iblock_execute_rw, .execute_sync_cache = iblock_execute_sync_cache, @@ -749,6 +788,10 @@ static struct se_subsystem_api iblock_template = { .show_configfs_dev_params = iblock_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = iblock_get_blocks, + .get_alignment_offset_lbas = iblock_get_alignment_offset_lbas, + .get_lbppbe = iblock_get_lbppbe, + .get_io_min = iblock_get_io_min, + .get_io_opt = iblock_get_io_opt, .get_write_cache = iblock_get_write_cache, }; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 579128a..47b63b0 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -75,8 +75,6 @@ extern struct se_device *g_lun0_dev; struct se_node_acl *__core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, const char *); -struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, - unsigned char *); void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *); void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *); struct se_lun *core_tpg_pre_addlun(struct se_portal_group *, u32); @@ -102,7 +100,7 @@ int transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int); bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags); -int transport_clear_lun_from_sessions(struct se_lun *); +int transport_clear_lun_ref(struct se_lun *); void transport_send_task_abort(struct se_cmd *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index d1ae4c5..2f5d779 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -474,7 +474,7 @@ static int core_scsi3_pr_seq_non_holder( * statement. */ if (!ret && !other_cdb) { - pr_debug("Allowing explict CDB: 0x%02x for %s" + pr_debug("Allowing explicit CDB: 0x%02x for %s" " reservation holder\n", cdb[0], core_scsi3_pr_dump_type(pr_reg_type)); @@ -507,7 +507,7 @@ static int core_scsi3_pr_seq_non_holder( */ if (!registered_nexus) { - pr_debug("Allowing implict CDB: 0x%02x" + pr_debug("Allowing implicit CDB: 0x%02x" " for %s reservation on unregistered" " nexus\n", cdb[0], core_scsi3_pr_dump_type(pr_reg_type)); @@ -522,7 +522,7 @@ static int core_scsi3_pr_seq_non_holder( * allow commands from registered nexuses. */ - pr_debug("Allowing implict CDB: 0x%02x for %s" + pr_debug("Allowing implicit CDB: 0x%02x for %s" " reservation\n", cdb[0], core_scsi3_pr_dump_type(pr_reg_type)); @@ -683,7 +683,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( alua_port_list) { /* * This pointer will be NULL for demo mode MappedLUNs - * that have not been make explict via a ConfigFS + * that have not been make explicit via a ConfigFS * MappedLUN group for the SCSI Initiator Node ACL. */ if (!deve_tmp->se_lun_acl) @@ -1158,7 +1158,7 @@ static void core_scsi3_put_pr_reg(struct t10_pr_registration *pr_reg) smp_mb__after_atomic_dec(); } -static int core_scsi3_check_implict_release( +static int core_scsi3_check_implicit_release( struct se_device *dev, struct t10_pr_registration *pr_reg) { @@ -1174,7 +1174,7 @@ static int core_scsi3_check_implict_release( } if (pr_res_holder == pr_reg) { /* - * Perform an implict RELEASE if the registration that + * Perform an implicit RELEASE if the registration that * is being released is holding the reservation. * * From spc4r17, section 5.7.11.1: @@ -1192,7 +1192,7 @@ static int core_scsi3_check_implict_release( * For 'All Registrants' reservation types, all existing * registrations are still processed as reservation holders * in core_scsi3_pr_seq_non_holder() after the initial - * reservation holder is implictly released here. + * reservation holder is implicitly released here. */ } else if (pr_reg->pr_reg_all_tg_pt && (!strcmp(pr_res_holder->pr_reg_nacl->initiatorname, @@ -2125,7 +2125,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, /* * sa_res_key=0 Unregister Reservation Key for registered I_T Nexus. */ - pr_holder = core_scsi3_check_implict_release( + pr_holder = core_scsi3_check_implicit_release( cmd->se_dev, pr_reg); if (pr_holder < 0) { ret = TCM_RESERVATION_CONFLICT; @@ -2402,7 +2402,7 @@ static void __core_scsi3_complete_pro_release( struct se_device *dev, struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, - int explict) + int explicit) { struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo; char i_buf[PR_REG_ISID_ID_LEN]; @@ -2416,7 +2416,7 @@ static void __core_scsi3_complete_pro_release( pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared" " reservation holder TYPE: %s ALL_TG_PT: %d\n", - tfo->get_fabric_name(), (explict) ? "explict" : "implict", + tfo->get_fabric_name(), (explicit) ? "explicit" : "implicit", core_scsi3_pr_dump_type(pr_reg->pr_res_type), (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n", @@ -2692,7 +2692,7 @@ static void __core_scsi3_complete_pro_preempt( memset(i_buf, 0, PR_REG_ISID_ID_LEN); core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); /* - * Do an implict RELEASE of the existing reservation. + * Do an implicit RELEASE of the existing reservation. */ if (dev->dev_pr_res_holder) __core_scsi3_complete_pro_release(dev, nacl, @@ -2845,7 +2845,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, * 5.7.11.4 Preempting, Table 52 and Figure 7. * * For a ZERO SA Reservation key, release - * all other registrations and do an implict + * all other registrations and do an implicit * release of active persistent reservation. * * For a non-ZERO SA Reservation key, only diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 131327a..4ffe5f2 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -27,7 +27,6 @@ #include <linux/string.h> #include <linux/parser.h> #include <linux/timer.h> -#include <linux/blkdev.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <scsi/scsi.h> diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index d9b92b2..52ae54e 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -105,12 +105,22 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd) buf[9] = (dev->dev_attrib.block_size >> 16) & 0xff; buf[10] = (dev->dev_attrib.block_size >> 8) & 0xff; buf[11] = dev->dev_attrib.block_size & 0xff; + + if (dev->transport->get_lbppbe) + buf[13] = dev->transport->get_lbppbe(dev) & 0x0f; + + if (dev->transport->get_alignment_offset_lbas) { + u16 lalba = dev->transport->get_alignment_offset_lbas(dev); + buf[14] = (lalba >> 8) & 0x3f; + buf[15] = lalba & 0xff; + } + /* * Set Thin Provisioning Enable bit following sbc3r22 in section * READ CAPACITY (16) byte 14 if emulate_tpu or emulate_tpws is enabled. */ if (dev->dev_attrib.emulate_tpu || dev->dev_attrib.emulate_tpws) - buf[14] = 0x80; + buf[14] |= 0x80; rbuf = transport_kmap_data_sg(cmd); if (rbuf) { diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 0745395..021c3f4 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -48,7 +48,7 @@ static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) buf[5] = 0x80; /* - * Set TPGS field for explict and/or implict ALUA access type + * Set TPGS field for explicit and/or implicit ALUA access type * and opteration. * * See spc4r17 section 6.4.2 Table 135 @@ -452,6 +452,7 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) struct se_device *dev = cmd->se_dev; u32 max_sectors; int have_tp = 0; + int opt, min; /* * Following spc3r22 section 6.5.3 Block Limits VPD page, when @@ -475,7 +476,10 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* * Set OPTIMAL TRANSFER LENGTH GRANULARITY */ - put_unaligned_be16(1, &buf[6]); + if (dev->transport->get_io_min && (min = dev->transport->get_io_min(dev))) + put_unaligned_be16(min / dev->dev_attrib.block_size, &buf[6]); + else + put_unaligned_be16(1, &buf[6]); /* * Set MAXIMUM TRANSFER LENGTH @@ -487,7 +491,10 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* * Set OPTIMAL TRANSFER LENGTH */ - put_unaligned_be32(dev->dev_attrib.optimal_sectors, &buf[12]); + if (dev->transport->get_io_opt && (opt = dev->transport->get_io_opt(dev))) + put_unaligned_be32(opt / dev->dev_attrib.block_size, &buf[12]); + else + put_unaligned_be32(dev->dev_attrib.optimal_sectors, &buf[12]); /* * Exit now if we don't support TP. @@ -1250,7 +1257,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) *size = (cdb[3] << 8) + cdb[4]; /* - * Do implict HEAD_OF_QUEUE processing for INQUIRY. + * Do implicit HEAD_OF_QUEUE processing for INQUIRY. * See spc4r17 section 5.3 */ cmd->sam_task_attr = MSG_HEAD_TAG; @@ -1284,7 +1291,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) cmd->execute_cmd = spc_emulate_report_luns; *size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9]; /* - * Do implict HEAD_OF_QUEUE processing for REPORT_LUNS + * Do implicit HEAD_OF_QUEUE processing for REPORT_LUNS * See spc4r17 section 5.3 */ cmd->sam_task_attr = MSG_HEAD_TAG; diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 9c642e0..0353899 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -32,7 +32,6 @@ #include <linux/utsname.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/blkdev.h> #include <linux/configfs.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -214,7 +213,8 @@ static ssize_t target_stat_scsi_tgt_dev_show_attr_resets( struct se_device *dev = container_of(sgrps, struct se_device, dev_stat_grps); - return snprintf(page, PAGE_SIZE, "%u\n", dev->num_resets); + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&dev->num_resets)); } DEV_STAT_SCSI_TGT_DEV_ATTR_RO(resets); @@ -397,8 +397,8 @@ static ssize_t target_stat_scsi_lu_show_attr_num_cmds( container_of(sgrps, struct se_device, dev_stat_grps); /* scsiLuNumCommands */ - return snprintf(page, PAGE_SIZE, "%llu\n", - (unsigned long long)dev->num_cmds); + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&dev->num_cmds)); } DEV_STAT_SCSI_LU_ATTR_RO(num_cmds); @@ -409,7 +409,8 @@ static ssize_t target_stat_scsi_lu_show_attr_read_mbytes( container_of(sgrps, struct se_device, dev_stat_grps); /* scsiLuReadMegaBytes */ - return snprintf(page, PAGE_SIZE, "%u\n", (u32)(dev->read_bytes >> 20)); + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&dev->read_bytes) >> 20); } DEV_STAT_SCSI_LU_ATTR_RO(read_mbytes); @@ -420,7 +421,8 @@ static ssize_t target_stat_scsi_lu_show_attr_write_mbytes( container_of(sgrps, struct se_device, dev_stat_grps); /* scsiLuWrittenMegaBytes */ - return snprintf(page, PAGE_SIZE, "%u\n", (u32)(dev->write_bytes >> 20)); + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&dev->write_bytes) >> 20); } DEV_STAT_SCSI_LU_ATTR_RO(write_mbytes); @@ -431,7 +433,7 @@ static ssize_t target_stat_scsi_lu_show_attr_resets( container_of(sgrps, struct se_device, dev_stat_grps); /* scsiLuInResets */ - return snprintf(page, PAGE_SIZE, "%u\n", dev->num_resets); + return snprintf(page, PAGE_SIZE, "%lu\n", atomic_long_read(&dev->num_resets)); } DEV_STAT_SCSI_LU_ATTR_RO(resets); diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 2500099..70c638f 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -386,9 +386,7 @@ int core_tmr_lun_reset( pr_debug("LUN_RESET: SCSI-2 Released reservation\n"); } - spin_lock_irq(&dev->stats_lock); - dev->num_resets++; - spin_unlock_irq(&dev->stats_lock); + atomic_long_inc(&dev->num_resets); pr_debug("LUN_RESET: %s for [%s] Complete\n", (preempt_and_abort_list) ? "Preempt" : "TMR", diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index b9a6ec0..f697f8b 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -116,6 +116,7 @@ struct se_node_acl *core_tpg_get_initiator_node_acl( return acl; } +EXPORT_SYMBOL(core_tpg_get_initiator_node_acl); /* core_tpg_add_node_to_devs(): * @@ -633,6 +634,13 @@ int core_tpg_set_initiator_node_tag( } EXPORT_SYMBOL(core_tpg_set_initiator_node_tag); +static void core_tpg_lun_ref_release(struct percpu_ref *ref) +{ + struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); + + complete(&lun->lun_ref_comp); +} + static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) { /* Set in core_dev_setup_virtual_lun0() */ @@ -646,15 +654,20 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); INIT_LIST_HEAD(&lun->lun_acl_list); - INIT_LIST_HEAD(&lun->lun_cmd_list); spin_lock_init(&lun->lun_acl_lock); - spin_lock_init(&lun->lun_cmd_lock); spin_lock_init(&lun->lun_sep_lock); + init_completion(&lun->lun_ref_comp); - ret = core_tpg_post_addlun(se_tpg, lun, lun_access, dev); + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); if (ret < 0) return ret; + ret = core_tpg_post_addlun(se_tpg, lun, lun_access, dev); + if (ret < 0) { + percpu_ref_cancel_init(&lun->lun_ref); + return ret; + } + return 0; } @@ -691,10 +704,9 @@ int core_tpg_register( atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); INIT_LIST_HEAD(&lun->lun_acl_list); - INIT_LIST_HEAD(&lun->lun_cmd_list); spin_lock_init(&lun->lun_acl_lock); - spin_lock_init(&lun->lun_cmd_lock); spin_lock_init(&lun->lun_sep_lock); + init_completion(&lun->lun_ref_comp); } se_tpg->se_tpg_type = se_tpg_type; @@ -815,10 +827,16 @@ int core_tpg_post_addlun( { int ret; - ret = core_dev_export(lun_ptr, tpg, lun); + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); if (ret < 0) return ret; + ret = core_dev_export(lun_ptr, tpg, lun); + if (ret < 0) { + percpu_ref_cancel_init(&lun->lun_ref); + return ret; + } + spin_lock(&tpg->tpg_lun_lock); lun->lun_access = lun_access; lun->lun_status = TRANSPORT_LUN_STATUS_ACTIVE; @@ -827,14 +845,6 @@ int core_tpg_post_addlun( return 0; } -static void core_tpg_shutdown_lun( - struct se_portal_group *tpg, - struct se_lun *lun) -{ - core_clear_lun_from_tpg(lun, tpg); - transport_clear_lun_from_sessions(lun); -} - struct se_lun *core_tpg_pre_dellun( struct se_portal_group *tpg, u32 unpacked_lun) @@ -869,7 +879,8 @@ int core_tpg_post_dellun( struct se_portal_group *tpg, struct se_lun *lun) { - core_tpg_shutdown_lun(tpg, lun); + core_clear_lun_from_tpg(lun, tpg); + transport_clear_lun_ref(lun); core_dev_unexport(lun->lun_se_dev, tpg, lun); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 81e945e..91953da 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -28,7 +28,6 @@ #include <linux/string.h> #include <linux/timer.h> #include <linux/slab.h> -#include <linux/blkdev.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/in.h> @@ -473,7 +472,7 @@ void transport_deregister_session(struct se_session *se_sess) pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n", se_tpg->se_tpg_tfo->get_fabric_name()); /* - * If last kref is dropping now for an explict NodeACL, awake sleeping + * If last kref is dropping now for an explicit NodeACL, awake sleeping * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group * removal context. */ @@ -515,23 +514,6 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, if (write_pending) cmd->t_state = TRANSPORT_WRITE_PENDING; - /* - * Determine if IOCTL context caller in requesting the stopping of this - * command for LUN shutdown purposes. - */ - if (cmd->transport_state & CMD_T_LUN_STOP) { - pr_debug("%s:%d CMD_T_LUN_STOP for ITT: 0x%08x\n", - __func__, __LINE__, cmd->se_tfo->get_task_tag(cmd)); - - cmd->transport_state &= ~CMD_T_ACTIVE; - if (remove_from_lists) - target_remove_from_state_list(cmd); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - - complete(&cmd->transport_lun_stop_comp); - return 1; - } - if (remove_from_lists) { target_remove_from_state_list(cmd); @@ -585,15 +567,11 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) static void transport_lun_remove_cmd(struct se_cmd *cmd) { struct se_lun *lun = cmd->se_lun; - unsigned long flags; - if (!lun) + if (!lun || !cmd->lun_ref_active) return; - spin_lock_irqsave(&lun->lun_cmd_lock, flags); - if (!list_empty(&cmd->se_lun_node)) - list_del_init(&cmd->se_lun_node); - spin_unlock_irqrestore(&lun->lun_cmd_lock, flags); + percpu_ref_put(&lun->lun_ref); } void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) @@ -668,7 +646,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) cmd->transport_state |= CMD_T_FAILED; /* - * Check for case where an explict ABORT_TASK has been received + * Check for case where an explicit ABORT_TASK has been received * and transport_wait_for_tasks() will be waiting for completion.. */ if (cmd->transport_state & CMD_T_ABORTED && @@ -1092,13 +1070,10 @@ void transport_init_se_cmd( int task_attr, unsigned char *sense_buffer) { - INIT_LIST_HEAD(&cmd->se_lun_node); INIT_LIST_HEAD(&cmd->se_delayed_node); INIT_LIST_HEAD(&cmd->se_qf_node); INIT_LIST_HEAD(&cmd->se_cmd_list); INIT_LIST_HEAD(&cmd->state_list); - init_completion(&cmd->transport_lun_fe_stop_comp); - init_completion(&cmd->transport_lun_stop_comp); init_completion(&cmd->t_transport_stop_comp); init_completion(&cmd->cmd_wait_comp); init_completion(&cmd->task_stop_comp); @@ -1719,29 +1694,14 @@ void target_execute_cmd(struct se_cmd *cmd) /* * If the received CDB has aleady been aborted stop processing it here. */ - if (transport_check_aborted_status(cmd, 1)) { - complete(&cmd->transport_lun_stop_comp); + if (transport_check_aborted_status(cmd, 1)) return; - } /* - * Determine if IOCTL context caller in requesting the stopping of this - * command for LUN shutdown purposes. - */ - spin_lock_irq(&cmd->t_state_lock); - if (cmd->transport_state & CMD_T_LUN_STOP) { - pr_debug("%s:%d CMD_T_LUN_STOP for ITT: 0x%08x\n", - __func__, __LINE__, cmd->se_tfo->get_task_tag(cmd)); - - cmd->transport_state &= ~CMD_T_ACTIVE; - spin_unlock_irq(&cmd->t_state_lock); - complete(&cmd->transport_lun_stop_comp); - return; - } - /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. */ + spin_lock_irq(&cmd->t_state_lock); if (cmd->transport_state & CMD_T_STOP) { pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n", __func__, __LINE__, @@ -2404,164 +2364,23 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) } EXPORT_SYMBOL(target_wait_for_sess_cmds); -/* transport_lun_wait_for_tasks(): - * - * Called from ConfigFS context to stop the passed struct se_cmd to allow - * an struct se_lun to be successfully shutdown. - */ -static int transport_lun_wait_for_tasks(struct se_cmd *cmd, struct se_lun *lun) -{ - unsigned long flags; - int ret = 0; - - /* - * If the frontend has already requested this struct se_cmd to - * be stopped, we can safely ignore this struct se_cmd. - */ - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (cmd->transport_state & CMD_T_STOP) { - cmd->transport_state &= ~CMD_T_LUN_STOP; - - pr_debug("ConfigFS ITT[0x%08x] - CMD_T_STOP, skipping\n", - cmd->se_tfo->get_task_tag(cmd)); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - transport_cmd_check_stop(cmd, false, false); - return -EPERM; - } - cmd->transport_state |= CMD_T_LUN_FE_STOP; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - - // XXX: audit task_flags checks. - spin_lock_irqsave(&cmd->t_state_lock, flags); - if ((cmd->transport_state & CMD_T_BUSY) && - (cmd->transport_state & CMD_T_SENT)) { - if (!target_stop_cmd(cmd, &flags)) - ret++; - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - - pr_debug("ConfigFS: cmd: %p stop tasks ret:" - " %d\n", cmd, ret); - if (!ret) { - pr_debug("ConfigFS: ITT[0x%08x] - stopping cmd....\n", - cmd->se_tfo->get_task_tag(cmd)); - wait_for_completion(&cmd->transport_lun_stop_comp); - pr_debug("ConfigFS: ITT[0x%08x] - stopped cmd....\n", - cmd->se_tfo->get_task_tag(cmd)); - } - - return 0; -} - -static void __transport_clear_lun_from_sessions(struct se_lun *lun) -{ - struct se_cmd *cmd = NULL; - unsigned long lun_flags, cmd_flags; - /* - * Do exception processing and return CHECK_CONDITION status to the - * Initiator Port. - */ - spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); - while (!list_empty(&lun->lun_cmd_list)) { - cmd = list_first_entry(&lun->lun_cmd_list, - struct se_cmd, se_lun_node); - list_del_init(&cmd->se_lun_node); - - spin_lock(&cmd->t_state_lock); - pr_debug("SE_LUN[%d] - Setting cmd->transport" - "_lun_stop for ITT: 0x%08x\n", - cmd->se_lun->unpacked_lun, - cmd->se_tfo->get_task_tag(cmd)); - cmd->transport_state |= CMD_T_LUN_STOP; - spin_unlock(&cmd->t_state_lock); - - spin_unlock_irqrestore(&lun->lun_cmd_lock, lun_flags); - - if (!cmd->se_lun) { - pr_err("ITT: 0x%08x, [i,t]_state: %u/%u\n", - cmd->se_tfo->get_task_tag(cmd), - cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); - BUG(); - } - /* - * If the Storage engine still owns the iscsi_cmd_t, determine - * and/or stop its context. - */ - pr_debug("SE_LUN[%d] - ITT: 0x%08x before transport" - "_lun_wait_for_tasks()\n", cmd->se_lun->unpacked_lun, - cmd->se_tfo->get_task_tag(cmd)); - - if (transport_lun_wait_for_tasks(cmd, cmd->se_lun) < 0) { - spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); - continue; - } - - pr_debug("SE_LUN[%d] - ITT: 0x%08x after transport_lun" - "_wait_for_tasks(): SUCCESS\n", - cmd->se_lun->unpacked_lun, - cmd->se_tfo->get_task_tag(cmd)); - - spin_lock_irqsave(&cmd->t_state_lock, cmd_flags); - if (!(cmd->transport_state & CMD_T_DEV_ACTIVE)) { - spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags); - goto check_cond; - } - cmd->transport_state &= ~CMD_T_DEV_ACTIVE; - target_remove_from_state_list(cmd); - spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags); - - /* - * The Storage engine stopped this struct se_cmd before it was - * send to the fabric frontend for delivery back to the - * Initiator Node. Return this SCSI CDB back with an - * CHECK_CONDITION status. - */ -check_cond: - transport_send_check_condition_and_sense(cmd, - TCM_NON_EXISTENT_LUN, 0); - /* - * If the fabric frontend is waiting for this iscsi_cmd_t to - * be released, notify the waiting thread now that LU has - * finished accessing it. - */ - spin_lock_irqsave(&cmd->t_state_lock, cmd_flags); - if (cmd->transport_state & CMD_T_LUN_FE_STOP) { - pr_debug("SE_LUN[%d] - Detected FE stop for" - " struct se_cmd: %p ITT: 0x%08x\n", - lun->unpacked_lun, - cmd, cmd->se_tfo->get_task_tag(cmd)); - - spin_unlock_irqrestore(&cmd->t_state_lock, - cmd_flags); - transport_cmd_check_stop(cmd, false, false); - complete(&cmd->transport_lun_fe_stop_comp); - spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); - continue; - } - pr_debug("SE_LUN[%d] - ITT: 0x%08x finished processing\n", - lun->unpacked_lun, cmd->se_tfo->get_task_tag(cmd)); - - spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags); - spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); - } - spin_unlock_irqrestore(&lun->lun_cmd_lock, lun_flags); -} - -static int transport_clear_lun_thread(void *p) +static int transport_clear_lun_ref_thread(void *p) { struct se_lun *lun = p; - __transport_clear_lun_from_sessions(lun); + percpu_ref_kill(&lun->lun_ref); + + wait_for_completion(&lun->lun_ref_comp); complete(&lun->lun_shutdown_comp); return 0; } -int transport_clear_lun_from_sessions(struct se_lun *lun) +int transport_clear_lun_ref(struct se_lun *lun) { struct task_struct *kt; - kt = kthread_run(transport_clear_lun_thread, lun, + kt = kthread_run(transport_clear_lun_ref_thread, lun, "tcm_cl_%u", lun->unpacked_lun); if (IS_ERR(kt)) { pr_err("Unable to start clear_lun thread\n"); @@ -2595,43 +2414,6 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) spin_unlock_irqrestore(&cmd->t_state_lock, flags); return false; } - /* - * If we are already stopped due to an external event (ie: LUN shutdown) - * sleep until the connection can have the passed struct se_cmd back. - * The cmd->transport_lun_stopped_sem will be upped by - * transport_clear_lun_from_sessions() once the ConfigFS context caller - * has completed its operation on the struct se_cmd. - */ - if (cmd->transport_state & CMD_T_LUN_STOP) { - pr_debug("wait_for_tasks: Stopping" - " wait_for_completion(&cmd->t_tasktransport_lun_fe" - "_stop_comp); for ITT: 0x%08x\n", - cmd->se_tfo->get_task_tag(cmd)); - /* - * There is a special case for WRITES where a FE exception + - * LUN shutdown means ConfigFS context is still sleeping on - * transport_lun_stop_comp in transport_lun_wait_for_tasks(). - * We go ahead and up transport_lun_stop_comp just to be sure - * here. - */ - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - complete(&cmd->transport_lun_stop_comp); - wait_for_completion(&cmd->transport_lun_fe_stop_comp); - spin_lock_irqsave(&cmd->t_state_lock, flags); - - target_remove_from_state_list(cmd); - /* - * At this point, the frontend who was the originator of this - * struct se_cmd, now owns the structure and can be released through - * normal means below. - */ - pr_debug("wait_for_tasks: Stopped" - " wait_for_completion(&cmd->t_tasktransport_lun_fe_" - "stop_comp); for ITT: 0x%08x\n", - cmd->se_tfo->get_task_tag(cmd)); - - cmd->transport_state &= ~CMD_T_LUN_STOP; - } if (!(cmd->transport_state & CMD_T_ACTIVE)) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -2910,6 +2692,7 @@ int transport_check_aborted_status(struct se_cmd *cmd, int send_status) cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd)); cmd->se_cmd_flags |= SCF_SENT_DELAYED_TAS; + cmd->scsi_status = SAM_STAT_TASK_ABORTED; trace_target_cmd_complete(cmd); cmd->se_tfo->queue_status(cmd); @@ -2938,6 +2721,7 @@ void transport_send_task_abort(struct se_cmd *cmd) if (cmd->se_tfo->write_pending_status(cmd) != 0) { cmd->transport_state |= CMD_T_ABORTED; smp_mb__after_atomic_inc(); + return; } } cmd->scsi_status = SAM_STAT_TASK_ABORTED; diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h index 0204952..be912b3 100644 --- a/drivers/target/target_core_ua.h +++ b/drivers/target/target_core_ua.h @@ -19,7 +19,7 @@ #define ASCQ_2AH_RESERVATIONS_RELEASED 0x04 #define ASCQ_2AH_REGISTRATIONS_PREEMPTED 0x05 #define ASCQ_2AH_ASYMMETRIC_ACCESS_STATE_CHANGED 0x06 -#define ASCQ_2AH_IMPLICT_ASYMMETRIC_ACCESS_STATE_TRANSITION_FAILED 0x07 +#define ASCQ_2AH_IMPLICIT_ASYMMETRIC_ACCESS_STATE_TRANSITION_FAILED 0x07 #define ASCQ_2AH_PRIORITY_CHANGED 0x08 #define ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS 0x09 diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 474cd44..6b88a99 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -405,9 +405,6 @@ static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, struct xcopy_pt_cmd, se_cmd); - if (xpt_cmd->remote_port) - kfree(se_cmd->se_lun); - kfree(xpt_cmd); } @@ -572,22 +569,10 @@ static int target_xcopy_init_pt_lun( return 0; } - pt_cmd->se_lun = kzalloc(sizeof(struct se_lun), GFP_KERNEL); - if (!pt_cmd->se_lun) { - pr_err("Unable to allocate pt_cmd->se_lun\n"); - return -ENOMEM; - } - init_completion(&pt_cmd->se_lun->lun_shutdown_comp); - INIT_LIST_HEAD(&pt_cmd->se_lun->lun_cmd_list); - INIT_LIST_HEAD(&pt_cmd->se_lun->lun_acl_list); - spin_lock_init(&pt_cmd->se_lun->lun_acl_lock); - spin_lock_init(&pt_cmd->se_lun->lun_cmd_lock); - spin_lock_init(&pt_cmd->se_lun->lun_sep_lock); - + pt_cmd->se_lun = &se_dev->xcopy_lun; pt_cmd->se_dev = se_dev; pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev); - pt_cmd->se_lun->lun_se_dev = se_dev; pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH; pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n", @@ -658,8 +643,6 @@ static int target_xcopy_setup_pt_cmd( return 0; out: - if (remote_port == true) - kfree(cmd->se_lun); return ret; } diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index 0dd54a4..752863a 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -22,6 +22,7 @@ #define FT_NAMELEN 32 /* length of ASCII WWPNs including pad */ #define FT_TPG_NAMELEN 32 /* max length of TPG name */ #define FT_LUN_NAMELEN 32 /* max length of LUN name */ +#define TCM_FC_DEFAULT_TAGS 512 /* tags used for per-session preallocation */ struct ft_transport_id { __u8 format; diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 0e5a1cae..479ec56 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -28,6 +28,7 @@ #include <linux/configfs.h> #include <linux/ctype.h> #include <linux/hash.h> +#include <linux/percpu_ida.h> #include <asm/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_host.h> @@ -89,16 +90,18 @@ static void ft_free_cmd(struct ft_cmd *cmd) { struct fc_frame *fp; struct fc_lport *lport; + struct se_session *se_sess; if (!cmd) return; + se_sess = cmd->sess->se_sess; fp = cmd->req_frame; lport = fr_dev(fp); if (fr_seq(fp)) lport->tt.seq_release(fr_seq(fp)); fc_frame_free(fp); + percpu_ida_free(&se_sess->sess_tag_pool, cmd->se_cmd.map_tag); ft_sess_put(cmd->sess); /* undo get from lookup at recv */ - kfree(cmd); } void ft_release_cmd(struct se_cmd *se_cmd) @@ -432,14 +435,21 @@ static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp) { struct ft_cmd *cmd; struct fc_lport *lport = sess->tport->lport; + struct se_session *se_sess = sess->se_sess; + int tag; - cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); - if (!cmd) + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + if (tag < 0) goto busy; + + cmd = &((struct ft_cmd *)se_sess->sess_cmd_map)[tag]; + memset(cmd, 0, sizeof(struct ft_cmd)); + + cmd->se_cmd.map_tag = tag; cmd->sess = sess; cmd->seq = lport->tt.seq_assign(lport, fp); if (!cmd->seq) { - kfree(cmd); + percpu_ida_free(&se_sess->sess_tag_pool, tag); goto busy; } cmd->req_frame = fp; /* hold frame during cmd */ diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 4e00508..c6932fb 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -571,16 +571,16 @@ int ft_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = ft_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = ft_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = ft_nacl_base_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; /* * register the fabric for use within TCM */ diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 4859505..ae52c08 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -210,7 +210,8 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id, if (!sess) return NULL; - sess->se_sess = transport_init_session(); + sess->se_sess = transport_init_session_tags(TCM_FC_DEFAULT_TAGS, + sizeof(struct ft_cmd)); if (IS_ERR(sess->se_sess)) { kfree(sess); return NULL; diff --git a/drivers/usb/gadget/tcm_usb_gadget.c b/drivers/usb/gadget/tcm_usb_gadget.c index eccea1d..6c3d795 100644 --- a/drivers/usb/gadget/tcm_usb_gadget.c +++ b/drivers/usb/gadget/tcm_usb_gadget.c @@ -1923,15 +1923,15 @@ static int usbg_register_configfs(void) } fabric->tf_ops = usbg_ops; - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = usbg_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = usbg_base_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = usbg_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = usbg_base_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; ret = target_fabric_configfs_register(fabric); if (ret < 0) { printk(KERN_ERR "target_fabric_configfs_register() failed" diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e663921..f175629 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -2168,15 +2168,15 @@ static int tcm_vhost_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = tcm_vhost_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = tcm_vhost_tpg_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_vhost_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_vhost_tpg_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; /* * Register the fabric for use within TCM */ @@ -80,6 +80,8 @@ struct kioctx { struct percpu_ref users; atomic_t dead; + struct percpu_ref reqs; + unsigned long user_id; struct __percpu kioctx_cpu *cpu; @@ -107,7 +109,6 @@ struct kioctx { struct page **ring_pages; long nr_pages; - struct rcu_head rcu_head; struct work_struct free_work; struct { @@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx) put_aio_ring_file(ctx); - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) + if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { kfree(ctx->ring_pages); + ctx->ring_pages = NULL; + } } static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) @@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) return cancel(kiocb); } -static void free_ioctx_rcu(struct rcu_head *head) +static void free_ioctx(struct work_struct *work) { - struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); + struct kioctx *ctx = container_of(work, struct kioctx, free_work); + pr_debug("freeing %p\n", ctx); + + aio_free_ring(ctx); free_percpu(ctx->cpu); kmem_cache_free(kioctx_cachep, ctx); } +static void free_ioctx_reqs(struct percpu_ref *ref) +{ + struct kioctx *ctx = container_of(ref, struct kioctx, reqs); + + INIT_WORK(&ctx->free_work, free_ioctx); + schedule_work(&ctx->free_work); +} + /* * When this function runs, the kioctx has been removed from the "hash table" * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ -static void free_ioctx(struct work_struct *work) +static void free_ioctx_users(struct percpu_ref *ref) { - struct kioctx *ctx = container_of(work, struct kioctx, free_work); - struct aio_ring *ring; + struct kioctx *ctx = container_of(ref, struct kioctx, users); struct kiocb *req; - unsigned cpu, avail; - DEFINE_WAIT(wait); spin_lock_irq(&ctx->ctx_lock); @@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work) spin_unlock_irq(&ctx->ctx_lock); - for_each_possible_cpu(cpu) { - struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); - - atomic_add(kcpu->reqs_available, &ctx->reqs_available); - kcpu->reqs_available = 0; - } - - while (1) { - prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE); - - ring = kmap_atomic(ctx->ring_pages[0]); - avail = (ring->head <= ring->tail) - ? ring->tail - ring->head - : ctx->nr_events - ring->head + ring->tail; - - atomic_add(avail, &ctx->reqs_available); - ring->head = ring->tail; - kunmap_atomic(ring); - - if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1) - break; - - schedule(); - } - finish_wait(&ctx->wait, &wait); - - WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1); - - aio_free_ring(ctx); - - pr_debug("freeing %p\n", ctx); - - /* - * Here the call_rcu() is between the wait_event() for reqs_active to - * hit 0, and freeing the ioctx. - * - * aio_complete() decrements reqs_active, but it has to touch the ioctx - * after to issue a wakeup so we use rcu. - */ - call_rcu(&ctx->rcu_head, free_ioctx_rcu); -} - -static void free_ioctx_ref(struct percpu_ref *ref) -{ - struct kioctx *ctx = container_of(ref, struct kioctx, users); - - INIT_WORK(&ctx->free_work, free_ioctx); - schedule_work(&ctx->free_work); + percpu_ref_kill(&ctx->reqs); + percpu_ref_put(&ctx->reqs); } static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) @@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) } } +static void aio_nr_sub(unsigned nr) +{ + spin_lock(&aio_nr_lock); + if (WARN_ON(aio_nr - nr > aio_nr)) + aio_nr = 0; + else + aio_nr -= nr; + spin_unlock(&aio_nr_lock); +} + /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ @@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->max_reqs = nr_events; - if (percpu_ref_init(&ctx->users, free_ioctx_ref)) - goto out_freectx; + if (percpu_ref_init(&ctx->users, free_ioctx_users)) + goto err; + + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) + goto err; spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->completion_lock); @@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->cpu = alloc_percpu(struct kioctx_cpu); if (!ctx->cpu) - goto out_freeref; + goto err; if (aio_setup_ring(ctx) < 0) - goto out_freepcpu; + goto err; atomic_set(&ctx->reqs_available, ctx->nr_events - 1); ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); @@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) if (aio_nr + nr_events > (aio_max_nr * 2UL) || aio_nr + nr_events < aio_nr) { spin_unlock(&aio_nr_lock); - goto out_cleanup; + err = -EAGAIN; + goto err; } aio_nr += ctx->max_reqs; spin_unlock(&aio_nr_lock); @@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err = ioctx_add_table(ctx, mm); if (err) - goto out_cleanup_put; + goto err_cleanup; pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, mm, ctx->nr_events); return ctx; -out_cleanup_put: - percpu_ref_put(&ctx->users); -out_cleanup: - err = -EAGAIN; - aio_free_ring(ctx); -out_freepcpu: +err_cleanup: + aio_nr_sub(ctx->max_reqs); +err: free_percpu(ctx->cpu); -out_freeref: + free_percpu(ctx->reqs.pcpu_count); free_percpu(ctx->users.pcpu_count); -out_freectx: - put_aio_ring_file(ctx); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); @@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) * -EAGAIN with no ioctxs actually in use (as far as userspace * could tell). */ - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - ctx->max_reqs > aio_nr); - aio_nr -= ctx->max_reqs; - spin_unlock(&aio_nr_lock); + aio_nr_sub(ctx->max_reqs); if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); @@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) if (unlikely(!req)) goto out_put; + percpu_ref_get(&ctx->reqs); + req->ki_ctx = ctx; return req; out_put: @@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) return; } - /* - * Take rcu_read_lock() in case the kioctx is being destroyed, as we - * need to issue a wakeup after incrementing reqs_available. - */ - rcu_read_lock(); - if (iocb->ki_list.next) { unsigned long flags; @@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - rcu_read_unlock(); + percpu_ref_put(&ctx->reqs); } EXPORT_SYMBOL(aio_complete); @@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, return 0; out_put_req: put_reqs_available(ctx, 1); + percpu_ref_put(&ctx->reqs); kiocb_free(req); return ret; } diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index f9d5094..aa976ec 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -9,12 +9,17 @@ config BTRFS_FS select XOR_BLOCKS help - Btrfs is a new filesystem with extents, writable snapshotting, - support for multiple devices and many more features. + Btrfs is a general purpose copy-on-write filesystem with extents, + writable snapshotting, support for multiple devices and many more + features focused on fault tolerance, repair and easy administration. - Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET - FINALIZED. You should say N here unless you are interested in - testing Btrfs with non-critical data. + The filesystem disk format is no longer unstable, and it's not + expected to change unless there are strong reasons to do so. If there + is a format change, file systems with a unchanged format will + continue to be mountable and usable by newer kernels. + + For more information, please see the web pages at + http://btrfs.wiki.kernel.org. To compile this file system support as a module, choose M here. The module will be called btrfs. diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8aec751..c1e0b0c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) spin_lock_irq(&workers->lock); if (workers->stopping) { spin_unlock_irq(&workers->lock); + ret = -EINVAL; goto fail_kthread; } list_add_tail(&worker->worker_list, &workers->idle_list); diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e0aab44..b50764b 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -77,6 +77,15 @@ * the integrity of (super)-block write requests, do not * enable the config option BTRFS_FS_CHECK_INTEGRITY to * include and compile the integrity check tool. + * + * Expect millions of lines of information in the kernel log with an + * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the + * kernel config to at least 26 (which is 64MB). Usually the value is + * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be + * changed like this before LOG_BUF_SHIFT can be set to a high value: + * config LOG_BUF_SHIFT + * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" + * range 12 30 */ #include <linux/sched.h> @@ -124,6 +133,7 @@ #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 +#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000 struct btrfsic_dev_state; struct btrfsic_state; @@ -3015,6 +3025,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) (rw & WRITE) && NULL != bio->bi_io_vec) { unsigned int i; u64 dev_bytenr; + u64 cur_bytenr; int bio_is_patched; char **mapped_datav; @@ -3033,6 +3044,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) GFP_NOFS); if (!mapped_datav) goto leave; + cur_bytenr = dev_bytenr; for (i = 0; i < bio->bi_vcnt; i++) { BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); @@ -3044,16 +3056,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio) kfree(mapped_datav); goto leave; } - if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | - BTRFSIC_PRINT_MASK_VERBOSE) == - (dev_state->state->print_mask & - (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | - BTRFSIC_PRINT_MASK_VERBOSE))) + if (dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) printk(KERN_INFO - "#%u: page=%p, len=%u, offset=%u\n", - i, bio->bi_io_vec[i].bv_page, - bio->bi_io_vec[i].bv_len, + "#%u: bytenr=%llu, len=%u, offset=%u\n", + i, cur_bytenr, bio->bi_io_vec[i].bv_len, bio->bi_io_vec[i].bv_offset); + cur_bytenr += bio->bi_io_vec[i].bv_len; } btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, bio->bi_vcnt, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f9aeb27..54ab861 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig); -int btrfs_csum_truncate(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 isize); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); /* inode.c */ @@ -3744,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); -int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, - u64 start, u64 end, int skip_pinned, - int modified); extern const struct file_operations btrfs_file_operations; int __btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 342f9fd..2cfc3dff 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -366,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, dev_replace->tgtdev = tgt_device; printk_in_rcu(KERN_INFO - "btrfs: dev_replace from %s (devid %llu) to %s) started\n", + "btrfs: dev_replace from %s (devid %llu) to %s started\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c4ed0b..8072cfa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3517,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) int btrfs_commit_super(struct btrfs_root *root) { struct btrfs_trans_handle *trans; - int ret; mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); @@ -3531,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - /* run commit again to drop the original snapshot */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - ret = btrfs_write_and_wait_transaction(NULL, root); - if (ret) { - btrfs_error(root->fs_info, ret, - "Failed to sync btree inode to disk."); - return ret; - } - - ret = write_ctree_super(NULL, root, 0); - return ret; + return btrfs_commit_transaction(trans, root); } int close_ctree(struct btrfs_root *root) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 856bc2b..8e457fc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1980,6 +1980,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; int ret; + ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); BUG_ON(!mirror_num); /* we can't repair anything in raid56 yet */ @@ -2036,6 +2037,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); int ret = 0; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, @@ -2057,12 +2061,12 @@ static int clean_io_failure(u64 start, struct page *page) u64 private; u64 private_failure; struct io_failure_record *failrec; - struct btrfs_fs_info *fs_info; + struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct extent_state *state; int num_copies; int did_repair = 0; int ret; - struct inode *inode = page->mapping->host; private = 0; ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, @@ -2085,6 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page) did_repair = 1; goto out; } + if (fs_info->sb->s_flags & MS_RDONLY) + goto out; spin_lock(&BTRFS_I(inode)->io_tree.lock); state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, @@ -2094,7 +2100,6 @@ static int clean_io_failure(u64 start, struct page *page) if (state && state->start <= failrec->start && state->end >= failrec->start + failrec->len - 1) { - fs_info = BTRFS_I(inode)->root->fs_info; num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da8d2f6..f1a7744 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2129,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, old->extent_offset, fs_info, path, record_one_backref, old); - BUG_ON(ret < 0 && ret != -ENOENT); + if (ret < 0 && ret != -ENOENT) + return false; /* no backref to be processed for this extent */ if (!old->count) { @@ -6186,8 +6187,7 @@ insert: write_unlock(&em_tree->lock); out: - if (em) - trace_btrfs_get_extent(root, em); + trace_btrfs_get_extent(root, em); if (path) btrfs_free_path(path); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 25a8f38..69582d5 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -638,6 +638,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) WARN_ON(nr < 0); } } + list_splice_tail(&splice, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); } @@ -803,7 +804,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) btrfs_put_ordered_extent(ordered); break; } - if (ordered->file_offset + ordered->len < start) { + if (ordered->file_offset + ordered->len <= start) { btrfs_put_ordered_extent(ordered); break; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2544805..561e2f1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -938,8 +938,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) BTRFS_DEV_STAT_CORRUPTION_ERRS); } - if (sctx->readonly && !sctx->is_dev_replace) - goto did_not_correct_error; + if (sctx->readonly) { + ASSERT(!sctx->is_dev_replace); + goto out; + } if (!is_metadata && !have_csum) { struct scrub_fixup_nodatasum *fixup_nodatasum; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 57c16b4..c6a872a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1480,7 +1480,7 @@ static void do_async_commit(struct work_struct *work) * We've got freeze protection passed with the transaction. * Tell lockdep about it. */ - if (ac->newtrans->type < TRANS_JOIN_NOLOCK) + if (ac->newtrans->type & __TRANS_FREEZABLE) rwsem_acquire_read( &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 0, 1, _THIS_IP_); @@ -1521,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, * Tell lockdep we've released the freeze rwsem, since the * async commit thread will be the one to unlock it. */ - if (trans->type < TRANS_JOIN_NOLOCK) + if (ac->newtrans->type & __TRANS_FREEZABLE) rwsem_release( &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 1, _THIS_IP_); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 744553c..9f7fc51 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3697,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags)) { + &BTRFS_I(inode)->runtime_flags) || + inode_only == LOG_INODE_EXISTS) { if (inode_only == LOG_INODE_ALL) fast_search = true; max_key.type = BTRFS_XATTR_ITEM_KEY; @@ -3801,7 +3802,7 @@ log_extents: err = ret; goto out_unlock; } - } else { + } else if (inode_only == LOG_INODE_ALL) { struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em, *n; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0db6370..92303f4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5394,7 +5394,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, { struct bio_vec *prev; struct request_queue *q = bdev_get_queue(bdev); - unsigned short max_sectors = queue_max_sectors(q); + unsigned int max_sectors = queue_max_sectors(q); struct bvec_merge_data bvm = { .bi_bdev = bdev, .bi_sector = sector, diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 4522e07..e081acb 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -56,10 +56,19 @@ static void configfs_d_iput(struct dentry * dentry, struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { - BUG_ON(sd->s_dentry != dentry); /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); - sd->s_dentry = NULL; + /* Coordinate with configfs_attach_attr where will increase + * sd->s_count and update sd->s_dentry to new allocated one. + * Only set sd->dentry to null when this dentry is the only + * sd owner. + * If not do so, configfs_d_iput may run just after + * configfs_attach_attr and set sd->s_dentry to null + * even it's still in use. + */ + if (atomic_read(&sd->s_count) <= 2) + sd->s_dentry = NULL; + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } @@ -416,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den struct configfs_attribute * attr = sd->s_element; int error; + spin_lock(&configfs_dirent_lock); dentry->d_fsdata = configfs_get(sd); sd->s_dentry = dentry; + spin_unlock(&configfs_dirent_lock); + error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, configfs_init_file); if (error) { @@ -1380,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval) return retval; - retval = audit_bprm(bprm); - if (retval) - return retval; - retval = -ENOENT; retry: read_lock(&binfmt_lock); @@ -1431,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm) ret = search_binary_handler(bprm); if (ret >= 0) { + audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); current->did_exec = 1; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e66a800..c8420f7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1899,7 +1899,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gi->nhash = 0; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref)); + } while (gi->sdp != gi->gl->gl_sbd || + __lockref_is_dead(&gi->gl->gl_lockref)); return 0; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1615df1..7119504 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1171,8 +1171,11 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (d != NULL) dentry = d; if (dentry->d_inode) { - if (!(*opened & FILE_OPENED)) + if (!(*opened & FILE_OPENED)) { + if (d == NULL) + dget(dentry); return finish_no_open(file, dentry); + } dput(d); return 0; } @@ -2435,6 +2435,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) */ static inline int may_create(struct inode *dir, struct dentry *child) { + audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 088de13..ee7237f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -141,8 +141,8 @@ xdr_error: \ static void next_decode_page(struct nfsd4_compoundargs *argp) { - argp->pagelist++; argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; @@ -1229,6 +1229,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) len -= pages * PAGE_SIZE; argp->p = (__be32 *)page_address(argp->pagelist[0]); + argp->pagelist++; argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } argp->p += XDR_QUADLEN(len); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 94b5f5d..7eea63c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -298,41 +298,12 @@ commit_metadata(struct svc_fh *fhp) } /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects. */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - struct dentry *dentry; - struct inode *inode; - int accmode = NFSD_MAY_SATTR; - umode_t ftype = 0; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; - - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - goto out; - /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -342,8 +313,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. It is only - * valid if the filesystem does not define it's own i_op->setattr. + * it is not an interface that we should be using. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -369,30 +339,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~BOTH_TIME_SET; } } - - /* - * The size case is special. - * It changes the file as well as the attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, - NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserr; - - size_change = 1; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) { - put_write_access(inode); - goto out_nfserr; - } - } /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { @@ -415,32 +361,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } } +} - /* Change the attributes. */ +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + int host_err; - iap->ia_valid |= ATTR_CTIME; + if (iap->ia_size < inode->i_size) { + __be32 err; - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - host_err = nfsd_break_lease(inode); - if (host_err) - goto out_nfserr; - fh_lock(fhp); + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; + } - host_err = notify_change(dentry, iap, NULL); - err = nfserrno(host_err); - fh_unlock(fhp); + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; + + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; + +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); +} + +/* + * Set various file attributes. After this call fhp needs an fh_put. + */ +__be32 +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) +{ + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; + int host_err; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + + if (!iap->ia_valid) + goto out; + + nfsd_sanitize_attrs(inode, iap); + + /* + * The size case is special, it changes the file in addition to the + * attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; } + + iap->ia_valid |= ATTR_CTIME; + + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; + } + + host_err = nfsd_break_lease(inode); + if (host_err) + goto out_put_write_access_nfserror; + + fh_lock(fhp); + host_err = notify_change(dentry, iap, NULL); + fh_unlock(fhp); + +out_put_write_access_nfserror: + err = nfserrno(host_err); +out_put_write_access: if (size_change) put_write_access(inode); if (!err) commit_metadata(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } #if defined(CONFIG_NFSD_V2_ACL) || \ diff --git a/fs/proc/base.c b/fs/proc/base.c index 1485e38..03c8d74 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1151,10 +1151,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - kloginuid = make_kuid(file->f_cred->user_ns, loginuid); - if (!uid_valid(kloginuid)) { - length = -EINVAL; - goto out_free_page; + + /* is userspace tring to explicitly UNSET the loginuid? */ + if (loginuid == AUDIT_UID_UNSET) { + kloginuid = INVALID_UID; + } else { + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; + } } length = audit_set_loginuid(kloginuid); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 1c02da8..3ef11b2 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1137,6 +1137,7 @@ xfs_bmap_add_attrfork( int committed; /* xaction was committed */ int logflags; /* logging flags */ int error; /* error return value */ + int cancel_flags = 0; ASSERT(XFS_IFORK_Q(ip) == 0); @@ -1147,19 +1148,20 @@ xfs_bmap_add_attrfork( if (rsvd) tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); - if (error) - goto error0; + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + cancel_flags = XFS_TRANS_RELEASE_LOG_RES; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); - if (error) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); - return error; - } + if (error) + goto trans_cancel; + cancel_flags |= XFS_TRANS_ABORT; if (XFS_IFORK_Q(ip)) - goto error1; + goto trans_cancel; if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { /* * For inodes coming from pre-6.2 filesystems. @@ -1169,7 +1171,7 @@ xfs_bmap_add_attrfork( } ASSERT(ip->i_d.di_anextents == 0); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); switch (ip->i_d.di_format) { @@ -1191,7 +1193,7 @@ xfs_bmap_add_attrfork( default: ASSERT(0); error = XFS_ERROR(EINVAL); - goto error1; + goto trans_cancel; } ASSERT(ip->i_afp == NULL); @@ -1219,7 +1221,7 @@ xfs_bmap_add_attrfork( if (logflags) xfs_trans_log_inode(tp, ip, logflags); if (error) - goto error2; + goto bmap_cancel; if (!xfs_sb_version_hasattr(&mp->m_sb) || (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { __int64_t sbfields = 0; @@ -1242,14 +1244,16 @@ xfs_bmap_add_attrfork( error = xfs_bmap_finish(&tp, &flist, &committed); if (error) - goto error2; - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -error2: + goto bmap_cancel; + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + +bmap_cancel: xfs_bmap_cancel(&flist); -error1: +trans_cancel: + xfs_trans_cancel(tp, cancel_flags); xfs_iunlock(ip, XFS_ILOCK_EXCL); -error0: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index da88f16..02df7b4 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -41,6 +41,7 @@ #include "xfs_fsops.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_dinode.h" #ifdef HAVE_PERCPU_SB @@ -718,8 +719,22 @@ xfs_mountfs( * Set the inode cluster size. * This may still be overridden by the file system * block size if it is larger than the chosen cluster size. + * + * For v5 filesystems, scale the cluster size with the inode size to + * keep a constant ratio of inode per cluster buffer, but only if mkfs + * has set the inode alignment value appropriately for larger cluster + * sizes. */ mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + int new_size = mp->m_inode_cluster_size; + + new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; + if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) + mp->m_inode_cluster_size = new_size; + xfs_info(mp, "Using inode cluster size of %d bytes", + mp->m_inode_cluster_size); + } /* * Set inode alignment fields diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1d8101a..a466c5e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -112,7 +112,7 @@ typedef struct xfs_mount { __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ __uint8_t m_agno_log; /* log #ag's */ __uint8_t m_agino_log; /* #bits for agino in inum */ - __uint16_t m_inode_cluster_size;/* min inode buf size */ + uint m_inode_cluster_size;/* min inode buf size */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 1bba7f6..50c3f56 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -111,12 +111,14 @@ xfs_trans_log_inode( /* * First time we log the inode in a transaction, bump the inode change - * counter if it is configured for this to occur. + * counter if it is configured for this to occur. We don't use + * inode_inc_version() because there is no need for extra locking around + * i_version as we already hold the inode locked exclusively for + * metadata modification. */ if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && IS_I_VERSION(VFS_I(ip))) { - inode_inc_iversion(VFS_I(ip)); - ip->i_d.di_changecount = VFS_I(ip)->i_version; + ip->i_d.di_changecount = ++VFS_I(ip)->i_version; flags |= XFS_ILOG_CORE; } diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index d53d9f0..2fd59c0 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -385,8 +385,7 @@ xfs_calc_ifree_reservation( xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + - MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), - XFS_INODE_CLUSTER_SIZE(mp)) + + max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + xfs_calc_buf_res(1, 0) + xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels, 0) + diff --git a/include/crypto/hash_info.h b/include/crypto/hash_info.h new file mode 100644 index 0000000..e1e5a3e --- /dev/null +++ b/include/crypto/hash_info.h @@ -0,0 +1,40 @@ +/* + * Hash Info: Hash algorithms information + * + * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _CRYPTO_HASH_INFO_H +#define _CRYPTO_HASH_INFO_H + +#include <crypto/sha.h> +#include <crypto/md5.h> + +#include <uapi/linux/hash_info.h> + +/* not defined in include/crypto/ */ +#define RMD128_DIGEST_SIZE 16 +#define RMD160_DIGEST_SIZE 20 +#define RMD256_DIGEST_SIZE 32 +#define RMD320_DIGEST_SIZE 40 + +/* not defined in include/crypto/ */ +#define WP512_DIGEST_SIZE 64 +#define WP384_DIGEST_SIZE 48 +#define WP256_DIGEST_SIZE 32 + +/* not defined in include/crypto/ */ +#define TGR128_DIGEST_SIZE 16 +#define TGR160_DIGEST_SIZE 20 +#define TGR192_DIGEST_SIZE 24 + +extern const char *const hash_algo_name[HASH_ALGO__LAST]; +extern const int hash_digest_size[HASH_ALGO__LAST]; + +#endif /* _CRYPTO_HASH_INFO_H */ diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index f5b0224..fc09732 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -15,6 +15,7 @@ #define _LINUX_PUBLIC_KEY_H #include <linux/mpi.h> +#include <crypto/hash_info.h> enum pkey_algo { PKEY_ALGO_DSA, @@ -22,21 +23,11 @@ enum pkey_algo { PKEY_ALGO__LAST }; -extern const char *const pkey_algo[PKEY_ALGO__LAST]; +extern const char *const pkey_algo_name[PKEY_ALGO__LAST]; +extern const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST]; -enum pkey_hash_algo { - PKEY_HASH_MD4, - PKEY_HASH_MD5, - PKEY_HASH_SHA1, - PKEY_HASH_RIPE_MD_160, - PKEY_HASH_SHA256, - PKEY_HASH_SHA384, - PKEY_HASH_SHA512, - PKEY_HASH_SHA224, - PKEY_HASH__LAST -}; - -extern const char *const pkey_hash_algo[PKEY_HASH__LAST]; +/* asymmetric key implementation supports only up to SHA224 */ +#define PKEY_HASH__LAST (HASH_ALGO_SHA224 + 1) enum pkey_id_type { PKEY_ID_PGP, /* OpenPGP generated key ID */ @@ -44,7 +35,7 @@ enum pkey_id_type { PKEY_ID_TYPE__LAST }; -extern const char *const pkey_id_type[PKEY_ID_TYPE__LAST]; +extern const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST]; /* * Cryptographic data for the public-key subtype of the asymmetric key type. @@ -59,6 +50,7 @@ struct public_key { #define PKEY_CAN_DECRYPT 0x02 #define PKEY_CAN_SIGN 0x04 #define PKEY_CAN_VERIFY 0x08 + enum pkey_algo pkey_algo : 8; enum pkey_id_type id_type : 8; union { MPI mpi[5]; @@ -88,7 +80,8 @@ struct public_key_signature { u8 *digest; u8 digest_size; /* Number of bytes in digest */ u8 nr_mpi; /* Occupancy of mpi[] */ - enum pkey_hash_algo pkey_hash_algo : 8; + enum pkey_algo pkey_algo : 8; + enum hash_algo pkey_hash_algo : 8; union { MPI mpi[2]; struct { diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h new file mode 100644 index 0000000..d69bc8a --- /dev/null +++ b/include/keys/big_key-type.h @@ -0,0 +1,25 @@ +/* Big capacity key type. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _KEYS_BIG_KEY_TYPE_H +#define _KEYS_BIG_KEY_TYPE_H + +#include <linux/key-type.h> + +extern struct key_type key_type_big_key; + +extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep); +extern void big_key_revoke(struct key *key); +extern void big_key_destroy(struct key *key); +extern void big_key_describe(const struct key *big_key, struct seq_file *m); +extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen); + +#endif /* _KEYS_BIG_KEY_TYPE_H */ diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h index cf49159..fca5c62 100644 --- a/include/keys/keyring-type.h +++ b/include/keys/keyring-type.h @@ -1,6 +1,6 @@ /* Keyring key type * - * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -13,19 +13,6 @@ #define _KEYS_KEYRING_TYPE_H #include <linux/key.h> -#include <linux/rcupdate.h> - -/* - * the keyring payload contains a list of the keys to which the keyring is - * subscribed - */ -struct keyring_list { - struct rcu_head rcu; /* RCU deletion hook */ - unsigned short maxkeys; /* max keys this list can hold */ - unsigned short nkeys; /* number of keys currently held */ - unsigned short delkey; /* key to be unlinked by RCU */ - struct key __rcu *keys[0]; -}; - +#include <linux/assoc_array.h> #endif /* _KEYS_KEYRING_TYPE_H */ diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h new file mode 100644 index 0000000..8dabc39 --- /dev/null +++ b/include/keys/system_keyring.h @@ -0,0 +1,23 @@ +/* System keyring containing trusted public keys. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _KEYS_SYSTEM_KEYRING_H +#define _KEYS_SYSTEM_KEYRING_H + +#ifdef CONFIG_SYSTEM_TRUSTED_KEYRING + +#include <linux/key.h> + +extern struct key *system_trusted_keyring; + +#endif + +#endif /* _KEYS_SYSTEM_KEYRING_H */ diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h new file mode 100644 index 0000000..9a193b8 --- /dev/null +++ b/include/linux/assoc_array.h @@ -0,0 +1,92 @@ +/* Generic associative array implementation. + * + * See Documentation/assoc_array.txt for information. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_ASSOC_ARRAY_H +#define _LINUX_ASSOC_ARRAY_H + +#ifdef CONFIG_ASSOCIATIVE_ARRAY + +#include <linux/types.h> + +#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */ + +/* + * Generic associative array. + */ +struct assoc_array { + struct assoc_array_ptr *root; /* The node at the root of the tree */ + unsigned long nr_leaves_on_tree; +}; + +/* + * Operations on objects and index keys for use by array manipulation routines. + */ +struct assoc_array_ops { + /* Method to get a chunk of an index key from caller-supplied data */ + unsigned long (*get_key_chunk)(const void *index_key, int level); + + /* Method to get a piece of an object's index key */ + unsigned long (*get_object_key_chunk)(const void *object, int level); + + /* Is this the object we're looking for? */ + bool (*compare_object)(const void *object, const void *index_key); + + /* How different are two objects, to a bit position in their keys? (or + * -1 if they're the same) + */ + int (*diff_objects)(const void *a, const void *b); + + /* Method to free an object. */ + void (*free_object)(void *object); +}; + +/* + * Access and manipulation functions. + */ +struct assoc_array_edit; + +static inline void assoc_array_init(struct assoc_array *array) +{ + array->root = NULL; + array->nr_leaves_on_tree = 0; +} + +extern int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data); +extern void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); +extern void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops); +extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object); +extern void assoc_array_insert_set_object(struct assoc_array_edit *edit, + void *object); +extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); +extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops); +extern void assoc_array_apply_edit(struct assoc_array_edit *edit); +extern void assoc_array_cancel_edit(struct assoc_array_edit *edit); +extern int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data); + +#endif /* CONFIG_ASSOCIATIVE_ARRAY */ +#endif /* _LINUX_ASSOC_ARRAY_H */ diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h new file mode 100644 index 0000000..711275e --- /dev/null +++ b/include/linux/assoc_array_priv.h @@ -0,0 +1,182 @@ +/* Private definitions for the generic associative array implementation. + * + * See Documentation/assoc_array.txt for information. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_ASSOC_ARRAY_PRIV_H +#define _LINUX_ASSOC_ARRAY_PRIV_H + +#ifdef CONFIG_ASSOCIATIVE_ARRAY + +#include <linux/assoc_array.h> + +#define ASSOC_ARRAY_FAN_OUT 16 /* Number of slots per node */ +#define ASSOC_ARRAY_FAN_MASK (ASSOC_ARRAY_FAN_OUT - 1) +#define ASSOC_ARRAY_LEVEL_STEP (ilog2(ASSOC_ARRAY_FAN_OUT)) +#define ASSOC_ARRAY_LEVEL_STEP_MASK (ASSOC_ARRAY_LEVEL_STEP - 1) +#define ASSOC_ARRAY_KEY_CHUNK_MASK (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1) +#define ASSOC_ARRAY_KEY_CHUNK_SHIFT (ilog2(BITS_PER_LONG)) + +/* + * Undefined type representing a pointer with type information in the bottom + * two bits. + */ +struct assoc_array_ptr; + +/* + * An N-way node in the tree. + * + * Each slot contains one of four things: + * + * (1) Nothing (NULL). + * + * (2) A leaf object (pointer types 0). + * + * (3) A next-level node (pointer type 1, subtype 0). + * + * (4) A shortcut (pointer type 1, subtype 1). + * + * The tree is optimised for search-by-ID, but permits reasonable iteration + * also. + * + * The tree is navigated by constructing an index key consisting of an array of + * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size. + * + * The segments correspond to levels of the tree (the first segment is used at + * level 0, the second at level 1, etc.). + */ +struct assoc_array_node { + struct assoc_array_ptr *back_pointer; + u8 parent_slot; + struct assoc_array_ptr *slots[ASSOC_ARRAY_FAN_OUT]; + unsigned long nr_leaves_on_branch; +}; + +/* + * A shortcut through the index space out to where a collection of nodes/leaves + * with the same IDs live. + */ +struct assoc_array_shortcut { + struct assoc_array_ptr *back_pointer; + int parent_slot; + int skip_to_level; + struct assoc_array_ptr *next_node; + unsigned long index_key[]; +}; + +/* + * Preallocation cache. + */ +struct assoc_array_edit { + struct rcu_head rcu; + struct assoc_array *array; + const struct assoc_array_ops *ops; + const struct assoc_array_ops *ops_for_excised_subtree; + struct assoc_array_ptr *leaf; + struct assoc_array_ptr **leaf_p; + struct assoc_array_ptr *dead_leaf; + struct assoc_array_ptr *new_meta[3]; + struct assoc_array_ptr *excised_meta[1]; + struct assoc_array_ptr *excised_subtree; + struct assoc_array_ptr **set_backpointers[ASSOC_ARRAY_FAN_OUT]; + struct assoc_array_ptr *set_backpointers_to; + struct assoc_array_node *adjust_count_on; + long adjust_count_by; + struct { + struct assoc_array_ptr **ptr; + struct assoc_array_ptr *to; + } set[2]; + struct { + u8 *p; + u8 to; + } set_parent_slot[1]; + u8 segment_cache[ASSOC_ARRAY_FAN_OUT + 1]; +}; + +/* + * Internal tree member pointers are marked in the bottom one or two bits to + * indicate what type they are so that we don't have to look behind every + * pointer to see what it points to. + * + * We provide functions to test type annotations and to create and translate + * the annotated pointers. + */ +#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL +#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL /* Points to leaf (or nowhere) */ +#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL /* Points to node or shortcut */ +#define ASSOC_ARRAY_PTR_SUBTYPE_MASK 0x2UL +#define ASSOC_ARRAY_PTR_NODE_SUBTYPE 0x0UL +#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL + +static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK; +} +static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x) +{ + return !assoc_array_ptr_is_meta(x); +} +static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK; +} +static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x) +{ + return !assoc_array_ptr_is_shortcut(x); +} + +static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x) +{ + return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK); +} + +static inline +unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & + ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK); +} +static inline +struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x) +{ + return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x); +} +static inline +struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x) +{ + return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x); +} + +static inline +struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t) +{ + return (struct assoc_array_ptr *)((unsigned long)p | t); +} +static inline +struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p) +{ + return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE); +} +static inline +struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p) +{ + return __assoc_array_x_to_ptr( + p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE); +} +static inline +struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p) +{ + return __assoc_array_x_to_ptr( + p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE); +} + +#endif /* CONFIG_ASSOCIATIVE_ARRAY */ +#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */ diff --git a/include/linux/audit.h b/include/linux/audit.h index 729a4d1..a406419 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -73,6 +73,8 @@ struct audit_field { void *lsm_rule; }; +extern int is_audit_feature_set(int which); + extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); @@ -207,7 +209,7 @@ static inline int audit_get_sessionid(struct task_struct *tsk) extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); -extern int __audit_bprm(struct linux_binprm *bprm); +extern void __audit_bprm(struct linux_binprm *bprm); extern int __audit_socketcall(int nargs, unsigned long *args); extern int __audit_sockaddr(int len, void *addr); extern void __audit_fd_pair(int fd1, int fd2); @@ -236,11 +238,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid if (unlikely(!audit_dummy_context())) __audit_ipc_set_perm(qbytes, uid, gid, mode); } -static inline int audit_bprm(struct linux_binprm *bprm) +static inline void audit_bprm(struct linux_binprm *bprm) { if (unlikely(!audit_dummy_context())) - return __audit_bprm(bprm); - return 0; + __audit_bprm(bprm); } static inline int audit_socketcall(int nargs, unsigned long *args) { @@ -367,10 +368,8 @@ static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { } -static inline int audit_bprm(struct linux_binprm *bprm) -{ - return 0; -} +static inline void audit_bprm(struct linux_binprm *bprm) +{ } static inline int audit_socketcall(int nargs, unsigned long *args) { return 0; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index acd2010..9649ff0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); void hugepage_put_subpool(struct hugepage_subpool *spool); int PageHuge(struct page *page); +int PageHeadHuge(struct page *page_head); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); @@ -69,7 +70,6 @@ int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); bool is_hugepage_active(struct page *page); -void copy_huge_page(struct page *dst, struct page *src); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); @@ -104,6 +104,11 @@ static inline int PageHuge(struct page *page) return 0; } +static inline int PageHeadHuge(struct page *page_head) +{ + return 0; +} + static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { } @@ -140,9 +145,6 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page) #define isolate_huge_page(p, l) false #define putback_active_hugepage(p) do {} while (0) #define is_hugepage_active(x) false -static inline void copy_huge_page(struct page *dst, struct page *src) -{ -} static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 518a53a..a74c3a8 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -45,6 +45,7 @@ struct key_preparsed_payload { const void *data; /* Raw data */ size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ + bool trusted; /* True if key is trusted */ }; typedef int (*request_key_actor_t)(struct key_construction *key, @@ -63,6 +64,11 @@ struct key_type { */ size_t def_datalen; + /* Default key search algorithm. */ + unsigned def_lookup_type; +#define KEYRING_SEARCH_LOOKUP_DIRECT 0x0000 /* Direct lookup by description. */ +#define KEYRING_SEARCH_LOOKUP_ITERATE 0x0001 /* Iterative search. */ + /* vet a description */ int (*vet_description)(const char *description); diff --git a/include/linux/key.h b/include/linux/key.h index 4dfde11..80d6774 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -22,6 +22,7 @@ #include <linux/sysctl.h> #include <linux/rwsem.h> #include <linux/atomic.h> +#include <linux/assoc_array.h> #ifdef __KERNEL__ #include <linux/uidgid.h> @@ -82,6 +83,12 @@ struct key_owner; struct keyring_list; struct keyring_name; +struct keyring_index_key { + struct key_type *type; + const char *description; + size_t desc_len; +}; + /*****************************************************************************/ /* * key reference with possession attribute handling @@ -99,7 +106,7 @@ struct keyring_name; typedef struct __key_reference_with_attributes *key_ref_t; static inline key_ref_t make_key_ref(const struct key *key, - unsigned long possession) + bool possession) { return (key_ref_t) ((unsigned long) key | possession); } @@ -109,7 +116,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref) return (struct key *) ((unsigned long) key_ref & ~1UL); } -static inline unsigned long is_key_possessed(const key_ref_t key_ref) +static inline bool is_key_possessed(const key_ref_t key_ref) { return (unsigned long) key_ref & 1UL; } @@ -129,7 +136,6 @@ struct key { struct list_head graveyard_link; struct rb_node serial_node; }; - struct key_type *type; /* type of key */ struct rw_semaphore sem; /* change vs change sem */ struct key_user *user; /* owner of this key */ void *security; /* security data for this key */ @@ -162,13 +168,21 @@ struct key { #define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ +#define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ +#define KEY_FLAG_TRUSTED_ONLY 9 /* set if keyring only accepts links to trusted keys */ - /* the description string - * - this is used to match a key against search criteria - * - this should be a printable string + /* the key type and key description string + * - the desc is used to match a key against search criteria + * - it should be a printable string * - eg: for krb5 AFS, this might be "afs@REDHAT.COM" */ - char *description; + union { + struct keyring_index_key index_key; + struct { + struct key_type *type; /* type of key */ + char *description; + }; + }; /* type specific data * - this is used by the keyring type to index the name @@ -185,11 +199,14 @@ struct key { * whatever */ union { - unsigned long value; - void __rcu *rcudata; - void *data; - struct keyring_list __rcu *subscriptions; - } payload; + union { + unsigned long value; + void __rcu *rcudata; + void *data; + void *data2[2]; + } payload; + struct assoc_array keys; + }; }; extern struct key *key_alloc(struct key_type *type, @@ -203,18 +220,23 @@ extern struct key *key_alloc(struct key_type *type, #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ #define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ +#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); extern void key_put(struct key *key); -static inline struct key *key_get(struct key *key) +static inline struct key *__key_get(struct key *key) { - if (key) - atomic_inc(&key->usage); + atomic_inc(&key->usage); return key; } +static inline struct key *key_get(struct key *key) +{ + return key ? __key_get(key) : key; +} + static inline void key_ref_put(key_ref_t key_ref) { key_put(key_ref_to_ptr(key_ref)); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10f5a72..bd29941 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -44,18 +44,22 @@ struct page { /* First double word block */ unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ - struct address_space *mapping; /* If low bit clear, points to - * inode address_space, or NULL. - * If page mapped as anonymous - * memory, low bit is set, and - * it points to anon_vma object: - * see PAGE_MAPPING_ANON below. - */ + union { + struct address_space *mapping; /* If low bit clear, points to + * inode address_space, or NULL. + * If page mapped as anonymous + * memory, low bit is set, and + * it points to anon_vma object: + * see PAGE_MAPPING_ANON below. + */ + void *s_mem; /* slab first object */ + }; + /* Second double word */ struct { union { pgoff_t index; /* Our offset within mapping. */ - void *freelist; /* slub/slob first free object */ + void *freelist; /* sl[aou]b first free object */ bool pfmemalloc; /* If set by the page allocator, * ALLOC_NO_WATERMARKS was set * and the low watermark was not @@ -65,9 +69,6 @@ struct page { * this page is only used to * free other pages. */ -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page->ptl */ -#endif }; union { @@ -114,6 +115,7 @@ struct page { }; atomic_t _count; /* Usage count, see below. */ }; + unsigned int active; /* SLAB */ }; }; @@ -135,6 +137,12 @@ struct page { struct list_head list; /* slobs list of pages */ struct slab *slab_page; /* slab fields */ + struct rcu_head rcu_head; /* Used by SLAB + * when destroying via RCU + */ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS + pgtable_t pmd_huge_pte; /* protected by page->ptl */ +#endif }; /* Remainder is not double word aligned */ diff --git a/include/linux/msi.h b/include/linux/msi.h index 87cce50..009b024 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -26,11 +26,11 @@ struct msi_desc { struct { __u8 is_msix : 1; __u8 multiple: 3; /* log2 number of messages */ - __u8 maskbit : 1; /* mask-pending bit supported ? */ - __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ - __u8 pos; /* Location of the msi capability */ - __u16 entry_nr; /* specific enabled entry */ - unsigned default_irq; /* default pre-assigned irq */ + __u8 maskbit : 1; /* mask-pending bit supported ? */ + __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ + __u8 pos; /* Location of the msi capability */ + __u16 entry_nr; /* specific enabled entry */ + unsigned default_irq; /* default pre-assigned irq */ } msi_attrib; u32 masked; /* mask bits */ diff --git a/include/linux/net.h b/include/linux/net.h index b292a04..4bcee94 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -164,6 +164,14 @@ struct proto_ops { #endif int (*sendmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); + /* Notes for implementing recvmsg: + * =============================== + * msg->msg_namelen should get updated by the recvmsg handlers + * iff msg_name != NULL. It is by default 0 to prevent + * returning uninitialized memory to user space. The recvfrom + * handlers can assume that msg.msg_name is either NULL or has + * a minimum size of sizeof(struct sockaddr_storage). + */ int (*recvmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags); diff --git a/include/linux/pci.h b/include/linux/pci.h index 835ec7b..1084a15 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -32,7 +32,6 @@ #include <linux/irqreturn.h> #include <uapi/linux/pci.h> -/* Include the ID list */ #include <linux/pci_ids.h> /* @@ -42,9 +41,10 @@ * * 7:3 = slot * 2:0 = function - * PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() are defined uapi/linux/pci.h + * + * PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() are defined in uapi/linux/pci.h. * In the interest of not exposing interfaces to user-space unnecessarily, - * the following kernel only defines are being added here. + * the following kernel-only defines are being added here. */ #define PCI_DEVID(bus, devfn) ((((u16)bus) << 8) | devfn) /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */ @@ -153,10 +153,10 @@ enum pcie_reset_state { /* Reset is NOT asserted (Use to deassert reset) */ pcie_deassert_reset = (__force pcie_reset_state_t) 1, - /* Use #PERST to reset PCI-E device */ + /* Use #PERST to reset PCIe device */ pcie_warm_reset = (__force pcie_reset_state_t) 2, - /* Use PCI-E Hot Reset to reset device */ + /* Use PCIe Hot Reset to reset device */ pcie_hot_reset = (__force pcie_reset_state_t) 3 }; @@ -259,13 +259,13 @@ struct pci_dev { unsigned int class; /* 3 bytes: (base,sub,prog-if) */ u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ - u8 pcie_cap; /* PCI-E capability offset */ + u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ u8 msix_cap; /* MSI-X capability offset */ - u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ + u8 pcie_mpss:3; /* PCIe Max Payload Size Supported */ u8 rom_base_reg; /* which config register controls the ROM */ - u8 pin; /* which interrupt pin this device uses */ - u16 pcie_flags_reg; /* cached PCI-E Capabilities Register */ + u8 pin; /* which interrupt pin this device uses */ + u16 pcie_flags_reg; /* cached PCIe Capabilities Register */ struct pci_driver *driver; /* which driver has allocated this device */ u64 dma_mask; /* Mask of the bits of bus address this @@ -300,7 +300,7 @@ struct pci_dev { unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ #ifdef CONFIG_PCIEASPM - struct pcie_link_state *link_state; /* ASPM link state. */ + struct pcie_link_state *link_state; /* ASPM link state */ #endif pci_channel_state_t error_state; /* current connectivity state */ @@ -317,7 +317,7 @@ struct pci_dev { bool match_driver; /* Skip attaching driver */ /* These fields are used by common fixups */ - unsigned int transparent:1; /* Transparent PCI bridge */ + unsigned int transparent:1; /* Subtractive decode PCI bridge */ unsigned int multifunction:1;/* Part of multi-function device */ /* keep track of device state */ unsigned int is_added:1; @@ -326,7 +326,7 @@ struct pci_dev { unsigned int block_cfg_access:1; /* config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ - unsigned int msi_enabled:1; + unsigned int msi_enabled:1; unsigned int msix_enabled:1; unsigned int ari_enabled:1; /* ARI forwarding */ unsigned int is_managed:1; @@ -371,7 +371,6 @@ static inline struct pci_dev *pci_physfn(struct pci_dev *dev) if (dev->is_virtfn) dev = dev->physfn; #endif - return dev; } @@ -456,7 +455,7 @@ struct pci_bus { char name[48]; unsigned short bridge_ctl; /* manage NO_ISA/FBB/et al behaviors */ - pci_bus_flags_t bus_flags; /* Inherited by child busses */ + pci_bus_flags_t bus_flags; /* inherited by child buses */ struct device *bridge; struct device dev; struct bin_attribute *legacy_io; /* legacy I/O for this bus */ @@ -468,7 +467,7 @@ struct pci_bus { #define to_pci_bus(n) container_of(n, struct pci_bus, dev) /* - * Returns true if the pci bus is root (behind host-pci bridge), + * Returns true if the PCI bus is root (behind host-PCI bridge), * false otherwise * * Some code assumes that "bus->self == NULL" means that bus is a root bus. @@ -510,7 +509,7 @@ static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; #define PCIBIOS_BUFFER_TOO_SMALL 0x89 /* - * Translate above to generic errno for passing back through non-pci. + * Translate above to generic errno for passing back through non-PCI code. */ static inline int pcibios_err_to_errno(int err) { @@ -561,11 +560,12 @@ struct pci_dynids { struct list_head list; /* for IDs added at runtime */ }; -/* ---------------------------------------------------------------- */ -/** PCI Error Recovery System (PCI-ERS). If a PCI device driver provides - * a set of callbacks in struct pci_error_handlers, then that device driver - * will be notified of PCI bus errors, and will be driven to recovery - * when an error occurs. + +/* + * PCI Error Recovery System (PCI-ERS). If a PCI device driver provides + * a set of callbacks in struct pci_error_handlers, that device driver + * will be notified of PCI bus errors, and will be driven to recovery + * when an error occurs. */ typedef unsigned int __bitwise pci_ers_result_t; @@ -609,7 +609,6 @@ struct pci_error_handlers { void (*resume)(struct pci_dev *dev); }; -/* ---------------------------------------------------------------- */ struct module; struct pci_driver { @@ -713,10 +712,10 @@ extern enum pcie_bus_config_types pcie_bus_config; extern struct bus_type pci_bus_type; -/* Do NOT directly access these two variables, unless you are arch specific pci - * code, or pci core code. */ +/* Do NOT directly access these two variables, unless you are arch-specific PCI + * code, or PCI core code. */ extern struct list_head pci_root_buses; /* list of all known PCI buses */ -/* Some device drivers need know if pci is initiated */ +/* Some device drivers need know if PCI is initiated */ int no_pci_devices(void); void pcibios_resource_survey_bus(struct pci_bus *bus); @@ -724,7 +723,7 @@ void pcibios_add_bus(struct pci_bus *bus); void pcibios_remove_bus(struct pci_bus *bus); void pcibios_fixup_bus(struct pci_bus *); int __must_check pcibios_enable_device(struct pci_dev *, int mask); -/* Architecture specific versions may override this (weak) */ +/* Architecture-specific versions may override this (weak) */ char *pcibios_setup(char *str); /* Used only when drivers/pci/setup.c is used */ @@ -1258,7 +1257,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev); /* * PCI domain support. Sometimes called PCI segment (eg by ACPI), - * a PCI domain is defined to be a set of PCI busses which share + * a PCI domain is defined to be a set of PCI buses which share * configuration space. */ #ifdef CONFIG_PCI_DOMAINS @@ -1672,7 +1671,7 @@ extern u8 pci_cache_line_size; extern unsigned long pci_hotplug_io_size; extern unsigned long pci_hotplug_mem_size; -/* Architecture specific versions may override these (weak) */ +/* Architecture-specific versions may override these (weak) */ int pcibios_add_platform_entries(struct pci_dev *dev); void pcibios_disable_device(struct pci_dev *dev); void pcibios_set_master(struct pci_dev *dev); diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 430dd96..a2e2f1d 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -39,8 +39,8 @@ * @hardware_test: Called to run a specified hardware test on the specified * slot. * @get_power_status: Called to get the current power status of a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. * @get_attention_status: Called to get the current attention status of a slot. * If this field is NULL, the value passed in the struct hotplug_slot_info * will be used when this value is requested by a user. @@ -191,4 +191,3 @@ static inline int pci_get_hp_params(struct pci_dev *dev, void pci_configure_slot(struct pci_dev *dev); #endif - diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 9572669..4f1089f 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -23,7 +23,7 @@ #define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) struct pcie_device { - int irq; /* Service IRQ/MSI/MSI-X Vector */ + int irq; /* Service IRQ/MSI/MSI-X Vector */ struct pci_dev *port; /* Root/Upstream/Downstream Port */ u32 service; /* Port service this device represents */ void *priv_data; /* Service Private Data */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 64ab823..48a4dc3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -559,6 +559,7 @@ static inline int phy_read_status(struct phy_device *phydev) { return phydev->drv->read_status(phydev); } +int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); diff --git a/include/linux/security.h b/include/linux/security.h index 9d37e2b..5623a7f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1052,17 +1052,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @xfrm_policy_delete_security: * @ctx contains the xfrm_sec_ctx. * Authorize deletion of xp->security. - * @xfrm_state_alloc_security: + * @xfrm_state_alloc: * @x contains the xfrm_state being added to the Security Association * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * @secid contains the secid from which to take the mls portion of the context. * Allocate a security structure to the x->security field; the security * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to either sec_ctx or polsec, with the mls portion - * taken from secid in the latter case. - * Return 0 if operation was successful (memory to allocate, legal context). + * context to correspond to sec_ctx. Return 0 if operation was successful + * (memory to allocate, legal context). + * @xfrm_state_alloc_acquire: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @polsec contains the policy's security context. + * @secid contains the secid from which to take the mls portion of the + * context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to secid. Return 0 if operation was successful + * (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. * Deallocate x->security. @@ -1679,9 +1687,11 @@ struct security_operations { int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); - int (*xfrm_state_alloc_security) (struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, - u32 secid); + int (*xfrm_state_alloc) (struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_acquire) (struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, + u32 secid); void (*xfrm_state_free_security) (struct xfrm_state *x); int (*xfrm_state_delete_security) (struct xfrm_state *x); int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); diff --git a/include/linux/slab.h b/include/linux/slab.h index 74f1058..c2bba24 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -53,7 +53,14 @@ * } * rcu_read_unlock(); * - * See also the comment on struct slab_rcu in mm/slab.c. + * This is useful if we need to approach a kernel structure obliquely, + * from its address obtained without the usual locking. We can lock + * the structure to stabilize it and check it's still at the given address, + * only if we can be sure that the memory has not been meanwhile reused + * for some other kind of object (which our subsystem's lock might corrupt). + * + * rcu_read_lock before reading the address, then rcu_read_unlock after + * taking the spinlock within the structure expected at that address. */ #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index e9346b4..09bfffb 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -27,8 +27,8 @@ struct kmem_cache { size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ - struct kmem_cache *slabp_cache; - unsigned int slab_size; + struct kmem_cache *freelist_cache; + unsigned int freelist_size; /* constructor func */ void (*ctor)(void *obj); diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index cc0b67e..f56bfa9 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -11,7 +11,7 @@ enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ - FREE_FASTPATH, /* Free to cpu slub */ + FREE_FASTPATH, /* Free to cpu slab */ FREE_SLOWPATH, /* Freeing not to cpu slab */ FREE_FROZEN, /* Freeing to frozen slab */ FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4db2985..4836ba3 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -27,6 +27,12 @@ struct user_namespace { kuid_t owner; kgid_t group; unsigned int proc_inum; + + /* Register of per-UID persistent keyrings for this namespace */ +#ifdef CONFIG_PERSISTENT_KEYRINGS + struct key *persistent_keyring_register; + struct rw_semaphore persistent_keyring_register_sem; +#endif }; extern struct user_namespace init_user_ns; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ace4abf..1b177ed 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -265,7 +265,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); @@ -283,9 +283,6 @@ static inline int genlmsg_multicast(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) - return -EINVAL; - group = family->mcgrp_offset + group; return genlmsg_multicast_netns(family, &init_net, skb, portid, group, flags); } @@ -387,6 +384,9 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) static inline int genl_set_err(struct genl_family *family, struct net *net, u32 portid, u32 group, int code) { + if (WARN_ON_ONCE(group >= family->n_mcgrps)) + return -EINVAL; + group = family->mcgrp_offset + group; return netlink_set_err(net->genl_sock, portid, group, code); } diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 5ebe21c..39e0114 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -34,6 +34,11 @@ struct se_subsystem_api { sense_reason_t (*parse_cdb)(struct se_cmd *cmd); u32 (*get_device_type)(struct se_device *); sector_t (*get_blocks)(struct se_device *); + sector_t (*get_alignment_offset_lbas)(struct se_device *); + /* lbppbe = logical blocks per physical block exponent. see SBC-3 */ + unsigned int (*get_lbppbe)(struct se_device *); + unsigned int (*get_io_min)(struct se_device *); + unsigned int (*get_io_opt)(struct se_device *); unsigned char *(*get_sense_buffer)(struct se_cmd *); bool (*get_write_cache)(struct se_device *); }; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 5bdb8b7..45412a6 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -227,6 +227,7 @@ enum tcm_tmreq_table { /* fabric independent task management response values */ enum tcm_tmrsp_table { + TMR_FUNCTION_FAILED = 0, TMR_FUNCTION_COMPLETE = 1, TMR_TASK_DOES_NOT_EXIST = 2, TMR_LUN_DOES_NOT_EXIST = 3, @@ -282,11 +283,12 @@ struct t10_alua_lu_gp_member { struct t10_alua_tg_pt_gp { u16 tg_pt_gp_id; int tg_pt_gp_valid_id; + int tg_pt_gp_alua_supported_states; int tg_pt_gp_alua_access_status; int tg_pt_gp_alua_access_type; int tg_pt_gp_nonop_delay_msecs; int tg_pt_gp_trans_delay_msecs; - int tg_pt_gp_implict_trans_secs; + int tg_pt_gp_implicit_trans_secs; int tg_pt_gp_pref; int tg_pt_gp_write_metadata; /* Used by struct t10_alua_tg_pt_gp->tg_pt_gp_md_buf_len */ @@ -442,7 +444,6 @@ struct se_cmd { /* Used for sense data */ void *sense_buffer; struct list_head se_delayed_node; - struct list_head se_lun_node; struct list_head se_qf_node; struct se_device *se_dev; struct se_dev_entry *se_deve; @@ -470,15 +471,11 @@ struct se_cmd { #define CMD_T_SENT (1 << 4) #define CMD_T_STOP (1 << 5) #define CMD_T_FAILED (1 << 6) -#define CMD_T_LUN_STOP (1 << 7) -#define CMD_T_LUN_FE_STOP (1 << 8) -#define CMD_T_DEV_ACTIVE (1 << 9) -#define CMD_T_REQUEST_STOP (1 << 10) -#define CMD_T_BUSY (1 << 11) +#define CMD_T_DEV_ACTIVE (1 << 7) +#define CMD_T_REQUEST_STOP (1 << 8) +#define CMD_T_BUSY (1 << 9) spinlock_t t_state_lock; struct completion t_transport_stop_comp; - struct completion transport_lun_fe_stop_comp; - struct completion transport_lun_stop_comp; struct work_struct work; @@ -498,6 +495,9 @@ struct se_cmd { /* backend private data */ void *priv; + + /* Used for lun->lun_ref counting */ + bool lun_ref_active; }; struct se_ua { @@ -628,6 +628,34 @@ struct se_dev_attrib { struct config_group da_group; }; +struct se_port_stat_grps { + struct config_group stat_group; + struct config_group scsi_port_group; + struct config_group scsi_tgt_port_group; + struct config_group scsi_transport_group; +}; + +struct se_lun { +#define SE_LUN_LINK_MAGIC 0xffff7771 + u32 lun_link_magic; + /* See transport_lun_status_table */ + enum transport_lun_status_table lun_status; + u32 lun_access; + u32 lun_flags; + u32 unpacked_lun; + atomic_t lun_acl_count; + spinlock_t lun_acl_lock; + spinlock_t lun_sep_lock; + struct completion lun_shutdown_comp; + struct list_head lun_acl_list; + struct se_device *lun_se_dev; + struct se_port *lun_sep; + struct config_group lun_group; + struct se_port_stat_grps port_stat_grps; + struct completion lun_ref_comp; + struct percpu_ref lun_ref; +}; + struct se_dev_stat_grps { struct config_group stat_group; struct config_group scsi_dev_group; @@ -656,11 +684,10 @@ struct se_device { /* Pointer to transport specific device structure */ u32 dev_index; u64 creation_time; - u32 num_resets; - u64 num_cmds; - u64 read_bytes; - u64 write_bytes; - spinlock_t stats_lock; + atomic_long_t num_resets; + atomic_long_t num_cmds; + atomic_long_t read_bytes; + atomic_long_t write_bytes; /* Active commands on this virtual SE device */ atomic_t simple_cmds; atomic_t dev_ordered_id; @@ -711,6 +738,7 @@ struct se_device { struct se_subsystem_api *transport; /* Linked list for struct se_hba struct se_device list */ struct list_head dev_list; + struct se_lun xcopy_lun; }; struct se_hba { @@ -730,34 +758,6 @@ struct se_hba { struct se_subsystem_api *transport; }; -struct se_port_stat_grps { - struct config_group stat_group; - struct config_group scsi_port_group; - struct config_group scsi_tgt_port_group; - struct config_group scsi_transport_group; -}; - -struct se_lun { -#define SE_LUN_LINK_MAGIC 0xffff7771 - u32 lun_link_magic; - /* See transport_lun_status_table */ - enum transport_lun_status_table lun_status; - u32 lun_access; - u32 lun_flags; - u32 unpacked_lun; - atomic_t lun_acl_count; - spinlock_t lun_acl_lock; - spinlock_t lun_cmd_lock; - spinlock_t lun_sep_lock; - struct completion lun_shutdown_comp; - struct list_head lun_cmd_list; - struct list_head lun_acl_list; - struct se_device *lun_se_dev; - struct se_port *lun_sep; - struct config_group lun_group; - struct se_port_stat_grps port_stat_grps; -}; - struct scsi_port_stats { u64 cmd_pdus; u64 tx_data_octets; diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 713c500..e080138 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -54,4 +54,3 @@ struct target_fabric_configfs { struct target_fabric_configfs_template tf_cit_tmpl; }; -#define TF_CIT_TMPL(tf) (&(tf)->tf_cit_tmpl) diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 882b650e..4cf4fda 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -137,6 +137,8 @@ void transport_generic_request_failure(struct se_cmd *, sense_reason_t); void __target_execute_cmd(struct se_cmd *); int transport_lookup_tmr_lun(struct se_cmd *, u32); +struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, + unsigned char *); struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *, unsigned char *); void core_tpg_clear_object_luns(struct se_portal_group *); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index f18b3b7..4832d75 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -162,12 +162,14 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, { EXTENT_FLAG_LOGGING, "LOGGING" }, \ { EXTENT_FLAG_FILLING, "FILLING" }) -TRACE_EVENT(btrfs_get_extent, +TRACE_EVENT_CONDITION(btrfs_get_extent, TP_PROTO(struct btrfs_root *root, struct extent_map *map), TP_ARGS(root, map), + TP_CONDITION(map), + TP_STRUCT__entry( __field( u64, root_objectid ) __field( u64, start ) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index db0b825..44b05a0 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -68,6 +68,9 @@ #define AUDIT_MAKE_EQUIV 1015 /* Append to watched tree */ #define AUDIT_TTY_GET 1016 /* Get TTY auditing status */ #define AUDIT_TTY_SET 1017 /* Set TTY auditing status */ +#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */ +#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */ +#define AUDIT_FEATURE_CHANGE 1020 /* audit log listing feature changes */ #define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */ #define AUDIT_USER_AVC 1107 /* We filter this differently */ @@ -357,6 +360,12 @@ enum { #define AUDIT_PERM_READ 4 #define AUDIT_PERM_ATTR 8 +/* MAX_AUDIT_MESSAGE_LENGTH is set in audit:lib/libaudit.h as: + * 8970 // PATH_MAX*2+CONTEXT_SIZE*2+11+256+1 + * max header+body+tailer: 44 + 29 + 32 + 262 + 7 + pad + */ +#define AUDIT_MESSAGE_TEXT_MAX 8560 + struct audit_status { __u32 mask; /* Bit mask for valid entries */ __u32 enabled; /* 1 = enabled, 0 = disabled */ @@ -368,11 +377,28 @@ struct audit_status { __u32 backlog; /* messages waiting in queue */ }; +struct audit_features { +#define AUDIT_FEATURE_VERSION 1 + __u32 vers; + __u32 mask; /* which bits we are dealing with */ + __u32 features; /* which feature to enable/disable */ + __u32 lock; /* which features to lock */ +}; + +#define AUDIT_FEATURE_ONLY_UNSET_LOGINUID 0 +#define AUDIT_FEATURE_LOGINUID_IMMUTABLE 1 +#define AUDIT_LAST_FEATURE AUDIT_FEATURE_LOGINUID_IMMUTABLE + +#define audit_feature_valid(x) ((x) >= 0 && (x) <= AUDIT_LAST_FEATURE) +#define AUDIT_FEATURE_TO_MASK(x) (1 << ((x) & 31)) /* mask for __u32 */ + struct audit_tty_status { __u32 enabled; /* 1 = enabled, 0 = disabled */ __u32 log_passwd; /* 1 = enabled, 0 = disabled */ }; +#define AUDIT_UID_UNSET (unsigned int)-1 + /* audit_rule_data supports filter rules with both integer and string * fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and * AUDIT_LIST_RULES requests. diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h new file mode 100644 index 0000000..ca18c45 --- /dev/null +++ b/include/uapi/linux/hash_info.h @@ -0,0 +1,37 @@ +/* + * Hash Info: Hash algorithms information + * + * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _UAPI_LINUX_HASH_INFO_H +#define _UAPI_LINUX_HASH_INFO_H + +enum hash_algo { + HASH_ALGO_MD4, + HASH_ALGO_MD5, + HASH_ALGO_SHA1, + HASH_ALGO_RIPE_MD_160, + HASH_ALGO_SHA256, + HASH_ALGO_SHA384, + HASH_ALGO_SHA512, + HASH_ALGO_SHA224, + HASH_ALGO_RIPE_MD_128, + HASH_ALGO_RIPE_MD_256, + HASH_ALGO_RIPE_MD_320, + HASH_ALGO_WP_256, + HASH_ALGO_WP_384, + HASH_ALGO_WP_512, + HASH_ALGO_TGR_128, + HASH_ALGO_TGR_160, + HASH_ALGO_TGR_192, + HASH_ALGO__LAST +}; + +#endif /* _UAPI_LINUX_HASH_INFO_H */ diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index c9b7f4fa..840cb99 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -56,5 +56,6 @@ #define KEYCTL_REJECT 19 /* reject a partially constructed key */ #define KEYCTL_INSTANTIATE_IOV 20 /* instantiate a partially constructed key */ #define KEYCTL_INVALIDATE 21 /* invalidate a key */ +#define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */ #endif /* _LINUX_KEYCTL_H */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 0890556..4a98e85 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -13,10 +13,10 @@ * PCI to PCI Bridge Specification * PCI System Design Guide * - * For hypertransport information, please consult the following manuals - * from http://www.hypertransport.org + * For HyperTransport information, please consult the following manuals + * from http://www.hypertransport.org * - * The Hypertransport I/O Link Specification + * The HyperTransport I/O Link Specification */ #ifndef LINUX_PCI_REGS_H @@ -37,7 +37,7 @@ #define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ #define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ #define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ -#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ #define PCI_COMMAND_SERR 0x100 /* Enable SERR */ #define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ #define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ @@ -45,7 +45,7 @@ #define PCI_STATUS 0x06 /* 16 bits */ #define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */ #define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ -#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 MHz PCI 2.1 bus */ #define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ #define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ #define PCI_STATUS_PARITY 0x100 /* Detected parity error */ @@ -205,14 +205,14 @@ #define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ #define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ #define PCI_CAP_ID_HT 0x08 /* HyperTransport */ -#define PCI_CAP_ID_VNDR 0x09 /* Vendor specific */ +#define PCI_CAP_ID_VNDR 0x09 /* Vendor-Specific */ #define PCI_CAP_ID_DBG 0x0A /* Debug port */ #define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */ -#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ +#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ #define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ #define PCI_CAP_ID_SECDEV 0x0F /* Secure Device */ -#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ #define PCI_CAP_ID_SATA 0x12 /* SATA Data/Index Conf. */ #define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ @@ -268,8 +268,8 @@ #define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ #define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ #define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ -#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ -#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ #define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ #define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ #define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ @@ -321,7 +321,7 @@ #define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ #define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ -/* MSI-X entry's format */ +/* MSI-X Table entry format */ #define PCI_MSIX_ENTRY_SIZE 16 #define PCI_MSIX_ENTRY_LOWER_ADDR 0 #define PCI_MSIX_ENTRY_UPPER_ADDR 4 @@ -372,7 +372,7 @@ #define PCI_X_CMD_SPLIT_16 0x0060 /* Max 16 */ #define PCI_X_CMD_SPLIT_32 0x0070 /* Max 32 */ #define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ -#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ #define PCI_X_STATUS 4 /* PCI-X capabilities */ #define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ #define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ @@ -407,8 +407,8 @@ /* PCI Bridge Subsystem ID registers */ -#define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */ -#define PCI_SSVID_DEVICE_ID 6 /* PCI-Bridge subsystem device id register */ +#define PCI_SSVID_VENDOR_ID 4 /* PCI Bridge subsystem vendor ID */ +#define PCI_SSVID_DEVICE_ID 6 /* PCI Bridge subsystem device ID */ /* PCI Express capability registers */ @@ -484,12 +484,12 @@ #define PCI_EXP_LNKCTL_CLKREQ_EN 0x0100 /* Enable clkreq */ #define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ #define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ -#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Lnk Autonomous Bandwidth Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ #define PCI_EXP_LNKSTA 18 /* Link Status */ #define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ -#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Nogotiated Link Width */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ #define PCI_EXP_LNKSTA_NLW_SHIFT 4 /* start of NLW mask in link status */ #define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ #define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ @@ -593,7 +593,7 @@ #define PCI_EXT_CAP_ID_MFVC 0x08 /* Multi-Function VC Capability */ #define PCI_EXT_CAP_ID_VC9 0x09 /* same as _VC */ #define PCI_EXT_CAP_ID_RCRB 0x0A /* Root Complex RB? */ -#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor Specific */ +#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor-Specific */ #define PCI_EXT_CAP_ID_CAC 0x0C /* Config Access - obsolete */ #define PCI_EXT_CAP_ID_ACS 0x0D /* Access Control Services */ #define PCI_EXT_CAP_ID_ARI 0x0E /* Alternate Routing ID */ @@ -602,12 +602,12 @@ #define PCI_EXT_CAP_ID_MRIOV 0x11 /* Multi Root I/O Virtualization */ #define PCI_EXT_CAP_ID_MCAST 0x12 /* Multicast */ #define PCI_EXT_CAP_ID_PRI 0x13 /* Page Request Interface */ -#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* reserved for AMD */ -#define PCI_EXT_CAP_ID_REBAR 0x15 /* resizable BAR */ -#define PCI_EXT_CAP_ID_DPA 0x16 /* dynamic power alloc */ -#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH request */ -#define PCI_EXT_CAP_ID_LTR 0x18 /* latency tolerance reporting */ -#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe */ +#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* Reserved for AMD */ +#define PCI_EXT_CAP_ID_REBAR 0x15 /* Resizable BAR */ +#define PCI_EXT_CAP_ID_DPA 0x16 /* Dynamic Power Allocation */ +#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH Requester */ +#define PCI_EXT_CAP_ID_LTR 0x18 /* Latency Tolerance Reporting */ +#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe Capability */ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PASID @@ -667,9 +667,9 @@ #define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ /* Multi ERR_COR Received */ #define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 -/* ERR_FATAL/NONFATAL Recevied */ +/* ERR_FATAL/NONFATAL Received */ #define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 -/* Multi ERR_FATAL/NONFATAL Recevied */ +/* Multi ERR_FATAL/NONFATAL Received */ #define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 #define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ #define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ @@ -678,7 +678,7 @@ /* Virtual Channel */ #define PCI_VC_PORT_REG1 4 -#define PCI_VC_REG1_EVCC 0x7 /* extended vc count */ +#define PCI_VC_REG1_EVCC 0x7 /* extended VC count */ #define PCI_VC_PORT_REG2 8 #define PCI_VC_REG2_32_PHASE 0x2 #define PCI_VC_REG2_64_PHASE 0x4 @@ -711,7 +711,7 @@ #define PCI_VNDR_HEADER_LEN(x) (((x) >> 20) & 0xfff) /* - * Hypertransport sub capability types + * HyperTransport sub capability types * * Unfortunately there are both 3 bit and 5 bit capability types defined * in the HT spec, catering for that is a little messy. You probably don't @@ -739,8 +739,8 @@ #define HT_CAPTYPE_DIRECT_ROUTE 0xB0 /* Direct routing configuration */ #define HT_CAPTYPE_VCSET 0xB8 /* Virtual Channel configuration */ #define HT_CAPTYPE_ERROR_RETRY 0xC0 /* Retry on error configuration */ -#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 hypertransport configuration */ -#define HT_CAPTYPE_PM 0xE0 /* Hypertransport powermanagement configuration */ +#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 HyperTransport configuration */ +#define HT_CAPTYPE_PM 0xE0 /* HyperTransport power management configuration */ #define HT_CAP_SIZEOF_LONG 28 /* slave & primary */ #define HT_CAP_SIZEOF_SHORT 24 /* host & secondary */ @@ -777,14 +777,14 @@ #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ #define PCI_EXT_CAP_PRI_SIZEOF 16 -/* PASID capability */ +/* Process Address Space ID */ #define PCI_PASID_CAP 0x04 /* PASID feature register */ #define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */ -#define PCI_PASID_CAP_PRIV 0x04 /* Priviledge Mode Supported */ +#define PCI_PASID_CAP_PRIV 0x04 /* Privilege Mode Supported */ #define PCI_PASID_CTRL 0x06 /* PASID control register */ #define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */ #define PCI_PASID_CTRL_EXEC 0x02 /* Exec permissions Enable */ -#define PCI_PASID_CTRL_PRIV 0x04 /* Priviledge Mode Enable */ +#define PCI_PASID_CTRL_PRIV 0x04 /* Privilege Mode Enable */ #define PCI_EXT_CAP_PASID_SIZEOF 8 /* Single Root I/O Virtualization */ @@ -839,22 +839,22 @@ #define PCI_ACS_CTRL 0x06 /* ACS Control Register */ #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ -#define PCI_VSEC_HDR 4 /* extended cap - vendor specific */ +#define PCI_VSEC_HDR 4 /* extended cap - vendor-specific */ #define PCI_VSEC_HDR_LEN_SHIFT 20 /* shift for length field */ -/* sata capability */ +/* SATA capability */ #define PCI_SATA_REGS 4 /* SATA REGs specifier */ #define PCI_SATA_REGS_MASK 0xF /* location - BAR#/inline */ #define PCI_SATA_REGS_INLINE 0xF /* REGS in config space */ #define PCI_SATA_SIZEOF_SHORT 8 #define PCI_SATA_SIZEOF_LONG 16 -/* resizable BARs */ +/* Resizable BARs */ #define PCI_REBAR_CTRL 8 /* control register */ #define PCI_REBAR_CTRL_NBAR_MASK (7 << 5) /* mask for # bars */ #define PCI_REBAR_CTRL_NBAR_SHIFT 5 /* shift for # bars */ -/* dynamic power allocation */ +/* Dynamic Power Allocation */ #define PCI_DPA_CAP 4 /* capability register */ #define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ #define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ diff --git a/init/Kconfig b/init/Kconfig index 3fc8a2f..79383d3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -301,20 +301,6 @@ config AUDIT_TREE depends on AUDITSYSCALL select FSNOTIFY -config AUDIT_LOGINUID_IMMUTABLE - bool "Make audit loginuid immutable" - depends on AUDIT - help - The config option toggles if a task setting its loginuid requires - CAP_SYS_AUDITCONTROL or if that task should require no special permissions - but should instead only allow setting its loginuid if it was never - previously set. On systems which use systemd or a similar central - process to restart login services this should be set to true. On older - systems in which an admin would typically have to directly stop and - start processes this should be set to false. Setting this to true allows - one to drop potentially dangerous capabilites from the login tasks, - but may not be backwards compatible with older init systems. - source "kernel/irq/Kconfig" source "kernel/time/Kconfig" @@ -1669,6 +1655,18 @@ config BASE_SMALL default 0 if BASE_FULL default 1 if !BASE_FULL +config SYSTEM_TRUSTED_KEYRING + bool "Provide system-wide ring of trusted keys" + depends on KEYS + help + Provide a system keyring to which trusted keys can be added. Keys in + the keyring are considered to be trusted. Keys may be added at will + by the kernel from compiled-in data and from hardware key stores, but + userspace may only add extra keys if those keys can be verified by + keys already in the keyring. + + Keys in this keyring are used by module signature checking. + menuconfig MODULES bool "Enable loadable module support" option modules @@ -1742,6 +1740,7 @@ config MODULE_SRCVERSION_ALL config MODULE_SIG bool "Module signature verification" depends on MODULES + select SYSTEM_TRUSTED_KEYRING select KEYS select CRYPTO select ASYMMETRIC_KEY_TYPE @@ -208,15 +208,18 @@ static void shm_open(struct vm_area_struct *vma) */ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) { + struct file *shm_file; + + shm_file = shp->shm_file; + shp->shm_file = NULL; ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shm_rmid(ns, shp); shm_unlock(shp); - if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0, shp->mlock_user); + if (!is_file_hugepages(shm_file)) + shmem_lock(shm_file, 0, shp->mlock_user); else if (shp->mlock_user) - user_shm_unlock(file_inode(shp->shm_file)->i_size, - shp->mlock_user); - fput (shp->shm_file); + user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user); + fput(shm_file); ipc_rcu_putref(shp, shm_rcu_free); } @@ -974,15 +977,25 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) ipc_lock_object(&shp->shm_perm); if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { kuid_t euid = current_euid(); - err = -EPERM; if (!uid_eq(euid, shp->shm_perm.uid) && - !uid_eq(euid, shp->shm_perm.cuid)) + !uid_eq(euid, shp->shm_perm.cuid)) { + err = -EPERM; goto out_unlock0; - if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) + } + if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { + err = -EPERM; goto out_unlock0; + } } shm_file = shp->shm_file; + + /* check if shm_destroy() is tearing down shp */ + if (shm_file == NULL) { + err = -EIDRM; + goto out_unlock0; + } + if (is_file_hugepages(shm_file)) goto out_unlock0; @@ -1101,6 +1114,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto out_unlock; ipc_lock_object(&shp->shm_perm); + + /* check if shm_destroy() is tearing down shp */ + if (shp->shm_file == NULL) { + ipc_unlock_object(&shp->shm_perm); + err = -EIDRM; + goto out_unlock; + } + path = shp->shm_file->f_path; path_get(&path); shp->shm_nattch++; diff --git a/kernel/Makefile b/kernel/Makefile index 09a9c94..bbaf7d5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -41,8 +41,9 @@ ifneq ($(CONFIG_SMP),y) obj-y += up.o endif obj-$(CONFIG_UID16) += uid16.o +obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o +obj-$(CONFIG_MODULE_SIG) += module_signing.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o @@ -122,19 +123,52 @@ targets += timeconst.h $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE $(call if_changed,bc) -ifeq ($(CONFIG_MODULE_SIG),y) +############################################################################### +# +# Roll all the X.509 certificates that we can find together and pull them into +# the kernel so that they get loaded into the system trusted keyring during +# boot. # -# Pull the signing certificate and any extra certificates into the kernel +# We look in the source root and the build root for all files whose name ends +# in ".x509". Unfortunately, this will generate duplicate filenames, so we +# have make canonicalise the pathnames and then sort them to discard the +# duplicates. # +############################################################################### +ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y) +X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509) +X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509 +X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \ + $(or $(realpath $(CERT)),$(CERT)))) + +ifeq ($(X509_CERTIFICATES),) +$(warning *** No X.509 certificates found ***) +endif + +ifneq ($(wildcard $(obj)/.x509.list),) +ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES)) +$(info X.509 certificate list changed) +$(shell rm $(obj)/.x509.list) +endif +endif + +kernel/system_certificates.o: $(obj)/x509_certificate_list -quiet_cmd_touch = TOUCH $@ - cmd_touch = touch $@ +quiet_cmd_x509certs = CERTS $@ + cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo " - Including cert $(X509)") -extra_certificates: - $(call cmd,touch) +targets += $(obj)/x509_certificate_list +$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list + $(call if_changed,x509certs) -kernel/modsign_certificate.o: signing_key.x509 extra_certificates +targets += $(obj)/.x509.list +$(obj)/.x509.list: + @echo $(X509_CERTIFICATES) >$@ +clean-files := x509_certificate_list .x509.list +endif + +ifeq ($(CONFIG_MODULE_SIG),y) ############################################################################### # # If module signing is requested, say by allyesconfig, but a key has not been diff --git a/kernel/audit.c b/kernel/audit.c index 7b0e23a..906ae5a0 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -60,7 +60,6 @@ #ifdef CONFIG_SECURITY #include <linux/security.h> #endif -#include <net/netlink.h> #include <linux/freezer.h> #include <linux/tty.h> #include <linux/pid_namespace.h> @@ -140,6 +139,17 @@ static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); +static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION, + .mask = -1, + .features = 0, + .lock = 0,}; + +static char *audit_feature_names[2] = { + "only_unset_loginuid", + "loginuid_immutable", +}; + + /* Serialize requests from userspace. */ DEFINE_MUTEX(audit_cmd_mutex); @@ -584,6 +594,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) return -EOPNOTSUPP; case AUDIT_GET: case AUDIT_SET: + case AUDIT_GET_FEATURE: + case AUDIT_SET_FEATURE: case AUDIT_LIST_RULES: case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: @@ -613,7 +625,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type) int rc = 0; uid_t uid = from_kuid(&init_user_ns, current_uid()); - if (!audit_enabled) { + if (!audit_enabled && msg_type != AUDIT_USER_AVC) { *ab = NULL; return rc; } @@ -628,6 +640,94 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type) return rc; } +int is_audit_feature_set(int i) +{ + return af.features & AUDIT_FEATURE_TO_MASK(i); +} + + +static int audit_get_feature(struct sk_buff *skb) +{ + u32 seq; + + seq = nlmsg_hdr(skb)->nlmsg_seq; + + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, + &af, sizeof(af)); + + return 0; +} + +static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature, + u32 old_lock, u32 new_lock, int res) +{ + struct audit_buffer *ab; + + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE); + audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d", + audit_feature_names[which], !!old_feature, !!new_feature, + !!old_lock, !!new_lock, res); + audit_log_end(ab); +} + +static int audit_set_feature(struct sk_buff *skb) +{ + struct audit_features *uaf; + int i; + + BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0])); + uaf = nlmsg_data(nlmsg_hdr(skb)); + + /* if there is ever a version 2 we should handle that here */ + + for (i = 0; i <= AUDIT_LAST_FEATURE; i++) { + u32 feature = AUDIT_FEATURE_TO_MASK(i); + u32 old_feature, new_feature, old_lock, new_lock; + + /* if we are not changing this feature, move along */ + if (!(feature & uaf->mask)) + continue; + + old_feature = af.features & feature; + new_feature = uaf->features & feature; + new_lock = (uaf->lock | af.lock) & feature; + old_lock = af.lock & feature; + + /* are we changing a locked feature? */ + if ((af.lock & feature) && (new_feature != old_feature)) { + audit_log_feature_change(i, old_feature, new_feature, + old_lock, new_lock, 0); + return -EPERM; + } + } + /* nothing invalid, do the changes */ + for (i = 0; i <= AUDIT_LAST_FEATURE; i++) { + u32 feature = AUDIT_FEATURE_TO_MASK(i); + u32 old_feature, new_feature, old_lock, new_lock; + + /* if we are not changing this feature, move along */ + if (!(feature & uaf->mask)) + continue; + + old_feature = af.features & feature; + new_feature = uaf->features & feature; + old_lock = af.lock & feature; + new_lock = (uaf->lock | af.lock) & feature; + + if (new_feature != old_feature) + audit_log_feature_change(i, old_feature, new_feature, + old_lock, new_lock, 1); + + if (new_feature) + af.features |= feature; + else + af.features &= ~feature; + af.lock |= new_lock; + } + + return 0; +} + static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; @@ -659,6 +759,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) switch (msg_type) { case AUDIT_GET: + memset(&status_set, 0, sizeof(status_set)); status_set.enabled = audit_enabled; status_set.failure = audit_failure; status_set.pid = audit_pid; @@ -670,7 +771,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) &status_set, sizeof(status_set)); break; case AUDIT_SET: - if (nlh->nlmsg_len < sizeof(struct audit_status)) + if (nlmsg_len(nlh) < sizeof(struct audit_status)) return -EINVAL; status_get = (struct audit_status *)data; if (status_get->mask & AUDIT_STATUS_ENABLED) { @@ -699,6 +800,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) err = audit_set_backlog_limit(status_get->backlog_limit); break; + case AUDIT_GET_FEATURE: + err = audit_get_feature(skb); + if (err) + return err; + break; + case AUDIT_SET_FEATURE: + err = audit_set_feature(skb); + if (err) + return err; + break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: @@ -715,7 +826,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } audit_log_common_recv_msg(&ab, msg_type); if (msg_type != AUDIT_USER_TTY) - audit_log_format(ab, " msg='%.1024s'", + audit_log_format(ab, " msg='%.*s'", + AUDIT_MESSAGE_TEXT_MAX, (char *)data); else { int size; @@ -818,7 +930,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct task_struct *tsk = current; spin_lock(&tsk->sighand->siglock); - s.enabled = tsk->signal->audit_tty != 0; + s.enabled = tsk->signal->audit_tty; s.log_passwd = tsk->signal->audit_tty_log_passwd; spin_unlock(&tsk->sighand->siglock); @@ -832,7 +944,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len)); + memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) return -EINVAL; @@ -1067,13 +1179,6 @@ static void wait_for_auditd(unsigned long sleep_time) remove_wait_queue(&audit_backlog_wait, &wait); } -/* Obtain an audit buffer. This routine does locking to obtain the - * audit buffer, but then no locking is required for calls to - * audit_log_*format. If the tsk is a task that is currently in a - * syscall, then the syscall is marked as auditable and an audit record - * will be written at syscall exit. If there is no associated task, tsk - * should be NULL. */ - /** * audit_log_start - obtain an audit buffer * @ctx: audit_context (may be NULL) @@ -1389,7 +1494,7 @@ void audit_log_session_info(struct audit_buffer *ab) u32 sessionid = audit_get_sessionid(current); uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current)); - audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid); + audit_log_format(ab, " auid=%u ses=%u", auid, sessionid); } void audit_log_key(struct audit_buffer *ab, char *key) @@ -1536,6 +1641,26 @@ void audit_log_name(struct audit_context *context, struct audit_names *n, } } + /* log the audit_names record type */ + audit_log_format(ab, " nametype="); + switch(n->type) { + case AUDIT_TYPE_NORMAL: + audit_log_format(ab, "NORMAL"); + break; + case AUDIT_TYPE_PARENT: + audit_log_format(ab, "PARENT"); + break; + case AUDIT_TYPE_CHILD_DELETE: + audit_log_format(ab, "DELETE"); + break; + case AUDIT_TYPE_CHILD_CREATE: + audit_log_format(ab, "CREATE"); + break; + default: + audit_log_format(ab, "UNKNOWN"); + break; + } + audit_log_fcaps(ab, n); audit_log_end(ab); } diff --git a/kernel/audit.h b/kernel/audit.h index 123c9b7..b779642 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -197,6 +197,9 @@ struct audit_context { int fd; int flags; } mmap; + struct { + int argc; + } execve; }; int fds[2]; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index f7aee8b..51f3fd4 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -343,6 +343,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) case AUDIT_DEVMINOR: case AUDIT_EXIT: case AUDIT_SUCCESS: + case AUDIT_INODE: /* bit ops are only useful on syscall args */ if (f->op == Audit_bitmask || f->op == Audit_bittest) return -EINVAL; @@ -423,7 +424,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->lsm_rule = NULL; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) { + if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; f->val = 0; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9845cb3..90594c9 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -95,13 +95,6 @@ struct audit_aux_data { /* Number of target pids per aux struct. */ #define AUDIT_AUX_PIDS 16 -struct audit_aux_data_execve { - struct audit_aux_data d; - int argc; - int envc; - struct mm_struct *mm; -}; - struct audit_aux_data_pids { struct audit_aux_data d; pid_t target_pid[AUDIT_AUX_PIDS]; @@ -121,12 +114,6 @@ struct audit_aux_data_bprm_fcaps { struct audit_cap_data new_pcap; }; -struct audit_aux_data_capset { - struct audit_aux_data d; - pid_t pid; - struct audit_cap_data cap; -}; - struct audit_tree_refs { struct audit_tree_refs *next; struct audit_chunk *c[31]; @@ -566,7 +553,7 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_INODE: if (name) - result = (name->ino == f->val); + result = audit_comparator(name->ino, f->op, f->val); else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { if (audit_comparator(n->ino, f->op, f->val)) { @@ -943,8 +930,10 @@ int audit_alloc(struct task_struct *tsk) return 0; /* Return if not auditing. */ state = audit_filter_task(tsk, &key); - if (state == AUDIT_DISABLED) + if (state == AUDIT_DISABLED) { + clear_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT); return 0; + } if (!(context = audit_alloc_context(state))) { kfree(key); @@ -1149,20 +1138,16 @@ static int audit_log_single_execve_arg(struct audit_context *context, } static void audit_log_execve_info(struct audit_context *context, - struct audit_buffer **ab, - struct audit_aux_data_execve *axi) + struct audit_buffer **ab) { int i, len; size_t len_sent = 0; const char __user *p; char *buf; - if (axi->mm != current->mm) - return; /* execve failed, no additional info */ - - p = (const char __user *)axi->mm->arg_start; + p = (const char __user *)current->mm->arg_start; - audit_log_format(*ab, "argc=%d", axi->argc); + audit_log_format(*ab, "argc=%d", context->execve.argc); /* * we need some kernel buffer to hold the userspace args. Just @@ -1176,7 +1161,7 @@ static void audit_log_execve_info(struct audit_context *context, return; } - for (i = 0; i < axi->argc; i++) { + for (i = 0; i < context->execve.argc; i++) { len = audit_log_single_execve_arg(context, ab, i, &len_sent, p, buf); if (len <= 0) @@ -1279,6 +1264,9 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd, context->mmap.flags); break; } + case AUDIT_EXECVE: { + audit_log_execve_info(context, &ab); + break; } } audit_log_end(ab); } @@ -1325,11 +1313,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts switch (aux->type) { - case AUDIT_EXECVE: { - struct audit_aux_data_execve *axi = (void *)aux; - audit_log_execve_info(context, &ab, axi); - break; } - case AUDIT_BPRM_FCAPS: { struct audit_aux_data_bprm_fcaps *axs = (void *)aux; audit_log_format(ab, "fver=%x", axs->fcap_ver); @@ -1964,6 +1947,43 @@ int auditsc_get_stamp(struct audit_context *ctx, /* global counter which is incremented every time something logs in */ static atomic_t session_id = ATOMIC_INIT(0); +static int audit_set_loginuid_perm(kuid_t loginuid) +{ + /* if we are unset, we don't need privs */ + if (!audit_loginuid_set(current)) + return 0; + /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/ + if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE)) + return -EPERM; + /* it is set, you need permission */ + if (!capable(CAP_AUDIT_CONTROL)) + return -EPERM; + /* reject if this is not an unset and we don't allow that */ + if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid)) + return -EPERM; + return 0; +} + +static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, + unsigned int oldsessionid, unsigned int sessionid, + int rc) +{ + struct audit_buffer *ab; + uid_t uid, ologinuid, nloginuid; + + uid = from_kuid(&init_user_ns, task_uid(current)); + ologinuid = from_kuid(&init_user_ns, koldloginuid); + nloginuid = from_kuid(&init_user_ns, kloginuid), + + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); + if (!ab) + return; + audit_log_format(ab, "pid=%d uid=%u old auid=%u new auid=%u old " + "ses=%u new ses=%u res=%d", current->pid, uid, ologinuid, + nloginuid, oldsessionid, sessionid, !rc); + audit_log_end(ab); +} + /** * audit_set_loginuid - set current task's audit_context loginuid * @loginuid: loginuid value @@ -1975,37 +1995,26 @@ static atomic_t session_id = ATOMIC_INIT(0); int audit_set_loginuid(kuid_t loginuid) { struct task_struct *task = current; - struct audit_context *context = task->audit_context; - unsigned int sessionid; + unsigned int oldsessionid, sessionid = (unsigned int)-1; + kuid_t oldloginuid; + int rc; -#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE - if (audit_loginuid_set(task)) - return -EPERM; -#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ - if (!capable(CAP_AUDIT_CONTROL)) - return -EPERM; -#endif /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ + oldloginuid = audit_get_loginuid(current); + oldsessionid = audit_get_sessionid(current); - sessionid = atomic_inc_return(&session_id); - if (context && context->in_syscall) { - struct audit_buffer *ab; + rc = audit_set_loginuid_perm(loginuid); + if (rc) + goto out; + + /* are we setting or clearing? */ + if (uid_valid(loginuid)) + sessionid = atomic_inc_return(&session_id); - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); - if (ab) { - audit_log_format(ab, "login pid=%d uid=%u " - "old auid=%u new auid=%u" - " old ses=%u new ses=%u", - task->pid, - from_kuid(&init_user_ns, task_uid(task)), - from_kuid(&init_user_ns, task->loginuid), - from_kuid(&init_user_ns, loginuid), - task->sessionid, sessionid); - audit_log_end(ab); - } - } task->sessionid = sessionid; task->loginuid = loginuid; - return 0; +out: + audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); + return rc; } /** @@ -2126,22 +2135,12 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo context->ipc.has_perm = 1; } -int __audit_bprm(struct linux_binprm *bprm) +void __audit_bprm(struct linux_binprm *bprm) { - struct audit_aux_data_execve *ax; struct audit_context *context = current->audit_context; - ax = kmalloc(sizeof(*ax), GFP_KERNEL); - if (!ax) - return -ENOMEM; - - ax->argc = bprm->argc; - ax->envc = bprm->envc; - ax->mm = bprm->mm; - ax->d.type = AUDIT_EXECVE; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->type = AUDIT_EXECVE; + context->execve.argc = bprm->argc; } diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S deleted file mode 100644 index 4a9a86d..0000000 --- a/kernel/modsign_certificate.S +++ /dev/null @@ -1,12 +0,0 @@ -#include <linux/export.h> - -#define GLOBAL(name) \ - .globl VMLINUX_SYMBOL(name); \ - VMLINUX_SYMBOL(name): - - .section ".init.data","aw" - -GLOBAL(modsign_certificate_list) - .incbin "signing_key.x509" - .incbin "extra_certificates" -GLOBAL(modsign_certificate_list_end) diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c deleted file mode 100644 index 7cbd450..0000000 --- a/kernel/modsign_pubkey.c +++ /dev/null @@ -1,104 +0,0 @@ -/* Public keys for module signature verification - * - * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/cred.h> -#include <linux/err.h> -#include <keys/asymmetric-type.h> -#include "module-internal.h" - -struct key *modsign_keyring; - -extern __initconst const u8 modsign_certificate_list[]; -extern __initconst const u8 modsign_certificate_list_end[]; - -/* - * We need to make sure ccache doesn't cache the .o file as it doesn't notice - * if modsign.pub changes. - */ -static __initconst const char annoy_ccache[] = __TIME__ "foo"; - -/* - * Load the compiled-in keys - */ -static __init int module_verify_init(void) -{ - pr_notice("Initialise module verification\n"); - - modsign_keyring = keyring_alloc(".module_sign", - KUIDT_INIT(0), KGIDT_INIT(0), - current_cred(), - ((KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA, NULL); - if (IS_ERR(modsign_keyring)) - panic("Can't allocate module signing keyring\n"); - - return 0; -} - -/* - * Must be initialised before we try and load the keys into the keyring. - */ -device_initcall(module_verify_init); - -/* - * Load the compiled-in keys - */ -static __init int load_module_signing_keys(void) -{ - key_ref_t key; - const u8 *p, *end; - size_t plen; - - pr_notice("Loading module verification certificates\n"); - - end = modsign_certificate_list_end; - p = modsign_certificate_list; - while (p < end) { - /* Each cert begins with an ASN.1 SEQUENCE tag and must be more - * than 256 bytes in size. - */ - if (end - p < 4) - goto dodgy_cert; - if (p[0] != 0x30 && - p[1] != 0x82) - goto dodgy_cert; - plen = (p[2] << 8) | p[3]; - plen += 4; - if (plen > end - p) - goto dodgy_cert; - - key = key_create_or_update(make_key_ref(modsign_keyring, 1), - "asymmetric", - NULL, - p, - plen, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW, - KEY_ALLOC_NOT_IN_QUOTA); - if (IS_ERR(key)) - pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n", - PTR_ERR(key)); - else - pr_notice("MODSIGN: Loaded cert '%s'\n", - key_ref_to_ptr(key)->description); - p += plen; - } - - return 0; - -dodgy_cert: - pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n"); - return 0; -} -late_initcall(load_module_signing_keys); diff --git a/kernel/module-internal.h b/kernel/module-internal.h index 24f9247..915e123 100644 --- a/kernel/module-internal.h +++ b/kernel/module-internal.h @@ -9,6 +9,4 @@ * 2 of the Licence, or (at your option) any later version. */ -extern struct key *modsign_keyring; - extern int mod_verify_sig(const void *mod, unsigned long *_modlen); diff --git a/kernel/module_signing.c b/kernel/module_signing.c index f2970bd..be5b8fa 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -14,6 +14,7 @@ #include <crypto/public_key.h> #include <crypto/hash.h> #include <keys/asymmetric-type.h> +#include <keys/system_keyring.h> #include "module-internal.h" /* @@ -28,7 +29,7 @@ */ struct module_signature { u8 algo; /* Public-key crypto algorithm [enum pkey_algo] */ - u8 hash; /* Digest algorithm [enum pkey_hash_algo] */ + u8 hash; /* Digest algorithm [enum hash_algo] */ u8 id_type; /* Key identifier type [enum pkey_id_type] */ u8 signer_len; /* Length of signer's name */ u8 key_id_len; /* Length of key identifier */ @@ -39,7 +40,7 @@ struct module_signature { /* * Digest the module contents. */ -static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash, +static struct public_key_signature *mod_make_digest(enum hash_algo hash, const void *mod, unsigned long modlen) { @@ -54,7 +55,7 @@ static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash, /* Allocate the hashing algorithm we're going to need and find out how * big the hash operational data will be. */ - tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0); + tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0); if (IS_ERR(tfm)) return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm); @@ -157,7 +158,7 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len, pr_debug("Look up: \"%s\"\n", id); - key = keyring_search(make_key_ref(modsign_keyring, 1), + key = keyring_search(make_key_ref(system_trusted_keyring, 1), &key_type_asymmetric, id); if (IS_ERR(key)) pr_warn("Request for unknown module key '%s' err %ld\n", @@ -217,7 +218,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen) return -ENOPKG; if (ms.hash >= PKEY_HASH__LAST || - !pkey_hash_algo[ms.hash]) + !hash_algo_name[ms.hash]) return -ENOPKG; key = request_asymmetric_key(sig, ms.signer_len, diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S new file mode 100644 index 0000000..4aef390 --- /dev/null +++ b/kernel/system_certificates.S @@ -0,0 +1,10 @@ +#include <linux/export.h> +#include <linux/init.h> + + __INITRODATA + + .globl VMLINUX_SYMBOL(system_certificate_list) +VMLINUX_SYMBOL(system_certificate_list): + .incbin "kernel/x509_certificate_list" + .globl VMLINUX_SYMBOL(system_certificate_list_end) +VMLINUX_SYMBOL(system_certificate_list_end): diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c new file mode 100644 index 0000000..564dd93 --- /dev/null +++ b/kernel/system_keyring.c @@ -0,0 +1,105 @@ +/* System trusted keyring for trusted public keys + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/cred.h> +#include <linux/err.h> +#include <keys/asymmetric-type.h> +#include <keys/system_keyring.h> +#include "module-internal.h" + +struct key *system_trusted_keyring; +EXPORT_SYMBOL_GPL(system_trusted_keyring); + +extern __initconst const u8 system_certificate_list[]; +extern __initconst const u8 system_certificate_list_end[]; + +/* + * Load the compiled-in keys + */ +static __init int system_trusted_keyring_init(void) +{ + pr_notice("Initialise system trusted keyring\n"); + + system_trusted_keyring = + keyring_alloc(".system_keyring", + KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH), + KEY_ALLOC_NOT_IN_QUOTA, NULL); + if (IS_ERR(system_trusted_keyring)) + panic("Can't allocate system trusted keyring\n"); + + set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags); + return 0; +} + +/* + * Must be initialised before we try and load the keys into the keyring. + */ +device_initcall(system_trusted_keyring_init); + +/* + * Load the compiled-in list of X.509 certificates. + */ +static __init int load_system_certificate_list(void) +{ + key_ref_t key; + const u8 *p, *end; + size_t plen; + + pr_notice("Loading compiled-in X.509 certificates\n"); + + end = system_certificate_list_end; + p = system_certificate_list; + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. + */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(system_trusted_keyring, 1), + "asymmetric", + NULL, + p, + plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_TRUSTED); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return 0; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); + return 0; +} +late_initcall(load_system_certificate_list); diff --git a/kernel/user.c b/kernel/user.c index 5bbb919..a3a0dbf 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,10 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, +#ifdef CONFIG_KEYS_KERBEROS_CACHE + .krb_cache_register_sem = + __RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem), +#endif }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 13fb113..240fb62 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -101,6 +101,9 @@ int create_user_ns(struct cred *new) set_cred_user_ns(new, ns); +#ifdef CONFIG_PERSISTENT_KEYRINGS + init_rwsem(&ns->persistent_keyring_register_sem); +#endif return 0; } @@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns) do { parent = ns->parent; +#ifdef CONFIG_PERSISTENT_KEYRINGS + key_put(ns->persistent_keyring_register); +#endif proc_free_inum(ns->proc_inum); kmem_cache_free(user_ns_cachep, ns); ns = parent; diff --git a/lib/Kconfig b/lib/Kconfig index 06dc742..991c98b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -322,6 +322,20 @@ config TEXTSEARCH_FSM config BTREE boolean +config ASSOCIATIVE_ARRAY + bool + help + Generic associative array. Can be searched and iterated over whilst + it is being modified. It is also reasonably quick to search and + modify. The algorithms are non-recursive, and the trees are highly + capacious. + + See: + + Documentation/assoc_array.txt + + for more information. + config HAS_IOMEM boolean depends on !NO_IOMEM diff --git a/lib/Makefile b/lib/Makefile index d480a8c..a459c31 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o percpu-refcount.o percpu_ida.o + earlycpio.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu_ida.o + percpu-refcount.o percpu_ida.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o @@ -47,6 +47,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o +obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o diff --git a/lib/assoc_array.c b/lib/assoc_array.c new file mode 100644 index 0000000..17edeaf --- /dev/null +++ b/lib/assoc_array.c @@ -0,0 +1,1746 @@ +/* Generic associative array implementation. + * + * See Documentation/assoc_array.txt for information. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +//#define DEBUG +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/assoc_array_priv.h> + +/* + * Iterate over an associative array. The caller must hold the RCU read lock + * or better. + */ +static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, + const struct assoc_array_ptr *stop, + int (*iterator)(const void *leaf, + void *iterator_data), + void *iterator_data) +{ + const struct assoc_array_shortcut *shortcut; + const struct assoc_array_node *node; + const struct assoc_array_ptr *cursor, *ptr, *parent; + unsigned long has_meta; + int slot, ret; + + cursor = root; + +begin_node: + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + shortcut = assoc_array_ptr_to_shortcut(cursor); + smp_read_barrier_depends(); + cursor = ACCESS_ONCE(shortcut->next_node); + } + + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + slot = 0; + + /* We perform two passes of each node. + * + * The first pass does all the leaves in this node. This means we + * don't miss any leaves if the node is split up by insertion whilst + * we're iterating over the branches rooted here (we may, however, see + * some leaves twice). + */ + has_meta = 0; + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + has_meta |= (unsigned long)ptr; + if (ptr && assoc_array_ptr_is_leaf(ptr)) { + /* We need a barrier between the read of the pointer + * and dereferencing the pointer - but only if we are + * actually going to dereference it. + */ + smp_read_barrier_depends(); + + /* Invoke the callback */ + ret = iterator(assoc_array_ptr_to_leaf(ptr), + iterator_data); + if (ret) + return ret; + } + } + + /* The second pass attends to all the metadata pointers. If we follow + * one of these we may find that we don't come back here, but rather go + * back to a replacement node with the leaves in a different layout. + * + * We are guaranteed to make progress, however, as the slot number for + * a particular portion of the key space cannot change - and we + * continue at the back pointer + 1. + */ + if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) + goto finished_node; + slot = 0; + +continue_node: + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + if (assoc_array_ptr_is_meta(ptr)) { + cursor = ptr; + goto begin_node; + } + } + +finished_node: + /* Move up to the parent (may need to skip back over a shortcut) */ + parent = ACCESS_ONCE(node->back_pointer); + slot = node->parent_slot; + if (parent == stop) + return 0; + + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + smp_read_barrier_depends(); + cursor = parent; + parent = ACCESS_ONCE(shortcut->back_pointer); + slot = shortcut->parent_slot; + if (parent == stop) + return 0; + } + + /* Ascend to next slot in parent node */ + cursor = parent; + slot++; + goto continue_node; +} + +/** + * assoc_array_iterate - Pass all objects in the array to a callback + * @array: The array to iterate over. + * @iterator: The callback function. + * @iterator_data: Private data for the callback function. + * + * Iterate over all the objects in an associative array. Each one will be + * presented to the iterator function. + * + * If the array is being modified concurrently with the iteration then it is + * possible that some objects in the array will be passed to the iterator + * callback more than once - though every object should be passed at least + * once. If this is undesirable then the caller must lock against modification + * for the duration of this function. + * + * The function will return 0 if no objects were in the array or else it will + * return the result of the last iterator function called. Iteration stops + * immediately if any call to the iteration function results in a non-zero + * return. + * + * The caller should hold the RCU read lock or better if concurrent + * modification is possible. + */ +int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data) +{ + struct assoc_array_ptr *root = ACCESS_ONCE(array->root); + + if (!root) + return 0; + return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); +} + +enum assoc_array_walk_status { + assoc_array_walk_tree_empty, + assoc_array_walk_found_terminal_node, + assoc_array_walk_found_wrong_shortcut, +} status; + +struct assoc_array_walk_result { + struct { + struct assoc_array_node *node; /* Node in which leaf might be found */ + int level; + int slot; + } terminal_node; + struct { + struct assoc_array_shortcut *shortcut; + int level; + int sc_level; + unsigned long sc_segments; + unsigned long dissimilarity; + } wrong_shortcut; +}; + +/* + * Navigate through the internal tree looking for the closest node to the key. + */ +static enum assoc_array_walk_status +assoc_array_walk(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *ptr; + unsigned long sc_segments, dissimilarity; + unsigned long segments; + int level, sc_level, next_sc_level; + int slot; + + pr_devel("-->%s()\n", __func__); + + cursor = ACCESS_ONCE(array->root); + if (!cursor) + return assoc_array_walk_tree_empty; + + level = 0; + + /* Use segments from the key for the new leaf to navigate through the + * internal tree, skipping through nodes and shortcuts that are on + * route to the destination. Eventually we'll come to a slot that is + * either empty or contains a leaf at which point we've found a node in + * which the leaf we're looking for might be found or into which it + * should be inserted. + */ +jumped: + segments = ops->get_key_chunk(index_key, level); + pr_devel("segments[%d]: %lx\n", level, segments); + + if (assoc_array_ptr_is_shortcut(cursor)) + goto follow_shortcut; + +consider_node: + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + + slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + slot &= ASSOC_ARRAY_FAN_MASK; + ptr = ACCESS_ONCE(node->slots[slot]); + + pr_devel("consider slot %x [ix=%d type=%lu]\n", + slot, level, (unsigned long)ptr & 3); + + if (!assoc_array_ptr_is_meta(ptr)) { + /* The node doesn't have a node/shortcut pointer in the slot + * corresponding to the index key that we have to follow. + */ + result->terminal_node.node = node; + result->terminal_node.level = level; + result->terminal_node.slot = slot; + pr_devel("<--%s() = terminal_node\n", __func__); + return assoc_array_walk_found_terminal_node; + } + + if (assoc_array_ptr_is_node(ptr)) { + /* There is a pointer to a node in the slot corresponding to + * this index key segment, so we need to follow it. + */ + cursor = ptr; + level += ASSOC_ARRAY_LEVEL_STEP; + if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) + goto consider_node; + goto jumped; + } + + /* There is a shortcut in the slot corresponding to the index key + * segment. We follow the shortcut if its partial index key matches + * this leaf's. Otherwise we need to split the shortcut. + */ + cursor = ptr; +follow_shortcut: + shortcut = assoc_array_ptr_to_shortcut(cursor); + smp_read_barrier_depends(); + pr_devel("shortcut to %d\n", shortcut->skip_to_level); + sc_level = level + ASSOC_ARRAY_LEVEL_STEP; + BUG_ON(sc_level > shortcut->skip_to_level); + + do { + /* Check the leaf against the shortcut's index key a word at a + * time, trimming the final word (the shortcut stores the index + * key completely from the root to the shortcut's target). + */ + if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) + segments = ops->get_key_chunk(index_key, sc_level); + + sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; + dissimilarity = segments ^ sc_segments; + + if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { + /* Trim segments that are beyond the shortcut */ + int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; + dissimilarity &= ~(ULONG_MAX << shift); + next_sc_level = shortcut->skip_to_level; + } else { + next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; + next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + } + + if (dissimilarity != 0) { + /* This shortcut points elsewhere */ + result->wrong_shortcut.shortcut = shortcut; + result->wrong_shortcut.level = level; + result->wrong_shortcut.sc_level = sc_level; + result->wrong_shortcut.sc_segments = sc_segments; + result->wrong_shortcut.dissimilarity = dissimilarity; + return assoc_array_walk_found_wrong_shortcut; + } + + sc_level = next_sc_level; + } while (sc_level < shortcut->skip_to_level); + + /* The shortcut matches the leaf's index to this point. */ + cursor = ACCESS_ONCE(shortcut->next_node); + if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { + level = sc_level; + goto jumped; + } else { + level = sc_level; + goto consider_node; + } +} + +/** + * assoc_array_find - Find an object by index key + * @array: The associative array to search. + * @ops: The operations to use. + * @index_key: The key to the object. + * + * Find an object in an associative array by walking through the internal tree + * to the node that should contain the object and then searching the leaves + * there. NULL is returned if the requested object was not found in the array. + * + * The caller must hold the RCU read lock or better. + */ +void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key) +{ + struct assoc_array_walk_result result; + const struct assoc_array_node *node; + const struct assoc_array_ptr *ptr; + const void *leaf; + int slot; + + if (assoc_array_walk(array, ops, index_key, &result) != + assoc_array_walk_found_terminal_node) + return NULL; + + node = result.terminal_node.node; + smp_read_barrier_depends(); + + /* If the target key is available to us, it's has to be pointed to by + * the terminal node. + */ + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + if (ptr && assoc_array_ptr_is_leaf(ptr)) { + /* We need a barrier between the read of the pointer + * and dereferencing the pointer - but only if we are + * actually going to dereference it. + */ + leaf = assoc_array_ptr_to_leaf(ptr); + smp_read_barrier_depends(); + if (ops->compare_object(leaf, index_key)) + return (void *)leaf; + } + } + + return NULL; +} + +/* + * Destructively iterate over an associative array. The caller must prevent + * other simultaneous accesses. + */ +static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, + const struct assoc_array_ops *ops) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *parent = NULL; + int slot = -1; + + pr_devel("-->%s()\n", __func__); + + cursor = root; + if (!cursor) { + pr_devel("empty\n"); + return; + } + +move_to_meta: + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + pr_devel("[%d] shortcut\n", slot); + BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); + shortcut = assoc_array_ptr_to_shortcut(cursor); + BUG_ON(shortcut->back_pointer != parent); + BUG_ON(slot != -1 && shortcut->parent_slot != slot); + parent = cursor; + cursor = shortcut->next_node; + slot = -1; + BUG_ON(!assoc_array_ptr_is_node(cursor)); + } + + pr_devel("[%d] node\n", slot); + node = assoc_array_ptr_to_node(cursor); + BUG_ON(node->back_pointer != parent); + BUG_ON(slot != -1 && node->parent_slot != slot); + slot = 0; + +continue_node: + pr_devel("Node %p [back=%p]\n", node, node->back_pointer); + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_ptr *ptr = node->slots[slot]; + if (!ptr) + continue; + if (assoc_array_ptr_is_meta(ptr)) { + parent = cursor; + cursor = ptr; + goto move_to_meta; + } + + if (ops) { + pr_devel("[%d] free leaf\n", slot); + ops->free_object(assoc_array_ptr_to_leaf(ptr)); + } + } + + parent = node->back_pointer; + slot = node->parent_slot; + pr_devel("free node\n"); + kfree(node); + if (!parent) + return; /* Done */ + + /* Move back up to the parent (may need to free a shortcut on + * the way up) */ + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + BUG_ON(shortcut->next_node != cursor); + cursor = parent; + parent = shortcut->back_pointer; + slot = shortcut->parent_slot; + pr_devel("free shortcut\n"); + kfree(shortcut); + if (!parent) + return; + + BUG_ON(!assoc_array_ptr_is_node(parent)); + } + + /* Ascend to next slot in parent node */ + pr_devel("ascend to %p[%d]\n", parent, slot); + cursor = parent; + node = assoc_array_ptr_to_node(cursor); + slot++; + goto continue_node; +} + +/** + * assoc_array_destroy - Destroy an associative array + * @array: The array to destroy. + * @ops: The operations to use. + * + * Discard all metadata and free all objects in an associative array. The + * array will be empty and ready to use again upon completion. This function + * cannot fail. + * + * The caller must prevent all other accesses whilst this takes place as no + * attempt is made to adjust pointers gracefully to permit RCU readlock-holding + * accesses to continue. On the other hand, no memory allocation is required. + */ +void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops) +{ + assoc_array_destroy_subtree(array->root, ops); + array->root = NULL; +} + +/* + * Handle insertion into an empty tree. + */ +static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) +{ + struct assoc_array_node *new_n0; + + pr_devel("-->%s()\n", __func__); + + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + edit->leaf_p = &new_n0->slots[0]; + edit->adjust_count_on = new_n0; + edit->set[0].ptr = &edit->array->root; + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + + pr_devel("<--%s() = ok [no root]\n", __func__); + return true; +} + +/* + * Handle insertion into a terminal node. + */ +static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, + const struct assoc_array_ops *ops, + const void *index_key, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut, *new_s0; + struct assoc_array_node *node, *new_n0, *new_n1, *side; + struct assoc_array_ptr *ptr; + unsigned long dissimilarity, base_seg, blank; + size_t keylen; + bool have_meta; + int level, diff; + int slot, next_slot, free_slot, i, j; + + node = result->terminal_node.node; + level = result->terminal_node.level; + edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; + + pr_devel("-->%s()\n", __func__); + + /* We arrived at a node which doesn't have an onward node or shortcut + * pointer that we have to follow. This means that (a) the leaf we + * want must go here (either by insertion or replacement) or (b) we + * need to split this node and insert in one of the fragments. + */ + free_slot = -1; + + /* Firstly, we have to check the leaves in this node to see if there's + * a matching one we should replace in place. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (!ptr) { + free_slot = i; + continue; + } + if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { + pr_devel("replace in slot %d\n", i); + edit->leaf_p = &node->slots[i]; + edit->dead_leaf = node->slots[i]; + pr_devel("<--%s() = ok [replace]\n", __func__); + return true; + } + } + + /* If there is a free slot in this node then we can just insert the + * leaf here. + */ + if (free_slot >= 0) { + pr_devel("insert in free slot %d\n", free_slot); + edit->leaf_p = &node->slots[free_slot]; + edit->adjust_count_on = node; + pr_devel("<--%s() = ok [insert]\n", __func__); + return true; + } + + /* The node has no spare slots - so we're either going to have to split + * it or insert another node before it. + * + * Whatever, we're going to need at least two new nodes - so allocate + * those now. We may also need a new shortcut, but we deal with that + * when we need it. + */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n1) + return false; + edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); + + /* We need to find out how similar the leaves are. */ + pr_devel("no spare slots\n"); + have_meta = false; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (assoc_array_ptr_is_meta(ptr)) { + edit->segment_cache[i] = 0xff; + have_meta = true; + continue; + } + base_seg = ops->get_object_key_chunk( + assoc_array_ptr_to_leaf(ptr), level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; + } + + if (have_meta) { + pr_devel("have meta\n"); + goto split_node; + } + + /* The node contains only leaves */ + dissimilarity = 0; + base_seg = edit->segment_cache[0]; + for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) + dissimilarity |= edit->segment_cache[i] ^ base_seg; + + pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); + + if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { + /* The old leaves all cluster in the same slot. We will need + * to insert a shortcut if the new node wants to cluster with them. + */ + if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) + goto all_leaves_cluster_together; + + /* Otherwise we can just insert a new node ahead of the old + * one. + */ + goto present_leaves_cluster_but_not_new_leaf; + } + +split_node: + pr_devel("split node\n"); + + /* We need to split the current node; we know that the node doesn't + * simply contain a full set of leaves that cluster together (it + * contains meta pointers and/or non-clustering leaves). + * + * We need to expel at least two leaves out of a set consisting of the + * leaves in the node and the new leaf. + * + * We need a new node (n0) to replace the current one and a new node to + * take the expelled nodes (n1). + */ + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = -1; /* Need to calculate this */ + +do_split_node: + pr_devel("do_split_node\n"); + + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + new_n1->nr_leaves_on_branch = 0; + + /* Begin by finding two matching leaves. There have to be at least two + * that match - even if there are meta pointers - because any leaf that + * would match a slot with a meta pointer in it must be somewhere + * behind that meta pointer and cannot be here. Further, given N + * remaining leaf slots, we now have N+1 leaves to go in them. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + slot = edit->segment_cache[i]; + if (slot != 0xff) + for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) + if (edit->segment_cache[j] == slot) + goto found_slot_for_multiple_occupancy; + } +found_slot_for_multiple_occupancy: + pr_devel("same slot: %x %x [%02x]\n", i, j, slot); + BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); + BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); + BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); + + new_n1->parent_slot = slot; + + /* Metadata pointers cannot change slot */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) + if (assoc_array_ptr_is_meta(node->slots[i])) + new_n0->slots[i] = node->slots[i]; + else + new_n0->slots[i] = NULL; + BUG_ON(new_n0->slots[slot] != NULL); + new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); + + /* Filter the leaf pointers between the new nodes */ + free_slot = -1; + next_slot = 0; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + if (assoc_array_ptr_is_meta(node->slots[i])) + continue; + if (edit->segment_cache[i] == slot) { + new_n1->slots[next_slot++] = node->slots[i]; + new_n1->nr_leaves_on_branch++; + } else { + do { + free_slot++; + } while (new_n0->slots[free_slot] != NULL); + new_n0->slots[free_slot] = node->slots[i]; + } + } + + pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); + + if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { + do { + free_slot++; + } while (new_n0->slots[free_slot] != NULL); + edit->leaf_p = &new_n0->slots[free_slot]; + edit->adjust_count_on = new_n0; + } else { + edit->leaf_p = &new_n1->slots[next_slot++]; + edit->adjust_count_on = new_n1; + } + + BUG_ON(next_slot <= 1); + + edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + if (edit->segment_cache[i] == 0xff) { + ptr = node->slots[i]; + BUG_ON(assoc_array_ptr_is_leaf(ptr)); + if (assoc_array_ptr_is_node(ptr)) { + side = assoc_array_ptr_to_node(ptr); + edit->set_backpointers[i] = &side->back_pointer; + } else { + shortcut = assoc_array_ptr_to_shortcut(ptr); + edit->set_backpointers[i] = &shortcut->back_pointer; + } + } + } + + ptr = node->back_pointer; + if (!ptr) + edit->set[0].ptr = &edit->array->root; + else if (assoc_array_ptr_is_node(ptr)) + edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; + else + edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; + edit->excised_meta[0] = assoc_array_node_to_ptr(node); + pr_devel("<--%s() = ok [split node]\n", __func__); + return true; + +present_leaves_cluster_but_not_new_leaf: + /* All the old leaves cluster in the same slot, but the new leaf wants + * to go into a different slot, so we create a new node to hold the new + * leaf and a pointer to a new node holding all the old leaves. + */ + pr_devel("present leaves cluster but not new leaf\n"); + + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = edit->segment_cache[0]; + new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; + edit->adjust_count_on = new_n0; + + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) + new_n1->slots[i] = node->slots[i]; + + new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); + edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; + + edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + edit->excised_meta[0] = assoc_array_node_to_ptr(node); + pr_devel("<--%s() = ok [insert node before]\n", __func__); + return true; + +all_leaves_cluster_together: + /* All the leaves, new and old, want to cluster together in this node + * in the same slot, so we have to replace this node with a shortcut to + * skip over the identical parts of the key and then place a pair of + * nodes, one inside the other, at the end of the shortcut and + * distribute the keys between them. + * + * Firstly we need to work out where the leaves start diverging as a + * bit position into their keys so that we know how big the shortcut + * needs to be. + * + * We only need to make a single pass of N of the N+1 leaves because if + * any keys differ between themselves at bit X then at least one of + * them must also differ with the base key at bit X or before. + */ + pr_devel("all leaves cluster together\n"); + diff = INT_MAX; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf), + assoc_array_ptr_to_leaf(node->slots[i])); + if (x < diff) { + BUG_ON(x < 0); + diff = x; + } + } + BUG_ON(diff == INT_MAX); + BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); + + keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s0) + return false; + edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); + + edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); + new_s0->back_pointer = node->back_pointer; + new_s0->parent_slot = node->parent_slot; + new_s0->next_node = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); + new_n0->parent_slot = 0; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = -1; /* Need to calculate this */ + + new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; + pr_devel("skip_to_level = %d [diff %d]\n", level, diff); + BUG_ON(level <= 0); + + for (i = 0; i < keylen; i++) + new_s0->index_key[i] = + ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); + + blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); + new_s0->index_key[keylen - 1] &= ~blank; + + /* This now reduces to a node splitting exercise for which we'll need + * to regenerate the disparity table. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), + level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; + } + + base_seg = ops->get_key_chunk(index_key, level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; + goto do_split_node; +} + +/* + * Handle insertion into the middle of a shortcut. + */ +static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, + const struct assoc_array_ops *ops, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; + struct assoc_array_node *node, *new_n0, *side; + unsigned long sc_segments, dissimilarity, blank; + size_t keylen; + int level, sc_level, diff; + int sc_slot; + + shortcut = result->wrong_shortcut.shortcut; + level = result->wrong_shortcut.level; + sc_level = result->wrong_shortcut.sc_level; + sc_segments = result->wrong_shortcut.sc_segments; + dissimilarity = result->wrong_shortcut.dissimilarity; + + pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", + __func__, level, dissimilarity, sc_level); + + /* We need to split a shortcut and insert a node between the two + * pieces. Zero-length pieces will be dispensed with entirely. + * + * First of all, we need to find out in which level the first + * difference was. + */ + diff = __ffs(dissimilarity); + diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; + diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; + pr_devel("diff=%d\n", diff); + + if (!shortcut->back_pointer) { + edit->set[0].ptr = &edit->array->root; + } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { + node = assoc_array_ptr_to_node(shortcut->back_pointer); + edit->set[0].ptr = &node->slots[shortcut->parent_slot]; + } else { + BUG(); + } + + edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); + + /* Create a new node now since we're going to need it anyway */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + edit->adjust_count_on = new_n0; + + /* Insert a new shortcut before the new node if this segment isn't of + * zero length - otherwise we just connect the new node directly to the + * parent. + */ + level += ASSOC_ARRAY_LEVEL_STEP; + if (diff > level) { + pr_devel("pre-shortcut %d...%d\n", level, diff); + keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s0) + return false; + edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); + edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); + new_s0->back_pointer = shortcut->back_pointer; + new_s0->parent_slot = shortcut->parent_slot; + new_s0->next_node = assoc_array_node_to_ptr(new_n0); + new_s0->skip_to_level = diff; + + new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); + new_n0->parent_slot = 0; + + memcpy(new_s0->index_key, shortcut->index_key, + keylen * sizeof(unsigned long)); + + blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); + new_s0->index_key[keylen - 1] &= ~blank; + } else { + pr_devel("no pre-shortcut\n"); + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = shortcut->back_pointer; + new_n0->parent_slot = shortcut->parent_slot; + } + + side = assoc_array_ptr_to_node(shortcut->next_node); + new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; + + /* We need to know which slot in the new node is going to take a + * metadata pointer. + */ + sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); + sc_slot &= ASSOC_ARRAY_FAN_MASK; + + pr_devel("new slot %lx >> %d -> %d\n", + sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); + + /* Determine whether we need to follow the new node with a replacement + * for the current shortcut. We could in theory reuse the current + * shortcut if its parent slot number doesn't change - but that's a + * 1-in-16 chance so not worth expending the code upon. + */ + level = diff + ASSOC_ARRAY_LEVEL_STEP; + if (level < shortcut->skip_to_level) { + pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); + keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s1) + return false; + edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); + + new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_s1->parent_slot = sc_slot; + new_s1->next_node = shortcut->next_node; + new_s1->skip_to_level = shortcut->skip_to_level; + + new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); + + memcpy(new_s1->index_key, shortcut->index_key, + keylen * sizeof(unsigned long)); + + edit->set[1].ptr = &side->back_pointer; + edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); + } else { + pr_devel("no post-shortcut\n"); + + /* We don't have to replace the pointed-to node as long as we + * use memory barriers to make sure the parent slot number is + * changed before the back pointer (the parent slot number is + * irrelevant to the old parent shortcut). + */ + new_n0->slots[sc_slot] = shortcut->next_node; + edit->set_parent_slot[0].p = &side->parent_slot; + edit->set_parent_slot[0].to = sc_slot; + edit->set[1].ptr = &side->back_pointer; + edit->set[1].to = assoc_array_node_to_ptr(new_n0); + } + + /* Install the new leaf in a spare slot in the new node. */ + if (sc_slot == 0) + edit->leaf_p = &new_n0->slots[1]; + else + edit->leaf_p = &new_n0->slots[0]; + + pr_devel("<--%s() = ok [split shortcut]\n", __func__); + return edit; +} + +/** + * assoc_array_insert - Script insertion of an object into an associative array + * @array: The array to insert into. + * @ops: The operations to use. + * @index_key: The key to insert at. + * @object: The object to insert. + * + * Precalculate and preallocate a script for the insertion or replacement of an + * object in an associative array. This results in an edit script that can + * either be applied or cancelled. + * + * The function returns a pointer to an edit script or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object) +{ + struct assoc_array_walk_result result; + struct assoc_array_edit *edit; + + pr_devel("-->%s()\n", __func__); + + /* The leaf pointer we're given must not have the bottom bit set as we + * use those for type-marking the pointer. NULL pointers are also not + * allowed as they indicate an empty slot but we have to allow them + * here as they can be updated later. + */ + BUG_ON(assoc_array_ptr_is_meta(object)); + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->leaf = assoc_array_leaf_to_ptr(object); + edit->adjust_count_by = 1; + + switch (assoc_array_walk(array, ops, index_key, &result)) { + case assoc_array_walk_tree_empty: + /* Allocate a root node if there isn't one yet */ + if (!assoc_array_insert_in_empty_tree(edit)) + goto enomem; + return edit; + + case assoc_array_walk_found_terminal_node: + /* We found a node that doesn't have a node/shortcut pointer in + * the slot corresponding to the index key that we have to + * follow. + */ + if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, + &result)) + goto enomem; + return edit; + + case assoc_array_walk_found_wrong_shortcut: + /* We found a shortcut that didn't match our key in a slot we + * needed to follow. + */ + if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) + goto enomem; + return edit; + } + +enomem: + /* Clean up after an out of memory error */ + pr_devel("enomem\n"); + assoc_array_cancel_edit(edit); + return ERR_PTR(-ENOMEM); +} + +/** + * assoc_array_insert_set_object - Set the new object pointer in an edit script + * @edit: The edit script to modify. + * @object: The object pointer to set. + * + * Change the object to be inserted in an edit script. The object pointed to + * by the old object is not freed. This must be done prior to applying the + * script. + */ +void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) +{ + BUG_ON(!object); + edit->leaf = assoc_array_leaf_to_ptr(object); +} + +struct assoc_array_delete_collapse_context { + struct assoc_array_node *node; + const void *skip_leaf; + int slot; +}; + +/* + * Subtree collapse to node iterator. + */ +static int assoc_array_delete_collapse_iterator(const void *leaf, + void *iterator_data) +{ + struct assoc_array_delete_collapse_context *collapse = iterator_data; + + if (leaf == collapse->skip_leaf) + return 0; + + BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); + + collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); + return 0; +} + +/** + * assoc_array_delete - Script deletion of an object from an associative array + * @array: The array to search. + * @ops: The operations to use. + * @index_key: The key to the object. + * + * Precalculate and preallocate a script for the deletion of an object from an + * associative array. This results in an edit script that can either be + * applied or cancelled. + * + * The function returns a pointer to an edit script if the object was found, + * NULL if the object was not found or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key) +{ + struct assoc_array_delete_collapse_context collapse; + struct assoc_array_walk_result result; + struct assoc_array_node *node, *new_n0; + struct assoc_array_edit *edit; + struct assoc_array_ptr *ptr; + bool has_meta; + int slot, i; + + pr_devel("-->%s()\n", __func__); + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->adjust_count_by = -1; + + switch (assoc_array_walk(array, ops, index_key, &result)) { + case assoc_array_walk_found_terminal_node: + /* We found a node that should contain the leaf we've been + * asked to remove - *if* it's in the tree. + */ + pr_devel("terminal_node\n"); + node = result.terminal_node.node; + + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = node->slots[slot]; + if (ptr && + assoc_array_ptr_is_leaf(ptr) && + ops->compare_object(assoc_array_ptr_to_leaf(ptr), + index_key)) + goto found_leaf; + } + case assoc_array_walk_tree_empty: + case assoc_array_walk_found_wrong_shortcut: + default: + assoc_array_cancel_edit(edit); + pr_devel("not found\n"); + return NULL; + } + +found_leaf: + BUG_ON(array->nr_leaves_on_tree <= 0); + + /* In the simplest form of deletion we just clear the slot and release + * the leaf after a suitable interval. + */ + edit->dead_leaf = node->slots[slot]; + edit->set[0].ptr = &node->slots[slot]; + edit->set[0].to = NULL; + edit->adjust_count_on = node; + + /* If that concludes erasure of the last leaf, then delete the entire + * internal array. + */ + if (array->nr_leaves_on_tree == 1) { + edit->set[1].ptr = &array->root; + edit->set[1].to = NULL; + edit->adjust_count_on = NULL; + edit->excised_subtree = array->root; + pr_devel("all gone\n"); + return edit; + } + + /* However, we'd also like to clear up some metadata blocks if we + * possibly can. + * + * We go for a simple algorithm of: if this node has FAN_OUT or fewer + * leaves in it, then attempt to collapse it - and attempt to + * recursively collapse up the tree. + * + * We could also try and collapse in partially filled subtrees to take + * up space in this node. + */ + if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { + struct assoc_array_node *parent, *grandparent; + struct assoc_array_ptr *ptr; + + /* First of all, we need to know if this node has metadata so + * that we don't try collapsing if all the leaves are already + * here. + */ + has_meta = false; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (assoc_array_ptr_is_meta(ptr)) { + has_meta = true; + break; + } + } + + pr_devel("leaves: %ld [m=%d]\n", + node->nr_leaves_on_branch - 1, has_meta); + + /* Look further up the tree to see if we can collapse this node + * into a more proximal node too. + */ + parent = node; + collapse_up: + pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); + + ptr = parent->back_pointer; + if (!ptr) + goto do_collapse; + if (assoc_array_ptr_is_shortcut(ptr)) { + struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); + ptr = s->back_pointer; + if (!ptr) + goto do_collapse; + } + + grandparent = assoc_array_ptr_to_node(ptr); + if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { + parent = grandparent; + goto collapse_up; + } + + do_collapse: + /* There's no point collapsing if the original node has no meta + * pointers to discard and if we didn't merge into one of that + * node's ancestry. + */ + if (has_meta || parent != node) { + node = parent; + + /* Create a new node to collapse into */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + goto enomem; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + edit->adjust_count_on = new_n0; + + collapse.node = new_n0; + collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); + collapse.slot = 0; + assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), + node->back_pointer, + assoc_array_delete_collapse_iterator, + &collapse); + pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); + BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); + + if (!node->back_pointer) { + edit->set[1].ptr = &array->root; + } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { + BUG(); + } else if (assoc_array_ptr_is_node(node->back_pointer)) { + struct assoc_array_node *p = + assoc_array_ptr_to_node(node->back_pointer); + edit->set[1].ptr = &p->slots[node->parent_slot]; + } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(node->back_pointer); + edit->set[1].ptr = &s->next_node; + } + edit->set[1].to = assoc_array_node_to_ptr(new_n0); + edit->excised_subtree = assoc_array_node_to_ptr(node); + } + } + + return edit; + +enomem: + /* Clean up after an out of memory error */ + pr_devel("enomem\n"); + assoc_array_cancel_edit(edit); + return ERR_PTR(-ENOMEM); +} + +/** + * assoc_array_clear - Script deletion of all objects from an associative array + * @array: The array to clear. + * @ops: The operations to use. + * + * Precalculate and preallocate a script for the deletion of all the objects + * from an associative array. This results in an edit script that can either + * be applied or cancelled. + * + * The function returns a pointer to an edit script if there are objects to be + * deleted, NULL if there are no objects in the array or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops) +{ + struct assoc_array_edit *edit; + + pr_devel("-->%s()\n", __func__); + + if (!array->root) + return NULL; + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->set[1].ptr = &array->root; + edit->set[1].to = NULL; + edit->excised_subtree = array->root; + edit->ops_for_excised_subtree = ops; + pr_devel("all gone\n"); + return edit; +} + +/* + * Handle the deferred destruction after an applied edit. + */ +static void assoc_array_rcu_cleanup(struct rcu_head *head) +{ + struct assoc_array_edit *edit = + container_of(head, struct assoc_array_edit, rcu); + int i; + + pr_devel("-->%s()\n", __func__); + + if (edit->dead_leaf) + edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); + for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) + if (edit->excised_meta[i]) + kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); + + if (edit->excised_subtree) { + BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); + if (assoc_array_ptr_is_node(edit->excised_subtree)) { + struct assoc_array_node *n = + assoc_array_ptr_to_node(edit->excised_subtree); + n->back_pointer = NULL; + } else { + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(edit->excised_subtree); + s->back_pointer = NULL; + } + assoc_array_destroy_subtree(edit->excised_subtree, + edit->ops_for_excised_subtree); + } + + kfree(edit); +} + +/** + * assoc_array_apply_edit - Apply an edit script to an associative array + * @edit: The script to apply. + * + * Apply an edit script to an associative array to effect an insertion, + * deletion or clearance. As the edit script includes preallocated memory, + * this is guaranteed not to fail. + * + * The edit script, dead objects and dead metadata will be scheduled for + * destruction after an RCU grace period to permit those doing read-only + * accesses on the array to continue to do so under the RCU read lock whilst + * the edit is taking place. + */ +void assoc_array_apply_edit(struct assoc_array_edit *edit) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *ptr; + int i; + + pr_devel("-->%s()\n", __func__); + + smp_wmb(); + if (edit->leaf_p) + *edit->leaf_p = edit->leaf; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) + if (edit->set_parent_slot[i].p) + *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) + if (edit->set_backpointers[i]) + *edit->set_backpointers[i] = edit->set_backpointers_to; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set); i++) + if (edit->set[i].ptr) + *edit->set[i].ptr = edit->set[i].to; + + if (edit->array->root == NULL) { + edit->array->nr_leaves_on_tree = 0; + } else if (edit->adjust_count_on) { + node = edit->adjust_count_on; + for (;;) { + node->nr_leaves_on_branch += edit->adjust_count_by; + + ptr = node->back_pointer; + if (!ptr) + break; + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + ptr = shortcut->back_pointer; + if (!ptr) + break; + } + BUG_ON(!assoc_array_ptr_is_node(ptr)); + node = assoc_array_ptr_to_node(ptr); + } + + edit->array->nr_leaves_on_tree += edit->adjust_count_by; + } + + call_rcu(&edit->rcu, assoc_array_rcu_cleanup); +} + +/** + * assoc_array_cancel_edit - Discard an edit script. + * @edit: The script to discard. + * + * Free an edit script and all the preallocated data it holds without making + * any changes to the associative array it was intended for. + * + * NOTE! In the case of an insertion script, this does _not_ release the leaf + * that was to be inserted. That is left to the caller. + */ +void assoc_array_cancel_edit(struct assoc_array_edit *edit) +{ + struct assoc_array_ptr *ptr; + int i; + + pr_devel("-->%s()\n", __func__); + + /* Clean up after an out of memory error */ + for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { + ptr = edit->new_meta[i]; + if (ptr) { + if (assoc_array_ptr_is_node(ptr)) + kfree(assoc_array_ptr_to_node(ptr)); + else + kfree(assoc_array_ptr_to_shortcut(ptr)); + } + } + kfree(edit); +} + +/** + * assoc_array_gc - Garbage collect an associative array. + * @array: The array to clean. + * @ops: The operations to use. + * @iterator: A callback function to pass judgement on each object. + * @iterator_data: Private data for the callback function. + * + * Collect garbage from an associative array and pack down the internal tree to + * save memory. + * + * The iterator function is asked to pass judgement upon each object in the + * array. If it returns false, the object is discard and if it returns true, + * the object is kept. If it returns true, it must increment the object's + * usage count (or whatever it needs to do to retain it) before returning. + * + * This function returns 0 if successful or -ENOMEM if out of memory. In the + * latter case, the array is not changed. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data) +{ + struct assoc_array_shortcut *shortcut, *new_s; + struct assoc_array_node *node, *new_n; + struct assoc_array_edit *edit; + struct assoc_array_ptr *cursor, *ptr; + struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; + unsigned long nr_leaves_on_tree; + int keylen, slot, nr_free, next_slot, i; + + pr_devel("-->%s()\n", __func__); + + if (!array->root) + return 0; + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return -ENOMEM; + edit->array = array; + edit->ops = ops; + edit->ops_for_excised_subtree = ops; + edit->set[0].ptr = &array->root; + edit->excised_subtree = array->root; + + new_root = new_parent = NULL; + new_ptr_pp = &new_root; + cursor = array->root; + +descend: + /* If this point is a shortcut, then we need to duplicate it and + * advance the target cursor. + */ + if (assoc_array_ptr_is_shortcut(cursor)) { + shortcut = assoc_array_ptr_to_shortcut(cursor); + keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + new_s = kmalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s) + goto enomem; + pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); + memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long))); + new_s->back_pointer = new_parent; + new_s->parent_slot = shortcut->parent_slot; + *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); + new_ptr_pp = &new_s->next_node; + cursor = shortcut->next_node; + } + + /* Duplicate the node at this position */ + node = assoc_array_ptr_to_node(cursor); + new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n) + goto enomem; + pr_devel("dup node %p -> %p\n", node, new_n); + new_n->back_pointer = new_parent; + new_n->parent_slot = node->parent_slot; + *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); + new_ptr_pp = NULL; + slot = 0; + +continue_node: + /* Filter across any leaves and gc any subtrees */ + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = node->slots[slot]; + if (!ptr) + continue; + + if (assoc_array_ptr_is_leaf(ptr)) { + if (iterator(assoc_array_ptr_to_leaf(ptr), + iterator_data)) + /* The iterator will have done any reference + * counting on the object for us. + */ + new_n->slots[slot] = ptr; + continue; + } + + new_ptr_pp = &new_n->slots[slot]; + cursor = ptr; + goto descend; + } + + pr_devel("-- compress node %p --\n", new_n); + + /* Count up the number of empty slots in this node and work out the + * subtree leaf count. + */ + new_n->nr_leaves_on_branch = 0; + nr_free = 0; + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = new_n->slots[slot]; + if (!ptr) + nr_free++; + else if (assoc_array_ptr_is_leaf(ptr)) + new_n->nr_leaves_on_branch++; + } + pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); + + /* See what we can fold in */ + next_slot = 0; + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_shortcut *s; + struct assoc_array_node *child; + + ptr = new_n->slots[slot]; + if (!ptr || assoc_array_ptr_is_leaf(ptr)) + continue; + + s = NULL; + if (assoc_array_ptr_is_shortcut(ptr)) { + s = assoc_array_ptr_to_shortcut(ptr); + ptr = s->next_node; + } + + child = assoc_array_ptr_to_node(ptr); + new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; + + if (child->nr_leaves_on_branch <= nr_free + 1) { + /* Fold the child node into this one */ + pr_devel("[%d] fold node %lu/%d [nx %d]\n", + slot, child->nr_leaves_on_branch, nr_free + 1, + next_slot); + + /* We would already have reaped an intervening shortcut + * on the way back up the tree. + */ + BUG_ON(s); + + new_n->slots[slot] = NULL; + nr_free++; + if (slot < next_slot) + next_slot = slot; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + struct assoc_array_ptr *p = child->slots[i]; + if (!p) + continue; + BUG_ON(assoc_array_ptr_is_meta(p)); + while (new_n->slots[next_slot]) + next_slot++; + BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); + new_n->slots[next_slot++] = p; + nr_free--; + } + kfree(child); + } else { + pr_devel("[%d] retain node %lu/%d [nx %d]\n", + slot, child->nr_leaves_on_branch, nr_free + 1, + next_slot); + } + } + + pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); + + nr_leaves_on_tree = new_n->nr_leaves_on_branch; + + /* Excise this node if it is singly occupied by a shortcut */ + if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) + if ((ptr = new_n->slots[slot])) + break; + + if (assoc_array_ptr_is_meta(ptr) && + assoc_array_ptr_is_shortcut(ptr)) { + pr_devel("excise node %p with 1 shortcut\n", new_n); + new_s = assoc_array_ptr_to_shortcut(ptr); + new_parent = new_n->back_pointer; + slot = new_n->parent_slot; + kfree(new_n); + if (!new_parent) { + new_s->back_pointer = NULL; + new_s->parent_slot = 0; + new_root = ptr; + goto gc_complete; + } + + if (assoc_array_ptr_is_shortcut(new_parent)) { + /* We can discard any preceding shortcut also */ + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(new_parent); + + pr_devel("excise preceding shortcut\n"); + + new_parent = new_s->back_pointer = s->back_pointer; + slot = new_s->parent_slot = s->parent_slot; + kfree(s); + if (!new_parent) { + new_s->back_pointer = NULL; + new_s->parent_slot = 0; + new_root = ptr; + goto gc_complete; + } + } + + new_s->back_pointer = new_parent; + new_s->parent_slot = slot; + new_n = assoc_array_ptr_to_node(new_parent); + new_n->slots[slot] = ptr; + goto ascend_old_tree; + } + } + + /* Excise any shortcuts we might encounter that point to nodes that + * only contain leaves. + */ + ptr = new_n->back_pointer; + if (!ptr) + goto gc_complete; + + if (assoc_array_ptr_is_shortcut(ptr)) { + new_s = assoc_array_ptr_to_shortcut(ptr); + new_parent = new_s->back_pointer; + slot = new_s->parent_slot; + + if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { + struct assoc_array_node *n; + + pr_devel("excise shortcut\n"); + new_n->back_pointer = new_parent; + new_n->parent_slot = slot; + kfree(new_s); + if (!new_parent) { + new_root = assoc_array_node_to_ptr(new_n); + goto gc_complete; + } + + n = assoc_array_ptr_to_node(new_parent); + n->slots[slot] = assoc_array_node_to_ptr(new_n); + } + } else { + new_parent = ptr; + } + new_n = assoc_array_ptr_to_node(new_parent); + +ascend_old_tree: + ptr = node->back_pointer; + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + slot = shortcut->parent_slot; + cursor = shortcut->back_pointer; + } else { + slot = node->parent_slot; + cursor = ptr; + } + BUG_ON(!ptr); + node = assoc_array_ptr_to_node(cursor); + slot++; + goto continue_node; + +gc_complete: + edit->set[0].to = new_root; + assoc_array_apply_edit(edit); + edit->array->nr_leaves_on_tree = nr_leaves_on_tree; + return 0; + +enomem: + pr_devel("enomem\n"); + assoc_array_destroy_subtree(new_root, edit->ops); + kfree(edit); + return -ENOMEM; +} diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 657979f..bf076d2 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -121,3 +121,6 @@ void mpi_free(MPI a) kfree(a); } EXPORT_SYMBOL_GPL(mpi_free); + +MODULE_DESCRIPTION("Multiprecision maths library"); +MODULE_LICENSE("GPL"); diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index b0698ea..9d054bf 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -117,8 +117,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool, min(pool->nr_free, pool->percpu_batch_size)); } -static inline unsigned alloc_local_tag(struct percpu_ida *pool, - struct percpu_ida_cpu *tags) +static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) { int tag = -ENOSPC; @@ -159,7 +158,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) tags = this_cpu_ptr(pool->tag_cpu); /* Fastpath */ - tag = alloc_local_tag(pool, tags); + tag = alloc_local_tag(tags); if (likely(tag >= 0)) { local_irq_restore(flags); return tag; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7d57af2..dee6cf4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -476,40 +476,6 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg) return 0; } -static void copy_gigantic_page(struct page *dst, struct page *src) -{ - int i; - struct hstate *h = page_hstate(src); - struct page *dst_base = dst; - struct page *src_base = src; - - for (i = 0; i < pages_per_huge_page(h); ) { - cond_resched(); - copy_highpage(dst, src); - - i++; - dst = mem_map_next(dst, dst_base, i); - src = mem_map_next(src, src_base, i); - } -} - -void copy_huge_page(struct page *dst, struct page *src) -{ - int i; - struct hstate *h = page_hstate(src); - - if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { - copy_gigantic_page(dst, src); - return; - } - - might_sleep(); - for (i = 0; i < pages_per_huge_page(h); i++) { - cond_resched(); - copy_highpage(dst + i, src + i); - } -} - static void enqueue_huge_page(struct hstate *h, struct page *page) { int nid = page_to_nid(page); @@ -736,6 +702,23 @@ int PageHuge(struct page *page) } EXPORT_SYMBOL_GPL(PageHuge); +/* + * PageHeadHuge() only returns true for hugetlbfs head page, but not for + * normal or transparent huge pages. + */ +int PageHeadHuge(struct page *page_head) +{ + compound_page_dtor *dtor; + + if (!PageHead(page_head)) + return 0; + + dtor = get_compound_page_dtor(page_head); + + return dtor == free_huge_page; +} +EXPORT_SYMBOL_GPL(PageHeadHuge); + pgoff_t __basepage_index(struct page *page) { struct page *page_head = compound_head(page); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c4403cd..eca4a31 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2950,7 +2950,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) return; } - p += snprintf(p, maxlen, policy_modes[mode]); + p += snprintf(p, maxlen, "%s", policy_modes[mode]); if (flags & MPOL_MODE_FLAGS) { p += snprintf(p, buffer + maxlen - p, "="); diff --git a/mm/migrate.c b/mm/migrate.c index 316e720..bb94004 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -442,6 +442,54 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, } /* + * Gigantic pages are so large that we do not guarantee that page++ pointer + * arithmetic will work across the entire page. We need something more + * specialized. + */ +static void __copy_gigantic_page(struct page *dst, struct page *src, + int nr_pages) +{ + int i; + struct page *dst_base = dst; + struct page *src_base = src; + + for (i = 0; i < nr_pages; ) { + cond_resched(); + copy_highpage(dst, src); + + i++; + dst = mem_map_next(dst, dst_base, i); + src = mem_map_next(src, src_base, i); + } +} + +static void copy_huge_page(struct page *dst, struct page *src) +{ + int i; + int nr_pages; + + if (PageHuge(src)) { + /* hugetlbfs page */ + struct hstate *h = page_hstate(src); + nr_pages = pages_per_huge_page(h); + + if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) { + __copy_gigantic_page(dst, src, nr_pages); + return; + } + } else { + /* thp page */ + BUG_ON(!PageTransHuge(src)); + nr_pages = hpage_nr_pages(src); + } + + for (i = 0; i < nr_pages; i++) { + cond_resched(); + copy_highpage(dst + i, src + i); + } +} + +/* * Copy the page to its new location */ void migrate_page_copy(struct page *newpage, struct page *page) @@ -164,72 +164,6 @@ static bool pfmemalloc_active __read_mostly; /* - * kmem_bufctl_t: - * - * Bufctl's are used for linking objs within a slab - * linked offsets. - * - * This implementation relies on "struct page" for locating the cache & - * slab an object belongs to. - * This allows the bufctl structure to be small (one int), but limits - * the number of objects a slab (not a cache) can contain when off-slab - * bufctls are used. The limit is the size of the largest general cache - * that does not use off-slab slabs. - * For 32bit archs with 4 kB pages, is this 56. - * This is not serious, as it is only for large objects, when it is unwise - * to have too many per slab. - * Note: This limit can be raised by introducing a general cache whose size - * is less than 512 (PAGE_SIZE<<3), but greater than 256. - */ - -typedef unsigned int kmem_bufctl_t; -#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) -#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) -#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) -#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) - -/* - * struct slab_rcu - * - * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to - * arrange for kmem_freepages to be called via RCU. This is useful if - * we need to approach a kernel structure obliquely, from its address - * obtained without the usual locking. We can lock the structure to - * stabilize it and check it's still at the given address, only if we - * can be sure that the memory has not been meanwhile reused for some - * other kind of object (which our subsystem's lock might corrupt). - * - * rcu_read_lock before reading the address, then rcu_read_unlock after - * taking the spinlock within the structure expected at that address. - */ -struct slab_rcu { - struct rcu_head head; - struct kmem_cache *cachep; - void *addr; -}; - -/* - * struct slab - * - * Manages the objs in a slab. Placed either at the beginning of mem allocated - * for a slab, or allocated from an general cache. - * Slabs are chained into three list: fully used, partial, fully free slabs. - */ -struct slab { - union { - struct { - struct list_head list; - unsigned long colouroff; - void *s_mem; /* including colour offset */ - unsigned int inuse; /* num of objs active in slab */ - kmem_bufctl_t free; - unsigned short nodeid; - }; - struct slab_rcu __slab_cover_slab_rcu; - }; -}; - -/* * struct array_cache * * Purpose: @@ -456,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) return page->slab_cache; } -static inline struct slab *virt_to_slab(const void *obj) -{ - struct page *page = virt_to_head_page(obj); - - VM_BUG_ON(!PageSlab(page)); - return page->slab_page; -} - -static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, +static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, unsigned int idx) { - return slab->s_mem + cache->size * idx; + return page->s_mem + cache->size * idx; } /* @@ -477,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, * reciprocal_divide(offset, cache->reciprocal_buffer_size) */ static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct slab *slab, void *obj) + const struct page *page, void *obj) { - u32 offset = (obj - slab->s_mem); + u32 offset = (obj - page->s_mem); return reciprocal_divide(offset, cache->reciprocal_buffer_size); } @@ -641,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) static size_t slab_mgmt_size(size_t nr_objs, size_t align) { - return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); + return ALIGN(nr_objs * sizeof(unsigned int), align); } /* @@ -660,8 +586,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, * on it. For the latter case, the memory allocated for a * slab is used for: * - * - The struct slab - * - One kmem_bufctl_t for each object + * - One unsigned int for each object * - Padding to respect alignment of @align * - @buffer_size bytes for each object * @@ -674,8 +599,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, mgmt_size = 0; nr_objs = slab_size / buffer_size; - if (nr_objs > SLAB_LIMIT) - nr_objs = SLAB_LIMIT; } else { /* * Ignore padding for the initial guess. The padding @@ -685,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, * into the memory allocation when taking the padding * into account. */ - nr_objs = (slab_size - sizeof(struct slab)) / - (buffer_size + sizeof(kmem_bufctl_t)); + nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int)); /* * This calculated number will be either the right @@ -696,9 +618,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, > slab_size) nr_objs--; - if (nr_objs > SLAB_LIMIT) - nr_objs = SLAB_LIMIT; - mgmt_size = slab_mgmt_size(nr_objs, align); } *num = nr_objs; @@ -829,10 +748,8 @@ static struct array_cache *alloc_arraycache(int node, int entries, return nc; } -static inline bool is_slab_pfmemalloc(struct slab *slabp) +static inline bool is_slab_pfmemalloc(struct page *page) { - struct page *page = virt_to_page(slabp->s_mem); - return PageSlabPfmemalloc(page); } @@ -841,23 +758,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep, struct array_cache *ac) { struct kmem_cache_node *n = cachep->node[numa_mem_id()]; - struct slab *slabp; + struct page *page; unsigned long flags; if (!pfmemalloc_active) return; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(slabp, &n->slabs_full, list) - if (is_slab_pfmemalloc(slabp)) + list_for_each_entry(page, &n->slabs_full, lru) + if (is_slab_pfmemalloc(page)) goto out; - list_for_each_entry(slabp, &n->slabs_partial, list) - if (is_slab_pfmemalloc(slabp)) + list_for_each_entry(page, &n->slabs_partial, lru) + if (is_slab_pfmemalloc(page)) goto out; - list_for_each_entry(slabp, &n->slabs_free, list) - if (is_slab_pfmemalloc(slabp)) + list_for_each_entry(page, &n->slabs_free, lru) + if (is_slab_pfmemalloc(page)) goto out; pfmemalloc_active = false; @@ -897,8 +814,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, */ n = cachep->node[numa_mem_id()]; if (!list_empty(&n->slabs_free) && force_refill) { - struct slab *slabp = virt_to_slab(objp); - ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); + struct page *page = virt_to_head_page(objp); + ClearPageSlabPfmemalloc(page); clear_obj_pfmemalloc(&objp); recheck_pfmemalloc_active(cachep, ac); return objp; @@ -1099,8 +1016,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) { - struct slab *slabp = virt_to_slab(objp); - int nodeid = slabp->nodeid; + int nodeid = page_to_nid(virt_to_page(objp)); struct kmem_cache_node *n; struct array_cache *alien = NULL; int node; @@ -1111,7 +1027,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) * Make sure we are not freeing a object from another node to the array * cache on this cpu. */ - if (likely(slabp->nodeid == node)) + if (likely(nodeid == node)) return 0; n = cachep->node[node]; @@ -1512,6 +1428,8 @@ void __init kmem_cache_init(void) { int i; + BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) < + sizeof(struct rcu_head)); kmem_cache = &kmem_cache_boot; setup_node_pointer(kmem_cache); @@ -1687,7 +1605,7 @@ static noinline void slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) { struct kmem_cache_node *n; - struct slab *slabp; + struct page *page; unsigned long flags; int node; @@ -1706,15 +1624,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) continue; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(slabp, &n->slabs_full, list) { + list_for_each_entry(page, &n->slabs_full, lru) { active_objs += cachep->num; active_slabs++; } - list_for_each_entry(slabp, &n->slabs_partial, list) { - active_objs += slabp->inuse; + list_for_each_entry(page, &n->slabs_partial, lru) { + active_objs += page->active; active_slabs++; } - list_for_each_entry(slabp, &n->slabs_free, list) + list_for_each_entry(page, &n->slabs_free, lru) num_slabs++; free_objects += n->free_objects; @@ -1736,19 +1654,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) * did not request dmaable memory, we might get it, but that * would be relatively rare and ignorable. */ -static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) +static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, + int nodeid) { struct page *page; int nr_pages; - int i; - -#ifndef CONFIG_MMU - /* - * Nommu uses slab's for process anonymous memory allocations, and thus - * requires __GFP_COMP to properly refcount higher order allocations - */ - flags |= __GFP_COMP; -#endif flags |= cachep->allocflags; if (cachep->flags & SLAB_RECLAIM_ACCOUNT) @@ -1772,12 +1682,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) else add_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_pages); - for (i = 0; i < nr_pages; i++) { - __SetPageSlab(page + i); - - if (page->pfmemalloc) - SetPageSlabPfmemalloc(page + i); - } + __SetPageSlab(page); + if (page->pfmemalloc) + SetPageSlabPfmemalloc(page); memcg_bind_pages(cachep, cachep->gfporder); if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { @@ -1789,17 +1696,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) kmemcheck_mark_unallocated_pages(page, nr_pages); } - return page_address(page); + return page; } /* * Interface to system's page release. */ -static void kmem_freepages(struct kmem_cache *cachep, void *addr) +static void kmem_freepages(struct kmem_cache *cachep, struct page *page) { - unsigned long i = (1 << cachep->gfporder); - struct page *page = virt_to_page(addr); - const unsigned long nr_freed = i; + const unsigned long nr_freed = (1 << cachep->gfporder); kmemcheck_free_shadow(page, cachep->gfporder); @@ -1809,27 +1714,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) else sub_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_freed); - while (i--) { - BUG_ON(!PageSlab(page)); - __ClearPageSlabPfmemalloc(page); - __ClearPageSlab(page); - page++; - } + + BUG_ON(!PageSlab(page)); + __ClearPageSlabPfmemalloc(page); + __ClearPageSlab(page); + page_mapcount_reset(page); + page->mapping = NULL; memcg_release_pages(cachep, cachep->gfporder); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; - free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder); + __free_memcg_kmem_pages(page, cachep->gfporder); } static void kmem_rcu_free(struct rcu_head *head) { - struct slab_rcu *slab_rcu = (struct slab_rcu *)head; - struct kmem_cache *cachep = slab_rcu->cachep; + struct kmem_cache *cachep; + struct page *page; - kmem_freepages(cachep, slab_rcu->addr); - if (OFF_SLAB(cachep)) - kmem_cache_free(cachep->slabp_cache, slab_rcu); + page = container_of(head, struct page, rcu_head); + cachep = page->slab_cache; + + kmem_freepages(cachep, page); } #if DEBUG @@ -1978,19 +1884,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) /* Print some data about the neighboring objects, if they * exist: */ - struct slab *slabp = virt_to_slab(objp); + struct page *page = virt_to_head_page(objp); unsigned int objnr; - objnr = obj_to_index(cachep, slabp, objp); + objnr = obj_to_index(cachep, page, objp); if (objnr) { - objp = index_to_obj(cachep, slabp, objnr - 1); + objp = index_to_obj(cachep, page, objnr - 1); realobj = (char *)objp + obj_offset(cachep); printk(KERN_ERR "Prev obj: start=%p, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); } if (objnr + 1 < cachep->num) { - objp = index_to_obj(cachep, slabp, objnr + 1); + objp = index_to_obj(cachep, page, objnr + 1); realobj = (char *)objp + obj_offset(cachep); printk(KERN_ERR "Next obj: start=%p, len=%d\n", realobj, size); @@ -2001,11 +1907,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) #endif #if DEBUG -static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy_debugcheck(struct kmem_cache *cachep, + struct page *page) { int i; for (i = 0; i < cachep->num; i++) { - void *objp = index_to_obj(cachep, slabp, i); + void *objp = index_to_obj(cachep, page, i); if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC @@ -2030,7 +1937,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab } } #else -static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy_debugcheck(struct kmem_cache *cachep, + struct page *page) { } #endif @@ -2044,23 +1952,34 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab * Before calling the slab must have been unlinked from the cache. The * cache-lock is not held/needed. */ -static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy(struct kmem_cache *cachep, struct page *page) { - void *addr = slabp->s_mem - slabp->colouroff; + void *freelist; - slab_destroy_debugcheck(cachep, slabp); + freelist = page->freelist; + slab_destroy_debugcheck(cachep, page); if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { - struct slab_rcu *slab_rcu; + struct rcu_head *head; + + /* + * RCU free overloads the RCU head over the LRU. + * slab_page has been overloeaded over the LRU, + * however it is not used from now on so that + * we can use it safely. + */ + head = (void *)&page->rcu_head; + call_rcu(head, kmem_rcu_free); - slab_rcu = (struct slab_rcu *)slabp; - slab_rcu->cachep = cachep; - slab_rcu->addr = addr; - call_rcu(&slab_rcu->head, kmem_rcu_free); } else { - kmem_freepages(cachep, addr); - if (OFF_SLAB(cachep)) - kmem_cache_free(cachep->slabp_cache, slabp); + kmem_freepages(cachep, page); } + + /* + * From now on, we don't use freelist + * although actual page can be freed in rcu context + */ + if (OFF_SLAB(cachep)) + kmem_cache_free(cachep->freelist_cache, freelist); } /** @@ -2097,8 +2016,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, * use off-slab slabs. Needed to avoid a possible * looping condition in cache_grow(). */ - offslab_limit = size - sizeof(struct slab); - offslab_limit /= sizeof(kmem_bufctl_t); + offslab_limit = size; + offslab_limit /= sizeof(unsigned int); if (num > offslab_limit) break; @@ -2220,7 +2139,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) int __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) { - size_t left_over, slab_size, ralign; + size_t left_over, freelist_size, ralign; gfp_t gfp; int err; size_t size = cachep->size; @@ -2339,22 +2258,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (!cachep->num) return -E2BIG; - slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) - + sizeof(struct slab), cachep->align); + freelist_size = + ALIGN(cachep->num * sizeof(unsigned int), cachep->align); /* * If the slab has been placed off-slab, and we have enough space then * move it on-slab. This is at the expense of any extra colouring. */ - if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { + if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) { flags &= ~CFLGS_OFF_SLAB; - left_over -= slab_size; + left_over -= freelist_size; } if (flags & CFLGS_OFF_SLAB) { /* really off slab. No need for manual alignment */ - slab_size = - cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); + freelist_size = cachep->num * sizeof(unsigned int); #ifdef CONFIG_PAGE_POISONING /* If we're going to use the generic kernel_map_pages() @@ -2371,16 +2289,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (cachep->colour_off < cachep->align) cachep->colour_off = cachep->align; cachep->colour = left_over / cachep->colour_off; - cachep->slab_size = slab_size; + cachep->freelist_size = freelist_size; cachep->flags = flags; - cachep->allocflags = 0; + cachep->allocflags = __GFP_COMP; if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) cachep->allocflags |= GFP_DMA; cachep->size = size; cachep->reciprocal_buffer_size = reciprocal_value(size); if (flags & CFLGS_OFF_SLAB) { - cachep->slabp_cache = kmalloc_slab(slab_size, 0u); + cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); /* * This is a possibility for one of the malloc_sizes caches. * But since we go off slab only for object size greater than @@ -2388,7 +2306,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) * this should not happen at all. * But leave a BUG_ON for some lucky dude. */ - BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache)); + BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); } err = setup_cpu_cache(cachep, gfp); @@ -2494,7 +2412,7 @@ static int drain_freelist(struct kmem_cache *cache, { struct list_head *p; int nr_freed; - struct slab *slabp; + struct page *page; nr_freed = 0; while (nr_freed < tofree && !list_empty(&n->slabs_free)) { @@ -2506,18 +2424,18 @@ static int drain_freelist(struct kmem_cache *cache, goto out; } - slabp = list_entry(p, struct slab, list); + page = list_entry(p, struct page, lru); #if DEBUG - BUG_ON(slabp->inuse); + BUG_ON(page->active); #endif - list_del(&slabp->list); + list_del(&page->lru); /* * Safe to drop the lock. The slab is no longer linked * to the cache. */ n->free_objects -= cache->num; spin_unlock_irq(&n->list_lock); - slab_destroy(cache, slabp); + slab_destroy(cache, page); nr_freed++; } out: @@ -2600,52 +2518,42 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) * descriptors in kmem_cache_create, we search through the malloc_sizes array. * If we are creating a malloc_sizes cache here it would not be visible to * kmem_find_general_cachep till the initialization is complete. - * Hence we cannot have slabp_cache same as the original cache. + * Hence we cannot have freelist_cache same as the original cache. */ -static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, - int colour_off, gfp_t local_flags, - int nodeid) +static void *alloc_slabmgmt(struct kmem_cache *cachep, + struct page *page, int colour_off, + gfp_t local_flags, int nodeid) { - struct slab *slabp; + void *freelist; + void *addr = page_address(page); if (OFF_SLAB(cachep)) { /* Slab management obj is off-slab. */ - slabp = kmem_cache_alloc_node(cachep->slabp_cache, + freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - /* - * If the first object in the slab is leaked (it's allocated - * but no one has a reference to it), we want to make sure - * kmemleak does not treat the ->s_mem pointer as a reference - * to the object. Otherwise we will not report the leak. - */ - kmemleak_scan_area(&slabp->list, sizeof(struct list_head), - local_flags); - if (!slabp) + if (!freelist) return NULL; } else { - slabp = objp + colour_off; - colour_off += cachep->slab_size; + freelist = addr + colour_off; + colour_off += cachep->freelist_size; } - slabp->inuse = 0; - slabp->colouroff = colour_off; - slabp->s_mem = objp + colour_off; - slabp->nodeid = nodeid; - slabp->free = 0; - return slabp; + page->active = 0; + page->s_mem = addr + colour_off; + return freelist; } -static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) +static inline unsigned int *slab_freelist(struct page *page) { - return (kmem_bufctl_t *) (slabp + 1); + return (unsigned int *)(page->freelist); } static void cache_init_objs(struct kmem_cache *cachep, - struct slab *slabp) + struct page *page) { int i; for (i = 0; i < cachep->num; i++) { - void *objp = index_to_obj(cachep, slabp, i); + void *objp = index_to_obj(cachep, page, i); #if DEBUG /* need to poison the objs? */ if (cachep->flags & SLAB_POISON) @@ -2681,9 +2589,8 @@ static void cache_init_objs(struct kmem_cache *cachep, if (cachep->ctor) cachep->ctor(objp); #endif - slab_bufctl(slabp)[i] = i + 1; + slab_freelist(page)[i] = i; } - slab_bufctl(slabp)[i - 1] = BUFCTL_END; } static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) @@ -2696,41 +2603,41 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) } } -static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, +static void *slab_get_obj(struct kmem_cache *cachep, struct page *page, int nodeid) { - void *objp = index_to_obj(cachep, slabp, slabp->free); - kmem_bufctl_t next; + void *objp; - slabp->inuse++; - next = slab_bufctl(slabp)[slabp->free]; + objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]); + page->active++; #if DEBUG - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; - WARN_ON(slabp->nodeid != nodeid); + WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); #endif - slabp->free = next; return objp; } -static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, +static void slab_put_obj(struct kmem_cache *cachep, struct page *page, void *objp, int nodeid) { - unsigned int objnr = obj_to_index(cachep, slabp, objp); - + unsigned int objnr = obj_to_index(cachep, page, objp); #if DEBUG + unsigned int i; + /* Verify that the slab belongs to the intended node */ - WARN_ON(slabp->nodeid != nodeid); + WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); - if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { - printk(KERN_ERR "slab: double free detected in cache " - "'%s', objp %p\n", cachep->name, objp); - BUG(); + /* Verify double free bug */ + for (i = page->active; i < cachep->num; i++) { + if (slab_freelist(page)[i] == objnr) { + printk(KERN_ERR "slab: double free detected in cache " + "'%s', objp %p\n", cachep->name, objp); + BUG(); + } } #endif - slab_bufctl(slabp)[objnr] = slabp->free; - slabp->free = objnr; - slabp->inuse--; + page->active--; + slab_freelist(page)[page->active] = objnr; } /* @@ -2738,23 +2645,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, * for the slab allocator to be able to lookup the cache and slab of a * virtual address for kfree, ksize, and slab debugging. */ -static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, - void *addr) +static void slab_map_pages(struct kmem_cache *cache, struct page *page, + void *freelist) { - int nr_pages; - struct page *page; - - page = virt_to_page(addr); - - nr_pages = 1; - if (likely(!PageCompound(page))) - nr_pages <<= cache->gfporder; - - do { - page->slab_cache = cache; - page->slab_page = slab; - page++; - } while (--nr_pages); + page->slab_cache = cache; + page->freelist = freelist; } /* @@ -2762,9 +2657,9 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, * kmem_cache_alloc() when there are no active objs left in a cache. */ static int cache_grow(struct kmem_cache *cachep, - gfp_t flags, int nodeid, void *objp) + gfp_t flags, int nodeid, struct page *page) { - struct slab *slabp; + void *freelist; size_t offset; gfp_t local_flags; struct kmem_cache_node *n; @@ -2805,20 +2700,20 @@ static int cache_grow(struct kmem_cache *cachep, * Get mem for the objs. Attempt to allocate a physical page from * 'nodeid'. */ - if (!objp) - objp = kmem_getpages(cachep, local_flags, nodeid); - if (!objp) + if (!page) + page = kmem_getpages(cachep, local_flags, nodeid); + if (!page) goto failed; /* Get slab management. */ - slabp = alloc_slabmgmt(cachep, objp, offset, + freelist = alloc_slabmgmt(cachep, page, offset, local_flags & ~GFP_CONSTRAINT_MASK, nodeid); - if (!slabp) + if (!freelist) goto opps1; - slab_map_pages(cachep, slabp, objp); + slab_map_pages(cachep, page, freelist); - cache_init_objs(cachep, slabp); + cache_init_objs(cachep, page); if (local_flags & __GFP_WAIT) local_irq_disable(); @@ -2826,13 +2721,13 @@ static int cache_grow(struct kmem_cache *cachep, spin_lock(&n->list_lock); /* Make slab active. */ - list_add_tail(&slabp->list, &(n->slabs_free)); + list_add_tail(&page->lru, &(n->slabs_free)); STATS_INC_GROWN(cachep); n->free_objects += cachep->num; spin_unlock(&n->list_lock); return 1; opps1: - kmem_freepages(cachep, objp); + kmem_freepages(cachep, page); failed: if (local_flags & __GFP_WAIT) local_irq_disable(); @@ -2880,9 +2775,8 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, unsigned long caller) { - struct page *page; unsigned int objnr; - struct slab *slabp; + struct page *page; BUG_ON(virt_to_cache(objp) != cachep); @@ -2890,8 +2784,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, kfree_debugcheck(objp); page = virt_to_head_page(objp); - slabp = page->slab_page; - if (cachep->flags & SLAB_RED_ZONE) { verify_redzone_free(cachep, objp); *dbg_redzone1(cachep, objp) = RED_INACTIVE; @@ -2900,14 +2792,11 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = (void *)caller; - objnr = obj_to_index(cachep, slabp, objp); + objnr = obj_to_index(cachep, page, objp); BUG_ON(objnr >= cachep->num); - BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); + BUG_ON(objp != index_to_obj(cachep, page, objnr)); -#ifdef CONFIG_DEBUG_SLAB_LEAK - slab_bufctl(slabp)[objnr] = BUFCTL_FREE; -#endif if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { @@ -2924,33 +2813,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, return objp; } -static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) -{ - kmem_bufctl_t i; - int entries = 0; - - /* Check slab's freelist to see if this obj is there. */ - for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { - entries++; - if (entries > cachep->num || i >= cachep->num) - goto bad; - } - if (entries != cachep->num - slabp->inuse) { -bad: - printk(KERN_ERR "slab: Internal list corruption detected in " - "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n", - cachep->name, cachep->num, slabp, slabp->inuse, - print_tainted()); - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp, - sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t), - 1); - BUG(); - } -} #else #define kfree_debugcheck(x) do { } while(0) #define cache_free_debugcheck(x,objp,z) (objp) -#define check_slabp(x,y) do { } while(0) #endif static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, @@ -2989,7 +2854,7 @@ retry: while (batchcount > 0) { struct list_head *entry; - struct slab *slabp; + struct page *page; /* Get slab alloc is to come from. */ entry = n->slabs_partial.next; if (entry == &n->slabs_partial) { @@ -2999,8 +2864,7 @@ retry: goto must_grow; } - slabp = list_entry(entry, struct slab, list); - check_slabp(cachep, slabp); + page = list_entry(entry, struct page, lru); check_spinlock_acquired(cachep); /* @@ -3008,24 +2872,23 @@ retry: * there must be at least one object available for * allocation. */ - BUG_ON(slabp->inuse >= cachep->num); + BUG_ON(page->active >= cachep->num); - while (slabp->inuse < cachep->num && batchcount--) { + while (page->active < cachep->num && batchcount--) { STATS_INC_ALLOCED(cachep); STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, + ac_put_obj(cachep, ac, slab_get_obj(cachep, page, node)); } - check_slabp(cachep, slabp); /* move slabp to correct slabp list: */ - list_del(&slabp->list); - if (slabp->free == BUFCTL_END) - list_add(&slabp->list, &n->slabs_full); + list_del(&page->lru); + if (page->active == cachep->num) + list_add(&page->list, &n->slabs_full); else - list_add(&slabp->list, &n->slabs_partial); + list_add(&page->list, &n->slabs_partial); } must_grow: @@ -3097,16 +2960,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, *dbg_redzone1(cachep, objp) = RED_ACTIVE; *dbg_redzone2(cachep, objp) = RED_ACTIVE; } -#ifdef CONFIG_DEBUG_SLAB_LEAK - { - struct slab *slabp; - unsigned objnr; - - slabp = virt_to_head_page(objp)->slab_page; - objnr = (unsigned)(objp - slabp->s_mem) / cachep->size; - slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; - } -#endif objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); @@ -3248,18 +3101,20 @@ retry: * We may trigger various forms of reclaim on the allowed * set and go into memory reserves if necessary. */ + struct page *page; + if (local_flags & __GFP_WAIT) local_irq_enable(); kmem_flagcheck(cache, flags); - obj = kmem_getpages(cache, local_flags, numa_mem_id()); + page = kmem_getpages(cache, local_flags, numa_mem_id()); if (local_flags & __GFP_WAIT) local_irq_disable(); - if (obj) { + if (page) { /* * Insert into the appropriate per node queues */ - nid = page_to_nid(virt_to_page(obj)); - if (cache_grow(cache, flags, nid, obj)) { + nid = page_to_nid(page); + if (cache_grow(cache, flags, nid, page)) { obj = ____cache_alloc_node(cache, flags | GFP_THISNODE, nid); if (!obj) @@ -3288,7 +3143,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct list_head *entry; - struct slab *slabp; + struct page *page; struct kmem_cache_node *n; void *obj; int x; @@ -3308,26 +3163,24 @@ retry: goto must_grow; } - slabp = list_entry(entry, struct slab, list); + page = list_entry(entry, struct page, lru); check_spinlock_acquired_node(cachep, nodeid); - check_slabp(cachep, slabp); STATS_INC_NODEALLOCS(cachep); STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - BUG_ON(slabp->inuse == cachep->num); + BUG_ON(page->active == cachep->num); - obj = slab_get_obj(cachep, slabp, nodeid); - check_slabp(cachep, slabp); + obj = slab_get_obj(cachep, page, nodeid); n->free_objects--; /* move slabp to correct slabp list: */ - list_del(&slabp->list); + list_del(&page->lru); - if (slabp->free == BUFCTL_END) - list_add(&slabp->list, &n->slabs_full); + if (page->active == cachep->num) + list_add(&page->lru, &n->slabs_full); else - list_add(&slabp->list, &n->slabs_partial); + list_add(&page->lru, &n->slabs_partial); spin_unlock(&n->list_lock); goto done; @@ -3477,23 +3330,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, for (i = 0; i < nr_objects; i++) { void *objp; - struct slab *slabp; + struct page *page; clear_obj_pfmemalloc(&objpp[i]); objp = objpp[i]; - slabp = virt_to_slab(objp); + page = virt_to_head_page(objp); n = cachep->node[node]; - list_del(&slabp->list); + list_del(&page->lru); check_spinlock_acquired_node(cachep, node); - check_slabp(cachep, slabp); - slab_put_obj(cachep, slabp, objp, node); + slab_put_obj(cachep, page, objp, node); STATS_DEC_ACTIVE(cachep); n->free_objects++; - check_slabp(cachep, slabp); /* fixup slab chains */ - if (slabp->inuse == 0) { + if (page->active == 0) { if (n->free_objects > n->free_limit) { n->free_objects -= cachep->num; /* No need to drop any previously held @@ -3502,16 +3353,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, * a different cache, refer to comments before * alloc_slabmgmt. */ - slab_destroy(cachep, slabp); + slab_destroy(cachep, page); } else { - list_add(&slabp->list, &n->slabs_free); + list_add(&page->lru, &n->slabs_free); } } else { /* Unconditionally move a slab to the end of the * partial list on free - maximum time for the * other objects to be freed, too. */ - list_add_tail(&slabp->list, &n->slabs_partial); + list_add_tail(&page->lru, &n->slabs_partial); } } } @@ -3551,10 +3402,10 @@ free_done: p = n->slabs_free.next; while (p != &(n->slabs_free)) { - struct slab *slabp; + struct page *page; - slabp = list_entry(p, struct slab, list); - BUG_ON(slabp->inuse); + page = list_entry(p, struct page, lru); + BUG_ON(page->active); i++; p = p->next; @@ -4158,7 +4009,7 @@ out: #ifdef CONFIG_SLABINFO void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) { - struct slab *slabp; + struct page *page; unsigned long active_objs; unsigned long num_objs; unsigned long active_slabs = 0; @@ -4178,23 +4029,23 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) check_irq_on(); spin_lock_irq(&n->list_lock); - list_for_each_entry(slabp, &n->slabs_full, list) { - if (slabp->inuse != cachep->num && !error) + list_for_each_entry(page, &n->slabs_full, lru) { + if (page->active != cachep->num && !error) error = "slabs_full accounting error"; active_objs += cachep->num; active_slabs++; } - list_for_each_entry(slabp, &n->slabs_partial, list) { - if (slabp->inuse == cachep->num && !error) - error = "slabs_partial inuse accounting error"; - if (!slabp->inuse && !error) - error = "slabs_partial/inuse accounting error"; - active_objs += slabp->inuse; + list_for_each_entry(page, &n->slabs_partial, lru) { + if (page->active == cachep->num && !error) + error = "slabs_partial accounting error"; + if (!page->active && !error) + error = "slabs_partial accounting error"; + active_objs += page->active; active_slabs++; } - list_for_each_entry(slabp, &n->slabs_free, list) { - if (slabp->inuse && !error) - error = "slabs_free/inuse accounting error"; + list_for_each_entry(page, &n->slabs_free, lru) { + if (page->active && !error) + error = "slabs_free accounting error"; num_slabs++; } free_objects += n->free_objects; @@ -4346,15 +4197,27 @@ static inline int add_caller(unsigned long *n, unsigned long v) return 1; } -static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) +static void handle_slab(unsigned long *n, struct kmem_cache *c, + struct page *page) { void *p; - int i; + int i, j; + if (n[0] == n[1]) return; - for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) { - if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) + for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { + bool active = true; + + for (j = page->active; j < c->num; j++) { + /* Skip freed item */ + if (slab_freelist(page)[j] == i) { + active = false; + break; + } + } + if (!active) continue; + if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) return; } @@ -4379,7 +4242,7 @@ static void show_symbol(struct seq_file *m, unsigned long address) static int leaks_show(struct seq_file *m, void *p) { struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); - struct slab *slabp; + struct page *page; struct kmem_cache_node *n; const char *name; unsigned long *x = m->private; @@ -4403,10 +4266,10 @@ static int leaks_show(struct seq_file *m, void *p) check_irq_on(); spin_lock_irq(&n->list_lock); - list_for_each_entry(slabp, &n->slabs_full, list) - handle_slab(x, cachep, slabp); - list_for_each_entry(slabp, &n->slabs_partial, list) - handle_slab(x, cachep, slabp); + list_for_each_entry(page, &n->slabs_full, lru) + handle_slab(x, cachep, page); + list_for_each_entry(page, &n->slabs_partial, lru) + handle_slab(x, cachep, page); spin_unlock_irq(&n->list_lock); } name = cachep->name; @@ -155,7 +155,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) /* * Maximum number of desirable partial slabs. * The existence of more partial slabs makes kmem_cache_shrink - * sort the partial list by the number of objects in the. + * sort the partial list by the number of objects in use. */ #define MAX_PARTIAL 10 @@ -933,6 +933,16 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. */ +static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) +{ + kmemleak_alloc(ptr, size, 1, flags); +} + +static inline void kfree_hook(const void *x) +{ + kmemleak_free(x); +} + static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) { flags &= gfp_allowed_mask; @@ -1217,8 +1227,8 @@ static unsigned long kmem_cache_flags(unsigned long object_size, /* * Enable debugging if selected on the kernel commandline. */ - if (slub_debug && (!slub_debug_slabs || - !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) + if (slub_debug && (!slub_debug_slabs || (name && + !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))) flags |= slub_debug; return flags; @@ -1260,13 +1270,30 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} +static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) +{ + kmemleak_alloc(ptr, size, 1, flags); +} + +static inline void kfree_hook(const void *x) +{ + kmemleak_free(x); +} + static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) { return 0; } static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, - void *object) {} + void *object) +{ + kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, + flags & gfp_allowed_mask); +} -static inline void slab_free_hook(struct kmem_cache *s, void *x) {} +static inline void slab_free_hook(struct kmem_cache *s, void *x) +{ + kmemleak_free_recursive(x, s->flags); +} #endif /* CONFIG_SLUB_DEBUG */ @@ -2829,8 +2856,8 @@ static struct kmem_cache *kmem_cache_node; * slab on the node for this slabcache. There are no concurrent accesses * possible. * - * Note that this function only works on the kmalloc_node_cache - * when allocating for the kmalloc_node_cache. This is used for bootstrapping + * Note that this function only works on the kmem_cache_node + * when allocating for the kmem_cache_node. This is used for bootstrapping * memory on a fresh node that has no slab structures yet. */ static void early_kmem_cache_node_alloc(int node) @@ -3272,7 +3299,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) if (page) ptr = page_address(page); - kmemleak_alloc(ptr, size, 1, flags); + kmalloc_large_node_hook(ptr, size, flags); return ptr; } @@ -3336,7 +3363,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); - kmemleak_free(x); + kfree_hook(x); __free_memcg_kmem_pages(page, compound_order(page)); return; } @@ -82,19 +82,6 @@ static void __put_compound_page(struct page *page) static void put_compound_page(struct page *page) { - /* - * hugetlbfs pages cannot be split from under us. If this is a - * hugetlbfs page, check refcount on head page and release the page if - * the refcount becomes zero. - */ - if (PageHuge(page)) { - page = compound_head(page); - if (put_page_testzero(page)) - __put_compound_page(page); - - return; - } - if (unlikely(PageTail(page))) { /* __split_huge_page_refcount can run under us */ struct page *page_head = compound_trans_head(page); @@ -111,14 +98,31 @@ static void put_compound_page(struct page *page) * still hot on arches that do not support * this_cpu_cmpxchg_double(). */ - if (PageSlab(page_head)) { - if (PageTail(page)) { + if (PageSlab(page_head) || PageHeadHuge(page_head)) { + if (likely(PageTail(page))) { + /* + * __split_huge_page_refcount + * cannot race here. + */ + VM_BUG_ON(!PageHead(page_head)); + atomic_dec(&page->_mapcount); if (put_page_testzero(page_head)) VM_BUG_ON(1); - - atomic_dec(&page->_mapcount); - goto skip_lock_tail; + if (put_page_testzero(page_head)) + __put_compound_page(page_head); + return; } else + /* + * __split_huge_page_refcount + * run before us, "page" was a + * THP tail. The split + * page_head has been freed + * and reallocated as slab or + * hugetlbfs page of smaller + * order (only possible if + * reallocated as slab on + * x86). + */ goto skip_lock; } /* @@ -132,8 +136,27 @@ static void put_compound_page(struct page *page) /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); skip_lock: - if (put_page_testzero(page_head)) - __put_single_page(page_head); + if (put_page_testzero(page_head)) { + /* + * The head page may have been + * freed and reallocated as a + * compound page of smaller + * order and then freed again. + * All we know is that it + * cannot have become: a THP + * page, a compound page of + * higher order, a tail page. + * That is because we still + * hold the refcount of the + * split THP tail and + * page_head was the THP head + * before the split. + */ + if (PageHead(page_head)) + __put_compound_page(page_head); + else + __put_single_page(page_head); + } out_put_single: if (put_page_testzero(page)) __put_single_page(page); @@ -155,7 +178,6 @@ out_put_single: VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); -skip_lock_tail: if (put_page_testzero(page_head)) { if (PageHead(page_head)) __put_compound_page(page_head); @@ -198,51 +220,52 @@ bool __get_page_tail(struct page *page) * proper PT lock that already serializes against * split_huge_page(). */ + unsigned long flags; bool got = false; - struct page *page_head; - - /* - * If this is a hugetlbfs page it cannot be split under us. Simply - * increment refcount for the head page. - */ - if (PageHuge(page)) { - page_head = compound_head(page); - atomic_inc(&page_head->_count); - got = true; - } else { - unsigned long flags; + struct page *page_head = compound_trans_head(page); - page_head = compound_trans_head(page); - if (likely(page != page_head && - get_page_unless_zero(page_head))) { - - /* Ref to put_compound_page() comment. */ - if (PageSlab(page_head)) { - if (likely(PageTail(page))) { - __get_page_tail_foll(page, false); - return true; - } else { - put_page(page_head); - return false; - } - } - - /* - * page_head wasn't a dangling pointer but it - * may not be a head page anymore by the time - * we obtain the lock. That is ok as long as it - * can't be freed from under us. - */ - flags = compound_lock_irqsave(page_head); - /* here __split_huge_page_refcount won't run anymore */ + if (likely(page != page_head && get_page_unless_zero(page_head))) { + /* Ref to put_compound_page() comment. */ + if (PageSlab(page_head) || PageHeadHuge(page_head)) { if (likely(PageTail(page))) { + /* + * This is a hugetlbfs page or a slab + * page. __split_huge_page_refcount + * cannot race here. + */ + VM_BUG_ON(!PageHead(page_head)); __get_page_tail_foll(page, false); - got = true; - } - compound_unlock_irqrestore(page_head, flags); - if (unlikely(!got)) + return true; + } else { + /* + * __split_huge_page_refcount run + * before us, "page" was a THP + * tail. The split page_head has been + * freed and reallocated as slab or + * hugetlbfs page of smaller order + * (only possible if reallocated as + * slab on x86). + */ put_page(page_head); + return false; + } + } + + /* + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time + * we obtain the lock. That is ok as long as it + * can't be freed from under us. + */ + flags = compound_lock_irqsave(page_head); + /* here __split_huge_page_refcount won't run anymore */ + if (likely(PageTail(page))) { + __get_page_tail_foll(page, false); + got = true; } + compound_unlock_irqrestore(page_head, flags); + if (unlikely(!got)) + put_page(page_head); } return got; } diff --git a/net/Kconfig b/net/Kconfig index 0715db6..d334678 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -224,7 +224,7 @@ source "net/hsr/Kconfig" config RPS boolean - depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + depends on SMP && SYSFS default y config RFS_ACCEL @@ -235,7 +235,7 @@ config RFS_ACCEL config XPS boolean - depends on SMP && USE_GENERIC_SMP_HELPERS + depends on SMP default y config NETPRIO_CGROUP diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7fee50d..7d424ac 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1735,7 +1735,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr size_t size, int flags) { struct sock *sk = sock->sk; - struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; int offset = 0; @@ -1764,14 +1763,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); - if (!err) { - if (sat) { - sat->sat_family = AF_APPLETALK; - sat->sat_port = ddp->deh_sport; - sat->sat_addr.s_node = ddp->deh_snode; - sat->sat_addr.s_net = ddp->deh_snet; - } - msg->msg_namelen = sizeof(*sat); + if (!err && msg->msg_name) { + struct sockaddr_at *sat = msg->msg_name; + sat->sat_family = AF_APPLETALK; + sat->sat_port = ddp->deh_sport; + sat->sat_addr.s_node = ddp->deh_snode; + sat->sat_addr.s_net = ddp->deh_snet; + msg->msg_namelen = sizeof(*sat); } skb_free_datagram(sk, skb); /* Free the datagram. */ diff --git a/net/atm/common.c b/net/atm/common.c index 737bef5..7b49100 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -531,8 +531,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; - msg->msg_namelen = 0; - if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a00123e..7bb1605 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1636,11 +1636,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (msg->msg_namelen != 0) { - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; + if (msg->msg_name) { ax25_digi digi; ax25_address src; const unsigned char *mac = skb_mac_header(skb); + struct sockaddr_ax25 *sax = msg->msg_name; memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f6a1671..56ca494 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -224,10 +224,9 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { - if (sk->sk_shutdown & RCV_SHUTDOWN) { - msg->msg_namelen = 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; - } + return err; } @@ -245,8 +244,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, &msg->msg_namelen); - else - msg->msg_namelen = 0; } skb_free_datagram(sk, skb); @@ -295,8 +292,6 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & MSG_OOB) return -EOPNOTSUPP; - msg->msg_namelen = 0; - BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 71f0be1..6a6c8bb 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -856,8 +856,6 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0cef677..4af3821 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2439,6 +2439,9 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, int err; struct sk_buff_head seg_queue; + if (!chan->conn) + return -ENOTCONN; + /* Connectionless channel */ if (chan->chan_type == L2CAP_CHAN_CONN_LESS) { skb = l2cap_create_connless_pdu(chan, msg, len, priority); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 94d06cb..facd8a7 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -694,6 +694,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_family = AF_BLUETOOTH; addr.l2_psm = 0; addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (*err < 0) goto failed; @@ -719,6 +720,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_family = AF_BLUETOOTH; addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); if (*err == 0 || *err == -EINPROGRESS) return s; @@ -1983,6 +1985,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) addr.l2_family = AF_BLUETOOTH; addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (err < 0) { BT_ERR("Bind failed %d", err); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c4d3d42..3c2d3e4 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -615,7 +615,6 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); - msg->msg_namelen = 0; return 0; } @@ -739,8 +738,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; + struct sock *l2cap_sk; + struct l2cap_conn *conn; struct rfcomm_conninfo cinfo; - struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn; int len, err = 0; u32 opt; @@ -783,6 +783,9 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u break; } + l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; + conn = l2cap_pi(l2cap_sk)->chan->conn; + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 12a0e51..24fa396 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -711,7 +711,6 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { sco_conn_defer_accept(pi->conn->hcon, pi->setting); sk->sk_state = BT_CONFIG; - msg->msg_namelen = 0; release_sock(sk); return 0; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 85a2796c..4b07acb 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -742,6 +742,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); + if (!(conn->hcon->link_mode & HCI_LM_MASTER)) + return SMP_CMD_NOTSUPP; + hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req); if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 6e6194f..4bf02ad 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -172,6 +172,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_fdb_delete_by_port(br, NULL, 1); + br_vlan_flush(br); del_timer_sync(&br->gc_timer); diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 99c8566..17fd5f2 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -48,10 +48,12 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->bitmask & EBT_IP6_TCLASS && FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) return false; - if (FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, - &info->saddr), EBT_IP6_SOURCE) || + if ((info->bitmask & EBT_IP6_SOURCE && + FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr), EBT_IP6_SOURCE)) || + (info->bitmask & EBT_IP6_DEST && FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, - &info->daddr), EBT_IP6_DEST)) + &info->daddr), EBT_IP6_DEST))) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 05a41c7..d6be3ed 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; - m->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; @@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - /* * Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg diff --git a/net/compat.c b/net/compat.c index 8903258..618c6a8 100644 --- a/net/compat.c +++ b/net/compat.c @@ -93,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - kern_msg->msg_name = kern_address; + if (kern_msg->msg_name) + kern_msg->msg_name = kern_address; } else kern_msg->msg_name = NULL; diff --git a/net/core/dev.c b/net/core/dev.c index 7e00a73..ba3b7ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4996,7 +4996,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } diff --git a/net/core/iovec.c b/net/core/iovec.c index 4cdb7c4..b618694 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - m->msg_name = address; + if (m->msg_name) + m->msg_name = address; } else { m->msg_name = NULL; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8cec1e6..2718fed 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2796,6 +2796,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *fskb = skb_shinfo(skb)->frag_list; + skb_frag_t *skb_frag = skb_shinfo(skb)->frags; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2835,16 +2836,38 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags) { - BUG_ON(fskb->len != len); + if (!hsize && i >= nfrags && skb_headlen(fskb) && + (skb_headlen(fskb) == len || sg)) { + BUG_ON(skb_headlen(fskb) > len); + + i = 0; + nfrags = skb_shinfo(fskb)->nr_frags; + skb_frag = skb_shinfo(fskb)->frags; + pos += skb_headlen(fskb); + + while (pos < offset + len) { + BUG_ON(i >= nfrags); + + size = skb_frag_size(skb_frag); + if (pos + size > offset + len) + break; + + i++; + pos += size; + skb_frag++; + } - pos += len; nskb = skb_clone(fskb, GFP_ATOMIC); fskb = fskb->next; if (unlikely(!nskb)) goto err; + if (unlikely(pskb_trim(nskb, len))) { + kfree_skb(nskb); + goto err; + } + hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); @@ -2881,7 +2904,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) nskb->data - tnl_hlen, doffset + tnl_hlen); - if (fskb != skb_shinfo(skb)->frag_list) + if (nskb->len == len + doffset) goto perform_csum_check; if (!sg) { @@ -2899,8 +2922,28 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; - while (pos < offset + len && i < nfrags) { - *frag = skb_shinfo(skb)->frags[i]; + while (pos < offset + len) { + if (i >= nfrags) { + BUG_ON(skb_headlen(fskb)); + + i = 0; + nfrags = skb_shinfo(fskb)->nr_frags; + skb_frag = skb_shinfo(fskb)->frags; + + BUG_ON(!nfrags); + + fskb = fskb->next; + } + + if (unlikely(skb_shinfo(nskb)->nr_frags >= + MAX_SKB_FRAGS)) { + net_warn_ratelimited( + "skb_segment: too many frags: %u %u\n", + pos, mss); + goto err; + } + + *frag = *skb_frag; __skb_frag_ref(frag); size = skb_frag_size(frag); @@ -2913,6 +2956,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (pos + size <= offset + len) { i++; + skb_frag++; pos += size; } else { skb_frag_size_sub(frag, pos + size - (offset + len)); @@ -2922,25 +2966,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) frag++; } - if (pos < offset + len) { - struct sk_buff *fskb2 = fskb; - - BUG_ON(pos + fskb->len != offset + len); - - pos += fskb->len; - fskb = fskb->next; - - if (fskb2->next) { - fskb2 = skb_clone(fskb2, GFP_ATOMIC); - if (!fskb2) - goto err; - } else - skb_get(fskb2); - - SKB_FRAG_ASSERT(nskb); - skb_shinfo(nskb)->frag_list = fskb2; - } - skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 01cffea..f13bd91 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -244,6 +244,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, this_cpu_inc(snet->stats->cookie_valid); opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f428935..f8da282 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1776,8 +1776,12 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (do_cache) - rt_cache_route(&FIB_RES_NH(res), rth); + if (do_cache) { + if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { + rth->dst.flags |= DST_NOCACHE; + rt_add_uncached_list(rth); + } + } skb_dst_set(skb, &rth->dst); err = 0; goto out; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index bf9f612..f78f41a 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -259,6 +259,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, this_cpu_inc(snet->stats->cookie_valid); opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 7a1e0fc..e096025 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1823,8 +1823,6 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb->tstamp.tv64) sk->sk_stamp = skb->tstamp; - msg->msg_namelen = sizeof(*sipx); - if (sipx) { sipx->sipx_family = AF_IPX; sipx->sipx_port = ipx->ipx_source.sock; @@ -1832,6 +1830,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net; sipx->sipx_type = ipx->ipx_type; sipx->sipx_zero = 0; + msg->msg_namelen = sizeof(*sipx); } rc = copied; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 0f67690..de7db23 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1385,8 +1385,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); - msg->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -1451,8 +1449,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); - msg->msg_namelen = 0; - do { int chunk; struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 168aff5..c4b7218 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1324,8 +1324,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int err = 0; u32 offset; - msg->msg_namelen = 0; - if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && diff --git a/net/key/af_key.c b/net/key/af_key.c index 911ef03..545f047 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3616,7 +3616,6 @@ static int pfkey_recvmsg(struct kiocb *kiocb, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - msg->msg_namelen = 0; skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index ffda81e..be5fadf 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -197,8 +197,6 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state & PPPOX_BOUND) goto end; - msg->msg_namelen = 0; - err = 0; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6cba486..7b01b9f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,8 +720,6 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; - msg->msg_namelen = 0; - lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 48acec1..c3398cd 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -909,7 +909,7 @@ config NETFILTER_XT_MATCH_CONNLABEL connection simultaneously. config NETFILTER_XT_MATCH_CONNLIMIT - tristate '"connlimit" match support"' + tristate '"connlimit" match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED ---help--- diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e22d950..43549eb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -764,9 +764,10 @@ void nf_conntrack_free(struct nf_conn *ct) struct net *net = nf_ct_net(ct); nf_ct_ext_destroy(ct); - atomic_dec(&net->ct.count); nf_ct_ext_free(ct); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + smp_mb__before_atomic_dec(); + atomic_dec(&net->ct.count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 5f9bfd0..17c1bcb 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -41,8 +41,8 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, spin_lock_bh(&ct->lock); this_way = &seqadj->seq[dir]; if (this_way->offset_before == this_way->offset_after || - before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; + before(this_way->correction_pos, ntohl(seq))) { + this_way->correction_pos = ntohl(seq); this_way->offset_before = this_way->offset_after; this_way->offset_after += off; } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index cdf4567..9858e3e 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -151,9 +151,10 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, opts->tsecr = opts->tsval; opts->tsval = tcp_time_stamp & ~0x3f; - if (opts->options & XT_SYNPROXY_OPT_WSCALE) - opts->tsval |= info->wscale; - else + if (opts->options & XT_SYNPROXY_OPT_WSCALE) { + opts->tsval |= opts->wscale; + opts->wscale = info->wscale; + } else opts->tsval |= 0xf; if (opts->options & XT_SYNPROXY_OPT_SACK_PERM) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index a82667c..da0c1f4 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -128,7 +128,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, }; -static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) +static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; u32 flags; @@ -148,7 +148,8 @@ static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + return 0; } static int @@ -166,8 +167,11 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); - if (ctx->nla[NFTA_RULE_COMPAT]) - proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + if (ctx->nla[NFTA_RULE_COMPAT]) { + ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); + if (ret < 0) + goto err; + } nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); @@ -356,8 +360,11 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); - if (ctx->nla[NFTA_RULE_COMPAT]) - proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + if (ctx->nla[NFTA_RULE_COMPAT]) { + ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); + if (ret < 0) + goto err; + } nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f0176e1..bca50b9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2335,8 +2335,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } #endif - msg->msg_namelen = 0; - copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 7dbc4f7..4518a57 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1045,7 +1045,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group, flags); @@ -1062,7 +1062,7 @@ void genl_notify(struct genl_family *family, if (nlh) report = nlmsg_report(nlh); - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; group = family->mcgrp_offset + group; nlmsg_notify(sk, skb, portid, group, report, flags); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 698814b..53c19a3 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1179,10 +1179,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); + msg->msg_namelen = sizeof(*sax); } - msg->msg_namelen = sizeof(*sax); - skb_free_datagram(sk, skb); release_sock(sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index d308402b..824c605 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -807,8 +807,6 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index cd958b3..66bcd2e 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -244,8 +244,6 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return rc; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2e8286b..ac27c86 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -244,11 +244,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -266,10 +270,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -2052,12 +2059,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2070,7 +2089,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2084,19 +2103,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2173,8 +2190,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2212,7 +2228,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2228,7 +2243,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2240,19 +2255,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2386,15 +2399,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2614,6 +2626,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -2660,7 +2673,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; - struct sockaddr_ll *sll; int vnet_hdr_len = 0; err = -EINVAL; @@ -2744,22 +2756,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_free; } - /* - * If the address length field is there to be filled in, we fill - * it in now. + /* You lose any data beyond the buffer you gave. If it worries + * a user program they can ask the device for its MTU + * anyway. */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. - */ - copied = skb->len; if (copied > len) { copied = len; @@ -2772,9 +2772,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); - if (msg->msg_name) + if (msg->msg_name) { + /* If the address length field is there to be filled + * in, we fill it in now. + */ + if (sock->type == SOCK_PACKET) { + msg->msg_namelen = sizeof(struct sockaddr_pkt); + } else { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + + offsetof(struct sockaddr_ll, sll_addr); + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); + } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; diff --git a/net/packet/internal.h b/net/packet/internal.h index c4e4b45..1035fa2 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -113,6 +113,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/rds/recv.c b/net/rds/recv.c index 9f0f17c..de339b2 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); - msg->msg_namelen = 0; - if (msg_flags & MSG_OOB) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index e98fcfb..33af772 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1216,7 +1216,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name; size_t copied; unsigned char *asmptr; struct sk_buff *skb; @@ -1252,8 +1251,11 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (srose != NULL) { - memset(srose, 0, msg->msg_namelen); + if (msg->msg_name) { + struct sockaddr_rose *srose; + + memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); + srose = msg->msg_name; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4b48687..898492a 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* copy the peer address and timestamp */ if (!continue_call) { - if (msg->msg_name && msg->msg_namelen > 0) + if (msg->msg_name) { + size_t len = + sizeof(call->conn->trans->peer->srx); memcpy(msg->msg_name, - &call->conn->trans->peer->srx, - sizeof(call->conn->trans->peer->srx)); + &call->conn->trans->peer->srx, len); + msg->msg_namelen = len; + } sock_recv_ts_and_drops(msg, &rx->sk, skb); } diff --git a/net/socket.c b/net/socket.c index c226ace..0b18693 100644 --- a/net/socket.c +++ b/net/socket.c @@ -221,12 +221,13 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, int err; int len; + BUG_ON(klen > sizeof(struct sockaddr_storage)); err = get_user(len, ulen); if (err) return err; if (len > klen) len = klen; - if (len < 0 || len > sizeof(struct sockaddr_storage)) + if (len < 0) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) @@ -1840,8 +1841,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = sizeof(address); + /* Save some cycles and don't copy the address if not needed */ + msg.msg_name = addr ? (struct sockaddr *)&address : NULL; + /* We assume all kernel code knows the size of sockaddr_storage */ + msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); @@ -2221,16 +2224,14 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, goto out; } - /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + /* Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); - if (MSG_CMSG_COMPAT & flags) { + if (MSG_CMSG_COMPAT & flags) err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); - } else + else err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; @@ -2239,6 +2240,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + /* We assume all kernel code knows the size of sockaddr_storage */ + msg_sys->msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3906527..3b61851 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -980,9 +980,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1091,9 +1088,6 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c1f403b..01625ccc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1754,7 +1754,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); - msg->msg_namelen = 0; if (u->addr) { msg->msg_namelen = u->addr->len; memcpy(msg->msg_name, u->addr->name, u->addr->len); @@ -1778,8 +1777,6 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - err = mutex_lock_interruptible(&u->readlock); if (err) { err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); @@ -1924,8 +1921,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - msg->msg_namelen = 0; - /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 545c08b..5adfd94 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1662,8 +1662,6 @@ vsock_stream_recvmsg(struct kiocb *kiocb, vsk = vsock_sk(sk); err = 0; - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state != SS_CONNECTED) { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 9d69866..687360d 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1746,8 +1746,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; - msg->msg_namelen = 0; - /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); diff --git a/net/wimax/stack.c b/net/wimax/stack.c index ef2191b..ec8b577 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -610,7 +610,6 @@ int __init wimax_subsys_init(void) d_fnend(4, NULL, "() = 0\n"); return 0; - genl_unregister_family(&wimax_gnl_family); error_register_family: d_fnend(4, NULL, "() = %d\n", result); return result; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 45a3ab5..7622789 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1340,10 +1340,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (sx25) { sx25->sx25_family = AF_X25; sx25->sx25_addr = x25->dest_addr; + msg->msg_namelen = sizeof(*sx25); } - msg->msg_namelen = sizeof(struct sockaddr_x25); - x25_check_rbuf(sk); rc = copied; out_free_dgram: diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c index db0e5cd..91c4117 100644 --- a/scripts/asn1_compiler.c +++ b/scripts/asn1_compiler.c @@ -1353,6 +1353,8 @@ static void render_out_of_line_list(FILE *out) render_opcode(out, "ASN1_OP_END_SET_OF%s,\n", act); render_opcode(out, "_jump_target(%u),\n", entry); break; + default: + break; } if (e->action) render_opcode(out, "_action(ACT_%s),\n", diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 61090e0..9c98100 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3289,6 +3289,7 @@ sub process { } } if (!defined $suppress_whiletrailers{$linenr} && + defined($stat) && defined($cond) && $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) { my ($s, $c) = ($stat, $cond); diff --git a/security/Makefile b/security/Makefile index c26c81e..a5918e0 100644 --- a/security/Makefile +++ b/security/Makefile @@ -16,7 +16,6 @@ obj-$(CONFIG_MMU) += min_addr.o # Object file lists obj-$(CONFIG_SECURITY) += security.o capability.o obj-$(CONFIG_SECURITYFS) += inode.o -# Must precede capability.o in order to stack properly. obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o obj-$(CONFIG_AUDIT) += lsm_audit.o diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 031d2d9..89c7865 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -111,7 +111,6 @@ static const char *const aa_audit_type[] = { static void audit_pre(struct audit_buffer *ab, void *ca) { struct common_audit_data *sa = ca; - struct task_struct *tsk = sa->aad->tsk ? sa->aad->tsk : current; if (aa_g_audit_header) { audit_log_format(ab, "apparmor="); @@ -132,11 +131,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca) if (sa->aad->profile) { struct aa_profile *profile = sa->aad->profile; - pid_t pid; - rcu_read_lock(); - pid = rcu_dereference(tsk->real_parent)->pid; - rcu_read_unlock(); - audit_log_format(ab, " parent=%d", pid); if (profile->ns != root_ns) { audit_log_format(ab, " namespace="); audit_log_untrustedstring(ab, profile->ns->base.hname); @@ -149,12 +143,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca) audit_log_format(ab, " name="); audit_log_untrustedstring(ab, sa->aad->name); } - - if (sa->aad->tsk) { - audit_log_format(ab, " pid=%d comm=", tsk->pid); - audit_log_untrustedstring(ab, tsk->comm); - } - } /** @@ -212,7 +200,7 @@ int aa_audit(int type, struct aa_profile *profile, gfp_t gfp, if (sa->aad->type == AUDIT_APPARMOR_KILL) (void)send_sig_info(SIGKILL, NULL, - sa->aad->tsk ? sa->aad->tsk : current); + sa->u.tsk ? sa->u.tsk : current); if (sa->aad->type == AUDIT_APPARMOR_ALLOWED) return complain_error(sa->aad->error); diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 84d1f5f..1101c6f 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -53,8 +53,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) /** * audit_caps - audit a capability - * @profile: profile confining task (NOT NULL) - * @task: task capability test was performed against (NOT NULL) + * @profile: profile being tested for confinement (NOT NULL) * @cap: capability tested * @error: error code returned by test * @@ -63,8 +62,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) * * Returns: 0 or sa->error on success, error code on failure */ -static int audit_caps(struct aa_profile *profile, struct task_struct *task, - int cap, int error) +static int audit_caps(struct aa_profile *profile, int cap, int error) { struct audit_cache *ent; int type = AUDIT_APPARMOR_AUTO; @@ -73,7 +71,6 @@ static int audit_caps(struct aa_profile *profile, struct task_struct *task, sa.type = LSM_AUDIT_DATA_CAP; sa.aad = &aad; sa.u.cap = cap; - sa.aad->tsk = task; sa.aad->op = OP_CAPABLE; sa.aad->error = error; @@ -124,8 +121,7 @@ static int profile_capable(struct aa_profile *profile, int cap) /** * aa_capable - test permission to use capability - * @task: task doing capability test against (NOT NULL) - * @profile: profile confining @task (NOT NULL) + * @profile: profile being tested against (NOT NULL) * @cap: capability to be tested * @audit: whether an audit record should be generated * @@ -133,8 +129,7 @@ static int profile_capable(struct aa_profile *profile, int cap) * * Returns: 0 on success, or else an error code. */ -int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap, - int audit) +int aa_capable(struct aa_profile *profile, int cap, int audit) { int error = profile_capable(profile, cap); @@ -144,5 +139,5 @@ int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap, return error; } - return audit_caps(profile, task, cap, error); + return audit_caps(profile, cap, error); } diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 26c607c..452567d 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -50,23 +50,21 @@ void aa_free_domain_entries(struct aa_domain *domain) /** * may_change_ptraced_domain - check if can change profile on ptraced task - * @task: task we want to change profile of (NOT NULL) * @to_profile: profile to change to (NOT NULL) * - * Check if the task is ptraced and if so if the tracing task is allowed + * Check if current is ptraced and if so if the tracing task is allowed * to trace the new domain * * Returns: %0 or error if change not allowed */ -static int may_change_ptraced_domain(struct task_struct *task, - struct aa_profile *to_profile) +static int may_change_ptraced_domain(struct aa_profile *to_profile) { struct task_struct *tracer; struct aa_profile *tracerp = NULL; int error = 0; rcu_read_lock(); - tracer = ptrace_parent(task); + tracer = ptrace_parent(current); if (tracer) /* released below */ tracerp = aa_get_task_profile(tracer); @@ -75,7 +73,7 @@ static int may_change_ptraced_domain(struct task_struct *task, if (!tracer || unconfined(tracerp)) goto out; - error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH); + error = aa_may_ptrace(tracerp, to_profile, PTRACE_MODE_ATTACH); out: rcu_read_unlock(); @@ -477,7 +475,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm) } if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { - error = may_change_ptraced_domain(current, new_profile); + error = may_change_ptraced_domain(new_profile); if (error) { aa_put_profile(new_profile); goto audit; @@ -690,7 +688,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) } } - error = may_change_ptraced_domain(current, hat); + error = may_change_ptraced_domain(hat); if (error) { info = "ptraced"; error = -EPERM; @@ -829,7 +827,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec, } /* check if tracing task is allowed to trace target domain */ - error = may_change_ptraced_domain(current, target); + error = may_change_ptraced_domain(target); if (error) { info = "ptrace prevents transition"; goto audit; diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 30e8d76..ba3dfd1 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -109,7 +109,6 @@ struct apparmor_audit_data { void *profile; const char *name; const char *info; - struct task_struct *tsk; union { void *target; struct { diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h index 2e7c9d6..fc3fa38 100644 --- a/security/apparmor/include/capability.h +++ b/security/apparmor/include/capability.h @@ -4,7 +4,7 @@ * This file contains AppArmor capability mediation definitions. * * Copyright (C) 1998-2008 Novell/SUSE - * Copyright 2009-2010 Canonical Ltd. + * Copyright 2009-2013 Canonical Ltd. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -38,8 +38,7 @@ struct aa_caps { extern struct aa_fs_entry aa_fs_entry_caps[]; -int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap, - int audit); +int aa_capable(struct aa_profile *profile, int cap, int audit); static inline void aa_free_cap_rules(struct aa_caps *caps) { diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h index aeda0fbc..288ca76 100644 --- a/security/apparmor/include/ipc.h +++ b/security/apparmor/include/ipc.h @@ -19,8 +19,8 @@ struct aa_profile; -int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer, - struct aa_profile *tracee, unsigned int mode); +int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee, + unsigned int mode); int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee, unsigned int mode); diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index c51d226..777ac1c 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -54,15 +54,14 @@ static int aa_audit_ptrace(struct aa_profile *profile, /** * aa_may_ptrace - test if tracer task can trace the tracee - * @tracer_task: task who will do the tracing (NOT NULL) * @tracer: profile of the task doing the tracing (NOT NULL) * @tracee: task to be traced * @mode: whether PTRACE_MODE_READ || PTRACE_MODE_ATTACH * * Returns: %0 else error code if permission denied or error */ -int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer, - struct aa_profile *tracee, unsigned int mode) +int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee, + unsigned int mode) { /* TODO: currently only based on capability, not extended ptrace * rules, @@ -72,7 +71,7 @@ int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer, if (unconfined(tracer) || tracer == tracee) return 0; /* log this capability request */ - return aa_capable(tracer_task, tracer, CAP_SYS_PTRACE, 1); + return aa_capable(tracer, CAP_SYS_PTRACE, 1); } /** @@ -101,7 +100,7 @@ int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee, if (!unconfined(tracer_p)) { struct aa_profile *tracee_p = aa_get_task_profile(tracee); - error = aa_may_ptrace(tracer, tracer_p, tracee_p, mode); + error = aa_may_ptrace(tracer_p, tracee_p, mode); error = aa_audit_ptrace(tracer_p, tracee_p, error); aa_put_profile(tracee_p); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index fb99e18..4257b7e 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -145,7 +145,7 @@ static int apparmor_capable(const struct cred *cred, struct user_namespace *ns, if (!error) { profile = aa_cred_profile(cred); if (!unconfined(profile)) - error = aa_capable(current, profile, cap, audit); + error = aa_capable(profile, cap, audit); } return error; } diff --git a/security/capability.c b/security/capability.c index dbeb9bc..8b4f24a 100644 --- a/security/capability.c +++ b/security/capability.c @@ -777,9 +777,15 @@ static int cap_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx) return 0; } -static int cap_xfrm_state_alloc_security(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, - u32 secid) +static int cap_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static int cap_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, + u32 secid) { return 0; } @@ -1101,7 +1107,8 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, xfrm_policy_clone_security); set_to_cap_if_null(ops, xfrm_policy_free_security); set_to_cap_if_null(ops, xfrm_policy_delete_security); - set_to_cap_if_null(ops, xfrm_state_alloc_security); + set_to_cap_if_null(ops, xfrm_state_alloc); + set_to_cap_if_null(ops, xfrm_state_alloc_acquire); set_to_cap_if_null(ops, xfrm_state_free_security); set_to_cap_if_null(ops, xfrm_state_delete_security); set_to_cap_if_null(ops, xfrm_policy_lookup); diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 0b759e1..77ca965 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -13,7 +13,9 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/err.h> +#include <linux/sched.h> #include <linux/rbtree.h> +#include <linux/cred.h> #include <linux/key-type.h> #include <linux/digsig.h> @@ -21,21 +23,29 @@ static struct key *keyring[INTEGRITY_KEYRING_MAX]; +#ifdef CONFIG_IMA_TRUSTED_KEYRING +static const char *keyring_name[INTEGRITY_KEYRING_MAX] = { + ".evm", + ".module", + ".ima", +}; +#else static const char *keyring_name[INTEGRITY_KEYRING_MAX] = { "_evm", "_module", "_ima", }; +#endif int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, - const char *digest, int digestlen) + const char *digest, int digestlen) { if (id >= INTEGRITY_KEYRING_MAX) return -EINVAL; if (!keyring[id]) { keyring[id] = - request_key(&key_type_keyring, keyring_name[id], NULL); + request_key(&key_type_keyring, keyring_name[id], NULL); if (IS_ERR(keyring[id])) { int err = PTR_ERR(keyring[id]); pr_err("no %s keyring: %d\n", keyring_name[id], err); @@ -44,9 +54,10 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, } } - switch (sig[0]) { + switch (sig[1]) { case 1: - return digsig_verify(keyring[id], sig, siglen, + /* v1 API expect signature without xattr type */ + return digsig_verify(keyring[id], sig + 1, siglen - 1, digest, digestlen); case 2: return asymmetric_verify(keyring[id], sig, siglen, @@ -55,3 +66,21 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, return -EOPNOTSUPP; } + +int integrity_init_keyring(const unsigned int id) +{ + const struct cred *cred = current_cred(); + const struct user_struct *user = cred->user; + + keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0), + KGIDT_INIT(0), cred, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA, user->uid_keyring); + if (!IS_ERR(keyring[id])) + set_bit(KEY_FLAG_TRUSTED_ONLY, &keyring[id]->flags); + else + pr_info("Can't allocate %s keyring (%ld)\n", + keyring_name[id], PTR_ERR(keyring[id])); + return 0; +} diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c index b475466..9eae480 100644 --- a/security/integrity/digsig_asymmetric.c +++ b/security/integrity/digsig_asymmetric.c @@ -20,17 +20,6 @@ #include "integrity.h" /* - * signature format v2 - for using with asymmetric keys - */ -struct signature_v2_hdr { - uint8_t version; /* signature format version */ - uint8_t hash_algo; /* Digest algorithm [enum pkey_hash_algo] */ - uint32_t keyid; /* IMA key identifier - not X509/PGP specific*/ - uint16_t sig_size; /* signature size */ - uint8_t sig[0]; /* signature payload */ -} __packed; - -/* * Request an asymmetric key. */ static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index af9b685..336b3dd 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -123,7 +123,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, goto out; } - xattr_len = rc - 1; + xattr_len = rc; /* check value type */ switch (xattr_data->type) { @@ -143,7 +143,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, if (rc) break; rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM, - xattr_data->digest, xattr_len, + (const char *)xattr_data, xattr_len, calc.digest, sizeof(calc.digest)); if (!rc) { /* we probably want to replace rsa with hmac here */ diff --git a/security/integrity/evm/evm_posix_acl.c b/security/integrity/evm/evm_posix_acl.c index b1753e9..46408b9 100644 --- a/security/integrity/evm/evm_posix_acl.c +++ b/security/integrity/evm/evm_posix_acl.c @@ -11,8 +11,9 @@ #include <linux/module.h> #include <linux/xattr.h> +#include <linux/evm.h> -int posix_xattr_acl(char *xattr) +int posix_xattr_acl(const char *xattr) { int xattr_len = strlen(xattr); diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 74522db..c49d3f1 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -70,6 +70,8 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode) static void iint_free(struct integrity_iint_cache *iint) { + kfree(iint->ima_hash); + iint->ima_hash = NULL; iint->version = 0; iint->flags = 0UL; iint->ima_file_status = INTEGRITY_UNKNOWN; diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 39196ab..dad8d4c 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -9,6 +9,7 @@ config IMA select CRYPTO_HMAC select CRYPTO_MD5 select CRYPTO_SHA1 + select CRYPTO_HASH_INFO select TCG_TPM if HAS_IOMEM && !UML select TCG_TIS if TCG_TPM && X86 select TCG_IBMVTPM if TCG_TPM && PPC64 @@ -45,6 +46,69 @@ config IMA_LSM_RULES help Disabling this option will disregard LSM based policy rules. +choice + prompt "Default template" + default IMA_NG_TEMPLATE + depends on IMA + help + Select the default IMA measurement template. + + The original 'ima' measurement list template contains a + hash, defined as 20 bytes, and a null terminated pathname, + limited to 255 characters. The 'ima-ng' measurement list + template permits both larger hash digests and longer + pathnames. + + config IMA_TEMPLATE + bool "ima" + config IMA_NG_TEMPLATE + bool "ima-ng (default)" + config IMA_SIG_TEMPLATE + bool "ima-sig" +endchoice + +config IMA_DEFAULT_TEMPLATE + string + depends on IMA + default "ima" if IMA_TEMPLATE + default "ima-ng" if IMA_NG_TEMPLATE + default "ima-sig" if IMA_SIG_TEMPLATE + +choice + prompt "Default integrity hash algorithm" + default IMA_DEFAULT_HASH_SHA1 + depends on IMA + help + Select the default hash algorithm used for the measurement + list, integrity appraisal and audit log. The compiled default + hash algorithm can be overwritten using the kernel command + line 'ima_hash=' option. + + config IMA_DEFAULT_HASH_SHA1 + bool "SHA1 (default)" + depends on CRYPTO_SHA1 + + config IMA_DEFAULT_HASH_SHA256 + bool "SHA256" + depends on CRYPTO_SHA256 && !IMA_TEMPLATE + + config IMA_DEFAULT_HASH_SHA512 + bool "SHA512" + depends on CRYPTO_SHA512 && !IMA_TEMPLATE + + config IMA_DEFAULT_HASH_WP512 + bool "WP512" + depends on CRYPTO_WP512 && !IMA_TEMPLATE +endchoice + +config IMA_DEFAULT_HASH + string + depends on IMA + default "sha1" if IMA_DEFAULT_HASH_SHA1 + default "sha256" if IMA_DEFAULT_HASH_SHA256 + default "sha512" if IMA_DEFAULT_HASH_SHA512 + default "wp512" if IMA_DEFAULT_HASH_WP512 + config IMA_APPRAISE bool "Appraise integrity measurements" depends on IMA @@ -59,3 +123,11 @@ config IMA_APPRAISE For more information on integrity appraisal refer to: <http://linux-ima.sourceforge.net> If unsure, say N. + +config IMA_TRUSTED_KEYRING + bool "Require all keys on the _ima keyring be signed" + depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING + default y + help + This option requires that all keys added to the _ima + keyring be signed by a key on the system trusted keyring. diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile index 56dfee7..d79263d 100644 --- a/security/integrity/ima/Makefile +++ b/security/integrity/ima/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_IMA) += ima.o ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \ - ima_policy.o + ima_policy.o ima_template.o ima_template_lib.o ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index b3dd616..bf03c6a 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -36,23 +36,48 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 }; #define IMA_HASH_BITS 9 #define IMA_MEASURE_HTABLE_SIZE (1 << IMA_HASH_BITS) +#define IMA_TEMPLATE_FIELD_ID_MAX_LEN 16 +#define IMA_TEMPLATE_NUM_FIELDS_MAX 15 + +#define IMA_TEMPLATE_IMA_NAME "ima" +#define IMA_TEMPLATE_IMA_FMT "d|n" + /* set during initialization */ extern int ima_initialized; extern int ima_used_chip; -extern char *ima_hash; +extern int ima_hash_algo; extern int ima_appraise; -/* IMA inode template definition */ -struct ima_template_data { - u8 digest[IMA_DIGEST_SIZE]; /* sha1/md5 measurement hash */ - char file_name[IMA_EVENT_NAME_LEN_MAX + 1]; /* name + \0 */ +/* IMA template field data definition */ +struct ima_field_data { + u8 *data; + u32 len; +}; + +/* IMA template field definition */ +struct ima_template_field { + const char field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN]; + int (*field_init) (struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_field_data *field_data); + void (*field_show) (struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +}; + +/* IMA template descriptor definition */ +struct ima_template_desc { + char *name; + char *fmt; + int num_fields; + struct ima_template_field **fields; }; struct ima_template_entry { - u8 digest[IMA_DIGEST_SIZE]; /* sha1 or md5 measurement hash */ - const char *template_name; - int template_len; - struct ima_template_data template; + u8 digest[TPM_DIGEST_SIZE]; /* sha1 or md5 measurement hash */ + struct ima_template_desc *template_desc; /* template descriptor */ + u32 template_data_len; + struct ima_field_data template_data[0]; /* template related data */ }; struct ima_queue_entry { @@ -69,13 +94,21 @@ int ima_fs_init(void); void ima_fs_cleanup(void); int ima_inode_alloc(struct inode *inode); int ima_add_template_entry(struct ima_template_entry *entry, int violation, - const char *op, struct inode *inode); -int ima_calc_file_hash(struct file *file, char *digest); -int ima_calc_buffer_hash(const void *data, int len, char *digest); -int ima_calc_boot_aggregate(char *digest); -void ima_add_violation(struct inode *inode, const unsigned char *filename, + const char *op, struct inode *inode, + const unsigned char *filename); +int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash); +int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields, + struct ima_digest_data *hash); +int __init ima_calc_boot_aggregate(struct ima_digest_data *hash); +void ima_add_violation(struct file *file, const unsigned char *filename, const char *op, const char *cause); int ima_init_crypto(void); +void ima_putc(struct seq_file *m, void *data, int datalen); +void ima_print_digest(struct seq_file *m, u8 *digest, int size); +struct ima_template_desc *ima_template_desc_current(void); +int ima_init_template(void); + +int ima_init_template(void); /* * used to protect h_table and sha_table @@ -98,14 +131,21 @@ static inline unsigned long ima_hash_key(u8 *digest) int ima_get_action(struct inode *inode, int mask, int function); int ima_must_measure(struct inode *inode, int mask, int function); int ima_collect_measurement(struct integrity_iint_cache *iint, - struct file *file); + struct file *file, + struct evm_ima_xattr_data **xattr_value, + int *xattr_len); void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename); + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len); void ima_audit_measurement(struct integrity_iint_cache *iint, const unsigned char *filename); +int ima_alloc_init_template(struct integrity_iint_cache *iint, + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_template_entry **entry); int ima_store_template(struct ima_template_entry *entry, int violation, - struct inode *inode); -void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show); + struct inode *inode, const unsigned char *filename); const char *ima_d_path(struct path *path, char **pathbuf); /* rbtree tree calls to lookup, insert, delete @@ -131,17 +171,25 @@ void ima_delete_rules(void); #ifdef CONFIG_IMA_APPRAISE int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename); + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len); int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file); enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, int func); +void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_digest_data *hash); +int ima_read_xattr(struct dentry *dentry, + struct evm_ima_xattr_data **xattr_value); #else static inline int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename) + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len) { return INTEGRITY_UNKNOWN; } @@ -162,6 +210,19 @@ static inline enum integrity_status ima_get_cache_status(struct integrity_iint_c { return INTEGRITY_UNKNOWN; } + +static inline void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, + int xattr_len, + struct ima_digest_data *hash) +{ +} + +static inline int ima_read_xattr(struct dentry *dentry, + struct evm_ima_xattr_data **xattr_value) +{ + return 0; +} + #endif /* LSM based policy rules require audit */ diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 1c03e8f1..0e75408 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -18,9 +18,46 @@ #include <linux/fs.h> #include <linux/xattr.h> #include <linux/evm.h> +#include <crypto/hash_info.h> #include "ima.h" -static const char *IMA_TEMPLATE_NAME = "ima"; +/* + * ima_alloc_init_template - create and initialize a new template entry + */ +int ima_alloc_init_template(struct integrity_iint_cache *iint, + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_template_entry **entry) +{ + struct ima_template_desc *template_desc = ima_template_desc_current(); + int i, result = 0; + + *entry = kzalloc(sizeof(**entry) + template_desc->num_fields * + sizeof(struct ima_field_data), GFP_NOFS); + if (!*entry) + return -ENOMEM; + + for (i = 0; i < template_desc->num_fields; i++) { + struct ima_template_field *field = template_desc->fields[i]; + u32 len; + + result = field->field_init(iint, file, filename, + xattr_value, xattr_len, + &((*entry)->template_data[i])); + if (result != 0) + goto out; + + len = (*entry)->template_data[i].len; + (*entry)->template_data_len += sizeof(len); + (*entry)->template_data_len += len; + } + (*entry)->template_desc = template_desc; + return 0; +out: + kfree(*entry); + *entry = NULL; + return result; +} /* * ima_store_template - store ima template measurements @@ -39,28 +76,34 @@ static const char *IMA_TEMPLATE_NAME = "ima"; * Returns 0 on success, error code otherwise */ int ima_store_template(struct ima_template_entry *entry, - int violation, struct inode *inode) + int violation, struct inode *inode, + const unsigned char *filename) { const char *op = "add_template_measure"; const char *audit_cause = "hashing_error"; + char *template_name = entry->template_desc->name; int result; - - memset(entry->digest, 0, sizeof(entry->digest)); - entry->template_name = IMA_TEMPLATE_NAME; - entry->template_len = sizeof(entry->template); + struct { + struct ima_digest_data hdr; + char digest[TPM_DIGEST_SIZE]; + } hash; if (!violation) { - result = ima_calc_buffer_hash(&entry->template, - entry->template_len, - entry->digest); + int num_fields = entry->template_desc->num_fields; + + /* this function uses default algo */ + hash.hdr.algo = HASH_ALGO_SHA1; + result = ima_calc_field_array_hash(&entry->template_data[0], + num_fields, &hash.hdr); if (result < 0) { integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, - entry->template_name, op, + template_name, op, audit_cause, result, 0); return result; } + memcpy(entry->digest, hash.hdr.digest, hash.hdr.length); } - result = ima_add_template_entry(entry, violation, op, inode); + result = ima_add_template_entry(entry, violation, op, inode, filename); return result; } @@ -71,24 +114,24 @@ int ima_store_template(struct ima_template_entry *entry, * By extending the PCR with 0xFF's instead of with zeroes, the PCR * value is invalidated. */ -void ima_add_violation(struct inode *inode, const unsigned char *filename, +void ima_add_violation(struct file *file, const unsigned char *filename, const char *op, const char *cause) { struct ima_template_entry *entry; + struct inode *inode = file->f_dentry->d_inode; int violation = 1; int result; /* can overflow, only indicator */ atomic_long_inc(&ima_htable.violations); - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { + result = ima_alloc_init_template(NULL, file, filename, + NULL, 0, &entry); + if (result < 0) { result = -ENOMEM; goto err_out; } - memset(&entry->template, 0, sizeof(entry->template)); - strncpy(entry->template.file_name, filename, IMA_EVENT_NAME_LEN_MAX); - result = ima_store_template(entry, violation, inode); + result = ima_store_template(entry, violation, inode, filename); if (result < 0) kfree(entry); err_out: @@ -138,20 +181,42 @@ int ima_must_measure(struct inode *inode, int mask, int function) * Return 0 on success, error code otherwise */ int ima_collect_measurement(struct integrity_iint_cache *iint, - struct file *file) + struct file *file, + struct evm_ima_xattr_data **xattr_value, + int *xattr_len) { struct inode *inode = file_inode(file); const char *filename = file->f_dentry->d_name.name; int result = 0; + struct { + struct ima_digest_data hdr; + char digest[IMA_MAX_DIGEST_SIZE]; + } hash; + + if (xattr_value) + *xattr_len = ima_read_xattr(file->f_dentry, xattr_value); if (!(iint->flags & IMA_COLLECTED)) { u64 i_version = file_inode(file)->i_version; - iint->ima_xattr.type = IMA_XATTR_DIGEST; - result = ima_calc_file_hash(file, iint->ima_xattr.digest); + /* use default hash algorithm */ + hash.hdr.algo = ima_hash_algo; + + if (xattr_value) + ima_get_hash_algo(*xattr_value, *xattr_len, &hash.hdr); + + result = ima_calc_file_hash(file, &hash.hdr); if (!result) { - iint->version = i_version; - iint->flags |= IMA_COLLECTED; + int length = sizeof(hash.hdr) + hash.hdr.length; + void *tmpbuf = krealloc(iint->ima_hash, length, + GFP_NOFS); + if (tmpbuf) { + iint->ima_hash = tmpbuf; + memcpy(iint->ima_hash, &hash, length); + iint->version = i_version; + iint->flags |= IMA_COLLECTED; + } else + result = -ENOMEM; } } if (result) @@ -177,7 +242,9 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, * Must be called with iint->mutex held. */ void ima_store_measurement(struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename) + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len) { const char *op = "add_template_measure"; const char *audit_cause = "ENOMEM"; @@ -189,19 +256,15 @@ void ima_store_measurement(struct integrity_iint_cache *iint, if (iint->flags & IMA_MEASURED) return; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { + result = ima_alloc_init_template(iint, file, filename, + xattr_value, xattr_len, &entry); + if (result < 0) { integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename, op, audit_cause, result, 0); return; } - memset(&entry->template, 0, sizeof(entry->template)); - memcpy(entry->template.digest, iint->ima_xattr.digest, IMA_DIGEST_SIZE); - strcpy(entry->template.file_name, - (strlen(filename) > IMA_EVENT_NAME_LEN_MAX) ? - file->f_dentry->d_name.name : filename); - result = ima_store_template(entry, violation, inode); + result = ima_store_template(entry, violation, inode, filename); if (!result || result == -EEXIST) iint->flags |= IMA_MEASURED; if (result < 0) @@ -212,14 +275,16 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, const unsigned char *filename) { struct audit_buffer *ab; - char hash[(IMA_DIGEST_SIZE * 2) + 1]; + char hash[(iint->ima_hash->length * 2) + 1]; + const char *algo_name = hash_algo_name[iint->ima_hash->algo]; + char algo_hash[sizeof(hash) + strlen(algo_name) + 2]; int i; if (iint->flags & IMA_AUDITED) return; - for (i = 0; i < IMA_DIGEST_SIZE; i++) - hex_byte_pack(hash + (i * 2), iint->ima_xattr.digest[i]); + for (i = 0; i < iint->ima_hash->length; i++) + hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]); hash[i * 2] = '\0'; ab = audit_log_start(current->audit_context, GFP_KERNEL, @@ -230,7 +295,8 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, audit_log_format(ab, "file="); audit_log_untrustedstring(ab, filename); audit_log_format(ab, " hash="); - audit_log_untrustedstring(ab, hash); + snprintf(algo_hash, sizeof(algo_hash), "%s:%s", algo_name, hash); + audit_log_untrustedstring(ab, algo_hash); audit_log_task_info(ab, current); audit_log_end(ab); diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 2d4beca..46353ee 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -15,6 +15,7 @@ #include <linux/magic.h> #include <linux/ima.h> #include <linux/evm.h> +#include <crypto/hash_info.h> #include "ima.h" @@ -43,19 +44,31 @@ int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func) } static int ima_fix_xattr(struct dentry *dentry, - struct integrity_iint_cache *iint) + struct integrity_iint_cache *iint) { - iint->ima_xattr.type = IMA_XATTR_DIGEST; - return __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA, - (u8 *)&iint->ima_xattr, - sizeof(iint->ima_xattr), 0); + int rc, offset; + u8 algo = iint->ima_hash->algo; + + if (algo <= HASH_ALGO_SHA1) { + offset = 1; + iint->ima_hash->xattr.sha1.type = IMA_XATTR_DIGEST; + } else { + offset = 0; + iint->ima_hash->xattr.ng.type = IMA_XATTR_DIGEST_NG; + iint->ima_hash->xattr.ng.algo = algo; + } + rc = __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA, + &iint->ima_hash->xattr.data[offset], + (sizeof(iint->ima_hash->xattr) - offset) + + iint->ima_hash->length, 0); + return rc; } /* Return specific func appraised cached result */ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, int func) { - switch(func) { + switch (func) { case MMAP_CHECK: return iint->ima_mmap_status; case BPRM_CHECK: @@ -71,7 +84,7 @@ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, static void ima_set_cache_status(struct integrity_iint_cache *iint, int func, enum integrity_status status) { - switch(func) { + switch (func) { case MMAP_CHECK: iint->ima_mmap_status = status; break; @@ -90,7 +103,7 @@ static void ima_set_cache_status(struct integrity_iint_cache *iint, static void ima_cache_flags(struct integrity_iint_cache *iint, int func) { - switch(func) { + switch (func) { case MMAP_CHECK: iint->flags |= (IMA_MMAP_APPRAISED | IMA_APPRAISED); break; @@ -107,6 +120,50 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func) } } +void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_digest_data *hash) +{ + struct signature_v2_hdr *sig; + + if (!xattr_value || xattr_len < 2) + return; + + switch (xattr_value->type) { + case EVM_IMA_XATTR_DIGSIG: + sig = (typeof(sig))xattr_value; + if (sig->version != 2 || xattr_len <= sizeof(*sig)) + return; + hash->algo = sig->hash_algo; + break; + case IMA_XATTR_DIGEST_NG: + hash->algo = xattr_value->digest[0]; + break; + case IMA_XATTR_DIGEST: + /* this is for backward compatibility */ + if (xattr_len == 21) { + unsigned int zero = 0; + if (!memcmp(&xattr_value->digest[16], &zero, 4)) + hash->algo = HASH_ALGO_MD5; + else + hash->algo = HASH_ALGO_SHA1; + } else if (xattr_len == 17) + hash->algo = HASH_ALGO_MD5; + break; + } +} + +int ima_read_xattr(struct dentry *dentry, + struct evm_ima_xattr_data **xattr_value) +{ + struct inode *inode = dentry->d_inode; + + if (!inode->i_op->getxattr) + return 0; + + return vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)xattr_value, + 0, GFP_NOFS); +} + /* * ima_appraise_measurement - appraise file measurement * @@ -116,23 +173,22 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func) * Return 0 on success, error code otherwise */ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename) + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len) { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - struct evm_ima_xattr_data *xattr_value = NULL; enum integrity_status status = INTEGRITY_UNKNOWN; const char *op = "appraise_data"; char *cause = "unknown"; - int rc; + int rc = xattr_len, hash_start = 0; if (!ima_appraise) return 0; if (!inode->i_op->getxattr) return INTEGRITY_UNKNOWN; - rc = vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)&xattr_value, - 0, GFP_NOFS); if (rc <= 0) { if (rc && rc != -ENODATA) goto out; @@ -153,14 +209,25 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, goto out; } switch (xattr_value->type) { + case IMA_XATTR_DIGEST_NG: + /* first byte contains algorithm id */ + hash_start = 1; case IMA_XATTR_DIGEST: if (iint->flags & IMA_DIGSIG_REQUIRED) { cause = "IMA signature required"; status = INTEGRITY_FAIL; break; } - rc = memcmp(xattr_value->digest, iint->ima_xattr.digest, - IMA_DIGEST_SIZE); + if (xattr_len - sizeof(xattr_value->type) - hash_start >= + iint->ima_hash->length) + /* xattr length may be longer. md5 hash in previous + version occupied 20 bytes in xattr, instead of 16 + */ + rc = memcmp(&xattr_value->digest[hash_start], + iint->ima_hash->digest, + iint->ima_hash->length); + else + rc = -EINVAL; if (rc) { cause = "invalid-hash"; status = INTEGRITY_FAIL; @@ -171,9 +238,9 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, case EVM_IMA_XATTR_DIGSIG: iint->flags |= IMA_DIGSIG; rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA, - xattr_value->digest, rc - 1, - iint->ima_xattr.digest, - IMA_DIGEST_SIZE); + (const char *)xattr_value, rc, + iint->ima_hash->digest, + iint->ima_hash->length); if (rc == -EOPNOTSUPP) { status = INTEGRITY_UNKNOWN; } else if (rc) { @@ -203,7 +270,6 @@ out: ima_cache_flags(iint, func); } ima_set_cache_status(iint, func, status); - kfree(xattr_value); return status; } @@ -219,7 +285,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file) if (iint->flags & IMA_DIGSIG) return; - rc = ima_collect_measurement(iint, file); + rc = ima_collect_measurement(iint, file, NULL, NULL); if (rc < 0) return; @@ -315,3 +381,14 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name) } return result; } + +#ifdef CONFIG_IMA_TRUSTED_KEYRING +static int __init init_ima_keyring(void) +{ + int ret; + + ret = integrity_init_keyring(INTEGRITY_KEYRING_IMA); + return 0; +} +late_initcall(init_ima_keyring); +#endif diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index a02e079..676e029 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -20,6 +20,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <crypto/hash.h> +#include <crypto/hash_info.h> #include "ima.h" static struct crypto_shash *ima_shash_tfm; @@ -28,31 +29,58 @@ int ima_init_crypto(void) { long rc; - ima_shash_tfm = crypto_alloc_shash(ima_hash, 0, 0); + ima_shash_tfm = crypto_alloc_shash(hash_algo_name[ima_hash_algo], 0, 0); if (IS_ERR(ima_shash_tfm)) { rc = PTR_ERR(ima_shash_tfm); - pr_err("Can not allocate %s (reason: %ld)\n", ima_hash, rc); + pr_err("Can not allocate %s (reason: %ld)\n", + hash_algo_name[ima_hash_algo], rc); return rc; } return 0; } +static struct crypto_shash *ima_alloc_tfm(enum hash_algo algo) +{ + struct crypto_shash *tfm = ima_shash_tfm; + int rc; + + if (algo != ima_hash_algo && algo < HASH_ALGO__LAST) { + tfm = crypto_alloc_shash(hash_algo_name[algo], 0, 0); + if (IS_ERR(tfm)) { + rc = PTR_ERR(tfm); + pr_err("Can not allocate %s (reason: %d)\n", + hash_algo_name[algo], rc); + } + } + return tfm; +} + +static void ima_free_tfm(struct crypto_shash *tfm) +{ + if (tfm != ima_shash_tfm) + crypto_free_shash(tfm); +} + /* * Calculate the MD5/SHA1 file digest */ -int ima_calc_file_hash(struct file *file, char *digest) +static int ima_calc_file_hash_tfm(struct file *file, + struct ima_digest_data *hash, + struct crypto_shash *tfm) { loff_t i_size, offset = 0; char *rbuf; int rc, read = 0; struct { struct shash_desc shash; - char ctx[crypto_shash_descsize(ima_shash_tfm)]; + char ctx[crypto_shash_descsize(tfm)]; } desc; - desc.shash.tfm = ima_shash_tfm; + desc.shash.tfm = tfm; desc.shash.flags = 0; + hash->length = crypto_shash_digestsize(tfm); + rc = crypto_shash_init(&desc.shash); if (rc != 0) return rc; @@ -85,27 +113,83 @@ int ima_calc_file_hash(struct file *file, char *digest) } kfree(rbuf); if (!rc) - rc = crypto_shash_final(&desc.shash, digest); + rc = crypto_shash_final(&desc.shash, hash->digest); if (read) file->f_mode &= ~FMODE_READ; out: return rc; } +int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) +{ + struct crypto_shash *tfm; + int rc; + + tfm = ima_alloc_tfm(hash->algo); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + rc = ima_calc_file_hash_tfm(file, hash, tfm); + + ima_free_tfm(tfm); + + return rc; +} + /* - * Calculate the hash of a given buffer + * Calculate the hash of template data */ -int ima_calc_buffer_hash(const void *data, int len, char *digest) +static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data, + int num_fields, + struct ima_digest_data *hash, + struct crypto_shash *tfm) { struct { struct shash_desc shash; - char ctx[crypto_shash_descsize(ima_shash_tfm)]; + char ctx[crypto_shash_descsize(tfm)]; } desc; + int rc, i; - desc.shash.tfm = ima_shash_tfm; + desc.shash.tfm = tfm; desc.shash.flags = 0; - return crypto_shash_digest(&desc.shash, data, len, digest); + hash->length = crypto_shash_digestsize(tfm); + + rc = crypto_shash_init(&desc.shash); + if (rc != 0) + return rc; + + for (i = 0; i < num_fields; i++) { + rc = crypto_shash_update(&desc.shash, + (const u8 *) &field_data[i].len, + sizeof(field_data[i].len)); + rc = crypto_shash_update(&desc.shash, field_data[i].data, + field_data[i].len); + if (rc) + break; + } + + if (!rc) + rc = crypto_shash_final(&desc.shash, hash->digest); + + return rc; +} + +int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields, + struct ima_digest_data *hash) +{ + struct crypto_shash *tfm; + int rc; + + tfm = ima_alloc_tfm(hash->algo); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + rc = ima_calc_field_array_hash_tfm(field_data, num_fields, hash, tfm); + + ima_free_tfm(tfm); + + return rc; } static void __init ima_pcrread(int idx, u8 *pcr) @@ -120,16 +204,17 @@ static void __init ima_pcrread(int idx, u8 *pcr) /* * Calculate the boot aggregate hash */ -int __init ima_calc_boot_aggregate(char *digest) +static int __init ima_calc_boot_aggregate_tfm(char *digest, + struct crypto_shash *tfm) { - u8 pcr_i[IMA_DIGEST_SIZE]; + u8 pcr_i[TPM_DIGEST_SIZE]; int rc, i; struct { struct shash_desc shash; - char ctx[crypto_shash_descsize(ima_shash_tfm)]; + char ctx[crypto_shash_descsize(tfm)]; } desc; - desc.shash.tfm = ima_shash_tfm; + desc.shash.tfm = tfm; desc.shash.flags = 0; rc = crypto_shash_init(&desc.shash); @@ -140,9 +225,26 @@ int __init ima_calc_boot_aggregate(char *digest) for (i = TPM_PCR0; i < TPM_PCR8; i++) { ima_pcrread(i, pcr_i); /* now accumulate with current aggregate */ - rc = crypto_shash_update(&desc.shash, pcr_i, IMA_DIGEST_SIZE); + rc = crypto_shash_update(&desc.shash, pcr_i, TPM_DIGEST_SIZE); } if (!rc) crypto_shash_final(&desc.shash, digest); return rc; } + +int __init ima_calc_boot_aggregate(struct ima_digest_data *hash) +{ + struct crypto_shash *tfm; + int rc; + + tfm = ima_alloc_tfm(hash->algo); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + hash->length = crypto_shash_digestsize(tfm); + rc = ima_calc_boot_aggregate_tfm(hash->digest, tfm); + + ima_free_tfm(tfm); + + return rc; +} diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 38477c9..d47a7c8 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -88,8 +88,7 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos) * against concurrent list-extension */ rcu_read_lock(); - qe = list_entry_rcu(qe->later.next, - struct ima_queue_entry, later); + qe = list_entry_rcu(qe->later.next, struct ima_queue_entry, later); rcu_read_unlock(); (*pos)++; @@ -100,7 +99,7 @@ static void ima_measurements_stop(struct seq_file *m, void *v) { } -static void ima_putc(struct seq_file *m, void *data, int datalen) +void ima_putc(struct seq_file *m, void *data, int datalen) { while (datalen--) seq_putc(m, *(char *)data++); @@ -111,6 +110,7 @@ static void ima_putc(struct seq_file *m, void *data, int datalen) * char[20]=template digest * 32bit-le=template name size * char[n]=template name + * [eventdata length] * eventdata[n]=template specific data */ static int ima_measurements_show(struct seq_file *m, void *v) @@ -120,6 +120,7 @@ static int ima_measurements_show(struct seq_file *m, void *v) struct ima_template_entry *e; int namelen; u32 pcr = CONFIG_IMA_MEASURE_PCR_IDX; + int i; /* get entry */ e = qe->entry; @@ -134,18 +135,25 @@ static int ima_measurements_show(struct seq_file *m, void *v) ima_putc(m, &pcr, sizeof pcr); /* 2nd: template digest */ - ima_putc(m, e->digest, IMA_DIGEST_SIZE); + ima_putc(m, e->digest, TPM_DIGEST_SIZE); /* 3rd: template name size */ - namelen = strlen(e->template_name); + namelen = strlen(e->template_desc->name); ima_putc(m, &namelen, sizeof namelen); /* 4th: template name */ - ima_putc(m, (void *)e->template_name, namelen); + ima_putc(m, e->template_desc->name, namelen); + + /* 5th: template length (except for 'ima' template) */ + if (strcmp(e->template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0) + ima_putc(m, &e->template_data_len, + sizeof(e->template_data_len)); - /* 5th: template specific data */ - ima_template_show(m, (struct ima_template_data *)&e->template, - IMA_SHOW_BINARY); + /* 6th: template specific data */ + for (i = 0; i < e->template_desc->num_fields; i++) { + e->template_desc->fields[i]->field_show(m, IMA_SHOW_BINARY, + &e->template_data[i]); + } return 0; } @@ -168,41 +176,21 @@ static const struct file_operations ima_measurements_ops = { .release = seq_release, }; -static void ima_print_digest(struct seq_file *m, u8 *digest) +void ima_print_digest(struct seq_file *m, u8 *digest, int size) { int i; - for (i = 0; i < IMA_DIGEST_SIZE; i++) + for (i = 0; i < size; i++) seq_printf(m, "%02x", *(digest + i)); } -void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show) -{ - struct ima_template_data *entry = e; - int namelen; - - switch (show) { - case IMA_SHOW_ASCII: - ima_print_digest(m, entry->digest); - seq_printf(m, " %s\n", entry->file_name); - break; - case IMA_SHOW_BINARY: - ima_putc(m, entry->digest, IMA_DIGEST_SIZE); - - namelen = strlen(entry->file_name); - ima_putc(m, &namelen, sizeof namelen); - ima_putc(m, entry->file_name, namelen); - default: - break; - } -} - /* print in ascii */ static int ima_ascii_measurements_show(struct seq_file *m, void *v) { /* the list never shrinks, so we don't need a lock here */ struct ima_queue_entry *qe = v; struct ima_template_entry *e; + int i; /* get entry */ e = qe->entry; @@ -213,14 +201,21 @@ static int ima_ascii_measurements_show(struct seq_file *m, void *v) seq_printf(m, "%2d ", CONFIG_IMA_MEASURE_PCR_IDX); /* 2nd: SHA1 template hash */ - ima_print_digest(m, e->digest); + ima_print_digest(m, e->digest, TPM_DIGEST_SIZE); /* 3th: template name */ - seq_printf(m, " %s ", e->template_name); + seq_printf(m, " %s", e->template_desc->name); /* 4th: template specific data */ - ima_template_show(m, (struct ima_template_data *)&e->template, - IMA_SHOW_ASCII); + for (i = 0; i < e->template_desc->num_fields; i++) { + seq_puts(m, " "); + if (e->template_data[i].len == 0) + continue; + + e->template_desc->fields[i]->field_show(m, IMA_SHOW_ASCII, + &e->template_data[i]); + } + seq_puts(m, "\n"); return 0; } diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index 162ea72..15f34bd 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -18,6 +18,7 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/err.h> +#include <crypto/hash_info.h> #include "ima.h" /* name for boot aggregate entry */ @@ -42,28 +43,38 @@ int ima_used_chip; static void __init ima_add_boot_aggregate(void) { struct ima_template_entry *entry; + struct integrity_iint_cache tmp_iint, *iint = &tmp_iint; const char *op = "add_boot_aggregate"; const char *audit_cause = "ENOMEM"; int result = -ENOMEM; - int violation = 1; + int violation = 0; + struct { + struct ima_digest_data hdr; + char digest[TPM_DIGEST_SIZE]; + } hash; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - goto err_out; + memset(iint, 0, sizeof(*iint)); + memset(&hash, 0, sizeof(hash)); + iint->ima_hash = &hash.hdr; + iint->ima_hash->algo = HASH_ALGO_SHA1; + iint->ima_hash->length = SHA1_DIGEST_SIZE; - memset(&entry->template, 0, sizeof(entry->template)); - strncpy(entry->template.file_name, boot_aggregate_name, - IMA_EVENT_NAME_LEN_MAX); if (ima_used_chip) { - violation = 0; - result = ima_calc_boot_aggregate(entry->template.digest); + result = ima_calc_boot_aggregate(&hash.hdr); if (result < 0) { audit_cause = "hashing_error"; kfree(entry); goto err_out; } } - result = ima_store_template(entry, violation, NULL); + + result = ima_alloc_init_template(iint, NULL, boot_aggregate_name, + NULL, 0, &entry); + if (result < 0) + return; + + result = ima_store_template(entry, violation, NULL, + boot_aggregate_name); if (result < 0) kfree(entry); return; @@ -74,7 +85,7 @@ err_out: int __init ima_init(void) { - u8 pcr_i[IMA_DIGEST_SIZE]; + u8 pcr_i[TPM_DIGEST_SIZE]; int rc; ima_used_chip = 0; @@ -88,6 +99,10 @@ int __init ima_init(void) rc = ima_init_crypto(); if (rc) return rc; + rc = ima_init_template(); + if (rc != 0) + return rc; + ima_add_boot_aggregate(); /* boot aggregate must be first entry */ ima_init_policy(); diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index e9508d5..149ee11 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/xattr.h> #include <linux/ima.h> +#include <crypto/hash_info.h> #include "ima.h" @@ -35,11 +36,33 @@ int ima_appraise = IMA_APPRAISE_ENFORCE; int ima_appraise; #endif -char *ima_hash = "sha1"; +int ima_hash_algo = HASH_ALGO_SHA1; +static int hash_setup_done; + static int __init hash_setup(char *str) { - if (strncmp(str, "md5", 3) == 0) - ima_hash = "md5"; + struct ima_template_desc *template_desc = ima_template_desc_current(); + int i; + + if (hash_setup_done) + return 1; + + if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) { + if (strncmp(str, "sha1", 4) == 0) + ima_hash_algo = HASH_ALGO_SHA1; + else if (strncmp(str, "md5", 3) == 0) + ima_hash_algo = HASH_ALGO_MD5; + goto out; + } + + for (i = 0; i < HASH_ALGO__LAST; i++) { + if (strcmp(str, hash_algo_name[i]) == 0) { + ima_hash_algo = i; + break; + } + } +out: + hash_setup_done = 1; return 1; } __setup("ima_hash=", hash_setup); @@ -92,10 +115,9 @@ out: pathname = dentry->d_name.name; if (send_tomtou) - ima_add_violation(inode, pathname, - "invalid_pcr", "ToMToU"); + ima_add_violation(file, pathname, "invalid_pcr", "ToMToU"); if (send_writers) - ima_add_violation(inode, pathname, + ima_add_violation(file, pathname, "invalid_pcr", "open_writers"); kfree(pathbuf); } @@ -144,9 +166,12 @@ static int process_measurement(struct file *file, const char *filename, { struct inode *inode = file_inode(file); struct integrity_iint_cache *iint; + struct ima_template_desc *template_desc = ima_template_desc_current(); char *pathbuf = NULL; const char *pathname = NULL; int rc = -ENOMEM, action, must_appraise, _func; + struct evm_ima_xattr_data *xattr_value = NULL, **xattr_ptr = NULL; + int xattr_len = 0; if (!ima_initialized || !S_ISREG(inode->i_mode)) return 0; @@ -185,7 +210,13 @@ static int process_measurement(struct file *file, const char *filename, goto out_digsig; } - rc = ima_collect_measurement(iint, file); + if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) { + if (action & IMA_APPRAISE_SUBMASK) + xattr_ptr = &xattr_value; + } else + xattr_ptr = &xattr_value; + + rc = ima_collect_measurement(iint, file, xattr_ptr, &xattr_len); if (rc != 0) goto out_digsig; @@ -194,9 +225,11 @@ static int process_measurement(struct file *file, const char *filename, pathname = (const char *)file->f_dentry->d_name.name; if (action & IMA_MEASURE) - ima_store_measurement(iint, file, pathname); + ima_store_measurement(iint, file, pathname, + xattr_value, xattr_len); if (action & IMA_APPRAISE_SUBMASK) - rc = ima_appraise_measurement(_func, iint, file, pathname); + rc = ima_appraise_measurement(_func, iint, file, pathname, + xattr_value, xattr_len); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); kfree(pathbuf); @@ -205,6 +238,7 @@ out_digsig: rc = -EACCES; out: mutex_unlock(&inode->i_mutex); + kfree(xattr_value); if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE)) return -EACCES; return 0; @@ -244,9 +278,9 @@ int ima_file_mmap(struct file *file, unsigned long prot) int ima_bprm_check(struct linux_binprm *bprm) { return process_measurement(bprm->file, - (strcmp(bprm->filename, bprm->interp) == 0) ? - bprm->filename : bprm->interp, - MAY_EXEC, BPRM_CHECK); + (strcmp(bprm->filename, bprm->interp) == 0) ? + bprm->filename : bprm->interp, + MAY_EXEC, BPRM_CHECK); } /** @@ -263,8 +297,8 @@ int ima_file_check(struct file *file, int mask) { ima_rdwr_violation_check(file); return process_measurement(file, NULL, - mask & (MAY_READ | MAY_WRITE | MAY_EXEC), - FILE_CHECK); + mask & (MAY_READ | MAY_WRITE | MAY_EXEC), + FILE_CHECK); } EXPORT_SYMBOL_GPL(ima_file_check); @@ -294,6 +328,7 @@ static int __init init_ima(void) { int error; + hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); if (!error) ima_initialized = 1; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 399433a..a9c3d3c 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -73,7 +73,6 @@ static struct ima_rule_entry default_rules[] = { {.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC}, - {.action = DONT_MEASURE,.fsmagic = RAMFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = DEVPTS_SUPER_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = BINFMTFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC}, diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c index ff63fe0..d85e997 100644 --- a/security/integrity/ima/ima_queue.c +++ b/security/integrity/ima/ima_queue.c @@ -50,7 +50,7 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value) key = ima_hash_key(digest_value); rcu_read_lock(); hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) { - rc = memcmp(qe->entry->digest, digest_value, IMA_DIGEST_SIZE); + rc = memcmp(qe->entry->digest, digest_value, TPM_DIGEST_SIZE); if (rc == 0) { ret = qe; break; @@ -104,9 +104,10 @@ static int ima_pcr_extend(const u8 *hash) * and extend the pcr. */ int ima_add_template_entry(struct ima_template_entry *entry, int violation, - const char *op, struct inode *inode) + const char *op, struct inode *inode, + const unsigned char *filename) { - u8 digest[IMA_DIGEST_SIZE]; + u8 digest[TPM_DIGEST_SIZE]; const char *audit_cause = "hash_added"; char tpm_audit_cause[AUDIT_CAUSE_LEN_MAX]; int audit_info = 1; @@ -141,8 +142,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation, } out: mutex_unlock(&ima_extend_list_mutex); - integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, - entry->template.file_name, + integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename, op, audit_cause, result, audit_info); return result; } diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c new file mode 100644 index 0000000..4e5da99 --- /dev/null +++ b/security/integrity/ima/ima_template.c @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2013 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Author: Roberto Sassu <roberto.sassu@polito.it> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * File: ima_template.c + * Helpers to manage template descriptors. + */ +#include <crypto/hash_info.h> + +#include "ima.h" +#include "ima_template_lib.h" + +static struct ima_template_desc defined_templates[] = { + {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT}, + {.name = "ima-ng",.fmt = "d-ng|n-ng"}, + {.name = "ima-sig",.fmt = "d-ng|n-ng|sig"}, +}; + +static struct ima_template_field supported_fields[] = { + {.field_id = "d",.field_init = ima_eventdigest_init, + .field_show = ima_show_template_digest}, + {.field_id = "n",.field_init = ima_eventname_init, + .field_show = ima_show_template_string}, + {.field_id = "d-ng",.field_init = ima_eventdigest_ng_init, + .field_show = ima_show_template_digest_ng}, + {.field_id = "n-ng",.field_init = ima_eventname_ng_init, + .field_show = ima_show_template_string}, + {.field_id = "sig",.field_init = ima_eventsig_init, + .field_show = ima_show_template_sig}, +}; + +static struct ima_template_desc *ima_template; +static struct ima_template_desc *lookup_template_desc(const char *name); + +static int __init ima_template_setup(char *str) +{ + struct ima_template_desc *template_desc; + int template_len = strlen(str); + + /* + * Verify that a template with the supplied name exists. + * If not, use CONFIG_IMA_DEFAULT_TEMPLATE. + */ + template_desc = lookup_template_desc(str); + if (!template_desc) + return 1; + + /* + * Verify whether the current hash algorithm is supported + * by the 'ima' template. + */ + if (template_len == 3 && strcmp(str, IMA_TEMPLATE_IMA_NAME) == 0 && + ima_hash_algo != HASH_ALGO_SHA1 && ima_hash_algo != HASH_ALGO_MD5) { + pr_err("IMA: template does not support hash alg\n"); + return 1; + } + + ima_template = template_desc; + return 1; +} +__setup("ima_template=", ima_template_setup); + +static struct ima_template_desc *lookup_template_desc(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(defined_templates); i++) { + if (strcmp(defined_templates[i].name, name) == 0) + return defined_templates + i; + } + + return NULL; +} + +static struct ima_template_field *lookup_template_field(const char *field_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(supported_fields); i++) + if (strncmp(supported_fields[i].field_id, field_id, + IMA_TEMPLATE_FIELD_ID_MAX_LEN) == 0) + return &supported_fields[i]; + return NULL; +} + +static int template_fmt_size(char *template_fmt) +{ + char c; + int template_fmt_len = strlen(template_fmt); + int i = 0, j = 0; + + while (i < template_fmt_len) { + c = template_fmt[i]; + if (c == '|') + j++; + i++; + } + + return j + 1; +} + +static int template_desc_init_fields(char *template_fmt, + struct ima_template_field ***fields, + int *num_fields) +{ + char *c, *template_fmt_ptr = template_fmt; + int template_num_fields = template_fmt_size(template_fmt); + int i, result = 0; + + if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX) + return -EINVAL; + + *fields = kzalloc(template_num_fields * sizeof(*fields), GFP_KERNEL); + if (*fields == NULL) { + result = -ENOMEM; + goto out; + } + for (i = 0; (c = strsep(&template_fmt_ptr, "|")) != NULL && + i < template_num_fields; i++) { + struct ima_template_field *f = lookup_template_field(c); + + if (!f) { + result = -ENOENT; + goto out; + } + (*fields)[i] = f; + } + *num_fields = i; + return 0; +out: + kfree(*fields); + *fields = NULL; + return result; +} + +static int init_defined_templates(void) +{ + int i = 0; + int result = 0; + + /* Init defined templates. */ + for (i = 0; i < ARRAY_SIZE(defined_templates); i++) { + struct ima_template_desc *template = &defined_templates[i]; + + result = template_desc_init_fields(template->fmt, + &(template->fields), + &(template->num_fields)); + if (result < 0) + return result; + } + return result; +} + +struct ima_template_desc *ima_template_desc_current(void) +{ + if (!ima_template) + ima_template = + lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE); + return ima_template; +} + +int ima_init_template(void) +{ + int result; + + result = init_defined_templates(); + if (result < 0) + return result; + + return 0; +} diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c new file mode 100644 index 0000000..6d66ad6 --- /dev/null +++ b/security/integrity/ima/ima_template_lib.c @@ -0,0 +1,347 @@ +/* + * Copyright (C) 2013 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Author: Roberto Sassu <roberto.sassu@polito.it> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * File: ima_template_lib.c + * Library of supported template fields. + */ +#include <crypto/hash_info.h> + +#include "ima_template_lib.h" + +static bool ima_template_hash_algo_allowed(u8 algo) +{ + if (algo == HASH_ALGO_SHA1 || algo == HASH_ALGO_MD5) + return true; + + return false; +} + +enum data_formats { + DATA_FMT_DIGEST = 0, + DATA_FMT_DIGEST_WITH_ALGO, + DATA_FMT_EVENT_NAME, + DATA_FMT_STRING, + DATA_FMT_HEX +}; + +static int ima_write_template_field_data(const void *data, const u32 datalen, + enum data_formats datafmt, + struct ima_field_data *field_data) +{ + u8 *buf, *buf_ptr; + u32 buflen; + + switch (datafmt) { + case DATA_FMT_EVENT_NAME: + buflen = IMA_EVENT_NAME_LEN_MAX + 1; + break; + case DATA_FMT_STRING: + buflen = datalen + 1; + break; + default: + buflen = datalen; + } + + buf = kzalloc(buflen, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy(buf, data, datalen); + + /* + * Replace all space characters with underscore for event names and + * strings. This avoid that, during the parsing of a measurements list, + * filenames with spaces or that end with the suffix ' (deleted)' are + * split into multiple template fields (the space is the delimitator + * character for measurements lists in ASCII format). + */ + if (datafmt == DATA_FMT_EVENT_NAME || datafmt == DATA_FMT_STRING) { + for (buf_ptr = buf; buf_ptr - buf < datalen; buf_ptr++) + if (*buf_ptr == ' ') + *buf_ptr = '_'; + } + + field_data->data = buf; + field_data->len = buflen; + return 0; +} + +static void ima_show_template_data_ascii(struct seq_file *m, + enum ima_show_type show, + enum data_formats datafmt, + struct ima_field_data *field_data) +{ + u8 *buf_ptr = field_data->data, buflen = field_data->len; + + switch (datafmt) { + case DATA_FMT_DIGEST_WITH_ALGO: + buf_ptr = strnchr(field_data->data, buflen, ':'); + if (buf_ptr != field_data->data) + seq_printf(m, "%s", field_data->data); + + /* skip ':' and '\0' */ + buf_ptr += 2; + buflen -= buf_ptr - field_data->data; + case DATA_FMT_DIGEST: + case DATA_FMT_HEX: + if (!buflen) + break; + ima_print_digest(m, buf_ptr, buflen); + break; + case DATA_FMT_STRING: + seq_printf(m, "%s", buf_ptr); + break; + default: + break; + } +} + +static void ima_show_template_data_binary(struct seq_file *m, + enum ima_show_type show, + enum data_formats datafmt, + struct ima_field_data *field_data) +{ + ima_putc(m, &field_data->len, sizeof(u32)); + if (!field_data->len) + return; + ima_putc(m, field_data->data, field_data->len); +} + +static void ima_show_template_field_data(struct seq_file *m, + enum ima_show_type show, + enum data_formats datafmt, + struct ima_field_data *field_data) +{ + switch (show) { + case IMA_SHOW_ASCII: + ima_show_template_data_ascii(m, show, datafmt, field_data); + break; + case IMA_SHOW_BINARY: + ima_show_template_data_binary(m, show, datafmt, field_data); + break; + default: + break; + } +} + +void ima_show_template_digest(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data) +{ + ima_show_template_field_data(m, show, DATA_FMT_DIGEST, field_data); +} + +void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data) +{ + ima_show_template_field_data(m, show, DATA_FMT_DIGEST_WITH_ALGO, + field_data); +} + +void ima_show_template_string(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data) +{ + ima_show_template_field_data(m, show, DATA_FMT_STRING, field_data); +} + +void ima_show_template_sig(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data) +{ + ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data); +} + +static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 hash_algo, + struct ima_field_data *field_data, + bool size_limit) +{ + /* + * digest formats: + * - DATA_FMT_DIGEST: digest + * - DATA_FMT_DIGEST_WITH_ALGO: [<hash algo>] + ':' + '\0' + digest, + * where <hash algo> is provided if the hash algoritm is not + * SHA1 or MD5 + */ + u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 }; + enum data_formats fmt = DATA_FMT_DIGEST; + u32 offset = 0; + + if (!size_limit) { + fmt = DATA_FMT_DIGEST_WITH_ALGO; + if (hash_algo < HASH_ALGO__LAST) + offset += snprintf(buffer, CRYPTO_MAX_ALG_NAME + 1, + "%s", hash_algo_name[hash_algo]); + buffer[offset] = ':'; + offset += 2; + } + + if (digest) + memcpy(buffer + offset, digest, digestsize); + else + /* + * If digest is NULL, the event being recorded is a violation. + * Make room for the digest by increasing the offset of + * IMA_DIGEST_SIZE. + */ + offset += IMA_DIGEST_SIZE; + + return ima_write_template_field_data(buffer, offset + digestsize, + fmt, field_data); +} + +/* + * This function writes the digest of an event (with size limit). + */ +int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data) +{ + struct { + struct ima_digest_data hdr; + char digest[IMA_MAX_DIGEST_SIZE]; + } hash; + u8 *cur_digest = NULL; + u32 cur_digestsize = 0; + struct inode *inode; + int result; + + memset(&hash, 0, sizeof(hash)); + + if (!iint) /* recording a violation. */ + goto out; + + if (ima_template_hash_algo_allowed(iint->ima_hash->algo)) { + cur_digest = iint->ima_hash->digest; + cur_digestsize = iint->ima_hash->length; + goto out; + } + + if (!file) /* missing info to re-calculate the digest */ + return -EINVAL; + + inode = file_inode(file); + hash.hdr.algo = ima_template_hash_algo_allowed(ima_hash_algo) ? + ima_hash_algo : HASH_ALGO_SHA1; + result = ima_calc_file_hash(file, &hash.hdr); + if (result) { + integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, + filename, "collect_data", + "failed", result, 0); + return result; + } + cur_digest = hash.hdr.digest; + cur_digestsize = hash.hdr.length; +out: + return ima_eventdigest_init_common(cur_digest, cur_digestsize, -1, + field_data, true); +} + +/* + * This function writes the digest of an event (without size limit). + */ +int ima_eventdigest_ng_init(struct integrity_iint_cache *iint, + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_field_data *field_data) +{ + u8 *cur_digest = NULL, hash_algo = HASH_ALGO__LAST; + u32 cur_digestsize = 0; + + /* If iint is NULL, we are recording a violation. */ + if (!iint) + goto out; + + cur_digest = iint->ima_hash->digest; + cur_digestsize = iint->ima_hash->length; + + hash_algo = iint->ima_hash->algo; +out: + return ima_eventdigest_init_common(cur_digest, cur_digestsize, + hash_algo, field_data, false); +} + +static int ima_eventname_init_common(struct integrity_iint_cache *iint, + struct file *file, + const unsigned char *filename, + struct ima_field_data *field_data, + bool size_limit) +{ + const char *cur_filename = NULL; + u32 cur_filename_len = 0; + enum data_formats fmt = size_limit ? + DATA_FMT_EVENT_NAME : DATA_FMT_STRING; + + BUG_ON(filename == NULL && file == NULL); + + if (filename) { + cur_filename = filename; + cur_filename_len = strlen(filename); + + if (!size_limit || cur_filename_len <= IMA_EVENT_NAME_LEN_MAX) + goto out; + } + + if (file) { + cur_filename = file->f_dentry->d_name.name; + cur_filename_len = strlen(cur_filename); + } else + /* + * Truncate filename if the latter is too long and + * the file descriptor is not available. + */ + cur_filename_len = IMA_EVENT_NAME_LEN_MAX; +out: + return ima_write_template_field_data(cur_filename, cur_filename_len, + fmt, field_data); +} + +/* + * This function writes the name of an event (with size limit). + */ +int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data) +{ + return ima_eventname_init_common(iint, file, filename, + field_data, true); +} + +/* + * This function writes the name of an event (without size limit). + */ +int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data) +{ + return ima_eventname_init_common(iint, file, filename, + field_data, false); +} + +/* + * ima_eventsig_init - include the file signature as part of the template data + */ +int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data) +{ + enum data_formats fmt = DATA_FMT_HEX; + int rc = 0; + + if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG)) + goto out; + + rc = ima_write_template_field_data(xattr_value, xattr_len, fmt, + field_data); +out: + return rc; +} diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h new file mode 100644 index 0000000..63f6b52 --- /dev/null +++ b/security/integrity/ima/ima_template_lib.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2013 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Author: Roberto Sassu <roberto.sassu@polito.it> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * File: ima_template_lib.h + * Header for the library of supported template fields. + */ +#ifndef __LINUX_IMA_TEMPLATE_LIB_H +#define __LINUX_IMA_TEMPLATE_LIB_H + +#include <linux/seq_file.h> +#include "ima.h" + +void ima_show_template_digest(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +void ima_show_template_string(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +void ima_show_template_sig(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data); +int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data); +int ima_eventdigest_ng_init(struct integrity_iint_cache *iint, + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_field_data *field_data); +int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data); +int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data); +#endif /* __LINUX_IMA_TEMPLATE_LIB_H */ diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index c42fb7a..b9e7c13 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -54,25 +54,57 @@ enum evm_ima_xattr_type { IMA_XATTR_DIGEST = 0x01, EVM_XATTR_HMAC, EVM_IMA_XATTR_DIGSIG, + IMA_XATTR_DIGEST_NG, }; struct evm_ima_xattr_data { u8 type; u8 digest[SHA1_DIGEST_SIZE]; -} __attribute__((packed)); +} __packed; + +#define IMA_MAX_DIGEST_SIZE 64 + +struct ima_digest_data { + u8 algo; + u8 length; + union { + struct { + u8 unused; + u8 type; + } sha1; + struct { + u8 type; + u8 algo; + } ng; + u8 data[2]; + } xattr; + u8 digest[0]; +} __packed; + +/* + * signature format v2 - for using with asymmetric keys + */ +struct signature_v2_hdr { + uint8_t type; /* xattr type */ + uint8_t version; /* signature format version */ + uint8_t hash_algo; /* Digest algorithm [enum pkey_hash_algo] */ + uint32_t keyid; /* IMA key identifier - not X509/PGP specific */ + uint16_t sig_size; /* signature size */ + uint8_t sig[0]; /* signature payload */ +} __packed; /* integrity data associated with an inode */ struct integrity_iint_cache { - struct rb_node rb_node; /* rooted in integrity_iint_tree */ + struct rb_node rb_node; /* rooted in integrity_iint_tree */ struct inode *inode; /* back pointer to inode in question */ u64 version; /* track inode changes */ unsigned long flags; - struct evm_ima_xattr_data ima_xattr; enum integrity_status ima_file_status:4; enum integrity_status ima_mmap_status:4; enum integrity_status ima_bprm_status:4; enum integrity_status ima_module_status:4; enum integrity_status evm_status:4; + struct ima_digest_data *ima_hash; }; /* rbtree tree calls to lookup, insert, delete @@ -89,7 +121,7 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode); #ifdef CONFIG_INTEGRITY_SIGNATURE int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, - const char *digest, int digestlen); + const char *digest, int digestlen); #else @@ -105,12 +137,19 @@ static inline int integrity_digsig_verify(const unsigned int id, #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS int asymmetric_verify(struct key *keyring, const char *sig, int siglen, const char *data, int datalen); + +int integrity_init_keyring(const unsigned int id); #else static inline int asymmetric_verify(struct key *keyring, const char *sig, int siglen, const char *data, int datalen) { return -EOPNOTSUPP; } + +static int integrity_init_keyring(const unsigned int id) +{ + return 0; +} #endif #ifdef CONFIG_INTEGRITY_AUDIT diff --git a/security/keys/Kconfig b/security/keys/Kconfig index a90d6d30..a4f3f8c 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -4,6 +4,7 @@ config KEYS bool "Enable access key retention support" + select ASSOCIATIVE_ARRAY help This option provides support for retaining authentication tokens and access keys in the kernel. @@ -19,6 +20,34 @@ config KEYS If you are unsure as to whether this is required, answer N. +config PERSISTENT_KEYRINGS + bool "Enable register of persistent per-UID keyrings" + depends on KEYS + help + This option provides a register of persistent per-UID keyrings, + primarily aimed at Kerberos key storage. The keyrings are persistent + in the sense that they stay around after all processes of that UID + have exited, not that they survive the machine being rebooted. + + A particular keyring may be accessed by either the user whose keyring + it is or by a process with administrative privileges. The active + LSMs gets to rule on which admin-level processes get to access the + cache. + + Keyrings are created and added into the register upon demand and get + removed if they expire (a default timeout is set upon creation). + +config BIG_KEYS + bool "Large payload keys" + depends on KEYS + depends on TMPFS + help + This option provides support for holding large keys within the kernel + (for example Kerberos ticket caches). The data may be stored out to + swapspace by tmpfs. + + If you are unsure as to whether this is required, answer N. + config TRUSTED_KEYS tristate "TRUSTED KEYS" depends on KEYS && TCG_TPM diff --git a/security/keys/Makefile b/security/keys/Makefile index 504aaa0..dfb3a7b 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -18,9 +18,11 @@ obj-y := \ obj-$(CONFIG_KEYS_COMPAT) += compat.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SYSCTL) += sysctl.o +obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o # # Key types # +obj-$(CONFIG_BIG_KEYS) += big_key.o obj-$(CONFIG_TRUSTED_KEYS) += trusted.o obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/ diff --git a/security/keys/big_key.c b/security/keys/big_key.c new file mode 100644 index 0000000..7f44c32 --- /dev/null +++ b/security/keys/big_key.c @@ -0,0 +1,207 @@ +/* Large capacity key type + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/file.h> +#include <linux/shmem_fs.h> +#include <linux/err.h> +#include <keys/user-type.h> +#include <keys/big_key-type.h> + +MODULE_LICENSE("GPL"); + +/* + * If the data is under this limit, there's no point creating a shm file to + * hold it as the permanently resident metadata for the shmem fs will be at + * least as large as the data. + */ +#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry)) + +/* + * big_key defined keys take an arbitrary string as the description and an + * arbitrary blob of data as the payload + */ +struct key_type key_type_big_key = { + .name = "big_key", + .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, + .instantiate = big_key_instantiate, + .match = user_match, + .revoke = big_key_revoke, + .destroy = big_key_destroy, + .describe = big_key_describe, + .read = big_key_read, +}; + +/* + * Instantiate a big key + */ +int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep) +{ + struct path *path = (struct path *)&key->payload.data2; + struct file *file; + ssize_t written; + size_t datalen = prep->datalen; + int ret; + + ret = -EINVAL; + if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) + goto error; + + /* Set an arbitrary quota */ + ret = key_payload_reserve(key, 16); + if (ret < 0) + goto error; + + key->type_data.x[1] = datalen; + + if (datalen > BIG_KEY_FILE_THRESHOLD) { + /* Create a shmem file to store the data in. This will permit the data + * to be swapped out if needed. + * + * TODO: Encrypt the stored data with a temporary key. + */ + file = shmem_file_setup("", datalen, 0); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err_quota; + } + + written = kernel_write(file, prep->data, prep->datalen, 0); + if (written != datalen) { + ret = written; + if (written >= 0) + ret = -ENOMEM; + goto err_fput; + } + + /* Pin the mount and dentry to the key so that we can open it again + * later + */ + *path = file->f_path; + path_get(path); + fput(file); + } else { + /* Just store the data in a buffer */ + void *data = kmalloc(datalen, GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto err_quota; + } + + key->payload.data = memcpy(data, prep->data, prep->datalen); + } + return 0; + +err_fput: + fput(file); +err_quota: + key_payload_reserve(key, 0); +error: + return ret; +} + +/* + * dispose of the links from a revoked keyring + * - called with the key sem write-locked + */ +void big_key_revoke(struct key *key) +{ + struct path *path = (struct path *)&key->payload.data2; + + /* clear the quota */ + key_payload_reserve(key, 0); + if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) + vfs_truncate(path, 0); +} + +/* + * dispose of the data dangling from the corpse of a big_key key + */ +void big_key_destroy(struct key *key) +{ + if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) { + struct path *path = (struct path *)&key->payload.data2; + path_put(path); + path->mnt = NULL; + path->dentry = NULL; + } else { + kfree(key->payload.data); + key->payload.data = NULL; + } +} + +/* + * describe the big_key key + */ +void big_key_describe(const struct key *key, struct seq_file *m) +{ + unsigned long datalen = key->type_data.x[1]; + + seq_puts(m, key->description); + + if (key_is_instantiated(key)) + seq_printf(m, ": %lu [%s]", + datalen, + datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); +} + +/* + * read the key data + * - the key's semaphore is read-locked + */ +long big_key_read(const struct key *key, char __user *buffer, size_t buflen) +{ + unsigned long datalen = key->type_data.x[1]; + long ret; + + if (!buffer || buflen < datalen) + return datalen; + + if (datalen > BIG_KEY_FILE_THRESHOLD) { + struct path *path = (struct path *)&key->payload.data2; + struct file *file; + loff_t pos; + + file = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(file)) + return PTR_ERR(file); + + pos = 0; + ret = vfs_read(file, buffer, datalen, &pos); + fput(file); + if (ret >= 0 && ret != datalen) + ret = -EIO; + } else { + ret = datalen; + if (copy_to_user(buffer, key->payload.data, datalen) != 0) + ret = -EFAULT; + } + + return ret; +} + +/* + * Module stuff + */ +static int __init big_key_init(void) +{ + return register_key_type(&key_type_big_key); +} + +static void __exit big_key_cleanup(void) +{ + unregister_key_type(&key_type_big_key); +} + +module_init(big_key_init); +module_exit(big_key_cleanup); diff --git a/security/keys/compat.c b/security/keys/compat.c index d65fa7f..bbd32c7 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option, case KEYCTL_INVALIDATE: return keyctl_invalidate_key(arg2); + case KEYCTL_GET_PERSISTENT: + return keyctl_get_persistent(arg2, arg3); + default: return -EOPNOTSUPP; } diff --git a/security/keys/gc.c b/security/keys/gc.c index d67c97b..d3222b6 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -131,50 +131,6 @@ void key_gc_keytype(struct key_type *ktype) } /* - * Garbage collect pointers from a keyring. - * - * Not called with any locks held. The keyring's key struct will not be - * deallocated under us as only our caller may deallocate it. - */ -static void key_gc_keyring(struct key *keyring, time_t limit) -{ - struct keyring_list *klist; - int loop; - - kenter("%x", key_serial(keyring)); - - if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - goto dont_gc; - - /* scan the keyring looking for dead keys */ - rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (!klist) - goto unlock_dont_gc; - - loop = klist->nkeys; - smp_rmb(); - for (loop--; loop >= 0; loop--) { - struct key *key = rcu_dereference(klist->keys[loop]); - if (key_is_dead(key, limit)) - goto do_gc; - } - -unlock_dont_gc: - rcu_read_unlock(); -dont_gc: - kleave(" [no gc]"); - return; - -do_gc: - rcu_read_unlock(); - - keyring_gc(keyring, limit); - kleave(" [gc]"); -} - -/* * Garbage collect a list of unreferenced, detached keys */ static noinline void key_gc_unused_keys(struct list_head *keys) @@ -392,8 +348,7 @@ found_unreferenced_key: */ found_keyring: spin_unlock(&key_serial_lock); - kdebug("scan keyring %d", key->serial); - key_gc_keyring(key, limit); + keyring_gc(key, limit); goto maybe_resched; /* We found a dead key that is still referenced. Reset its type and diff --git a/security/keys/internal.h b/security/keys/internal.h index d4f1468..80b2aac 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -89,42 +89,53 @@ extern struct key_type *key_type_lookup(const char *type); extern void key_type_put(struct key_type *ktype); extern int __key_link_begin(struct key *keyring, - const struct key_type *type, - const char *description, - unsigned long *_prealloc); + const struct keyring_index_key *index_key, + struct assoc_array_edit **_edit); extern int __key_link_check_live_key(struct key *keyring, struct key *key); -extern void __key_link(struct key *keyring, struct key *key, - unsigned long *_prealloc); +extern void __key_link(struct key *key, struct assoc_array_edit **_edit); extern void __key_link_end(struct key *keyring, - struct key_type *type, - unsigned long prealloc); + const struct keyring_index_key *index_key, + struct assoc_array_edit *edit); -extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, - const struct key_type *type, - const char *description, - key_perm_t perm); +extern key_ref_t find_key_to_update(key_ref_t keyring_ref, + const struct keyring_index_key *index_key); extern struct key *keyring_search_instkey(struct key *keyring, key_serial_t target_id); +extern int iterate_over_keyring(const struct key *keyring, + int (*func)(const struct key *key, void *data), + void *data); + typedef int (*key_match_func_t)(const struct key *, const void *); +struct keyring_search_context { + struct keyring_index_key index_key; + const struct cred *cred; + key_match_func_t match; + const void *match_data; + unsigned flags; +#define KEYRING_SEARCH_LOOKUP_TYPE 0x0001 /* [as type->def_lookup_type] */ +#define KEYRING_SEARCH_NO_STATE_CHECK 0x0002 /* Skip state checks */ +#define KEYRING_SEARCH_DO_STATE_CHECK 0x0004 /* Override NO_STATE_CHECK */ +#define KEYRING_SEARCH_NO_UPDATE_TIME 0x0008 /* Don't update times */ +#define KEYRING_SEARCH_NO_CHECK_PERM 0x0010 /* Don't check permissions */ +#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020 /* Give an error on excessive depth */ + + int (*iterator)(const void *object, void *iterator_data); + + /* Internal stuff */ + int skipped_ret; + bool possessed; + key_ref_t result; + struct timespec now; +}; + extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, - const struct cred *cred, - struct key_type *type, - const void *description, - key_match_func_t match, - bool no_state_check); - -extern key_ref_t search_my_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - bool no_state_check, - const struct cred *cred); -extern key_ref_t search_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - const struct cred *cred); + struct keyring_search_context *ctx); + +extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx); +extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx); extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); @@ -202,7 +213,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id); /* * Determine whether a key is dead. */ -static inline bool key_is_dead(struct key *key, time_t limit) +static inline bool key_is_dead(const struct key *key, time_t limit) { return key->flags & ((1 << KEY_FLAG_DEAD) | @@ -244,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t); extern long keyctl_instantiate_key_common(key_serial_t, const struct iovec *, unsigned, size_t, key_serial_t); +#ifdef CONFIG_PERSISTENT_KEYRINGS +extern long keyctl_get_persistent(uid_t, key_serial_t); +extern unsigned persistent_keyring_expiry; +#else +static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring) +{ + return -EOPNOTSUPP; +} +#endif /* * Debugging key validation diff --git a/security/keys/key.c b/security/keys/key.c index 8fb7c7b..55d110f 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, } } - desclen = strlen(desc) + 1; - quotalen = desclen + type->def_datalen; + desclen = strlen(desc); + quotalen = desclen + 1 + type->def_datalen; /* get hold of the key tracking for this user */ user = key_user_lookup(uid); @@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, goto no_memory_2; if (desc) { - key->description = kmemdup(desc, desclen, GFP_KERNEL); + key->index_key.desc_len = desclen; + key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL); if (!key->description) goto no_memory_3; } @@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, atomic_set(&key->usage, 1); init_rwsem(&key->sem); lockdep_set_class(&key->sem, &type->lock_class); - key->type = type; + key->index_key.type = type; key->user = user; key->quotalen = quotalen; key->datalen = type->def_datalen; @@ -299,6 +300,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; + if (flags & KEY_ALLOC_TRUSTED) + key->flags |= 1 << KEY_FLAG_TRUSTED; memset(&key->type_data, 0, sizeof(key->type_data)); @@ -408,7 +411,7 @@ static int __key_instantiate_and_link(struct key *key, struct key_preparsed_payload *prep, struct key *keyring, struct key *authkey, - unsigned long *_prealloc) + struct assoc_array_edit **_edit) { int ret, awaken; @@ -435,7 +438,7 @@ static int __key_instantiate_and_link(struct key *key, /* and link it into the destination keyring */ if (keyring) - __key_link(keyring, key, _prealloc); + __key_link(key, _edit); /* disable the authorisation key */ if (authkey) @@ -475,7 +478,7 @@ int key_instantiate_and_link(struct key *key, struct key *authkey) { struct key_preparsed_payload prep; - unsigned long prealloc; + struct assoc_array_edit *edit; int ret; memset(&prep, 0, sizeof(prep)); @@ -489,17 +492,15 @@ int key_instantiate_and_link(struct key *key, } if (keyring) { - ret = __key_link_begin(keyring, key->type, key->description, - &prealloc); + ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret < 0) goto error_free_preparse; } - ret = __key_instantiate_and_link(key, &prep, keyring, authkey, - &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit); if (keyring) - __key_link_end(keyring, key->type, prealloc); + __key_link_end(keyring, &key->index_key, edit); error_free_preparse: if (key->type->preparse) @@ -537,7 +538,7 @@ int key_reject_and_link(struct key *key, struct key *keyring, struct key *authkey) { - unsigned long prealloc; + struct assoc_array_edit *edit; struct timespec now; int ret, awaken, link_ret = 0; @@ -548,8 +549,7 @@ int key_reject_and_link(struct key *key, ret = -EBUSY; if (keyring) - link_ret = __key_link_begin(keyring, key->type, - key->description, &prealloc); + link_ret = __key_link_begin(keyring, &key->index_key, &edit); mutex_lock(&key_construction_mutex); @@ -557,9 +557,10 @@ int key_reject_and_link(struct key *key, if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { /* mark the key as being negatively instantiated */ atomic_inc(&key->user->nikeys); + key->type_data.reject_error = -error; + smp_wmb(); set_bit(KEY_FLAG_NEGATIVE, &key->flags); set_bit(KEY_FLAG_INSTANTIATED, &key->flags); - key->type_data.reject_error = -error; now = current_kernel_time(); key->expiry = now.tv_sec + timeout; key_schedule_gc(key->expiry + key_gc_delay); @@ -571,7 +572,7 @@ int key_reject_and_link(struct key *key, /* and link it into the destination keyring */ if (keyring && link_ret == 0) - __key_link(keyring, key, &prealloc); + __key_link(key, &edit); /* disable the authorisation key */ if (authkey) @@ -581,7 +582,7 @@ int key_reject_and_link(struct key *key, mutex_unlock(&key_construction_mutex); if (keyring) - __key_link_end(keyring, key->type, prealloc); + __key_link_end(keyring, &key->index_key, edit); /* wake up anyone waiting for a key to be constructed */ if (awaken) @@ -645,7 +646,7 @@ found: /* this races with key_put(), but that doesn't matter since key_put() * doesn't actually change the key */ - atomic_inc(&key->usage); + __key_get(key); error: spin_unlock(&key_serial_lock); @@ -780,25 +781,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_perm_t perm, unsigned long flags) { - unsigned long prealloc; + struct keyring_index_key index_key = { + .description = description, + }; struct key_preparsed_payload prep; + struct assoc_array_edit *edit; const struct cred *cred = current_cred(); - struct key_type *ktype; struct key *keyring, *key = NULL; key_ref_t key_ref; int ret; /* look up the key type to see if it's one of the registered kernel * types */ - ktype = key_type_lookup(type); - if (IS_ERR(ktype)) { + index_key.type = key_type_lookup(type); + if (IS_ERR(index_key.type)) { key_ref = ERR_PTR(-ENODEV); goto error; } key_ref = ERR_PTR(-EINVAL); - if (!ktype->match || !ktype->instantiate || - (!description && !ktype->preparse)) + if (!index_key.type->match || !index_key.type->instantiate || + (!index_key.description && !index_key.type->preparse)) goto error_put_type; keyring = key_ref_to_ptr(keyring_ref); @@ -812,21 +815,28 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, memset(&prep, 0, sizeof(prep)); prep.data = payload; prep.datalen = plen; - prep.quotalen = ktype->def_datalen; - if (ktype->preparse) { - ret = ktype->preparse(&prep); + prep.quotalen = index_key.type->def_datalen; + prep.trusted = flags & KEY_ALLOC_TRUSTED; + if (index_key.type->preparse) { + ret = index_key.type->preparse(&prep); if (ret < 0) { key_ref = ERR_PTR(ret); goto error_put_type; } - if (!description) - description = prep.description; + if (!index_key.description) + index_key.description = prep.description; key_ref = ERR_PTR(-EINVAL); - if (!description) + if (!index_key.description) goto error_free_prep; } + index_key.desc_len = strlen(index_key.description); + + key_ref = ERR_PTR(-EPERM); + if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags)) + goto error_free_prep; + flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0; - ret = __key_link_begin(keyring, ktype, description, &prealloc); + ret = __key_link_begin(keyring, &index_key, &edit); if (ret < 0) { key_ref = ERR_PTR(ret); goto error_free_prep; @@ -844,10 +854,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, * key of the same type and description in the destination keyring and * update that instead if possible */ - if (ktype->update) { - key_ref = __keyring_search_one(keyring_ref, ktype, description, - 0); - if (!IS_ERR(key_ref)) + if (index_key.type->update) { + key_ref = find_key_to_update(keyring_ref, &index_key); + if (key_ref) goto found_matching_key; } @@ -856,23 +865,24 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; perm |= KEY_USR_VIEW; - if (ktype->read) + if (index_key.type->read) perm |= KEY_POS_READ; - if (ktype == &key_type_keyring || ktype->update) + if (index_key.type == &key_type_keyring || + index_key.type->update) perm |= KEY_POS_WRITE; } /* allocate a new key */ - key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred, - perm, flags); + key = key_alloc(index_key.type, index_key.description, + cred->fsuid, cred->fsgid, cred, perm, flags); if (IS_ERR(key)) { key_ref = ERR_CAST(key); goto error_link_end; } /* instantiate it and link it into the target keyring */ - ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit); if (ret < 0) { key_put(key); key_ref = ERR_PTR(ret); @@ -882,12 +892,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); error_link_end: - __key_link_end(keyring, ktype, prealloc); + __key_link_end(keyring, &index_key, edit); error_free_prep: - if (ktype->preparse) - ktype->free_preparse(&prep); + if (index_key.type->preparse) + index_key.type->free_preparse(&prep); error_put_type: - key_type_put(ktype); + key_type_put(index_key.type); error: return key_ref; @@ -895,7 +905,7 @@ error: /* we found a matching key, so we're going to try to update it * - we can drop the locks first as we have the key pinned */ - __key_link_end(keyring, ktype, prealloc); + __key_link_end(keyring, &index_key, edit); key_ref = __key_update(key_ref, &prep); goto error_free_prep; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 33cfd27b..cee72ce 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, case KEYCTL_INVALIDATE: return keyctl_invalidate_key((key_serial_t) arg2); + case KEYCTL_GET_PERSISTENT: + return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 6ece7f2..69f0cb7 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1,6 +1,6 @@ /* Keyring handling * - * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -17,25 +17,11 @@ #include <linux/seq_file.h> #include <linux/err.h> #include <keys/keyring-type.h> +#include <keys/user-type.h> +#include <linux/assoc_array_priv.h> #include <linux/uaccess.h> #include "internal.h" -#define rcu_dereference_locked_keyring(keyring) \ - (rcu_dereference_protected( \ - (keyring)->payload.subscriptions, \ - rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem))) - -#define rcu_deref_link_locked(klist, index, keyring) \ - (rcu_dereference_protected( \ - (klist)->keys[index], \ - rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem))) - -#define MAX_KEYRING_LINKS \ - min_t(size_t, USHRT_MAX - 1, \ - ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *))) - -#define KEY_LINK_FIXQUOTA 1UL - /* * When plumbing the depths of the key tree, this sets a hard limit * set on how deep we're willing to go. @@ -47,6 +33,28 @@ */ #define KEYRING_NAME_HASH_SIZE (1 << 5) +/* + * We mark pointers we pass to the associative array with bit 1 set if + * they're keyrings and clear otherwise. + */ +#define KEYRING_PTR_SUBTYPE 0x2UL + +static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & KEYRING_PTR_SUBTYPE; +} +static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x) +{ + void *object = assoc_array_ptr_to_leaf(x); + return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE); +} +static inline void *keyring_key_to_ptr(struct key *key) +{ + if (key->type == &key_type_keyring) + return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE); + return key; +} + static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE]; static DEFINE_RWLOCK(keyring_name_lock); @@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc) */ static int keyring_instantiate(struct key *keyring, struct key_preparsed_payload *prep); -static int keyring_match(const struct key *keyring, const void *criterion); static void keyring_revoke(struct key *keyring); static void keyring_destroy(struct key *keyring); static void keyring_describe(const struct key *keyring, struct seq_file *m); @@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring, struct key_type key_type_keyring = { .name = "keyring", - .def_datalen = sizeof(struct keyring_list), + .def_datalen = 0, .instantiate = keyring_instantiate, - .match = keyring_match, + .match = user_match, .revoke = keyring_revoke, .destroy = keyring_destroy, .describe = keyring_describe, @@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring, ret = -EINVAL; if (prep->datalen == 0) { + assoc_array_init(&keyring->keys); /* make the keyring available by name if it has one */ keyring_publish_name(keyring); ret = 0; @@ -136,15 +144,226 @@ static int keyring_instantiate(struct key *keyring, } /* - * Match keyrings on their name + * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit. Ideally we'd + * fold the carry back too, but that requires inline asm. + */ +static u64 mult_64x32_and_fold(u64 x, u32 y) +{ + u64 hi = (u64)(u32)(x >> 32) * y; + u64 lo = (u64)(u32)(x) * y; + return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32); +} + +/* + * Hash a key type and description. + */ +static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key) +{ + const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP; + const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK; + const char *description = index_key->description; + unsigned long hash, type; + u32 piece; + u64 acc; + int n, desc_len = index_key->desc_len; + + type = (unsigned long)index_key->type; + + acc = mult_64x32_and_fold(type, desc_len + 13); + acc = mult_64x32_and_fold(acc, 9207); + for (;;) { + n = desc_len; + if (n <= 0) + break; + if (n > 4) + n = 4; + piece = 0; + memcpy(&piece, description, n); + description += n; + desc_len -= n; + acc = mult_64x32_and_fold(acc, piece); + acc = mult_64x32_and_fold(acc, 9207); + } + + /* Fold the hash down to 32 bits if need be. */ + hash = acc; + if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32) + hash ^= acc >> 32; + + /* Squidge all the keyrings into a separate part of the tree to + * ordinary keys by making sure the lowest level segment in the hash is + * zero for keyrings and non-zero otherwise. + */ + if (index_key->type != &key_type_keyring && (hash & level_mask) == 0) + return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; + if (index_key->type == &key_type_keyring && (hash & level_mask) != 0) + return (hash + (hash << level_shift)) & ~level_mask; + return hash; +} + +/* + * Build the next index key chunk. + * + * On 32-bit systems the index key is laid out as: + * + * 0 4 5 9... + * hash desclen typeptr desc[] + * + * On 64-bit systems: + * + * 0 8 9 17... + * hash desclen typeptr desc[] + * + * We return it one word-sized chunk at a time. */ -static int keyring_match(const struct key *keyring, const void *description) +static unsigned long keyring_get_key_chunk(const void *data, int level) +{ + const struct keyring_index_key *index_key = data; + unsigned long chunk = 0; + long offset = 0; + int desc_len = index_key->desc_len, n = sizeof(chunk); + + level /= ASSOC_ARRAY_KEY_CHUNK_SIZE; + switch (level) { + case 0: + return hash_key_type_and_desc(index_key); + case 1: + return ((unsigned long)index_key->type << 8) | desc_len; + case 2: + if (desc_len == 0) + return (u8)((unsigned long)index_key->type >> + (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); + n--; + offset = 1; + default: + offset += sizeof(chunk) - 1; + offset += (level - 3) * sizeof(chunk); + if (offset >= desc_len) + return 0; + desc_len -= offset; + if (desc_len > n) + desc_len = n; + offset += desc_len; + do { + chunk <<= 8; + chunk |= ((u8*)index_key->description)[--offset]; + } while (--desc_len > 0); + + if (level == 2) { + chunk <<= 8; + chunk |= (u8)((unsigned long)index_key->type >> + (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); + } + return chunk; + } +} + +static unsigned long keyring_get_object_key_chunk(const void *object, int level) +{ + const struct key *key = keyring_ptr_to_key(object); + return keyring_get_key_chunk(&key->index_key, level); +} + +static bool keyring_compare_object(const void *object, const void *data) { - return keyring->description && - strcmp(keyring->description, description) == 0; + const struct keyring_index_key *index_key = data; + const struct key *key = keyring_ptr_to_key(object); + + return key->index_key.type == index_key->type && + key->index_key.desc_len == index_key->desc_len && + memcmp(key->index_key.description, index_key->description, + index_key->desc_len) == 0; } /* + * Compare the index keys of a pair of objects and determine the bit position + * at which they differ - if they differ. + */ +static int keyring_diff_objects(const void *_a, const void *_b) +{ + const struct key *key_a = keyring_ptr_to_key(_a); + const struct key *key_b = keyring_ptr_to_key(_b); + const struct keyring_index_key *a = &key_a->index_key; + const struct keyring_index_key *b = &key_b->index_key; + unsigned long seg_a, seg_b; + int level, i; + + level = 0; + seg_a = hash_key_type_and_desc(a); + seg_b = hash_key_type_and_desc(b); + if ((seg_a ^ seg_b) != 0) + goto differ; + + /* The number of bits contributed by the hash is controlled by a + * constant in the assoc_array headers. Everything else thereafter we + * can deal with as being machine word-size dependent. + */ + level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8; + seg_a = a->desc_len; + seg_b = b->desc_len; + if ((seg_a ^ seg_b) != 0) + goto differ; + + /* The next bit may not work on big endian */ + level++; + seg_a = (unsigned long)a->type; + seg_b = (unsigned long)b->type; + if ((seg_a ^ seg_b) != 0) + goto differ; + + level += sizeof(unsigned long); + if (a->desc_len == 0) + goto same; + + i = 0; + if (((unsigned long)a->description | (unsigned long)b->description) & + (sizeof(unsigned long) - 1)) { + do { + seg_a = *(unsigned long *)(a->description + i); + seg_b = *(unsigned long *)(b->description + i); + if ((seg_a ^ seg_b) != 0) + goto differ_plus_i; + i += sizeof(unsigned long); + } while (i < (a->desc_len & (sizeof(unsigned long) - 1))); + } + + for (; i < a->desc_len; i++) { + seg_a = *(unsigned char *)(a->description + i); + seg_b = *(unsigned char *)(b->description + i); + if ((seg_a ^ seg_b) != 0) + goto differ_plus_i; + } + +same: + return -1; + +differ_plus_i: + level += i; +differ: + i = level * 8 + __ffs(seg_a ^ seg_b); + return i; +} + +/* + * Free an object after stripping the keyring flag off of the pointer. + */ +static void keyring_free_object(void *object) +{ + key_put(keyring_ptr_to_key(object)); +} + +/* + * Operations for keyring management by the index-tree routines. + */ +static const struct assoc_array_ops keyring_assoc_array_ops = { + .get_key_chunk = keyring_get_key_chunk, + .get_object_key_chunk = keyring_get_object_key_chunk, + .compare_object = keyring_compare_object, + .diff_objects = keyring_diff_objects, + .free_object = keyring_free_object, +}; + +/* * Clean up a keyring when it is destroyed. Unpublish its name if it had one * and dispose of its data. * @@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description) */ static void keyring_destroy(struct key *keyring) { - struct keyring_list *klist; - int loop; - if (keyring->description) { write_lock(&keyring_name_lock); @@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring) write_unlock(&keyring_name_lock); } - klist = rcu_access_pointer(keyring->payload.subscriptions); - if (klist) { - for (loop = klist->nkeys - 1; loop >= 0; loop--) - key_put(rcu_access_pointer(klist->keys[loop])); - kfree(klist); - } + assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops); } /* @@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring) */ static void keyring_describe(const struct key *keyring, struct seq_file *m) { - struct keyring_list *klist; - if (keyring->description) seq_puts(m, keyring->description); else seq_puts(m, "[anon]"); if (key_is_instantiated(keyring)) { - rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) - seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); + if (keyring->keys.nr_leaves_on_tree != 0) + seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); else seq_puts(m, ": empty"); - rcu_read_unlock(); } } +struct keyring_read_iterator_context { + size_t qty; + size_t count; + key_serial_t __user *buffer; +}; + +static int keyring_read_iterator(const void *object, void *data) +{ + struct keyring_read_iterator_context *ctx = data; + const struct key *key = keyring_ptr_to_key(object); + int ret; + + kenter("{%s,%d},,{%zu/%zu}", + key->type->name, key->serial, ctx->count, ctx->qty); + + if (ctx->count >= ctx->qty) + return 1; + + ret = put_user(key->serial, ctx->buffer); + if (ret < 0) + return ret; + ctx->buffer++; + ctx->count += sizeof(key->serial); + return 0; +} + /* * Read a list of key IDs from the keyring's contents in binary form * - * The keyring's semaphore is read-locked by the caller. + * The keyring's semaphore is read-locked by the caller. This prevents someone + * from modifying it under us - which could cause us to read key IDs multiple + * times. */ static long keyring_read(const struct key *keyring, char __user *buffer, size_t buflen) { - struct keyring_list *klist; - struct key *key; - size_t qty, tmp; - int loop, ret; + struct keyring_read_iterator_context ctx; + unsigned long nr_keys; + int ret; - ret = 0; - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* calculate how much data we could return */ - qty = klist->nkeys * sizeof(key_serial_t); - - if (buffer && buflen > 0) { - if (buflen > qty) - buflen = qty; - - /* copy the IDs of the subscribed keys into the - * buffer */ - ret = -EFAULT; - - for (loop = 0; loop < klist->nkeys; loop++) { - key = rcu_deref_link_locked(klist, loop, - keyring); - - tmp = sizeof(key_serial_t); - if (tmp > buflen) - tmp = buflen; - - if (copy_to_user(buffer, - &key->serial, - tmp) != 0) - goto error; - - buflen -= tmp; - if (buflen == 0) - break; - buffer += tmp; - } - } + kenter("{%d},,%zu", key_serial(keyring), buflen); + + if (buflen & (sizeof(key_serial_t) - 1)) + return -EINVAL; + + nr_keys = keyring->keys.nr_leaves_on_tree; + if (nr_keys == 0) + return 0; - ret = qty; + /* Calculate how much data we could return */ + ctx.qty = nr_keys * sizeof(key_serial_t); + + if (!buffer || !buflen) + return ctx.qty; + + if (buflen > ctx.qty) + ctx.qty = buflen; + + /* Copy the IDs of the subscribed keys into the buffer */ + ctx.buffer = (key_serial_t __user *)buffer; + ctx.count = 0; + ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); + if (ret < 0) { + kleave(" = %d [iterate]", ret); + return ret; } -error: - return ret; + kleave(" = %zu [ok]", ctx.count); + return ctx.count; } /* @@ -277,227 +500,361 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, } EXPORT_SYMBOL(keyring_alloc); -/** - * keyring_search_aux - Search a keyring tree for a key matching some criteria - * @keyring_ref: A pointer to the keyring with possession indicator. - * @cred: The credentials to use for permissions checks. - * @type: The type of key to search for. - * @description: Parameter for @match. - * @match: Function to rule on whether or not a key is the one required. - * @no_state_check: Don't check if a matching key is bad - * - * Search the supplied keyring tree for a key that matches the criteria given. - * The root keyring and any linked keyrings must grant Search permission to the - * caller to be searchable and keys can only be found if they too grant Search - * to the caller. The possession flag on the root keyring pointer controls use - * of the possessor bits in permissions checking of the entire tree. In - * addition, the LSM gets to forbid keyring searches and key matches. - * - * The search is performed as a breadth-then-depth search up to the prescribed - * limit (KEYRING_SEARCH_MAX_DEPTH). - * - * Keys are matched to the type provided and are then filtered by the match - * function, which is given the description to use in any way it sees fit. The - * match function may use any attributes of a key that it wishes to to - * determine the match. Normally the match function from the key type would be - * used. - * - * RCU is used to prevent the keyring key lists from disappearing without the - * need to take lots of locks. - * - * Returns a pointer to the found key and increments the key usage count if - * successful; -EAGAIN if no matching keys were found, or if expired or revoked - * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the - * specified keyring wasn't a keyring. - * - * In the case of a successful return, the possession attribute from - * @keyring_ref is propagated to the returned key reference. +/* + * Iteration function to consider each key found. */ -key_ref_t keyring_search_aux(key_ref_t keyring_ref, - const struct cred *cred, - struct key_type *type, - const void *description, - key_match_func_t match, - bool no_state_check) +static int keyring_search_iterator(const void *object, void *iterator_data) { - struct { - /* Need a separate keylist pointer for RCU purposes */ - struct key *keyring; - struct keyring_list *keylist; - int kix; - } stack[KEYRING_SEARCH_MAX_DEPTH]; - - struct keyring_list *keylist; - struct timespec now; - unsigned long possessed, kflags; - struct key *keyring, *key; - key_ref_t key_ref; - long err; - int sp, nkeys, kix; + struct keyring_search_context *ctx = iterator_data; + const struct key *key = keyring_ptr_to_key(object); + unsigned long kflags = key->flags; - keyring = key_ref_to_ptr(keyring_ref); - possessed = is_key_possessed(keyring_ref); - key_check(keyring); + kenter("{%d}", key->serial); - /* top keyring must have search permission to begin the search */ - err = key_task_permission(keyring_ref, cred, KEY_SEARCH); - if (err < 0) { - key_ref = ERR_PTR(err); - goto error; + /* ignore keys not of this type */ + if (key->type != ctx->index_key.type) { + kleave(" = 0 [!type]"); + return 0; } - key_ref = ERR_PTR(-ENOTDIR); - if (keyring->type != &key_type_keyring) - goto error; + /* skip invalidated, revoked and expired keys */ + if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { + if (kflags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) { + ctx->result = ERR_PTR(-EKEYREVOKED); + kleave(" = %d [invrev]", ctx->skipped_ret); + goto skipped; + } - rcu_read_lock(); + if (key->expiry && ctx->now.tv_sec >= key->expiry) { + ctx->result = ERR_PTR(-EKEYEXPIRED); + kleave(" = %d [expire]", ctx->skipped_ret); + goto skipped; + } + } - now = current_kernel_time(); - err = -EAGAIN; - sp = 0; - - /* firstly we should check to see if this top-level keyring is what we - * are looking for */ - key_ref = ERR_PTR(-EAGAIN); - kflags = keyring->flags; - if (keyring->type == type && match(keyring, description)) { - key = keyring; - if (no_state_check) - goto found; + /* keys that don't match */ + if (!ctx->match(key, ctx->match_data)) { + kleave(" = 0 [!match]"); + return 0; + } - /* check it isn't negative and hasn't expired or been - * revoked */ - if (kflags & (1 << KEY_FLAG_REVOKED)) - goto error_2; - if (key->expiry && now.tv_sec >= key->expiry) - goto error_2; - key_ref = ERR_PTR(key->type_data.reject_error); - if (kflags & (1 << KEY_FLAG_NEGATIVE)) - goto error_2; - goto found; + /* key must have search permissions */ + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && + key_task_permission(make_key_ref(key, ctx->possessed), + ctx->cred, KEY_SEARCH) < 0) { + ctx->result = ERR_PTR(-EACCES); + kleave(" = %d [!perm]", ctx->skipped_ret); + goto skipped; } - /* otherwise, the top keyring must not be revoked, expired, or - * negatively instantiated if we are to search it */ - key_ref = ERR_PTR(-EAGAIN); - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_NEGATIVE)) || - (keyring->expiry && now.tv_sec >= keyring->expiry)) - goto error_2; - - /* start processing a new keyring */ -descend: - kflags = keyring->flags; - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - goto not_this_keyring; + if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { + /* we set a different error code if we pass a negative key */ + if (kflags & (1 << KEY_FLAG_NEGATIVE)) { + smp_rmb(); + ctx->result = ERR_PTR(key->type_data.reject_error); + kleave(" = %d [neg]", ctx->skipped_ret); + goto skipped; + } + } - keylist = rcu_dereference(keyring->payload.subscriptions); - if (!keylist) - goto not_this_keyring; + /* Found */ + ctx->result = make_key_ref(key, ctx->possessed); + kleave(" = 1 [found]"); + return 1; - /* iterate through the keys in this keyring first */ - nkeys = keylist->nkeys; - smp_rmb(); - for (kix = 0; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - kflags = key->flags; +skipped: + return ctx->skipped_ret; +} - /* ignore keys not of this type */ - if (key->type != type) - continue; +/* + * Search inside a keyring for a key. We can search by walking to it + * directly based on its index-key or we can iterate over the entire + * tree looking for it, based on the match function. + */ +static int search_keyring(struct key *keyring, struct keyring_search_context *ctx) +{ + if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) == + KEYRING_SEARCH_LOOKUP_DIRECT) { + const void *object; + + object = assoc_array_find(&keyring->keys, + &keyring_assoc_array_ops, + &ctx->index_key); + return object ? ctx->iterator(object, ctx) : 0; + } + return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx); +} - /* skip invalidated, revoked and expired keys */ - if (!no_state_check) { - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - continue; +/* + * Search a tree of keyrings that point to other keyrings up to the maximum + * depth. + */ +static bool search_nested_keyrings(struct key *keyring, + struct keyring_search_context *ctx) +{ + struct { + struct key *keyring; + struct assoc_array_node *node; + int slot; + } stack[KEYRING_SEARCH_MAX_DEPTH]; - if (key->expiry && now.tv_sec >= key->expiry) - continue; - } + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *ptr; + struct key *key; + int sp = 0, slot; - /* keys that don't match */ - if (!match(key, description)) - continue; + kenter("{%d},{%s,%s}", + keyring->serial, + ctx->index_key.type->name, + ctx->index_key.description); - /* key must have search permissions */ - if (key_task_permission(make_key_ref(key, possessed), - cred, KEY_SEARCH) < 0) - continue; + if (ctx->index_key.description) + ctx->index_key.desc_len = strlen(ctx->index_key.description); - if (no_state_check) + /* Check to see if this top-level keyring is what we are looking for + * and whether it is valid or not. + */ + if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE || + keyring_compare_object(keyring, &ctx->index_key)) { + ctx->skipped_ret = 2; + ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK; + switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) { + case 1: goto found; - - /* we set a different error code if we pass a negative key */ - if (kflags & (1 << KEY_FLAG_NEGATIVE)) { - err = key->type_data.reject_error; - continue; + case 2: + return false; + default: + break; } + } + + ctx->skipped_ret = 0; + if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) + ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK; + /* Start processing a new keyring */ +descend_to_keyring: + kdebug("descend to %d", keyring->serial); + if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) + goto not_this_keyring; + + /* Search through the keys in this keyring before its searching its + * subtrees. + */ + if (search_keyring(keyring, ctx)) goto found; - } - /* search through the keyrings nested in this one */ - kix = 0; -ascend: - nkeys = keylist->nkeys; - smp_rmb(); - for (; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - if (key->type != &key_type_keyring) - continue; + /* Then manually iterate through the keyrings nested in this one. + * + * Start from the root node of the index tree. Because of the way the + * hash function has been set up, keyrings cluster on the leftmost + * branch of the root node (root slot 0) or in the root node itself. + * Non-keyrings avoid the leftmost branch of the root entirely (root + * slots 1-15). + */ + ptr = ACCESS_ONCE(keyring->keys.root); + if (!ptr) + goto not_this_keyring; - /* recursively search nested keyrings - * - only search keyrings for which we have search permission + if (assoc_array_ptr_is_shortcut(ptr)) { + /* If the root is a shortcut, either the keyring only contains + * keyring pointers (everything clusters behind root slot 0) or + * doesn't contain any keyring pointers. */ - if (sp >= KEYRING_SEARCH_MAX_DEPTH) + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0) + goto not_this_keyring; + + ptr = ACCESS_ONCE(shortcut->next_node); + node = assoc_array_ptr_to_node(ptr); + goto begin_node; + } + + node = assoc_array_ptr_to_node(ptr); + smp_read_barrier_depends(); + + ptr = node->slots[0]; + if (!assoc_array_ptr_is_meta(ptr)) + goto begin_node; + +descend_to_node: + /* Descend to a more distal node in this keyring's content tree and go + * through that. + */ + kdebug("descend"); + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + ptr = ACCESS_ONCE(shortcut->next_node); + BUG_ON(!assoc_array_ptr_is_node(ptr)); + node = assoc_array_ptr_to_node(ptr); + } + +begin_node: + kdebug("begin_node"); + smp_read_barrier_depends(); + slot = 0; +ascend_to_node: + /* Go through the slots in a node */ + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + + if (assoc_array_ptr_is_meta(ptr) && node->back_pointer) + goto descend_to_node; + + if (!keyring_ptr_is_keyring(ptr)) continue; - if (key_task_permission(make_key_ref(key, possessed), - cred, KEY_SEARCH) < 0) + key = keyring_ptr_to_key(ptr); + + if (sp >= KEYRING_SEARCH_MAX_DEPTH) { + if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) { + ctx->result = ERR_PTR(-ELOOP); + return false; + } + goto not_this_keyring; + } + + /* Search a nested keyring */ + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && + key_task_permission(make_key_ref(key, ctx->possessed), + ctx->cred, KEY_SEARCH) < 0) continue; /* stack the current position */ stack[sp].keyring = keyring; - stack[sp].keylist = keylist; - stack[sp].kix = kix; + stack[sp].node = node; + stack[sp].slot = slot; sp++; /* begin again with the new keyring */ keyring = key; - goto descend; + goto descend_to_keyring; } - /* the keyring we're looking at was disqualified or didn't contain a - * matching key */ + /* We've dealt with all the slots in the current node, so now we need + * to ascend to the parent and continue processing there. + */ + ptr = ACCESS_ONCE(node->back_pointer); + slot = node->parent_slot; + + if (ptr && assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + ptr = ACCESS_ONCE(shortcut->back_pointer); + slot = shortcut->parent_slot; + } + if (!ptr) + goto not_this_keyring; + node = assoc_array_ptr_to_node(ptr); + smp_read_barrier_depends(); + slot++; + + /* If we've ascended to the root (zero backpointer), we must have just + * finished processing the leftmost branch rather than the root slots - + * so there can't be any more keyrings for us to find. + */ + if (node->back_pointer) { + kdebug("ascend %d", slot); + goto ascend_to_node; + } + + /* The keyring we're looking at was disqualified or didn't contain a + * matching key. + */ not_this_keyring: - if (sp > 0) { - /* resume the processing of a keyring higher up in the tree */ - sp--; - keyring = stack[sp].keyring; - keylist = stack[sp].keylist; - kix = stack[sp].kix + 1; - goto ascend; + kdebug("not_this_keyring %d", sp); + if (sp <= 0) { + kleave(" = false"); + return false; } - key_ref = ERR_PTR(err); - goto error_2; + /* Resume the processing of a keyring higher up in the tree */ + sp--; + keyring = stack[sp].keyring; + node = stack[sp].node; + slot = stack[sp].slot + 1; + kdebug("ascend to %d [%d]", keyring->serial, slot); + goto ascend_to_node; - /* we found a viable match */ + /* We found a viable match */ found: - atomic_inc(&key->usage); - key->last_used_at = now.tv_sec; - keyring->last_used_at = now.tv_sec; - while (sp > 0) - stack[--sp].keyring->last_used_at = now.tv_sec; + key = key_ref_to_ptr(ctx->result); key_check(key); - key_ref = make_key_ref(key, possessed); -error_2: + if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) { + key->last_used_at = ctx->now.tv_sec; + keyring->last_used_at = ctx->now.tv_sec; + while (sp > 0) + stack[--sp].keyring->last_used_at = ctx->now.tv_sec; + } + kleave(" = true"); + return true; +} + +/** + * keyring_search_aux - Search a keyring tree for a key matching some criteria + * @keyring_ref: A pointer to the keyring with possession indicator. + * @ctx: The keyring search context. + * + * Search the supplied keyring tree for a key that matches the criteria given. + * The root keyring and any linked keyrings must grant Search permission to the + * caller to be searchable and keys can only be found if they too grant Search + * to the caller. The possession flag on the root keyring pointer controls use + * of the possessor bits in permissions checking of the entire tree. In + * addition, the LSM gets to forbid keyring searches and key matches. + * + * The search is performed as a breadth-then-depth search up to the prescribed + * limit (KEYRING_SEARCH_MAX_DEPTH). + * + * Keys are matched to the type provided and are then filtered by the match + * function, which is given the description to use in any way it sees fit. The + * match function may use any attributes of a key that it wishes to to + * determine the match. Normally the match function from the key type would be + * used. + * + * RCU can be used to prevent the keyring key lists from disappearing without + * the need to take lots of locks. + * + * Returns a pointer to the found key and increments the key usage count if + * successful; -EAGAIN if no matching keys were found, or if expired or revoked + * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the + * specified keyring wasn't a keyring. + * + * In the case of a successful return, the possession attribute from + * @keyring_ref is propagated to the returned key reference. + */ +key_ref_t keyring_search_aux(key_ref_t keyring_ref, + struct keyring_search_context *ctx) +{ + struct key *keyring; + long err; + + ctx->iterator = keyring_search_iterator; + ctx->possessed = is_key_possessed(keyring_ref); + ctx->result = ERR_PTR(-EAGAIN); + + keyring = key_ref_to_ptr(keyring_ref); + key_check(keyring); + + if (keyring->type != &key_type_keyring) + return ERR_PTR(-ENOTDIR); + + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) { + err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH); + if (err < 0) + return ERR_PTR(err); + } + + rcu_read_lock(); + ctx->now = current_kernel_time(); + if (search_nested_keyrings(keyring, ctx)) + __key_get(key_ref_to_ptr(ctx->result)); rcu_read_unlock(); -error: - return key_ref; + return ctx->result; } /** @@ -507,77 +864,73 @@ error: * @description: The name of the keyring we want to find. * * As keyring_search_aux() above, but using the current task's credentials and - * type's default matching function. + * type's default matching function and preferred search method. */ key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, const char *description) { - if (!type->match) + struct keyring_search_context ctx = { + .index_key.type = type, + .index_key.description = description, + .cred = current_cred(), + .match = type->match, + .match_data = description, + .flags = (type->def_lookup_type | + KEYRING_SEARCH_DO_STATE_CHECK), + }; + + if (!ctx.match) return ERR_PTR(-ENOKEY); - return keyring_search_aux(keyring, current->cred, - type, description, type->match, false); + return keyring_search_aux(keyring, &ctx); } EXPORT_SYMBOL(keyring_search); /* - * Search the given keyring only (no recursion). + * Search the given keyring for a key that might be updated. * * The caller must guarantee that the keyring is a keyring and that the - * permission is granted to search the keyring as no check is made here. - * - * RCU is used to make it unnecessary to lock the keyring key list here. + * permission is granted to modify the keyring as no check is made here. The + * caller must also hold a lock on the keyring semaphore. * * Returns a pointer to the found key with usage count incremented if - * successful and returns -ENOKEY if not found. Revoked keys and keys not - * providing the requested permission are skipped over. + * successful and returns NULL if not found. Revoked and invalidated keys are + * skipped over. * * If successful, the possession indicator is propagated from the keyring ref * to the returned key reference. */ -key_ref_t __keyring_search_one(key_ref_t keyring_ref, - const struct key_type *ktype, - const char *description, - key_perm_t perm) +key_ref_t find_key_to_update(key_ref_t keyring_ref, + const struct keyring_index_key *index_key) { - struct keyring_list *klist; - unsigned long possessed; struct key *keyring, *key; - int nkeys, loop; + const void *object; keyring = key_ref_to_ptr(keyring_ref); - possessed = is_key_possessed(keyring_ref); - rcu_read_lock(); + kenter("{%d},{%s,%s}", + keyring->serial, index_key->type->name, index_key->description); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) { - nkeys = klist->nkeys; - smp_rmb(); - for (loop = 0; loop < nkeys ; loop++) { - key = rcu_dereference(klist->keys[loop]); - if (key->type == ktype && - (!key->type->match || - key->type->match(key, description)) && - key_permission(make_key_ref(key, possessed), - perm) == 0 && - !(key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - ) - goto found; - } - } + object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, + index_key); - rcu_read_unlock(); - return ERR_PTR(-ENOKEY); + if (object) + goto found; + + kleave(" = NULL"); + return NULL; found: - atomic_inc(&key->usage); - keyring->last_used_at = key->last_used_at = - current_kernel_time().tv_sec; - rcu_read_unlock(); - return make_key_ref(key, possessed); + key = keyring_ptr_to_key(object); + if (key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) { + kleave(" = NULL [x]"); + return NULL; + } + __key_get(key); + kleave(" = {%d}", key->serial); + return make_key_ref(key, is_key_possessed(keyring_ref)); } /* @@ -640,6 +993,19 @@ out: return keyring; } +static int keyring_detect_cycle_iterator(const void *object, + void *iterator_data) +{ + struct keyring_search_context *ctx = iterator_data; + const struct key *key = keyring_ptr_to_key(object); + + kenter("{%d}", key->serial); + + BUG_ON(key != ctx->match_data); + ctx->result = ERR_PTR(-EDEADLK); + return 1; +} + /* * See if a cycle will will be created by inserting acyclic tree B in acyclic * tree A at the topmost level (ie: as a direct child of A). @@ -649,116 +1015,39 @@ out: */ static int keyring_detect_cycle(struct key *A, struct key *B) { - struct { - struct keyring_list *keylist; - int kix; - } stack[KEYRING_SEARCH_MAX_DEPTH]; - - struct keyring_list *keylist; - struct key *subtree, *key; - int sp, nkeys, kix, ret; + struct keyring_search_context ctx = { + .index_key = A->index_key, + .match_data = A, + .iterator = keyring_detect_cycle_iterator, + .flags = (KEYRING_SEARCH_LOOKUP_DIRECT | + KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_NO_UPDATE_TIME | + KEYRING_SEARCH_NO_CHECK_PERM | + KEYRING_SEARCH_DETECT_TOO_DEEP), + }; rcu_read_lock(); - - ret = -EDEADLK; - if (A == B) - goto cycle_detected; - - subtree = B; - sp = 0; - - /* start processing a new keyring */ -descend: - if (test_bit(KEY_FLAG_REVOKED, &subtree->flags)) - goto not_this_keyring; - - keylist = rcu_dereference(subtree->payload.subscriptions); - if (!keylist) - goto not_this_keyring; - kix = 0; - -ascend: - /* iterate through the remaining keys in this keyring */ - nkeys = keylist->nkeys; - smp_rmb(); - for (; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - - if (key == A) - goto cycle_detected; - - /* recursively check nested keyrings */ - if (key->type == &key_type_keyring) { - if (sp >= KEYRING_SEARCH_MAX_DEPTH) - goto too_deep; - - /* stack the current position */ - stack[sp].keylist = keylist; - stack[sp].kix = kix; - sp++; - - /* begin again with the new keyring */ - subtree = key; - goto descend; - } - } - - /* the keyring we're looking at was disqualified or didn't contain a - * matching key */ -not_this_keyring: - if (sp > 0) { - /* resume the checking of a keyring higher up in the tree */ - sp--; - keylist = stack[sp].keylist; - kix = stack[sp].kix + 1; - goto ascend; - } - - ret = 0; /* no cycles detected */ - -error: + search_nested_keyrings(B, &ctx); rcu_read_unlock(); - return ret; - -too_deep: - ret = -ELOOP; - goto error; - -cycle_detected: - ret = -EDEADLK; - goto error; -} - -/* - * Dispose of a keyring list after the RCU grace period, freeing the unlinked - * key - */ -static void keyring_unlink_rcu_disposal(struct rcu_head *rcu) -{ - struct keyring_list *klist = - container_of(rcu, struct keyring_list, rcu); - - if (klist->delkey != USHRT_MAX) - key_put(rcu_access_pointer(klist->keys[klist->delkey])); - kfree(klist); + return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result); } /* * Preallocate memory so that a key can be linked into to a keyring. */ -int __key_link_begin(struct key *keyring, const struct key_type *type, - const char *description, unsigned long *_prealloc) +int __key_link_begin(struct key *keyring, + const struct keyring_index_key *index_key, + struct assoc_array_edit **_edit) __acquires(&keyring->sem) __acquires(&keyring_serialise_link_sem) { - struct keyring_list *klist, *nklist; - unsigned long prealloc; - unsigned max; - time_t lowest_lru; - size_t size; - int loop, lru, ret; + struct assoc_array_edit *edit; + int ret; + + kenter("%d,%s,%s,", + keyring->serial, index_key->type->name, index_key->description); - kenter("%d,%s,%s,", key_serial(keyring), type->name, description); + BUG_ON(index_key->desc_len == 0); if (keyring->type != &key_type_keyring) return -ENOTDIR; @@ -771,100 +1060,39 @@ int __key_link_begin(struct key *keyring, const struct key_type *type, /* serialise link/link calls to prevent parallel calls causing a cycle * when linking two keyring in opposite orders */ - if (type == &key_type_keyring) + if (index_key->type == &key_type_keyring) down_write(&keyring_serialise_link_sem); - klist = rcu_dereference_locked_keyring(keyring); - - /* see if there's a matching key we can displace */ - lru = -1; - if (klist && klist->nkeys > 0) { - lowest_lru = TIME_T_MAX; - for (loop = klist->nkeys - 1; loop >= 0; loop--) { - struct key *key = rcu_deref_link_locked(klist, loop, - keyring); - if (key->type == type && - strcmp(key->description, description) == 0) { - /* Found a match - we'll replace the link with - * one to the new key. We record the slot - * position. - */ - klist->delkey = loop; - prealloc = 0; - goto done; - } - if (key->last_used_at < lowest_lru) { - lowest_lru = key->last_used_at; - lru = loop; - } - } - } - - /* If the keyring is full then do an LRU discard */ - if (klist && - klist->nkeys == klist->maxkeys && - klist->maxkeys >= MAX_KEYRING_LINKS) { - kdebug("LRU discard %d\n", lru); - klist->delkey = lru; - prealloc = 0; - goto done; - } - - /* check that we aren't going to overrun the user's quota */ - ret = key_payload_reserve(keyring, - keyring->datalen + KEYQUOTA_LINK_BYTES); - if (ret < 0) + /* Create an edit script that will insert/replace the key in the + * keyring tree. + */ + edit = assoc_array_insert(&keyring->keys, + &keyring_assoc_array_ops, + index_key, + NULL); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); goto error_sem; + } - if (klist && klist->nkeys < klist->maxkeys) { - /* there's sufficient slack space to append directly */ - klist->delkey = klist->nkeys; - prealloc = KEY_LINK_FIXQUOTA; - } else { - /* grow the key list */ - max = 4; - if (klist) { - max += klist->maxkeys; - if (max > MAX_KEYRING_LINKS) - max = MAX_KEYRING_LINKS; - BUG_ON(max <= klist->maxkeys); - } - - size = sizeof(*klist) + sizeof(struct key *) * max; - - ret = -ENOMEM; - nklist = kmalloc(size, GFP_KERNEL); - if (!nklist) - goto error_quota; - - nklist->maxkeys = max; - if (klist) { - memcpy(nklist->keys, klist->keys, - sizeof(struct key *) * klist->nkeys); - nklist->delkey = klist->nkeys; - nklist->nkeys = klist->nkeys + 1; - klist->delkey = USHRT_MAX; - } else { - nklist->nkeys = 1; - nklist->delkey = 0; - } - - /* add the key into the new space */ - RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL); - prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA; + /* If we're not replacing a link in-place then we're going to need some + * extra quota. + */ + if (!edit->dead_leaf) { + ret = key_payload_reserve(keyring, + keyring->datalen + KEYQUOTA_LINK_BYTES); + if (ret < 0) + goto error_cancel; } -done: - *_prealloc = prealloc; + *_edit = edit; kleave(" = 0"); return 0; -error_quota: - /* undo the quota changes */ - key_payload_reserve(keyring, - keyring->datalen - KEYQUOTA_LINK_BYTES); +error_cancel: + assoc_array_cancel_edit(edit); error_sem: - if (type == &key_type_keyring) + if (index_key->type == &key_type_keyring) up_write(&keyring_serialise_link_sem); error_krsem: up_write(&keyring->sem); @@ -895,60 +1123,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key) * holds at most one link to any given key of a particular type+description * combination. */ -void __key_link(struct key *keyring, struct key *key, - unsigned long *_prealloc) +void __key_link(struct key *key, struct assoc_array_edit **_edit) { - struct keyring_list *klist, *nklist; - struct key *discard; - - nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA); - *_prealloc = 0; - - kenter("%d,%d,%p", keyring->serial, key->serial, nklist); - - klist = rcu_dereference_locked_keyring(keyring); - - atomic_inc(&key->usage); - keyring->last_used_at = key->last_used_at = - current_kernel_time().tv_sec; - - /* there's a matching key we can displace or an empty slot in a newly - * allocated list we can fill */ - if (nklist) { - kdebug("reissue %hu/%hu/%hu", - nklist->delkey, nklist->nkeys, nklist->maxkeys); - - RCU_INIT_POINTER(nklist->keys[nklist->delkey], key); - - rcu_assign_pointer(keyring->payload.subscriptions, nklist); - - /* dispose of the old keyring list and, if there was one, the - * displaced key */ - if (klist) { - kdebug("dispose %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - call_rcu(&klist->rcu, keyring_unlink_rcu_disposal); - } - } else if (klist->delkey < klist->nkeys) { - kdebug("replace %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - - discard = rcu_dereference_protected( - klist->keys[klist->delkey], - rwsem_is_locked(&keyring->sem)); - rcu_assign_pointer(klist->keys[klist->delkey], key); - /* The garbage collector will take care of RCU - * synchronisation */ - key_put(discard); - } else { - /* there's sufficient slack space to append directly */ - kdebug("append %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - - RCU_INIT_POINTER(klist->keys[klist->delkey], key); - smp_wmb(); - klist->nkeys++; - } + __key_get(key); + assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key)); + assoc_array_apply_edit(*_edit); + *_edit = NULL; } /* @@ -956,24 +1136,22 @@ void __key_link(struct key *keyring, struct key *key, * * Must be called with __key_link_begin() having being called. */ -void __key_link_end(struct key *keyring, struct key_type *type, - unsigned long prealloc) +void __key_link_end(struct key *keyring, + const struct keyring_index_key *index_key, + struct assoc_array_edit *edit) __releases(&keyring->sem) __releases(&keyring_serialise_link_sem) { - BUG_ON(type == NULL); - BUG_ON(type->name == NULL); - kenter("%d,%s,%lx", keyring->serial, type->name, prealloc); + BUG_ON(index_key->type == NULL); + kenter("%d,%s,", keyring->serial, index_key->type->name); - if (type == &key_type_keyring) + if (index_key->type == &key_type_keyring) up_write(&keyring_serialise_link_sem); - if (prealloc) { - if (prealloc & KEY_LINK_FIXQUOTA) - key_payload_reserve(keyring, - keyring->datalen - - KEYQUOTA_LINK_BYTES); - kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA)); + if (edit && !edit->dead_leaf) { + key_payload_reserve(keyring, + keyring->datalen - KEYQUOTA_LINK_BYTES); + assoc_array_cancel_edit(edit); } up_write(&keyring->sem); } @@ -1000,20 +1178,28 @@ void __key_link_end(struct key *keyring, struct key_type *type, */ int key_link(struct key *keyring, struct key *key) { - unsigned long prealloc; + struct assoc_array_edit *edit; int ret; + kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage)); + key_check(keyring); key_check(key); - ret = __key_link_begin(keyring, key->type, key->description, &prealloc); + if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) && + !test_bit(KEY_FLAG_TRUSTED, &key->flags)) + return -EPERM; + + ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret == 0) { + kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage)); ret = __key_link_check_live_key(keyring, key); if (ret == 0) - __key_link(keyring, key, &prealloc); - __key_link_end(keyring, key->type, prealloc); + __key_link(key, &edit); + __key_link_end(keyring, &key->index_key, edit); } + kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage)); return ret; } EXPORT_SYMBOL(key_link); @@ -1037,90 +1223,37 @@ EXPORT_SYMBOL(key_link); */ int key_unlink(struct key *keyring, struct key *key) { - struct keyring_list *klist, *nklist; - int loop, ret; + struct assoc_array_edit *edit; + int ret; key_check(keyring); key_check(key); - ret = -ENOTDIR; if (keyring->type != &key_type_keyring) - goto error; + return -ENOTDIR; down_write(&keyring->sem); - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* search the keyring for the key */ - for (loop = 0; loop < klist->nkeys; loop++) - if (rcu_access_pointer(klist->keys[loop]) == key) - goto key_is_present; + edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, + &key->index_key); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); + goto error; } - - up_write(&keyring->sem); ret = -ENOENT; - goto error; - -key_is_present: - /* we need to copy the key list for RCU purposes */ - nklist = kmalloc(sizeof(*klist) + - sizeof(struct key *) * klist->maxkeys, - GFP_KERNEL); - if (!nklist) - goto nomem; - nklist->maxkeys = klist->maxkeys; - nklist->nkeys = klist->nkeys - 1; - - if (loop > 0) - memcpy(&nklist->keys[0], - &klist->keys[0], - loop * sizeof(struct key *)); - - if (loop < nklist->nkeys) - memcpy(&nklist->keys[loop], - &klist->keys[loop + 1], - (nklist->nkeys - loop) * sizeof(struct key *)); - - /* adjust the user's quota */ - key_payload_reserve(keyring, - keyring->datalen - KEYQUOTA_LINK_BYTES); - - rcu_assign_pointer(keyring->payload.subscriptions, nklist); - - up_write(&keyring->sem); - - /* schedule for later cleanup */ - klist->delkey = loop; - call_rcu(&klist->rcu, keyring_unlink_rcu_disposal); + if (edit == NULL) + goto error; + assoc_array_apply_edit(edit); + key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES); ret = 0; error: - return ret; -nomem: - ret = -ENOMEM; up_write(&keyring->sem); - goto error; + return ret; } EXPORT_SYMBOL(key_unlink); -/* - * Dispose of a keyring list after the RCU grace period, releasing the keys it - * links to. - */ -static void keyring_clear_rcu_disposal(struct rcu_head *rcu) -{ - struct keyring_list *klist; - int loop; - - klist = container_of(rcu, struct keyring_list, rcu); - - for (loop = klist->nkeys - 1; loop >= 0; loop--) - key_put(rcu_access_pointer(klist->keys[loop])); - - kfree(klist); -} - /** * keyring_clear - Clear a keyring * @keyring: The keyring to clear. @@ -1131,33 +1264,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu) */ int keyring_clear(struct key *keyring) { - struct keyring_list *klist; + struct assoc_array_edit *edit; int ret; - ret = -ENOTDIR; - if (keyring->type == &key_type_keyring) { - /* detach the pointer block with the locks held */ - down_write(&keyring->sem); - - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* adjust the quota */ - key_payload_reserve(keyring, - sizeof(struct keyring_list)); - - rcu_assign_pointer(keyring->payload.subscriptions, - NULL); - } - - up_write(&keyring->sem); + if (keyring->type != &key_type_keyring) + return -ENOTDIR; - /* free the keys after the locks have been dropped */ - if (klist) - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); + down_write(&keyring->sem); + edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); + } else { + if (edit) + assoc_array_apply_edit(edit); + key_payload_reserve(keyring, 0); ret = 0; } + up_write(&keyring->sem); return ret; } EXPORT_SYMBOL(keyring_clear); @@ -1169,111 +1294,68 @@ EXPORT_SYMBOL(keyring_clear); */ static void keyring_revoke(struct key *keyring) { - struct keyring_list *klist; + struct assoc_array_edit *edit; + + edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); + if (!IS_ERR(edit)) { + if (edit) + assoc_array_apply_edit(edit); + key_payload_reserve(keyring, 0); + } +} + +static bool keyring_gc_select_iterator(void *object, void *iterator_data) +{ + struct key *key = keyring_ptr_to_key(object); + time_t *limit = iterator_data; - klist = rcu_dereference_locked_keyring(keyring); + if (key_is_dead(key, *limit)) + return false; + key_get(key); + return true; +} - /* adjust the quota */ - key_payload_reserve(keyring, 0); +static int keyring_gc_check_iterator(const void *object, void *iterator_data) +{ + const struct key *key = keyring_ptr_to_key(object); + time_t *limit = iterator_data; - if (klist) { - rcu_assign_pointer(keyring->payload.subscriptions, NULL); - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); - } + key_check(key); + return key_is_dead(key, *limit); } /* - * Collect garbage from the contents of a keyring, replacing the old list with - * a new one with the pointers all shuffled down. + * Garbage collect pointers from a keyring. * - * Dead keys are classed as oned that are flagged as being dead or are revoked, - * expired or negative keys that were revoked or expired before the specified - * limit. + * Not called with any locks held. The keyring's key struct will not be + * deallocated under us as only our caller may deallocate it. */ void keyring_gc(struct key *keyring, time_t limit) { - struct keyring_list *klist, *new; - struct key *key; - int loop, keep, max; - - kenter("{%x,%s}", key_serial(keyring), keyring->description); - - down_write(&keyring->sem); - - klist = rcu_dereference_locked_keyring(keyring); - if (!klist) - goto no_klist; - - /* work out how many subscriptions we're keeping */ - keep = 0; - for (loop = klist->nkeys - 1; loop >= 0; loop--) - if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring), - limit)) - keep++; - - if (keep == klist->nkeys) - goto just_return; - - /* allocate a new keyring payload */ - max = roundup(keep, 4); - new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *), - GFP_KERNEL); - if (!new) - goto nomem; - new->maxkeys = max; - new->nkeys = 0; - new->delkey = 0; - - /* install the live keys - * - must take care as expired keys may be updated back to life - */ - keep = 0; - for (loop = klist->nkeys - 1; loop >= 0; loop--) { - key = rcu_deref_link_locked(klist, loop, keyring); - if (!key_is_dead(key, limit)) { - if (keep >= max) - goto discard_new; - RCU_INIT_POINTER(new->keys[keep++], key_get(key)); - } - } - new->nkeys = keep; - - /* adjust the quota */ - key_payload_reserve(keyring, - sizeof(struct keyring_list) + - KEYQUOTA_LINK_BYTES * keep); + int result; - if (keep == 0) { - rcu_assign_pointer(keyring->payload.subscriptions, NULL); - kfree(new); - } else { - rcu_assign_pointer(keyring->payload.subscriptions, new); - } + kenter("%x{%s}", keyring->serial, keyring->description ?: ""); - up_write(&keyring->sem); + if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) + goto dont_gc; - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); - kleave(" [yes]"); - return; - -discard_new: - new->nkeys = keep; - keyring_clear_rcu_disposal(&new->rcu); - up_write(&keyring->sem); - kleave(" [discard]"); - return; - -just_return: - up_write(&keyring->sem); - kleave(" [no dead]"); - return; + /* scan the keyring looking for dead keys */ + rcu_read_lock(); + result = assoc_array_iterate(&keyring->keys, + keyring_gc_check_iterator, &limit); + rcu_read_unlock(); + if (result == true) + goto do_gc; -no_klist: - up_write(&keyring->sem); - kleave(" [no_klist]"); +dont_gc: + kleave(" [no gc]"); return; -nomem: +do_gc: + down_write(&keyring->sem); + assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops, + keyring_gc_select_iterator, &limit); up_write(&keyring->sem); - kleave(" [oom]"); + kleave(" [gc]"); } diff --git a/security/keys/persistent.c b/security/keys/persistent.c new file mode 100644 index 0000000..0ad3ee2 --- /dev/null +++ b/security/keys/persistent.c @@ -0,0 +1,167 @@ +/* General persistent per-UID keyrings register + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/user_namespace.h> +#include "internal.h" + +unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */ + +/* + * Create the persistent keyring register for the current user namespace. + * + * Called with the namespace's sem locked for writing. + */ +static int key_create_persistent_register(struct user_namespace *ns) +{ + struct key *reg = keyring_alloc(".persistent_register", + KUIDT_INIT(0), KGIDT_INIT(0), + current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA, NULL); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + ns->persistent_keyring_register = reg; + return 0; +} + +/* + * Create the persistent keyring for the specified user. + * + * Called with the namespace's sem locked for writing. + */ +static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid, + struct keyring_index_key *index_key) +{ + struct key *persistent; + key_ref_t reg_ref, persistent_ref; + + if (!ns->persistent_keyring_register) { + long err = key_create_persistent_register(ns); + if (err < 0) + return ERR_PTR(err); + } else { + reg_ref = make_key_ref(ns->persistent_keyring_register, true); + persistent_ref = find_key_to_update(reg_ref, index_key); + if (persistent_ref) + return persistent_ref; + } + + persistent = keyring_alloc(index_key->description, + uid, INVALID_GID, current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA, + ns->persistent_keyring_register); + if (IS_ERR(persistent)) + return ERR_CAST(persistent); + + return make_key_ref(persistent, true); +} + +/* + * Get the persistent keyring for a specific UID and link it to the nominated + * keyring. + */ +static long key_get_persistent(struct user_namespace *ns, kuid_t uid, + key_ref_t dest_ref) +{ + struct keyring_index_key index_key; + struct key *persistent; + key_ref_t reg_ref, persistent_ref; + char buf[32]; + long ret; + + /* Look in the register if it exists */ + index_key.type = &key_type_keyring; + index_key.description = buf; + index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid)); + + if (ns->persistent_keyring_register) { + reg_ref = make_key_ref(ns->persistent_keyring_register, true); + down_read(&ns->persistent_keyring_register_sem); + persistent_ref = find_key_to_update(reg_ref, &index_key); + up_read(&ns->persistent_keyring_register_sem); + + if (persistent_ref) + goto found; + } + + /* It wasn't in the register, so we'll need to create it. We might + * also need to create the register. + */ + down_write(&ns->persistent_keyring_register_sem); + persistent_ref = key_create_persistent(ns, uid, &index_key); + up_write(&ns->persistent_keyring_register_sem); + if (!IS_ERR(persistent_ref)) + goto found; + + return PTR_ERR(persistent_ref); + +found: + ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK); + if (ret == 0) { + persistent = key_ref_to_ptr(persistent_ref); + ret = key_link(key_ref_to_ptr(dest_ref), persistent); + if (ret == 0) { + key_set_timeout(persistent, persistent_keyring_expiry); + ret = persistent->serial; + } + } + + key_ref_put(persistent_ref); + return ret; +} + +/* + * Get the persistent keyring for a specific UID and link it to the nominated + * keyring. + */ +long keyctl_get_persistent(uid_t _uid, key_serial_t destid) +{ + struct user_namespace *ns = current_user_ns(); + key_ref_t dest_ref; + kuid_t uid; + long ret; + + /* -1 indicates the current user */ + if (_uid == (uid_t)-1) { + uid = current_uid(); + } else { + uid = make_kuid(ns, _uid); + if (!uid_valid(uid)) + return -EINVAL; + + /* You can only see your own persistent cache if you're not + * sufficiently privileged. + */ + if (!uid_eq(uid, current_uid()) && + !uid_eq(uid, current_euid()) && + !ns_capable(ns, CAP_SETUID)) + return -EPERM; + } + + /* There must be a destination keyring */ + dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE); + if (IS_ERR(dest_ref)) + return PTR_ERR(dest_ref); + if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) { + ret = -ENOTDIR; + goto out_put_dest; + } + + ret = key_get_persistent(ns, uid, dest_ref); + +out_put_dest: + key_ref_put(dest_ref); + return ret; +} diff --git a/security/keys/proc.c b/security/keys/proc.c index 217b685..88e9a46 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v) static int proc_keys_show(struct seq_file *m, void *v) { - const struct cred *cred = current_cred(); struct rb_node *_p = v; struct key *key = rb_entry(_p, struct key, serial_node); struct timespec now; @@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v) char xbuf[12]; int rc; + struct keyring_search_context ctx = { + .index_key.type = key->type, + .index_key.description = key->description, + .cred = current_cred(), + .match = lookup_user_key_possessed, + .match_data = key, + .flags = (KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_LOOKUP_DIRECT), + }; + key_ref = make_key_ref(key, 0); /* determine if the key is possessed by this process (a test we can * skip if the key does not indicate the possessor can view it */ if (key->perm & KEY_POS_VIEW) { - skey_ref = search_my_process_keyrings(key->type, key, - lookup_user_key_possessed, - true, cred); + skey_ref = search_my_process_keyrings(&ctx); if (!IS_ERR(skey_ref)) { key_ref_put(skey_ref); key_ref = make_key_ref(key, 1); @@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v) * - the caller holds a spinlock, and thus the RCU read lock, making our * access to __current_cred() safe */ - rc = key_task_permission(key_ref, cred, KEY_VIEW); + rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW); if (rc < 0) return 0; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 42defae..0cf8a13 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) if (IS_ERR(keyring)) return PTR_ERR(keyring); } else { - atomic_inc(&keyring->usage); + __key_get(keyring); } /* install the keyring */ @@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk) * In the case of a successful return, the possession attribute is set on the * returned key reference. */ -key_ref_t search_my_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - bool no_state_check, - const struct cred *cred) +key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx) { key_ref_t key_ref, ret, err; @@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (cred->thread_keyring) { + if (ctx->cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->thread_keyring, 1), - cred, type, description, match, no_state_check); + make_key_ref(ctx->cred->thread_keyring, 1), ctx); if (!IS_ERR(key_ref)) goto found; @@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (cred->process_keyring) { + if (ctx->cred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->process_keyring, 1), - cred, type, description, match, no_state_check); + make_key_ref(ctx->cred->process_keyring, 1), ctx); if (!IS_ERR(key_ref)) goto found; @@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (cred->session_keyring) { + if (ctx->cred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( - make_key_ref(rcu_dereference(cred->session_keyring), 1), - cred, type, description, match, no_state_check); + make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1), + ctx); rcu_read_unlock(); if (!IS_ERR(key_ref)) @@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if (cred->user->session_keyring) { + else if (ctx->cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->user->session_keyring, 1), - cred, type, description, match, no_state_check); + make_key_ref(ctx->cred->user->session_keyring, 1), + ctx); if (!IS_ERR(key_ref)) goto found; @@ -437,18 +431,14 @@ found: * * Return same as search_my_process_keyrings(). */ -key_ref_t search_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - const struct cred *cred) +key_ref_t search_process_keyrings(struct keyring_search_context *ctx) { struct request_key_auth *rka; key_ref_t key_ref, ret = ERR_PTR(-EACCES), err; might_sleep(); - key_ref = search_my_process_keyrings(type, description, match, - false, cred); + key_ref = search_my_process_keyrings(ctx); if (!IS_ERR(key_ref)) goto found; err = key_ref; @@ -457,18 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (cred->request_key_auth && - cred == current_cred() && - type != &key_type_request_key_auth + if (ctx->cred->request_key_auth && + ctx->cred == current_cred() && + ctx->index_key.type != &key_type_request_key_auth ) { + const struct cred *cred = ctx->cred; + /* defend against the auth key being revoked */ down_read(&cred->request_key_auth->sem); - if (key_validate(cred->request_key_auth) == 0) { - rka = cred->request_key_auth->payload.data; + if (key_validate(ctx->cred->request_key_auth) == 0) { + rka = ctx->cred->request_key_auth->payload.data; - key_ref = search_process_keyrings(type, description, - match, rka->cred); + ctx->cred = rka->cred; + key_ref = search_process_keyrings(ctx); + ctx->cred = cred; up_read(&cred->request_key_auth->sem); @@ -522,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target) key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, key_perm_t perm) { + struct keyring_search_context ctx = { + .match = lookup_user_key_possessed, + .flags = (KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_LOOKUP_DIRECT), + }; struct request_key_auth *rka; - const struct cred *cred; struct key *key; key_ref_t key_ref, skey_ref; int ret; try_again: - cred = get_current_cred(); + ctx.cred = get_current_cred(); key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: - if (!cred->thread_keyring) { + if (!ctx.cred->thread_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; @@ -546,13 +543,13 @@ try_again: goto reget_creds; } - key = cred->thread_keyring; - atomic_inc(&key->usage); + key = ctx.cred->thread_keyring; + __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_PROCESS_KEYRING: - if (!cred->process_keyring) { + if (!ctx.cred->process_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; @@ -564,13 +561,13 @@ try_again: goto reget_creds; } - key = cred->process_keyring; - atomic_inc(&key->usage); + key = ctx.cred->process_keyring; + __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!cred->session_keyring) { + if (!ctx.cred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -580,13 +577,13 @@ try_again: ret = join_session_keyring(NULL); else ret = install_session_keyring( - cred->user->session_keyring); + ctx.cred->user->session_keyring); if (ret < 0) goto error; goto reget_creds; - } else if (cred->session_keyring == - cred->user->session_keyring && + } else if (ctx.cred->session_keyring == + ctx.cred->user->session_keyring && lflags & KEY_LOOKUP_CREATE) { ret = join_session_keyring(NULL); if (ret < 0) @@ -595,33 +592,33 @@ try_again: } rcu_read_lock(); - key = rcu_dereference(cred->session_keyring); - atomic_inc(&key->usage); + key = rcu_dereference(ctx.cred->session_keyring); + __key_get(key); rcu_read_unlock(); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_KEYRING: - if (!cred->user->uid_keyring) { + if (!ctx.cred->user->uid_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = cred->user->uid_keyring; - atomic_inc(&key->usage); + key = ctx.cred->user->uid_keyring; + __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!cred->user->session_keyring) { + if (!ctx.cred->user->session_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = cred->user->session_keyring; - atomic_inc(&key->usage); + key = ctx.cred->user->session_keyring; + __key_get(key); key_ref = make_key_ref(key, 1); break; @@ -631,29 +628,29 @@ try_again: goto error; case KEY_SPEC_REQKEY_AUTH_KEY: - key = cred->request_key_auth; + key = ctx.cred->request_key_auth; if (!key) goto error; - atomic_inc(&key->usage); + __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_REQUESTOR_KEYRING: - if (!cred->request_key_auth) + if (!ctx.cred->request_key_auth) goto error; - down_read(&cred->request_key_auth->sem); + down_read(&ctx.cred->request_key_auth->sem); if (test_bit(KEY_FLAG_REVOKED, - &cred->request_key_auth->flags)) { + &ctx.cred->request_key_auth->flags)) { key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { - rka = cred->request_key_auth->payload.data; + rka = ctx.cred->request_key_auth->payload.data; key = rka->dest_keyring; - atomic_inc(&key->usage); + __key_get(key); } - up_read(&cred->request_key_auth->sem); + up_read(&ctx.cred->request_key_auth->sem); if (!key) goto error; key_ref = make_key_ref(key, 1); @@ -673,9 +670,13 @@ try_again: key_ref = make_key_ref(key, 0); /* check to see if we possess the key */ - skey_ref = search_process_keyrings(key->type, key, - lookup_user_key_possessed, - cred); + ctx.index_key.type = key->type; + ctx.index_key.description = key->description; + ctx.index_key.desc_len = strlen(key->description); + ctx.match_data = key; + kdebug("check possessed"); + skey_ref = search_process_keyrings(&ctx); + kdebug("possessed=%p", skey_ref); if (!IS_ERR(skey_ref)) { key_put(key); @@ -715,14 +716,14 @@ try_again: goto invalid_key; /* check the permissions */ - ret = key_task_permission(key_ref, cred, perm); + ret = key_task_permission(key_ref, ctx.cred, perm); if (ret < 0) goto invalid_key; key->last_used_at = current_kernel_time().tv_sec; error: - put_cred(cred); + put_cred(ctx.cred); return key_ref; invalid_key: @@ -733,7 +734,7 @@ invalid_key: /* if we attempted to install a keyring, then it may have caused new * creds to be installed */ reget_creds: - put_cred(cred); + put_cred(ctx.cred); goto try_again; } @@ -856,3 +857,13 @@ void key_change_session_keyring(struct callback_head *twork) commit_creds(new); } + +/* + * Make sure that root's user and user-session keyrings exist. + */ +static int __init init_root_keyring(void) +{ + return install_user_keyrings(); +} + +late_initcall(init_root_keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index c411f9b..3814119 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -345,33 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) * May return a key that's already under construction instead if there was a * race between two thread calling request_key(). */ -static int construct_alloc_key(struct key_type *type, - const char *description, +static int construct_alloc_key(struct keyring_search_context *ctx, struct key *dest_keyring, unsigned long flags, struct key_user *user, struct key **_key) { - const struct cred *cred = current_cred(); - unsigned long prealloc; + struct assoc_array_edit *edit; struct key *key; key_perm_t perm; key_ref_t key_ref; int ret; - kenter("%s,%s,,,", type->name, description); + kenter("%s,%s,,,", + ctx->index_key.type->name, ctx->index_key.description); *_key = NULL; mutex_lock(&user->cons_lock); perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; perm |= KEY_USR_VIEW; - if (type->read) + if (ctx->index_key.type->read) perm |= KEY_POS_READ; - if (type == &key_type_keyring || type->update) + if (ctx->index_key.type == &key_type_keyring || + ctx->index_key.type->update) perm |= KEY_POS_WRITE; - key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, + key = key_alloc(ctx->index_key.type, ctx->index_key.description, + ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred, perm, flags); if (IS_ERR(key)) goto alloc_failed; @@ -379,8 +380,7 @@ static int construct_alloc_key(struct key_type *type, set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); if (dest_keyring) { - ret = __key_link_begin(dest_keyring, type, description, - &prealloc); + ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit); if (ret < 0) goto link_prealloc_failed; } @@ -390,16 +390,16 @@ static int construct_alloc_key(struct key_type *type, * waited for locks */ mutex_lock(&key_construction_mutex); - key_ref = search_process_keyrings(type, description, type->match, cred); + key_ref = search_process_keyrings(ctx); if (!IS_ERR(key_ref)) goto key_already_present; if (dest_keyring) - __key_link(dest_keyring, key, &prealloc); + __key_link(key, &edit); mutex_unlock(&key_construction_mutex); if (dest_keyring) - __key_link_end(dest_keyring, type, prealloc); + __key_link_end(dest_keyring, &ctx->index_key, edit); mutex_unlock(&user->cons_lock); *_key = key; kleave(" = 0 [%d]", key_serial(key)); @@ -414,8 +414,8 @@ key_already_present: if (dest_keyring) { ret = __key_link_check_live_key(dest_keyring, key); if (ret == 0) - __key_link(dest_keyring, key, &prealloc); - __key_link_end(dest_keyring, type, prealloc); + __key_link(key, &edit); + __key_link_end(dest_keyring, &ctx->index_key, edit); if (ret < 0) goto link_check_failed; } @@ -444,8 +444,7 @@ alloc_failed: /* * Commence key construction. */ -static struct key *construct_key_and_link(struct key_type *type, - const char *description, +static struct key *construct_key_and_link(struct keyring_search_context *ctx, const char *callout_info, size_t callout_len, void *aux, @@ -464,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type, construct_get_dest_keyring(&dest_keyring); - ret = construct_alloc_key(type, description, dest_keyring, flags, user, - &key); + ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key); key_user_put(user); if (ret == 0) { @@ -529,17 +527,24 @@ struct key *request_key_and_link(struct key_type *type, struct key *dest_keyring, unsigned long flags) { - const struct cred *cred = current_cred(); + struct keyring_search_context ctx = { + .index_key.type = type, + .index_key.description = description, + .cred = current_cred(), + .match = type->match, + .match_data = description, + .flags = KEYRING_SEARCH_LOOKUP_DIRECT, + }; struct key *key; key_ref_t key_ref; int ret; kenter("%s,%s,%p,%zu,%p,%p,%lx", - type->name, description, callout_info, callout_len, aux, - dest_keyring, flags); + ctx.index_key.type->name, ctx.index_key.description, + callout_info, callout_len, aux, dest_keyring, flags); /* search all the process keyrings for a key */ - key_ref = search_process_keyrings(type, description, type->match, cred); + key_ref = search_process_keyrings(&ctx); if (!IS_ERR(key_ref)) { key = key_ref_to_ptr(key_ref); @@ -562,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type, if (!callout_info) goto error; - key = construct_key_and_link(type, description, callout_info, - callout_len, aux, dest_keyring, - flags); + key = construct_key_and_link(&ctx, callout_info, callout_len, + aux, dest_keyring, flags); } error: @@ -592,8 +596,10 @@ int wait_for_key_construction(struct key *key, bool intr) intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret < 0) return ret; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { + smp_rmb(); return key->type_data.reject_error; + } return key_validate(key); } EXPORT_SYMBOL(wait_for_key_construction); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 85730d5..7495a93 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <asm/uaccess.h> #include "internal.h" +#include <keys/user-type.h> static int request_key_auth_instantiate(struct key *, struct key_preparsed_payload *); @@ -222,32 +223,26 @@ error_alloc: } /* - * See if an authorisation key is associated with a particular key. - */ -static int key_get_instantiation_authkey_match(const struct key *key, - const void *_id) -{ - struct request_key_auth *rka = key->payload.data; - key_serial_t id = (key_serial_t)(unsigned long) _id; - - return rka->target_key->serial == id; -} - -/* * Search the current process's keyrings for the authorisation key for * instantiation of a key. */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { - const struct cred *cred = current_cred(); + char description[16]; + struct keyring_search_context ctx = { + .index_key.type = &key_type_request_key_auth, + .index_key.description = description, + .cred = current_cred(), + .match = user_match, + .match_data = description, + .flags = KEYRING_SEARCH_LOOKUP_DIRECT, + }; struct key *authkey; key_ref_t authkey_ref; - authkey_ref = search_process_keyrings( - &key_type_request_key_auth, - (void *) (unsigned long) target_id, - key_get_instantiation_authkey_match, - cred); + sprintf(description, "%x", target_id); + + authkey_ref = search_process_keyrings(&ctx); if (IS_ERR(authkey_ref)) { authkey = ERR_CAST(authkey_ref); diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c index ee32d18..8c0af08 100644 --- a/security/keys/sysctl.c +++ b/security/keys/sysctl.c @@ -61,5 +61,16 @@ ctl_table key_sysctls[] = { .extra1 = (void *) &zero, .extra2 = (void *) &max, }, +#ifdef CONFIG_PERSISTENT_KEYRINGS + { + .procname = "persistent_keyring_expiry", + .data = &persistent_keyring_expiry, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *) &zero, + .extra2 = (void *) &max, + }, +#endif { } }; diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 55dc889..faa2cae 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc); * arbitrary blob of data as the payload */ struct key_type key_type_user = { - .name = "user", - .instantiate = user_instantiate, - .update = user_update, - .match = user_match, - .revoke = user_revoke, - .destroy = user_destroy, - .describe = user_describe, - .read = user_read, + .name = "user", + .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, + .instantiate = user_instantiate, + .update = user_update, + .match = user_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = user_describe, + .read = user_read, }; EXPORT_SYMBOL_GPL(key_type_user); @@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user); */ struct key_type key_type_logon = { .name = "logon", + .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .instantiate = user_instantiate, .update = user_update, .match = user_match, diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 234bc2a..9a62045 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -397,7 +397,8 @@ void common_lsm_audit(struct common_audit_data *a, if (a == NULL) return; /* we use GFP_ATOMIC so we won't sleep */ - ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); + ab = audit_log_start(current->audit_context, GFP_ATOMIC | __GFP_NOWARN, + AUDIT_AVC); if (ab == NULL) return; diff --git a/security/security.c b/security/security.c index 4dc31f4..15b6928 100644 --- a/security/security.c +++ b/security/security.c @@ -1340,22 +1340,17 @@ int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) return security_ops->xfrm_policy_delete_security(ctx); } -int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_state_alloc_security(x, sec_ctx, 0); + return security_ops->xfrm_state_alloc(x, sec_ctx); } EXPORT_SYMBOL(security_xfrm_state_alloc); int security_xfrm_state_alloc_acquire(struct xfrm_state *x, struct xfrm_sec_ctx *polsec, u32 secid) { - if (!polsec) - return 0; - /* - * We want the context to be taken from secid which is usually - * from the sock. - */ - return security_ops->xfrm_state_alloc_security(x, NULL, secid); + return security_ops->xfrm_state_alloc_acquire(x, polsec, secid); } int security_xfrm_state_delete(struct xfrm_state *x) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c540795..794c3ca 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -95,7 +95,9 @@ #include "audit.h" #include "avc_ss.h" -#define NUM_SEL_MNT_OPTS 5 +#define SB_TYPE_FMT "%s%s%s" +#define SB_SUBTYPE(sb) (sb->s_subtype && sb->s_subtype[0]) +#define SB_TYPE_ARGS(sb) sb->s_type->name, SB_SUBTYPE(sb) ? "." : "", SB_SUBTYPE(sb) ? sb->s_subtype : "" extern struct security_operations *security_ops; @@ -139,12 +141,28 @@ static struct kmem_cache *sel_inode_cache; * This function checks the SECMARK reference counter to see if any SECMARK * targets are currently configured, if the reference counter is greater than * zero SECMARK is considered to be enabled. Returns true (1) if SECMARK is - * enabled, false (0) if SECMARK is disabled. + * enabled, false (0) if SECMARK is disabled. If the always_check_network + * policy capability is enabled, SECMARK is always considered enabled. * */ static int selinux_secmark_enabled(void) { - return (atomic_read(&selinux_secmark_refcount) > 0); + return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount)); +} + +/** + * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled + * + * Description: + * This function checks if NetLabel or labeled IPSEC is enabled. Returns true + * (1) if any are enabled or false (0) if neither are enabled. If the + * always_check_network policy capability is enabled, peer labeling + * is always considered enabled. + * + */ +static int selinux_peerlbl_enabled(void) +{ + return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled()); } /* @@ -309,8 +327,11 @@ enum { Opt_defcontext = 3, Opt_rootcontext = 4, Opt_labelsupport = 5, + Opt_nextmntopt = 6, }; +#define NUM_SEL_MNT_OPTS (Opt_nextmntopt - 1) + static const match_table_t tokens = { {Opt_context, CONTEXT_STR "%s"}, {Opt_fscontext, FSCONTEXT_STR "%s"}, @@ -355,6 +376,29 @@ static int may_context_mount_inode_relabel(u32 sid, return rc; } +static int selinux_is_sblabel_mnt(struct super_block *sb) +{ + struct superblock_security_struct *sbsec = sb->s_security; + + if (sbsec->behavior == SECURITY_FS_USE_XATTR || + sbsec->behavior == SECURITY_FS_USE_TRANS || + sbsec->behavior == SECURITY_FS_USE_TASK) + return 1; + + /* Special handling for sysfs. Is genfs but also has setxattr handler*/ + if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) + return 1; + + /* + * Special handling for rootfs. Is genfs but supports + * setting SELinux context on in-core inodes. + */ + if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) + return 1; + + return 0; +} + static int sb_finish_set_opts(struct super_block *sb) { struct superblock_security_struct *sbsec = sb->s_security; @@ -369,8 +413,8 @@ static int sb_finish_set_opts(struct super_block *sb) the first boot of the SELinux kernel before we have assigned xattr values to the filesystem. */ if (!root_inode->i_op->getxattr) { - printk(KERN_WARNING "SELinux: (dev %s, type %s) has no " - "xattr support\n", sb->s_id, sb->s_type->name); + printk(KERN_WARNING "SELinux: (dev %s, type "SB_TYPE_FMT") has no " + "xattr support\n", sb->s_id, SB_TYPE_ARGS(sb)); rc = -EOPNOTSUPP; goto out; } @@ -378,35 +422,27 @@ static int sb_finish_set_opts(struct super_block *sb) if (rc < 0 && rc != -ENODATA) { if (rc == -EOPNOTSUPP) printk(KERN_WARNING "SELinux: (dev %s, type " - "%s) has no security xattr handler\n", - sb->s_id, sb->s_type->name); + SB_TYPE_FMT") has no security xattr handler\n", + sb->s_id, SB_TYPE_ARGS(sb)); else printk(KERN_WARNING "SELinux: (dev %s, type " - "%s) getxattr errno %d\n", sb->s_id, - sb->s_type->name, -rc); + SB_TYPE_FMT") getxattr errno %d\n", sb->s_id, + SB_TYPE_ARGS(sb), -rc); goto out; } } - sbsec->flags |= (SE_SBINITIALIZED | SE_SBLABELSUPP); - if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) - printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n", - sb->s_id, sb->s_type->name); + printk(KERN_ERR "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), unknown behavior\n", + sb->s_id, SB_TYPE_ARGS(sb)); else - printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n", - sb->s_id, sb->s_type->name, + printk(KERN_DEBUG "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), %s\n", + sb->s_id, SB_TYPE_ARGS(sb), labeling_behaviors[sbsec->behavior-1]); - if (sbsec->behavior == SECURITY_FS_USE_GENFS || - sbsec->behavior == SECURITY_FS_USE_MNTPOINT || - sbsec->behavior == SECURITY_FS_USE_NONE || - sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) - sbsec->flags &= ~SE_SBLABELSUPP; - - /* Special handling for sysfs. Is genfs but also has setxattr handler*/ - if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) - sbsec->flags |= SE_SBLABELSUPP; + sbsec->flags |= SE_SBINITIALIZED; + if (selinux_is_sblabel_mnt(sb)) + sbsec->flags |= SBLABEL_MNT; /* Initialize the root inode. */ rc = inode_doinit_with_dentry(root_inode, root); @@ -460,15 +496,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb, if (!ss_initialized) return -EINVAL; + /* make sure we always check enough bits to cover the mask */ + BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS)); + tmp = sbsec->flags & SE_MNTMASK; /* count the number of mount options for this sb */ - for (i = 0; i < 8; i++) { + for (i = 0; i < NUM_SEL_MNT_OPTS; i++) { if (tmp & 0x01) opts->num_mnt_opts++; tmp >>= 1; } /* Check if the Label support flag is set */ - if (sbsec->flags & SE_SBLABELSUPP) + if (sbsec->flags & SBLABEL_MNT) opts->num_mnt_opts++; opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC); @@ -515,9 +554,9 @@ static int selinux_get_mnt_opts(const struct super_block *sb, opts->mnt_opts[i] = context; opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT; } - if (sbsec->flags & SE_SBLABELSUPP) { + if (sbsec->flags & SBLABEL_MNT) { opts->mnt_opts[i] = NULL; - opts->mnt_opts_flags[i++] = SE_SBLABELSUPP; + opts->mnt_opts_flags[i++] = SBLABEL_MNT; } BUG_ON(i != opts->num_mnt_opts); @@ -561,7 +600,6 @@ static int selinux_set_mnt_opts(struct super_block *sb, const struct cred *cred = current_cred(); int rc = 0, i; struct superblock_security_struct *sbsec = sb->s_security; - const char *name = sb->s_type->name; struct inode *inode = sbsec->sb->s_root->d_inode; struct inode_security_struct *root_isec = inode->i_security; u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; @@ -614,14 +652,14 @@ static int selinux_set_mnt_opts(struct super_block *sb, for (i = 0; i < num_opts; i++) { u32 sid; - if (flags[i] == SE_SBLABELSUPP) + if (flags[i] == SBLABEL_MNT) continue; rc = security_context_to_sid(mount_options[i], strlen(mount_options[i]), &sid); if (rc) { printk(KERN_WARNING "SELinux: security_context_to_sid" - "(%s) failed for (dev %s, type %s) errno=%d\n", - mount_options[i], sb->s_id, name, rc); + "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n", + mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc); goto out; } switch (flags[i]) { @@ -685,9 +723,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, * Determine the labeling behavior to use for this * filesystem type. */ - rc = security_fs_use((sbsec->flags & SE_SBPROC) ? - "proc" : sb->s_type->name, - &sbsec->behavior, &sbsec->sid); + rc = security_fs_use(sb); if (rc) { printk(KERN_WARNING "%s: security_fs_use(%s) returned %d\n", @@ -770,7 +806,8 @@ out: out_double_mount: rc = -EINVAL; printk(KERN_WARNING "SELinux: mount invalid. Same superblock, different " - "security settings for (dev %s, type %s)\n", sb->s_id, name); + "security settings for (dev %s, type "SB_TYPE_FMT")\n", sb->s_id, + SB_TYPE_ARGS(sb)); goto out; } @@ -1037,7 +1074,7 @@ static void selinux_write_opts(struct seq_file *m, case DEFCONTEXT_MNT: prefix = DEFCONTEXT_STR; break; - case SE_SBLABELSUPP: + case SBLABEL_MNT: seq_putc(m, ','); seq_puts(m, LABELSUPP_STR); continue; @@ -1649,7 +1686,7 @@ static int may_create(struct inode *dir, if (rc) return rc; - if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) { + if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { rc = security_transition_sid(sid, dsec->sid, tclass, &dentry->d_name, &newsid); if (rc) @@ -2437,14 +2474,14 @@ static int selinux_sb_remount(struct super_block *sb, void *data) u32 sid; size_t len; - if (flags[i] == SE_SBLABELSUPP) + if (flags[i] == SBLABEL_MNT) continue; len = strlen(mount_options[i]); rc = security_context_to_sid(mount_options[i], len, &sid); if (rc) { printk(KERN_WARNING "SELinux: security_context_to_sid" - "(%s) failed for (dev %s, type %s) errno=%d\n", - mount_options[i], sb->s_id, sb->s_type->name, rc); + "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n", + mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc); goto out_free_opts; } rc = -EINVAL; @@ -2482,8 +2519,8 @@ out_free_secdata: return rc; out_bad_option: printk(KERN_WARNING "SELinux: unable to change security options " - "during remount (dev %s, type=%s)\n", sb->s_id, - sb->s_type->name); + "during remount (dev %s, type "SB_TYPE_FMT")\n", sb->s_id, + SB_TYPE_ARGS(sb)); goto out_free_opts; } @@ -2606,7 +2643,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, if ((sbsec->flags & SE_SBINITIALIZED) && (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)) newsid = sbsec->mntpoint_sid; - else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) { + else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { rc = security_transition_sid(sid, dsec->sid, inode_mode_to_security_class(inode->i_mode), qstr, &newsid); @@ -2628,7 +2665,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, isec->initialized = 1; } - if (!ss_initialized || !(sbsec->flags & SE_SBLABELSUPP)) + if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT)) return -EOPNOTSUPP; if (name) @@ -2830,7 +2867,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, return selinux_inode_setotherxattr(dentry, name); sbsec = inode->i_sb->s_security; - if (!(sbsec->flags & SE_SBLABELSUPP)) + if (!(sbsec->flags & SBLABEL_MNT)) return -EOPNOTSUPP; if (!inode_owner_or_capable(inode)) @@ -3791,8 +3828,12 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid) u32 nlbl_sid; u32 nlbl_type; - selinux_skb_xfrm_sid(skb, &xfrm_sid); - selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid); + err = selinux_skb_xfrm_sid(skb, &xfrm_sid); + if (unlikely(err)) + return -EACCES; + err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid); + if (unlikely(err)) + return -EACCES; err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid); if (unlikely(err)) { @@ -4246,7 +4287,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) return selinux_sock_rcv_skb_compat(sk, skb, family); secmark_active = selinux_secmark_enabled(); - peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); + peerlbl_active = selinux_peerlbl_enabled(); if (!secmark_active && !peerlbl_active) return 0; @@ -4628,7 +4669,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, secmark_active = selinux_secmark_enabled(); netlbl_active = netlbl_enabled(); - peerlbl_active = netlbl_active || selinux_xfrm_enabled(); + peerlbl_active = selinux_peerlbl_enabled(); if (!secmark_active && !peerlbl_active) return NF_ACCEPT; @@ -4780,7 +4821,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, return NF_ACCEPT; #endif secmark_active = selinux_secmark_enabled(); - peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); + peerlbl_active = selinux_peerlbl_enabled(); if (!secmark_active && !peerlbl_active) return NF_ACCEPT; @@ -5784,7 +5825,8 @@ static struct security_operations selinux_ops = { .xfrm_policy_clone_security = selinux_xfrm_policy_clone, .xfrm_policy_free_security = selinux_xfrm_policy_free, .xfrm_policy_delete_security = selinux_xfrm_policy_delete, - .xfrm_state_alloc_security = selinux_xfrm_state_alloc, + .xfrm_state_alloc = selinux_xfrm_state_alloc, + .xfrm_state_alloc_acquire = selinux_xfrm_state_alloc_acquire, .xfrm_state_free_security = selinux_xfrm_state_free, .xfrm_state_delete_security = selinux_xfrm_state_delete, .xfrm_policy_lookup = selinux_xfrm_policy_lookup, diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index aa47bca..b1dfe10 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -58,8 +58,8 @@ struct superblock_security_struct { u32 sid; /* SID of file system superblock */ u32 def_sid; /* default SID for labeling */ u32 mntpoint_sid; /* SECURITY_FS_USE_MNTPOINT context for files */ - unsigned int behavior; /* labeling behavior */ - unsigned char flags; /* which mount options were specified */ + unsigned short behavior; /* labeling behavior */ + unsigned short flags; /* which mount options were specified */ struct mutex lock; struct list_head isec_head; spinlock_t isec_lock; diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 8fd8e18..fe341ae 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -45,14 +45,15 @@ /* Mask for just the mount related flags */ #define SE_MNTMASK 0x0f /* Super block security struct flags for mount options */ +/* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */ #define CONTEXT_MNT 0x01 #define FSCONTEXT_MNT 0x02 #define ROOTCONTEXT_MNT 0x04 #define DEFCONTEXT_MNT 0x08 +#define SBLABEL_MNT 0x10 /* Non-mount related flags */ -#define SE_SBINITIALIZED 0x10 -#define SE_SBPROC 0x20 -#define SE_SBLABELSUPP 0x40 +#define SE_SBINITIALIZED 0x0100 +#define SE_SBPROC 0x0200 #define CONTEXT_STR "context=" #define FSCONTEXT_STR "fscontext=" @@ -68,12 +69,15 @@ extern int selinux_enabled; enum { POLICYDB_CAPABILITY_NETPEER, POLICYDB_CAPABILITY_OPENPERM, + POLICYDB_CAPABILITY_REDHAT1, + POLICYDB_CAPABILITY_ALWAYSNETWORK, __POLICYDB_CAPABILITY_MAX }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) extern int selinux_policycap_netpeer; extern int selinux_policycap_openperm; +extern int selinux_policycap_alwaysnetwork; /* * type_datum properties @@ -172,8 +176,7 @@ int security_get_allow_unknown(void); #define SECURITY_FS_USE_NATIVE 7 /* use native label support */ #define SECURITY_FS_USE_MAX 7 /* Highest SECURITY_FS_USE_XXX */ -int security_fs_use(const char *fstype, unsigned int *behavior, - u32 *sid); +int security_fs_use(struct super_block *sb); int security_genfs_sid(const char *fstype, char *name, u16 sclass, u32 *sid); diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 6713f04..0dec76c 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -10,29 +10,21 @@ #include <net/flow.h> int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx); + struct xfrm_user_sec_ctx *uctx); int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp); void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx); int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx); int selinux_xfrm_state_alloc(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, u32 secid); + struct xfrm_user_sec_ctx *uctx); +int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid); void selinux_xfrm_state_free(struct xfrm_state *x); int selinux_xfrm_state_delete(struct xfrm_state *x); int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl); - -/* - * Extract the security blob from the sock (it's actually on the socket) - */ -static inline struct inode_security_struct *get_sock_isec(struct sock *sk) -{ - if (!sk->sk_socket) - return NULL; - - return SOCK_INODE(sk->sk_socket)->i_security; -} + struct xfrm_policy *xp, + const struct flowi *fl); #ifdef CONFIG_SECURITY_NETWORK_XFRM extern atomic_t selinux_xfrm_refcount; @@ -42,10 +34,10 @@ static inline int selinux_xfrm_enabled(void) return (atomic_read(&selinux_xfrm_refcount) > 0); } -int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, - struct common_audit_data *ad); -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad, u8 proto); +int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad); +int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad, u8 proto); int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall); static inline void selinux_xfrm_notify_policyload(void) @@ -64,19 +56,21 @@ static inline int selinux_xfrm_enabled(void) return 0; } -static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad) +static inline int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad) { return 0; } -static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad, u8 proto) +static inline int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad, + u8 proto) { return 0; } -static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) +static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, + int ckall) { *sid = SECSID_NULL; return 0; @@ -87,10 +81,9 @@ static inline void selinux_xfrm_notify_policyload(void) } #endif -static inline void selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid) +static inline int selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid) { - int err = selinux_xfrm_decode_session(skb, sid, 0); - BUG_ON(err); + return selinux_xfrm_decode_session(skb, sid, 0); } #endif /* _SELINUX_XFRM_H_ */ diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index da4b8b2..6235d05 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -442,8 +442,7 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr) sksec->nlbl_state != NLBL_CONNLABELED) return 0; - local_bh_disable(); - bh_lock_sock_nested(sk); + lock_sock(sk); /* connected sockets are allowed to disconnect when the address family * is set to AF_UNSPEC, if that is what is happening we want to reset @@ -464,7 +463,6 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr) sksec->nlbl_state = NLBL_CONNLABELED; socket_connect_return: - bh_unlock_sock(sk); - local_bh_enable(); + release_sock(sk); return rc; } diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index c5454c0..03a72c3 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -166,6 +166,7 @@ static void sel_netnode_insert(struct sel_netnode *node) break; default: BUG(); + return; } /* we need to impose a limit on the growth of the hash table so check @@ -225,6 +226,7 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid) break; default: BUG(); + ret = -EINVAL; } if (ret != 0) goto out; diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 855e464..332ac8a 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -116,6 +116,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] = { AUDIT_MAKE_EQUIV, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT }, + { AUDIT_GET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_READ }, + { AUDIT_SET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, }; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index ff42773..5122aff 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -44,7 +44,9 @@ /* Policy capability filenames */ static char *policycap_names[] = { "network_peer_controls", - "open_perms" + "open_perms", + "redhat1", + "always_check_network" }; unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 30f119b..820313a 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -213,7 +213,12 @@ netlbl_import_failure: } #endif /* CONFIG_NETLABEL */ -int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) +/* + * Check to see if all the bits set in e2 are also set in e1. Optionally, + * if last_e2bit is non-zero, the highest set bit in e2 cannot exceed + * last_e2bit. + */ +int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit) { struct ebitmap_node *n1, *n2; int i; @@ -223,14 +228,25 @@ int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) n1 = e1->node; n2 = e2->node; + while (n1 && n2 && (n1->startbit <= n2->startbit)) { if (n1->startbit < n2->startbit) { n1 = n1->next; continue; } - for (i = 0; i < EBITMAP_UNIT_NUMS; i++) { + for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i]; ) + i--; /* Skip trailing NULL map entries */ + if (last_e2bit && (i >= 0)) { + u32 lastsetbit = n2->startbit + i * EBITMAP_UNIT_SIZE + + __fls(n2->maps[i]); + if (lastsetbit > last_e2bit) + return 0; + } + + while (i >= 0) { if ((n1->maps[i] & n2->maps[i]) != n2->maps[i]) return 0; + i--; } n1 = n1->next; diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index 922f8af..712c8a7 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -16,7 +16,13 @@ #include <net/netlabel.h> -#define EBITMAP_UNIT_NUMS ((32 - sizeof(void *) - sizeof(u32)) \ +#ifdef CONFIG_64BIT +#define EBITMAP_NODE_SIZE 64 +#else +#define EBITMAP_NODE_SIZE 32 +#endif + +#define EBITMAP_UNIT_NUMS ((EBITMAP_NODE_SIZE-sizeof(void *)-sizeof(u32))\ / sizeof(unsigned long)) #define EBITMAP_UNIT_SIZE BITS_PER_LONG #define EBITMAP_SIZE (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE) @@ -117,7 +123,7 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n, int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src); -int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2); +int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit); int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); void ebitmap_destroy(struct ebitmap *e); diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 40de8d3..c85bc1e 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -160,8 +160,6 @@ void mls_sid_to_context(struct context *context, int mls_level_isvalid(struct policydb *p, struct mls_level *l) { struct level_datum *levdatum; - struct ebitmap_node *node; - int i; if (!l->sens || l->sens > p->p_levels.nprim) return 0; @@ -170,19 +168,13 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l) if (!levdatum) return 0; - ebitmap_for_each_positive_bit(&l->cat, node, i) { - if (i > p->p_cats.nprim) - return 0; - if (!ebitmap_get_bit(&levdatum->level->cat, i)) { - /* - * Category may not be associated with - * sensitivity. - */ - return 0; - } - } - - return 1; + /* + * Return 1 iff all the bits set in l->cat are also be set in + * levdatum->level->cat and no bit in l->cat is larger than + * p->p_cats.nprim. + */ + return ebitmap_contains(&levdatum->level->cat, &l->cat, + p->p_cats.nprim); } int mls_range_isvalid(struct policydb *p, struct mls_range *r) diff --git a/security/selinux/ss/mls_types.h b/security/selinux/ss/mls_types.h index 03bed52..e936487 100644 --- a/security/selinux/ss/mls_types.h +++ b/security/selinux/ss/mls_types.h @@ -35,7 +35,7 @@ static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2) static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2) { return ((l1->sens >= l2->sens) && - ebitmap_contains(&l1->cat, &l2->cat)); + ebitmap_contains(&l1->cat, &l2->cat, 0)); } #define mls_level_incomp(l1, l2) \ diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index c8adde3..f6195eb 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -3203,9 +3203,8 @@ static int range_write_helper(void *key, void *data, void *ptr) static int range_write(struct policydb *p, void *fp) { - size_t nel; __le32 buf[1]; - int rc; + int rc, nel; struct policy_data pd; pd.p = p; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b4feecc..ee470a0 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -72,6 +72,7 @@ int selinux_policycap_netpeer; int selinux_policycap_openperm; +int selinux_policycap_alwaysnetwork; static DEFINE_RWLOCK(policy_rwlock); @@ -1812,6 +1813,8 @@ static void security_load_policycaps(void) POLICYDB_CAPABILITY_NETPEER); selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps, POLICYDB_CAPABILITY_OPENPERM); + selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps, + POLICYDB_CAPABILITY_ALWAYSNETWORK); } static int security_preserve_bools(struct policydb *p); @@ -2323,43 +2326,74 @@ out: /** * security_fs_use - Determine how to handle labeling for a filesystem. - * @fstype: filesystem type - * @behavior: labeling behavior - * @sid: SID for filesystem (superblock) + * @sb: superblock in question */ -int security_fs_use( - const char *fstype, - unsigned int *behavior, - u32 *sid) +int security_fs_use(struct super_block *sb) { int rc = 0; struct ocontext *c; + struct superblock_security_struct *sbsec = sb->s_security; + const char *fstype = sb->s_type->name; + const char *subtype = (sb->s_subtype && sb->s_subtype[0]) ? sb->s_subtype : NULL; + struct ocontext *base = NULL; read_lock(&policy_rwlock); - c = policydb.ocontexts[OCON_FSUSE]; - while (c) { - if (strcmp(fstype, c->u.name) == 0) + for (c = policydb.ocontexts[OCON_FSUSE]; c; c = c->next) { + char *sub; + int baselen; + + baselen = strlen(fstype); + + /* if base does not match, this is not the one */ + if (strncmp(fstype, c->u.name, baselen)) + continue; + + /* if there is no subtype, this is the one! */ + if (!subtype) + break; + + /* skip past the base in this entry */ + sub = c->u.name + baselen; + + /* entry is only a base. save it. keep looking for subtype */ + if (sub[0] == '\0') { + base = c; + continue; + } + + /* entry is not followed by a subtype, so it is not a match */ + if (sub[0] != '.') + continue; + + /* whew, we found a subtype of this fstype */ + sub++; /* move past '.' */ + + /* exact match of fstype AND subtype */ + if (!strcmp(subtype, sub)) break; - c = c->next; } + /* in case we had found an fstype match but no subtype match */ + if (!c) + c = base; + if (c) { - *behavior = c->v.behavior; + sbsec->behavior = c->v.behavior; if (!c->sid[0]) { rc = sidtab_context_to_sid(&sidtab, &c->context[0], &c->sid[0]); if (rc) goto out; } - *sid = c->sid[0]; + sbsec->sid = c->sid[0]; } else { - rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid); + rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, &sbsec->sid); if (rc) { - *behavior = SECURITY_FS_USE_NONE; + sbsec->behavior = SECURITY_FS_USE_NONE; rc = 0; } else { - *behavior = SECURITY_FS_USE_GENFS; + sbsec->behavior = SECURITY_FS_USE_GENFS; } } diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d030818..a91d205 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -56,7 +56,7 @@ atomic_t selinux_xfrm_refcount = ATOMIC_INIT(0); /* - * Returns true if an LSM/SELinux context + * Returns true if the context is an LSM/SELinux context. */ static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx) { @@ -66,7 +66,7 @@ static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx) } /* - * Returns true if the xfrm contains a security blob for SELinux + * Returns true if the xfrm contains a security blob for SELinux. */ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) { @@ -74,48 +74,111 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) } /* - * LSM hook implementation that authorizes that a flow can use - * a xfrm policy rule. + * Allocates a xfrm_sec_state and populates it using the supplied security + * xfrm_user_sec_ctx context. */ -int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) +static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *uctx) { int rc; - u32 sel_sid; + const struct task_security_struct *tsec = current_security(); + struct xfrm_sec_ctx *ctx = NULL; + u32 str_len; - /* Context sid is either set to label or ANY_ASSOC */ - if (ctx) { - if (!selinux_authorizable_ctx(ctx)) - return -EINVAL; - - sel_sid = ctx->ctx_sid; - } else - /* - * All flows should be treated as polmatch'ing an - * otherwise applicable "non-labeled" policy. This - * would prevent inadvertent "leaks". - */ - return 0; + if (ctxp == NULL || uctx == NULL || + uctx->ctx_doi != XFRM_SC_DOI_LSM || + uctx->ctx_alg != XFRM_SC_ALG_SELINUX) + return -EINVAL; - rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__POLMATCH, - NULL); + str_len = uctx->ctx_len; + if (str_len >= PAGE_SIZE) + return -ENOMEM; - if (rc == -EACCES) - return -ESRCH; + ctx = kmalloc(sizeof(*ctx) + str_len + 1, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->ctx_doi = XFRM_SC_DOI_LSM; + ctx->ctx_alg = XFRM_SC_ALG_SELINUX; + ctx->ctx_len = str_len; + memcpy(ctx->ctx_str, &uctx[1], str_len); + ctx->ctx_str[str_len] = '\0'; + rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid); + if (rc) + goto err; + + rc = avc_has_perm(tsec->sid, ctx->ctx_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL); + if (rc) + goto err; + + *ctxp = ctx; + atomic_inc(&selinux_xfrm_refcount); + return 0; + +err: + kfree(ctx); return rc; } /* + * Free the xfrm_sec_ctx structure. + */ +static void selinux_xfrm_free(struct xfrm_sec_ctx *ctx) +{ + if (!ctx) + return; + + atomic_dec(&selinux_xfrm_refcount); + kfree(ctx); +} + +/* + * Authorize the deletion of a labeled SA or policy rule. + */ +static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx) +{ + const struct task_security_struct *tsec = current_security(); + + if (!ctx) + return 0; + + return avc_has_perm(tsec->sid, ctx->ctx_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, + NULL); +} + +/* + * LSM hook implementation that authorizes that a flow can use a xfrm policy + * rule. + */ +int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) +{ + int rc; + + /* All flows should be treated as polmatch'ing an otherwise applicable + * "non-labeled" policy. This would prevent inadvertent "leaks". */ + if (!ctx) + return 0; + + /* Context sid is either set to label or ANY_ASSOC */ + if (!selinux_authorizable_ctx(ctx)) + return -EINVAL; + + rc = avc_has_perm(fl_secid, ctx->ctx_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL); + return (rc == -EACCES ? -ESRCH : rc); +} + +/* * LSM hook implementation that authorizes that a state matches * the given policy, flow combo. */ - -int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl) +int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, + const struct flowi *fl) { u32 state_sid; - int rc; if (!xp->security) if (x->security) @@ -138,187 +201,80 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * if (fl->flowi_secid != state_sid) return 0; - rc = avc_has_perm(fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, - NULL)? 0:1; - - /* - * We don't need a separate SA Vs. policy polmatch check - * since the SA is now of the same label as the flow and - * a flow Vs. policy polmatch check had already happened - * in selinux_xfrm_policy_lookup() above. - */ - - return rc; + /* We don't need a separate SA Vs. policy polmatch check since the SA + * is now of the same label as the flow and a flow Vs. policy polmatch + * check had already happened in selinux_xfrm_policy_lookup() above. */ + return (avc_has_perm(fl->flowi_secid, state_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, + NULL) ? 0 : 1); } /* * LSM hook implementation that checks and/or returns the xfrm sid for the * incoming packet. */ - int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) { + u32 sid_session = SECSID_NULL; struct sec_path *sp; - *sid = SECSID_NULL; - if (skb == NULL) - return 0; + goto out; sp = skb->sp; if (sp) { - int i, sid_set = 0; + int i; - for (i = sp->len-1; i >= 0; i--) { + for (i = sp->len - 1; i >= 0; i--) { struct xfrm_state *x = sp->xvec[i]; if (selinux_authorizable_xfrm(x)) { struct xfrm_sec_ctx *ctx = x->security; - if (!sid_set) { - *sid = ctx->ctx_sid; - sid_set = 1; - + if (sid_session == SECSID_NULL) { + sid_session = ctx->ctx_sid; if (!ckall) - break; - } else if (*sid != ctx->ctx_sid) + goto out; + } else if (sid_session != ctx->ctx_sid) { + *sid = SECSID_NULL; return -EINVAL; + } } } } - return 0; -} - -/* - * Security blob allocation for xfrm_policy and xfrm_state - * CTX does not have a meaningful value on input - */ -static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx, u32 sid) -{ - int rc = 0; - const struct task_security_struct *tsec = current_security(); - struct xfrm_sec_ctx *ctx = NULL; - char *ctx_str = NULL; - u32 str_len; - - BUG_ON(uctx && sid); - - if (!uctx) - goto not_from_user; - - if (uctx->ctx_alg != XFRM_SC_ALG_SELINUX) - return -EINVAL; - - str_len = uctx->ctx_len; - if (str_len >= PAGE_SIZE) - return -ENOMEM; - - *ctxp = ctx = kmalloc(sizeof(*ctx) + - str_len + 1, - GFP_KERNEL); - - if (!ctx) - return -ENOMEM; - - ctx->ctx_doi = uctx->ctx_doi; - ctx->ctx_len = str_len; - ctx->ctx_alg = uctx->ctx_alg; - - memcpy(ctx->ctx_str, - uctx+1, - str_len); - ctx->ctx_str[str_len] = 0; - rc = security_context_to_sid(ctx->ctx_str, - str_len, - &ctx->ctx_sid); - - if (rc) - goto out; - - /* - * Does the subject have permission to set security context? - */ - rc = avc_has_perm(tsec->sid, ctx->ctx_sid, - SECCLASS_ASSOCIATION, - ASSOCIATION__SETCONTEXT, NULL); - if (rc) - goto out; - - return rc; - -not_from_user: - rc = security_sid_to_context(sid, &ctx_str, &str_len); - if (rc) - goto out; - - *ctxp = ctx = kmalloc(sizeof(*ctx) + - str_len, - GFP_ATOMIC); - - if (!ctx) { - rc = -ENOMEM; - goto out; - } - - ctx->ctx_doi = XFRM_SC_DOI_LSM; - ctx->ctx_alg = XFRM_SC_ALG_SELINUX; - ctx->ctx_sid = sid; - ctx->ctx_len = str_len; - memcpy(ctx->ctx_str, - ctx_str, - str_len); - - goto out2; - out: - *ctxp = NULL; - kfree(ctx); -out2: - kfree(ctx_str); - return rc; + *sid = sid_session; + return 0; } /* - * LSM hook implementation that allocs and transfers uctx spec to - * xfrm_policy. + * LSM hook implementation that allocs and transfers uctx spec to xfrm_policy. */ int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx) { - int err; - - BUG_ON(!uctx); - - err = selinux_xfrm_sec_ctx_alloc(ctxp, uctx, 0); - if (err == 0) - atomic_inc(&selinux_xfrm_refcount); - - return err; + return selinux_xfrm_alloc_user(ctxp, uctx); } - /* - * LSM hook implementation that copies security data structure from old to - * new for policy cloning. + * LSM hook implementation that copies security data structure from old to new + * for policy cloning. */ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp) { struct xfrm_sec_ctx *new_ctx; - if (old_ctx) { - new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len, - GFP_ATOMIC); - if (!new_ctx) - return -ENOMEM; + if (!old_ctx) + return 0; + + new_ctx = kmemdup(old_ctx, sizeof(*old_ctx) + old_ctx->ctx_len, + GFP_ATOMIC); + if (!new_ctx) + return -ENOMEM; + atomic_inc(&selinux_xfrm_refcount); + *new_ctxp = new_ctx; - memcpy(new_ctx, old_ctx, sizeof(*new_ctx)); - memcpy(new_ctx->ctx_str, old_ctx->ctx_str, new_ctx->ctx_len); - atomic_inc(&selinux_xfrm_refcount); - *new_ctxp = new_ctx; - } return 0; } @@ -327,8 +283,7 @@ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, */ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) { - atomic_dec(&selinux_xfrm_refcount); - kfree(ctx); + selinux_xfrm_free(ctx); } /* @@ -336,31 +291,55 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - const struct task_security_struct *tsec = current_security(); - - if (!ctx) - return 0; + return selinux_xfrm_delete(ctx); +} - return avc_has_perm(tsec->sid, ctx->ctx_sid, - SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, - NULL); +/* + * LSM hook implementation that allocates a xfrm_sec_state, populates it using + * the supplied security context, and assigns it to the xfrm_state. + */ +int selinux_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *uctx) +{ + return selinux_xfrm_alloc_user(&x->security, uctx); } /* - * LSM hook implementation that allocs and transfers sec_ctx spec to - * xfrm_state. + * LSM hook implementation that allocates a xfrm_sec_state and populates based + * on a secid. */ -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx, - u32 secid) +int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { - int err; + int rc; + struct xfrm_sec_ctx *ctx; + char *ctx_str = NULL; + int str_len; + + if (!polsec) + return 0; - BUG_ON(!x); + if (secid == 0) + return -EINVAL; - err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, secid); - if (err == 0) - atomic_inc(&selinux_xfrm_refcount); - return err; + rc = security_sid_to_context(secid, &ctx_str, &str_len); + if (rc) + return rc; + + ctx = kmalloc(sizeof(*ctx) + str_len, GFP_ATOMIC); + if (!ctx) + return -ENOMEM; + + ctx->ctx_doi = XFRM_SC_DOI_LSM; + ctx->ctx_alg = XFRM_SC_ALG_SELINUX; + ctx->ctx_sid = secid; + ctx->ctx_len = str_len; + memcpy(ctx->ctx_str, ctx_str, str_len); + kfree(ctx_str); + + x->security = ctx; + atomic_inc(&selinux_xfrm_refcount); + return 0; } /* @@ -368,24 +347,15 @@ int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uct */ void selinux_xfrm_state_free(struct xfrm_state *x) { - atomic_dec(&selinux_xfrm_refcount); - kfree(x->security); + selinux_xfrm_free(x->security); } - /* - * LSM hook implementation that authorizes deletion of labeled SAs. - */ +/* + * LSM hook implementation that authorizes deletion of labeled SAs. + */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - const struct task_security_struct *tsec = current_security(); - struct xfrm_sec_ctx *ctx = x->security; - - if (!ctx) - return 0; - - return avc_has_perm(tsec->sid, ctx->ctx_sid, - SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, - NULL); + return selinux_xfrm_delete(x->security); } /* @@ -395,14 +365,12 @@ int selinux_xfrm_state_delete(struct xfrm_state *x) * we need to check for unlabelled access since this may not have * gone thru the IPSec process. */ -int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad) +int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad) { - int i, rc = 0; - struct sec_path *sp; - u32 sel_sid = SECINITSID_UNLABELED; - - sp = skb->sp; + int i; + struct sec_path *sp = skb->sp; + u32 peer_sid = SECINITSID_UNLABELED; if (sp) { for (i = 0; i < sp->len; i++) { @@ -410,23 +378,17 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, if (x && selinux_authorizable_xfrm(x)) { struct xfrm_sec_ctx *ctx = x->security; - sel_sid = ctx->ctx_sid; + peer_sid = ctx->ctx_sid; break; } } } - /* - * This check even when there's no association involved is - * intended, according to Trent Jaeger, to make sure a - * process can't engage in non-ipsec communication unless - * explicitly allowed by policy. - */ - - rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__RECVFROM, ad); - - return rc; + /* This check even when there's no association involved is intended, + * according to Trent Jaeger, to make sure a process can't engage in + * non-IPsec communication unless explicitly allowed by policy. */ + return avc_has_perm(sk_sid, peer_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, ad); } /* @@ -436,49 +398,38 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, * If we do have a authorizable security association, then it has already been * checked in the selinux_xfrm_state_pol_flow_match hook above. */ -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad, u8 proto) +int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad, u8 proto) { struct dst_entry *dst; - int rc = 0; - - dst = skb_dst(skb); - - if (dst) { - struct dst_entry *dst_test; - - for (dst_test = dst; dst_test != NULL; - dst_test = dst_test->child) { - struct xfrm_state *x = dst_test->xfrm; - - if (x && selinux_authorizable_xfrm(x)) - goto out; - } - } switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: case IPPROTO_COMP: - /* - * We should have already seen this packet once before - * it underwent xfrm(s). No need to subject it to the - * unlabeled check. - */ - goto out; + /* We should have already seen this packet once before it + * underwent xfrm(s). No need to subject it to the unlabeled + * check. */ + return 0; default: break; } - /* - * This check even when there's no association involved is - * intended, according to Trent Jaeger, to make sure a - * process can't engage in non-ipsec communication unless - * explicitly allowed by policy. - */ + dst = skb_dst(skb); + if (dst) { + struct dst_entry *iter; - rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, ad); -out: - return rc; + for (iter = dst; iter != NULL; iter = iter->child) { + struct xfrm_state *x = iter->xfrm; + + if (x && selinux_authorizable_xfrm(x)) + return 0; + } + } + + /* This check even when there's no association involved is intended, + * according to Trent Jaeger, to make sure a process can't engage in + * non-IPsec communication unless explicitly allowed by policy. */ + return avc_has_perm(sk_sid, SECINITSID_UNLABELED, + SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, ad); } diff --git a/security/smack/smack.h b/security/smack/smack.h index 076b8e8..364cc64 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -177,9 +177,13 @@ struct smk_port_label { #define SMACK_CIPSO_MAXCATNUM 184 /* 23 * 8 */ /* - * Flag for transmute access + * Flags for untraditional access modes. + * It shouldn't be necessary to avoid conflicts with definitions + * in fs.h, but do so anyway. */ -#define MAY_TRANSMUTE 64 +#define MAY_TRANSMUTE 0x00001000 /* Controls directory labeling */ +#define MAY_LOCK 0x00002000 /* Locks should be writes, but ... */ + /* * Just to make the common cases easier to deal with */ @@ -188,9 +192,9 @@ struct smk_port_label { #define MAY_NOT 0 /* - * Number of access types used by Smack (rwxat) + * Number of access types used by Smack (rwxatl) */ -#define SMK_NUM_ACCESS_TYPE 5 +#define SMK_NUM_ACCESS_TYPE 6 /* SMACK data */ struct smack_audit_data { diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index b3b59b1..14293cd 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -84,6 +84,8 @@ int log_policy = SMACK_AUDIT_DENIED; * * Do the object check first because that is more * likely to differ. + * + * Allowing write access implies allowing locking. */ int smk_access_entry(char *subject_label, char *object_label, struct list_head *rule_list) @@ -99,6 +101,11 @@ int smk_access_entry(char *subject_label, char *object_label, } } + /* + * MAY_WRITE implies MAY_LOCK. + */ + if ((may & MAY_WRITE) == MAY_WRITE) + may |= MAY_LOCK; return may; } @@ -245,6 +252,7 @@ out_audit: static inline void smack_str_from_perm(char *string, int access) { int i = 0; + if (access & MAY_READ) string[i++] = 'r'; if (access & MAY_WRITE) @@ -255,6 +263,8 @@ static inline void smack_str_from_perm(char *string, int access) string[i++] = 'a'; if (access & MAY_TRANSMUTE) string[i++] = 't'; + if (access & MAY_LOCK) + string[i++] = 'l'; string[i] = '\0'; } /** diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 8825375..b0be893 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -185,7 +185,7 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode) smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); smk_ad_setfield_u_tsk(&ad, ctp); - rc = smk_curacc(skp->smk_known, MAY_READWRITE, &ad); + rc = smk_curacc(skp->smk_known, mode, &ad); return rc; } @@ -1146,7 +1146,7 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd, * @file: the object * @cmd: unused * - * Returns 0 if current has write access, error code otherwise + * Returns 0 if current has lock access, error code otherwise */ static int smack_file_lock(struct file *file, unsigned int cmd) { @@ -1154,7 +1154,7 @@ static int smack_file_lock(struct file *file, unsigned int cmd) smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); - return smk_curacc(file->f_security, MAY_WRITE, &ad); + return smk_curacc(file->f_security, MAY_LOCK, &ad); } /** @@ -1178,8 +1178,13 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, switch (cmd) { case F_GETLK: + break; case F_SETLK: case F_SETLKW: + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_setfield_u_fs_path(&ad, file->f_path); + rc = smk_curacc(file->f_security, MAY_LOCK, &ad); + break; case F_SETOWN: case F_SETSIG: smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 80f4b4a..160aa08e 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -139,7 +139,7 @@ const char *smack_cipso_option = SMACK_CIPSO_OPTION; * SMK_LOADLEN: Smack rule length */ #define SMK_OACCESS "rwxa" -#define SMK_ACCESS "rwxat" +#define SMK_ACCESS "rwxatl" #define SMK_OACCESSLEN (sizeof(SMK_OACCESS) - 1) #define SMK_ACCESSLEN (sizeof(SMK_ACCESS) - 1) #define SMK_OLOADLEN (SMK_LABELLEN + SMK_LABELLEN + SMK_OACCESSLEN) @@ -282,6 +282,10 @@ static int smk_perm_from_str(const char *string) case 'T': perm |= MAY_TRANSMUTE; break; + case 'l': + case 'L': + perm |= MAY_LOCK; + break; default: return perm; } @@ -452,7 +456,7 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf, /* * Minor hack for backward compatibility */ - if (count != SMK_OLOADLEN && count != SMK_LOADLEN) + if (count < SMK_OLOADLEN || count > SMK_LOADLEN) return -EINVAL; } else { if (count >= PAGE_SIZE) { @@ -592,6 +596,8 @@ static void smk_rule_show(struct seq_file *s, struct smack_rule *srp, int max) seq_putc(s, 'a'); if (srp->smk_access & MAY_TRANSMUTE) seq_putc(s, 't'); + if (srp->smk_access & MAY_LOCK) + seq_putc(s, 'l'); seq_putc(s, '\n'); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 662f34c..a0aa84b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1615,8 +1615,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { - return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page, - offset, len); + const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); + + return kvm_write_guest_page(kvm, gfn, zero_page, offset, len); } EXPORT_SYMBOL_GPL(kvm_clear_guest_page); |