diff options
589 files changed, 14967 insertions, 6084 deletions
diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt new file mode 100644 index 0000000..f4faec0 --- /dev/null +++ b/Documentation/assoc_array.txt @@ -0,0 +1,574 @@ + ======================================== + GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION + ======================================== + +Contents: + + - Overview. + + - The public API. + - Edit script. + - Operations table. + - Manipulation functions. + - Access functions. + - Index key form. + + - Internal workings. + - Basic internal tree layout. + - Shortcuts. + - Splitting and collapsing nodes. + - Non-recursive iteration. + - Simultaneous alteration and iteration. + + +======== +OVERVIEW +======== + +This associative array implementation is an object container with the following +properties: + + (1) Objects are opaque pointers. The implementation does not care where they + point (if anywhere) or what they point to (if anything). + + [!] NOTE: Pointers to objects _must_ be zero in the least significant bit. + + (2) Objects do not need to contain linkage blocks for use by the array. This + permits an object to be located in multiple arrays simultaneously. + Rather, the array is made up of metadata blocks that point to objects. + + (3) Objects require index keys to locate them within the array. + + (4) Index keys must be unique. Inserting an object with the same key as one + already in the array will replace the old object. + + (5) Index keys can be of any length and can be of different lengths. + + (6) Index keys should encode the length early on, before any variation due to + length is seen. + + (7) Index keys can include a hash to scatter objects throughout the array. + + (8) The array can iterated over. The objects will not necessarily come out in + key order. + + (9) The array can be iterated over whilst it is being modified, provided the + RCU readlock is being held by the iterator. Note, however, under these + circumstances, some objects may be seen more than once. If this is a + problem, the iterator should lock against modification. Objects will not + be missed, however, unless deleted. + +(10) Objects in the array can be looked up by means of their index key. + +(11) Objects can be looked up whilst the array is being modified, provided the + RCU readlock is being held by the thread doing the look up. + +The implementation uses a tree of 16-pointer nodes internally that are indexed +on each level by nibbles from the index key in the same manner as in a radix +tree. To improve memory efficiency, shortcuts can be emplaced to skip over +what would otherwise be a series of single-occupancy nodes. Further, nodes +pack leaf object pointers into spare space in the node rather than making an +extra branch until as such time an object needs to be added to a full node. + + +============== +THE PUBLIC API +============== + +The public API can be found in <linux/assoc_array.h>. The associative array is +rooted on the following structure: + + struct assoc_array { + ... + }; + +The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY. + + +EDIT SCRIPT +----------- + +The insertion and deletion functions produce an 'edit script' that can later be +applied to effect the changes without risking ENOMEM. This retains the +preallocated metadata blocks that will be installed in the internal tree and +keeps track of the metadata blocks that will be removed from the tree when the +script is applied. + +This is also used to keep track of dead blocks and dead objects after the +script has been applied so that they can be freed later. The freeing is done +after an RCU grace period has passed - thus allowing access functions to +proceed under the RCU read lock. + +The script appears as outside of the API as a pointer of the type: + + struct assoc_array_edit; + +There are two functions for dealing with the script: + + (1) Apply an edit script. + + void assoc_array_apply_edit(struct assoc_array_edit *edit); + + This will perform the edit functions, interpolating various write barriers + to permit accesses under the RCU read lock to continue. The edit script + will then be passed to call_rcu() to free it and any dead stuff it points + to. + + (2) Cancel an edit script. + + void assoc_array_cancel_edit(struct assoc_array_edit *edit); + + This frees the edit script and all preallocated memory immediately. If + this was for insertion, the new object is _not_ released by this function, + but must rather be released by the caller. + +These functions are guaranteed not to fail. + + +OPERATIONS TABLE +---------------- + +Various functions take a table of operations: + + struct assoc_array_ops { + ... + }; + +This points to a number of methods, all of which need to be provided: + + (1) Get a chunk of index key from caller data: + + unsigned long (*get_key_chunk)(const void *index_key, int level); + + This should return a chunk of caller-supplied index key starting at the + *bit* position given by the level argument. The level argument will be a + multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return + ASSOC_ARRAY_KEY_CHUNK_SIZE bits. No error is possible. + + + (2) Get a chunk of an object's index key. + + unsigned long (*get_object_key_chunk)(const void *object, int level); + + As the previous function, but gets its data from an object in the array + rather than from a caller-supplied index key. + + + (3) See if this is the object we're looking for. + + bool (*compare_object)(const void *object, const void *index_key); + + Compare the object against an index key and return true if it matches and + false if it doesn't. + + + (4) Diff the index keys of two objects. + + int (*diff_objects)(const void *a, const void *b); + + Return the bit position at which the index keys of two objects differ or + -1 if they are the same. + + + (5) Free an object. + + void (*free_object)(void *object); + + Free the specified object. Note that this may be called an RCU grace + period after assoc_array_apply_edit() was called, so synchronize_rcu() may + be necessary on module unloading. + + +MANIPULATION FUNCTIONS +---------------------- + +There are a number of functions for manipulating an associative array: + + (1) Initialise an associative array. + + void assoc_array_init(struct assoc_array *array); + + This initialises the base structure for an associative array. It can't + fail. + + + (2) Insert/replace an object in an associative array. + + struct assoc_array_edit * + assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object); + + This inserts the given object into the array. Note that the least + significant bit of the pointer must be zero as it's used to type-mark + pointers internally. + + If an object already exists for that key then it will be replaced with the + new object and the old one will be freed automatically. + + The index_key argument should hold index key information and is + passed to the methods in the ops table when they are called. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. + + The caller should lock exclusively against other modifiers of the array. + + + (3) Delete an object from an associative array. + + struct assoc_array_edit * + assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); + + This deletes an object that matches the specified data from the array. + + The index_key argument should hold index key information and is + passed to the methods in the ops table when they are called. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. NULL will be returned if the specified object is + not found within the array. + + The caller should lock exclusively against other modifiers of the array. + + + (4) Delete all objects from an associative array. + + struct assoc_array_edit * + assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops); + + This deletes all the objects from an associative array and leaves it + completely empty. + + This function makes no alteration to the array itself, but rather returns + an edit script that must be applied. -ENOMEM is returned in the case of + an out-of-memory error. + + The caller should lock exclusively against other modifiers of the array. + + + (5) Destroy an associative array, deleting all objects. + + void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops); + + This destroys the contents of the associative array and leaves it + completely empty. It is not permitted for another thread to be traversing + the array under the RCU read lock at the same time as this function is + destroying it as no RCU deferral is performed on memory release - + something that would require memory to be allocated. + + The caller should lock exclusively against other modifiers and accessors + of the array. + + + (6) Garbage collect an associative array. + + int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data); + + This iterates over the objects in an associative array and passes each one + to iterator(). If iterator() returns true, the object is kept. If it + returns false, the object will be freed. If the iterator() function + returns true, it must perform any appropriate refcount incrementing on the + object before returning. + + The internal tree will be packed down if possible as part of the iteration + to reduce the number of nodes in it. + + The iterator_data is passed directly to iterator() and is otherwise + ignored by the function. + + The function will return 0 if successful and -ENOMEM if there wasn't + enough memory. + + It is possible for other threads to iterate over or search the array under + the RCU read lock whilst this function is in progress. The caller should + lock exclusively against other modifiers of the array. + + +ACCESS FUNCTIONS +---------------- + +There are two functions for accessing an associative array: + + (1) Iterate over all the objects in an associative array. + + int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data); + + This passes each object in the array to the iterator callback function. + iterator_data is private data for that function. + + This may be used on an array at the same time as the array is being + modified, provided the RCU read lock is held. Under such circumstances, + it is possible for the iteration function to see some objects twice. If + this is a problem, then modification should be locked against. The + iteration algorithm should not, however, miss any objects. + + The function will return 0 if no objects were in the array or else it will + return the result of the last iterator function called. Iteration stops + immediately if any call to the iteration function results in a non-zero + return. + + + (2) Find an object in an associative array. + + void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); + + This walks through the array's internal tree directly to the object + specified by the index key.. + + This may be used on an array at the same time as the array is being + modified, provided the RCU read lock is held. + + The function will return the object if found (and set *_type to the object + type) or will return NULL if the object was not found. + + +INDEX KEY FORM +-------------- + +The index key can be of any form, but since the algorithms aren't told how long +the key is, it is strongly recommended that the index key includes its length +very early on before any variation due to the length would have an effect on +comparisons. + +This will cause leaves with different length keys to scatter away from each +other - and those with the same length keys to cluster together. + +It is also recommended that the index key begin with a hash of the rest of the +key to maximise scattering throughout keyspace. + +The better the scattering, the wider and lower the internal tree will be. + +Poor scattering isn't too much of a problem as there are shortcuts and nodes +can contain mixtures of leaves and metadata pointers. + +The index key is read in chunks of machine word. Each chunk is subdivided into +one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and +on a 64-bit CPU, 16 levels. Unless the scattering is really poor, it is +unlikely that more than one word of any particular index key will have to be +used. + + +================= +INTERNAL WORKINGS +================= + +The associative array data structure has an internal tree. This tree is +constructed of two types of metadata blocks: nodes and shortcuts. + +A node is an array of slots. Each slot can contain one of four things: + + (*) A NULL pointer, indicating that the slot is empty. + + (*) A pointer to an object (a leaf). + + (*) A pointer to a node at the next level. + + (*) A pointer to a shortcut. + + +BASIC INTERNAL TREE LAYOUT +-------------------------- + +Ignoring shortcuts for the moment, the nodes form a multilevel tree. The index +key space is strictly subdivided by the nodes in the tree and nodes occur on +fixed levels. For example: + + Level: 0 1 2 3 + =============== =============== =============== =============== + NODE D + NODE B NODE C +------>+---+ + +------>+---+ +------>+---+ | | 0 | + NODE A | | 0 | | | 0 | | +---+ + +---+ | +---+ | +---+ | : : + | 0 | | : : | : : | +---+ + +---+ | +---+ | +---+ | | f | + | 1 |---+ | 3 |---+ | 7 |---+ +---+ + +---+ +---+ +---+ + : : : : | 8 |---+ + +---+ +---+ +---+ | NODE E + | e |---+ | f | : : +------>+---+ + +---+ | +---+ +---+ | 0 | + | f | | | f | +---+ + +---+ | +---+ : : + | NODE F +---+ + +------>+---+ | f | + | 0 | NODE G +---+ + +---+ +------>+---+ + : : | | 0 | + +---+ | +---+ + | 6 |---+ : : + +---+ +---+ + : : | f | + +---+ +---+ + | f | + +---+ + +In the above example, there are 7 nodes (A-G), each with 16 slots (0-f). +Assuming no other meta data nodes in the tree, the key space is divided thusly: + + KEY PREFIX NODE + ========== ==== + 137* D + 138* E + 13[0-69-f]* C + 1[0-24-f]* B + e6* G + e[0-57-f]* F + [02-df]* A + +So, for instance, keys with the following example index keys will be found in +the appropriate nodes: + + INDEX KEY PREFIX NODE + =============== ======= ==== + 13694892892489 13 C + 13795289025897 137 D + 13889dde88793 138 E + 138bbb89003093 138 E + 1394879524789 12 C + 1458952489 1 B + 9431809de993ba - A + b4542910809cd - A + e5284310def98 e F + e68428974237 e6 G + e7fffcbd443 e F + f3842239082 - A + +To save memory, if a node can hold all the leaves in its portion of keyspace, +then the node will have all those leaves in it and will not have any metadata +pointers - even if some of those leaves would like to be in the same slot. + +A node can contain a heterogeneous mix of leaves and metadata pointers. +Metadata pointers must be in the slots that match their subdivisions of key +space. The leaves can be in any slot not occupied by a metadata pointer. It +is guaranteed that none of the leaves in a node will match a slot occupied by a +metadata pointer. If the metadata pointer is there, any leaf whose key matches +the metadata key prefix must be in the subtree that the metadata pointer points +to. + +In the above example list of index keys, node A will contain: + + SLOT CONTENT INDEX KEY (PREFIX) + ==== =============== ================== + 1 PTR TO NODE B 1* + any LEAF 9431809de993ba + any LEAF b4542910809cd + e PTR TO NODE F e* + any LEAF f3842239082 + +and node B: + + 3 PTR TO NODE C 13* + any LEAF 1458952489 + + +SHORTCUTS +--------- + +Shortcuts are metadata records that jump over a piece of keyspace. A shortcut +is a replacement for a series of single-occupancy nodes ascending through the +levels. Shortcuts exist to save memory and to speed up traversal. + +It is possible for the root of the tree to be a shortcut - say, for example, +the tree contains at least 17 nodes all with key prefix '1111'. The insertion +algorithm will insert a shortcut to skip over the '1111' keyspace in a single +bound and get to the fourth level where these actually become different. + + +SPLITTING AND COLLAPSING NODES +------------------------------ + +Each node has a maximum capacity of 16 leaves and metadata pointers. If the +insertion algorithm finds that it is trying to insert a 17th object into a +node, that node will be split such that at least two leaves that have a common +key segment at that level end up in a separate node rooted on that slot for +that common key segment. + +If the leaves in a full node and the leaf that is being inserted are +sufficiently similar, then a shortcut will be inserted into the tree. + +When the number of objects in the subtree rooted at a node falls to 16 or +fewer, then the subtree will be collapsed down to a single node - and this will +ripple towards the root if possible. + + +NON-RECURSIVE ITERATION +----------------------- + +Each node and shortcut contains a back pointer to its parent and the number of +slot in that parent that points to it. None-recursive iteration uses these to +proceed rootwards through the tree, going to the parent node, slot N + 1 to +make sure progress is made without the need for a stack. + +The backpointers, however, make simultaneous alteration and iteration tricky. + + +SIMULTANEOUS ALTERATION AND ITERATION +------------------------------------- + +There are a number of cases to consider: + + (1) Simple insert/replace. This involves simply replacing a NULL or old + matching leaf pointer with the pointer to the new leaf after a barrier. + The metadata blocks don't change otherwise. An old leaf won't be freed + until after the RCU grace period. + + (2) Simple delete. This involves just clearing an old matching leaf. The + metadata blocks don't change otherwise. The old leaf won't be freed until + after the RCU grace period. + + (3) Insertion replacing part of a subtree that we haven't yet entered. This + may involve replacement of part of that subtree - but that won't affect + the iteration as we won't have reached the pointer to it yet and the + ancestry blocks are not replaced (the layout of those does not change). + + (4) Insertion replacing nodes that we're actively processing. This isn't a + problem as we've passed the anchoring pointer and won't switch onto the + new layout until we follow the back pointers - at which point we've + already examined the leaves in the replaced node (we iterate over all the + leaves in a node before following any of its metadata pointers). + + We might, however, re-see some leaves that have been split out into a new + branch that's in a slot further along than we were at. + + (5) Insertion replacing nodes that we're processing a dependent branch of. + This won't affect us until we follow the back pointers. Similar to (4). + + (6) Deletion collapsing a branch under us. This doesn't affect us because the + back pointers will get us back to the parent of the new node before we + could see the new node. The entire collapsed subtree is thrown away + unchanged - and will still be rooted on the same slot, so we shouldn't + process it a second time as we'll go back to slot + 1. + +Note: + + (*) Under some circumstances, we need to simultaneously change the parent + pointer and the parent slot pointer on a node (say, for example, we + inserted another node before it and moved it up a level). We cannot do + this without locking against a read - so we have to replace that node too. + + However, when we're changing a shortcut into a node this isn't a problem + as shortcuts only have one slot and so the parent slot number isn't used + when traversing backwards over one. This means that it's okay to change + the slot number first - provided suitable barriers are used to make sure + the parent slot number is read after the back pointer. + +Obsolete blocks and leaves are freed up after an RCU grace period has passed, +so as long as anyone doing walking or iteration holds the RCU read lock, the +old superstructure should not go away on them. diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index ad6a738..b1cb341 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -15,6 +15,7 @@ adi,adt7461 +/-1C TDM Extended Temp Range I.C adt7461 +/-1C TDM Extended Temp Range I.C at,24c08 i2c serial eeprom (24cxx) atmel,24c02 i2c serial eeprom (24cxx) +atmel,at97sc3204t i2c trusted platform module (TPM) catalyst,24c32 i2c serial eeprom dallas,ds1307 64 x 8, Serial, I2C Real-Time Clock dallas,ds1338 I2C RTC with 56-Byte NV RAM @@ -35,6 +36,7 @@ fsl,mc13892 MC13892: Power Management Integrated Circuit (PMIC) for i.MX35/51 fsl,mma8450 MMA8450Q: Xtrinsic Low-power, 3-axis Xtrinsic Accelerometer fsl,mpr121 MPR121: Proximity Capacitive Touch Sensor Controller fsl,sgtl5000 SGTL5000: Ultra Low-Power Audio Codec +gmt,g751 G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire Interface infineon,slb9635tt Infineon SLB9635 (Soft-) I2C TPM (old protocol, max 100khz) infineon,slb9645tt Infineon SLB9645 I2C TPM (new protocol, max 400khz) maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator @@ -44,6 +46,7 @@ mc,rv3029c2 Real Time Clock Module with I2C-Bus national,lm75 I2C TEMP SENSOR national,lm80 Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor national,lm92 ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface +nuvoton,npct501 i2c trusted platform module (TPM) nxp,pca9556 Octal SMBus and I2C registered interface nxp,pca9557 8-bit I2C-bus and SMBus I/O port with reset nxp,pcf8563 Real-time clock/calendar @@ -61,3 +64,4 @@ taos,tsl2550 Ambient Light Sensor with SMBUS/Two Wire Serial Interface ti,tsc2003 I2C Touch-Screen Controller ti,tmp102 Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface ti,tmp275 Digital Temperature Sensor +winbond,wpct301 i2c trusted platform module (TPM) diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt index 9dae594..5dd282d 100644 --- a/Documentation/filesystems/btrfs.txt +++ b/Documentation/filesystems/btrfs.txt @@ -70,6 +70,12 @@ Unless otherwise specified, all options default to off. See comments at the top of fs/btrfs/check-integrity.c for more info. + commit=<seconds> + Set the interval of periodic commit, 30 seconds by default. Higher + values defer data being synced to permanent storage with obvious + consequences when the system crashes. The upper bound is not forced, + but a warning is printed if it's more than 300 seconds (5 minutes). + compress compress=<type> compress-force @@ -154,7 +160,11 @@ Unless otherwise specified, all options default to off. Currently this scans a list of several previous tree roots and tries to use the first readable. - skip_balance + rescan_uuid_tree + Force check and rebuild procedure of the UUID tree. This should not + normally be needed. + + skip_balance Skip automatic resume of interrupted balance operation after mount. May be resumed with "btrfs balance resume." @@ -234,24 +244,14 @@ available from the git repository at the following location: These include the following tools: -mkfs.btrfs: create a filesystem - -btrfsctl: control program to create snapshots and subvolumes: +* mkfs.btrfs: create a filesystem - mount /dev/sda2 /mnt - btrfsctl -s new_subvol_name /mnt - btrfsctl -s snapshot_of_default /mnt/default - btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name - btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol - ls /mnt - default snapshot_of_a_snapshot snapshot_of_new_subvol - new_subvol_name snapshot_of_default +* btrfs: a single tool to manage the filesystems, refer to the manpage for more details - Snapshots and subvolumes cannot be deleted right now, but you can - rm -rf all the files and directories inside them. +* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem -btrfsck: do a limited check of the FS extent trees. +Other tools for specific tasks: -btrfs-debug-tree: print all of the FS metadata in text form. Example: +* btrfs-convert: in-place conversion from ext2/3/4 filesystems - btrfs-debug-tree /dev/sda2 >& big_output_file +* btrfs-image: dump filesystem metadata for debugging diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9ca3e74..50680a5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1190,15 +1190,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted. owned by uid=0. ima_hash= [IMA] - Format: { "sha1" | "md5" } + Format: { md5 | sha1 | rmd160 | sha256 | sha384 + | sha512 | ... } default: "sha1" + The list of supported hash algorithms is defined + in crypto/hash_info.h. + ima_tcb [IMA] Load a policy which meets the needs of the Trusted Computing Base. This means IMA will measure all programs exec'd, files mmap'd for exec, and all files opened for read by uid=0. + ima_template= [IMA] + Select one of defined IMA measurements template formats. + Formats: { "ima" | "ima-ng" } + Default: "ima-ng" + init= [KNL] Format: <full_path> Run specified binary instead of /sbin/init as init diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX index 414235c..45c82fd 100644 --- a/Documentation/security/00-INDEX +++ b/Documentation/security/00-INDEX @@ -22,3 +22,5 @@ keys.txt - description of the kernel key retention service. tomoyo.txt - documentation on the TOMOYO Linux Security Module. +IMA-templates.txt + - documentation on the template management mechanism for IMA. diff --git a/Documentation/security/IMA-templates.txt b/Documentation/security/IMA-templates.txt new file mode 100644 index 0000000..a777e5f --- /dev/null +++ b/Documentation/security/IMA-templates.txt @@ -0,0 +1,87 @@ + IMA Template Management Mechanism + + +==== INTRODUCTION ==== + +The original 'ima' template is fixed length, containing the filedata hash +and pathname. The filedata hash is limited to 20 bytes (md5/sha1). +The pathname is a null terminated string, limited to 255 characters. +To overcome these limitations and to add additional file metadata, it is +necessary to extend the current version of IMA by defining additional +templates. For example, information that could be possibly reported are +the inode UID/GID or the LSM labels either of the inode and of the process +that is accessing it. + +However, the main problem to introduce this feature is that, each time +a new template is defined, the functions that generate and display +the measurements list would include the code for handling a new format +and, thus, would significantly grow over the time. + +The proposed solution solves this problem by separating the template +management from the remaining IMA code. The core of this solution is the +definition of two new data structures: a template descriptor, to determine +which information should be included in the measurement list; a template +field, to generate and display data of a given type. + +Managing templates with these structures is very simple. To support +a new data type, developers define the field identifier and implement +two functions, init() and show(), respectively to generate and display +measurement entries. Defining a new template descriptor requires +specifying the template format, a string of field identifiers separated +by the '|' character. While in the current implementation it is possible +to define new template descriptors only by adding their definition in the +template specific code (ima_template.c), in a future version it will be +possible to register a new template on a running kernel by supplying to IMA +the desired format string. In this version, IMA initializes at boot time +all defined template descriptors by translating the format into an array +of template fields structures taken from the set of the supported ones. + +After the initialization step, IMA will call ima_alloc_init_template() +(new function defined within the patches for the new template management +mechanism) to generate a new measurement entry by using the template +descriptor chosen through the kernel configuration or through the newly +introduced 'ima_template=' kernel command line parameter. It is during this +phase that the advantages of the new architecture are clearly shown: +the latter function will not contain specific code to handle a given template +but, instead, it simply calls the init() method of the template fields +associated to the chosen template descriptor and store the result (pointer +to allocated data and data length) in the measurement entry structure. + +The same mechanism is employed to display measurements entries. +The functions ima[_ascii]_measurements_show() retrieve, for each entry, +the template descriptor used to produce that entry and call the show() +method for each item of the array of template fields structures. + + + +==== SUPPORTED TEMPLATE FIELDS AND DESCRIPTORS ==== + +In the following, there is the list of supported template fields +('<identifier>': description), that can be used to define new template +descriptors by adding their identifier to the format string +(support for more data types will be added later): + + - 'd': the digest of the event (i.e. the digest of a measured file), + calculated with the SHA1 or MD5 hash algorithm; + - 'n': the name of the event (i.e. the file name), with size up to 255 bytes; + - 'd-ng': the digest of the event, calculated with an arbitrary hash + algorithm (field format: [<hash algo>:]digest, where the digest + prefix is shown only if the hash algorithm is not SHA1 or MD5); + - 'n-ng': the name of the event, without size limitations. + + +Below, there is the list of defined template descriptors: + - "ima": its format is 'd|n'; + - "ima-ng" (default): its format is 'd-ng|n-ng'. + + + +==== USE ==== + +To specify the template descriptor to be used to generate measurement entries, +currently the following methods are supported: + + - select a template descriptor among those supported in the kernel + configuration ('ima-ng' is the default choice); + - specify a template descriptor name from the kernel command line through + the 'ima_template=' parameter. diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 7b4145d..a4c33f1 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -865,15 +865,14 @@ encountered: calling processes has a searchable link to the key from one of its keyrings. There are three functions for dealing with these: - key_ref_t make_key_ref(const struct key *key, - unsigned long possession); + key_ref_t make_key_ref(const struct key *key, bool possession); struct key *key_ref_to_ptr(const key_ref_t key_ref); - unsigned long is_key_possessed(const key_ref_t key_ref); + bool is_key_possessed(const key_ref_t key_ref); The first function constructs a key reference from a key pointer and - possession information (which must be 0 or 1 and not any other value). + possession information (which must be true or false). The second function retrieves the key pointer from a reference and the third retrieves the possession flag. @@ -961,14 +960,17 @@ payload contents" for more information. the argument will not be parsed. -(*) Extra references can be made to a key by calling the following function: +(*) Extra references can be made to a key by calling one of the following + functions: + struct key *__key_get(struct key *key); struct key *key_get(struct key *key); - These need to be disposed of by calling key_put() when they've been - finished with. The key pointer passed in will be returned. If the pointer - is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and - no increment will take place. + Keys so references will need to be disposed of by calling key_put() when + they've been finished with. The key pointer passed in will be returned. + + In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set + then the key will not be dereferenced and no increment will take place. (*) A key's serial number can be obtained by calling: diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 54d29c1..230ce71 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -440,15 +440,15 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " /*\n" buf += " * Setup default attribute lists for various fabric->tf_cit_tmpl\n" buf += " */\n" - buf += " TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = " + fabric_mod_name + "_wwn_attrs;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL;\n" - buf += " TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = " + fabric_mod_name + "_wwn_attrs;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;\n" + buf += " fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;\n" buf += " /*\n" buf += " * Register the fabric for use within TCM\n" buf += " */\n" diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock index 7521d36..6dea4fd 100644 --- a/Documentation/vm/split_page_table_lock +++ b/Documentation/vm/split_page_table_lock @@ -63,9 +63,9 @@ levels. PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table allocation and pgtable_pmd_page_dtor() on freeing. -Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but -make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE -preallocate few PMDs on pgd_alloc(). +Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and +pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing +paths: i.e X86_PAE preallocate few PMDs on pgd_alloc(). With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. diff --git a/MAINTAINERS b/MAINTAINERS index 63f3048..8285ed4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4065,6 +4065,7 @@ F: arch/x86/include/uapi/asm/hyperv.h F: arch/x86/kernel/cpu/mshyperv.c F: drivers/hid/hid-hyperv.c F: drivers/hv/ +F: drivers/input/serio/hyperv-keyboard.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/video/hyperv_fb.c @@ -7515,9 +7516,10 @@ SELINUX SECURITY MODULE M: Stephen Smalley <sds@tycho.nsa.gov> M: James Morris <james.l.morris@oracle.com> M: Eric Paris <eparis@parisplace.org> +M: Paul Moore <paul@paul-moore.com> L: selinux@tycho.nsa.gov (subscribers-only, general discussion) W: http://selinuxproject.org -T: git git://git.infradead.org/users/eparis/selinux.git +T: git git://git.infradead.org/users/pcmoore/selinux S: Supported F: include/linux/selinux* F: security/selinux/ @@ -8664,6 +8666,7 @@ F: drivers/media/usb/tm6000/ TPM DEVICE DRIVER M: Leonidas Da Silva Barbosa <leosilva@linux.vnet.ibm.com> M: Ashley Lai <ashley@ashleylai.com> +M: Peter Huewe <peterhuewe@gmx.de> M: Rajiv Andrade <mail@srajiv.net> W: http://tpmdd.sourceforge.net M: Marcel Selhorst <tpmdd@selhorst.net> @@ -9522,8 +9525,8 @@ F: drivers/xen/*swiotlb* XFS FILESYSTEM P: Silicon Graphics Inc +M: Dave Chinner <dchinner@fromorbit.com> M: Ben Myers <bpm@sgi.com> -M: Alex Elder <elder@kernel.org> M: xfs@oss.sgi.com L: xfs@oss.sgi.com W: http://oss.sgi.com/projects/xfs @@ -1,7 +1,7 @@ VERSION = 3 -PATCHLEVEL = 12 +PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = One Giant Leap for Frogkind # *DOCUMENTATION* diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 135c674..d39dc9b 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -16,8 +16,8 @@ config ALPHA select ARCH_WANT_IPC_PARSE_VERSION select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select GENERIC_CLOCKEVENTS select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_MOD_ARCH_SPECIFIC @@ -488,6 +488,20 @@ config VGA_HOSE which always have multiple hoses, and whose consoles support it. +config ALPHA_QEMU + bool "Run under QEMU emulation" + depends on !ALPHA_GENERIC + ---help--- + Assume the presence of special features supported by QEMU PALcode + that reduce the overhead of system emulation. + + Generic kernels will auto-detect QEMU. But when building a + system-specific kernel, the assumption is that we want to + elimiate as many runtime tests as possible. + + If unsure, say N. + + config ALPHA_SRM bool "Use SRM as bootloader" if ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_NAUTILUS || ALPHA_NONAME depends on TTY @@ -572,6 +586,30 @@ config NUMA Access). This option is for configuring high-end multiprocessor server machines. If in doubt, say N. +config ALPHA_WTINT + bool "Use WTINT" if ALPHA_SRM || ALPHA_GENERIC + default y if ALPHA_QEMU + default n if ALPHA_EV5 || ALPHA_EV56 || (ALPHA_EV4 && !ALPHA_LCA) + default n if !ALPHA_SRM && !ALPHA_GENERIC + default y if SMP + ---help--- + The Wait for Interrupt (WTINT) PALcall attempts to place the CPU + to sleep until the next interrupt. This may reduce the power + consumed, and the heat produced by the computer. However, it has + the side effect of making the cycle counter unreliable as a timing + device across the sleep. + + For emulation under QEMU, definitely say Y here, as we have other + mechanisms for measuring time than the cycle counter. + + For EV4 (but not LCA), EV5 and EV56 systems, or for systems running + MILO, sleep mode is not supported so you might as well say N here. + + For SMP systems we cannot use the cycle counter for timing anyway, + so you might as well say Y here. + + If unsure, say N. + config NODES_SHIFT int default "7" @@ -613,9 +651,41 @@ config VERBOSE_MCHECK_ON Take the default (1) unless you want more control or more info. +choice + prompt "Timer interrupt frequency (HZ)?" + default HZ_128 if ALPHA_QEMU + default HZ_1200 if ALPHA_RAWHIDE + default HZ_1024 + ---help--- + The frequency at which timer interrupts occur. A high frequency + minimizes latency, whereas a low frequency minimizes overhead of + process accounting. The later effect is especially significant + when being run under QEMU. + + Note that some Alpha hardware cannot change the interrupt frequency + of the timer. If unsure, say 1024 (or 1200 for Rawhide). + + config HZ_32 + bool "32 Hz" + config HZ_64 + bool "64 Hz" + config HZ_128 + bool "128 Hz" + config HZ_256 + bool "256 Hz" + config HZ_1024 + bool "1024 Hz" + config HZ_1200 + bool "1200 Hz" +endchoice + config HZ - int - default 1200 if ALPHA_RAWHIDE + int + default 32 if HZ_32 + default 64 if HZ_64 + default 128 if HZ_128 + default 256 if HZ_256 + default 1200 if HZ_1200 default 1024 source "drivers/pci/Kconfig" diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h index 72dbf23..75cb364 100644 --- a/arch/alpha/include/asm/machvec.h +++ b/arch/alpha/include/asm/machvec.h @@ -33,6 +33,7 @@ struct alpha_machine_vector int nr_irqs; int rtc_port; + int rtc_boot_cpu_only; unsigned int max_asn; unsigned long max_isa_dma_address; unsigned long irq_probe_mask; @@ -95,9 +96,6 @@ struct alpha_machine_vector struct _alpha_agp_info *(*agp_info)(void); - unsigned int (*rtc_get_time)(struct rtc_time *); - int (*rtc_set_time)(struct rtc_time *); - const char *vector_name; /* NUMA information */ @@ -126,13 +124,19 @@ extern struct alpha_machine_vector alpha_mv; #ifdef CONFIG_ALPHA_GENERIC extern int alpha_using_srm; +extern int alpha_using_qemu; #else -#ifdef CONFIG_ALPHA_SRM -#define alpha_using_srm 1 -#else -#define alpha_using_srm 0 -#endif +# ifdef CONFIG_ALPHA_SRM +# define alpha_using_srm 1 +# else +# define alpha_using_srm 0 +# endif +# ifdef CONFIG_ALPHA_QEMU +# define alpha_using_qemu 1 +# else +# define alpha_using_qemu 0 +# endif #endif /* GENERIC */ -#endif +#endif /* __KERNEL__ */ #endif /* __ALPHA_MACHVEC_H */ diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h index 6fcd2b5..5422a47 100644 --- a/arch/alpha/include/asm/pal.h +++ b/arch/alpha/include/asm/pal.h @@ -89,6 +89,7 @@ __CALL_PAL_W1(wrmces, unsigned long); __CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long); __CALL_PAL_W1(wrusp, unsigned long); __CALL_PAL_W1(wrvptptr, unsigned long); +__CALL_PAL_RW1(wtint, unsigned long, unsigned long); /* * TB routines.. @@ -111,5 +112,75 @@ __CALL_PAL_W1(wrvptptr, unsigned long); #define tbiap() __tbi(-1, /* no second argument */) #define tbia() __tbi(-2, /* no second argument */) +/* + * QEMU Cserv routines.. + */ + +static inline unsigned long +qemu_get_walltime(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 3; + + asm("call_pal %2 # cserve get_time" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + +static inline unsigned long +qemu_get_alarm(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 4; + + asm("call_pal %2 # cserve get_alarm" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + +static inline void +qemu_set_alarm_rel(unsigned long expire) +{ + register unsigned long a0 __asm__("$16") = 5; + register unsigned long a1 __asm__("$17") = expire; + + asm volatile("call_pal %2 # cserve set_alarm_rel" + : "+r"(a0), "+r"(a1) + : "i"(PAL_cserve) + : "$0", "$18", "$19", "$20", "$21"); +} + +static inline void +qemu_set_alarm_abs(unsigned long expire) +{ + register unsigned long a0 __asm__("$16") = 6; + register unsigned long a1 __asm__("$17") = expire; + + asm volatile("call_pal %2 # cserve set_alarm_abs" + : "+r"(a0), "+r"(a1) + : "i"(PAL_cserve) + : "$0", "$18", "$19", "$20", "$21"); +} + +static inline unsigned long +qemu_get_vmtime(void) +{ + register unsigned long v0 __asm__("$0"); + register unsigned long a0 __asm__("$16") = 7; + + asm("call_pal %2 # cserve get_time" + : "=r"(v0), "+r"(a0) + : "i"(PAL_cserve) + : "$17", "$18", "$19", "$20", "$21"); + + return v0; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ALPHA_PAL_H */ diff --git a/arch/alpha/include/asm/rtc.h b/arch/alpha/include/asm/rtc.h index d70408d..f71c3b0 100644 --- a/arch/alpha/include/asm/rtc.h +++ b/arch/alpha/include/asm/rtc.h @@ -1,12 +1 @@ -#ifndef _ALPHA_RTC_H -#define _ALPHA_RTC_H - -#if defined(CONFIG_ALPHA_MARVEL) && defined(CONFIG_SMP) \ - || defined(CONFIG_ALPHA_GENERIC) -# define get_rtc_time alpha_mv.rtc_get_time -# define set_rtc_time alpha_mv.rtc_set_time -#endif - #include <asm-generic/rtc.h> - -#endif diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h index b02b8a2..c2911f5 100644 --- a/arch/alpha/include/asm/string.h +++ b/arch/alpha/include/asm/string.h @@ -22,15 +22,27 @@ extern void * __memcpy(void *, const void *, size_t); #define __HAVE_ARCH_MEMSET extern void * __constant_c_memset(void *, unsigned long, size_t); +extern void * ___memset(void *, int, size_t); extern void * __memset(void *, int, size_t); extern void * memset(void *, int, size_t); -#define memset(s, c, n) \ -(__builtin_constant_p(c) \ - ? (__builtin_constant_p(n) && (c) == 0 \ - ? __builtin_memset((s),0,(n)) \ - : __constant_c_memset((s),0x0101010101010101UL*(unsigned char)(c),(n))) \ - : __memset((s),(c),(n))) +/* For gcc 3.x, we cannot have the inline function named "memset" because + the __builtin_memset will attempt to resolve to the inline as well, + leading to a "sorry" about unimplemented recursive inlining. */ +extern inline void *__memset(void *s, int c, size_t n) +{ + if (__builtin_constant_p(c)) { + if (__builtin_constant_p(n)) { + return __builtin_memset(s, c, n); + } else { + unsigned long c8 = (c & 0xff) * 0x0101010101010101UL; + return __constant_c_memset(s, c8, n); + } + } + return ___memset(s, c, n); +} + +#define memset __memset #define __HAVE_ARCH_STRCPY extern char * strcpy(char *,const char *); diff --git a/arch/alpha/include/uapi/asm/pal.h b/arch/alpha/include/uapi/asm/pal.h index 3c0ce08..dfc8140 100644 --- a/arch/alpha/include/uapi/asm/pal.h +++ b/arch/alpha/include/uapi/asm/pal.h @@ -46,6 +46,7 @@ #define PAL_rdusp 58 #define PAL_whami 60 #define PAL_retsys 61 +#define PAL_wtint 62 #define PAL_rti 63 diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index 84ec46b..0d54650 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o obj-$(CONFIG_SRM_ENV) += srm_env.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_RTC_DRV_ALPHA) += rtc.o ifdef CONFIG_ALPHA_GENERIC diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c index 89566b3..f4c7ab6 100644 --- a/arch/alpha/kernel/alpha_ksyms.c +++ b/arch/alpha/kernel/alpha_ksyms.c @@ -40,6 +40,7 @@ EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(___memset); EXPORT_SYMBOL(__memsetw); EXPORT_SYMBOL(__constant_c_memset); EXPORT_SYMBOL(copy_page); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 28e4429..1c8625c 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -66,21 +66,7 @@ do_entInt(unsigned long type, unsigned long vector, break; case 1: old_regs = set_irq_regs(regs); -#ifdef CONFIG_SMP - { - long cpu; - - smp_percpu_timer_interrupt(regs); - cpu = smp_processor_id(); - if (cpu != boot_cpuid) { - kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ)); - } else { - handle_irq(RTC_IRQ); - } - } -#else handle_irq(RTC_IRQ); -#endif set_irq_regs(old_regs); return; case 2: @@ -228,7 +214,7 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr, */ struct irqaction timer_irqaction = { - .handler = timer_interrupt, + .handler = rtc_timer_interrupt, .name = "timer", }; diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h index 7fa6248..f54bdf6 100644 --- a/arch/alpha/kernel/machvec_impl.h +++ b/arch/alpha/kernel/machvec_impl.h @@ -43,10 +43,7 @@ #define CAT1(x,y) x##y #define CAT(x,y) CAT1(x,y) -#define DO_DEFAULT_RTC \ - .rtc_port = 0x70, \ - .rtc_get_time = common_get_rtc_time, \ - .rtc_set_time = common_set_rtc_time +#define DO_DEFAULT_RTC .rtc_port = 0x70 #define DO_EV4_MMU \ .max_asn = EV4_MAX_ASN, \ diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index d821b17..c52e7f0 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -83,6 +83,8 @@ struct alpha_pmu_t { long pmc_left[3]; /* Subroutine for allocation of PMCs. Enforces constraints. */ int (*check_constraints)(struct perf_event **, unsigned long *, int); + /* Subroutine for checking validity of a raw event for this PMU. */ + int (*raw_event_valid)(u64 config); }; /* @@ -203,6 +205,12 @@ success: } +static int ev67_raw_event_valid(u64 config) +{ + return config >= EV67_CYCLES && config < EV67_LAST_ET; +}; + + static const struct alpha_pmu_t ev67_pmu = { .event_map = ev67_perfmon_event_map, .max_events = ARRAY_SIZE(ev67_perfmon_event_map), @@ -211,7 +219,8 @@ static const struct alpha_pmu_t ev67_pmu = { .pmc_count_mask = {EV67_PCTR_0_COUNT_MASK, EV67_PCTR_1_COUNT_MASK, 0}, .pmc_max_period = {(1UL<<20) - 1, (1UL<<20) - 1, 0}, .pmc_left = {16, 4, 0}, - .check_constraints = ev67_check_constraints + .check_constraints = ev67_check_constraints, + .raw_event_valid = ev67_raw_event_valid, }; @@ -609,7 +618,9 @@ static int __hw_perf_event_init(struct perf_event *event) } else if (attr->type == PERF_TYPE_HW_CACHE) { return -EOPNOTSUPP; } else if (attr->type == PERF_TYPE_RAW) { - ev = attr->config & 0xff; + if (!alpha_pmu->raw_event_valid(attr->config)) + return -EINVAL; + ev = attr->config; } else { return -EOPNOTSUPP; } diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index f2360a7..1941a07 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -46,6 +46,23 @@ void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL(pm_power_off); +#ifdef CONFIG_ALPHA_WTINT +/* + * Sleep the CPU. + * EV6, LCA45 and QEMU know how to power down, skipping N timer interrupts. + */ +void arch_cpu_idle(void) +{ + wtint(0); + local_irq_enable(); +} + +void arch_cpu_idle_dead(void) +{ + wtint(INT_MAX); +} +#endif /* ALPHA_WTINT */ + struct halt_info { int mode; char *restart_cmd; diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h index d3e52d3..da2d6ec 100644 --- a/arch/alpha/kernel/proto.h +++ b/arch/alpha/kernel/proto.h @@ -135,17 +135,15 @@ extern void unregister_srm_console(void); /* smp.c */ extern void setup_smp(void); extern void handle_ipi(struct pt_regs *); -extern void smp_percpu_timer_interrupt(struct pt_regs *); /* bios32.c */ /* extern void reset_for_srm(void); */ /* time.c */ -extern irqreturn_t timer_interrupt(int irq, void *dev); +extern irqreturn_t rtc_timer_interrupt(int irq, void *dev); +extern void init_clockevent(void); extern void common_init_rtc(void); extern unsigned long est_cycle_freq; -extern unsigned int common_get_rtc_time(struct rtc_time *time); -extern int common_set_rtc_time(struct rtc_time *time); /* smc37c93x.c */ extern void SMC93x_Init(void); diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c new file mode 100644 index 0000000..c8d284d --- /dev/null +++ b/arch/alpha/kernel/rtc.c @@ -0,0 +1,323 @@ +/* + * linux/arch/alpha/kernel/rtc.c + * + * Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds + * + * This file contains date handling. + */ +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mc146818rtc.h> +#include <linux/bcd.h> +#include <linux/rtc.h> +#include <linux/platform_device.h> + +#include <asm/rtc.h> + +#include "proto.h" + + +/* + * Support for the RTC device. + * + * We don't want to use the rtc-cmos driver, because we don't want to support + * alarms, as that would be indistinguishable from timer interrupts. + * + * Further, generic code is really, really tied to a 1900 epoch. This is + * true in __get_rtc_time as well as the users of struct rtc_time e.g. + * rtc_tm_to_time. Thankfully all of the other epochs in use are later + * than 1900, and so it's easy to adjust. + */ + +static unsigned long rtc_epoch; + +static int __init +specifiy_epoch(char *str) +{ + unsigned long epoch = simple_strtoul(str, NULL, 0); + if (epoch < 1900) + printk("Ignoring invalid user specified epoch %lu\n", epoch); + else + rtc_epoch = epoch; + return 1; +} +__setup("epoch=", specifiy_epoch); + +static void __init +init_rtc_epoch(void) +{ + int epoch, year, ctrl; + + if (rtc_epoch != 0) { + /* The epoch was specified on the command-line. */ + return; + } + + /* Detect the epoch in use on this computer. */ + ctrl = CMOS_READ(RTC_CONTROL); + year = CMOS_READ(RTC_YEAR); + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + year = bcd2bin(year); + + /* PC-like is standard; used for year >= 70 */ + epoch = 1900; + if (year < 20) { + epoch = 2000; + } else if (year >= 20 && year < 48) { + /* NT epoch */ + epoch = 1980; + } else if (year >= 48 && year < 70) { + /* Digital UNIX epoch */ + epoch = 1952; + } + rtc_epoch = epoch; + + printk(KERN_INFO "Using epoch %d for rtc year %d\n", epoch, year); +} + +static int +alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + __get_rtc_time(tm); + + /* Adjust for non-default epochs. It's easier to depend on the + generic __get_rtc_time and adjust the epoch here than create + a copy of __get_rtc_time with the edits we need. */ + if (rtc_epoch != 1900) { + int year = tm->tm_year; + /* Undo the century adjustment made in __get_rtc_time. */ + if (year >= 100) + year -= 100; + year += rtc_epoch - 1900; + /* Redo the century adjustment with the epoch in place. */ + if (year <= 69) + year += 100; + tm->tm_year = year; + } + + return rtc_valid_tm(tm); +} + +static int +alpha_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct rtc_time xtm; + + if (rtc_epoch != 1900) { + xtm = *tm; + xtm.tm_year -= rtc_epoch - 1900; + tm = &xtm; + } + + return __set_rtc_time(tm); +} + +static int +alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime) +{ + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; + + /* Note: This code only updates minutes and seconds. Comments + indicate this was to avoid messing with unknown time zones, + and with the epoch nonsense described above. In order for + this to work, the existing clock cannot be off by more than + 15 minutes. + + ??? This choice is may be out of date. The x86 port does + not have problems with timezones, and the epoch processing has + now been fixed in alpha_set_rtc_time. + + In either case, one can always force a full rtc update with + the userland hwclock program, so surely 15 minute accuracy + is no real burden. */ + + /* In order to set the CMOS clock precisely, we have to be called + 500 ms after the second nowtime has started, because when + nowtime is written into the registers of the CMOS clock, it will + jump to the next second precisely 500 ms later. Check the Motorola + MC146818A or Dallas DS12887 data sheet for details. */ + + /* irq are locally disabled here */ + spin_lock(&rtc_lock); + /* Tell the clock it's being set */ + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + /* Stop and reset prescaler */ + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + cmos_minutes = CMOS_READ(RTC_MINUTES); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + cmos_minutes = bcd2bin(cmos_minutes); + + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) { + /* correct for half hour time zone */ + real_minutes += 30; + } + real_minutes %= 60; + + if (abs(real_minutes - cmos_minutes) < 30) { + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + real_seconds = bin2bcd(real_seconds); + real_minutes = bin2bcd(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } else { + printk_once(KERN_NOTICE + "set_rtc_mmss: can't update from %d to %d\n", + cmos_minutes, real_minutes); + retval = -1; + } + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); + + return retval; +} + +static int +alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case RTC_EPOCH_READ: + return put_user(rtc_epoch, (unsigned long __user *)arg); + case RTC_EPOCH_SET: + if (arg < 1900) + return -EINVAL; + rtc_epoch = arg; + return 0; + default: + return -ENOIOCTLCMD; + } +} + +static const struct rtc_class_ops alpha_rtc_ops = { + .read_time = alpha_rtc_read_time, + .set_time = alpha_rtc_set_time, + .set_mmss = alpha_rtc_set_mmss, + .ioctl = alpha_rtc_ioctl, +}; + +/* + * Similarly, except do the actual CMOS access on the boot cpu only. + * This requires marshalling the data across an interprocessor call. + */ + +#if defined(CONFIG_SMP) && \ + (defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_MARVEL)) +# define HAVE_REMOTE_RTC 1 + +union remote_data { + struct rtc_time *tm; + unsigned long now; + long retval; +}; + +static void +do_remote_read(void *data) +{ + union remote_data *x = data; + x->retval = alpha_rtc_read_time(NULL, x->tm); +} + +static int +remote_read_time(struct device *dev, struct rtc_time *tm) +{ + union remote_data x; + if (smp_processor_id() != boot_cpuid) { + x.tm = tm; + smp_call_function_single(boot_cpuid, do_remote_read, &x, 1); + return x.retval; + } + return alpha_rtc_read_time(NULL, tm); +} + +static void +do_remote_set(void *data) +{ + union remote_data *x = data; + x->retval = alpha_rtc_set_time(NULL, x->tm); +} + +static int +remote_set_time(struct device *dev, struct rtc_time *tm) +{ + union remote_data x; + if (smp_processor_id() != boot_cpuid) { + x.tm = tm; + smp_call_function_single(boot_cpuid, do_remote_set, &x, 1); + return x.retval; + } + return alpha_rtc_set_time(NULL, tm); +} + +static void +do_remote_mmss(void *data) +{ + union remote_data *x = data; + x->retval = alpha_rtc_set_mmss(NULL, x->now); +} + +static int +remote_set_mmss(struct device *dev, unsigned long now) +{ + union remote_data x; + if (smp_processor_id() != boot_cpuid) { + x.now = now; + smp_call_function_single(boot_cpuid, do_remote_mmss, &x, 1); + return x.retval; + } + return alpha_rtc_set_mmss(NULL, now); +} + +static const struct rtc_class_ops remote_rtc_ops = { + .read_time = remote_read_time, + .set_time = remote_set_time, + .set_mmss = remote_set_mmss, + .ioctl = alpha_rtc_ioctl, +}; +#endif + +static int __init +alpha_rtc_init(void) +{ + const struct rtc_class_ops *ops; + struct platform_device *pdev; + struct rtc_device *rtc; + const char *name; + + init_rtc_epoch(); + name = "rtc-alpha"; + ops = &alpha_rtc_ops; + +#ifdef HAVE_REMOTE_RTC + if (alpha_mv.rtc_boot_cpu_only) + ops = &remote_rtc_ops; +#endif + + pdev = platform_device_register_simple(name, -1, NULL, 0); + rtc = devm_rtc_device_register(&pdev->dev, name, ops, THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + platform_set_drvdata(pdev, rtc); + return 0; +} +device_initcall(alpha_rtc_init); diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 9e3107cc5..b20af76 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -115,10 +115,17 @@ unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE; #ifdef CONFIG_ALPHA_GENERIC struct alpha_machine_vector alpha_mv; +#endif + +#ifndef alpha_using_srm int alpha_using_srm; EXPORT_SYMBOL(alpha_using_srm); #endif +#ifndef alpha_using_qemu +int alpha_using_qemu; +#endif + static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long, unsigned long); static struct alpha_machine_vector *get_sysvec_byname(const char *); @@ -529,11 +536,15 @@ setup_arch(char **cmdline_p) atomic_notifier_chain_register(&panic_notifier_list, &alpha_panic_block); -#ifdef CONFIG_ALPHA_GENERIC +#ifndef alpha_using_srm /* Assume that we've booted from SRM if we haven't booted from MILO. Detect the later by looking for "MILO" in the system serial nr. */ alpha_using_srm = strncmp((const char *)hwrpb->ssn, "MILO", 4) != 0; #endif +#ifndef alpha_using_qemu + /* Similarly, look for QEMU. */ + alpha_using_qemu = strstr((const char *)hwrpb->ssn, "QEMU") != 0; +#endif /* If we are using SRM, we want to allow callbacks as early as possible, so do this NOW, and then @@ -1207,6 +1218,7 @@ show_cpuinfo(struct seq_file *f, void *slot) char *systype_name; char *sysvariation_name; int nr_processors; + unsigned long timer_freq; cpu_index = (unsigned) (cpu->type - 1); cpu_name = "Unknown"; @@ -1218,6 +1230,12 @@ show_cpuinfo(struct seq_file *f, void *slot) nr_processors = get_nr_processors(cpu, hwrpb->nr_processors); +#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200 + timer_freq = (100UL * hwrpb->intr_freq) / 4096; +#else + timer_freq = 100UL * CONFIG_HZ; +#endif + seq_printf(f, "cpu\t\t\t: Alpha\n" "cpu model\t\t: %s\n" "cpu variation\t\t: %ld\n" @@ -1243,8 +1261,7 @@ show_cpuinfo(struct seq_file *f, void *slot) (char*)hwrpb->ssn, est_cycle_freq ? : hwrpb->cycle_freq, est_cycle_freq ? "est." : "", - hwrpb->intr_freq / 4096, - (100 * hwrpb->intr_freq / 4096) % 100, + timer_freq / 100, timer_freq % 100, hwrpb->pagesize, hwrpb->pa_bits, hwrpb->max_asn, diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 9dbbcb3..99ac36d 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -138,9 +138,11 @@ smp_callin(void) /* Get our local ticker going. */ smp_setup_percpu_timer(cpuid); + init_clockevent(); /* Call platform-specific callin, if specified */ - if (alpha_mv.smp_callin) alpha_mv.smp_callin(); + if (alpha_mv.smp_callin) + alpha_mv.smp_callin(); /* All kernel threads share the same mm context. */ atomic_inc(&init_mm.mm_count); @@ -498,35 +500,6 @@ smp_cpus_done(unsigned int max_cpus) ((bogosum + 2500) / (5000/HZ)) % 100); } - -void -smp_percpu_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - int cpu = smp_processor_id(); - unsigned long user = user_mode(regs); - struct cpuinfo_alpha *data = &cpu_data[cpu]; - - old_regs = set_irq_regs(regs); - - /* Record kernel PC. */ - profile_tick(CPU_PROFILING); - - if (!--data->prof_counter) { - /* We need to make like a normal interrupt -- otherwise - timer interrupts ignore the global interrupt lock, - which would be a Bad Thing. */ - irq_enter(); - - update_process_times(user); - - data->prof_counter = data->prof_multiplier; - - irq_exit(); - } - set_irq_regs(old_regs); -} - int setup_profiling_timer(unsigned int multiplier) { diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index 5a0af11..608f2a7 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -224,8 +224,6 @@ struct alpha_machine_vector jensen_mv __initmv = { .machine_check = jensen_machine_check, .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, .rtc_port = 0x170, - .rtc_get_time = common_get_rtc_time, - .rtc_set_time = common_set_rtc_time, .nr_irqs = 16, .device_interrupt = jensen_device_interrupt, diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index c92e389..f21d61f 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -22,7 +22,6 @@ #include <asm/hwrpb.h> #include <asm/tlbflush.h> #include <asm/vga.h> -#include <asm/rtc.h> #include "proto.h" #include "err_impl.h" @@ -400,57 +399,6 @@ marvel_init_rtc(void) init_rtc_irq(); } -struct marvel_rtc_time { - struct rtc_time *time; - int retval; -}; - -#ifdef CONFIG_SMP -static void -smp_get_rtc_time(void *data) -{ - struct marvel_rtc_time *mrt = data; - mrt->retval = __get_rtc_time(mrt->time); -} - -static void -smp_set_rtc_time(void *data) -{ - struct marvel_rtc_time *mrt = data; - mrt->retval = __set_rtc_time(mrt->time); -} -#endif - -static unsigned int -marvel_get_rtc_time(struct rtc_time *time) -{ -#ifdef CONFIG_SMP - struct marvel_rtc_time mrt; - - if (smp_processor_id() != boot_cpuid) { - mrt.time = time; - smp_call_function_single(boot_cpuid, smp_get_rtc_time, &mrt, 1); - return mrt.retval; - } -#endif - return __get_rtc_time(time); -} - -static int -marvel_set_rtc_time(struct rtc_time *time) -{ -#ifdef CONFIG_SMP - struct marvel_rtc_time mrt; - - if (smp_processor_id() != boot_cpuid) { - mrt.time = time; - smp_call_function_single(boot_cpuid, smp_set_rtc_time, &mrt, 1); - return mrt.retval; - } -#endif - return __set_rtc_time(time); -} - static void marvel_smp_callin(void) { @@ -492,8 +440,7 @@ struct alpha_machine_vector marvel_ev7_mv __initmv = { .vector_name = "MARVEL/EV7", DO_EV7_MMU, .rtc_port = 0x70, - .rtc_get_time = marvel_get_rtc_time, - .rtc_set_time = marvel_set_rtc_time, + .rtc_boot_cpu_only = 1, DO_MARVEL_IO, .machine_check = marvel_machine_check, .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index ea33950..ee39cee 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -3,13 +3,7 @@ * * Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds * - * This file contains the PC-specific time handling details: - * reading the RTC at bootup, etc.. - * 1994-07-02 Alan Modra - * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime - * 1995-03-26 Markus Kuhn - * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 - * precision CMOS clock update + * This file contains the clocksource time handling. * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 * "A Kernel Model for Precision Timekeeping" by Dave Mills * 1997-01-09 Adrian Sun @@ -21,9 +15,6 @@ * 1999-04-16 Thorsten Kranzkowski (dl8bcu@gmx.net) * fixed algorithm in do_gettimeofday() for calculating the precise time * from processor cycle counter (now taking lost_ticks into account) - * 2000-08-13 Jan-Benedict Glaw <jbglaw@lug-owl.de> - * Fixed time_init to be aware of epoches != 1900. This prevents - * booting up in 2048 for me;) Code is stolen from rtc.c. * 2003-06-03 R. Scott Bailey <scott.bailey@eds.com> * Tighten sanity in time_init from 1% (10,000 PPM) to 250 PPM */ @@ -46,40 +37,19 @@ #include <asm/uaccess.h> #include <asm/io.h> #include <asm/hwrpb.h> -#include <asm/rtc.h> #include <linux/mc146818rtc.h> #include <linux/time.h> #include <linux/timex.h> #include <linux/clocksource.h> +#include <linux/clockchips.h> #include "proto.h" #include "irq_impl.h" -static int set_rtc_mmss(unsigned long); - DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -#define TICK_SIZE (tick_nsec / 1000) - -/* - * Shift amount by which scaled_ticks_per_cycle is scaled. Shifting - * by 48 gives us 16 bits for HZ while keeping the accuracy good even - * for large CPU clock rates. - */ -#define FIX_SHIFT 48 - -/* lump static variables together for more efficient access: */ -static struct { - /* cycle counter last time it got invoked */ - __u32 last_time; - /* ticks/cycle * 2^48 */ - unsigned long scaled_ticks_per_cycle; - /* partial unused tick */ - unsigned long partial_tick; -} state; - unsigned long est_cycle_freq; #ifdef CONFIG_IRQ_WORK @@ -108,109 +78,156 @@ static inline __u32 rpcc(void) return __builtin_alpha_rpcc(); } -int update_persistent_clock(struct timespec now) -{ - return set_rtc_mmss(now.tv_sec); -} -void read_persistent_clock(struct timespec *ts) + +/* + * The RTC as a clock_event_device primitive. + */ + +static DEFINE_PER_CPU(struct clock_event_device, cpu_ce); + +irqreturn_t +rtc_timer_interrupt(int irq, void *dev) { - unsigned int year, mon, day, hour, min, sec, epoch; - - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - sec = bcd2bin(sec); - min = bcd2bin(min); - hour = bcd2bin(hour); - day = bcd2bin(day); - mon = bcd2bin(mon); - year = bcd2bin(year); - } + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); - /* PC-like is standard; used for year >= 70 */ - epoch = 1900; - if (year < 20) - epoch = 2000; - else if (year >= 20 && year < 48) - /* NT epoch */ - epoch = 1980; - else if (year >= 48 && year < 70) - /* Digital UNIX epoch */ - epoch = 1952; + /* Don't run the hook for UNUSED or SHUTDOWN. */ + if (likely(ce->mode == CLOCK_EVT_MODE_PERIODIC)) + ce->event_handler(ce); - printk(KERN_INFO "Using epoch = %d\n", epoch); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } - if ((year += epoch) < 1970) - year += 100; + return IRQ_HANDLED; +} - ts->tv_sec = mktime(year, mon, day, hour, min, sec); - ts->tv_nsec = 0; +static void +rtc_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce) +{ + /* The mode member of CE is updated in generic code. + Since we only support periodic events, nothing to do. */ +} + +static int +rtc_ce_set_next_event(unsigned long evt, struct clock_event_device *ce) +{ + /* This hook is for oneshot mode, which we don't support. */ + return -EINVAL; } +static void __init +init_rtc_clockevent(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + *ce = (struct clock_event_device){ + .name = "rtc", + .features = CLOCK_EVT_FEAT_PERIODIC, + .rating = 100, + .cpumask = cpumask_of(cpu), + .set_mode = rtc_ce_set_mode, + .set_next_event = rtc_ce_set_next_event, + }; + clockevents_config_and_register(ce, CONFIG_HZ, 0, 0); +} + /* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "xtime_update()" routine every clocktick + * The QEMU clock as a clocksource primitive. */ -irqreturn_t timer_interrupt(int irq, void *dev) + +static cycle_t +qemu_cs_read(struct clocksource *cs) { - unsigned long delta; - __u32 now; - long nticks; + return qemu_get_vmtime(); +} -#ifndef CONFIG_SMP - /* Not SMP, do kernel PC profiling here. */ - profile_tick(CPU_PROFILING); -#endif +static struct clocksource qemu_cs = { + .name = "qemu", + .rating = 400, + .read = qemu_cs_read, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .max_idle_ns = LONG_MAX +}; - /* - * Calculate how many ticks have passed since the last update, - * including any previous partial leftover. Save any resulting - * fraction for the next pass. - */ - now = rpcc(); - delta = now - state.last_time; - state.last_time = now; - delta = delta * state.scaled_ticks_per_cycle + state.partial_tick; - state.partial_tick = delta & ((1UL << FIX_SHIFT) - 1); - nticks = delta >> FIX_SHIFT; - if (nticks) - xtime_update(nticks); +/* + * The QEMU alarm as a clock_event_device primitive. + */ - if (test_irq_work_pending()) { - clear_irq_work_pending(); - irq_work_run(); - } +static void +qemu_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce) +{ + /* The mode member of CE is updated for us in generic code. + Just make sure that the event is disabled. */ + qemu_set_alarm_abs(0); +} -#ifndef CONFIG_SMP - while (nticks--) - update_process_times(user_mode(get_irq_regs())); -#endif +static int +qemu_ce_set_next_event(unsigned long evt, struct clock_event_device *ce) +{ + qemu_set_alarm_rel(evt); + return 0; +} +static irqreturn_t +qemu_timer_interrupt(int irq, void *dev) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + ce->event_handler(ce); return IRQ_HANDLED; } +static void __init +init_qemu_clockevent(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *ce = &per_cpu(cpu_ce, cpu); + + *ce = (struct clock_event_device){ + .name = "qemu", + .features = CLOCK_EVT_FEAT_ONESHOT, + .rating = 400, + .cpumask = cpumask_of(cpu), + .set_mode = qemu_ce_set_mode, + .set_next_event = qemu_ce_set_next_event, + }; + + clockevents_config_and_register(ce, NSEC_PER_SEC, 1000, LONG_MAX); +} + + void __init common_init_rtc(void) { - unsigned char x; + unsigned char x, sel = 0; /* Reset periodic interrupt frequency. */ - x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f; - /* Test includes known working values on various platforms - where 0x26 is wrong; we refuse to change those. */ - if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) { - printk("Setting RTC_FREQ to 1024 Hz (%x)\n", x); - CMOS_WRITE(0x26, RTC_FREQ_SELECT); +#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200 + x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f; + /* Test includes known working values on various platforms + where 0x26 is wrong; we refuse to change those. */ + if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) { + sel = RTC_REF_CLCK_32KHZ + 6; } +#elif CONFIG_HZ == 256 || CONFIG_HZ == 128 || CONFIG_HZ == 64 || CONFIG_HZ == 32 + sel = RTC_REF_CLCK_32KHZ + __builtin_ffs(32768 / CONFIG_HZ); +#else +# error "Unknown HZ from arch/alpha/Kconfig" +#endif + if (sel) { + printk(KERN_INFO "Setting RTC_FREQ to %d Hz (%x)\n", + CONFIG_HZ, sel); + CMOS_WRITE(sel, RTC_FREQ_SELECT); + } /* Turn on periodic interrupts. */ x = CMOS_READ(RTC_CONTROL); @@ -233,16 +250,37 @@ common_init_rtc(void) init_rtc_irq(); } -unsigned int common_get_rtc_time(struct rtc_time *time) -{ - return __get_rtc_time(time); -} + +#ifndef CONFIG_ALPHA_WTINT +/* + * The RPCC as a clocksource primitive. + * + * While we have free-running timecounters running on all CPUs, and we make + * a half-hearted attempt in init_rtc_rpcc_info to sync the timecounter + * with the wall clock, that initialization isn't kept up-to-date across + * different time counters in SMP mode. Therefore we can only use this + * method when there's only one CPU enabled. + * + * When using the WTINT PALcall, the RPCC may shift to a lower frequency, + * or stop altogether, while waiting for the interrupt. Therefore we cannot + * use this method when WTINT is in use. + */ -int common_set_rtc_time(struct rtc_time *time) +static cycle_t read_rpcc(struct clocksource *cs) { - return __set_rtc_time(time); + return rpcc(); } +static struct clocksource clocksource_rpcc = { + .name = "rpcc", + .rating = 300, + .read = read_rpcc, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS +}; +#endif /* ALPHA_WTINT */ + + /* Validate a computed cycle counter result against the known bounds for the given processor core. There's too much brokenness in the way of timing hardware for any one method to work everywhere. :-( @@ -353,33 +391,6 @@ rpcc_after_update_in_progress(void) return rpcc(); } -#ifndef CONFIG_SMP -/* Until and unless we figure out how to get cpu cycle counters - in sync and keep them there, we can't use the rpcc. */ -static cycle_t read_rpcc(struct clocksource *cs) -{ - cycle_t ret = (cycle_t)rpcc(); - return ret; -} - -static struct clocksource clocksource_rpcc = { - .name = "rpcc", - .rating = 300, - .read = read_rpcc, - .mask = CLOCKSOURCE_MASK(32), - .flags = CLOCK_SOURCE_IS_CONTINUOUS -}; - -static inline void register_rpcc_clocksource(long cycle_freq) -{ - clocksource_register_hz(&clocksource_rpcc, cycle_freq); -} -#else /* !CONFIG_SMP */ -static inline void register_rpcc_clocksource(long cycle_freq) -{ -} -#endif /* !CONFIG_SMP */ - void __init time_init(void) { @@ -387,6 +398,15 @@ time_init(void) unsigned long cycle_freq, tolerance; long diff; + if (alpha_using_qemu) { + clocksource_register_hz(&qemu_cs, NSEC_PER_SEC); + init_qemu_clockevent(); + + timer_irqaction.handler = qemu_timer_interrupt; + init_rtc_irq(); + return; + } + /* Calibrate CPU clock -- attempt #1. */ if (!est_cycle_freq) est_cycle_freq = validate_cc_value(calibrate_cc_with_pit()); @@ -421,100 +441,25 @@ time_init(void) "and unable to estimate a proper value!\n"); } - /* From John Bowman <bowman@math.ualberta.ca>: allow the values - to settle, as the Update-In-Progress bit going low isn't good - enough on some hardware. 2ms is our guess; we haven't found - bogomips yet, but this is close on a 500Mhz box. */ - __delay(1000000); - - - if (HZ > (1<<16)) { - extern void __you_loose (void); - __you_loose(); - } - - register_rpcc_clocksource(cycle_freq); - - state.last_time = cc1; - state.scaled_ticks_per_cycle - = ((unsigned long) HZ << FIX_SHIFT) / cycle_freq; - state.partial_tick = 0L; + /* See above for restrictions on using clocksource_rpcc. */ +#ifndef CONFIG_ALPHA_WTINT + if (hwrpb->nr_processors == 1) + clocksource_register_hz(&clocksource_rpcc, cycle_freq); +#endif /* Startup the timer source. */ alpha_mv.init_rtc(); + init_rtc_clockevent(); } -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be - * called 500 ms after the second nowtime has started, because when - * nowtime is written into the registers of the CMOS clock, it will - * jump to the next second precisely 500 ms later. Check the Motorola - * MC146818A or Dallas DS12887 data sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you won't notice until after reboot! - */ - - -static int -set_rtc_mmss(unsigned long nowtime) +/* Initialize the clock_event_device for secondary cpus. */ +#ifdef CONFIG_SMP +void __init +init_clockevent(void) { - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - unsigned char save_control, save_freq_select; - - /* irq are locally disabled here */ - spin_lock(&rtc_lock); - /* Tell the clock it's being set */ - save_control = CMOS_READ(RTC_CONTROL); - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - /* Stop and reset prescaler */ - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) - cmos_minutes = bcd2bin(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, - * don't interfere with hour overflow. This avoids - * messing with unknown time zones but requires your - * RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) { - /* correct for half hour time zone */ - real_minutes += 30; - } - real_minutes %= 60; - - if (abs(real_minutes - cmos_minutes) < 30) { - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - real_seconds = bin2bcd(real_seconds); - real_minutes = bin2bcd(real_minutes); - } - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); - } else { - printk_once(KERN_NOTICE - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - spin_unlock(&rtc_lock); - - return retval; + if (alpha_using_qemu) + init_qemu_clockevent(); + else + init_rtc_clockevent(); } +#endif diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index bd0665c..9c4c189 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -241,6 +241,21 @@ do_entIF(unsigned long type, struct pt_regs *regs) (const char *)(data[1] | (long)data[2] << 32), data[0]); } +#ifdef CONFIG_ALPHA_WTINT + if (type == 4) { + /* If CALL_PAL WTINT is totally unsupported by the + PALcode, e.g. MILO, "emulate" it by overwriting + the insn. */ + unsigned int *pinsn + = (unsigned int *) regs->pc - 1; + if (*pinsn == PAL_wtint) { + *pinsn = 0x47e01400; /* mov 0,$0 */ + imb(); + regs->r0 = 0; + return; + } + } +#endif /* ALPHA_WTINT */ die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"), regs, type, NULL); } diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c index ffb19b7..ff3c107 100644 --- a/arch/alpha/lib/csum_partial_copy.c +++ b/arch/alpha/lib/csum_partial_copy.c @@ -130,7 +130,7 @@ csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst, *dst = word | tmp; checksum += carry; } - if (err) *errp = err; + if (err && errp) *errp = err; return checksum; } @@ -185,7 +185,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src, *dst = word | tmp; checksum += carry; } - if (err) *errp = err; + if (err && errp) *errp = err; return checksum; } @@ -242,7 +242,7 @@ csum_partial_cfu_src_aligned(const unsigned long __user *src, stq_u(partial_dest | second_dest, dst); out: checksum += carry; - if (err) *errp = err; + if (err && errp) *errp = err; return checksum; } @@ -325,7 +325,7 @@ csum_partial_cfu_unaligned(const unsigned long __user * src, stq_u(partial_dest | word | second_dest, dst); checksum += carry; } - if (err) *errp = err; + if (err && errp) *errp = err; return checksum; } @@ -339,7 +339,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, if (len) { if (!access_ok(VERIFY_READ, src, len)) { - *errp = -EFAULT; + if (errp) *errp = -EFAULT; memset(dst, 0, len); return sum; } diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S index d8b94e1..356bb2f 100644 --- a/arch/alpha/lib/ev6-memset.S +++ b/arch/alpha/lib/ev6-memset.S @@ -30,14 +30,15 @@ .set noat .set noreorder .text + .globl memset .globl __memset + .globl ___memset .globl __memsetw .globl __constant_c_memset - .globl memset - .ent __memset + .ent ___memset .align 5 -__memset: +___memset: .frame $30,0,$26,0 .prologue 0 @@ -227,7 +228,7 @@ end_b: nop nop ret $31,($26),1 # L0 : - .end __memset + .end ___memset /* * This is the original body of code, prior to replication and @@ -594,4 +595,5 @@ end_w: .end __memsetw -memset = __memset +memset = ___memset +__memset = ___memset diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S index 311b8cf..76ccc6d 100644 --- a/arch/alpha/lib/memset.S +++ b/arch/alpha/lib/memset.S @@ -19,11 +19,13 @@ .text .globl memset .globl __memset + .globl ___memset .globl __memsetw .globl __constant_c_memset - .ent __memset + + .ent ___memset .align 5 -__memset: +___memset: .frame $30,0,$26,0 .prologue 0 @@ -103,7 +105,7 @@ within_one_quad: end: ret $31,($26),1 /* E1 */ - .end __memset + .end ___memset .align 5 .ent __memsetw @@ -121,4 +123,5 @@ __memsetw: .end __memsetw -memset = __memset +memset = ___memset +__memset = ___memset diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 214b698..c1f1a7e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -25,7 +25,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB - select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_CONTEXT_TRACKING @@ -1496,6 +1496,7 @@ config HAVE_ARM_ARCH_TIMER bool "Architected timer support" depends on CPU_V7 select ARM_ARCH_TIMER + select GENERIC_CLOCKEVENTS help This option enables support for the ARM architected timer @@ -1719,7 +1720,6 @@ config AEABI config OABI_COMPAT bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)" depends on AEABI && !THUMB2_KERNEL - default y help This option preserves the old syscall interface along with the new (ARM EABI) one. It also provides a compatibility layer to @@ -1727,11 +1727,16 @@ config OABI_COMPAT in memory differs between the legacy ABI and the new ARM EABI (only for non "thumb" binaries). This option adds a tiny overhead to all syscalls and produces a slightly larger kernel. + + The seccomp filter system will not be available when this is + selected, since there is no way yet to sensibly distinguish + between calling conventions during filtering. + If you know you'll be using only pure EABI user space then you can say N here. If this option is not selected and you attempt to execute a legacy ABI binary then the result will be UNPREDICTABLE (in fact it can be predicted that it won't work - at all). If in doubt say Y. + at all). If in doubt say N. config ARCH_HAS_HOLES_MEMORYMODEL bool diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 4dd2145..9ecccc8 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -226,7 +226,14 @@ static inline phys_addr_t __virt_to_phys(unsigned long x) static inline unsigned long __phys_to_virt(phys_addr_t x) { unsigned long t; - __pv_stub(x, t, "sub", __PV_BITS_31_24); + + /* + * 'unsigned long' cast discard upper word when + * phys_addr_t is 64 bit, and makes sure that inline + * assembler expression receives 32 bit argument + * in place where 'r' 32 bit operand is expected. + */ + __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24); return t; } diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 7801866..11d59b3 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -508,6 +508,7 @@ __fixup_smp: teq r0, #0x0 @ '0' on actual UP A9 hardware beq __fixup_smp_on_up @ So its an A9 UP ldr r0, [r0, #4] @ read SCU Config +ARM_BE8(rev r0, r0) @ byteswap if big endian and r0, r0, #0x3 @ number of CPUs teq r0, #0x0 @ is 1? movne pc, lr @@ -644,7 +645,11 @@ ARM_BE8(rev16 ip, ip) bcc 1b bx lr #else +#ifdef CONFIG_CPU_ENDIAN_BE8 + moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction +#else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction +#endif b 2f 1: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @@ -653,7 +658,7 @@ ARM_BE8(rev16 ip, ip) tst ip, #0x000f0000 @ check the rotation field orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, lsl #24 @ mask in offset bits 7-0 + orreq ip, ip, r0 @ mask in offset bits 7-0 #else bic ip, ip, #0x000000ff tst ip, #0xf00 @ check the rotation field diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 6125f25..dbf0923 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -856,7 +856,7 @@ static void __init kuser_init(void *vectors) memcpy(vectors + 0xfe0, vectors + 0xfe8, 4); } #else -static void __init kuser_init(void *vectors) +static inline void __init kuser_init(void *vectors) { } #endif diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 3719583..5809069 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -334,6 +334,17 @@ out: return err; } +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -345,16 +356,27 @@ out: */ int create_hyp_mappings(void *from, void *to) { - unsigned long phys_addr = virt_to_phys(from); + phys_addr_t phys_addr; + unsigned long virt_addr; unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); - /* Check for a valid kernel memory mapping */ - if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) - return -EINVAL; + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); - return __create_hyp_mappings(hyp_pgd, start, end, - __phys_to_pfn(phys_addr), PAGE_HYP); + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, virt_addr, + virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + PAGE_HYP); + if (err) + return err; + } + + return 0; } /** diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index e0c68d5..52886b8 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -10,7 +10,7 @@ UNWIND( .fnstart ) and r3, r0, #31 @ Get bit offset mov r0, r0, lsr #5 add r1, r1, r0, lsl #2 @ Get word offset -#if __LINUX_ARM_ARCH__ >= 7 +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) .arch_extension mp ALT_SMP(W(pldw) [r1]) ALT_UP(W(nop)) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 78eeeca..580ef2d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -558,8 +558,8 @@ static void __init build_mem_type_table(void) mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; break; } - printk("Memory policy: ECC %sabled, Data cache %s\n", - ecc_mask ? "en" : "dis", cp->policy); + pr_info("Memory policy: %sData cache %s\n", + ecc_mask ? "ECC enabled, " : "", cp->policy); for (i = 0; i < ARRAY_SIZE(mem_types); i++) { struct mem_type *t = &mem_types[i]; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 5c668b7..55764a7e 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -18,6 +18,7 @@ #include <asm/mach/arch.h> #include <asm/cputype.h> #include <asm/mpu.h> +#include <asm/procinfo.h> #include "mm.h" diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 60920f6..bd17819 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -92,7 +92,7 @@ ENDPROC(cpu_v7_dcache_clean_area) /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ .globl cpu_v7_suspend_size -.equ cpu_v7_suspend_size, 4 * 8 +.equ cpu_v7_suspend_size, 4 * 9 #ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_v7_do_suspend) stmfd sp!, {r4 - r10, lr} @@ -101,13 +101,17 @@ ENTRY(cpu_v7_do_suspend) stmia r0!, {r4 - r5} #ifdef CONFIG_MMU mrc p15, 0, r6, c3, c0, 0 @ Domain ID +#ifdef CONFIG_ARM_LPAE + mrrc p15, 1, r5, r7, c2 @ TTB 1 +#else mrc p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif mrc p15, 0, r11, c2, c0, 2 @ TTB control register #endif mrc p15, 0, r8, c1, c0, 0 @ Control register mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control - stmia r0, {r6 - r11} + stmia r0, {r5 - r11} ldmfd sp!, {r4 - r10, pc} ENDPROC(cpu_v7_do_suspend) @@ -118,16 +122,19 @@ ENTRY(cpu_v7_do_resume) ldmia r0!, {r4 - r5} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID - ldmia r0, {r6 - r11} + ldmia r0, {r5 - r11} #ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r6, c3, c0, 0 @ Domain ID -#ifndef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r1, ip, c2 @ TTB 0 + mcrr p15, 1, r5, r7, c2 @ TTB 1 +#else ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) ALT_UP(orr r1, r1, #TTB_FLAGS_UP) -#endif mcr p15, 0, r1, c2, c0, 0 @ TTB 0 mcr p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif mcr p15, 0, r11, c2, c0, 2 @ TTB control register ldr r4, =PRRR @ PRRR ldr r5, =NMRR @ NMRR diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S index 4488fa2..2ffc298 100644 --- a/arch/avr32/boot/u-boot/head.S +++ b/arch/avr32/boot/u-boot/head.S @@ -8,6 +8,8 @@ * published by the Free Software Foundation. */ #include <asm/setup.h> +#include <asm/thread_info.h> +#include <asm/sysreg.h> /* * The kernel is loaded where we want it to be and all caches @@ -20,11 +22,6 @@ .section .init.text,"ax" .global _start _start: - /* Check if the boot loader actually provided a tag table */ - lddpc r0, magic_number - cp.w r12, r0 - brne no_tag_table - /* Initialize .bss */ lddpc r2, bss_start_addr lddpc r3, end_addr @@ -34,6 +31,25 @@ _start: cp r2, r3 brlo 1b + /* Initialize status register */ + lddpc r0, init_sr + mtsr SYSREG_SR, r0 + + /* Set initial stack pointer */ + lddpc sp, stack_addr + sub sp, -THREAD_SIZE + +#ifdef CONFIG_FRAME_POINTER + /* Mark last stack frame */ + mov lr, 0 + mov r7, 0 +#endif + + /* Check if the boot loader actually provided a tag table */ + lddpc r0, magic_number + cp.w r12, r0 + brne no_tag_table + /* * Save the tag table address for later use. This must be done * _after_ .bss has been initialized... @@ -53,8 +69,15 @@ bss_start_addr: .long __bss_start end_addr: .long _end +init_sr: + .long 0x007f0000 /* Supervisor mode, everything masked */ +stack_addr: + .long init_thread_union +panic_addr: + .long panic no_tag_table: sub r12, pc, (. - 2f) - bral panic + /* branch to panic() which can be far away with that construct */ + lddpc pc, panic_addr 2: .asciz "Boot loader didn't provide correct magic number\n" diff --git a/arch/avr32/include/asm/kprobes.h b/arch/avr32/include/asm/kprobes.h index 996cb65..45f563e 100644 --- a/arch/avr32/include/asm/kprobes.h +++ b/arch/avr32/include/asm/kprobes.h @@ -16,6 +16,7 @@ typedef u16 kprobe_opcode_t; #define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */ #define MAX_INSN_SIZE 2 +#define MAX_STACK_SIZE 64 /* 32 would probably be OK */ #define kretprobe_blacklist_size 0 @@ -26,6 +27,19 @@ struct arch_specific_insn { kprobe_opcode_t insn[MAX_INSN_SIZE]; }; +struct prev_kprobe { + struct kprobe *kp; + unsigned int status; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned int kprobe_status; + struct prev_kprobe prev_kprobe; + struct pt_regs jprobe_saved_regs; + char jprobes_stack[MAX_STACK_SIZE]; +}; + extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); diff --git a/arch/avr32/include/uapi/asm/Kbuild b/arch/avr32/include/uapi/asm/Kbuild index 3b85ead..08d8a3d 100644 --- a/arch/avr32/include/uapi/asm/Kbuild +++ b/arch/avr32/include/uapi/asm/Kbuild @@ -2,35 +2,35 @@ include include/uapi/asm-generic/Kbuild.asm header-y += auxvec.h -header-y += bitsperlong.h header-y += byteorder.h header-y += cachectl.h -header-y += errno.h -header-y += fcntl.h -header-y += ioctl.h -header-y += ioctls.h -header-y += ipcbuf.h -header-y += kvm_para.h -header-y += mman.h header-y += msgbuf.h header-y += param.h -header-y += poll.h header-y += posix_types.h header-y += ptrace.h -header-y += resource.h header-y += sembuf.h header-y += setup.h header-y += shmbuf.h header-y += sigcontext.h -header-y += siginfo.h header-y += signal.h header-y += socket.h header-y += sockios.h header-y += stat.h -header-y += statfs.h header-y += swab.h header-y += termbits.h header-y += termios.h header-y += types.h header-y += unistd.h +generic-y += bitsperlong.h +generic-y += errno.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipcbuf.h +generic-y += kvm_para.h +generic-y += mman.h generic-y += param.h +generic-y += poll.h +generic-y += resource.h +generic-y += siginfo.h +generic-y += statfs.h diff --git a/arch/avr32/include/uapi/asm/auxvec.h b/arch/avr32/include/uapi/asm/auxvec.h index d5dd435..4f02da3 100644 --- a/arch/avr32/include/uapi/asm/auxvec.h +++ b/arch/avr32/include/uapi/asm/auxvec.h @@ -1,4 +1,4 @@ -#ifndef __ASM_AVR32_AUXVEC_H -#define __ASM_AVR32_AUXVEC_H +#ifndef _UAPI__ASM_AVR32_AUXVEC_H +#define _UAPI__ASM_AVR32_AUXVEC_H -#endif /* __ASM_AVR32_AUXVEC_H */ +#endif /* _UAPI__ASM_AVR32_AUXVEC_H */ diff --git a/arch/avr32/include/uapi/asm/bitsperlong.h b/arch/avr32/include/uapi/asm/bitsperlong.h deleted file mode 100644 index 6dc0bb0..0000000 --- a/arch/avr32/include/uapi/asm/bitsperlong.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bitsperlong.h> diff --git a/arch/avr32/include/uapi/asm/byteorder.h b/arch/avr32/include/uapi/asm/byteorder.h index 50abc21..71242f0 100644 --- a/arch/avr32/include/uapi/asm/byteorder.h +++ b/arch/avr32/include/uapi/asm/byteorder.h @@ -1,9 +1,9 @@ /* * AVR32 endian-conversion functions. */ -#ifndef __ASM_AVR32_BYTEORDER_H -#define __ASM_AVR32_BYTEORDER_H +#ifndef _UAPI__ASM_AVR32_BYTEORDER_H +#define _UAPI__ASM_AVR32_BYTEORDER_H #include <linux/byteorder/big_endian.h> -#endif /* __ASM_AVR32_BYTEORDER_H */ +#endif /* _UAPI__ASM_AVR32_BYTEORDER_H */ diff --git a/arch/avr32/include/uapi/asm/cachectl.h b/arch/avr32/include/uapi/asm/cachectl.h index 4faf1ce..573a958 100644 --- a/arch/avr32/include/uapi/asm/cachectl.h +++ b/arch/avr32/include/uapi/asm/cachectl.h @@ -1,5 +1,5 @@ -#ifndef __ASM_AVR32_CACHECTL_H -#define __ASM_AVR32_CACHECTL_H +#ifndef _UAPI__ASM_AVR32_CACHECTL_H +#define _UAPI__ASM_AVR32_CACHECTL_H /* * Operations that can be performed through the cacheflush system call @@ -8,4 +8,4 @@ /* Clean the data cache, then invalidate the icache */ #define CACHE_IFLUSH 0 -#endif /* __ASM_AVR32_CACHECTL_H */ +#endif /* _UAPI__ASM_AVR32_CACHECTL_H */ diff --git a/arch/avr32/include/uapi/asm/errno.h b/arch/avr32/include/uapi/asm/errno.h deleted file mode 100644 index 558a724..0000000 --- a/arch/avr32/include/uapi/asm/errno.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_ERRNO_H -#define __ASM_AVR32_ERRNO_H - -#include <asm-generic/errno.h> - -#endif /* __ASM_AVR32_ERRNO_H */ diff --git a/arch/avr32/include/uapi/asm/fcntl.h b/arch/avr32/include/uapi/asm/fcntl.h deleted file mode 100644 index 14c0c44..0000000 --- a/arch/avr32/include/uapi/asm/fcntl.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_FCNTL_H -#define __ASM_AVR32_FCNTL_H - -#include <asm-generic/fcntl.h> - -#endif /* __ASM_AVR32_FCNTL_H */ diff --git a/arch/avr32/include/uapi/asm/ioctl.h b/arch/avr32/include/uapi/asm/ioctl.h deleted file mode 100644 index c8472c1..0000000 --- a/arch/avr32/include/uapi/asm/ioctl.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_IOCTL_H -#define __ASM_AVR32_IOCTL_H - -#include <asm-generic/ioctl.h> - -#endif /* __ASM_AVR32_IOCTL_H */ diff --git a/arch/avr32/include/uapi/asm/ioctls.h b/arch/avr32/include/uapi/asm/ioctls.h deleted file mode 100644 index 909cf66..0000000 --- a/arch/avr32/include/uapi/asm/ioctls.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_IOCTLS_H -#define __ASM_AVR32_IOCTLS_H - -#include <asm-generic/ioctls.h> - -#endif /* __ASM_AVR32_IOCTLS_H */ diff --git a/arch/avr32/include/uapi/asm/ipcbuf.h b/arch/avr32/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 84c7e51..0000000 --- a/arch/avr32/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/avr32/include/uapi/asm/kvm_para.h b/arch/avr32/include/uapi/asm/kvm_para.h deleted file mode 100644 index 14fab8f..0000000 --- a/arch/avr32/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/kvm_para.h> diff --git a/arch/avr32/include/uapi/asm/mman.h b/arch/avr32/include/uapi/asm/mman.h deleted file mode 100644 index 8eebf89..0000000 --- a/arch/avr32/include/uapi/asm/mman.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/mman.h> diff --git a/arch/avr32/include/uapi/asm/msgbuf.h b/arch/avr32/include/uapi/asm/msgbuf.h index ac18bc4..9eae6ef 100644 --- a/arch/avr32/include/uapi/asm/msgbuf.h +++ b/arch/avr32/include/uapi/asm/msgbuf.h @@ -1,5 +1,5 @@ -#ifndef __ASM_AVR32_MSGBUF_H -#define __ASM_AVR32_MSGBUF_H +#ifndef _UAPI__ASM_AVR32_MSGBUF_H +#define _UAPI__ASM_AVR32_MSGBUF_H /* * The msqid64_ds structure for i386 architecture. @@ -28,4 +28,4 @@ struct msqid64_ds { unsigned long __unused5; }; -#endif /* __ASM_AVR32_MSGBUF_H */ +#endif /* _UAPI__ASM_AVR32_MSGBUF_H */ diff --git a/arch/avr32/include/uapi/asm/poll.h b/arch/avr32/include/uapi/asm/poll.h deleted file mode 100644 index c98509d..0000000 --- a/arch/avr32/include/uapi/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/avr32/include/uapi/asm/posix_types.h b/arch/avr32/include/uapi/asm/posix_types.h index 9ba9e74..5b813a8 100644 --- a/arch/avr32/include/uapi/asm/posix_types.h +++ b/arch/avr32/include/uapi/asm/posix_types.h @@ -5,8 +5,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#ifndef __ASM_AVR32_POSIX_TYPES_H -#define __ASM_AVR32_POSIX_TYPES_H +#ifndef _UAPI__ASM_AVR32_POSIX_TYPES_H +#define _UAPI__ASM_AVR32_POSIX_TYPES_H /* * This file is generally used by user-level software, so you need to @@ -34,4 +34,4 @@ typedef unsigned short __kernel_old_dev_t; #include <asm-generic/posix_types.h> -#endif /* __ASM_AVR32_POSIX_TYPES_H */ +#endif /* _UAPI__ASM_AVR32_POSIX_TYPES_H */ diff --git a/arch/avr32/include/uapi/asm/resource.h b/arch/avr32/include/uapi/asm/resource.h deleted file mode 100644 index c6dd101..0000000 --- a/arch/avr32/include/uapi/asm/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_RESOURCE_H -#define __ASM_AVR32_RESOURCE_H - -#include <asm-generic/resource.h> - -#endif /* __ASM_AVR32_RESOURCE_H */ diff --git a/arch/avr32/include/uapi/asm/sembuf.h b/arch/avr32/include/uapi/asm/sembuf.h index e472216..6c6f7cf 100644 --- a/arch/avr32/include/uapi/asm/sembuf.h +++ b/arch/avr32/include/uapi/asm/sembuf.h @@ -1,5 +1,5 @@ -#ifndef __ASM_AVR32_SEMBUF_H -#define __ASM_AVR32_SEMBUF_H +#ifndef _UAPI__ASM_AVR32_SEMBUF_H +#define _UAPI__ASM_AVR32_SEMBUF_H /* * The semid64_ds structure for AVR32 architecture. @@ -22,4 +22,4 @@ struct semid64_ds { unsigned long __unused4; }; -#endif /* __ASM_AVR32_SEMBUF_H */ +#endif /* _UAPI__ASM_AVR32_SEMBUF_H */ diff --git a/arch/avr32/include/uapi/asm/setup.h b/arch/avr32/include/uapi/asm/setup.h index e58aa93..a654df7 100644 --- a/arch/avr32/include/uapi/asm/setup.h +++ b/arch/avr32/include/uapi/asm/setup.h @@ -13,5 +13,4 @@ #define COMMAND_LINE_SIZE 256 - #endif /* _UAPI__ASM_AVR32_SETUP_H__ */ diff --git a/arch/avr32/include/uapi/asm/shmbuf.h b/arch/avr32/include/uapi/asm/shmbuf.h index c62fba4..b94cf8b 100644 --- a/arch/avr32/include/uapi/asm/shmbuf.h +++ b/arch/avr32/include/uapi/asm/shmbuf.h @@ -1,5 +1,5 @@ -#ifndef __ASM_AVR32_SHMBUF_H -#define __ASM_AVR32_SHMBUF_H +#ifndef _UAPI__ASM_AVR32_SHMBUF_H +#define _UAPI__ASM_AVR32_SHMBUF_H /* * The shmid64_ds structure for i386 architecture. @@ -39,4 +39,4 @@ struct shminfo64 { unsigned long __unused4; }; -#endif /* __ASM_AVR32_SHMBUF_H */ +#endif /* _UAPI__ASM_AVR32_SHMBUF_H */ diff --git a/arch/avr32/include/uapi/asm/sigcontext.h b/arch/avr32/include/uapi/asm/sigcontext.h index e04062b..27e56bf 100644 --- a/arch/avr32/include/uapi/asm/sigcontext.h +++ b/arch/avr32/include/uapi/asm/sigcontext.h @@ -5,8 +5,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#ifndef __ASM_AVR32_SIGCONTEXT_H -#define __ASM_AVR32_SIGCONTEXT_H +#ifndef _UAPI__ASM_AVR32_SIGCONTEXT_H +#define _UAPI__ASM_AVR32_SIGCONTEXT_H struct sigcontext { unsigned long oldmask; @@ -31,4 +31,4 @@ struct sigcontext { unsigned long r0; }; -#endif /* __ASM_AVR32_SIGCONTEXT_H */ +#endif /* _UAPI__ASM_AVR32_SIGCONTEXT_H */ diff --git a/arch/avr32/include/uapi/asm/siginfo.h b/arch/avr32/include/uapi/asm/siginfo.h deleted file mode 100644 index 5ee93f4..0000000 --- a/arch/avr32/include/uapi/asm/siginfo.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _AVR32_SIGINFO_H -#define _AVR32_SIGINFO_H - -#include <asm-generic/siginfo.h> - -#endif diff --git a/arch/avr32/include/uapi/asm/signal.h b/arch/avr32/include/uapi/asm/signal.h index 1b77a93..ffe8c77 100644 --- a/arch/avr32/include/uapi/asm/signal.h +++ b/arch/avr32/include/uapi/asm/signal.h @@ -118,5 +118,4 @@ typedef struct sigaltstack { size_t ss_size; } stack_t; - #endif /* _UAPI__ASM_AVR32_SIGNAL_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 4399364..cbf902e 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -1,5 +1,5 @@ -#ifndef __ASM_AVR32_SOCKET_H -#define __ASM_AVR32_SOCKET_H +#ifndef _UAPI__ASM_AVR32_SOCKET_H +#define _UAPI__ASM_AVR32_SOCKET_H #include <asm/sockios.h> @@ -78,4 +78,4 @@ #define SO_MAX_PACING_RATE 47 -#endif /* __ASM_AVR32_SOCKET_H */ +#endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/sockios.h b/arch/avr32/include/uapi/asm/sockios.h index 0802d74..d04785453 100644 --- a/arch/avr32/include/uapi/asm/sockios.h +++ b/arch/avr32/include/uapi/asm/sockios.h @@ -1,5 +1,5 @@ -#ifndef __ASM_AVR32_SOCKIOS_H -#define __ASM_AVR32_SOCKIOS_H +#ifndef _UAPI__ASM_AVR32_SOCKIOS_H +#define _UAPI__ASM_AVR32_SOCKIOS_H /* Socket-level I/O control calls. */ #define FIOSETOWN 0x8901 @@ -10,4 +10,4 @@ #define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ #define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ -#endif /* __ASM_AVR32_SOCKIOS_H */ +#endif /* _UAPI__ASM_AVR32_SOCKIOS_H */ diff --git a/arch/avr32/include/uapi/asm/stat.h b/arch/avr32/include/uapi/asm/stat.h index e72881e..c06acef 100644 --- a/arch/avr32/include/uapi/asm/stat.h +++ b/arch/avr32/include/uapi/asm/stat.h @@ -5,8 +5,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#ifndef __ASM_AVR32_STAT_H -#define __ASM_AVR32_STAT_H +#ifndef _UAPI__ASM_AVR32_STAT_H +#define _UAPI__ASM_AVR32_STAT_H struct __old_kernel_stat { unsigned short st_dev; @@ -76,4 +76,4 @@ struct stat64 { unsigned long __unused2; }; -#endif /* __ASM_AVR32_STAT_H */ +#endif /* _UAPI__ASM_AVR32_STAT_H */ diff --git a/arch/avr32/include/uapi/asm/statfs.h b/arch/avr32/include/uapi/asm/statfs.h deleted file mode 100644 index 2961bd1..0000000 --- a/arch/avr32/include/uapi/asm/statfs.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_AVR32_STATFS_H -#define __ASM_AVR32_STATFS_H - -#include <asm-generic/statfs.h> - -#endif /* __ASM_AVR32_STATFS_H */ diff --git a/arch/avr32/include/uapi/asm/swab.h b/arch/avr32/include/uapi/asm/swab.h index 14cc737..1a03549 100644 --- a/arch/avr32/include/uapi/asm/swab.h +++ b/arch/avr32/include/uapi/asm/swab.h @@ -1,8 +1,8 @@ /* * AVR32 byteswapping functions. */ -#ifndef __ASM_AVR32_SWAB_H -#define __ASM_AVR32_SWAB_H +#ifndef _UAPI__ASM_AVR32_SWAB_H +#define _UAPI__ASM_AVR32_SWAB_H #include <linux/types.h> #include <linux/compiler.h> @@ -32,4 +32,4 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 val) #define __arch_swab32 __arch_swab32 #endif -#endif /* __ASM_AVR32_SWAB_H */ +#endif /* _UAPI__ASM_AVR32_SWAB_H */ diff --git a/arch/avr32/include/uapi/asm/termbits.h b/arch/avr32/include/uapi/asm/termbits.h index 366adc5..32789cc 100644 --- a/arch/avr32/include/uapi/asm/termbits.h +++ b/arch/avr32/include/uapi/asm/termbits.h @@ -1,5 +1,5 @@ -#ifndef __ASM_AVR32_TERMBITS_H -#define __ASM_AVR32_TERMBITS_H +#ifndef _UAPI__ASM_AVR32_TERMBITS_H +#define _UAPI__ASM_AVR32_TERMBITS_H #include <linux/posix_types.h> @@ -193,4 +193,4 @@ struct ktermios { #define TCSADRAIN 1 #define TCSAFLUSH 2 -#endif /* __ASM_AVR32_TERMBITS_H */ +#endif /* _UAPI__ASM_AVR32_TERMBITS_H */ diff --git a/arch/avr32/include/uapi/asm/termios.h b/arch/avr32/include/uapi/asm/termios.h index b8ef8ea..c8a0081 100644 --- a/arch/avr32/include/uapi/asm/termios.h +++ b/arch/avr32/include/uapi/asm/termios.h @@ -46,5 +46,4 @@ struct termio { /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - #endif /* _UAPI__ASM_AVR32_TERMIOS_H */ diff --git a/arch/avr32/include/uapi/asm/types.h b/arch/avr32/include/uapi/asm/types.h index bb34ad3..7c986c4 100644 --- a/arch/avr32/include/uapi/asm/types.h +++ b/arch/avr32/include/uapi/asm/types.h @@ -5,4 +5,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#ifndef _UAPI__ASM_AVR32_TYPES_H +#define _UAPI__ASM_AVR32_TYPES_H + #include <asm-generic/int-ll64.h> + +#endif /* _UAPI__ASM_AVR32_TYPES_H */ diff --git a/arch/avr32/include/uapi/asm/unistd.h b/arch/avr32/include/uapi/asm/unistd.h index 3eaa687..8822bf4 100644 --- a/arch/avr32/include/uapi/asm/unistd.h +++ b/arch/avr32/include/uapi/asm/unistd.h @@ -301,5 +301,4 @@ #define __NR_eventfd 281 #define __NR_setns 283 - #endif /* _UAPI__ASM_AVR32_UNISTD_H */ diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S index 9899d3c..7301f48 100644 --- a/arch/avr32/kernel/entry-avr32b.S +++ b/arch/avr32/kernel/entry-avr32b.S @@ -401,9 +401,10 @@ handle_critical: /* We should never get here... */ bad_return: sub r12, pc, (. - 1f) - bral panic + lddpc pc, 2f .align 2 1: .asciz "Return from critical exception!" +2: .long panic .align 1 do_bus_error_write: diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S index 6163bd0..59eae6d 100644 --- a/arch/avr32/kernel/head.S +++ b/arch/avr32/kernel/head.S @@ -10,33 +10,13 @@ #include <linux/linkage.h> #include <asm/page.h> -#include <asm/thread_info.h> -#include <asm/sysreg.h> .section .init.text,"ax" .global kernel_entry kernel_entry: - /* Initialize status register */ - lddpc r0, init_sr - mtsr SYSREG_SR, r0 - - /* Set initial stack pointer */ - lddpc sp, stack_addr - sub sp, -THREAD_SIZE - -#ifdef CONFIG_FRAME_POINTER - /* Mark last stack frame */ - mov lr, 0 - mov r7, 0 -#endif - /* Start the show */ lddpc pc, kernel_start_addr .align 2 -init_sr: - .long 0x007f0000 /* Supervisor mode, everything masked */ -stack_addr: - .long init_thread_union kernel_start_addr: .long start_kernel diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 5a9ff1c..cb59277 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2166,12 +2166,6 @@ static const struct file_operations pfm_file_ops = { .flush = pfm_flush }; -static int -pfmfs_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) { return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]", @@ -2179,7 +2173,7 @@ static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) } static const struct dentry_operations pfmfs_dentry_operations = { - .d_delete = pfmfs_delete_dentry, + .d_delete = always_delete_dentry, .d_dname = pfmfs_dname, }; diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h new file mode 100644 index 0000000..748016c --- /dev/null +++ b/arch/parisc/include/asm/socket.h @@ -0,0 +1,11 @@ +#ifndef _ASM_SOCKET_H +#define _ASM_SOCKET_H + +#include <uapi/asm/socket.h> + +/* O_NONBLOCK clashes with the bits used for socket types. Therefore we + * have to define SOCK_NONBLOCK to a different value here. + */ +#define SOCK_NONBLOCK 0x40000000 + +#endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 63f4dd0..4006964 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -4,14 +4,11 @@ /* * User space memory access functions */ -#include <asm/processor.h> #include <asm/page.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm-generic/uaccess-unaligned.h> -#include <linux/sched.h> - #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -36,43 +33,12 @@ extern int __get_user_bad(void); extern int __put_kernel_bad(void); extern int __put_user_bad(void); - -/* - * Test whether a block of memory is a valid user space address. - * Returns 0 if the range is valid, nonzero otherwise. - */ -static inline int __range_not_ok(unsigned long addr, unsigned long size, - unsigned long limit) +static inline long access_ok(int type, const void __user * addr, + unsigned long size) { - unsigned long __newaddr = addr + size; - return (__newaddr < addr || __newaddr > limit || size > limit); + return 1; } -/** - * access_ok: - Checks if a user space pointer is valid - * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that - * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe - * to write to a block, it is always safe to read from it. - * @addr: User space pointer to start of block to check - * @size: Size of block to check - * - * Context: User context only. This function may sleep. - * - * Checks if a pointer to a block of memory in user space is valid. - * - * Returns true (nonzero) if the memory block may be valid, false (zero) - * if it is definitely invalid. - * - * Note that, depending on architecture, this function probably just - * checks that the pointer is in the user space range - after calling - * this function, memory access functions may still return -EFAULT. - */ -#define access_ok(type, addr, size) \ -( __chk_user_ptr(addr), \ - !__range_not_ok((unsigned long) (__force void *) (addr), \ - size, user_addr_max()) \ -) - #define put_user __put_user #define get_user __get_user @@ -253,11 +219,7 @@ extern long lstrnlen_user(const char __user *,long); /* * Complex access routines -- macros */ -#ifdef CONFIG_COMPAT -#define user_addr_max() (TASK_SIZE) -#else -#define user_addr_max() (DEFAULT_TASK_SIZE) -#endif +#define user_addr_max() (~0UL) #define strnlen_user lstrnlen_user #define strlen_user(str) lstrnlen_user(str, 0x7fffffffL) diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 7c614d0..f33113a 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SOCKET_H -#define _ASM_SOCKET_H +#ifndef _UAPI_ASM_SOCKET_H +#define _UAPI_ASM_SOCKET_H #include <asm/sockios.h> @@ -77,9 +77,4 @@ #define SO_MAX_PACING_RATE 0x4048 -/* O_NONBLOCK clashes with the bits used for socket types. Therefore we - * have to define SOCK_NONBLOCK to a different value here. - */ -#define SOCK_NONBLOCK 0x40000000 - -#endif /* _ASM_SOCKET_H */ +#endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index b5507ec..413dc17 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -161,7 +161,7 @@ static inline void prefetch_dst(const void *addr) /* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words * per loop. This code is derived from glibc. */ -static inline unsigned long copy_dstaligned(unsigned long dst, +static noinline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len) { /* gcc complains that a2 and a3 may be uninitialized, but actually @@ -276,7 +276,7 @@ handle_store_error: /* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR. * In case of an access fault the faulty address can be read from the per_cpu * exception data struct. */ -static unsigned long pa_memcpy_internal(void *dstp, const void *srcp, +static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp, unsigned long len) { register unsigned long src, dst, t1, t2, t3; @@ -529,7 +529,7 @@ long probe_kernel_read(void *dst, const void *src, size_t size) { unsigned long addr = (unsigned long)src; - if (size < 0 || addr < PAGE_SIZE) + if (addr < PAGE_SIZE) return -EFAULT; /* check for I/O space F_EXTEND(0xfff00000) access as well? */ diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 7584a5d..9d08c71 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -282,16 +282,34 @@ bad_area: #endif switch (code) { case 15: /* Data TLB miss fault/Data page fault */ + /* send SIGSEGV when outside of vma */ + if (!vma || + address < vma->vm_start || address > vma->vm_end) { + si.si_signo = SIGSEGV; + si.si_code = SEGV_MAPERR; + break; + } + + /* send SIGSEGV for wrong permissions */ + if ((vma->vm_flags & acc_type) != acc_type) { + si.si_signo = SIGSEGV; + si.si_code = SEGV_ACCERR; + break; + } + + /* probably address is outside of mapped file */ + /* fall through */ case 17: /* NA data TLB miss / page fault */ case 18: /* Unaligned access - PCXS only */ si.si_signo = SIGBUS; - si.si_code = BUS_ADRERR; + si.si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR; break; case 16: /* Non-access instruction TLB miss fault */ case 26: /* PCXL: Data memory access rights trap */ default: si.si_signo = SIGSEGV; - si.si_code = SEGV_MAPERR; + si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR; + break; } si.si_errno = 0; si.si_addr = (void __user *) address; diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 607acf5..8a24636 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -111,6 +111,7 @@ endif endif CFLAGS-$(CONFIG_PPC64) := -mtraceback=no -mcall-aixdesc +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig new file mode 100644 index 0000000..62771e0 --- /dev/null +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -0,0 +1,352 @@ +CONFIG_PPC64=y +CONFIG_ALTIVEC=y +CONFIG_VSX=y +CONFIG_SMP=y +CONFIG_NR_CPUS=2048 +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_IRQ_DOMAIN_DEBUG=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PPC_SPLPAR=y +CONFIG_SCANLOG=m +CONFIG_PPC_SMLPAR=y +CONFIG_DTL=y +# CONFIG_PPC_PMAC is not set +CONFIG_RTAS_FLASH=m +CONFIG_IBMEBUS=y +CONFIG_HZ_100=y +CONFIG_BINFMT_MISC=m +CONFIG_PPC_TRANSACTIONAL_MEM=y +CONFIG_KEXEC=y +CONFIG_IRQ_ALL_CPUS=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_CMA=y +CONFIG_PPC_64K_PAGES=y +CONFIG_PPC_SUBPAGE_PROT=y +CONFIG_SCHED_SMT=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_RPA=m +CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_NET_IPIP=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_PROC_DEVICETREE=y +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_BLK_DEV_FD=m +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=65536 +CONFIG_VIRTIO_BLK=m +CONFIG_IDE=y +CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_AMD74XX=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_CXGB3_ISCSI=m +CONFIG_SCSI_CXGB4_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_BE2ISCSI=m +CONFIG_SCSI_MPT2SAS=m +CONFIG_SCSI_IBMVSCSI=y +CONFIG_SCSI_IBMVFC=m +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_SCSI_IPR=y +CONFIG_SCSI_QLA_FC=m +CONFIG_SCSI_QLA_ISCSI=m +CONFIG_SCSI_LPFC=m +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_ATA=y +# CONFIG_ATA_SFF is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=y +CONFIG_MD_RAID0=y +CONFIG_MD_RAID1=y +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_UEVENT=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y +CONFIG_NETPOLL_TRAP=y +CONFIG_TUN=m +CONFIG_VIRTIO_NET=m +CONFIG_VORTEX=y +CONFIG_ACENIC=m +CONFIG_ACENIC_OMIT_TIGON_I=y +CONFIG_PCNET32=y +CONFIG_TIGON3=y +CONFIG_CHELSIO_T1=m +CONFIG_BE2NET=m +CONFIG_S2IO=m +CONFIG_IBMVETH=y +CONFIG_EHEA=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_IXGB=m +CONFIG_IXGBE=m +CONFIG_MLX4_EN=m +CONFIG_MYRI10GE=m +CONFIG_QLGE=m +CONFIG_NETXEN_NIC=m +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=m +CONFIG_INPUT_MISC=y +CONFIG_INPUT_PCSPKR=m +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_ICOM=m +CONFIG_SERIAL_JSM=m +CONFIG_HVC_CONSOLE=y +CONFIG_HVC_RTAS=y +CONFIG_HVCS=m +CONFIG_VIRTIO_CONSOLE=m +CONFIG_IBM_BSR=m +CONFIG_GEN_RTC=y +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=1024 +CONFIG_FB=y +CONFIG_FIRMWARE_EDID=y +CONFIG_FB_OF=y +CONFIG_FB_MATROX=y +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G=y +CONFIG_FB_RADEON=y +CONFIG_FB_IBM_GXT4500=y +CONFIG_LCD_PLATFORM=m +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +CONFIG_HID_GYRATION=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SUNPLUS=y +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_MON=m +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_HCD_PPC_OF is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_STORAGE=m +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_EHCA=m +CONFIG_INFINIBAND_CXGB3=m +CONFIG_INFINIBAND_CXGB4=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_VIRTIO_PCI=m +CONFIG_VIRTIO_BALLOON=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_XFS_FS=m +CONFIG_XFS_POSIX_ACL=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m +CONFIG_ISO9660_FS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_PSTORE=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_CRC_T10DIF=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_LATENCYTOP=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_CODE_PATCHING_SELFTEST=y +CONFIG_FTR_FIXUP_SELFTEST=y +CONFIG_MSI_BITMAP_SELFTEST=y +CONFIG_XMON=y +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_LZO=m +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_DEV_NX=y +CONFIG_CRYPTO_DEV_NX_ENCRYPT=m diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index cc0655a..935b5e7 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -31,6 +31,8 @@ extern unsigned long randomize_et_dyn(unsigned long base); #define ELF_ET_DYN_BASE (randomize_et_dyn(0x20000000)) +#define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) + /* * Our registers are always unsigned longs, whether we're a 32 bit * process or 64 bit, on either a 64 bit or 32 bit kernel. @@ -86,6 +88,8 @@ typedef elf_vrregset_t elf_fpxregset_t; #ifdef __powerpc64__ # define SET_PERSONALITY(ex) \ do { \ + if (((ex).e_flags & 0x3) == 2) \ + set_thread_flag(TIF_ELF2ABI); \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ set_thread_flag(TIF_32BIT); \ else \ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 0c7f2bf..d8b600b 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -403,6 +403,8 @@ static inline unsigned long cmo_get_page_size(void) extern long pSeries_enable_reloc_on_exc(void); extern long pSeries_disable_reloc_on_exc(void); +extern long pseries_big_endian_exceptions(void); + #else #define pSeries_enable_reloc_on_exc() do {} while (0) diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index a63b045..12c32c5 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -287,6 +287,32 @@ static inline long disable_reloc_on_exceptions(void) { return plpar_set_mode(0, 3, 0, 0); } +/* + * Take exceptions in big endian mode on this partition + * + * Note: this call has a partition wide scope and can take a while to complete. + * If it returns H_LONG_BUSY_* it should be retried periodically until it + * returns H_SUCCESS. + */ +static inline long enable_big_endian_exceptions(void) +{ + /* mflags = 0: big endian exceptions */ + return plpar_set_mode(0, 4, 0, 0); +} + +/* + * Take exceptions in little endian mode on this partition + * + * Note: this call has a partition wide scope and can take a while to complete. + * If it returns H_LONG_BUSY_* it should be retried periodically until it + * returns H_SUCCESS. + */ +static inline long enable_little_endian_exceptions(void) +{ + /* mflags = 1: little endian exceptions */ + return plpar_set_mode(1, 4, 0, 0); +} + static inline long plapr_set_ciabr(unsigned long ciabr) { return plpar_set_mode(0, 1, ciabr, 0); diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 98da78e..084e080 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -33,6 +33,7 @@ extern int boot_cpuid; extern int spinning_secondaries; extern void cpu_die(void); +extern int cpu_to_chip_id(int cpu); #ifdef CONFIG_SMP @@ -112,7 +113,6 @@ static inline struct cpumask *cpu_core_mask(int cpu) } extern int cpu_to_core_id(int cpu); -extern int cpu_to_chip_id(int cpu); /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. * diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 8fd6cf6..9854c56 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -105,6 +105,9 @@ static inline struct thread_info *current_thread_info(void) #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ +#if defined(CONFIG_PPC64) +#define TIF_ELF2ABI 18 /* function descriptors must die! */ +#endif /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -183,6 +186,12 @@ static inline bool test_thread_local_flags(unsigned int flags) #define is_32bit_task() (1) #endif +#if defined(CONFIG_PPC64) +#define is_elf2_task() (test_thread_flag(TIF_ELF2ABI)) +#else +#define is_elf2_task() (0) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 6713020..4bd687d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -686,6 +686,15 @@ void eeh_save_bars(struct eeh_dev *edev) for (i = 0; i < 16; i++) eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); + + /* + * For PCI bridges including root port, we need enable bus + * master explicitly. Otherwise, it can't fetch IODA table + * entries correctly. So we cache the bit in advance so that + * we can restore it after reset, either PHB range or PE range. + */ + if (edev->mode & EEH_DEV_BRIDGE) + edev->config_space[1] |= PCI_COMMAND_MASTER; } /** diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index d27c5af..72d748b 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -74,8 +74,13 @@ static int eeh_event_handler(void * dummy) pe = event->pe; if (pe) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); - pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", - pe->phb->global_number, pe->addr); + if (pe->type & EEH_PE_PHB) + pr_info("EEH: Detected error on PHB#%d\n", + pe->phb->global_number); + else + pr_info("EEH: Detected PCI bus error on " + "PHB#%d-PE#%x\n", + pe->phb->global_number, pe->addr); eeh_handle_event(pe); eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 75c2d10..3386d8a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -858,17 +858,21 @@ void show_regs(struct pt_regs * regs) printk("MSR: "REG" ", regs->msr); printbits(regs->msr, msr_bits); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); -#ifdef CONFIG_PPC64 - printk("SOFTE: %ld\n", regs->softe); -#endif trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) - printk("CFAR: "REG"\n", regs->orig_gpr3); - if (trap == 0x300 || trap == 0x600) + printk("CFAR: "REG" ", regs->orig_gpr3); + if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); + printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); #else - printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); + printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); +#endif +#ifdef CONFIG_PPC64 + printk("SOFTE: %ld ", regs->softe); +#endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) + printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); #endif for (i = 0; i < 32; i++) { @@ -887,9 +891,6 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch); -#endif show_stack(current, (unsigned long *) regs->gpr[1]); if (!user_mode(regs)) show_instructions(regs); @@ -1086,25 +1087,45 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->msr = MSR_USER; #else if (!is_32bit_task()) { - unsigned long entry, toc; + unsigned long entry; - /* start is a relocated pointer to the function descriptor for - * the elf _start routine. The first entry in the function - * descriptor is the entry address of _start and the second - * entry is the TOC value we need to use. - */ - __get_user(entry, (unsigned long __user *)start); - __get_user(toc, (unsigned long __user *)start+1); + if (is_elf2_task()) { + /* Look ma, no function descriptors! */ + entry = start; - /* Check whether the e_entry function descriptor entries - * need to be relocated before we can use them. - */ - if (load_addr != 0) { - entry += load_addr; - toc += load_addr; + /* + * Ulrich says: + * The latest iteration of the ABI requires that when + * calling a function (at its global entry point), + * the caller must ensure r12 holds the entry point + * address (so that the function can quickly + * establish addressability). + */ + regs->gpr[12] = start; + /* Make sure that's restored on entry to userspace. */ + set_thread_flag(TIF_RESTOREALL); + } else { + unsigned long toc; + + /* start is a relocated pointer to the function + * descriptor for the elf _start routine. The first + * entry in the function descriptor is the entry + * address of _start and the second entry is the TOC + * value we need to use. + */ + __get_user(entry, (unsigned long __user *)start); + __get_user(toc, (unsigned long __user *)start+1); + + /* Check whether the e_entry function descriptor entries + * need to be relocated before we can use them. + */ + if (load_addr != 0) { + entry += load_addr; + toc += load_addr; + } + regs->gpr[2] = toc; } regs->nip = entry; - regs->gpr[2] = toc; regs->msr = MSR_USER64; } else { regs->nip = start; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f3a4709..fa0ad8a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -777,6 +777,26 @@ int of_get_ibm_chip_id(struct device_node *np) return -1; } +/** + * cpu_to_chip_id - Return the cpus chip-id + * @cpu: The logical cpu number. + * + * Return the value of the ibm,chip-id property corresponding to the given + * logical cpu number. If the chip-id can not be found, returns -1. + */ +int cpu_to_chip_id(int cpu) +{ + struct device_node *np; + + np = of_get_cpu_node(cpu, NULL); + if (!np) + return -1; + + of_node_put(np); + return of_get_ibm_chip_id(np); +} +EXPORT_SYMBOL(cpu_to_chip_id); + #ifdef CONFIG_PPC_PSERIES /* * Fix up the uninitialized fields in a new device node: diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 749778e..1844298 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -457,7 +457,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (copy_vsx_to_user(&frame->mc_vsregs, current)) return 1; msr |= MSR_VSX; - } + } else if (!ctx_has_vsx_region) + /* + * With a small context structure we can't hold the VSX + * registers, hence clear the MSR value to indicate the state + * was not saved. + */ + msr &= ~MSR_VSX; + + #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* save spe registers */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index b3c6157..e66f67b 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -701,12 +701,6 @@ badframe: int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { - /* Handler is *really* a pointer to the function descriptor for - * the signal routine. The first entry in the function - * descriptor is the entry address of signal and the second - * entry is the TOC value we need to use. - */ - func_descr_t __user *funct_desc_ptr; struct rt_sigframe __user *frame; unsigned long newsp = 0; long err = 0; @@ -766,19 +760,32 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, goto badframe; regs->link = (unsigned long) &frame->tramp[0]; } - funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; /* Allocate a dummy caller frame for the signal handler. */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); /* Set up "regs" so we "return" to the signal handler. */ - err |= get_user(regs->nip, &funct_desc_ptr->entry); + if (is_elf2_task()) { + regs->nip = (unsigned long) ka->sa.sa_handler; + regs->gpr[12] = regs->nip; + } else { + /* Handler is *really* a pointer to the function descriptor for + * the signal routine. The first entry in the function + * descriptor is the entry address of signal and the second + * entry is the TOC value we need to use. + */ + func_descr_t __user *funct_desc_ptr = + (func_descr_t __user *) ka->sa.sa_handler; + + err |= get_user(regs->nip, &funct_desc_ptr->entry); + err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); + } + /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); regs->gpr[1] = newsp; - err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); regs->gpr[3] = signr; regs->result = 0; if (ka->sa.sa_flags & SA_SIGINFO) { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 930cd8a..a3b64f3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -597,22 +597,6 @@ out: return id; } -/* Return the value of the chip-id property corresponding - * to the given logical cpu. - */ -int cpu_to_chip_id(int cpu) -{ - struct device_node *np; - - np = of_get_cpu_node(cpu, NULL); - if (!np) - return -1; - - of_node_put(np); - return of_get_ibm_chip_id(np); -} -EXPORT_SYMBOL(cpu_to_chip_id); - /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 192b051..b3b1441 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -213,8 +213,6 @@ static u64 scan_dispatch_log(u64 stop_tb) if (i == be64_to_cpu(vpa->dtl_idx)) return 0; while (i < be64_to_cpu(vpa->dtl_idx)) { - if (dtl_consumer) - dtl_consumer(dtl, i); dtb = be64_to_cpu(dtl->timebase); tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + be32_to_cpu(dtl->ready_to_enqueue_time); @@ -227,6 +225,8 @@ static u64 scan_dispatch_log(u64 stop_tb) } if (dtb > stop_tb) break; + if (dtl_consumer) + dtl_consumer(dtl, i); stolen += tb_delta; ++i; ++dtl; diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index 45ea281..542c6f42 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -142,6 +142,13 @@ V_FUNCTION_END(__kernel_sigtramp_rt64) /* Size of CR reg in DWARF unwind info. */ #define CRSIZE 4 +/* Offset of CR reg within a full word. */ +#ifdef __LITTLE_ENDIAN__ +#define CROFF 0 +#else +#define CROFF (RSIZE - CRSIZE) +#endif + /* This is the offset of the VMX reg pointer. */ #define VREGS 48*RSIZE+33*8 @@ -181,7 +188,14 @@ V_FUNCTION_END(__kernel_sigtramp_rt64) rsave (31, 31*RSIZE); \ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \ rsave (65, 36*RSIZE); /* lr */ \ - rsave (70, 38*RSIZE + (RSIZE - CRSIZE)) /* cr */ + rsave (68, 38*RSIZE + CROFF); /* cr fields */ \ + rsave (69, 38*RSIZE + CROFF); \ + rsave (70, 38*RSIZE + CROFF); \ + rsave (71, 38*RSIZE + CROFF); \ + rsave (72, 38*RSIZE + CROFF); \ + rsave (73, 38*RSIZE + CROFF); \ + rsave (74, 38*RSIZE + CROFF); \ + rsave (75, 38*RSIZE + CROFF) /* Describe where the FP regs are saved. */ #define EH_FRAME_FP \ diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index e7d0c88f..76a6482 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1419,7 +1419,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) /* needed to ensure proper operation of coherent allocations * later, in case driver doesn't set it explicitly */ - dma_set_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64)); + dma_coerce_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64)); } /* register with generic device framework */ diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index 6936547..c5f734e 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -123,6 +123,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct mm_struct *mm = current->mm; unsigned long addr, len, end; unsigned long next; + unsigned long flags; pgd_t *pgdp; int nr = 0; @@ -156,7 +157,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * So long as we atomically load page table pointers versus teardown, * we can follow the address down to the the page and take a ref on it. */ - local_irq_disable(); + local_irq_save(flags); pgdp = pgd_offset(mm, addr); do { @@ -179,7 +180,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, break; } while (pgdp++, addr = next, addr != end); - local_irq_enable(); + local_irq_restore(flags); return nr; } diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 3e99c14..7ce9cf3 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -258,7 +258,7 @@ static bool slice_scan_available(unsigned long addr, slice = GET_HIGH_SLICE_INDEX(addr); *boundary_addr = (slice + end) ? ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; - return !!(available.high_slices & (1u << slice)); + return !!(available.high_slices & (1ul << slice)); } } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c2a566f..132f872 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -403,3 +403,14 @@ config PPC_DOORBELL default n endmenu + +config CPU_LITTLE_ENDIAN + bool "Build little endian kernel" + default n + help + This option selects whether a big endian or little endian kernel will + be built. + + Note that if cross compiling a little endian kernel, + CROSS_COMPILE must point to a toolchain capable of targeting + little endian powerpc. diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 8844628..1cb160d 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -19,6 +19,7 @@ #include <asm/io.h> #include <asm/prom.h> #include <asm/machdep.h> +#include <asm/smp.h> struct powernv_rng { diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 7fbc25b..ccb633e 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -189,8 +189,9 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) struct eeh_dev *edev; struct eeh_pe pe; struct pci_dn *pdn = PCI_DN(dn); - const u32 *class_code, *vendor_id, *device_id; - const u32 *regs; + const __be32 *classp, *vendorp, *devicep; + u32 class_code; + const __be32 *regs; u32 pcie_flags; int enable = 0; int ret; @@ -201,22 +202,24 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) return NULL; /* Retrieve class/vendor/device IDs */ - class_code = of_get_property(dn, "class-code", NULL); - vendor_id = of_get_property(dn, "vendor-id", NULL); - device_id = of_get_property(dn, "device-id", NULL); + classp = of_get_property(dn, "class-code", NULL); + vendorp = of_get_property(dn, "vendor-id", NULL); + devicep = of_get_property(dn, "device-id", NULL); /* Skip for bad OF node or PCI-ISA bridge */ - if (!class_code || !vendor_id || !device_id) + if (!classp || !vendorp || !devicep) return NULL; if (dn->type && !strcmp(dn->type, "isa")) return NULL; + class_code = of_read_number(classp, 1); + /* * Update class code and mode of eeh device. We need * correctly reflects that current device is root port * or PCIe switch downstream port. */ - edev->class_code = *class_code; + edev->class_code = class_code; edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); edev->mode &= 0xFFFFFF00; if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { @@ -243,12 +246,12 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) /* Initialize the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = edev->phb; - pe.config_addr = regs[0]; + pe.config_addr = of_read_number(regs, 1); /* Enable EEH on the device */ ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); if (!ret) { - edev->config_addr = regs[0]; + edev->config_addr = of_read_number(regs, 1); /* Retrieve PE address */ edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); pe.addr = edev->pe_config_addr; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 356bc75..4fca3de 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -245,6 +245,23 @@ static void pSeries_lpar_hptab_clear(void) &(ptes[j].pteh), &(ptes[j].ptel)); } } + +#ifdef __LITTLE_ENDIAN__ + /* Reset exceptions to big endian */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + long rc; + + rc = pseries_big_endian_exceptions(); + /* + * At this point it is unlikely panic() will get anything + * out to the user, but at least this will stop us from + * continuing on further and creating an even more + * difficult to debug situation. + */ + if (rc) + panic("Could not enable big endian exceptions"); + } +#endif } /* diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index a702f1c..72a1027 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -13,6 +13,7 @@ #include <linux/of.h> #include <asm/archrandom.h> #include <asm/machdep.h> +#include <asm/plpar_wrappers.h> static int pseries_get_random_long(unsigned long *v) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 1f97e2b..c1f1908 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -442,6 +442,32 @@ static void pSeries_machine_kexec(struct kimage *image) } #endif +#ifdef __LITTLE_ENDIAN__ +long pseries_big_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_big_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} + +static long pseries_little_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_little_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} +#endif + static void __init pSeries_setup_arch(void) { panic_timeout = 10; @@ -698,6 +724,22 @@ static int __init pSeries_probe(void) /* Now try to figure out if we are running on LPAR */ of_scan_flat_dt(pseries_probe_fw_features, NULL); +#ifdef __LITTLE_ENDIAN__ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + long rc; + /* + * Tell the hypervisor that we want our exceptions to + * be taken in little endian mode. If this fails we don't + * want to use BUG() because it will trigger an exception. + */ + rc = pseries_little_endian_exceptions(); + if (rc) { + ppc_md.progress("H_SET_MODE LE exception fail", 0); + panic("Could not enable little endian exceptions"); + } + } +#endif + if (firmware_has_feature(FW_FEATURE_LPAR)) hpte_init_lpar(); else diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c index 8ef53bc..aaa46b3 100644 --- a/arch/powerpc/platforms/wsp/chroma.c +++ b/arch/powerpc/platforms/wsp/chroma.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/smp.h> #include <linux/time.h> +#include <linux/of_fdt.h> #include <asm/machdep.h> #include <asm/udbg.h> diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c index d18e6cc..a3c87f3 100644 --- a/arch/powerpc/platforms/wsp/h8.c +++ b/arch/powerpc/platforms/wsp/h8.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/of.h> #include <linux/io.h> +#include <linux/of_address.h> #include "wsp.h" diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c index 2d3b1dd..9cd92e6 100644 --- a/arch/powerpc/platforms/wsp/ics.c +++ b/arch/powerpc/platforms/wsp/ics.c @@ -18,6 +18,8 @@ #include <linux/smp.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <asm/io.h> #include <asm/irq.h> diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c index cb565bf..3f67298 100644 --- a/arch/powerpc/platforms/wsp/opb_pic.c +++ b/arch/powerpc/platforms/wsp/opb_pic.c @@ -15,6 +15,8 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <asm/reg_a2.h> #include <asm/irq.h> diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c index 508ec82..a87b414 100644 --- a/arch/powerpc/platforms/wsp/psr2.c +++ b/arch/powerpc/platforms/wsp/psr2.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/smp.h> #include <linux/time.h> +#include <linux/of_fdt.h> #include <asm/machdep.h> #include <asm/udbg.h> diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c index 8928507..6538b4d 100644 --- a/arch/powerpc/platforms/wsp/scom_wsp.c +++ b/arch/powerpc/platforms/wsp/scom_wsp.c @@ -14,6 +14,7 @@ #include <linux/of.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/of_address.h> #include <asm/cputhreads.h> #include <asm/reg_a2.h> diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c index ddb6efe..58cd1f0 100644 --- a/arch/powerpc/platforms/wsp/wsp.c +++ b/arch/powerpc/platforms/wsp/wsp.c @@ -13,6 +13,7 @@ #include <linux/smp.h> #include <linux/delay.h> #include <linux/time.h> +#include <linux/of_address.h> #include <asm/scom.h> diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 96f958d..bc4a088 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -330,8 +330,8 @@ static struct pci_device_id intel_stolen_ids[] __initdata = { INTEL_I915GM_IDS(gen3_stolen_size), INTEL_I945G_IDS(gen3_stolen_size), INTEL_I945GM_IDS(gen3_stolen_size), - INTEL_VLV_M_IDS(gen3_stolen_size), - INTEL_VLV_D_IDS(gen3_stolen_size), + INTEL_VLV_M_IDS(gen6_stolen_size), + INTEL_VLV_D_IDS(gen6_stolen_size), INTEL_PINEVIEW_IDS(gen3_stolen_size), INTEL_I965G_IDS(gen3_stolen_size), INTEL_G33_IDS(gen3_stolen_size), diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index daff69e..1185fe7 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -296,4 +296,4 @@ static struct kernel_param_ops audit_param_ops = { .get = param_get_bool, }; -module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644); +arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index a7cccb6..c96314a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -61,6 +61,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) #if PAGETABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { + struct page *page = virt_to_page(pmd); paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); /* * NOTE! For PAE, any changes to the top page-directory-pointer-table @@ -69,7 +70,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif - tlb_remove_page(tlb, virt_to_page(pmd)); + pgtable_pmd_page_dtor(page); + tlb_remove_page(tlb, page); } #if PAGETABLE_LEVELS > 3 @@ -209,7 +211,7 @@ static int preallocate_pmds(pmd_t *pmds[]) if (!pmd) failed = true; if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) { - free_page((unsigned long)pmds[i]); + free_page((unsigned long)pmd); pmd = NULL; failed = true; } diff --git a/block/partitions/efi.c b/block/partitions/efi.c index a8287b4..dc51f46 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -96,6 +96,7 @@ * - Code works, detects all the partitions. * ************************************************************/ +#include <linux/kernel.h> #include <linux/crc32.h> #include <linux/ctype.h> #include <linux/math64.h> @@ -715,8 +716,8 @@ int efi_partition(struct parsed_partitions *state) efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid); /* Naively convert UTF16-LE to 7 bits. */ - label_max = min(sizeof(info->volname) - 1, - sizeof(ptes[i].partition_name)); + label_max = min(ARRAY_SIZE(info->volname) - 1, + ARRAY_SIZE(ptes[i].partition_name)); info->volname[label_max] = 0; while (label_count < label_max) { u8 c = ptes[i].partition_name[label_count] & 0xff; diff --git a/crypto/Kconfig b/crypto/Kconfig index 71f337a..4ae5734 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1402,6 +1402,9 @@ config CRYPTO_USER_API_SKCIPHER This option enables the user-spaces interface for symmetric key cipher algorithms. +config CRYPTO_HASH_INFO + bool + source "drivers/crypto/Kconfig" source crypto/asymmetric_keys/Kconfig diff --git a/crypto/Makefile b/crypto/Makefile index 80019ba..b3a7e80 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o obj-$(CONFIG_XOR_BLOCKS) += xor.o obj-$(CONFIG_ASYNC_CORE) += async_tx/ obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/ +obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 0262210..ef5356c 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -161,8 +161,6 @@ static int hash_recvmsg(struct kiocb *unused, struct socket *sock, else if (len < ds) msg->msg_flags |= MSG_TRUNC; - msg->msg_namelen = 0; - lock_sock(sk); if (ctx->more) { ctx->more = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a1c4f0a..6a6dfc0 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -432,7 +432,6 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, long copied = 0; lock_sock(sk); - msg->msg_namelen = 0; for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; iovlen--, iov++) { unsigned long seglen = iov->iov_len; diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig index 6d2c2ea..03a6eb9 100644 --- a/crypto/asymmetric_keys/Kconfig +++ b/crypto/asymmetric_keys/Kconfig @@ -12,6 +12,8 @@ if ASYMMETRIC_KEY_TYPE config ASYMMETRIC_PUBLIC_KEY_SUBTYPE tristate "Asymmetric public-key crypto algorithm subtype" select MPILIB + select PUBLIC_KEY_ALGO_RSA + select CRYPTO_HASH_INFO help This option provides support for asymmetric public key type handling. If signature generation and/or verification are to be used, @@ -20,8 +22,8 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE config PUBLIC_KEY_ALGO_RSA tristate "RSA public-key algorithm" - depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE select MPILIB_EXTRA + select MPILIB help This option enables support for the RSA algorithm (PKCS#1, RFC3447). diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index cf80765..b77eb53 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -209,6 +209,7 @@ struct key_type key_type_asymmetric = { .match = asymmetric_key_match, .destroy = asymmetric_key_destroy, .describe = asymmetric_key_describe, + .def_lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE, }; EXPORT_SYMBOL_GPL(key_type_asymmetric); diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index cb2e291..97eb0019 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -22,29 +22,25 @@ MODULE_LICENSE("GPL"); -const char *const pkey_algo[PKEY_ALGO__LAST] = { +const char *const pkey_algo_name[PKEY_ALGO__LAST] = { [PKEY_ALGO_DSA] = "DSA", [PKEY_ALGO_RSA] = "RSA", }; -EXPORT_SYMBOL_GPL(pkey_algo); +EXPORT_SYMBOL_GPL(pkey_algo_name); -const char *const pkey_hash_algo[PKEY_HASH__LAST] = { - [PKEY_HASH_MD4] = "md4", - [PKEY_HASH_MD5] = "md5", - [PKEY_HASH_SHA1] = "sha1", - [PKEY_HASH_RIPE_MD_160] = "rmd160", - [PKEY_HASH_SHA256] = "sha256", - [PKEY_HASH_SHA384] = "sha384", - [PKEY_HASH_SHA512] = "sha512", - [PKEY_HASH_SHA224] = "sha224", +const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST] = { +#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \ + defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE) + [PKEY_ALGO_RSA] = &RSA_public_key_algorithm, +#endif }; -EXPORT_SYMBOL_GPL(pkey_hash_algo); +EXPORT_SYMBOL_GPL(pkey_algo); -const char *const pkey_id_type[PKEY_ID_TYPE__LAST] = { +const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST] = { [PKEY_ID_PGP] = "PGP", [PKEY_ID_X509] = "X509", }; -EXPORT_SYMBOL_GPL(pkey_id_type); +EXPORT_SYMBOL_GPL(pkey_id_type_name); /* * Provide a part of a description of the key for /proc/keys. @@ -56,7 +52,7 @@ static void public_key_describe(const struct key *asymmetric_key, if (key) seq_printf(m, "%s.%s", - pkey_id_type[key->id_type], key->algo->name); + pkey_id_type_name[key->id_type], key->algo->name); } /* @@ -78,21 +74,45 @@ EXPORT_SYMBOL_GPL(public_key_destroy); /* * Verify a signature using a public key. */ -static int public_key_verify_signature(const struct key *key, - const struct public_key_signature *sig) +int public_key_verify_signature(const struct public_key *pk, + const struct public_key_signature *sig) { - const struct public_key *pk = key->payload.data; + const struct public_key_algorithm *algo; + + BUG_ON(!pk); + BUG_ON(!pk->mpi[0]); + BUG_ON(!pk->mpi[1]); + BUG_ON(!sig); + BUG_ON(!sig->digest); + BUG_ON(!sig->mpi[0]); + + algo = pk->algo; + if (!algo) { + if (pk->pkey_algo >= PKEY_ALGO__LAST) + return -ENOPKG; + algo = pkey_algo[pk->pkey_algo]; + if (!algo) + return -ENOPKG; + } - if (!pk->algo->verify_signature) + if (!algo->verify_signature) return -ENOTSUPP; - if (sig->nr_mpi != pk->algo->n_sig_mpi) { + if (sig->nr_mpi != algo->n_sig_mpi) { pr_debug("Signature has %u MPI not %u\n", - sig->nr_mpi, pk->algo->n_sig_mpi); + sig->nr_mpi, algo->n_sig_mpi); return -EINVAL; } - return pk->algo->verify_signature(pk, sig); + return algo->verify_signature(pk, sig); +} +EXPORT_SYMBOL_GPL(public_key_verify_signature); + +static int public_key_verify_signature_2(const struct key *key, + const struct public_key_signature *sig) +{ + const struct public_key *pk = key->payload.data; + return public_key_verify_signature(pk, sig); } /* @@ -103,6 +123,6 @@ struct asymmetric_key_subtype public_key_subtype = { .name = "public_key", .describe = public_key_describe, .destroy = public_key_destroy, - .verify_signature = public_key_verify_signature, + .verify_signature = public_key_verify_signature_2, }; EXPORT_SYMBOL_GPL(public_key_subtype); diff --git a/crypto/asymmetric_keys/public_key.h b/crypto/asymmetric_keys/public_key.h index 5e5e356..5c37a22 100644 --- a/crypto/asymmetric_keys/public_key.h +++ b/crypto/asymmetric_keys/public_key.h @@ -28,3 +28,9 @@ struct public_key_algorithm { }; extern const struct public_key_algorithm RSA_public_key_algorithm; + +/* + * public_key.c + */ +extern int public_key_verify_signature(const struct public_key *pk, + const struct public_key_signature *sig); diff --git a/crypto/asymmetric_keys/rsa.c b/crypto/asymmetric_keys/rsa.c index 4a6a069..90a17f5 100644 --- a/crypto/asymmetric_keys/rsa.c +++ b/crypto/asymmetric_keys/rsa.c @@ -73,13 +73,13 @@ static const struct { size_t size; } RSA_ASN1_templates[PKEY_HASH__LAST] = { #define _(X) { RSA_digest_info_##X, sizeof(RSA_digest_info_##X) } - [PKEY_HASH_MD5] = _(MD5), - [PKEY_HASH_SHA1] = _(SHA1), - [PKEY_HASH_RIPE_MD_160] = _(RIPE_MD_160), - [PKEY_HASH_SHA256] = _(SHA256), - [PKEY_HASH_SHA384] = _(SHA384), - [PKEY_HASH_SHA512] = _(SHA512), - [PKEY_HASH_SHA224] = _(SHA224), + [HASH_ALGO_MD5] = _(MD5), + [HASH_ALGO_SHA1] = _(SHA1), + [HASH_ALGO_RIPE_MD_160] = _(RIPE_MD_160), + [HASH_ALGO_SHA256] = _(SHA256), + [HASH_ALGO_SHA384] = _(SHA384), + [HASH_ALGO_SHA512] = _(SHA512), + [HASH_ALGO_SHA224] = _(SHA224), #undef _ }; diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index facbf26..2989316 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -47,6 +47,8 @@ void x509_free_certificate(struct x509_certificate *cert) kfree(cert->subject); kfree(cert->fingerprint); kfree(cert->authority); + kfree(cert->sig.digest); + mpi_free(cert->sig.rsa.s); kfree(cert); } } @@ -152,33 +154,33 @@ int x509_note_pkey_algo(void *context, size_t hdrlen, return -ENOPKG; /* Unsupported combination */ case OID_md4WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_MD5; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_MD5; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha1WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA1; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA1; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha256WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA256; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA256; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha384WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA384; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA384; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha512WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA512; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA512; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; case OID_sha224WithRSAEncryption: - ctx->cert->sig_hash_algo = PKEY_HASH_SHA224; - ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA; + ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA224; + ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA; break; } @@ -203,8 +205,8 @@ int x509_note_signature(void *context, size_t hdrlen, return -EINVAL; } - ctx->cert->sig = value; - ctx->cert->sig_size = vlen; + ctx->cert->raw_sig = value; + ctx->cert->raw_sig_size = vlen; return 0; } @@ -343,8 +345,9 @@ int x509_extract_key_data(void *context, size_t hdrlen, if (ctx->last_oid != OID_rsaEncryption) return -ENOPKG; - /* There seems to be an extraneous 0 byte on the front of the data */ - ctx->cert->pkey_algo = PKEY_ALGO_RSA; + ctx->cert->pub->pkey_algo = PKEY_ALGO_RSA; + + /* Discard the BIT STRING metadata */ ctx->key = value + 1; ctx->key_size = vlen - 1; return 0; diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h index f86dc5f..87d9cc2 100644 --- a/crypto/asymmetric_keys/x509_parser.h +++ b/crypto/asymmetric_keys/x509_parser.h @@ -9,6 +9,7 @@ * 2 of the Licence, or (at your option) any later version. */ +#include <linux/time.h> #include <crypto/public_key.h> struct x509_certificate { @@ -20,13 +21,11 @@ struct x509_certificate { char *authority; /* Authority key fingerprint as hex */ struct tm valid_from; struct tm valid_to; - enum pkey_algo pkey_algo : 8; /* Public key algorithm */ - enum pkey_algo sig_pkey_algo : 8; /* Signature public key algorithm */ - enum pkey_hash_algo sig_hash_algo : 8; /* Signature hash algorithm */ const void *tbs; /* Signed data */ - size_t tbs_size; /* Size of signed data */ - const void *sig; /* Signature data */ - size_t sig_size; /* Size of sigature */ + unsigned tbs_size; /* Size of signed data */ + unsigned raw_sig_size; /* Size of sigature */ + const void *raw_sig; /* Signature data */ + struct public_key_signature sig; /* Signature parameters */ }; /* @@ -34,3 +33,10 @@ struct x509_certificate { */ extern void x509_free_certificate(struct x509_certificate *cert); extern struct x509_certificate *x509_cert_parse(const void *data, size_t datalen); + +/* + * x509_public_key.c + */ +extern int x509_get_sig_params(struct x509_certificate *cert); +extern int x509_check_signature(const struct public_key *pub, + struct x509_certificate *cert); diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 06007f0..f83300b 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -18,85 +18,162 @@ #include <linux/asn1_decoder.h> #include <keys/asymmetric-subtype.h> #include <keys/asymmetric-parser.h> +#include <keys/system_keyring.h> #include <crypto/hash.h> #include "asymmetric_keys.h" #include "public_key.h" #include "x509_parser.h" -static const -struct public_key_algorithm *x509_public_key_algorithms[PKEY_ALGO__LAST] = { - [PKEY_ALGO_DSA] = NULL, -#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \ - defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE) - [PKEY_ALGO_RSA] = &RSA_public_key_algorithm, -#endif -}; +/* + * Find a key in the given keyring by issuer and authority. + */ +static struct key *x509_request_asymmetric_key( + struct key *keyring, + const char *signer, size_t signer_len, + const char *authority, size_t auth_len) +{ + key_ref_t key; + char *id; + + /* Construct an identifier. */ + id = kmalloc(signer_len + 2 + auth_len + 1, GFP_KERNEL); + if (!id) + return ERR_PTR(-ENOMEM); + + memcpy(id, signer, signer_len); + id[signer_len + 0] = ':'; + id[signer_len + 1] = ' '; + memcpy(id + signer_len + 2, authority, auth_len); + id[signer_len + 2 + auth_len] = 0; + + pr_debug("Look up: \"%s\"\n", id); + + key = keyring_search(make_key_ref(keyring, 1), + &key_type_asymmetric, id); + if (IS_ERR(key)) + pr_debug("Request for module key '%s' err %ld\n", + id, PTR_ERR(key)); + kfree(id); + + if (IS_ERR(key)) { + switch (PTR_ERR(key)) { + /* Hide some search errors */ + case -EACCES: + case -ENOTDIR: + case -EAGAIN: + return ERR_PTR(-ENOKEY); + default: + return ERR_CAST(key); + } + } + + pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key))); + return key_ref_to_ptr(key); +} /* - * Check the signature on a certificate using the provided public key + * Set up the signature parameters in an X.509 certificate. This involves + * digesting the signed data and extracting the signature. */ -static int x509_check_signature(const struct public_key *pub, - const struct x509_certificate *cert) +int x509_get_sig_params(struct x509_certificate *cert) { - struct public_key_signature *sig; struct crypto_shash *tfm; struct shash_desc *desc; size_t digest_size, desc_size; + void *digest; int ret; pr_devel("==>%s()\n", __func__); - + + if (cert->sig.rsa.s) + return 0; + + cert->sig.rsa.s = mpi_read_raw_data(cert->raw_sig, cert->raw_sig_size); + if (!cert->sig.rsa.s) + return -ENOMEM; + cert->sig.nr_mpi = 1; + /* Allocate the hashing algorithm we're going to need and find out how * big the hash operational data will be. */ - tfm = crypto_alloc_shash(pkey_hash_algo[cert->sig_hash_algo], 0, 0); + tfm = crypto_alloc_shash(hash_algo_name[cert->sig.pkey_hash_algo], 0, 0); if (IS_ERR(tfm)) return (PTR_ERR(tfm) == -ENOENT) ? -ENOPKG : PTR_ERR(tfm); desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); digest_size = crypto_shash_digestsize(tfm); - /* We allocate the hash operational data storage on the end of our - * context data. + /* We allocate the hash operational data storage on the end of the + * digest storage space. */ ret = -ENOMEM; - sig = kzalloc(sizeof(*sig) + desc_size + digest_size, GFP_KERNEL); - if (!sig) - goto error_no_sig; + digest = kzalloc(digest_size + desc_size, GFP_KERNEL); + if (!digest) + goto error; - sig->pkey_hash_algo = cert->sig_hash_algo; - sig->digest = (u8 *)sig + sizeof(*sig) + desc_size; - sig->digest_size = digest_size; + cert->sig.digest = digest; + cert->sig.digest_size = digest_size; - desc = (void *)sig + sizeof(*sig); - desc->tfm = tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc = digest + digest_size; + desc->tfm = tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; ret = crypto_shash_init(desc); if (ret < 0) goto error; + might_sleep(); + ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, digest); +error: + crypto_free_shash(tfm); + pr_devel("<==%s() = %d\n", __func__, ret); + return ret; +} +EXPORT_SYMBOL_GPL(x509_get_sig_params); - ret = -ENOMEM; - sig->rsa.s = mpi_read_raw_data(cert->sig, cert->sig_size); - if (!sig->rsa.s) - goto error; +/* + * Check the signature on a certificate using the provided public key + */ +int x509_check_signature(const struct public_key *pub, + struct x509_certificate *cert) +{ + int ret; - ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest); - if (ret < 0) - goto error_mpi; + pr_devel("==>%s()\n", __func__); - ret = pub->algo->verify_signature(pub, sig); + ret = x509_get_sig_params(cert); + if (ret < 0) + return ret; + ret = public_key_verify_signature(pub, &cert->sig); pr_debug("Cert Verification: %d\n", ret); + return ret; +} +EXPORT_SYMBOL_GPL(x509_check_signature); -error_mpi: - mpi_free(sig->rsa.s); -error: - kfree(sig); -error_no_sig: - crypto_free_shash(tfm); +/* + * Check the new certificate against the ones in the trust keyring. If one of + * those is the signing key and validates the new certificate, then mark the + * new certificate as being trusted. + * + * Return 0 if the new certificate was successfully validated, 1 if we couldn't + * find a matching parent certificate in the trusted list and an error if there + * is a matching certificate but the signature check fails. + */ +static int x509_validate_trust(struct x509_certificate *cert, + struct key *trust_keyring) +{ + const struct public_key *pk; + struct key *key; + int ret = 1; - pr_devel("<==%s() = %d\n", __func__, ret); + key = x509_request_asymmetric_key(trust_keyring, + cert->issuer, strlen(cert->issuer), + cert->authority, + strlen(cert->authority)); + if (!IS_ERR(key)) { + pk = key->payload.data; + ret = x509_check_signature(pk, cert); + } return ret; } @@ -106,7 +183,6 @@ error_no_sig: static int x509_key_preparse(struct key_preparsed_payload *prep) { struct x509_certificate *cert; - struct tm now; size_t srlen, sulen; char *desc = NULL; int ret; @@ -117,7 +193,18 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) pr_devel("Cert Issuer: %s\n", cert->issuer); pr_devel("Cert Subject: %s\n", cert->subject); - pr_devel("Cert Key Algo: %s\n", pkey_algo[cert->pkey_algo]); + + if (cert->pub->pkey_algo >= PKEY_ALGO__LAST || + cert->sig.pkey_algo >= PKEY_ALGO__LAST || + cert->sig.pkey_hash_algo >= PKEY_HASH__LAST || + !pkey_algo[cert->pub->pkey_algo] || + !pkey_algo[cert->sig.pkey_algo] || + !hash_algo_name[cert->sig.pkey_hash_algo]) { + ret = -ENOPKG; + goto error_free_cert; + } + + pr_devel("Cert Key Algo: %s\n", pkey_algo_name[cert->pub->pkey_algo]); pr_devel("Cert Valid From: %04ld-%02d-%02d %02d:%02d:%02d\n", cert->valid_from.tm_year + 1900, cert->valid_from.tm_mon + 1, cert->valid_from.tm_mday, cert->valid_from.tm_hour, @@ -127,61 +214,29 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) cert->valid_to.tm_mday, cert->valid_to.tm_hour, cert->valid_to.tm_min, cert->valid_to.tm_sec); pr_devel("Cert Signature: %s + %s\n", - pkey_algo[cert->sig_pkey_algo], - pkey_hash_algo[cert->sig_hash_algo]); + pkey_algo_name[cert->sig.pkey_algo], + hash_algo_name[cert->sig.pkey_hash_algo]); - if (!cert->fingerprint || !cert->authority) { - pr_warn("Cert for '%s' must have SubjKeyId and AuthKeyId extensions\n", + if (!cert->fingerprint) { + pr_warn("Cert for '%s' must have a SubjKeyId extension\n", cert->subject); ret = -EKEYREJECTED; goto error_free_cert; } - time_to_tm(CURRENT_TIME.tv_sec, 0, &now); - pr_devel("Now: %04ld-%02d-%02d %02d:%02d:%02d\n", - now.tm_year + 1900, now.tm_mon + 1, now.tm_mday, - now.tm_hour, now.tm_min, now.tm_sec); - if (now.tm_year < cert->valid_from.tm_year || - (now.tm_year == cert->valid_from.tm_year && - (now.tm_mon < cert->valid_from.tm_mon || - (now.tm_mon == cert->valid_from.tm_mon && - (now.tm_mday < cert->valid_from.tm_mday || - (now.tm_mday == cert->valid_from.tm_mday && - (now.tm_hour < cert->valid_from.tm_hour || - (now.tm_hour == cert->valid_from.tm_hour && - (now.tm_min < cert->valid_from.tm_min || - (now.tm_min == cert->valid_from.tm_min && - (now.tm_sec < cert->valid_from.tm_sec - ))))))))))) { - pr_warn("Cert %s is not yet valid\n", cert->fingerprint); - ret = -EKEYREJECTED; - goto error_free_cert; - } - if (now.tm_year > cert->valid_to.tm_year || - (now.tm_year == cert->valid_to.tm_year && - (now.tm_mon > cert->valid_to.tm_mon || - (now.tm_mon == cert->valid_to.tm_mon && - (now.tm_mday > cert->valid_to.tm_mday || - (now.tm_mday == cert->valid_to.tm_mday && - (now.tm_hour > cert->valid_to.tm_hour || - (now.tm_hour == cert->valid_to.tm_hour && - (now.tm_min > cert->valid_to.tm_min || - (now.tm_min == cert->valid_to.tm_min && - (now.tm_sec > cert->valid_to.tm_sec - ))))))))))) { - pr_warn("Cert %s has expired\n", cert->fingerprint); - ret = -EKEYEXPIRED; - goto error_free_cert; - } - - cert->pub->algo = x509_public_key_algorithms[cert->pkey_algo]; + cert->pub->algo = pkey_algo[cert->pub->pkey_algo]; cert->pub->id_type = PKEY_ID_X509; - /* Check the signature on the key */ - if (strcmp(cert->fingerprint, cert->authority) == 0) { - ret = x509_check_signature(cert->pub, cert); + /* Check the signature on the key if it appears to be self-signed */ + if (!cert->authority || + strcmp(cert->fingerprint, cert->authority) == 0) { + ret = x509_check_signature(cert->pub, cert); /* self-signed */ if (ret < 0) goto error_free_cert; + } else { + ret = x509_validate_trust(cert, system_trusted_keyring); + if (!ret) + prep->trusted = 1; } /* Propose a description */ @@ -237,3 +292,6 @@ static void __exit x509_key_exit(void) module_init(x509_key_init); module_exit(x509_key_exit); + +MODULE_DESCRIPTION("X.509 certificate parser"); +MODULE_LICENSE("GPL"); diff --git a/crypto/hash_info.c b/crypto/hash_info.c new file mode 100644 index 0000000..3e7ff46 --- /dev/null +++ b/crypto/hash_info.c @@ -0,0 +1,56 @@ +/* + * Hash Info: Hash algorithms information + * + * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <linux/export.h> +#include <crypto/hash_info.h> + +const char *const hash_algo_name[HASH_ALGO__LAST] = { + [HASH_ALGO_MD4] = "md4", + [HASH_ALGO_MD5] = "md5", + [HASH_ALGO_SHA1] = "sha1", + [HASH_ALGO_RIPE_MD_160] = "rmd160", + [HASH_ALGO_SHA256] = "sha256", + [HASH_ALGO_SHA384] = "sha384", + [HASH_ALGO_SHA512] = "sha512", + [HASH_ALGO_SHA224] = "sha224", + [HASH_ALGO_RIPE_MD_128] = "rmd128", + [HASH_ALGO_RIPE_MD_256] = "rmd256", + [HASH_ALGO_RIPE_MD_320] = "rmd320", + [HASH_ALGO_WP_256] = "wp256", + [HASH_ALGO_WP_384] = "wp384", + [HASH_ALGO_WP_512] = "wp512", + [HASH_ALGO_TGR_128] = "tgr128", + [HASH_ALGO_TGR_160] = "tgr160", + [HASH_ALGO_TGR_192] = "tgr192", +}; +EXPORT_SYMBOL_GPL(hash_algo_name); + +const int hash_digest_size[HASH_ALGO__LAST] = { + [HASH_ALGO_MD4] = MD5_DIGEST_SIZE, + [HASH_ALGO_MD5] = MD5_DIGEST_SIZE, + [HASH_ALGO_SHA1] = SHA1_DIGEST_SIZE, + [HASH_ALGO_RIPE_MD_160] = RMD160_DIGEST_SIZE, + [HASH_ALGO_SHA256] = SHA256_DIGEST_SIZE, + [HASH_ALGO_SHA384] = SHA384_DIGEST_SIZE, + [HASH_ALGO_SHA512] = SHA512_DIGEST_SIZE, + [HASH_ALGO_SHA224] = SHA224_DIGEST_SIZE, + [HASH_ALGO_RIPE_MD_128] = RMD128_DIGEST_SIZE, + [HASH_ALGO_RIPE_MD_256] = RMD256_DIGEST_SIZE, + [HASH_ALGO_RIPE_MD_320] = RMD320_DIGEST_SIZE, + [HASH_ALGO_WP_256] = WP256_DIGEST_SIZE, + [HASH_ALGO_WP_384] = WP384_DIGEST_SIZE, + [HASH_ALGO_WP_512] = WP512_DIGEST_SIZE, + [HASH_ALGO_TGR_128] = TGR128_DIGEST_SIZE, + [HASH_ALGO_TGR_160] = TGR160_DIGEST_SIZE, + [HASH_ALGO_TGR_192] = TGR192_DIGEST_SIZE, +}; +EXPORT_SYMBOL_GPL(hash_digest_size); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 0703bff..20360e4 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -65,6 +65,9 @@ static struct acpi_scan_handler pci_root_handler = { .ids = root_device_ids, .attach = acpi_pci_root_add, .detach = acpi_pci_root_remove, + .hotplug = { + .ignore = true, + }, }; static DEFINE_MUTEX(osc_lock); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 15daa21..fd39459 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1772,7 +1772,7 @@ static void acpi_scan_init_hotplug(acpi_handle handle, int type) */ list_for_each_entry(hwid, &pnp.ids, list) { handler = acpi_scan_match_handler(hwid->id, NULL); - if (handler) { + if (handler && !handler->hotplug.ignore) { acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY, acpi_hotplug_notify_cb, handler); break; diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index db52936..6dbc3ca 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -309,7 +309,7 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr, sprintf(table_attr->name + ACPI_NAME_SIZE, "%d", table_attr->instance); - table_attr->attr.size = 0; + table_attr->attr.size = table_header->length; table_attr->attr.read = acpi_table_show; table_attr->attr.attr.name = table_attr->name; table_attr->attr.attr.mode = 0400; @@ -354,8 +354,9 @@ static int acpi_tables_sysfs_init(void) { struct acpi_table_attr *table_attr; struct acpi_table_header *table_header = NULL; - int table_index = 0; - int result; + int table_index; + acpi_status status; + int ret; tables_kobj = kobject_create_and_add("tables", acpi_kobj); if (!tables_kobj) @@ -365,33 +366,34 @@ static int acpi_tables_sysfs_init(void) if (!dynamic_tables_kobj) goto err_dynamic_tables; - do { - result = acpi_get_table_by_index(table_index, &table_header); - if (!result) { - table_index++; - table_attr = NULL; - table_attr = - kzalloc(sizeof(struct acpi_table_attr), GFP_KERNEL); - if (!table_attr) - return -ENOMEM; - - acpi_table_attr_init(table_attr, table_header); - result = - sysfs_create_bin_file(tables_kobj, - &table_attr->attr); - if (result) { - kfree(table_attr); - return result; - } else - list_add_tail(&table_attr->node, - &acpi_table_attr_list); + for (table_index = 0;; table_index++) { + status = acpi_get_table_by_index(table_index, &table_header); + + if (status == AE_BAD_PARAMETER) + break; + + if (ACPI_FAILURE(status)) + continue; + + table_attr = NULL; + table_attr = kzalloc(sizeof(*table_attr), GFP_KERNEL); + if (!table_attr) + return -ENOMEM; + + acpi_table_attr_init(table_attr, table_header); + ret = sysfs_create_bin_file(tables_kobj, &table_attr->attr); + if (ret) { + kfree(table_attr); + return ret; } - } while (!result); + list_add_tail(&table_attr->node, &acpi_table_attr_list); + } + kobject_uevent(tables_kobj, KOBJ_ADD); kobject_uevent(dynamic_tables_kobj, KOBJ_ADD); - result = acpi_install_table_handler(acpi_sysfs_table_handler, NULL); + status = acpi_install_table_handler(acpi_sysfs_table_handler, NULL); - return result == AE_OK ? 0 : -EINVAL; + return ACPI_FAILURE(status) ? -EINVAL : 0; err_dynamic_tables: kobject_put(tables_kobj); err: diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index b5d8423..ea192ec 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -223,7 +223,7 @@ static void null_softirq_done_fn(struct request *rq) blk_end_request_all(rq, 0); } -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP static void null_ipi_cmd_end_io(void *data) { @@ -260,7 +260,7 @@ static void null_cmd_end_ipi(struct nullb_cmd *cmd) put_cpu(); } -#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +#endif /* CONFIG_SMP */ static inline void null_handle_cmd(struct nullb_cmd *cmd) { @@ -270,7 +270,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) end_cmd(cmd); break; case NULL_IRQ_SOFTIRQ: -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP null_cmd_end_ipi(cmd); #else end_cmd(cmd); @@ -571,7 +571,7 @@ static int __init null_init(void) { unsigned int i; -#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#if !defined(CONFIG_SMP) if (irqmode == NULL_IRQ_SOFTIRQ) { pr_warn("null_blk: softirq completions not available.\n"); pr_warn("null_blk: using direct completions.\n"); diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index 94c0c74..1a65838 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -33,6 +33,15 @@ config TCG_TIS from within Linux. To compile this driver as a module, choose M here; the module will be called tpm_tis. +config TCG_TIS_I2C_ATMEL + tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)" + depends on I2C + ---help--- + If you have an Atmel I2C TPM security chip say Yes and it will be + accessible from within Linux. + To compile this driver as a module, choose M here; the module will + be called tpm_tis_i2c_atmel. + config TCG_TIS_I2C_INFINEON tristate "TPM Interface Specification 1.2 Interface (I2C - Infineon)" depends on I2C @@ -42,7 +51,17 @@ config TCG_TIS_I2C_INFINEON Specification 0.20 say Yes and it will be accessible from within Linux. To compile this driver as a module, choose M here; the module - will be called tpm_tis_i2c_infineon. + will be called tpm_i2c_infineon. + +config TCG_TIS_I2C_NUVOTON + tristate "TPM Interface Specification 1.2 Interface (I2C - Nuvoton)" + depends on I2C + ---help--- + If you have a TPM security chip with an I2C interface from + Nuvoton Technology Corp. say Yes and it will be accessible + from within Linux. + To compile this driver as a module, choose M here; the module + will be called tpm_i2c_nuvoton. config TCG_NSC tristate "National Semiconductor TPM Interface" @@ -82,14 +101,14 @@ config TCG_IBMVTPM as a module, choose M here; the module will be called tpm_ibmvtpm. config TCG_ST33_I2C - tristate "STMicroelectronics ST33 I2C TPM" - depends on I2C - depends on GPIOLIB - ---help--- - If you have a TPM security chip from STMicroelectronics working with - an I2C bus say Yes and it will be accessible from within Linux. - To compile this driver as a module, choose M here; the module will be - called tpm_stm_st33_i2c. + tristate "STMicroelectronics ST33 I2C TPM" + depends on I2C + depends on GPIOLIB + ---help--- + If you have a TPM security chip from STMicroelectronics working with + an I2C bus say Yes and it will be accessible from within Linux. + To compile this driver as a module, choose M here; the module will be + called tpm_stm_st33_i2c. config TCG_XEN tristate "XEN TPM Interface" diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index eb41ff9..b80a400 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -2,17 +2,20 @@ # Makefile for the kernel tpm device drivers. # obj-$(CONFIG_TCG_TPM) += tpm.o +tpm-y := tpm-interface.o +tpm-$(CONFIG_ACPI) += tpm_ppi.o + ifdef CONFIG_ACPI - obj-$(CONFIG_TCG_TPM) += tpm_bios.o - tpm_bios-objs += tpm_eventlog.o tpm_acpi.o tpm_ppi.o + tpm-y += tpm_eventlog.o tpm_acpi.o else ifdef CONFIG_TCG_IBMVTPM - obj-$(CONFIG_TCG_TPM) += tpm_bios.o - tpm_bios-objs += tpm_eventlog.o tpm_of.o + tpm-y += tpm_eventlog.o tpm_of.o endif endif obj-$(CONFIG_TCG_TIS) += tpm_tis.o +obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o +obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o obj-$(CONFIG_TCG_NSC) += tpm_nsc.o obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm-interface.c index e3c974a..6ae41d3 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm-interface.c @@ -10,13 +10,13 @@ * Maintained by: <tpmdd-devel@lists.sourceforge.net> * * Device driver for TCG/TCPA TPM (trusted platform module). - * Specifications at www.trustedcomputinggroup.org + * Specifications at www.trustedcomputinggroup.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation, version 2 of the * License. - * + * * Note, the TPM chip is not interrupt driven (only polling) * and can have very long timeouts (minutes!). Hence the unusual * calls to msleep. @@ -371,13 +371,14 @@ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, return -ENODATA; if (count > bufsiz) { dev_err(chip->dev, - "invalid count value %x %zx \n", count, bufsiz); + "invalid count value %x %zx\n", count, bufsiz); return -E2BIG; } mutex_lock(&chip->tpm_mutex); - if ((rc = chip->vendor.send(chip, (u8 *) buf, count)) < 0) { + rc = chip->vendor.send(chip, (u8 *) buf, count); + if (rc < 0) { dev_err(chip->dev, "tpm_transmit: tpm_send: error %zd\n", rc); goto out; @@ -444,7 +445,7 @@ static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd, { int err; - len = tpm_transmit(chip,(u8 *) cmd, len); + len = tpm_transmit(chip, (u8 *) cmd, len); if (len < 0) return len; else if (len < TPM_HEADER_SIZE) @@ -658,7 +659,7 @@ static int tpm_continue_selftest(struct tpm_chip *chip) return rc; } -ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr, +ssize_t tpm_show_enabled(struct device *dev, struct device_attribute *attr, char *buf) { cap_t cap; @@ -674,7 +675,7 @@ ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr, } EXPORT_SYMBOL_GPL(tpm_show_enabled); -ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr, +ssize_t tpm_show_active(struct device *dev, struct device_attribute *attr, char *buf) { cap_t cap; @@ -690,7 +691,7 @@ ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr, } EXPORT_SYMBOL_GPL(tpm_show_active); -ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr, +ssize_t tpm_show_owned(struct device *dev, struct device_attribute *attr, char *buf) { cap_t cap; @@ -706,8 +707,8 @@ ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr, } EXPORT_SYMBOL_GPL(tpm_show_owned); -ssize_t tpm_show_temp_deactivated(struct device * dev, - struct device_attribute * attr, char *buf) +ssize_t tpm_show_temp_deactivated(struct device *dev, + struct device_attribute *attr, char *buf) { cap_t cap; ssize_t rc; @@ -769,10 +770,10 @@ static int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) /** * tpm_pcr_read - read a pcr value - * @chip_num: tpm idx # or ANY + * @chip_num: tpm idx # or ANY * @pcr_idx: pcr idx to retrieve - * @res_buf: TPM_PCR value - * size of res_buf is 20 bytes (or NULL if you don't care) + * @res_buf: TPM_PCR value + * size of res_buf is 20 bytes (or NULL if you don't care) * * The TPM driver should be built-in, but for whatever reason it * isn't, protect against the chip disappearing, by incrementing @@ -794,9 +795,9 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read); /** * tpm_pcr_extend - extend pcr value with hash - * @chip_num: tpm idx # or AN& + * @chip_num: tpm idx # or AN& * @pcr_idx: pcr idx to extend - * @hash: hash value used to extend pcr value + * @hash: hash value used to extend pcr value * * The TPM driver should be built-in, but for whatever reason it * isn't, protect against the chip disappearing, by incrementing @@ -847,8 +848,7 @@ int tpm_do_selftest(struct tpm_chip *chip) unsigned long duration; struct tpm_cmd_t cmd; - duration = tpm_calc_ordinal_duration(chip, - TPM_ORD_CONTINUE_SELFTEST); + duration = tpm_calc_ordinal_duration(chip, TPM_ORD_CONTINUE_SELFTEST); loops = jiffies_to_msecs(duration) / delay_msec; @@ -965,12 +965,12 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr, if (err) goto out; - /* + /* ignore header 10 bytes algorithm 32 bits (1 == RSA ) encscheme 16 bits sigscheme 16 bits - parameters (RSA 12->bytes: keybit, #primes, expbit) + parameters (RSA 12->bytes: keybit, #primes, expbit) keylenbytes 32 bits 256 byte modulus ignore checksum 20 bytes @@ -1020,43 +1020,33 @@ ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr, str += sprintf(str, "Manufacturer: 0x%x\n", be32_to_cpu(cap.manufacturer_id)); - rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap, - "attempting to determine the 1.1 version"); - if (rc) - return 0; - str += sprintf(str, - "TCG version: %d.%d\nFirmware version: %d.%d\n", - cap.tpm_version.Major, cap.tpm_version.Minor, - cap.tpm_version.revMajor, cap.tpm_version.revMinor); - return str - buf; -} -EXPORT_SYMBOL_GPL(tpm_show_caps); - -ssize_t tpm_show_caps_1_2(struct device * dev, - struct device_attribute * attr, char *buf) -{ - cap_t cap; - ssize_t rc; - char *str = buf; - - rc = tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap, - "attempting to determine the manufacturer"); - if (rc) - return 0; - str += sprintf(str, "Manufacturer: 0x%x\n", - be32_to_cpu(cap.manufacturer_id)); + /* Try to get a TPM version 1.2 TPM_CAP_VERSION_INFO */ rc = tpm_getcap(dev, CAP_VERSION_1_2, &cap, "attempting to determine the 1.2 version"); - if (rc) - return 0; - str += sprintf(str, - "TCG version: %d.%d\nFirmware version: %d.%d\n", - cap.tpm_version_1_2.Major, cap.tpm_version_1_2.Minor, - cap.tpm_version_1_2.revMajor, - cap.tpm_version_1_2.revMinor); + if (!rc) { + str += sprintf(str, + "TCG version: %d.%d\nFirmware version: %d.%d\n", + cap.tpm_version_1_2.Major, + cap.tpm_version_1_2.Minor, + cap.tpm_version_1_2.revMajor, + cap.tpm_version_1_2.revMinor); + } else { + /* Otherwise just use TPM_STRUCT_VER */ + rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap, + "attempting to determine the 1.1 version"); + if (rc) + return 0; + str += sprintf(str, + "TCG version: %d.%d\nFirmware version: %d.%d\n", + cap.tpm_version.Major, + cap.tpm_version.Minor, + cap.tpm_version.revMajor, + cap.tpm_version.revMinor); + } + return str - buf; } -EXPORT_SYMBOL_GPL(tpm_show_caps_1_2); +EXPORT_SYMBOL_GPL(tpm_show_caps); ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr, char *buf) @@ -1102,8 +1092,8 @@ ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr, } EXPORT_SYMBOL_GPL(tpm_store_cancel); -static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, bool check_cancel, - bool *canceled) +static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, + bool check_cancel, bool *canceled) { u8 status = chip->vendor.status(chip); @@ -1170,38 +1160,25 @@ EXPORT_SYMBOL_GPL(wait_for_tpm_stat); */ int tpm_open(struct inode *inode, struct file *file) { - int minor = iminor(inode); - struct tpm_chip *chip = NULL, *pos; - - rcu_read_lock(); - list_for_each_entry_rcu(pos, &tpm_chip_list, list) { - if (pos->vendor.miscdev.minor == minor) { - chip = pos; - get_device(chip->dev); - break; - } - } - rcu_read_unlock(); - - if (!chip) - return -ENODEV; + struct miscdevice *misc = file->private_data; + struct tpm_chip *chip = container_of(misc, struct tpm_chip, + vendor.miscdev); if (test_and_set_bit(0, &chip->is_open)) { dev_dbg(chip->dev, "Another process owns this TPM\n"); - put_device(chip->dev); return -EBUSY; } chip->data_buffer = kzalloc(TPM_BUFSIZE, GFP_KERNEL); if (chip->data_buffer == NULL) { clear_bit(0, &chip->is_open); - put_device(chip->dev); return -ENOMEM; } atomic_set(&chip->data_pending, 0); file->private_data = chip; + get_device(chip->dev); return 0; } EXPORT_SYMBOL_GPL(tpm_open); @@ -1463,7 +1440,6 @@ void tpm_dev_vendor_release(struct tpm_chip *chip) chip->vendor.release(chip->dev); clear_bit(chip->dev_num, dev_mask); - kfree(chip->vendor.miscdev.name); } EXPORT_SYMBOL_GPL(tpm_dev_vendor_release); @@ -1487,7 +1463,7 @@ void tpm_dev_release(struct device *dev) EXPORT_SYMBOL_GPL(tpm_dev_release); /* - * Called from tpm_<specific>.c probe function only for devices + * Called from tpm_<specific>.c probe function only for devices * the driver has determined it should claim. Prior to calling * this function the specific probe function has called pci_enable_device * upon errant exit from this function specific probe function should call @@ -1496,17 +1472,13 @@ EXPORT_SYMBOL_GPL(tpm_dev_release); struct tpm_chip *tpm_register_hardware(struct device *dev, const struct tpm_vendor_specific *entry) { -#define DEVNAME_SIZE 7 - - char *devname; struct tpm_chip *chip; /* Driver specific per-device data */ chip = kzalloc(sizeof(*chip), GFP_KERNEL); - devname = kmalloc(DEVNAME_SIZE, GFP_KERNEL); - if (chip == NULL || devname == NULL) - goto out_free; + if (chip == NULL) + return NULL; mutex_init(&chip->buffer_mutex); mutex_init(&chip->tpm_mutex); @@ -1531,8 +1503,9 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, set_bit(chip->dev_num, dev_mask); - scnprintf(devname, DEVNAME_SIZE, "%s%d", "tpm", chip->dev_num); - chip->vendor.miscdev.name = devname; + scnprintf(chip->devname, sizeof(chip->devname), "%s%d", "tpm", + chip->dev_num); + chip->vendor.miscdev.name = chip->devname; chip->vendor.miscdev.parent = dev; chip->dev = get_device(dev); @@ -1558,7 +1531,7 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, goto put_device; } - chip->bios_dir = tpm_bios_log_setup(devname); + chip->bios_dir = tpm_bios_log_setup(chip->devname); /* Make chip available */ spin_lock(&driver_lock); @@ -1571,7 +1544,6 @@ put_device: put_device(chip->dev); out_free: kfree(chip); - kfree(devname); return NULL; } EXPORT_SYMBOL_GPL(tpm_register_hardware); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index a7bfc17..f328478 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -59,8 +59,6 @@ extern ssize_t tpm_show_pcrs(struct device *, struct device_attribute *attr, char *); extern ssize_t tpm_show_caps(struct device *, struct device_attribute *attr, char *); -extern ssize_t tpm_show_caps_1_2(struct device *, struct device_attribute *attr, - char *); extern ssize_t tpm_store_cancel(struct device *, struct device_attribute *attr, const char *, size_t); extern ssize_t tpm_show_enabled(struct device *, struct device_attribute *attr, @@ -122,6 +120,7 @@ struct tpm_chip { struct device *dev; /* Device stuff */ int dev_num; /* /dev/tpm# */ + char devname[7]; unsigned long is_open; /* only one allowed */ int time_expired; diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index 99d6820..c9a528d 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -202,7 +202,7 @@ static int __init init_atmel(void) have_region = (atmel_request_region - (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1; + (base, region_size, "tpm_atmel0") == NULL) ? 0 : 1; pdev = platform_device_register_simple("tpm_atmel", -1, NULL, 0); if (IS_ERR(pdev)) { diff --git a/drivers/char/tpm/tpm_eventlog.c b/drivers/char/tpm/tpm_eventlog.c index 84ddc55..59f7cb2 100644 --- a/drivers/char/tpm/tpm_eventlog.c +++ b/drivers/char/tpm/tpm_eventlog.c @@ -406,7 +406,6 @@ out_tpm: out: return NULL; } -EXPORT_SYMBOL_GPL(tpm_bios_log_setup); void tpm_bios_log_teardown(struct dentry **lst) { @@ -415,5 +414,3 @@ void tpm_bios_log_teardown(struct dentry **lst) for (i = 0; i < 3; i++) securityfs_remove(lst[i]); } -EXPORT_SYMBOL_GPL(tpm_bios_log_teardown); -MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c new file mode 100644 index 0000000..c3cd7fe --- /dev/null +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -0,0 +1,284 @@ +/* + * ATMEL I2C TPM AT97SC3204T + * + * Copyright (C) 2012 V Lab Technologies + * Teddy Reed <teddy@prosauce.org> + * Copyright (C) 2013, Obsidian Research Corp. + * Jason Gunthorpe <jgunthorpe@obsidianresearch.com> + * Device driver for ATMEL I2C TPMs. + * + * Teddy Reed determined the basic I2C command flow, unlike other I2C TPM + * devices the raw TCG formatted TPM command data is written via I2C and then + * raw TCG formatted TPM command data is returned via I2C. + * + * TGC status/locality/etc functions seen in the LPC implementation do not + * seem to be present. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/>. + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include "tpm.h" + +#define I2C_DRIVER_NAME "tpm_i2c_atmel" + +#define TPM_I2C_SHORT_TIMEOUT 750 /* ms */ +#define TPM_I2C_LONG_TIMEOUT 2000 /* 2 sec */ + +#define ATMEL_STS_OK 1 + +struct priv_data { + size_t len; + /* This is the amount we read on the first try. 25 was chosen to fit a + * fair number of read responses in the buffer so a 2nd retry can be + * avoided in small message cases. */ + u8 buffer[sizeof(struct tpm_output_header) + 25]; +}; + +static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + struct priv_data *priv = chip->vendor.priv; + struct i2c_client *client = to_i2c_client(chip->dev); + s32 status; + + priv->len = 0; + + if (len <= 2) + return -EIO; + + status = i2c_master_send(client, buf, len); + + dev_dbg(chip->dev, + "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__, + (int)min_t(size_t, 64, len), buf, len, status); + return status; +} + +static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) +{ + struct priv_data *priv = chip->vendor.priv; + struct i2c_client *client = to_i2c_client(chip->dev); + struct tpm_output_header *hdr = + (struct tpm_output_header *)priv->buffer; + u32 expected_len; + int rc; + + if (priv->len == 0) + return -EIO; + + /* Get the message size from the message header, if we didn't get the + * whole message in read_status then we need to re-read the + * message. */ + expected_len = be32_to_cpu(hdr->length); + if (expected_len > count) + return -ENOMEM; + + if (priv->len >= expected_len) { + dev_dbg(chip->dev, + "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__, + (int)min_t(size_t, 64, expected_len), buf, count, + expected_len); + memcpy(buf, priv->buffer, expected_len); + return expected_len; + } + + rc = i2c_master_recv(client, buf, expected_len); + dev_dbg(chip->dev, + "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__, + (int)min_t(size_t, 64, expected_len), buf, count, + expected_len); + return rc; +} + +static void i2c_atmel_cancel(struct tpm_chip *chip) +{ + dev_err(chip->dev, "TPM operation cancellation was requested, but is not supported"); +} + +static u8 i2c_atmel_read_status(struct tpm_chip *chip) +{ + struct priv_data *priv = chip->vendor.priv; + struct i2c_client *client = to_i2c_client(chip->dev); + int rc; + + /* The TPM fails the I2C read until it is ready, so we do the entire + * transfer here and buffer it locally. This way the common code can + * properly handle the timeouts. */ + priv->len = 0; + memset(priv->buffer, 0, sizeof(priv->buffer)); + + + /* Once the TPM has completed the command the command remains readable + * until another command is issued. */ + rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer)); + dev_dbg(chip->dev, + "%s: sts=%d", __func__, rc); + if (rc <= 0) + return 0; + + priv->len = rc; + + return ATMEL_STS_OK; +} + +static const struct file_operations i2c_atmel_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); +static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); +static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); + +static struct attribute *i2c_atmel_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_enabled.attr, + &dev_attr_active.attr, + &dev_attr_owned.attr, + &dev_attr_temp_deactivated.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + &dev_attr_durations.attr, + &dev_attr_timeouts.attr, + NULL, +}; + +static struct attribute_group i2c_atmel_attr_grp = { + .attrs = i2c_atmel_attrs +}; + +static bool i2c_atmel_req_canceled(struct tpm_chip *chip, u8 status) +{ + return 0; +} + +static const struct tpm_vendor_specific i2c_atmel = { + .status = i2c_atmel_read_status, + .recv = i2c_atmel_recv, + .send = i2c_atmel_send, + .cancel = i2c_atmel_cancel, + .req_complete_mask = ATMEL_STS_OK, + .req_complete_val = ATMEL_STS_OK, + .req_canceled = i2c_atmel_req_canceled, + .attr_group = &i2c_atmel_attr_grp, + .miscdev.fops = &i2c_atmel_ops, +}; + +static int i2c_atmel_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int rc; + struct tpm_chip *chip; + struct device *dev = &client->dev; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENODEV; + + chip = tpm_register_hardware(dev, &i2c_atmel); + if (!chip) { + dev_err(dev, "%s() error in tpm_register_hardware\n", __func__); + return -ENODEV; + } + + chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data), + GFP_KERNEL); + + /* Default timeouts */ + chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT); + chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.irq = 0; + + /* There is no known way to probe for this device, and all version + * information seems to be read via TPM commands. Thus we rely on the + * TPM startup process in the common code to detect the device. */ + if (tpm_get_timeouts(chip)) { + rc = -ENODEV; + goto out_err; + } + + if (tpm_do_selftest(chip)) { + rc = -ENODEV; + goto out_err; + } + + return 0; + +out_err: + tpm_dev_vendor_release(chip); + tpm_remove_hardware(chip->dev); + return rc; +} + +static int i2c_atmel_remove(struct i2c_client *client) +{ + struct device *dev = &(client->dev); + struct tpm_chip *chip = dev_get_drvdata(dev); + + if (chip) + tpm_dev_vendor_release(chip); + tpm_remove_hardware(dev); + kfree(chip); + return 0; +} + +static const struct i2c_device_id i2c_atmel_id[] = { + {I2C_DRIVER_NAME, 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, i2c_atmel_id); + +#ifdef CONFIG_OF +static const struct of_device_id i2c_atmel_of_match[] = { + {.compatible = "atmel,at97sc3204t"}, + {}, +}; +MODULE_DEVICE_TABLE(of, i2c_atmel_of_match); +#endif + +static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume); + +static struct i2c_driver i2c_atmel_driver = { + .id_table = i2c_atmel_id, + .probe = i2c_atmel_probe, + .remove = i2c_atmel_remove, + .driver = { + .name = I2C_DRIVER_NAME, + .owner = THIS_MODULE, + .pm = &i2c_atmel_pm_ops, + .of_match_table = of_match_ptr(i2c_atmel_of_match), + }, +}; + +module_i2c_driver(i2c_atmel_driver); + +MODULE_AUTHOR("Jason Gunthorpe <jgunthorpe@obsidianresearch.com>"); +MODULE_DESCRIPTION("Atmel TPM I2C Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index b8735de..fefd2aa 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -581,7 +581,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); -static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); @@ -685,7 +685,6 @@ out_vendor: chip->dev->release = NULL; chip->release = NULL; tpm_dev.client = NULL; - dev_set_drvdata(chip->dev, chip); out_err: return rc; } @@ -766,7 +765,6 @@ static int tpm_tis_i2c_remove(struct i2c_client *client) chip->dev->release = NULL; chip->release = NULL; tpm_dev.client = NULL; - dev_set_drvdata(chip->dev, chip); return 0; } diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c new file mode 100644 index 0000000..6276fea --- /dev/null +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -0,0 +1,710 @@ +/****************************************************************************** + * Nuvoton TPM I2C Device Driver Interface for WPCT301/NPCT501, + * based on the TCG TPM Interface Spec version 1.2. + * Specifications at www.trustedcomputinggroup.org + * + * Copyright (C) 2011, Nuvoton Technology Corporation. + * Dan Morav <dan.morav@nuvoton.com> + * Copyright (C) 2013, Obsidian Research Corp. + * Jason Gunthorpe <jgunthorpe@obsidianresearch.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/>. + * + * Nuvoton contact information: APC.Support@nuvoton.com + *****************************************************************************/ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/wait.h> +#include <linux/i2c.h> +#include "tpm.h" + +/* I2C interface offsets */ +#define TPM_STS 0x00 +#define TPM_BURST_COUNT 0x01 +#define TPM_DATA_FIFO_W 0x20 +#define TPM_DATA_FIFO_R 0x40 +#define TPM_VID_DID_RID 0x60 +/* TPM command header size */ +#define TPM_HEADER_SIZE 10 +#define TPM_RETRY 5 +/* + * I2C bus device maximum buffer size w/o counting I2C address or command + * i.e. max size required for I2C write is 34 = addr, command, 32 bytes data + */ +#define TPM_I2C_MAX_BUF_SIZE 32 +#define TPM_I2C_RETRY_COUNT 32 +#define TPM_I2C_BUS_DELAY 1 /* msec */ +#define TPM_I2C_RETRY_DELAY_SHORT 2 /* msec */ +#define TPM_I2C_RETRY_DELAY_LONG 10 /* msec */ + +#define I2C_DRIVER_NAME "tpm_i2c_nuvoton" + +struct priv_data { + unsigned int intrs; +}; + +static s32 i2c_nuvoton_read_buf(struct i2c_client *client, u8 offset, u8 size, + u8 *data) +{ + s32 status; + + status = i2c_smbus_read_i2c_block_data(client, offset, size, data); + dev_dbg(&client->dev, + "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__, + offset, size, (int)size, data, status); + return status; +} + +static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size, + u8 *data) +{ + s32 status; + + status = i2c_smbus_write_i2c_block_data(client, offset, size, data); + dev_dbg(&client->dev, + "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__, + offset, size, (int)size, data, status); + return status; +} + +#define TPM_STS_VALID 0x80 +#define TPM_STS_COMMAND_READY 0x40 +#define TPM_STS_GO 0x20 +#define TPM_STS_DATA_AVAIL 0x10 +#define TPM_STS_EXPECT 0x08 +#define TPM_STS_RESPONSE_RETRY 0x02 +#define TPM_STS_ERR_VAL 0x07 /* bit2...bit0 reads always 0 */ + +#define TPM_I2C_SHORT_TIMEOUT 750 /* ms */ +#define TPM_I2C_LONG_TIMEOUT 2000 /* 2 sec */ + +/* read TPM_STS register */ +static u8 i2c_nuvoton_read_status(struct tpm_chip *chip) +{ + struct i2c_client *client = to_i2c_client(chip->dev); + s32 status; + u8 data; + + status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data); + if (status <= 0) { + dev_err(chip->dev, "%s() error return %d\n", __func__, + status); + data = TPM_STS_ERR_VAL; + } + + return data; +} + +/* write byte to TPM_STS register */ +static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data) +{ + s32 status; + int i; + + /* this causes the current command to be aborted */ + for (i = 0, status = -1; i < TPM_I2C_RETRY_COUNT && status < 0; i++) { + status = i2c_nuvoton_write_buf(client, TPM_STS, 1, &data); + msleep(TPM_I2C_BUS_DELAY); + } + return status; +} + +/* write commandReady to TPM_STS register */ +static void i2c_nuvoton_ready(struct tpm_chip *chip) +{ + struct i2c_client *client = to_i2c_client(chip->dev); + s32 status; + + /* this causes the current command to be aborted */ + status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY); + if (status < 0) + dev_err(chip->dev, + "%s() fail to write TPM_STS.commandReady\n", __func__); +} + +/* read burstCount field from TPM_STS register + * return -1 on fail to read */ +static int i2c_nuvoton_get_burstcount(struct i2c_client *client, + struct tpm_chip *chip) +{ + unsigned long stop = jiffies + chip->vendor.timeout_d; + s32 status; + int burst_count = -1; + u8 data; + + /* wait for burstcount to be non-zero */ + do { + /* in I2C burstCount is 1 byte */ + status = i2c_nuvoton_read_buf(client, TPM_BURST_COUNT, 1, + &data); + if (status > 0 && data > 0) { + burst_count = min_t(u8, TPM_I2C_MAX_BUF_SIZE, data); + break; + } + msleep(TPM_I2C_BUS_DELAY); + } while (time_before(jiffies, stop)); + + return burst_count; +} + +/* + * WPCT301/NPCT501 SINT# supports only dataAvail + * any call to this function which is not waiting for dataAvail will + * set queue to NULL to avoid waiting for interrupt + */ +static bool i2c_nuvoton_check_status(struct tpm_chip *chip, u8 mask, u8 value) +{ + u8 status = i2c_nuvoton_read_status(chip); + return (status != TPM_STS_ERR_VAL) && ((status & mask) == value); +} + +static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value, + u32 timeout, wait_queue_head_t *queue) +{ + if (chip->vendor.irq && queue) { + s32 rc; + DEFINE_WAIT(wait); + struct priv_data *priv = chip->vendor.priv; + unsigned int cur_intrs = priv->intrs; + + enable_irq(chip->vendor.irq); + rc = wait_event_interruptible_timeout(*queue, + cur_intrs != priv->intrs, + timeout); + if (rc > 0) + return 0; + /* At this point we know that the SINT pin is asserted, so we + * do not need to do i2c_nuvoton_check_status */ + } else { + unsigned long ten_msec, stop; + bool status_valid; + + /* check current status */ + status_valid = i2c_nuvoton_check_status(chip, mask, value); + if (status_valid) + return 0; + + /* use polling to wait for the event */ + ten_msec = jiffies + msecs_to_jiffies(TPM_I2C_RETRY_DELAY_LONG); + stop = jiffies + timeout; + do { + if (time_before(jiffies, ten_msec)) + msleep(TPM_I2C_RETRY_DELAY_SHORT); + else + msleep(TPM_I2C_RETRY_DELAY_LONG); + status_valid = i2c_nuvoton_check_status(chip, mask, + value); + if (status_valid) + return 0; + } while (time_before(jiffies, stop)); + } + dev_err(chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask, + value); + return -ETIMEDOUT; +} + +/* wait for dataAvail field to be set in the TPM_STS register */ +static int i2c_nuvoton_wait_for_data_avail(struct tpm_chip *chip, u32 timeout, + wait_queue_head_t *queue) +{ + return i2c_nuvoton_wait_for_stat(chip, + TPM_STS_DATA_AVAIL | TPM_STS_VALID, + TPM_STS_DATA_AVAIL | TPM_STS_VALID, + timeout, queue); +} + +/* Read @count bytes into @buf from TPM_RD_FIFO register */ +static int i2c_nuvoton_recv_data(struct i2c_client *client, + struct tpm_chip *chip, u8 *buf, size_t count) +{ + s32 rc; + int burst_count, bytes2read, size = 0; + + while (size < count && + i2c_nuvoton_wait_for_data_avail(chip, + chip->vendor.timeout_c, + &chip->vendor.read_queue) == 0) { + burst_count = i2c_nuvoton_get_burstcount(client, chip); + if (burst_count < 0) { + dev_err(chip->dev, + "%s() fail to read burstCount=%d\n", __func__, + burst_count); + return -EIO; + } + bytes2read = min_t(size_t, burst_count, count - size); + rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R, + bytes2read, &buf[size]); + if (rc < 0) { + dev_err(chip->dev, + "%s() fail on i2c_nuvoton_read_buf()=%d\n", + __func__, rc); + return -EIO; + } + dev_dbg(chip->dev, "%s(%d):", __func__, bytes2read); + size += bytes2read; + } + + return size; +} + +/* Read TPM command results */ +static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) +{ + struct device *dev = chip->dev; + struct i2c_client *client = to_i2c_client(dev); + s32 rc; + int expected, status, burst_count, retries, size = 0; + + if (count < TPM_HEADER_SIZE) { + i2c_nuvoton_ready(chip); /* return to idle */ + dev_err(dev, "%s() count < header size\n", __func__); + return -EIO; + } + for (retries = 0; retries < TPM_RETRY; retries++) { + if (retries > 0) { + /* if this is not the first trial, set responseRetry */ + i2c_nuvoton_write_status(client, + TPM_STS_RESPONSE_RETRY); + } + /* + * read first available (> 10 bytes), including: + * tag, paramsize, and result + */ + status = i2c_nuvoton_wait_for_data_avail( + chip, chip->vendor.timeout_c, &chip->vendor.read_queue); + if (status != 0) { + dev_err(dev, "%s() timeout on dataAvail\n", __func__); + size = -ETIMEDOUT; + continue; + } + burst_count = i2c_nuvoton_get_burstcount(client, chip); + if (burst_count < 0) { + dev_err(dev, "%s() fail to get burstCount\n", __func__); + size = -EIO; + continue; + } + size = i2c_nuvoton_recv_data(client, chip, buf, + burst_count); + if (size < TPM_HEADER_SIZE) { + dev_err(dev, "%s() fail to read header\n", __func__); + size = -EIO; + continue; + } + /* + * convert number of expected bytes field from big endian 32 bit + * to machine native + */ + expected = be32_to_cpu(*(__be32 *) (buf + 2)); + if (expected > count) { + dev_err(dev, "%s() expected > count\n", __func__); + size = -EIO; + continue; + } + rc = i2c_nuvoton_recv_data(client, chip, &buf[size], + expected - size); + size += rc; + if (rc < 0 || size < expected) { + dev_err(dev, "%s() fail to read remainder of result\n", + __func__); + size = -EIO; + continue; + } + if (i2c_nuvoton_wait_for_stat( + chip, TPM_STS_VALID | TPM_STS_DATA_AVAIL, + TPM_STS_VALID, chip->vendor.timeout_c, + NULL)) { + dev_err(dev, "%s() error left over data\n", __func__); + size = -ETIMEDOUT; + continue; + } + break; + } + i2c_nuvoton_ready(chip); + dev_dbg(chip->dev, "%s() -> %d\n", __func__, size); + return size; +} + +/* + * Send TPM command. + * + * If interrupts are used (signaled by an irq set in the vendor structure) + * tpm.c can skip polling for the data to be available as the interrupt is + * waited for here + */ +static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + struct device *dev = chip->dev; + struct i2c_client *client = to_i2c_client(dev); + u32 ordinal; + size_t count = 0; + int burst_count, bytes2write, retries, rc = -EIO; + + for (retries = 0; retries < TPM_RETRY; retries++) { + i2c_nuvoton_ready(chip); + if (i2c_nuvoton_wait_for_stat(chip, TPM_STS_COMMAND_READY, + TPM_STS_COMMAND_READY, + chip->vendor.timeout_b, NULL)) { + dev_err(dev, "%s() timeout on commandReady\n", + __func__); + rc = -EIO; + continue; + } + rc = 0; + while (count < len - 1) { + burst_count = i2c_nuvoton_get_burstcount(client, + chip); + if (burst_count < 0) { + dev_err(dev, "%s() fail get burstCount\n", + __func__); + rc = -EIO; + break; + } + bytes2write = min_t(size_t, burst_count, + len - 1 - count); + rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W, + bytes2write, &buf[count]); + if (rc < 0) { + dev_err(dev, "%s() fail i2cWriteBuf\n", + __func__); + break; + } + dev_dbg(dev, "%s(%d):", __func__, bytes2write); + count += bytes2write; + rc = i2c_nuvoton_wait_for_stat(chip, + TPM_STS_VALID | + TPM_STS_EXPECT, + TPM_STS_VALID | + TPM_STS_EXPECT, + chip->vendor.timeout_c, + NULL); + if (rc < 0) { + dev_err(dev, "%s() timeout on Expect\n", + __func__); + rc = -ETIMEDOUT; + break; + } + } + if (rc < 0) + continue; + + /* write last byte */ + rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W, 1, + &buf[count]); + if (rc < 0) { + dev_err(dev, "%s() fail to write last byte\n", + __func__); + rc = -EIO; + continue; + } + dev_dbg(dev, "%s(last): %02x", __func__, buf[count]); + rc = i2c_nuvoton_wait_for_stat(chip, + TPM_STS_VALID | TPM_STS_EXPECT, + TPM_STS_VALID, + chip->vendor.timeout_c, NULL); + if (rc) { + dev_err(dev, "%s() timeout on Expect to clear\n", + __func__); + rc = -ETIMEDOUT; + continue; + } + break; + } + if (rc < 0) { + /* retries == TPM_RETRY */ + i2c_nuvoton_ready(chip); + return rc; + } + /* execute the TPM command */ + rc = i2c_nuvoton_write_status(client, TPM_STS_GO); + if (rc < 0) { + dev_err(dev, "%s() fail to write Go\n", __func__); + i2c_nuvoton_ready(chip); + return rc; + } + ordinal = be32_to_cpu(*((__be32 *) (buf + 6))); + rc = i2c_nuvoton_wait_for_data_avail(chip, + tpm_calc_ordinal_duration(chip, + ordinal), + &chip->vendor.read_queue); + if (rc) { + dev_err(dev, "%s() timeout command duration\n", __func__); + i2c_nuvoton_ready(chip); + return rc; + } + + dev_dbg(dev, "%s() -> %zd\n", __func__, len); + return len; +} + +static bool i2c_nuvoton_req_canceled(struct tpm_chip *chip, u8 status) +{ + return (status == TPM_STS_COMMAND_READY); +} + +static const struct file_operations i2c_nuvoton_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); +static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); +static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); + +static struct attribute *i2c_nuvoton_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_enabled.attr, + &dev_attr_active.attr, + &dev_attr_owned.attr, + &dev_attr_temp_deactivated.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + &dev_attr_durations.attr, + &dev_attr_timeouts.attr, + NULL, +}; + +static struct attribute_group i2c_nuvoton_attr_grp = { + .attrs = i2c_nuvoton_attrs +}; + +static const struct tpm_vendor_specific tpm_i2c = { + .status = i2c_nuvoton_read_status, + .recv = i2c_nuvoton_recv, + .send = i2c_nuvoton_send, + .cancel = i2c_nuvoton_ready, + .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID, + .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID, + .req_canceled = i2c_nuvoton_req_canceled, + .attr_group = &i2c_nuvoton_attr_grp, + .miscdev.fops = &i2c_nuvoton_ops, +}; + +/* The only purpose for the handler is to signal to any waiting threads that + * the interrupt is currently being asserted. The driver does not do any + * processing triggered by interrupts, and the chip provides no way to mask at + * the source (plus that would be slow over I2C). Run the IRQ as a one-shot, + * this means it cannot be shared. */ +static irqreturn_t i2c_nuvoton_int_handler(int dummy, void *dev_id) +{ + struct tpm_chip *chip = dev_id; + struct priv_data *priv = chip->vendor.priv; + + priv->intrs++; + wake_up(&chip->vendor.read_queue); + disable_irq_nosync(chip->vendor.irq); + return IRQ_HANDLED; +} + +static int get_vid(struct i2c_client *client, u32 *res) +{ + static const u8 vid_did_rid_value[] = { 0x50, 0x10, 0xfe }; + u32 temp; + s32 rc; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + rc = i2c_nuvoton_read_buf(client, TPM_VID_DID_RID, 4, (u8 *)&temp); + if (rc < 0) + return rc; + + /* check WPCT301 values - ignore RID */ + if (memcmp(&temp, vid_did_rid_value, sizeof(vid_did_rid_value))) { + /* + * f/w rev 2.81 has an issue where the VID_DID_RID is not + * reporting the right value. so give it another chance at + * offset 0x20 (FIFO_W). + */ + rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_W, 4, + (u8 *) (&temp)); + if (rc < 0) + return rc; + + /* check WPCT301 values - ignore RID */ + if (memcmp(&temp, vid_did_rid_value, + sizeof(vid_did_rid_value))) + return -ENODEV; + } + + *res = temp; + return 0; +} + +static int i2c_nuvoton_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int rc; + struct tpm_chip *chip; + struct device *dev = &client->dev; + u32 vid = 0; + + rc = get_vid(client, &vid); + if (rc) + return rc; + + dev_info(dev, "VID: %04X DID: %02X RID: %02X\n", (u16) vid, + (u8) (vid >> 16), (u8) (vid >> 24)); + + chip = tpm_register_hardware(dev, &tpm_i2c); + if (!chip) { + dev_err(dev, "%s() error in tpm_register_hardware\n", __func__); + return -ENODEV; + } + + chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data), + GFP_KERNEL); + init_waitqueue_head(&chip->vendor.read_queue); + init_waitqueue_head(&chip->vendor.int_queue); + + /* Default timeouts */ + chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT); + chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); + + /* + * I2C intfcaps (interrupt capabilitieis) in the chip are hard coded to: + * TPM_INTF_INT_LEVEL_LOW | TPM_INTF_DATA_AVAIL_INT + * The IRQ should be set in the i2c_board_info (which is done + * automatically in of_i2c_register_devices, for device tree users */ + chip->vendor.irq = client->irq; + + if (chip->vendor.irq) { + dev_dbg(dev, "%s() chip-vendor.irq\n", __func__); + rc = devm_request_irq(dev, chip->vendor.irq, + i2c_nuvoton_int_handler, + IRQF_TRIGGER_LOW, + chip->vendor.miscdev.name, + chip); + if (rc) { + dev_err(dev, "%s() Unable to request irq: %d for use\n", + __func__, chip->vendor.irq); + chip->vendor.irq = 0; + } else { + /* Clear any pending interrupt */ + i2c_nuvoton_ready(chip); + /* - wait for TPM_STS==0xA0 (stsValid, commandReady) */ + rc = i2c_nuvoton_wait_for_stat(chip, + TPM_STS_COMMAND_READY, + TPM_STS_COMMAND_READY, + chip->vendor.timeout_b, + NULL); + if (rc == 0) { + /* + * TIS is in ready state + * write dummy byte to enter reception state + * TPM_DATA_FIFO_W <- rc (0) + */ + rc = i2c_nuvoton_write_buf(client, + TPM_DATA_FIFO_W, + 1, (u8 *) (&rc)); + if (rc < 0) + goto out_err; + /* TPM_STS <- 0x40 (commandReady) */ + i2c_nuvoton_ready(chip); + } else { + /* + * timeout_b reached - command was + * aborted. TIS should now be in idle state - + * only TPM_STS_VALID should be set + */ + if (i2c_nuvoton_read_status(chip) != + TPM_STS_VALID) { + rc = -EIO; + goto out_err; + } + } + } + } + + if (tpm_get_timeouts(chip)) { + rc = -ENODEV; + goto out_err; + } + + if (tpm_do_selftest(chip)) { + rc = -ENODEV; + goto out_err; + } + + return 0; + +out_err: + tpm_dev_vendor_release(chip); + tpm_remove_hardware(chip->dev); + return rc; +} + +static int i2c_nuvoton_remove(struct i2c_client *client) +{ + struct device *dev = &(client->dev); + struct tpm_chip *chip = dev_get_drvdata(dev); + + if (chip) + tpm_dev_vendor_release(chip); + tpm_remove_hardware(dev); + kfree(chip); + return 0; +} + + +static const struct i2c_device_id i2c_nuvoton_id[] = { + {I2C_DRIVER_NAME, 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, i2c_nuvoton_id); + +#ifdef CONFIG_OF +static const struct of_device_id i2c_nuvoton_of_match[] = { + {.compatible = "nuvoton,npct501"}, + {.compatible = "winbond,wpct301"}, + {}, +}; +MODULE_DEVICE_TABLE(of, i2c_nuvoton_of_match); +#endif + +static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume); + +static struct i2c_driver i2c_nuvoton_driver = { + .id_table = i2c_nuvoton_id, + .probe = i2c_nuvoton_probe, + .remove = i2c_nuvoton_remove, + .driver = { + .name = I2C_DRIVER_NAME, + .owner = THIS_MODULE, + .pm = &i2c_nuvoton_pm_ops, + .of_match_table = of_match_ptr(i2c_nuvoton_of_match), + }, +}; + +module_i2c_driver(i2c_nuvoton_driver); + +MODULE_AUTHOR("Dan Morav (dan.morav@nuvoton.com)"); +MODULE_DESCRIPTION("Nuvoton TPM I2C Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 5bb8e2d..a0d6ceb5 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -584,7 +584,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); -static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static struct attribute *stm_tpm_attrs[] = { @@ -746,8 +746,6 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) tpm_get_timeouts(chip); - i2c_set_clientdata(client, chip); - dev_info(chip->dev, "TPM I2C Initialized\n"); return 0; _irq_set: @@ -807,24 +805,18 @@ static int tpm_st33_i2c_remove(struct i2c_client *client) #ifdef CONFIG_PM_SLEEP /* * tpm_st33_i2c_pm_suspend suspend the TPM device - * Added: Work around when suspend and no tpm application is running, suspend - * may fail because chip->data_buffer is not set (only set in tpm_open in Linux - * TPM core) * @param: client, the i2c_client drescription (TPM I2C description). * @param: mesg, the power management message. * @return: 0 in case of success. */ static int tpm_st33_i2c_pm_suspend(struct device *dev) { - struct tpm_chip *chip = dev_get_drvdata(dev); struct st33zp24_platform_data *pin_infos = dev->platform_data; int ret = 0; if (power_mgt) { gpio_set_value(pin_infos->io_lpcpd, 0); } else { - if (chip->data_buffer == NULL) - chip->data_buffer = pin_infos->tpm_i2c_buffer[0]; ret = tpm_pm_suspend(dev); } return ret; @@ -849,8 +841,6 @@ static int tpm_st33_i2c_pm_resume(struct device *dev) TPM_STS_VALID) == TPM_STS_VALID, chip->vendor.timeout_b); } else { - if (chip->data_buffer == NULL) - chip->data_buffer = pin_infos->tpm_i2c_buffer[0]; ret = tpm_pm_resume(dev); if (!ret) tpm_do_selftest(chip); diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 56b07c3..2783a42 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -98,7 +98,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) if (count < len) { dev_err(ibmvtpm->dev, - "Invalid size in recv: count=%ld, crq_size=%d\n", + "Invalid size in recv: count=%zd, crq_size=%d\n", count, len); return -EIO; } @@ -136,7 +136,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) if (count > ibmvtpm->rtce_size) { dev_err(ibmvtpm->dev, - "Invalid size in send: count=%ld, rtce_size=%d\n", + "Invalid size in send: count=%zd, rtce_size=%d\n", count, ibmvtpm->rtce_size); return -EIO; } @@ -419,7 +419,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); -static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c index 2168d15..8e562dc 100644 --- a/drivers/char/tpm/tpm_ppi.c +++ b/drivers/char/tpm/tpm_ppi.c @@ -452,12 +452,8 @@ int tpm_add_ppi(struct kobject *parent) { return sysfs_create_group(parent, &ppi_attr_grp); } -EXPORT_SYMBOL_GPL(tpm_add_ppi); void tpm_remove_ppi(struct kobject *parent) { sysfs_remove_group(parent, &ppi_attr_grp); } -EXPORT_SYMBOL_GPL(tpm_remove_ppi); - -MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 5796d01..1b74459 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -448,7 +448,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); -static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 94c280d..c8ff4df 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -351,8 +351,6 @@ static int tpmfront_probe(struct xenbus_device *dev, tpm_get_timeouts(priv->chip); - dev_set_drvdata(&dev->dev, priv->chip); - return rv; } diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 1a35ea5..bd2bca3 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -489,6 +489,11 @@ void drm_sysfs_hotplug_event(struct drm_device *dev) } EXPORT_SYMBOL(drm_sysfs_hotplug_event); +static void drm_sysfs_release(struct device *dev) +{ + kfree(dev); +} + /** * drm_sysfs_device_add - adds a class device to sysfs for a character driver * @dev: DRM device to be added @@ -501,6 +506,7 @@ EXPORT_SYMBOL(drm_sysfs_hotplug_event); int drm_sysfs_device_add(struct drm_minor *minor) { char *minor_str; + int r; if (minor->type == DRM_MINOR_CONTROL) minor_str = "controlD%d"; @@ -509,14 +515,34 @@ int drm_sysfs_device_add(struct drm_minor *minor) else minor_str = "card%d"; - minor->kdev = device_create(drm_class, minor->dev->dev, - MKDEV(DRM_MAJOR, minor->index), - minor, minor_str, minor->index); - if (IS_ERR(minor->kdev)) { - DRM_ERROR("device create failed %ld\n", PTR_ERR(minor->kdev)); - return PTR_ERR(minor->kdev); + minor->kdev = kzalloc(sizeof(*minor->kdev), GFP_KERNEL); + if (!minor->dev) { + r = -ENOMEM; + goto error; } + + device_initialize(minor->kdev); + minor->kdev->devt = MKDEV(DRM_MAJOR, minor->index); + minor->kdev->class = drm_class; + minor->kdev->type = &drm_sysfs_device_minor; + minor->kdev->parent = minor->dev->dev; + minor->kdev->release = drm_sysfs_release; + dev_set_drvdata(minor->kdev, minor); + + r = dev_set_name(minor->kdev, minor_str, minor->index); + if (r < 0) + goto error; + + r = device_add(minor->kdev); + if (r < 0) + goto error; + return 0; + +error: + DRM_ERROR("device create failed %d\n", r); + put_device(minor->kdev); + return r; } /** @@ -529,7 +555,7 @@ int drm_sysfs_device_add(struct drm_minor *minor) void drm_sysfs_device_remove(struct drm_minor *minor) { if (minor->kdev) - device_destroy(drm_class, MKDEV(DRM_MAJOR, minor->index)); + device_unregister(minor->kdev); minor->kdev = NULL; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 3271fd4..7bccedc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -383,6 +383,8 @@ out: g2d_userptr->npages, g2d_userptr->vma); + exynos_gem_put_vma(g2d_userptr->vma); + if (!g2d_userptr->out_of_list) list_del_init(&g2d_userptr->list); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8600c31..ccdbecc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1816,6 +1816,7 @@ struct drm_i915_file_private { #define HAS_POWER_WELL(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev)) #define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) #define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev)) +#define HAS_PC8(dev) (IS_HASWELL(dev)) /* XXX HSW:ULX */ #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 6dd622d..e4fba39 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -790,7 +790,12 @@ init_vbt_defaults(struct drm_i915_private *dev_priv) /* Default to using SSC */ dev_priv->vbt.lvds_use_ssc = 1; - dev_priv->vbt.lvds_ssc_freq = intel_bios_ssc_frequency(dev, 1); + /* + * Core/SandyBridge/IvyBridge use alternative (120MHz) reference + * clock for LVDS. + */ + dev_priv->vbt.lvds_ssc_freq = intel_bios_ssc_frequency(dev, + !HAS_PCH_SPLIT(dev)); DRM_DEBUG_KMS("Set default to SSC at %dMHz\n", dev_priv->vbt.lvds_ssc_freq); for (port = PORT_A; port < I915_MAX_PORTS; port++) { diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 1591576..330077b 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1406,6 +1406,26 @@ void intel_ddi_get_config(struct intel_encoder *encoder, default: break; } + + if (encoder->type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp_bpp && + pipe_config->pipe_bpp > dev_priv->vbt.edp_bpp) { + /* + * This is a big fat ugly hack. + * + * Some machines in UEFI boot mode provide us a VBT that has 18 + * bpp and 1.62 GHz link bandwidth for eDP, which for reasons + * unknown we fail to light up. Yet the same BIOS boots up with + * 24 bpp and 2.7 GHz link. Use the same bpp as the BIOS uses as + * max, not what it tells us to use. + * + * Note: This will still be broken if the eDP panel is not lit + * up by the BIOS, and thus we can't get the mode at module + * load. + */ + DRM_DEBUG_KMS("pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n", + pipe_config->pipe_bpp, dev_priv->vbt.edp_bpp); + dev_priv->vbt.edp_bpp = pipe_config->pipe_bpp; + } } static void intel_ddi_destroy(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3cddd50..7ec8b48 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6518,6 +6518,9 @@ static void __hsw_disable_package_c8(struct drm_i915_private *dev_priv) void hsw_enable_package_c8(struct drm_i915_private *dev_priv) { + if (!HAS_PC8(dev_priv->dev)) + return; + mutex_lock(&dev_priv->pc8.lock); __hsw_enable_package_c8(dev_priv); mutex_unlock(&dev_priv->pc8.lock); @@ -6525,6 +6528,9 @@ void hsw_enable_package_c8(struct drm_i915_private *dev_priv) void hsw_disable_package_c8(struct drm_i915_private *dev_priv) { + if (!HAS_PC8(dev_priv->dev)) + return; + mutex_lock(&dev_priv->pc8.lock); __hsw_disable_package_c8(dev_priv); mutex_unlock(&dev_priv->pc8.lock); @@ -6562,6 +6568,9 @@ static void hsw_update_package_c8(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; bool allow; + if (!HAS_PC8(dev_priv->dev)) + return; + if (!i915_enable_pc8) return; @@ -6585,18 +6594,28 @@ done: static void hsw_package_c8_gpu_idle(struct drm_i915_private *dev_priv) { + if (!HAS_PC8(dev_priv->dev)) + return; + + mutex_lock(&dev_priv->pc8.lock); if (!dev_priv->pc8.gpu_idle) { dev_priv->pc8.gpu_idle = true; - hsw_enable_package_c8(dev_priv); + __hsw_enable_package_c8(dev_priv); } + mutex_unlock(&dev_priv->pc8.lock); } static void hsw_package_c8_gpu_busy(struct drm_i915_private *dev_priv) { + if (!HAS_PC8(dev_priv->dev)) + return; + + mutex_lock(&dev_priv->pc8.lock); if (dev_priv->pc8.gpu_idle) { dev_priv->pc8.gpu_idle = false; - hsw_disable_package_c8(dev_priv); + __hsw_disable_package_c8(dev_priv); } + mutex_unlock(&dev_priv->pc8.lock); } #define for_each_power_domain(domain, mask) \ @@ -7184,7 +7203,9 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base) intel_crtc->cursor_visible = visible; } /* and commit changes on next vblank */ + POSTING_READ(CURCNTR(pipe)); I915_WRITE(CURBASE(pipe), base); + POSTING_READ(CURBASE(pipe)); } static void ivb_update_cursor(struct drm_crtc *crtc, u32 base) @@ -7213,7 +7234,9 @@ static void ivb_update_cursor(struct drm_crtc *crtc, u32 base) intel_crtc->cursor_visible = visible; } /* and commit changes on next vblank */ + POSTING_READ(CURCNTR_IVB(pipe)); I915_WRITE(CURBASE_IVB(pipe), base); + POSTING_READ(CURBASE_IVB(pipe)); } /* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */ @@ -9248,8 +9271,7 @@ check_crtc_state(struct drm_device *dev) enum pipe pipe; if (encoder->base.crtc != &crtc->base) continue; - if (encoder->get_config && - encoder->get_hw_state(encoder, &pipe)) + if (encoder->get_hw_state(encoder, &pipe)) encoder->get_config(encoder, &pipe_config); } @@ -10909,8 +10931,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) if (encoder->get_hw_state(encoder, &pipe)) { crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); encoder->base.crtc = &crtc->base; - if (encoder->get_config) - encoder->get_config(encoder, &crtc->config); + encoder->get_config(encoder, &crtc->config); } else { encoder->base.crtc = NULL; } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index eb8139d..0b2e842 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1774,7 +1774,7 @@ static void intel_disable_dp(struct intel_encoder *encoder) * ensure that we have vdd while we switch off the panel. */ ironlake_edp_panel_vdd_on(intel_dp); ironlake_edp_backlight_off(intel_dp); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); ironlake_edp_panel_off(intel_dp); /* cpu edp my only be disable _after_ the cpu pipe/plane is disabled. */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0a07d7c..caf2ee4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1625,7 +1625,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) &to_intel_crtc(enabled)->config.adjusted_mode; int clock = adjusted_mode->crtc_clock; int htotal = adjusted_mode->htotal; - int hdisplay = to_intel_crtc(crtc)->config.pipe_src_w; + int hdisplay = to_intel_crtc(enabled)->config.pipe_src_w; int pixel_size = enabled->fb->bits_per_pixel / 8; unsigned long line_time_us; int entries; @@ -3888,7 +3888,7 @@ static void gen6_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC_SLEEP, 0); I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - if (INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) + if (IS_IVYBRIDGE(dev)) I915_WRITE(GEN6_RC6_THRESHOLD, 125000); else I915_WRITE(GEN6_RC6_THRESHOLD, 50000); diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 18c4062..22cf0f4 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -902,6 +902,13 @@ intel_tv_mode_valid(struct drm_connector *connector, } +static void +intel_tv_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) +{ + pipe_config->adjusted_mode.crtc_clock = pipe_config->port_clock; +} + static bool intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_config *pipe_config) @@ -1621,6 +1628,7 @@ intel_tv_init(struct drm_device *dev) DRM_MODE_ENCODER_TVDAC); intel_encoder->compute_config = intel_tv_compute_config; + intel_encoder->get_config = intel_tv_get_config; intel_encoder->mode_set = intel_tv_mode_set; intel_encoder->enable = intel_enable_tv; intel_encoder->disable = intel_disable_tv; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index f9883ce..0b02078 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -217,6 +217,19 @@ static void gen6_force_wake_work(struct work_struct *work) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static void intel_uncore_forcewake_reset(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (IS_VALLEYVIEW(dev)) { + vlv_force_wake_reset(dev_priv); + } else if (INTEL_INFO(dev)->gen >= 6) { + __gen6_gt_force_wake_reset(dev_priv); + if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) + __gen6_gt_force_wake_mt_reset(dev_priv); + } +} + void intel_uncore_early_sanitize(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -234,19 +247,8 @@ void intel_uncore_early_sanitize(struct drm_device *dev) dev_priv->ellc_size = 128; DRM_INFO("Found %zuMB of eLLC\n", dev_priv->ellc_size); } -} -static void intel_uncore_forcewake_reset(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (IS_VALLEYVIEW(dev)) { - vlv_force_wake_reset(dev_priv); - } else if (INTEL_INFO(dev)->gen >= 6) { - __gen6_gt_force_wake_reset(dev_priv); - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) - __gen6_gt_force_wake_mt_reset(dev_priv); - } + intel_uncore_forcewake_reset(dev); } void intel_uncore_sanitize(struct drm_device *dev) diff --git a/drivers/gpu/drm/radeon/atombios_i2c.c b/drivers/gpu/drm/radeon/atombios_i2c.c index deaf98c..0652ee0 100644 --- a/drivers/gpu/drm/radeon/atombios_i2c.c +++ b/drivers/gpu/drm/radeon/atombios_i2c.c @@ -56,8 +56,10 @@ static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan, return -EINVAL; } args.ucRegIndex = buf[0]; - if (num > 1) - memcpy(&out, &buf[1], num - 1); + if (num > 1) { + num--; + memcpy(&out, &buf[1], num); + } args.lpI2CDataOut = cpu_to_le16(out); } else { if (num > ATOM_MAX_HW_I2C_READ) { diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ae92aa0..b43a3a3 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -1560,17 +1560,17 @@ u32 cik_get_xclk(struct radeon_device *rdev) * cik_mm_rdoorbell - read a doorbell dword * * @rdev: radeon_device pointer - * @offset: byte offset into the aperture + * @index: doorbell index * * Returns the value in the doorbell aperture at the - * requested offset (CIK). + * requested doorbell index (CIK). */ -u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset) +u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index) { - if (offset < rdev->doorbell.size) { - return readl(((void __iomem *)rdev->doorbell.ptr) + offset); + if (index < rdev->doorbell.num_doorbells) { + return readl(rdev->doorbell.ptr + index); } else { - DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset); + DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); return 0; } } @@ -1579,18 +1579,18 @@ u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset) * cik_mm_wdoorbell - write a doorbell dword * * @rdev: radeon_device pointer - * @offset: byte offset into the aperture + * @index: doorbell index * @v: value to write * * Writes @v to the doorbell aperture at the - * requested offset (CIK). + * requested doorbell index (CIK). */ -void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v) +void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v) { - if (offset < rdev->doorbell.size) { - writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset); + if (index < rdev->doorbell.num_doorbells) { + writel(v, rdev->doorbell.ptr + index); } else { - DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset); + DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); } } @@ -2427,6 +2427,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) gb_tile_moden = 0; break; } + rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); } } else if (num_pipe_configs == 4) { @@ -2773,6 +2774,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) gb_tile_moden = 0; break; } + rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); } } else if (num_pipe_configs == 2) { @@ -2990,6 +2992,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) gb_tile_moden = 0; break; } + rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); } } else @@ -3556,17 +3559,24 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); } -void cik_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) { +/* TODO: figure out why semaphore cause lockups */ +#if 0 uint64_t addr = semaphore->gpu_addr; unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); radeon_ring_write(ring, addr & 0xffffffff); radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); + + return true; +#else + return false; +#endif } /** @@ -3609,13 +3619,8 @@ int cik_copy_cpdma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; @@ -4052,7 +4057,7 @@ void cik_compute_ring_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring) { rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr); - WDOORBELL32(ring->doorbell_offset, ring->wptr); + WDOORBELL32(ring->doorbell_index, ring->wptr); } /** @@ -4393,10 +4398,6 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) return r; } - /* doorbell offset */ - rdev->ring[idx].doorbell_offset = - (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0; - /* init the mqd struct */ memset(buf, 0, sizeof(struct bonaire_mqd)); @@ -4508,7 +4509,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) RREG32(CP_HQD_PQ_DOORBELL_CONTROL); mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; mqd->queue_state.cp_hqd_pq_doorbell_control |= - DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); + DOORBELL_OFFSET(rdev->ring[idx].doorbell_index); mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; mqd->queue_state.cp_hqd_pq_doorbell_control &= ~(DOORBELL_SOURCE | DOORBELL_HIT); @@ -7839,14 +7840,14 @@ int cik_init(struct radeon_device *rdev) ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; ring->ring_obj = NULL; r600_ring_init(rdev, ring, 1024 * 1024); - r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); + r = radeon_doorbell_get(rdev, &ring->doorbell_index); if (r) return r; ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; ring->ring_obj = NULL; r600_ring_init(rdev, ring, 1024 * 1024); - r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); + r = radeon_doorbell_get(rdev, &ring->doorbell_index); if (r) return r; diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 9c9529d..0300727 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -130,7 +130,7 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev, * Add a DMA semaphore packet to the ring wait on or signal * other rings (CIK). */ -void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -141,6 +141,8 @@ void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); radeon_ring_write(ring, addr & 0xfffffff8); radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + + return true; } /** @@ -443,13 +445,8 @@ int cik_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c index 91bb470..920e1e4 100644 --- a/drivers/gpu/drm/radeon/cypress_dpm.c +++ b/drivers/gpu/drm/radeon/cypress_dpm.c @@ -299,7 +299,9 @@ void cypress_program_response_times(struct radeon_device *rdev) static int cypress_pcie_performance_request(struct radeon_device *rdev, u8 perf_req, bool advertise) { +#if defined(CONFIG_ACPI) struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); +#endif u32 tmp; udelay(10); diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 6a0656d..a37b544 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -131,13 +131,8 @@ int evergreen_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index f263390..cdc0030 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -3445,9 +3445,9 @@ static int ni_enable_smc_cac(struct radeon_device *rdev, static int ni_pcie_performance_request(struct radeon_device *rdev, u8 perf_req, bool advertise) { +#if defined(CONFIG_ACPI) struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); -#if defined(CONFIG_ACPI) if ((perf_req == PCIE_PERF_REQ_PECI_GEN1) || (perf_req == PCIE_PERF_REQ_PECI_GEN2)) { if (eg_pi->pcie_performance_request_registered == false) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 784983d..10abc4d 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -869,13 +869,14 @@ void r100_fence_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, RADEON_SW_INT_FIRE); } -void r100_semaphore_ring_emit(struct radeon_device *rdev, +bool r100_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) { /* Unused on older asics, since we don't have semaphores or multiple rings */ BUG(); + return false; } int r100_copy_blit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 4e609e8..9ad0673 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2650,7 +2650,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } } -void r600_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -2664,6 +2664,8 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); radeon_ring_write(ring, addr & 0xffffffff); radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); + + return true; } /** @@ -2706,13 +2708,8 @@ int r600_copy_cpdma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 3b31745..7844d15 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -311,7 +311,7 @@ void r600_dma_fence_ring_emit(struct radeon_device *rdev, * Add a DMA semaphore packet to the ring wait on or signal * other rings (r6xx-SI). */ -void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -322,6 +322,8 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); radeon_ring_write(ring, addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(addr) & 0xff); + + return true; } /** @@ -462,13 +464,8 @@ int r600_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b9ee992..ecf2a39 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -348,6 +348,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); +int radeon_fence_wait_locked(struct radeon_fence *fence); int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_any(struct radeon_device *rdev, @@ -548,17 +549,20 @@ struct radeon_semaphore { struct radeon_sa_bo *sa_bo; signed waiters; uint64_t gpu_addr; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; }; int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore); -void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); -void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); +void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, + struct radeon_fence *fence); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - int signaler, int waiter); + int waiting_ring); void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence); @@ -645,13 +649,15 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg); /* * GPU doorbell structures, functions & helpers */ +#define RADEON_MAX_DOORBELLS 1024 /* Reserve at most 1024 doorbell slots for radeon-owned rings. */ + struct radeon_doorbell { - u32 num_pages; - bool free[1024]; /* doorbell mmio */ - resource_size_t base; - resource_size_t size; - void __iomem *ptr; + resource_size_t base; + resource_size_t size; + u32 __iomem *ptr; + u32 num_doorbells; /* Number of doorbells actually reserved for radeon. */ + unsigned long used[DIV_ROUND_UP(RADEON_MAX_DOORBELLS, BITS_PER_LONG)]; }; int radeon_doorbell_get(struct radeon_device *rdev, u32 *page); @@ -765,7 +771,6 @@ struct radeon_ib { struct radeon_fence *fence; struct radeon_vm *vm; bool is_const_ib; - struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; }; @@ -799,8 +804,7 @@ struct radeon_ring { u32 pipe; u32 queue; struct radeon_bo *mqd_obj; - u32 doorbell_page_num; - u32 doorbell_offset; + u32 doorbell_index; unsigned wptr_offs; }; @@ -921,7 +925,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, struct radeon_vm *vm, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); -void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib); int radeon_ib_pool_init(struct radeon_device *rdev); @@ -1638,7 +1641,7 @@ struct radeon_asic_ring { /* command emmit functions */ void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); - void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, + bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); @@ -1979,6 +1982,7 @@ struct cik_asic { unsigned tile_config; uint32_t tile_mode_array[32]; + uint32_t macrotile_mode_array[16]; }; union radeon_asic_config { @@ -2239,8 +2243,8 @@ void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); -u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset); -void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v); +u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index); +void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); /* * Cast helper @@ -2303,8 +2307,8 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v); #define RREG32_IO(reg) r100_io_rreg(rdev, (reg)) #define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v)) -#define RDOORBELL32(offset) cik_mm_rdoorbell(rdev, (offset)) -#define WDOORBELL32(offset, v) cik_mm_wdoorbell(rdev, (offset), (v)) +#define RDOORBELL32(index) cik_mm_rdoorbell(rdev, (index)) +#define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v)) /* * Indirect registers accessor diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 50853c0..e354ce9 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2015,6 +2015,8 @@ static struct radeon_asic ci_asic = { .bandwidth_update = &dce8_bandwidth_update, .get_vblank_counter = &evergreen_get_vblank_counter, .wait_for_vblank = &dce4_wait_for_vblank, + .set_backlight_level = &atombios_set_backlight_level, + .get_backlight_level = &atombios_get_backlight_level, .hdmi_enable = &evergreen_hdmi_enable, .hdmi_setmode = &evergreen_hdmi_setmode, }, @@ -2114,6 +2116,8 @@ static struct radeon_asic kv_asic = { .bandwidth_update = &dce8_bandwidth_update, .get_vblank_counter = &evergreen_get_vblank_counter, .wait_for_vblank = &dce4_wait_for_vblank, + .set_backlight_level = &atombios_set_backlight_level, + .get_backlight_level = &atombios_get_backlight_level, .hdmi_enable = &evergreen_hdmi_enable, .hdmi_setmode = &evergreen_hdmi_setmode, }, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index f2833ee..c9fd97b 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -80,7 +80,7 @@ int r100_irq_set(struct radeon_device *rdev); int r100_irq_process(struct radeon_device *rdev); void r100_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r100_semaphore_ring_emit(struct radeon_device *rdev, +bool r100_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); @@ -313,13 +313,13 @@ int r600_cs_parse(struct radeon_cs_parser *p); int r600_dma_cs_parse(struct radeon_cs_parser *p); void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r600_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); void r600_dma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, +bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -566,10 +566,6 @@ int sumo_dpm_force_performance_level(struct radeon_device *rdev, */ void cayman_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cayman_uvd_semaphore_emit(struct radeon_device *rdev, - struct radeon_ring *ring, - struct radeon_semaphore *semaphore, - bool emit_wait); void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); int cayman_init(struct radeon_device *rdev); void cayman_fini(struct radeon_device *rdev); @@ -697,7 +693,7 @@ void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -717,7 +713,7 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); void cik_fence_compute_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -void cik_semaphore_ring_emit(struct radeon_device *rdev, +bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); @@ -807,7 +803,7 @@ void uvd_v1_0_stop(struct radeon_device *rdev); int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); -void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, +bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); @@ -819,7 +815,7 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); /* uvd v3.1 */ -void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, +bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 26ca223..f41594b 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -159,7 +159,8 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p) if (!p->relocs[i].robj) continue; - radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj); + radeon_semaphore_sync_to(p->ib.semaphore, + p->relocs[i].robj->tbo.sync_obj); } } @@ -411,9 +412,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser); - radeon_ib_sync_to(&parser->ib, vm->fence); - radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id( - rdev, vm, parser->ring)); + radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); + radeon_semaphore_sync_to(parser->ib.semaphore, + radeon_vm_grab_id(rdev, vm, parser->ring)); if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b9234c4..39b033b4 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -251,28 +251,23 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg) */ int radeon_doorbell_init(struct radeon_device *rdev) { - int i; - /* doorbell bar mapping */ rdev->doorbell.base = pci_resource_start(rdev->pdev, 2); rdev->doorbell.size = pci_resource_len(rdev->pdev, 2); - /* limit to 4 MB for now */ - if (rdev->doorbell.size > (4 * 1024 * 1024)) - rdev->doorbell.size = 4 * 1024 * 1024; + rdev->doorbell.num_doorbells = min_t(u32, rdev->doorbell.size / sizeof(u32), RADEON_MAX_DOORBELLS); + if (rdev->doorbell.num_doorbells == 0) + return -EINVAL; - rdev->doorbell.ptr = ioremap(rdev->doorbell.base, rdev->doorbell.size); + rdev->doorbell.ptr = ioremap(rdev->doorbell.base, rdev->doorbell.num_doorbells * sizeof(u32)); if (rdev->doorbell.ptr == NULL) { return -ENOMEM; } DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)rdev->doorbell.base); DRM_INFO("doorbell mmio size: %u\n", (unsigned)rdev->doorbell.size); - rdev->doorbell.num_pages = rdev->doorbell.size / PAGE_SIZE; + memset(&rdev->doorbell.used, 0, sizeof(rdev->doorbell.used)); - for (i = 0; i < rdev->doorbell.num_pages; i++) { - rdev->doorbell.free[i] = true; - } return 0; } @@ -290,40 +285,38 @@ void radeon_doorbell_fini(struct radeon_device *rdev) } /** - * radeon_doorbell_get - Allocate a doorbell page + * radeon_doorbell_get - Allocate a doorbell entry * * @rdev: radeon_device pointer - * @doorbell: doorbell page number + * @doorbell: doorbell index * - * Allocate a doorbell page for use by the driver (all asics). + * Allocate a doorbell for use by the driver (all asics). * Returns 0 on success or -EINVAL on failure. */ int radeon_doorbell_get(struct radeon_device *rdev, u32 *doorbell) { - int i; - - for (i = 0; i < rdev->doorbell.num_pages; i++) { - if (rdev->doorbell.free[i]) { - rdev->doorbell.free[i] = false; - *doorbell = i; - return 0; - } + unsigned long offset = find_first_zero_bit(rdev->doorbell.used, rdev->doorbell.num_doorbells); + if (offset < rdev->doorbell.num_doorbells) { + __set_bit(offset, rdev->doorbell.used); + *doorbell = offset; + return 0; + } else { + return -EINVAL; } - return -EINVAL; } /** - * radeon_doorbell_free - Free a doorbell page + * radeon_doorbell_free - Free a doorbell entry * * @rdev: radeon_device pointer - * @doorbell: doorbell page number + * @doorbell: doorbell index * - * Free a doorbell page allocated for use by the driver (all asics) + * Free a doorbell allocated for use by the driver (all asics) */ void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell) { - if (doorbell < rdev->doorbell.num_pages) - rdev->doorbell.free[doorbell] = true; + if (doorbell < rdev->doorbell.num_doorbells) + __clear_bit(doorbell, rdev->doorbell.used); } /* diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 1aee322..9f5ff28 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -76,9 +76,10 @@ * 2.32.0 - new info request for rings working * 2.33.0 - Add SI tiling mode array query * 2.34.0 - Add CIK tiling mode array query + * 2.35.0 - Add CIK macrotile mode array query */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 34 +#define KMS_DRIVER_MINOR 35 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 281d14c..d3a86e4 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -472,6 +472,36 @@ int radeon_fence_wait_any(struct radeon_device *rdev, } /** + * radeon_fence_wait_locked - wait for a fence to signal + * + * @fence: radeon fence object + * + * Wait for the requested fence to signal (all asics). + * Returns 0 if the fence has passed, error for all other cases. + */ +int radeon_fence_wait_locked(struct radeon_fence *fence) +{ + uint64_t seq[RADEON_NUM_RINGS] = {}; + int r; + + if (fence == NULL) { + WARN(1, "Querying an invalid fence : %p !\n", fence); + return -EINVAL; + } + + seq[fence->ring] = fence->seq; + if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) + return 0; + + r = radeon_fence_wait_seq(fence->rdev, seq, false, false); + if (r) + return r; + + fence->seq = RADEON_FENCE_SIGNALED_SEQ; + return 0; +} + +/** * radeon_fence_wait_next_locked - wait for the next fence to signal * * @rdev: radeon device pointer diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 8a83b89..3044e50 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -651,7 +651,7 @@ retry: radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, 0, pd_entries, 0, 0); - radeon_ib_sync_to(&ib, vm->fence); + radeon_semaphore_sync_to(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_ib_free(rdev, &ib); @@ -1209,6 +1209,8 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, return -ENOMEM; r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; ib.length_dw = 0; r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); @@ -1220,7 +1222,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, addr, radeon_vm_page_flags(bo_va->flags)); - radeon_ib_sync_to(&ib, vm->fence); + radeon_semaphore_sync_to(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_ib_free(rdev, &ib); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index bb87105..55d0b47 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -340,7 +340,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) break; case RADEON_INFO_BACKEND_MAP: if (rdev->family >= CHIP_BONAIRE) - return -EINVAL; + *value = rdev->config.cik.backend_map; else if (rdev->family >= CHIP_TAHITI) *value = rdev->config.si.backend_map; else if (rdev->family >= CHIP_CAYMAN) @@ -449,6 +449,15 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; } break; + case RADEON_INFO_CIK_MACROTILE_MODE_ARRAY: + if (rdev->family >= CHIP_BONAIRE) { + value = rdev->config.cik.macrotile_mode_array; + value_size = sizeof(uint32_t)*16; + } else { + DRM_DEBUG_KMS("macrotile mode array is cik+ only!\n"); + return -EINVAL; + } + break; case RADEON_INFO_SI_CP_DMA_COMPUTE: *value = 1; break; diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 0c7b8c6..0b158f9 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -422,6 +422,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, /* Pin framebuffer & get tilling informations */ obj = radeon_fb->obj; rbo = gem_to_radeon_bo(obj); +retry: r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; @@ -430,6 +431,33 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, &base); if (unlikely(r != 0)) { radeon_bo_unreserve(rbo); + + /* On old GPU like RN50 with little vram pining can fails because + * current fb is taking all space needed. So instead of unpining + * the old buffer after pining the new one, first unpin old one + * and then retry pining new one. + * + * As only master can set mode only master can pin and it is + * unlikely the master client will race with itself especialy + * on those old gpu with single crtc. + * + * We don't shutdown the display controller because new buffer + * will end up in same spot. + */ + if (!atomic && fb && fb != crtc->fb) { + struct radeon_bo *old_rbo; + unsigned long nsize, osize; + + old_rbo = gem_to_radeon_bo(to_radeon_framebuffer(fb)->obj); + osize = radeon_bo_size(old_rbo); + nsize = radeon_bo_size(rbo); + if (nsize <= osize && !radeon_bo_reserve(old_rbo, false)) { + radeon_bo_unpin(old_rbo); + radeon_bo_unreserve(old_rbo); + fb = NULL; + goto retry; + } + } return -EINVAL; } radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 866ace0..d1385cc 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1252,7 +1252,6 @@ int radeon_pm_init(struct radeon_device *rdev) case CHIP_RS780: case CHIP_RS880: case CHIP_CAYMAN: - case CHIP_ARUBA: case CHIP_BONAIRE: case CHIP_KABINI: case CHIP_KAVERI: @@ -1284,6 +1283,7 @@ int radeon_pm_init(struct radeon_device *rdev) case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: + case CHIP_ARUBA: case CHIP_TAHITI: case CHIP_PITCAIRN: case CHIP_VERDE: diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 18254e1..9214403 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -61,7 +61,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, struct radeon_vm *vm, unsigned size) { - int i, r; + int r; r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); if (r) { @@ -87,8 +87,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); } ib->is_const_ib = false; - for (i = 0; i < RADEON_NUM_RINGS; ++i) - ib->sync_to[i] = NULL; return 0; } @@ -109,25 +107,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) } /** - * radeon_ib_sync_to - sync to fence before executing the IB - * - * @ib: IB object to add fence to - * @fence: fence to sync to - * - * Sync to the fence before executing the IB - */ -void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence) -{ - struct radeon_fence *other; - - if (!fence) - return; - - other = ib->sync_to[fence->ring]; - ib->sync_to[fence->ring] = radeon_fence_later(fence, other); -} - -/** * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring * * @rdev: radeon_device pointer @@ -151,8 +130,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - bool need_sync = false; - int i, r = 0; + int r = 0; if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. */ @@ -166,19 +144,15 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - struct radeon_fence *fence = ib->sync_to[i]; - if (radeon_fence_need_sync(fence, ib->ring)) { - need_sync = true; - radeon_semaphore_sync_rings(rdev, ib->semaphore, - fence->ring, ib->ring); - radeon_fence_note_sync(fence, ib->ring); - } - } - /* immediately free semaphore when we don't need to sync */ - if (!need_sync) { - radeon_semaphore_free(rdev, &ib->semaphore, NULL); + + /* sync with other rings */ + r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to sync rings (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; } + /* if we can't remember our last VM flush then flush now! */ /* XXX figure out why we have to flush for every IB */ if (ib->vm /*&& !ib->vm->last_flush*/) { diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 8dcc20f..2b42aa1 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -29,12 +29,12 @@ */ #include <drm/drmP.h> #include "radeon.h" - +#include "radeon_trace.h" int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) { - int r; + int i, r; *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); if (*semaphore == NULL) { @@ -50,54 +50,121 @@ int radeon_semaphore_create(struct radeon_device *rdev, (*semaphore)->waiters = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) + (*semaphore)->sync_to[i] = NULL; + return 0; } -void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx, struct radeon_semaphore *semaphore) { - --semaphore->waiters; - radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false); + struct radeon_ring *ring = &rdev->ring[ridx]; + + trace_radeon_semaphore_signale(ridx, semaphore); + + if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, false)) { + --semaphore->waiters; + + /* for debugging lockup only, used by sysfs debug files */ + ring->last_semaphore_signal_addr = semaphore->gpu_addr; + return true; + } + return false; } -void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, +bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx, struct radeon_semaphore *semaphore) { - ++semaphore->waiters; - radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); + struct radeon_ring *ring = &rdev->ring[ridx]; + + trace_radeon_semaphore_wait(ridx, semaphore); + + if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, true)) { + ++semaphore->waiters; + + /* for debugging lockup only, used by sysfs debug files */ + ring->last_semaphore_wait_addr = semaphore->gpu_addr; + return true; + } + return false; +} + +/** + * radeon_semaphore_sync_to - use the semaphore to sync to a fence + * + * @semaphore: semaphore object to add fence to + * @fence: fence to sync to + * + * Sync to the fence using this semaphore object + */ +void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, + struct radeon_fence *fence) +{ + struct radeon_fence *other; + + if (!fence) + return; + + other = semaphore->sync_to[fence->ring]; + semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other); } -/* caller must hold ring lock */ +/** + * radeon_semaphore_sync_rings - sync ring to all registered fences + * + * @rdev: radeon_device pointer + * @semaphore: semaphore object to use for sync + * @ring: ring that needs sync + * + * Ensure that all registered fences are signaled before letting + * the ring continue. The caller must hold the ring lock. + */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - int signaler, int waiter) + int ring) { - int r; + int i, r; - /* no need to signal and wait on the same ring */ - if (signaler == waiter) { - return 0; - } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = semaphore->sync_to[i]; - /* prevent GPU deadlocks */ - if (!rdev->ring[signaler].ready) { - dev_err(rdev->dev, "Trying to sync to a disabled ring!"); - return -EINVAL; - } + /* check if we really need to sync */ + if (!radeon_fence_need_sync(fence, ring)) + continue; - r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); - if (r) { - return r; - } - radeon_semaphore_emit_signal(rdev, signaler, semaphore); - radeon_ring_commit(rdev, &rdev->ring[signaler]); + /* prevent GPU deadlocks */ + if (!rdev->ring[i].ready) { + dev_err(rdev->dev, "Syncing to a disabled ring!"); + return -EINVAL; + } - /* we assume caller has already allocated space on waiters ring */ - radeon_semaphore_emit_wait(rdev, waiter, semaphore); + /* allocate enough space for sync command */ + r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); + if (r) { + return r; + } - /* for debugging lockup only, used by sysfs debug files */ - rdev->ring[signaler].last_semaphore_signal_addr = semaphore->gpu_addr; - rdev->ring[waiter].last_semaphore_wait_addr = semaphore->gpu_addr; + /* emit the signal semaphore */ + if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { + /* signaling wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + radeon_fence_wait_locked(fence); + continue; + } + + /* we assume caller has already allocated space on waiters ring */ + if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { + /* waiting wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + radeon_fence_wait_locked(fence); + continue; + } + + radeon_ring_commit(rdev, &rdev->ring[i]); + radeon_fence_note_sync(fence, ring); + } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index 811bca6..9f0e181 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -111,6 +111,42 @@ DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_end, TP_ARGS(dev, seqno) ); +DECLARE_EVENT_CLASS(radeon_semaphore_request, + + TP_PROTO(int ring, struct radeon_semaphore *sem), + + TP_ARGS(ring, sem), + + TP_STRUCT__entry( + __field(int, ring) + __field(signed, waiters) + __field(uint64_t, gpu_addr) + ), + + TP_fast_assign( + __entry->ring = ring; + __entry->waiters = sem->waiters; + __entry->gpu_addr = sem->gpu_addr; + ), + + TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring, + __entry->waiters, __entry->gpu_addr) +); + +DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_signale, + + TP_PROTO(int ring, struct radeon_semaphore *sem), + + TP_ARGS(ring, sem) +); + +DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_wait, + + TP_PROTO(int ring, struct radeon_semaphore *sem), + + TP_ARGS(ring, sem) +); + #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index f9b02e3..aca8cbe 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -66,13 +66,8 @@ int rv770_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 8e8f461..59be2cf 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -195,13 +195,8 @@ int si_copy_dma(struct radeon_device *rdev, return r; } - if (radeon_fence_need_sync(*fence, ring->idx)) { - radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, - ring->idx); - radeon_fence_note_sync(*fence, ring->idx); - } else { - radeon_semaphore_free(rdev, &sem, NULL); - } + radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 9364129..d700698 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1873,9 +1873,9 @@ int trinity_dpm_init(struct radeon_device *rdev) pi->enable_sclk_ds = true; pi->enable_gfx_power_gating = true; pi->enable_gfx_clock_gating = true; - pi->enable_mg_clock_gating = true; - pi->enable_gfx_dynamic_mgpg = true; /* ??? */ - pi->override_dynamic_mgpg = true; + pi->enable_mg_clock_gating = false; + pi->enable_gfx_dynamic_mgpg = false; + pi->override_dynamic_mgpg = false; pi->enable_auto_thermal_throttling = true; pi->voltage_drop_in_dce = false; /* need to restructure dpm/modeset interaction */ pi->uvd_dpm = true; /* ??? */ diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index 7266805..d4a68af 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -357,7 +357,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) * * Emit a semaphore command (either wait or signal) to the UVD ring. */ -void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, +bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -372,6 +372,8 @@ void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); radeon_ring_write(ring, emit_wait ? 1 : 0); + + return true; } /** diff --git a/drivers/gpu/drm/radeon/uvd_v3_1.c b/drivers/gpu/drm/radeon/uvd_v3_1.c index 5b6fa1f..d722db2 100644 --- a/drivers/gpu/drm/radeon/uvd_v3_1.c +++ b/drivers/gpu/drm/radeon/uvd_v3_1.c @@ -37,7 +37,7 @@ * * Emit a semaphore command (either wait or signal) to the UVD ring. */ -void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, +bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, bool emit_wait) @@ -52,4 +52,6 @@ void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); + + return true; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 8d5a646..07e02c4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -151,7 +151,7 @@ static void ttm_bo_release_list(struct kref *list_kref) atomic_dec(&bo->glob->bo_count); if (bo->resv == &bo->ttm_resv) reservation_object_fini(&bo->ttm_resv); - + mutex_destroy(&bo->wu_mutex); if (bo->destroy) bo->destroy(bo); else { @@ -1123,6 +1123,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev, INIT_LIST_HEAD(&bo->ddestroy); INIT_LIST_HEAD(&bo->swap); INIT_LIST_HEAD(&bo->io_reserve_lru); + mutex_init(&bo->wu_mutex); bo->bdev = bdev; bo->glob = bdev->glob; bo->type = type; @@ -1704,3 +1705,35 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev) ; } EXPORT_SYMBOL(ttm_bo_swapout_all); + +/** + * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become + * unreserved + * + * @bo: Pointer to buffer + */ +int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo) +{ + int ret; + + /* + * In the absense of a wait_unlocked API, + * Use the bo::wu_mutex to avoid triggering livelocks due to + * concurrent use of this function. Note that this use of + * bo::wu_mutex can go away if we change locking order to + * mmap_sem -> bo::reserve. + */ + ret = mutex_lock_interruptible(&bo->wu_mutex); + if (unlikely(ret != 0)) + return -ERESTARTSYS; + if (!ww_mutex_is_locked(&bo->resv->lock)) + goto out_unlock; + ret = ttm_bo_reserve_nolru(bo, true, false, false, NULL); + if (unlikely(ret != 0)) + goto out_unlock; + ww_mutex_unlock(&bo->resv->lock); + +out_unlock: + mutex_unlock(&bo->wu_mutex); + return ret; +} diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 4834c46..15b86a9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -350,10 +350,13 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, goto out2; /* - * Move nonexistent data. NOP. + * Don't move nonexistent data. Clear destination instead. */ - if (old_iomap == NULL && ttm == NULL) + if (old_iomap == NULL && + (ttm == NULL || ttm->state == tt_unpopulated)) { + memset_io(new_iomap, 0, new_mem->num_pages*PAGE_SIZE); goto out2; + } /* * TTM might be null for moves within the same region. diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index ac617f3..b249ab9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -107,13 +107,28 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* * Work around locking order reversal in fault / nopfn * between mmap_sem and bo_reserve: Perform a trylock operation - * for reserve, and if it fails, retry the fault after scheduling. + * for reserve, and if it fails, retry the fault after waiting + * for the buffer to become unreserved. */ - - ret = ttm_bo_reserve(bo, true, true, false, 0); + ret = ttm_bo_reserve(bo, true, true, false, NULL); if (unlikely(ret != 0)) { - if (ret == -EBUSY) - set_need_resched(); + if (ret != -EBUSY) + return VM_FAULT_NOPAGE; + + if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { + if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { + up_read(&vma->vm_mm->mmap_sem); + (void) ttm_bo_wait_unreserved(bo); + } + + return VM_FAULT_RETRY; + } + + /* + * If we'd want to change locking order to + * mmap_sem -> bo::reserve, we'd use a blocking reserve here + * instead of retrying the fault... + */ return VM_FAULT_NOPAGE; } @@ -123,7 +138,6 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) case 0: break; case -EBUSY: - set_need_resched(); case -ERESTARTSYS: retval = VM_FAULT_NOPAGE; goto out_unlock; diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 6c91178..479e941 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -32,8 +32,7 @@ #include <linux/sched.h> #include <linux/module.h> -static void ttm_eu_backoff_reservation_locked(struct list_head *list, - struct ww_acquire_ctx *ticket) +static void ttm_eu_backoff_reservation_locked(struct list_head *list) { struct ttm_validate_buffer *entry; @@ -93,8 +92,9 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, entry = list_first_entry(list, struct ttm_validate_buffer, head); glob = entry->bo->glob; spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list, ticket); - ww_acquire_fini(ticket); + ttm_eu_backoff_reservation_locked(list); + if (ticket) + ww_acquire_fini(ticket); spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_eu_backoff_reservation); @@ -130,7 +130,8 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, entry = list_first_entry(list, struct ttm_validate_buffer, head); glob = entry->bo->glob; - ww_acquire_init(ticket, &reservation_ww_class); + if (ticket) + ww_acquire_init(ticket, &reservation_ww_class); retry: list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; @@ -139,16 +140,17 @@ retry: if (entry->reserved) continue; - - ret = ttm_bo_reserve_nolru(bo, true, false, true, ticket); + ret = ttm_bo_reserve_nolru(bo, true, (ticket == NULL), true, + ticket); if (ret == -EDEADLK) { /* uh oh, we lost out, drop every reservation and try * to only reserve this buffer, then start over if * this succeeds. */ + BUG_ON(ticket == NULL); spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list, ticket); + ttm_eu_backoff_reservation_locked(list); spin_unlock(&glob->lru_lock); ttm_eu_list_ref_sub(list); ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, @@ -175,7 +177,8 @@ retry: } } - ww_acquire_done(ticket); + if (ticket) + ww_acquire_done(ticket); spin_lock(&glob->lru_lock); ttm_eu_del_from_lru_locked(list); spin_unlock(&glob->lru_lock); @@ -184,12 +187,14 @@ retry: err: spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list, ticket); + ttm_eu_backoff_reservation_locked(list); spin_unlock(&glob->lru_lock); ttm_eu_list_ref_sub(list); err_fini: - ww_acquire_done(ticket); - ww_acquire_fini(ticket); + if (ticket) { + ww_acquire_done(ticket); + ww_acquire_fini(ticket); + } return ret; } EXPORT_SYMBOL(ttm_eu_reserve_buffers); @@ -224,7 +229,8 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, } spin_unlock(&bdev->fence_lock); spin_unlock(&glob->lru_lock); - ww_acquire_fini(ticket); + if (ticket) + ww_acquire_fini(ticket); list_for_each_entry(entry, list, head) { if (entry->old_sync_obj) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index a868176..6fe7b92 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright (c) 2009 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2013 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -26,6 +26,12 @@ **************************************************************************/ /* * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + * + * While no substantial code is shared, the prime code is inspired by + * drm_prime.c, with + * Authors: + * Dave Airlie <airlied@redhat.com> + * Rob Clark <rob.clark@linaro.org> */ /** @file ttm_ref_object.c * @@ -34,6 +40,7 @@ * and release on file close. */ + /** * struct ttm_object_file * @@ -84,6 +91,9 @@ struct ttm_object_device { struct drm_open_hash object_hash; atomic_t object_count; struct ttm_mem_global *mem_glob; + struct dma_buf_ops ops; + void (*dmabuf_release)(struct dma_buf *dma_buf); + size_t dma_buf_size; }; /** @@ -116,6 +126,8 @@ struct ttm_ref_object { struct ttm_object_file *tfile; }; +static void ttm_prime_dmabuf_release(struct dma_buf *dma_buf); + static inline struct ttm_object_file * ttm_object_file_ref(struct ttm_object_file *tfile) { @@ -416,9 +428,10 @@ out_err: } EXPORT_SYMBOL(ttm_object_file_init); -struct ttm_object_device *ttm_object_device_init(struct ttm_mem_global - *mem_glob, - unsigned int hash_order) +struct ttm_object_device * +ttm_object_device_init(struct ttm_mem_global *mem_glob, + unsigned int hash_order, + const struct dma_buf_ops *ops) { struct ttm_object_device *tdev = kmalloc(sizeof(*tdev), GFP_KERNEL); int ret; @@ -430,10 +443,17 @@ struct ttm_object_device *ttm_object_device_init(struct ttm_mem_global spin_lock_init(&tdev->object_lock); atomic_set(&tdev->object_count, 0); ret = drm_ht_create(&tdev->object_hash, hash_order); + if (ret != 0) + goto out_no_object_hash; - if (likely(ret == 0)) - return tdev; + tdev->ops = *ops; + tdev->dmabuf_release = tdev->ops.release; + tdev->ops.release = ttm_prime_dmabuf_release; + tdev->dma_buf_size = ttm_round_pot(sizeof(struct dma_buf)) + + ttm_round_pot(sizeof(struct file)); + return tdev; +out_no_object_hash: kfree(tdev); return NULL; } @@ -452,3 +472,225 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev) kfree(tdev); } EXPORT_SYMBOL(ttm_object_device_release); + +/** + * get_dma_buf_unless_doomed - get a dma_buf reference if possible. + * + * @dma_buf: Non-refcounted pointer to a struct dma-buf. + * + * Obtain a file reference from a lookup structure that doesn't refcount + * the file, but synchronizes with its release method to make sure it has + * not been freed yet. See for example kref_get_unless_zero documentation. + * Returns true if refcounting succeeds, false otherwise. + * + * Nobody really wants this as a public API yet, so let it mature here + * for some time... + */ +static bool __must_check get_dma_buf_unless_doomed(struct dma_buf *dmabuf) +{ + return atomic_long_inc_not_zero(&dmabuf->file->f_count) != 0L; +} + +/** + * ttm_prime_refcount_release - refcount release method for a prime object. + * + * @p_base: Pointer to ttm_base_object pointer. + * + * This is a wrapper that calls the refcount_release founction of the + * underlying object. At the same time it cleans up the prime object. + * This function is called when all references to the base object we + * derive from are gone. + */ +static void ttm_prime_refcount_release(struct ttm_base_object **p_base) +{ + struct ttm_base_object *base = *p_base; + struct ttm_prime_object *prime; + + *p_base = NULL; + prime = container_of(base, struct ttm_prime_object, base); + BUG_ON(prime->dma_buf != NULL); + mutex_destroy(&prime->mutex); + if (prime->refcount_release) + prime->refcount_release(&base); +} + +/** + * ttm_prime_dmabuf_release - Release method for the dma-bufs we export + * + * @dma_buf: + * + * This function first calls the dma_buf release method the driver + * provides. Then it cleans up our dma_buf pointer used for lookup, + * and finally releases the reference the dma_buf has on our base + * object. + */ +static void ttm_prime_dmabuf_release(struct dma_buf *dma_buf) +{ + struct ttm_prime_object *prime = + (struct ttm_prime_object *) dma_buf->priv; + struct ttm_base_object *base = &prime->base; + struct ttm_object_device *tdev = base->tfile->tdev; + + if (tdev->dmabuf_release) + tdev->dmabuf_release(dma_buf); + mutex_lock(&prime->mutex); + if (prime->dma_buf == dma_buf) + prime->dma_buf = NULL; + mutex_unlock(&prime->mutex); + ttm_mem_global_free(tdev->mem_glob, tdev->dma_buf_size); + ttm_base_object_unref(&base); +} + +/** + * ttm_prime_fd_to_handle - Get a base object handle from a prime fd + * + * @tfile: A struct ttm_object_file identifying the caller. + * @fd: The prime / dmabuf fd. + * @handle: The returned handle. + * + * This function returns a handle to an object that previously exported + * a dma-buf. Note that we don't handle imports yet, because we simply + * have no consumers of that implementation. + */ +int ttm_prime_fd_to_handle(struct ttm_object_file *tfile, + int fd, u32 *handle) +{ + struct ttm_object_device *tdev = tfile->tdev; + struct dma_buf *dma_buf; + struct ttm_prime_object *prime; + struct ttm_base_object *base; + int ret; + + dma_buf = dma_buf_get(fd); + if (IS_ERR(dma_buf)) + return PTR_ERR(dma_buf); + + if (dma_buf->ops != &tdev->ops) + return -ENOSYS; + + prime = (struct ttm_prime_object *) dma_buf->priv; + base = &prime->base; + *handle = base->hash.key; + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + + dma_buf_put(dma_buf); + + return ret; +} +EXPORT_SYMBOL_GPL(ttm_prime_fd_to_handle); + +/** + * ttm_prime_handle_to_fd - Return a dma_buf fd from a ttm prime object + * + * @tfile: Struct ttm_object_file identifying the caller. + * @handle: Handle to the object we're exporting from. + * @flags: flags for dma-buf creation. We just pass them on. + * @prime_fd: The returned file descriptor. + * + */ +int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, + uint32_t handle, uint32_t flags, + int *prime_fd) +{ + struct ttm_object_device *tdev = tfile->tdev; + struct ttm_base_object *base; + struct dma_buf *dma_buf; + struct ttm_prime_object *prime; + int ret; + + base = ttm_base_object_lookup(tfile, handle); + if (unlikely(base == NULL || + base->object_type != ttm_prime_type)) { + ret = -ENOENT; + goto out_unref; + } + + prime = container_of(base, struct ttm_prime_object, base); + if (unlikely(!base->shareable)) { + ret = -EPERM; + goto out_unref; + } + + ret = mutex_lock_interruptible(&prime->mutex); + if (unlikely(ret != 0)) { + ret = -ERESTARTSYS; + goto out_unref; + } + + dma_buf = prime->dma_buf; + if (!dma_buf || !get_dma_buf_unless_doomed(dma_buf)) { + + /* + * Need to create a new dma_buf, with memory accounting. + */ + ret = ttm_mem_global_alloc(tdev->mem_glob, tdev->dma_buf_size, + false, true); + if (unlikely(ret != 0)) { + mutex_unlock(&prime->mutex); + goto out_unref; + } + + dma_buf = dma_buf_export(prime, &tdev->ops, + prime->size, flags); + if (IS_ERR(dma_buf)) { + ret = PTR_ERR(dma_buf); + ttm_mem_global_free(tdev->mem_glob, + tdev->dma_buf_size); + mutex_unlock(&prime->mutex); + goto out_unref; + } + + /* + * dma_buf has taken the base object reference + */ + base = NULL; + prime->dma_buf = dma_buf; + } + mutex_unlock(&prime->mutex); + + ret = dma_buf_fd(dma_buf, flags); + if (ret >= 0) { + *prime_fd = ret; + ret = 0; + } else + dma_buf_put(dma_buf); + +out_unref: + if (base) + ttm_base_object_unref(&base); + return ret; +} +EXPORT_SYMBOL_GPL(ttm_prime_handle_to_fd); + +/** + * ttm_prime_object_init - Initialize a ttm_prime_object + * + * @tfile: struct ttm_object_file identifying the caller + * @size: The size of the dma_bufs we export. + * @prime: The object to be initialized. + * @shareable: See ttm_base_object_init + * @type: See ttm_base_object_init + * @refcount_release: See ttm_base_object_init + * @ref_obj_release: See ttm_base_object_init + * + * Initializes an object which is compatible with the drm_prime model + * for data sharing between processes and devices. + */ +int ttm_prime_object_init(struct ttm_object_file *tfile, size_t size, + struct ttm_prime_object *prime, bool shareable, + enum ttm_object_type type, + void (*refcount_release) (struct ttm_base_object **), + void (*ref_obj_release) (struct ttm_base_object *, + enum ttm_ref_type ref_type)) +{ + mutex_init(&prime->mutex); + prime->size = PAGE_ALIGN(size); + prime->real_type = type; + prime->dma_buf = NULL; + prime->refcount_release = refcount_release; + return ttm_base_object_init(tfile, &prime->base, shareable, + ttm_prime_type, + ttm_prime_refcount_release, + ref_obj_release); +} +EXPORT_SYMBOL(ttm_prime_object_init); diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index 2cc6cd9..9f8b690 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -6,6 +6,6 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \ vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \ vmwgfx_fence.o vmwgfx_dmabuf.o vmwgfx_scrn.o vmwgfx_context.o \ - vmwgfx_surface.o + vmwgfx_surface.o vmwgfx_prime.o obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 20d5485..c7a5496 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -677,7 +677,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) } dev_priv->tdev = ttm_object_device_init - (dev_priv->mem_global_ref.object, 12); + (dev_priv->mem_global_ref.object, 12, &vmw_prime_dmabuf_ops); if (unlikely(dev_priv->tdev == NULL)) { DRM_ERROR("Unable to initialize TTM object management.\n"); @@ -1210,7 +1210,7 @@ static const struct file_operations vmwgfx_driver_fops = { static struct drm_driver driver = { .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | - DRIVER_MODESET, + DRIVER_MODESET | DRIVER_PRIME, .load = vmw_driver_load, .unload = vmw_driver_unload, .lastclose = vmw_lastclose, @@ -1235,6 +1235,9 @@ static struct drm_driver driver = { .dumb_map_offset = vmw_dumb_map_offset, .dumb_destroy = vmw_dumb_destroy, + .prime_fd_to_handle = vmw_prime_fd_to_handle, + .prime_handle_to_fd = vmw_prime_handle_to_fd, + .fops = &vmwgfx_driver_fops, .name = VMWGFX_DRIVER_NAME, .desc = VMWGFX_DRIVER_DESC, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index e401d5d..db85985 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -819,6 +819,20 @@ int vmw_overlay_num_free_overlays(struct vmw_private *dev_priv); extern const struct ttm_mem_type_manager_func vmw_gmrid_manager_func; /** + * Prime - vmwgfx_prime.c + */ + +extern const struct dma_buf_ops vmw_prime_dmabuf_ops; +extern int vmw_prime_fd_to_handle(struct drm_device *dev, + struct drm_file *file_priv, + int fd, u32 *handle); +extern int vmw_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, + uint32_t handle, uint32_t flags, + int *prime_fd); + + +/** * Inline helper functions */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c new file mode 100644 index 0000000..31fe32d --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright © 2013 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: + * Thomas Hellstrom <thellstrom@vmware.com> + * + */ + +#include "vmwgfx_drv.h" +#include <linux/dma-buf.h> +#include <drm/ttm/ttm_object.h> + +/* + * DMA-BUF attach- and mapping methods. No need to implement + * these until we have other virtual devices use them. + */ + +static int vmw_prime_map_attach(struct dma_buf *dma_buf, + struct device *target_dev, + struct dma_buf_attachment *attach) +{ + return -ENOSYS; +} + +static void vmw_prime_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ +} + +static struct sg_table *vmw_prime_map_dma_buf(struct dma_buf_attachment *attach, + enum dma_data_direction dir) +{ + return ERR_PTR(-ENOSYS); +} + +static void vmw_prime_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgb, + enum dma_data_direction dir) +{ +} + +static void *vmw_prime_dmabuf_vmap(struct dma_buf *dma_buf) +{ + return NULL; +} + +static void vmw_prime_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ +} + +static void *vmw_prime_dmabuf_kmap_atomic(struct dma_buf *dma_buf, + unsigned long page_num) +{ + return NULL; +} + +static void vmw_prime_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, + unsigned long page_num, void *addr) +{ + +} +static void *vmw_prime_dmabuf_kmap(struct dma_buf *dma_buf, + unsigned long page_num) +{ + return NULL; +} + +static void vmw_prime_dmabuf_kunmap(struct dma_buf *dma_buf, + unsigned long page_num, void *addr) +{ + +} + +static int vmw_prime_dmabuf_mmap(struct dma_buf *dma_buf, + struct vm_area_struct *vma) +{ + WARN_ONCE(true, "Attempted use of dmabuf mmap. Bad.\n"); + return -ENOSYS; +} + +const struct dma_buf_ops vmw_prime_dmabuf_ops = { + .attach = vmw_prime_map_attach, + .detach = vmw_prime_map_detach, + .map_dma_buf = vmw_prime_map_dma_buf, + .unmap_dma_buf = vmw_prime_unmap_dma_buf, + .release = NULL, + .kmap = vmw_prime_dmabuf_kmap, + .kmap_atomic = vmw_prime_dmabuf_kmap_atomic, + .kunmap = vmw_prime_dmabuf_kunmap, + .kunmap_atomic = vmw_prime_dmabuf_kunmap_atomic, + .mmap = vmw_prime_dmabuf_mmap, + .vmap = vmw_prime_dmabuf_vmap, + .vunmap = vmw_prime_dmabuf_vunmap, +}; + +int vmw_prime_fd_to_handle(struct drm_device *dev, + struct drm_file *file_priv, + int fd, u32 *handle) +{ + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + + return ttm_prime_fd_to_handle(tfile, fd, handle); +} + +int vmw_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, + uint32_t handle, uint32_t flags, + int *prime_fd) +{ + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + + return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 252501a..efe2b74 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -35,7 +35,7 @@ #define VMW_RES_EVICT_ERR_COUNT 10 struct vmw_user_dma_buffer { - struct ttm_base_object base; + struct ttm_prime_object prime; struct vmw_dma_buffer dma; }; @@ -297,7 +297,7 @@ int vmw_user_resource_lookup_handle(struct vmw_private *dev_priv, if (unlikely(base == NULL)) return -EINVAL; - if (unlikely(base->object_type != converter->object_type)) + if (unlikely(ttm_base_object_type(base) != converter->object_type)) goto out_bad_resource; res = converter->base_obj_to_res(base); @@ -387,7 +387,7 @@ static void vmw_user_dmabuf_destroy(struct ttm_buffer_object *bo) { struct vmw_user_dma_buffer *vmw_user_bo = vmw_user_dma_buffer(bo); - ttm_base_object_kfree(vmw_user_bo, base); + ttm_prime_object_kfree(vmw_user_bo, prime); } static void vmw_user_dmabuf_release(struct ttm_base_object **p_base) @@ -401,7 +401,8 @@ static void vmw_user_dmabuf_release(struct ttm_base_object **p_base) if (unlikely(base == NULL)) return; - vmw_user_bo = container_of(base, struct vmw_user_dma_buffer, base); + vmw_user_bo = container_of(base, struct vmw_user_dma_buffer, + prime.base); bo = &vmw_user_bo->dma.base; ttm_bo_unref(&bo); } @@ -442,18 +443,19 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv, return ret; tmp = ttm_bo_reference(&user_bo->dma.base); - ret = ttm_base_object_init(tfile, - &user_bo->base, - shareable, - ttm_buffer_type, - &vmw_user_dmabuf_release, NULL); + ret = ttm_prime_object_init(tfile, + size, + &user_bo->prime, + shareable, + ttm_buffer_type, + &vmw_user_dmabuf_release, NULL); if (unlikely(ret != 0)) { ttm_bo_unref(&tmp); goto out_no_base_object; } *p_dma_buf = &user_bo->dma; - *handle = user_bo->base.hash.key; + *handle = user_bo->prime.base.hash.key; out_no_base_object: return ret; @@ -475,8 +477,8 @@ int vmw_user_dmabuf_verify_access(struct ttm_buffer_object *bo, return -EPERM; vmw_user_bo = vmw_user_dma_buffer(bo); - return (vmw_user_bo->base.tfile == tfile || - vmw_user_bo->base.shareable) ? 0 : -EPERM; + return (vmw_user_bo->prime.base.tfile == tfile || + vmw_user_bo->prime.base.shareable) ? 0 : -EPERM; } int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data, @@ -538,14 +540,15 @@ int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile, return -ESRCH; } - if (unlikely(base->object_type != ttm_buffer_type)) { + if (unlikely(ttm_base_object_type(base) != ttm_buffer_type)) { ttm_base_object_unref(&base); printk(KERN_ERR "Invalid buffer object handle 0x%08lx.\n", (unsigned long)handle); return -EINVAL; } - vmw_user_bo = container_of(base, struct vmw_user_dma_buffer, base); + vmw_user_bo = container_of(base, struct vmw_user_dma_buffer, + prime.base); (void)ttm_bo_reference(&vmw_user_bo->dma.base); ttm_base_object_unref(&base); *out = &vmw_user_bo->dma; @@ -562,7 +565,8 @@ int vmw_user_dmabuf_reference(struct ttm_object_file *tfile, return -EINVAL; user_bo = container_of(dma_buf, struct vmw_user_dma_buffer, dma); - return ttm_ref_object_add(tfile, &user_bo->base, TTM_REF_USAGE, NULL); + return ttm_ref_object_add(tfile, &user_bo->prime.base, + TTM_REF_USAGE, NULL); } /* @@ -807,15 +811,16 @@ int vmw_dumb_create(struct drm_file *file_priv, goto out_no_dmabuf; tmp = ttm_bo_reference(&vmw_user_bo->dma.base); - ret = ttm_base_object_init(vmw_fpriv(file_priv)->tfile, - &vmw_user_bo->base, - false, - ttm_buffer_type, - &vmw_user_dmabuf_release, NULL); + ret = ttm_prime_object_init(vmw_fpriv(file_priv)->tfile, + args->size, + &vmw_user_bo->prime, + false, + ttm_buffer_type, + &vmw_user_dmabuf_release, NULL); if (unlikely(ret != 0)) goto out_no_base_object; - args->handle = vmw_user_bo->base.hash.key; + args->handle = vmw_user_bo->prime.base.hash.key; out_no_base_object: ttm_bo_unref(&tmp); @@ -994,7 +999,6 @@ void vmw_resource_unreserve(struct vmw_resource *res, */ static int vmw_resource_check_buffer(struct vmw_resource *res, - struct ww_acquire_ctx *ticket, bool interruptible, struct ttm_validate_buffer *val_buf) { @@ -1011,7 +1015,7 @@ vmw_resource_check_buffer(struct vmw_resource *res, INIT_LIST_HEAD(&val_list); val_buf->bo = ttm_bo_reference(&res->backup->base); list_add_tail(&val_buf->head, &val_list); - ret = ttm_eu_reserve_buffers(ticket, &val_list); + ret = ttm_eu_reserve_buffers(NULL, &val_list); if (unlikely(ret != 0)) goto out_no_reserve; @@ -1029,7 +1033,7 @@ vmw_resource_check_buffer(struct vmw_resource *res, return 0; out_no_validate: - ttm_eu_backoff_reservation(ticket, &val_list); + ttm_eu_backoff_reservation(NULL, &val_list); out_no_reserve: ttm_bo_unref(&val_buf->bo); if (backup_dirty) @@ -1074,8 +1078,7 @@ int vmw_resource_reserve(struct vmw_resource *res, bool no_backup) * @val_buf: Backup buffer information. */ static void -vmw_resource_backoff_reservation(struct ww_acquire_ctx *ticket, - struct ttm_validate_buffer *val_buf) +vmw_resource_backoff_reservation(struct ttm_validate_buffer *val_buf) { struct list_head val_list; @@ -1084,7 +1087,7 @@ vmw_resource_backoff_reservation(struct ww_acquire_ctx *ticket, INIT_LIST_HEAD(&val_list); list_add_tail(&val_buf->head, &val_list); - ttm_eu_backoff_reservation(ticket, &val_list); + ttm_eu_backoff_reservation(NULL, &val_list); ttm_bo_unref(&val_buf->bo); } @@ -1099,14 +1102,12 @@ int vmw_resource_do_evict(struct vmw_resource *res, bool interruptible) { struct ttm_validate_buffer val_buf; const struct vmw_res_func *func = res->func; - struct ww_acquire_ctx ticket; int ret; BUG_ON(!func->may_evict); val_buf.bo = NULL; - ret = vmw_resource_check_buffer(res, &ticket, interruptible, - &val_buf); + ret = vmw_resource_check_buffer(res, interruptible, &val_buf); if (unlikely(ret != 0)) return ret; @@ -1121,7 +1122,7 @@ int vmw_resource_do_evict(struct vmw_resource *res, bool interruptible) res->backup_dirty = true; res->res_dirty = false; out_no_unbind: - vmw_resource_backoff_reservation(&ticket, &val_buf); + vmw_resource_backoff_reservation(&val_buf); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 5828143..7de2ea8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -38,7 +38,7 @@ * @size: TTM accounting size for the surface. */ struct vmw_user_surface { - struct ttm_base_object base; + struct ttm_prime_object prime; struct vmw_surface srf; uint32_t size; uint32_t backup_handle; @@ -580,7 +580,8 @@ static int vmw_surface_init(struct vmw_private *dev_priv, static struct vmw_resource * vmw_user_surface_base_to_res(struct ttm_base_object *base) { - return &(container_of(base, struct vmw_user_surface, base)->srf.res); + return &(container_of(base, struct vmw_user_surface, + prime.base)->srf.res); } /** @@ -599,7 +600,7 @@ static void vmw_user_surface_free(struct vmw_resource *res) kfree(srf->offsets); kfree(srf->sizes); kfree(srf->snooper.image); - ttm_base_object_kfree(user_srf, base); + ttm_prime_object_kfree(user_srf, prime); ttm_mem_global_free(vmw_mem_glob(dev_priv), size); } @@ -616,7 +617,7 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) { struct ttm_base_object *base = *p_base; struct vmw_user_surface *user_srf = - container_of(base, struct vmw_user_surface, base); + container_of(base, struct vmw_user_surface, prime.base); struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; @@ -790,8 +791,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, } srf->snooper.crtc = NULL; - user_srf->base.shareable = false; - user_srf->base.tfile = NULL; + user_srf->prime.base.shareable = false; + user_srf->prime.base.tfile = NULL; /** * From this point, the generic resource management functions @@ -803,9 +804,9 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, goto out_unlock; tmp = vmw_resource_reference(&srf->res); - ret = ttm_base_object_init(tfile, &user_srf->base, - req->shareable, VMW_RES_SURFACE, - &vmw_user_surface_base_release, NULL); + ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime, + req->shareable, VMW_RES_SURFACE, + &vmw_user_surface_base_release, NULL); if (unlikely(ret != 0)) { vmw_resource_unreference(&tmp); @@ -813,7 +814,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, goto out_unlock; } - rep->sid = user_srf->base.hash.key; + rep->sid = user_srf->prime.base.hash.key; vmw_resource_unreference(&res); ttm_read_unlock(&vmaster->lock); @@ -823,7 +824,7 @@ out_no_copy: out_no_offsets: kfree(srf->sizes); out_no_sizes: - ttm_base_object_kfree(user_srf, base); + ttm_prime_object_kfree(user_srf, prime); out_no_user_srf: ttm_mem_global_free(vmw_mem_glob(dev_priv), size); out_unlock: @@ -859,13 +860,14 @@ int vmw_surface_reference_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (unlikely(base->object_type != VMW_RES_SURFACE)) + if (unlikely(ttm_base_object_type(base) != VMW_RES_SURFACE)) goto out_bad_resource; - user_srf = container_of(base, struct vmw_user_surface, base); + user_srf = container_of(base, struct vmw_user_surface, prime.base); srf = &user_srf->srf; - ret = ttm_ref_object_add(tfile, &user_srf->base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, &user_srf->prime.base, + TTM_REF_USAGE, NULL); if (unlikely(ret != 0)) { DRM_ERROR("Could not add a reference to a surface.\n"); goto out_no_reference; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index b3ab9d4..52d548f 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -656,6 +656,7 @@ config SENSORS_LM75 - Analog Devices ADT75 - Dallas Semiconductor DS75, DS1775 and DS7505 + - Global Mixed-mode Technology (GMT) G751 - Maxim MAX6625 and MAX6626 - Microchip MCP980x - National Semiconductor LM75, LM75A diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 8d40da3..6a34f7f 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -602,9 +602,8 @@ static int read_domain_devices(struct acpi_power_meter_resource *resource) /* Create a symlink to domain objects */ resource->domain_devices[i] = NULL; - status = acpi_bus_get_device(element->reference.handle, - &resource->domain_devices[i]); - if (ACPI_FAILURE(status)) + if (acpi_bus_get_device(element->reference.handle, + &resource->domain_devices[i])) continue; obj = resource->domain_devices[i]; diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index c03b490..7e3ef13 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -39,6 +39,7 @@ enum lm75_type { /* keep sorted in alphabetical order */ ds1775, ds75, ds7505, + g751, lm75, lm75a, max6625, @@ -208,6 +209,7 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) data->resolution = 12; data->sample_time = HZ / 4; break; + case g751: case lm75: case lm75a: data->resolution = 9; @@ -296,6 +298,7 @@ static const struct i2c_device_id lm75_ids[] = { { "ds1775", ds1775, }, { "ds75", ds75, }, { "ds7505", ds7505, }, + { "g751", g751, }, { "lm75", lm75, }, { "lm75a", lm75a, }, { "max6625", max6625, }, diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index d17325d..cf811c1 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -274,6 +274,8 @@ static const u16 NCT6775_FAN_PULSE_SHIFT[] = { 0, 0, 0, 0, 0, 0 }; static const u16 NCT6775_REG_TEMP[] = { 0x27, 0x150, 0x250, 0x62b, 0x62c, 0x62d }; +static const u16 NCT6775_REG_TEMP_MON[] = { 0x73, 0x75, 0x77 }; + static const u16 NCT6775_REG_TEMP_CONFIG[ARRAY_SIZE(NCT6775_REG_TEMP)] = { 0, 0x152, 0x252, 0x628, 0x629, 0x62A }; static const u16 NCT6775_REG_TEMP_HYST[ARRAY_SIZE(NCT6775_REG_TEMP)] = { @@ -454,6 +456,7 @@ static const u16 NCT6779_REG_CRITICAL_PWM[] = { 0x137, 0x237, 0x337, 0x837, 0x937, 0xa37 }; static const u16 NCT6779_REG_TEMP[] = { 0x27, 0x150 }; +static const u16 NCT6779_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b }; static const u16 NCT6779_REG_TEMP_CONFIG[ARRAY_SIZE(NCT6779_REG_TEMP)] = { 0x18, 0x152 }; static const u16 NCT6779_REG_TEMP_HYST[ARRAY_SIZE(NCT6779_REG_TEMP)] = { @@ -507,6 +510,13 @@ static const u16 NCT6779_REG_TEMP_CRIT[ARRAY_SIZE(nct6779_temp_label) - 1] #define NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE 0x28 +static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[6] = { 0, 0x239 }; +static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[6] = { 0, 0x23a }; +static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[6] = { 0, 0x23b }; +static const u16 NCT6791_REG_WEIGHT_DUTY_STEP[6] = { 0, 0x23c }; +static const u16 NCT6791_REG_WEIGHT_TEMP_BASE[6] = { 0, 0x23d }; +static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[6] = { 0, 0x23e }; + static const u16 NCT6791_REG_ALARM[NUM_REG_ALARM] = { 0x459, 0x45A, 0x45B, 0x568, 0x45D }; @@ -534,6 +544,7 @@ static const u16 NCT6106_REG_IN[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x07, 0x08, 0x09 }; static const u16 NCT6106_REG_TEMP[] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15 }; +static const u16 NCT6106_REG_TEMP_MON[] = { 0x18, 0x19, 0x1a }; static const u16 NCT6106_REG_TEMP_HYST[] = { 0xc3, 0xc7, 0xcb, 0xcf, 0xd3, 0xd7 }; static const u16 NCT6106_REG_TEMP_OVER[] = { @@ -1307,6 +1318,9 @@ static void nct6775_update_pwm(struct device *dev) if (reg & 0x80) data->pwm[2][i] = 0; + if (!data->REG_WEIGHT_TEMP_SEL[i]) + continue; + reg = nct6775_read_value(data, data->REG_WEIGHT_TEMP_SEL[i]); data->pwm_weight_temp_sel[i] = reg & 0x1f; /* If weight is disabled, report weight source as 0 */ @@ -2852,6 +2866,9 @@ static umode_t nct6775_pwm_is_visible(struct kobject *kobj, if (!(data->has_pwm & (1 << pwm))) return 0; + if ((nr >= 14 && nr <= 18) || nr == 21) /* weight */ + if (!data->REG_WEIGHT_TEMP_SEL[pwm]) + return 0; if (nr == 19 && data->REG_PWM[3] == NULL) /* pwm_max */ return 0; if (nr == 20 && data->REG_PWM[4] == NULL) /* pwm_step */ @@ -2945,11 +2962,11 @@ static struct sensor_device_template *nct6775_attributes_pwm_template[] = { &sensor_dev_template_pwm_step_down_time, &sensor_dev_template_pwm_start, &sensor_dev_template_pwm_floor, - &sensor_dev_template_pwm_weight_temp_sel, + &sensor_dev_template_pwm_weight_temp_sel, /* 14 */ &sensor_dev_template_pwm_weight_temp_step, &sensor_dev_template_pwm_weight_temp_step_tol, &sensor_dev_template_pwm_weight_temp_step_base, - &sensor_dev_template_pwm_weight_duty_step, + &sensor_dev_template_pwm_weight_duty_step, /* 18 */ &sensor_dev_template_pwm_max, /* 19 */ &sensor_dev_template_pwm_step, /* 20 */ &sensor_dev_template_pwm_weight_duty_base, /* 21 */ @@ -3253,9 +3270,9 @@ static int nct6775_probe(struct platform_device *pdev) int i, s, err = 0; int src, mask, available; const u16 *reg_temp, *reg_temp_over, *reg_temp_hyst, *reg_temp_config; - const u16 *reg_temp_alternate, *reg_temp_crit; + const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit; const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL; - int num_reg_temp; + int num_reg_temp, num_reg_temp_mon; u8 cr2a; struct attribute_group *group; struct device *hwmon_dev; @@ -3338,7 +3355,9 @@ static int nct6775_probe(struct platform_device *pdev) data->BEEP_BITS = NCT6106_BEEP_BITS; reg_temp = NCT6106_REG_TEMP; + reg_temp_mon = NCT6106_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON); reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; @@ -3410,7 +3429,9 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6775_REG_BEEP; reg_temp = NCT6775_REG_TEMP; + reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6775_REG_TEMP_CONFIG; @@ -3480,7 +3501,9 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6776_REG_BEEP; reg_temp = NCT6775_REG_TEMP; + reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6776_REG_TEMP_CONFIG; @@ -3554,7 +3577,9 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6776_REG_BEEP; reg_temp = NCT6779_REG_TEMP; + reg_temp_mon = NCT6779_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON); reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; @@ -3603,8 +3628,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; data->REG_PWM[2] = NCT6775_REG_FAN_STOP_OUTPUT; - data->REG_PWM[5] = NCT6775_REG_WEIGHT_DUTY_STEP; - data->REG_PWM[6] = NCT6776_REG_WEIGHT_DUTY_BASE; + data->REG_PWM[5] = NCT6791_REG_WEIGHT_DUTY_STEP; + data->REG_PWM[6] = NCT6791_REG_WEIGHT_DUTY_BASE; data->REG_PWM_READ = NCT6775_REG_PWM_READ; data->REG_PWM_MODE = NCT6776_REG_PWM_MODE; data->PWM_MODE_MASK = NCT6776_PWM_MODE_MASK; @@ -3620,15 +3645,17 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_TEMP_OFFSET = NCT6779_REG_TEMP_OFFSET; data->REG_TEMP_SOURCE = NCT6775_REG_TEMP_SOURCE; data->REG_TEMP_SEL = NCT6775_REG_TEMP_SEL; - data->REG_WEIGHT_TEMP_SEL = NCT6775_REG_WEIGHT_TEMP_SEL; - data->REG_WEIGHT_TEMP[0] = NCT6775_REG_WEIGHT_TEMP_STEP; - data->REG_WEIGHT_TEMP[1] = NCT6775_REG_WEIGHT_TEMP_STEP_TOL; - data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; + data->REG_WEIGHT_TEMP_SEL = NCT6791_REG_WEIGHT_TEMP_SEL; + data->REG_WEIGHT_TEMP[0] = NCT6791_REG_WEIGHT_TEMP_STEP; + data->REG_WEIGHT_TEMP[1] = NCT6791_REG_WEIGHT_TEMP_STEP_TOL; + data->REG_WEIGHT_TEMP[2] = NCT6791_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6791_REG_ALARM; data->REG_BEEP = NCT6776_REG_BEEP; reg_temp = NCT6779_REG_TEMP; + reg_temp_mon = NCT6779_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); + num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON); reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; @@ -3729,6 +3756,50 @@ static int nct6775_probe(struct platform_device *pdev) s++; } + /* + * Repeat with temperatures used for fan control. + * This set of registers does not support limits. + */ + for (i = 0; i < num_reg_temp_mon; i++) { + if (reg_temp_mon[i] == 0) + continue; + + src = nct6775_read_value(data, data->REG_TEMP_SEL[i]) & 0x1f; + if (!src || (mask & (1 << src))) + continue; + + if (src >= data->temp_label_num || + !strlen(data->temp_label[src])) { + dev_info(dev, + "Invalid temperature source %d at index %d, source register 0x%x, temp register 0x%x\n", + src, i, data->REG_TEMP_SEL[i], + reg_temp_mon[i]); + continue; + } + + mask |= 1 << src; + + /* Use fixed index for SYSTIN(1), CPUTIN(2), AUXTIN(3) */ + if (src <= data->temp_fixed_num) { + if (data->have_temp & (1 << (src - 1))) + continue; + data->have_temp |= 1 << (src - 1); + data->have_temp_fixed |= 1 << (src - 1); + data->reg_temp[0][src - 1] = reg_temp_mon[i]; + data->temp_src[src - 1] = src; + continue; + } + + if (s >= NUM_TEMP) + continue; + + /* Use dynamic index for other sources */ + data->have_temp |= 1 << s; + data->reg_temp[0][s] = reg_temp_mon[i]; + data->temp_src[s] = src; + s++; + } + #ifdef USE_ALTERNATE /* * Go through the list of alternate temp registers and enable diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6df2350..6be57c3 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -22,6 +22,7 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/in6.h> +#include <linux/llist.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include <target/target_core_base.h> @@ -489,6 +490,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); + mutex_init(&isert_conn->conn_comp_mutex); spin_lock_init(&isert_conn->conn_lock); cma_id->context = isert_conn; @@ -843,14 +845,32 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn, } static void -isert_init_send_wr(struct isert_cmd *isert_cmd, struct ib_send_wr *send_wr) +isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, + struct ib_send_wr *send_wr, bool coalesce) { + struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc; + isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND; send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; send_wr->opcode = IB_WR_SEND; - send_wr->send_flags = IB_SEND_SIGNALED; - send_wr->sg_list = &isert_cmd->tx_desc.tx_sg[0]; + send_wr->sg_list = &tx_desc->tx_sg[0]; send_wr->num_sge = isert_cmd->tx_desc.num_sge; + /* + * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED + * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls. + */ + mutex_lock(&isert_conn->conn_comp_mutex); + if (coalesce && + ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) { + llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist); + mutex_unlock(&isert_conn->conn_comp_mutex); + return; + } + isert_conn->conn_comp_batch = 0; + tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist); + mutex_unlock(&isert_conn->conn_comp_mutex); + + send_wr->send_flags = IB_SEND_SIGNALED; } static int @@ -1582,8 +1602,8 @@ isert_response_completion(struct iser_tx_desc *tx_desc, } static void -isert_send_completion(struct iser_tx_desc *tx_desc, - struct isert_conn *isert_conn) +__isert_send_completion(struct iser_tx_desc *tx_desc, + struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct isert_cmd *isert_cmd = tx_desc->isert_cmd; @@ -1624,6 +1644,24 @@ isert_send_completion(struct iser_tx_desc *tx_desc, } static void +isert_send_completion(struct iser_tx_desc *tx_desc, + struct isert_conn *isert_conn) +{ + struct llist_node *llnode = tx_desc->comp_llnode_batch; + struct iser_tx_desc *t; + /* + * Drain coalesced completion llist starting from comp_llnode_batch + * setup in isert_init_send_wr(), and then complete trailing tx_desc. + */ + while (llnode) { + t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); + llnode = llist_next(llnode); + __isert_send_completion(t, isert_conn); + } + __isert_send_completion(tx_desc, isert_conn); +} + +static void isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; @@ -1793,7 +1831,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_cmd->tx_desc.num_sge = 2; } - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, true); pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1813,7 +1851,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, &isert_cmd->tx_desc.iscsi_header, nopout_response); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1831,7 +1869,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1849,7 +1887,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting Task Management Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1881,7 +1919,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) tx_dsg->lkey = isert_conn->conn_mr->lkey; isert_cmd->tx_desc.num_sge = 2; - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1921,7 +1959,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) tx_dsg->lkey = isert_conn->conn_mr->lkey; isert_cmd->tx_desc.num_sge = 2; } - isert_init_send_wr(isert_cmd, send_wr); + isert_init_send_wr(isert_conn, isert_cmd, send_wr, false); pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1991,8 +2029,6 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { data_left = se_cmd->data_length; - iscsit_increment_maxcmdsn(cmd, conn->sess); - cmd->stat_sn = conn->stat_sn++; } else { sg_off = cmd->write_data_done / PAGE_SIZE; data_left = se_cmd->data_length - cmd->write_data_done; @@ -2204,8 +2240,6 @@ isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { data_left = se_cmd->data_length; - iscsit_increment_maxcmdsn(cmd, conn->sess); - cmd->stat_sn = conn->stat_sn++; } else { sg_off = cmd->write_data_done / PAGE_SIZE; data_left = se_cmd->data_length - cmd->write_data_done; @@ -2259,18 +2293,26 @@ isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, data_len = min(data_left, rdma_write_max); wr->cur_rdma_length = data_len; - spin_lock_irqsave(&isert_conn->conn_lock, flags); - fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, - struct fast_reg_descriptor, list); - list_del(&fr_desc->list); - spin_unlock_irqrestore(&isert_conn->conn_lock, flags); - wr->fr_desc = fr_desc; + /* if there is a single dma entry, dma mr is sufficient */ + if (count == 1) { + ib_sge->addr = ib_sg_dma_address(ib_dev, &sg_start[0]); + ib_sge->length = ib_sg_dma_len(ib_dev, &sg_start[0]); + ib_sge->lkey = isert_conn->conn_mr->lkey; + wr->fr_desc = NULL; + } else { + spin_lock_irqsave(&isert_conn->conn_lock, flags); + fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, + struct fast_reg_descriptor, list); + list_del(&fr_desc->list); + spin_unlock_irqrestore(&isert_conn->conn_lock, flags); + wr->fr_desc = fr_desc; - ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, - ib_sge, offset, data_len); - if (ret) { - list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); - goto unmap_sg; + ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, + ib_sge, offset, data_len); + if (ret) { + list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); + goto unmap_sg; + } } return 0; @@ -2306,10 +2348,11 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) * Build isert_conn->tx_desc for iSCSI response PDU and attach */ isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) + iscsit_build_rsp_pdu(cmd, conn, true, (struct iscsi_scsi_rsp *) &isert_cmd->tx_desc.iscsi_header); isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); + isert_init_send_wr(isert_conn, isert_cmd, + &isert_cmd->tx_desc.send_wr, true); atomic_inc(&isert_conn->post_send_buf_count); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 631f209..691f90f 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -43,6 +43,8 @@ struct iser_tx_desc { struct ib_sge tx_sg[2]; int num_sge; struct isert_cmd *isert_cmd; + struct llist_node *comp_llnode_batch; + struct llist_node comp_llnode; struct ib_send_wr send_wr; } __packed; @@ -121,6 +123,10 @@ struct isert_conn { int conn_frwr_pool_size; /* lock to protect frwr_pool */ spinlock_t conn_lock; +#define ISERT_COMP_BATCH_COUNT 8 + int conn_comp_batch; + struct llist_head conn_comp_llist; + struct mutex conn_comp_mutex; }; #define ISERT_MAX_CQ 64 diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 6c923c7..520a7e5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1352,11 +1352,8 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) /* XXX(hch): this is a horrible layering violation.. */ spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state |= CMD_T_LUN_STOP; ioctx->cmd.transport_state &= ~CMD_T_ACTIVE; spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); - - complete(&ioctx->cmd.transport_lun_stop_comp); break; case SRPT_STATE_CMD_RSP_SENT: /* @@ -1364,9 +1361,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. */ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state |= CMD_T_LUN_STOP; - spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); break; case SRPT_STATE_MGMT_RSP_SENT: @@ -1476,7 +1470,6 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, { struct se_cmd *cmd; enum srpt_command_state state; - unsigned long flags; cmd = &ioctx->cmd; state = srpt_get_cmd_state(ioctx); @@ -1496,9 +1489,6 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, __func__, __LINE__, state); break; case SRPT_RDMA_WRITE_LAST: - spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); - ioctx->cmd.transport_state |= CMD_T_LUN_STOP; - spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); break; default: printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__, diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index e47dcb9..5cefb47 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -117,7 +117,6 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sk_buff *skb; struct sock *sk = sock->sk; - struct sockaddr_mISDN *maddr; int copied, err; @@ -135,9 +134,9 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) { - msg->msg_namelen = sizeof(struct sockaddr_mISDN); - maddr = (struct sockaddr_mISDN *)msg->msg_name; + if (msg->msg_name) { + struct sockaddr_mISDN *maddr = msg->msg_name; + maddr->family = AF_ISDN; maddr->dev = _pms(sk)->dev->id; if ((sk->sk_protocol == ISDN_P_LAPD_TE) || @@ -150,11 +149,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, maddr->sapi = _pms(sk)->ch.addr & 0xFF; maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xFF; } - } else { - if (msg->msg_namelen) - printk(KERN_WARNING "%s: too small namelen %d\n", - __func__, msg->msg_namelen); - msg->msg_namelen = 0; + msg->msg_namelen = sizeof(*maddr); } copied = skb->len + MISDN_HEADER_LEN; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 74630e9..d6447b3 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -697,7 +697,7 @@ static int genphy_config_advert(struct phy_device *phydev) * to the values in phydev. Assumes that the values are valid. * Please see phy_sanitize_settings(). */ -static int genphy_setup_forced(struct phy_device *phydev) +int genphy_setup_forced(struct phy_device *phydev) { int err; int ctl = 0; @@ -716,7 +716,7 @@ static int genphy_setup_forced(struct phy_device *phydev) return err; } - +EXPORT_SYMBOL(genphy_setup_forced); /** * genphy_restart_aneg - Enable and Restart Autonegotiation diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 69b482b..508e435 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -3,7 +3,7 @@ * * Author: Kriston Carson * - * Copyright (c) 2005, 2009 Freescale Semiconductor, Inc. + * Copyright (c) 2005, 2009, 2011 Freescale Semiconductor, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -18,6 +18,11 @@ #include <linux/ethtool.h> #include <linux/phy.h> +/* Vitesse Extended Page Magic Register(s) */ +#define MII_VSC82X4_EXT_PAGE_16E 0x10 +#define MII_VSC82X4_EXT_PAGE_17E 0x11 +#define MII_VSC82X4_EXT_PAGE_18E 0x12 + /* Vitesse Extended Control Register 1 */ #define MII_VSC8244_EXT_CON1 0x17 #define MII_VSC8244_EXTCON1_INIT 0x0000 @@ -54,7 +59,13 @@ #define MII_VSC8221_AUXCONSTAT_INIT 0x0004 /* need to set this bit? */ #define MII_VSC8221_AUXCONSTAT_RESERVED 0x0004 +/* Vitesse Extended Page Access Register */ +#define MII_VSC82X4_EXT_PAGE_ACCESS 0x1f + +#define PHY_ID_VSC8234 0x000fc620 #define PHY_ID_VSC8244 0x000fc6c0 +#define PHY_ID_VSC8574 0x000704a0 +#define PHY_ID_VSC8662 0x00070660 #define PHY_ID_VSC8221 0x000fc550 #define PHY_ID_VSC8211 0x000fc4b0 @@ -118,7 +129,9 @@ static int vsc82xx_config_intr(struct phy_device *phydev) if (phydev->interrupts == PHY_INTERRUPT_ENABLED) err = phy_write(phydev, MII_VSC8244_IMASK, - phydev->drv->phy_id == PHY_ID_VSC8244 ? + (phydev->drv->phy_id == PHY_ID_VSC8234 || + phydev->drv->phy_id == PHY_ID_VSC8244 || + phydev->drv->phy_id == PHY_ID_VSC8574) ? MII_VSC8244_IMASK_MASK : MII_VSC8221_IMASK_MASK); else { @@ -149,21 +162,114 @@ static int vsc8221_config_init(struct phy_device *phydev) */ } -/* Vitesse 824x */ +/* vsc82x4_config_autocross_enable - Enable auto MDI/MDI-X for forced links + * @phydev: target phy_device struct + * + * Enable auto MDI/MDI-X when in 10/100 forced link speeds by writing + * special values in the VSC8234/VSC8244 extended reserved registers + */ +static int vsc82x4_config_autocross_enable(struct phy_device *phydev) +{ + int ret; + + if (phydev->autoneg == AUTONEG_ENABLE || phydev->speed > SPEED_100) + return 0; + + /* map extended registers set 0x10 - 0x1e */ + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x52b5); + if (ret >= 0) + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_18E, 0x0012); + if (ret >= 0) + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_17E, 0x2803); + if (ret >= 0) + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_16E, 0x87fa); + /* map standard registers set 0x10 - 0x1e */ + if (ret >= 0) + ret = phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x0000); + else + phy_write(phydev, MII_VSC82X4_EXT_PAGE_ACCESS, 0x0000); + + return ret; +} + +/* vsc82x4_config_aneg - restart auto-negotiation or write BMCR + * @phydev: target phy_device struct + * + * Description: If auto-negotiation is enabled, we configure the + * advertising, and then restart auto-negotiation. If it is not + * enabled, then we write the BMCR and also start the auto + * MDI/MDI-X feature + */ +static int vsc82x4_config_aneg(struct phy_device *phydev) +{ + int ret; + + /* Enable auto MDI/MDI-X when in 10/100 forced link speeds by + * writing special values in the VSC8234 extended reserved registers + */ + if (phydev->autoneg != AUTONEG_ENABLE && phydev->speed <= SPEED_100) { + ret = genphy_setup_forced(phydev); + + if (ret < 0) /* error */ + return ret; + + return vsc82x4_config_autocross_enable(phydev); + } + + return genphy_config_aneg(phydev); +} + +/* Vitesse 82xx */ static struct phy_driver vsc82xx_driver[] = { { + .phy_id = PHY_ID_VSC8234, + .name = "Vitesse VSC8234", + .phy_id_mask = 0x000ffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &vsc824x_config_init, + .config_aneg = &vsc82x4_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc824x_ack_interrupt, + .config_intr = &vsc82xx_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { .phy_id = PHY_ID_VSC8244, .name = "Vitesse VSC8244", .phy_id_mask = 0x000fffc0, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &vsc824x_config_init, - .config_aneg = &genphy_config_aneg, + .config_aneg = &vsc82x4_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, .driver = { .owner = THIS_MODULE,}, }, { + .phy_id = PHY_ID_VSC8574, + .name = "Vitesse VSC8574", + .phy_id_mask = 0x000ffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &vsc824x_config_init, + .config_aneg = &vsc82x4_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc824x_ack_interrupt, + .config_intr = &vsc82xx_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_VSC8662, + .name = "Vitesse VSC8662", + .phy_id_mask = 0x000ffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &vsc824x_config_init, + .config_aneg = &vsc82x4_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc824x_ack_interrupt, + .config_intr = &vsc82xx_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { /* Vitesse 8221 */ .phy_id = PHY_ID_VSC8221, .phy_id_mask = 0x000ffff0, @@ -207,7 +313,10 @@ module_init(vsc82xx_init); module_exit(vsc82xx_exit); static struct mdio_device_id __maybe_unused vitesse_tbl[] = { + { PHY_ID_VSC8234, 0x000ffff0 }, { PHY_ID_VSC8244, 0x000fffc0 }, + { PHY_ID_VSC8574, 0x000ffff0 }, + { PHY_ID_VSC8662, 0x000ffff0 }, { PHY_ID_VSC8221, 0x000ffff0 }, { PHY_ID_VSC8211, 0x000ffff0 }, { } diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 5f66e30..82ee6ed 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -979,8 +979,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, if (error < 0) goto end; - m->msg_namelen = 0; - if (skb) { total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f3fce41..5107372 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -24,7 +24,7 @@ #include <linux/ipv6.h> /* Version Information */ -#define DRIVER_VERSION "v1.01.0 (2013/08/12)" +#define DRIVER_VERSION "v1.02.0 (2013/10/28)" #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>" #define DRIVER_DESC "Realtek RTL8152 Based USB 2.0 Ethernet Adapters" #define MODULENAME "r8152" @@ -307,22 +307,22 @@ enum rtl8152_flags { #define MCU_TYPE_USB 0x0000 struct rx_desc { - u32 opts1; + __le32 opts1; #define RX_LEN_MASK 0x7fff - u32 opts2; - u32 opts3; - u32 opts4; - u32 opts5; - u32 opts6; + __le32 opts2; + __le32 opts3; + __le32 opts4; + __le32 opts5; + __le32 opts6; }; struct tx_desc { - u32 opts1; + __le32 opts1; #define TX_FS (1 << 31) /* First segment of a packet */ #define TX_LS (1 << 30) /* Final segment of a packet */ #define TX_LEN_MASK 0x3ffff - u32 opts2; + __le32 opts2; #define UDP_CS (1 << 31) /* Calculate UDP/IP checksum */ #define TCP_CS (1 << 30) /* Calculate TCP/IP checksum */ #define IPV4_CS (1 << 29) /* Calculate IPv4 checksum */ @@ -365,6 +365,7 @@ struct r8152 { struct mii_if_info mii; int intr_interval; u32 msg_enable; + u32 tx_qlen; u16 ocp_base; u8 *intr_buff; u8 version; @@ -876,7 +877,7 @@ static void write_bulk_callback(struct urb *urb) static void intr_callback(struct urb *urb) { struct r8152 *tp; - __u16 *d; + __le16 *d; int status = urb->status; int res; @@ -1136,14 +1137,14 @@ r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb) static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) { - u32 remain; + int remain; u8 *tx_data; tx_data = agg->head; agg->skb_num = agg->skb_len = 0; - remain = rx_buf_sz - sizeof(struct tx_desc); + remain = rx_buf_sz; - while (remain >= ETH_ZLEN) { + while (remain >= ETH_ZLEN + sizeof(struct tx_desc)) { struct tx_desc *tx_desc; struct sk_buff *skb; unsigned int len; @@ -1152,12 +1153,14 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) if (!skb) break; + remain -= sizeof(*tx_desc); len = skb->len; if (remain < len) { skb_queue_head(&tp->tx_queue, skb); break; } + tx_data = tx_agg_align(tx_data); tx_desc = (struct tx_desc *)tx_data; tx_data += sizeof(*tx_desc); @@ -1167,11 +1170,18 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) agg->skb_len += len; dev_kfree_skb_any(skb); - tx_data = tx_agg_align(tx_data + len); - remain = rx_buf_sz - sizeof(*tx_desc) - - (u32)((void *)tx_data - agg->head); + tx_data += len; + remain = rx_buf_sz - (int)(tx_agg_align(tx_data) - agg->head); } + netif_tx_lock(tp->netdev); + + if (netif_queue_stopped(tp->netdev) && + skb_queue_len(&tp->tx_queue) < tp->tx_qlen) + netif_wake_queue(tp->netdev); + + netif_tx_unlock(tp->netdev); + usb_fill_bulk_urb(agg->urb, tp->udev, usb_sndbulkpipe(tp->udev, 2), agg->head, (int)(tx_data - (u8 *)agg->head), (usb_complete_t)write_bulk_callback, agg); @@ -1188,7 +1198,6 @@ static void rx_bottom(struct r8152 *tp) list_for_each_safe(cursor, next, &tp->rx_done) { struct rx_desc *rx_desc; struct rx_agg *agg; - unsigned pkt_len; int len_used = 0; struct urb *urb; u8 *rx_data; @@ -1204,17 +1213,22 @@ static void rx_bottom(struct r8152 *tp) rx_desc = agg->head; rx_data = agg->head; - pkt_len = le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK; - len_used += sizeof(struct rx_desc) + pkt_len; + len_used += sizeof(struct rx_desc); - while (urb->actual_length >= len_used) { + while (urb->actual_length > len_used) { struct net_device *netdev = tp->netdev; struct net_device_stats *stats; + unsigned int pkt_len; struct sk_buff *skb; + pkt_len = le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK; if (pkt_len < ETH_ZLEN) break; + len_used += pkt_len; + if (urb->actual_length < len_used) + break; + stats = rtl8152_get_stats(netdev); pkt_len -= 4; /* CRC */ @@ -1234,9 +1248,8 @@ static void rx_bottom(struct r8152 *tp) rx_data = rx_agg_align(rx_data + pkt_len + 4); rx_desc = (struct rx_desc *)rx_data; - pkt_len = le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK; len_used = (int)(rx_data - (u8 *)agg->head); - len_used += sizeof(struct rx_desc) + pkt_len; + len_used += sizeof(struct rx_desc); } submit: @@ -1384,53 +1397,17 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct r8152 *tp = netdev_priv(netdev); - struct net_device_stats *stats = rtl8152_get_stats(netdev); - unsigned long flags; - struct tx_agg *agg = NULL; - struct tx_desc *tx_desc; - unsigned int len; - u8 *tx_data; - int res; skb_tx_timestamp(skb); - /* If tx_queue is not empty, it means at least one previous packt */ - /* is waiting for sending. Don't send current one before it. */ - if (skb_queue_empty(&tp->tx_queue)) - agg = r8152_get_tx_agg(tp); - - if (!agg) { - skb_queue_tail(&tp->tx_queue, skb); - return NETDEV_TX_OK; - } + skb_queue_tail(&tp->tx_queue, skb); - tx_desc = (struct tx_desc *)agg->head; - tx_data = agg->head + sizeof(*tx_desc); - agg->skb_num = agg->skb_len = 0; + if (list_empty(&tp->tx_free) && + skb_queue_len(&tp->tx_queue) > tp->tx_qlen) + netif_stop_queue(netdev); - len = skb->len; - r8152_tx_csum(tp, tx_desc, skb); - memcpy(tx_data, skb->data, len); - dev_kfree_skb_any(skb); - agg->skb_num++; - agg->skb_len += len; - usb_fill_bulk_urb(agg->urb, tp->udev, usb_sndbulkpipe(tp->udev, 2), - agg->head, len + sizeof(*tx_desc), - (usb_complete_t)write_bulk_callback, agg); - res = usb_submit_urb(agg->urb, GFP_ATOMIC); - if (res) { - /* Can we get/handle EPIPE here? */ - if (res == -ENODEV) { - netif_device_detach(tp->netdev); - } else { - netif_warn(tp, tx_err, netdev, - "failed tx_urb %d\n", res); - stats->tx_dropped++; - spin_lock_irqsave(&tp->tx_lock, flags); - list_add_tail(&agg->list, &tp->tx_free); - spin_unlock_irqrestore(&tp->tx_lock, flags); - } - } + if (!list_empty(&tp->tx_free)) + tasklet_schedule(&tp->tl); return NETDEV_TX_OK; } @@ -1459,6 +1436,14 @@ static void rtl8152_nic_reset(struct r8152 *tp) } } +static void set_tx_qlen(struct r8152 *tp) +{ + struct net_device *netdev = tp->netdev; + + tp->tx_qlen = rx_buf_sz / (netdev->mtu + VLAN_ETH_HLEN + VLAN_HLEN + + sizeof(struct tx_desc)); +} + static inline u8 rtl8152_get_speed(struct r8152 *tp) { return ocp_read_byte(tp, MCU_TYPE_PLA, PLA_PHYSTATUS); @@ -1470,6 +1455,7 @@ static int rtl8152_enable(struct r8152 *tp) int i, ret; u8 speed; + set_tx_qlen(tp); speed = rtl8152_get_speed(tp); if (speed & _10bps) { ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR); diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 11f5358..d39b79f 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -701,6 +701,54 @@ static int ar9550_hw_get_modes_txgain_index(struct ath_hw *ah, return ret; } +static void ar9003_doubler_fix(struct ath_hw *ah) +{ + if (AR_SREV_9300(ah) || AR_SREV_9580(ah) || AR_SREV_9550(ah)) { + REG_RMW(ah, AR_PHY_65NM_CH0_RXTX2, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); + REG_RMW(ah, AR_PHY_65NM_CH1_RXTX2, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); + REG_RMW(ah, AR_PHY_65NM_CH2_RXTX2, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); + + udelay(200); + + REG_CLR_BIT(ah, AR_PHY_65NM_CH0_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); + REG_CLR_BIT(ah, AR_PHY_65NM_CH1_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); + REG_CLR_BIT(ah, AR_PHY_65NM_CH2_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); + + udelay(1); + + REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); + REG_RMW_FIELD(ah, AR_PHY_65NM_CH1_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); + REG_RMW_FIELD(ah, AR_PHY_65NM_CH2_RXTX2, + AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); + + udelay(200); + + REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_SYNTH12, + AR_PHY_65NM_CH0_SYNTH12_VREFMUL3, 0xf); + + REG_RMW(ah, AR_PHY_65NM_CH0_RXTX2, 0, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); + REG_RMW(ah, AR_PHY_65NM_CH1_RXTX2, 0, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); + REG_RMW(ah, AR_PHY_65NM_CH2_RXTX2, 0, + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | + 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); + } +} + static int ar9003_hw_process_ini(struct ath_hw *ah, struct ath9k_channel *chan) { @@ -726,6 +774,8 @@ static int ar9003_hw_process_ini(struct ath_hw *ah, modesIndex); } + ar9003_doubler_fix(ah); + /* * RXGAIN initvals. */ diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h index fca6243..2af667b 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h @@ -656,13 +656,24 @@ #define AR_PHY_SYNTH4_LONG_SHIFT_SELECT ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x00000001 : 0x00000002) #define AR_PHY_SYNTH4_LONG_SHIFT_SELECT_S ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0 : 1) #define AR_PHY_65NM_CH0_SYNTH7 0x16098 +#define AR_PHY_65NM_CH0_SYNTH12 0x160ac #define AR_PHY_65NM_CH0_BIAS1 0x160c0 #define AR_PHY_65NM_CH0_BIAS2 0x160c4 #define AR_PHY_65NM_CH0_BIAS4 0x160cc +#define AR_PHY_65NM_CH0_RXTX2 0x16104 +#define AR_PHY_65NM_CH1_RXTX2 0x16504 +#define AR_PHY_65NM_CH2_RXTX2 0x16904 #define AR_PHY_65NM_CH0_RXTX4 0x1610c #define AR_PHY_65NM_CH1_RXTX4 0x1650c #define AR_PHY_65NM_CH2_RXTX4 0x1690c +#define AR_PHY_65NM_CH0_SYNTH12_VREFMUL3 0x00780000 +#define AR_PHY_65NM_CH0_SYNTH12_VREFMUL3_S 19 +#define AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK 0x00000004 +#define AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S 2 +#define AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK 0x00000008 +#define AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S 3 + #define AR_CH0_TOP (AR_SREV_9300(ah) ? 0x16288 : \ (((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x1628c : 0x16280))) #define AR_CH0_TOP_XPABIASLVL (AR_SREV_9550(ah) ? 0x3c0 : 0x300) diff --git a/drivers/net/wireless/ath/ath9k/ar9462_2p1_initvals.h b/drivers/net/wireless/ath/ath9k/ar9462_2p1_initvals.h index 4dbc294..57fc5f4 100644 --- a/drivers/net/wireless/ath/ath9k/ar9462_2p1_initvals.h +++ b/drivers/net/wireless/ath/ath9k/ar9462_2p1_initvals.h @@ -361,7 +361,7 @@ static const u32 ar9462_2p1_baseband_postamble[][5] = { {0x00009e14, 0x37b95d5e, 0x37b9605e, 0x3236605e, 0x32365a5e}, {0x00009e18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, {0x00009e1c, 0x0001cf9c, 0x0001cf9c, 0x00021f9c, 0x00021f9c}, - {0x00009e20, 0x000003b5, 0x000003b5, 0x000003ce, 0x000003ce}, + {0x00009e20, 0x000003a5, 0x000003a5, 0x000003a5, 0x000003a5}, {0x00009e2c, 0x0000001c, 0x0000001c, 0x00000021, 0x00000021}, {0x00009e3c, 0xcf946220, 0xcf946220, 0xcfd5c782, 0xcfd5c282}, {0x00009e44, 0x62321e27, 0x62321e27, 0xfe291e27, 0xfe291e27}, @@ -400,7 +400,7 @@ static const u32 ar9462_2p1_baseband_postamble[][5] = { {0x0000ae04, 0x001c0000, 0x001c0000, 0x001c0000, 0x00100000}, {0x0000ae18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, {0x0000ae1c, 0x0000019c, 0x0000019c, 0x0000019c, 0x0000019c}, - {0x0000ae20, 0x000001b5, 0x000001b5, 0x000001ce, 0x000001ce}, + {0x0000ae20, 0x000001a6, 0x000001a6, 0x000001aa, 0x000001aa}, {0x0000b284, 0x00000000, 0x00000000, 0x00000550, 0x00000550}, }; @@ -472,7 +472,7 @@ static const u32 ar9462_2p1_radio_postamble[][5] = { static const u32 ar9462_2p1_soc_preamble[][2] = { /* Addr allmodes */ - {0x000040a4, 0x00a0c1c9}, + {0x000040a4, 0x00a0c9c9}, {0x00007020, 0x00000000}, {0x00007034, 0x00000002}, {0x00007038, 0x000004c2}, diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index c00687e..1217c52 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -362,7 +362,8 @@ static int __ath_reg_dyn_country(struct wiphy *wiphy, { u16 country_code; - if (!ath_is_world_regd(reg)) + if (request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + !ath_is_world_regd(reg)) return -EINVAL; country_code = ath_regd_find_country_by_name(request->alpha2); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c index 5b5b952..4a22930 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c @@ -823,6 +823,7 @@ static s32 brcmf_p2p_run_escan(struct brcmf_cfg80211_info *cfg, } err = brcmf_p2p_escan(p2p, num_nodfs, chanspecs, search_state, action, P2PAPI_BSSCFG_DEVICE); + kfree(chanspecs); } exit: if (err) diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index fbad00a..aeaea0e 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -2210,8 +2210,10 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, priv->bss_started = 0; priv->bss_num = 0; - if (mwifiex_cfg80211_init_p2p_client(priv)) - return ERR_PTR(-EFAULT); + if (mwifiex_cfg80211_init_p2p_client(priv)) { + wdev = ERR_PTR(-EFAULT); + goto done; + } break; default: @@ -2224,7 +2226,8 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, if (!dev) { wiphy_err(wiphy, "no memory available for netdevice\n"); priv->bss_mode = NL80211_IFTYPE_UNSPECIFIED; - return ERR_PTR(-ENOMEM); + wdev = ERR_PTR(-ENOMEM); + goto done; } mwifiex_init_priv_params(priv, dev); @@ -2264,7 +2267,9 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, wiphy_err(wiphy, "cannot register virtual network device\n"); free_netdev(dev); priv->bss_mode = NL80211_IFTYPE_UNSPECIFIED; - return ERR_PTR(-EFAULT); + priv->netdev = NULL; + wdev = ERR_PTR(-EFAULT); + goto done; } sema_init(&priv->async_sem, 1); @@ -2274,6 +2279,13 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, #ifdef CONFIG_DEBUG_FS mwifiex_dev_debugfs_init(priv); #endif + +done: + if (IS_ERR(wdev)) { + kfree(priv->wdev); + priv->wdev = NULL; + } + return wdev; } EXPORT_SYMBOL_GPL(mwifiex_add_virtual_intf); @@ -2298,7 +2310,10 @@ int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) unregister_netdevice(wdev->netdev); /* Clear the priv in adapter */ + priv->netdev->ieee80211_ptr = NULL; priv->netdev = NULL; + kfree(wdev); + priv->wdev = NULL; priv->media_connected = false; diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index 9d7c9d3..78e8a66 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -411,13 +411,14 @@ static void mwifiex_terminate_workqueue(struct mwifiex_adapter *adapter) */ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) { - int ret, i; + int ret; char fmt[64]; struct mwifiex_private *priv; struct mwifiex_adapter *adapter = context; struct mwifiex_fw_image fw; struct semaphore *sem = adapter->card_sem; bool init_failed = false; + struct wireless_dev *wdev; if (!firmware) { dev_err(adapter->dev, @@ -469,14 +470,16 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) priv = adapter->priv[MWIFIEX_BSS_ROLE_STA]; if (mwifiex_register_cfg80211(adapter)) { dev_err(adapter->dev, "cannot register with cfg80211\n"); - goto err_register_cfg80211; + goto err_init_fw; } rtnl_lock(); /* Create station interface by default */ - if (!mwifiex_add_virtual_intf(adapter->wiphy, "mlan%d", - NL80211_IFTYPE_STATION, NULL, NULL)) { + wdev = mwifiex_add_virtual_intf(adapter->wiphy, "mlan%d", + NL80211_IFTYPE_STATION, NULL, NULL); + if (IS_ERR(wdev)) { dev_err(adapter->dev, "cannot create default STA interface\n"); + rtnl_unlock(); goto err_add_intf; } rtnl_unlock(); @@ -486,17 +489,6 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; err_add_intf: - for (i = 0; i < adapter->priv_num; i++) { - priv = adapter->priv[i]; - - if (!priv) - continue; - - if (priv->wdev && priv->netdev) - mwifiex_del_virtual_intf(adapter->wiphy, priv->wdev); - } - rtnl_unlock(); -err_register_cfg80211: wiphy_unregister(adapter->wiphy); wiphy_free(adapter->wiphy); err_init_fw: @@ -1006,12 +998,6 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter, struct semaphore *sem) wiphy_unregister(priv->wdev->wiphy); wiphy_free(priv->wdev->wiphy); - for (i = 0; i < adapter->priv_num; i++) { - priv = adapter->priv[i]; - if (priv) - kfree(priv->wdev); - } - mwifiex_terminate_workqueue(adapter); /* Unregister device */ diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c index 33fa943..03688aa 100644 --- a/drivers/net/wireless/mwifiex/pcie.c +++ b/drivers/net/wireless/mwifiex/pcie.c @@ -232,7 +232,6 @@ static void mwifiex_pcie_remove(struct pci_dev *pdev) } mwifiex_remove_card(card->adapter, &add_remove_card_sem); - kfree(card); } static void mwifiex_pcie_shutdown(struct pci_dev *pdev) @@ -2313,6 +2312,7 @@ static void mwifiex_pcie_cleanup(struct mwifiex_adapter *adapter) pci_release_region(pdev, 0); pci_set_drvdata(pdev, NULL); } + kfree(card); } /* diff --git a/drivers/net/wireless/mwifiex/sdio.c b/drivers/net/wireless/mwifiex/sdio.c index 9bf8898..b44a315 100644 --- a/drivers/net/wireless/mwifiex/sdio.c +++ b/drivers/net/wireless/mwifiex/sdio.c @@ -196,7 +196,6 @@ mwifiex_sdio_remove(struct sdio_func *func) } mwifiex_remove_card(card->adapter, &add_remove_card_sem); - kfree(card); } /* @@ -1745,7 +1744,6 @@ mwifiex_unregister_dev(struct mwifiex_adapter *adapter) sdio_claim_host(card->func); sdio_disable_func(card->func); sdio_release_host(card->func); - sdio_set_drvdata(card->func, NULL); } } @@ -1773,7 +1771,6 @@ static int mwifiex_register_dev(struct mwifiex_adapter *adapter) return ret; } - sdio_set_drvdata(func, card); adapter->dev = &func->dev; @@ -1801,6 +1798,8 @@ static int mwifiex_init_sdio(struct mwifiex_adapter *adapter) int ret; u8 sdio_ireg; + sdio_set_drvdata(card->func, card); + /* * Read the HOST_INT_STATUS_REG for ACK the first interrupt got * from the bootloader. If we don't do this we get a interrupt @@ -1883,6 +1882,8 @@ static void mwifiex_cleanup_sdio(struct mwifiex_adapter *adapter) kfree(card->mpa_rx.len_arr); kfree(card->mpa_tx.buf); kfree(card->mpa_rx.buf); + sdio_set_drvdata(card->func, NULL); + kfree(card); } /* diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c index 1c70b8d..edf5b7a 100644 --- a/drivers/net/wireless/mwifiex/usb.c +++ b/drivers/net/wireless/mwifiex/usb.c @@ -350,7 +350,6 @@ static int mwifiex_usb_probe(struct usb_interface *intf, card->udev = udev; card->intf = intf; - usb_card = card; pr_debug("info: bcdUSB=%#x Device Class=%#x SubClass=%#x Protocol=%#x\n", udev->descriptor.bcdUSB, udev->descriptor.bDeviceClass, @@ -525,25 +524,28 @@ static int mwifiex_usb_resume(struct usb_interface *intf) static void mwifiex_usb_disconnect(struct usb_interface *intf) { struct usb_card_rec *card = usb_get_intfdata(intf); - struct mwifiex_adapter *adapter; - if (!card || !card->adapter) { - pr_err("%s: card or card->adapter is NULL\n", __func__); + if (!card) { + pr_err("%s: card is NULL\n", __func__); return; } - adapter = card->adapter; - if (!adapter->priv_num) - return; - mwifiex_usb_free(card); - dev_dbg(adapter->dev, "%s: removing card\n", __func__); - mwifiex_remove_card(adapter, &add_remove_card_sem); + if (card->adapter) { + struct mwifiex_adapter *adapter = card->adapter; + + if (!adapter->priv_num) + return; + + dev_dbg(adapter->dev, "%s: removing card\n", __func__); + mwifiex_remove_card(adapter, &add_remove_card_sem); + } usb_set_intfdata(intf, NULL); usb_put_dev(interface_to_usbdev(intf)); kfree(card); + usb_card = NULL; return; } @@ -754,6 +756,7 @@ static int mwifiex_register_dev(struct mwifiex_adapter *adapter) card->adapter = adapter; adapter->dev = &card->udev->dev; strcpy(adapter->fw_name, USB8797_DEFAULT_FW_NAME); + usb_card = card; return 0; } @@ -762,7 +765,7 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) { struct usb_card_rec *card = (struct usb_card_rec *)adapter->card; - usb_set_intfdata(card->intf, NULL); + card->adapter = NULL; } static int mwifiex_prog_fw_w_helper(struct mwifiex_adapter *adapter, @@ -1004,7 +1007,7 @@ static void mwifiex_usb_cleanup_module(void) if (!down_interruptible(&add_remove_card_sem)) up(&add_remove_card_sem); - if (usb_card) { + if (usb_card && usb_card->adapter) { struct mwifiex_adapter *adapter = usb_card->adapter; int i; diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 080b1fc..9dd92a7 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -181,6 +181,7 @@ static void rt2x00lib_autowakeup(struct work_struct *work) static void rt2x00lib_bc_buffer_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { + struct ieee80211_tx_control control = {}; struct rt2x00_dev *rt2x00dev = data; struct sk_buff *skb; @@ -195,7 +196,7 @@ static void rt2x00lib_bc_buffer_iter(void *data, u8 *mac, */ skb = ieee80211_get_buffered_bc(rt2x00dev->hw, vif); while (skb) { - rt2x00mac_tx(rt2x00dev->hw, NULL, skb); + rt2x00mac_tx(rt2x00dev->hw, &control, skb); skb = ieee80211_get_buffered_bc(rt2x00dev->hw, vif); } } diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/mac.c b/drivers/net/wireless/rtlwifi/rtl8192cu/mac.c index 3936853..e26312f 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/mac.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/mac.c @@ -769,7 +769,7 @@ static long _rtl92c_signal_scale_mapping(struct ieee80211_hw *hw, static void _rtl92c_query_rxphystatus(struct ieee80211_hw *hw, struct rtl_stats *pstats, - struct rx_desc_92c *pdesc, + struct rx_desc_92c *p_desc, struct rx_fwinfo_92c *p_drvinfo, bool packet_match_bssid, bool packet_toself, @@ -784,11 +784,11 @@ static void _rtl92c_query_rxphystatus(struct ieee80211_hw *hw, u32 rssi, total_rssi = 0; bool in_powersavemode = false; bool is_cck_rate; + u8 *pdesc = (u8 *)p_desc; - is_cck_rate = RX_HAL_IS_CCK_RATE(pdesc); + is_cck_rate = RX_HAL_IS_CCK_RATE(p_desc); pstats->packet_matchbssid = packet_match_bssid; pstats->packet_toself = packet_toself; - pstats->is_cck = is_cck_rate; pstats->packet_beacon = packet_beacon; pstats->is_cck = is_cck_rate; pstats->RX_SIGQ[0] = -1; diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c index b0c346a..1bc21cc 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c @@ -303,10 +303,10 @@ out: bool rtl92cu_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats, struct ieee80211_rx_status *rx_status, - u8 *p_desc, struct sk_buff *skb) + u8 *pdesc, struct sk_buff *skb) { struct rx_fwinfo_92c *p_drvinfo; - struct rx_desc_92c *pdesc = (struct rx_desc_92c *)p_desc; + struct rx_desc_92c *p_desc = (struct rx_desc_92c *)pdesc; u32 phystatus = GET_RX_DESC_PHY_STATUS(pdesc); stats->length = (u16) GET_RX_DESC_PKT_LEN(pdesc); @@ -345,7 +345,7 @@ bool rtl92cu_rx_query_desc(struct ieee80211_hw *hw, if (phystatus) { p_drvinfo = (struct rx_fwinfo_92c *)(skb->data + stats->rx_bufshift); - rtl92c_translate_rx_signal_stuff(hw, skb, stats, pdesc, + rtl92c_translate_rx_signal_stuff(hw, skb, stats, p_desc, p_drvinfo); } /*rx_status->qual = stats->signal; */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index b78ee10..2329ccc 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -461,6 +461,9 @@ void xenvif_disconnect(struct xenvif *vif) if (netif_carrier_ok(vif->dev)) xenvif_carrier_off(vif); + if (vif->task) + kthread_stop(vif->task); + if (vif->tx_irq) { if (vif->tx_irq == vif->rx_irq) unbind_from_irqhandler(vif->tx_irq, vif); @@ -471,9 +474,6 @@ void xenvif_disconnect(struct xenvif *vif) vif->tx_irq = 0; } - if (vif->task) - kthread_stop(vif->task); - xenvif_unmap_frontend_rings(vif); } diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 95655d7..e52d7ff 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -410,7 +410,7 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid); * Otherwise is returns a bitmask with supported features. Current * features reported are: * PCI_PASID_CAP_EXEC - Execute permission supported - * PCI_PASID_CAP_PRIV - Priviledged mode supported + * PCI_PASID_CAP_PRIV - Privileged mode supported */ int pci_pasid_features(struct pci_dev *pdev) { diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 7c4f38d..0afbbbc 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -249,7 +249,7 @@ struct tegra_pcie { void __iomem *afi; int irq; - struct list_head busses; + struct list_head buses; struct resource *cs; struct resource io; @@ -399,14 +399,14 @@ free: /* * Look up a virtual address mapping for the specified bus number. If no such - * mapping existis, try to create one. + * mapping exists, try to create one. */ static void __iomem *tegra_pcie_bus_map(struct tegra_pcie *pcie, unsigned int busnr) { struct tegra_pcie_bus *bus; - list_for_each_entry(bus, &pcie->busses, list) + list_for_each_entry(bus, &pcie->buses, list) if (bus->nr == busnr) return (void __iomem *)bus->area->addr; @@ -414,7 +414,7 @@ static void __iomem *tegra_pcie_bus_map(struct tegra_pcie *pcie, if (IS_ERR(bus)) return NULL; - list_add_tail(&bus->list, &pcie->busses); + list_add_tail(&bus->list, &pcie->buses); return (void __iomem *)bus->area->addr; } @@ -808,7 +808,7 @@ static int tegra_pcie_enable_controller(struct tegra_pcie *pcie) value &= ~AFI_FUSE_PCIE_T0_GEN2_DIS; afi_writel(pcie, value, AFI_FUSE); - /* initialze internal PHY, enable up to 16 PCIE lanes */ + /* initialize internal PHY, enable up to 16 PCIE lanes */ pads_writel(pcie, 0x0, PADS_CTL_SEL); /* override IDDQ to 1 on all 4 lanes */ @@ -1624,7 +1624,7 @@ static int tegra_pcie_probe(struct platform_device *pdev) if (!pcie) return -ENOMEM; - INIT_LIST_HEAD(&pcie->busses); + INIT_LIST_HEAD(&pcie->buses); INIT_LIST_HEAD(&pcie->ports); pcie->soc_data = match->data; pcie->dev = &pdev->dev; diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index 1e1fea4..e33b68b 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -197,7 +197,7 @@ static int find_valid_pos0(struct pcie_port *pp, int msgvec, int pos, int *pos0) return -ENOSPC; /* * Check if this position is at correct offset.nvec is always a - * power of two. pos0 must be nvec bit alligned. + * power of two. pos0 must be nvec bit aligned. */ if (pos % msgvec) pos += msgvec - (pos % msgvec); diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index 0a648af..df8caec 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -133,8 +133,8 @@ config HOTPLUG_PCI_RPA_DLPAR To compile this driver as a module, choose M here: the module will be called rpadlpar_io. - - When in doubt, say N. + + When in doubt, say N. config HOTPLUG_PCI_SGI tristate "SGI PCI Hotplug Support" diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 47ec8c8..3e6532b 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -31,7 +31,7 @@ pci_hotplug-objs += cpci_hotplug_core.o \ cpci_hotplug_pci.o endif ifdef CONFIG_ACPI -pci_hotplug-objs += acpi_pcihp.o +pci_hotplug-objs += acpi_pcihp.o endif cpqphp-objs := cpqphp_core.o \ diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index 8650d39..dca66bc 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -111,7 +111,7 @@ int acpiphp_register_attention(struct acpiphp_attention_info *info) * @info: must match the pointer used to register * * Description: This is used to un-register a hardware specific acpi - * driver that manipulates the attention LED. The pointer to the + * driver that manipulates the attention LED. The pointer to the * info struct must be the same as the one used to set it. */ int acpiphp_unregister_attention(struct acpiphp_attention_info *info) @@ -169,8 +169,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) * was registered with us. This allows hardware specific * ACPI implementations to blink the light for us. */ - static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status) - { +static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status) +{ int retval = -ENODEV; pr_debug("%s - physical_slot = %s\n", __func__, @@ -182,8 +182,8 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) } else attention_info = NULL; return retval; - } - +} + /** * get_power_status - get power status of a slot @@ -323,7 +323,7 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot, if (retval) { pr_err("pci_hp_register failed with error %d\n", retval); goto error_hpslot; - } + } pr_info("Slot [%s] registered\n", slot_name(slot)); diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 5b4e9eb0..1cf605f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -325,7 +325,7 @@ static acpi_status register_slot(acpi_handle handle, u32 lvl, void *data, list_add_tail(&slot->node, &bridge->slots); - /* Register slots for ejectable funtions only. */ + /* Register slots for ejectable functions only. */ if (acpi_pci_check_ejectable(pbus, handle) || is_dock_device(handle)) { unsigned long long sun; int retval; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 0d64c41..ecfac7e 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -116,7 +116,7 @@ static struct bin_attribute ibm_apci_table_attr = { .read = ibm_read_apci_table, .write = NULL, }; -static struct acpiphp_attention_info ibm_attention_info = +static struct acpiphp_attention_info ibm_attention_info = { .set_attn = ibm_set_attention_status, .get_attn = ibm_get_attention_status, @@ -171,9 +171,9 @@ ibm_slot_done: */ static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status) { - union acpi_object args[2]; + union acpi_object args[2]; struct acpi_object_list params = { .pointer = args, .count = 2 }; - acpi_status stat; + acpi_status stat; unsigned long long rc; union apci_descriptor *ibm_slot; @@ -208,7 +208,7 @@ static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status) * * Description: This method is registered with the acpiphp module as a * callback to do the device specific task of getting the LED status. - * + * * Because there is no direct method of getting the LED status directly * from an ACPI call, we read the aPCI table and parse out our * slot descriptor to read the status from that. @@ -259,7 +259,7 @@ static void ibm_handle_events(acpi_handle handle, u32 event, void *context) pr_debug("%s: Received notification %02x\n", __func__, event); if (subevent == 0x80) { - pr_debug("%s: generationg bus event\n", __func__); + pr_debug("%s: generating bus event\n", __func__); acpi_bus_generate_netlink_event(note->device->pnp.device_class, dev_name(¬e->device->dev), note->event, detail); @@ -387,7 +387,7 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle, u32 lvl, void *context, void **rv) { acpi_handle *phandle = (acpi_handle *)context; - acpi_status status; + acpi_status status; struct acpi_device_info *info; int retval = 0; @@ -405,7 +405,7 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle, info->hardware_id.string, handle); *phandle = handle; /* returning non-zero causes the search to stop - * and returns this value to the caller of + * and returns this value to the caller of * acpi_walk_namespace, but it also causes some warnings * in the acpi debug code to print... */ diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index 2b4c412..00c81a3 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -46,7 +46,7 @@ do { \ if (cpci_debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c index d8add34..d3add98 100644 --- a/drivers/pci/hotplug/cpci_hotplug_pci.c +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c @@ -39,7 +39,7 @@ extern int cpci_debug; do { \ if (cpci_debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) diff --git a/drivers/pci/hotplug/cpcihp_generic.c b/drivers/pci/hotplug/cpcihp_generic.c index a6a71c4..7536eef 100644 --- a/drivers/pci/hotplug/cpcihp_generic.c +++ b/drivers/pci/hotplug/cpcihp_generic.c @@ -13,14 +13,14 @@ * option) any later version. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You should have received a copy of the GNU General Public License along @@ -53,9 +53,9 @@ #define dbg(format, arg...) \ do { \ - if(debug) \ + if (debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while(0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c index 449b4bb..e8c4a7c 100644 --- a/drivers/pci/hotplug/cpcihp_zt5550.c +++ b/drivers/pci/hotplug/cpcihp_zt5550.c @@ -13,14 +13,14 @@ * option) any later version. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You should have received a copy of the GNU General Public License along @@ -48,9 +48,9 @@ #define dbg(format, arg...) \ do { \ - if(debug) \ + if (debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while(0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) @@ -285,7 +285,7 @@ static struct pci_device_id zt5550_hc_pci_tbl[] = { { 0, } }; MODULE_DEVICE_TABLE(pci, zt5550_hc_pci_tbl); - + static struct pci_driver zt5550_hc_driver = { .name = "zt5550_hc", .id_table = zt5550_hc_pci_tbl, diff --git a/drivers/pci/hotplug/cpcihp_zt5550.h b/drivers/pci/hotplug/cpcihp_zt5550.h index bebc606..9a57fda 100644 --- a/drivers/pci/hotplug/cpcihp_zt5550.h +++ b/drivers/pci/hotplug/cpcihp_zt5550.h @@ -13,14 +13,14 @@ * option) any later version. * * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You should have received a copy of the GNU General Public License along @@ -55,7 +55,7 @@ #define HC_CMD_REG 0x0C #define ARB_CONFIG_GNT_REG 0x10 #define ARB_CONFIG_CFG_REG 0x12 -#define ARB_CONFIG_REG 0x10 +#define ARB_CONFIG_REG 0x10 #define ISOL_CONFIG_REG 0x18 #define FAULT_STATUS_REG 0x20 #define FAULT_CONFIG_REG 0x24 diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index c8eaeb4..31273e1 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -862,10 +862,10 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } - /* Check for the proper subsystem ID's + /* Check for the proper subsystem IDs * Intel uses a different SSID programming model than Compaq. * For Intel, each SSID bit identifies a PHP capability. - * Also Intel HPC's may have RID=0. + * Also Intel HPCs may have RID=0. */ if ((pdev->revision <= 2) && (vendor_id != PCI_VENDOR_ID_INTEL)) { err(msg_HPC_not_supported); diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index d282019..11845b7 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -1231,7 +1231,7 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_ /* Only if mode change...*/ if (((bus->cur_bus_speed == PCI_SPEED_66MHz) && (adapter_speed == PCI_SPEED_66MHz_PCIX)) || - ((bus->cur_bus_speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz))) + ((bus->cur_bus_speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz))) set_SOGO(ctrl); wait_for_ctrl_irq(ctrl); @@ -1828,7 +1828,7 @@ static void interrupt_event_handler(struct controller *ctrl) if (ctrl->event_queue[loop].event_type == INT_BUTTON_PRESS) { dbg("button pressed\n"); - } else if (ctrl->event_queue[loop].event_type == + } else if (ctrl->event_queue[loop].event_type == INT_BUTTON_CANCEL) { dbg("button cancel\n"); del_timer(&p_slot->task_event); @@ -2411,11 +2411,11 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func if (rc) return rc; - /* find range of busses to use */ + /* find range of buses to use */ dbg("find ranges of buses to use\n"); bus_node = get_max_resource(&(resources->bus_head), 1); - /* If we don't have any busses to allocate, we can't continue */ + /* If we don't have any buses to allocate, we can't continue */ if (!bus_node) return -ENOMEM; @@ -2900,7 +2900,7 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func /* If this function needs an interrupt and we are behind * a bridge and the pin is tied to something that's - * alread mapped, set this one the same */ + * already mapped, set this one the same */ if (temp_byte && resources->irqs && (resources->irqs->valid_INT & (0x01 << ((temp_byte + resources->irqs->barber_pole - 1) & 0x03)))) { diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 09801c6..6e4a12c 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -291,7 +291,7 @@ int cpqhp_get_bus_dev (struct controller *ctrl, u8 * bus_num, u8 * dev_num, u8 s * * Reads configuration for all slots in a PCI bus and saves info. * - * Note: For non-hot plug busses, the slot # saved is the device # + * Note: For non-hot plug buses, the slot # saved is the device # * * returns 0 if success */ @@ -455,7 +455,7 @@ int cpqhp_save_config(struct controller *ctrl, int busnumber, int is_hot_plug) * cpqhp_save_slot_config * * Saves configuration info for all PCI devices in a given slot - * including subordinate busses. + * including subordinate buses. * * returns 0 if success */ @@ -1556,4 +1556,3 @@ void cpqhp_destroy_board_resources (struct pci_func * func) kfree(tres); } } - diff --git a/drivers/pci/hotplug/ibmphp.h b/drivers/pci/hotplug/ibmphp.h index 8c5b258..e3e46a7 100644 --- a/drivers/pci/hotplug/ibmphp.h +++ b/drivers/pci/hotplug/ibmphp.h @@ -59,7 +59,7 @@ extern int ibmphp_debug; /************************************************************ -* RESOURE TYPE * +* RESOURCE TYPE * ************************************************************/ #define EBDA_RSRC_TYPE_MASK 0x03 @@ -103,7 +103,7 @@ extern int ibmphp_debug; //-------------------------------------------------------------- struct rio_table_hdr { - u8 ver_num; + u8 ver_num; u8 scal_count; u8 riodev_count; u16 offset; @@ -127,7 +127,7 @@ struct scal_detail { }; //-------------------------------------------------------------- -// RIO DETAIL +// RIO DETAIL //-------------------------------------------------------------- struct rio_detail { @@ -152,7 +152,7 @@ struct opt_rio { u8 first_slot_num; u8 middle_num; struct list_head opt_rio_list; -}; +}; struct opt_rio_lo { u8 rio_type; @@ -161,7 +161,7 @@ struct opt_rio_lo { u8 middle_num; u8 pack_count; struct list_head opt_rio_lo_list; -}; +}; /**************************************************************** * HPC DESCRIPTOR NODE * @@ -574,7 +574,7 @@ void ibmphp_hpc_stop_poll_thread(void); #define HPC_CTLR_IRQ_PENDG 0x80 //---------------------------------------------------------------------------- -// HPC_CTLR_WROKING status return codes +// HPC_CTLR_WORKING status return codes //---------------------------------------------------------------------------- #define HPC_CTLR_WORKING_NO 0x00 #define HPC_CTLR_WORKING_YES 0x01 diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index cbd72d8..efdc13a 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -58,7 +58,7 @@ MODULE_DESCRIPTION (DRIVER_DESC); struct pci_bus *ibmphp_pci_bus; static int max_slots; -static int irqs[16]; /* PIC mode IRQ's we're using so far (in case MPS +static int irqs[16]; /* PIC mode IRQs we're using so far (in case MPS * tables don't provide default info for empty slots */ static int init_flag; @@ -71,20 +71,20 @@ static inline int get_max_adapter_speed (struct hotplug_slot *hs, u8 *value) return get_max_adapter_speed_1 (hs, value, 1); } */ -static inline int get_cur_bus_info(struct slot **sl) +static inline int get_cur_bus_info(struct slot **sl) { int rc = 1; struct slot * slot_cur = *sl; debug("options = %x\n", slot_cur->ctrl->options); - debug("revision = %x\n", slot_cur->ctrl->revision); + debug("revision = %x\n", slot_cur->ctrl->revision); - if (READ_BUS_STATUS(slot_cur->ctrl)) + if (READ_BUS_STATUS(slot_cur->ctrl)) rc = ibmphp_hpc_readslot(slot_cur, READ_BUSSTATUS, NULL); - - if (rc) + + if (rc) return rc; - + slot_cur->bus_on->current_speed = CURRENT_BUS_SPEED(slot_cur->busstatus); if (READ_BUS_MODE(slot_cur->ctrl)) slot_cur->bus_on->current_bus_mode = @@ -96,7 +96,7 @@ static inline int get_cur_bus_info(struct slot **sl) slot_cur->busstatus, slot_cur->bus_on->current_speed, slot_cur->bus_on->current_bus_mode); - + *sl = slot_cur; return 0; } @@ -104,8 +104,8 @@ static inline int get_cur_bus_info(struct slot **sl) static inline int slot_update(struct slot **sl) { int rc; - rc = ibmphp_hpc_readslot(*sl, READ_ALLSTAT, NULL); - if (rc) + rc = ibmphp_hpc_readslot(*sl, READ_ALLSTAT, NULL); + if (rc) return rc; if (!init_flag) rc = get_cur_bus_info(sl); @@ -172,7 +172,7 @@ int ibmphp_init_devno(struct slot **cur_slot) debug("(*cur_slot)->irq[3] = %x\n", (*cur_slot)->irq[3]); - debug("rtable->exlusive_irqs = %x\n", + debug("rtable->exclusive_irqs = %x\n", rtable->exclusive_irqs); debug("rtable->slots[loop].irq[0].bitmap = %x\n", rtable->slots[loop].irq[0].bitmap); @@ -271,7 +271,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value) else rc = -ENODEV; } - } else + } else rc = -ENODEV; ibmphp_unlock_operations(); @@ -288,7 +288,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 * value) debug("get_attention_status - Entry hotplug_slot[%lx] pvalue[%lx]\n", (ulong) hotplug_slot, (ulong) value); - + ibmphp_lock_operations(); if (hotplug_slot) { pslot = hotplug_slot->private; @@ -406,14 +406,14 @@ static int get_max_bus_speed(struct slot *slot) ibmphp_lock_operations(); mode = slot->supported_bus_mode; - speed = slot->supported_speed; + speed = slot->supported_speed; ibmphp_unlock_operations(); switch (speed) { case BUS_SPEED_33: break; case BUS_SPEED_66: - if (mode == BUS_MODE_PCIX) + if (mode == BUS_MODE_PCIX) speed += 0x01; break; case BUS_SPEED_100: @@ -515,13 +515,13 @@ static int __init init_ops(void) debug("BEFORE GETTING SLOT STATUS, slot # %x\n", slot_cur->number); - if (slot_cur->ctrl->revision == 0xFF) + if (slot_cur->ctrl->revision == 0xFF) if (get_ctrl_revision(slot_cur, &slot_cur->ctrl->revision)) return -1; - if (slot_cur->bus_on->current_speed == 0xFF) - if (get_cur_bus_info(&slot_cur)) + if (slot_cur->bus_on->current_speed == 0xFF) + if (get_cur_bus_info(&slot_cur)) return -1; get_max_bus_speed(slot_cur); @@ -539,8 +539,8 @@ static int __init init_ops(void) debug("SLOT_PRESENT = %x\n", SLOT_PRESENT(slot_cur->status)); debug("SLOT_LATCH = %x\n", SLOT_LATCH(slot_cur->status)); - if ((SLOT_PWRGD(slot_cur->status)) && - !(SLOT_PRESENT(slot_cur->status)) && + if ((SLOT_PWRGD(slot_cur->status)) && + !(SLOT_PRESENT(slot_cur->status)) && !(SLOT_LATCH(slot_cur->status))) { debug("BEFORE POWER OFF COMMAND\n"); rc = power_off(slot_cur); @@ -581,13 +581,13 @@ static int validate(struct slot *slot_cur, int opn) switch (opn) { case ENABLE: - if (!(SLOT_PWRGD(slot_cur->status)) && - (SLOT_PRESENT(slot_cur->status)) && + if (!(SLOT_PWRGD(slot_cur->status)) && + (SLOT_PRESENT(slot_cur->status)) && !(SLOT_LATCH(slot_cur->status))) return 0; break; case DISABLE: - if ((SLOT_PWRGD(slot_cur->status)) && + if ((SLOT_PWRGD(slot_cur->status)) && (SLOT_PRESENT(slot_cur->status)) && !(SLOT_LATCH(slot_cur->status))) return 0; @@ -617,7 +617,7 @@ int ibmphp_update_slot_info(struct slot *slot_cur) err("out of system memory\n"); return -ENOMEM; } - + info->power_status = SLOT_PWRGD(slot_cur->status); info->attention_status = SLOT_ATTN(slot_cur->status, slot_cur->ext_status); @@ -638,7 +638,7 @@ int ibmphp_update_slot_info(struct slot *slot_cur) case BUS_SPEED_33: break; case BUS_SPEED_66: - if (mode == BUS_MODE_PCIX) + if (mode == BUS_MODE_PCIX) bus_speed += 0x01; else if (mode == BUS_MODE_PCI) ; @@ -654,8 +654,8 @@ int ibmphp_update_slot_info(struct slot *slot_cur) } bus->cur_bus_speed = bus_speed; - // To do: bus_names - + // To do: bus_names + rc = pci_hp_change_slot_info(slot_cur->hotplug_slot, info); kfree(info); return rc; @@ -729,8 +729,8 @@ static void ibm_unconfigure_device(struct pci_func *func) } /* - * The following function is to fix kernel bug regarding - * getting bus entries, here we manually add those primary + * The following function is to fix kernel bug regarding + * getting bus entries, here we manually add those primary * bus entries to kernel bus structure whenever apply */ static u8 bus_structure_fixup(u8 busno) @@ -814,7 +814,7 @@ static int ibm_configure_device(struct pci_func *func) } /******************************************************* - * Returns whether the bus is empty or not + * Returns whether the bus is empty or not *******************************************************/ static int is_bus_empty(struct slot * slot_cur) { @@ -842,7 +842,7 @@ static int is_bus_empty(struct slot * slot_cur) } /*********************************************************** - * If the HPC permits and the bus currently empty, tries to set the + * If the HPC permits and the bus currently empty, tries to set the * bus speed and mode at the maximum card and bus capability * Parameters: slot * Returns: bus is set (0) or error code @@ -856,7 +856,7 @@ static int set_bus(struct slot * slot_cur) static struct pci_device_id ciobx[] = { { PCI_DEVICE(PCI_VENDOR_ID_SERVERWORKS, 0x0101) }, { }, - }; + }; debug("%s - entry slot # %d\n", __func__, slot_cur->number); if (SET_BUS_STATUS(slot_cur->ctrl) && is_bus_empty(slot_cur)) { @@ -877,7 +877,7 @@ static int set_bus(struct slot * slot_cur) else if (!SLOT_BUS_MODE(slot_cur->ext_status)) /* if max slot/bus capability is 66 pci and there's no bus mode mismatch, then - the adapter supports 66 pci */ + the adapter supports 66 pci */ cmd = HPC_BUS_66CONVMODE; else cmd = HPC_BUS_33CONVMODE; @@ -930,7 +930,7 @@ static int set_bus(struct slot * slot_cur) return -EIO; } } - /* This is for x440, once Brandon fixes the firmware, + /* This is for x440, once Brandon fixes the firmware, will not need this delay */ msleep(1000); debug("%s -Exit\n", __func__); @@ -938,9 +938,9 @@ static int set_bus(struct slot * slot_cur) } /* This routine checks the bus limitations that the slot is on from the BIOS. - * This is used in deciding whether or not to power up the slot. + * This is used in deciding whether or not to power up the slot. * (electrical/spec limitations. For example, >1 133 MHz or >2 66 PCI cards on - * same bus) + * same bus) * Parameters: slot * Returns: 0 = no limitations, -EINVAL = exceeded limitations on the bus */ @@ -986,7 +986,7 @@ static int check_limitations(struct slot *slot_cur) static inline void print_card_capability(struct slot *slot_cur) { info("capability of the card is "); - if ((slot_cur->ext_status & CARD_INFO) == PCIX133) + if ((slot_cur->ext_status & CARD_INFO) == PCIX133) info(" 133 MHz PCI-X\n"); else if ((slot_cur->ext_status & CARD_INFO) == PCIX66) info(" 66 MHz PCI-X\n"); @@ -1020,7 +1020,7 @@ static int enable_slot(struct hotplug_slot *hs) } attn_LED_blink(slot_cur); - + rc = set_bus(slot_cur); if (rc) { err("was not able to set the bus\n"); @@ -1082,7 +1082,7 @@ static int enable_slot(struct hotplug_slot *hs) rc = slot_update(&slot_cur); if (rc) goto error_power; - + rc = -EINVAL; if (SLOT_POWER(slot_cur->status) && !(SLOT_PWRGD(slot_cur->status))) { err("power fault occurred trying to power up...\n"); @@ -1093,7 +1093,7 @@ static int enable_slot(struct hotplug_slot *hs) "speed and card capability\n"); print_card_capability(slot_cur); goto error_power; - } + } /* Don't think this case will happen after above checks... * but just in case, for paranoia sake */ if (!(SLOT_POWER(slot_cur->status))) { @@ -1144,7 +1144,7 @@ static int enable_slot(struct hotplug_slot *hs) ibmphp_print_test(); rc = ibmphp_update_slot_info(slot_cur); exit: - ibmphp_unlock_operations(); + ibmphp_unlock_operations(); return rc; error_nopower: @@ -1180,7 +1180,7 @@ static int ibmphp_disable_slot(struct hotplug_slot *hotplug_slot) { struct slot *slot = hotplug_slot->private; int rc; - + ibmphp_lock_operations(); rc = ibmphp_do_disable_slot(slot); ibmphp_unlock_operations(); @@ -1192,12 +1192,12 @@ int ibmphp_do_disable_slot(struct slot *slot_cur) int rc; u8 flag; - debug("DISABLING SLOT...\n"); - + debug("DISABLING SLOT...\n"); + if ((slot_cur == NULL) || (slot_cur->ctrl == NULL)) { return -ENODEV; } - + flag = slot_cur->flag; slot_cur->flag = 1; @@ -1210,7 +1210,7 @@ int ibmphp_do_disable_slot(struct slot *slot_cur) attn_LED_blink(slot_cur); if (slot_cur->func == NULL) { - /* We need this for fncs's that were there on bootup */ + /* We need this for functions that were there on bootup */ slot_cur->func = kzalloc(sizeof(struct pci_func), GFP_KERNEL); if (!slot_cur->func) { err("out of system memory\n"); @@ -1222,12 +1222,13 @@ int ibmphp_do_disable_slot(struct slot *slot_cur) } ibm_unconfigure_device(slot_cur->func); - - /* If we got here from latch suddenly opening on operating card or - a power fault, there's no power to the card, so cannot - read from it to determine what resources it occupied. This operation - is forbidden anyhow. The best we can do is remove it from kernel - lists at least */ + + /* + * If we got here from latch suddenly opening on operating card or + * a power fault, there's no power to the card, so cannot + * read from it to determine what resources it occupied. This operation + * is forbidden anyhow. The best we can do is remove it from kernel + * lists at least */ if (!flag) { attn_off(slot_cur); @@ -1264,7 +1265,7 @@ error: rc = -EFAULT; goto exit; } - if (flag) + if (flag) ibmphp_update_slot_info(slot_cur); goto exit; } @@ -1339,7 +1340,7 @@ static int __init ibmphp_init(void) debug("AFTER Resource & EBDA INITIALIZATIONS\n"); max_slots = get_max_slots(); - + if ((rc = ibmphp_register_pci())) goto error; diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c index 9df78bc..bd04415 100644 --- a/drivers/pci/hotplug/ibmphp_ebda.c +++ b/drivers/pci/hotplug/ibmphp_ebda.c @@ -123,7 +123,7 @@ static struct ebda_pci_rsrc *alloc_ebda_pci_rsrc (void) static void __init print_bus_info (void) { struct bus_info *ptr; - + list_for_each_entry(ptr, &bus_info_head, bus_info_list) { debug ("%s - slot_min = %x\n", __func__, ptr->slot_min); debug ("%s - slot_max = %x\n", __func__, ptr->slot_max); @@ -131,7 +131,7 @@ static void __init print_bus_info (void) debug ("%s - bus# = %x\n", __func__, ptr->busno); debug ("%s - current_speed = %x\n", __func__, ptr->current_speed); debug ("%s - controller_id = %x\n", __func__, ptr->controller_id); - + debug ("%s - slots_at_33_conv = %x\n", __func__, ptr->slots_at_33_conv); debug ("%s - slots_at_66_conv = %x\n", __func__, ptr->slots_at_66_conv); debug ("%s - slots_at_66_pcix = %x\n", __func__, ptr->slots_at_66_pcix); @@ -144,7 +144,7 @@ static void __init print_bus_info (void) static void print_lo_info (void) { struct rio_detail *ptr; - debug ("print_lo_info ----\n"); + debug ("print_lo_info ----\n"); list_for_each_entry(ptr, &rio_lo_head, rio_detail_list) { debug ("%s - rio_node_id = %x\n", __func__, ptr->rio_node_id); debug ("%s - rio_type = %x\n", __func__, ptr->rio_type); @@ -176,7 +176,7 @@ static void __init print_ebda_pci_rsrc (void) struct ebda_pci_rsrc *ptr; list_for_each_entry(ptr, &ibmphp_ebda_pci_rsrc_head, ebda_pci_rsrc_list) { - debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", + debug ("%s - rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", __func__, ptr->rsrc_type ,ptr->bus_num, ptr->dev_fun,ptr->start_addr, ptr->end_addr); } } @@ -259,7 +259,7 @@ int __init ibmphp_access_ebda (void) ebda_seg = readw (io_mem); iounmap (io_mem); debug ("returned ebda segment: %x\n", ebda_seg); - + io_mem = ioremap(ebda_seg<<4, 1); if (!io_mem) return -ENOMEM; @@ -310,7 +310,7 @@ int __init ibmphp_access_ebda (void) re = readw (io_mem + sub_addr); /* next sub blk */ sub_addr += 2; - rc_id = readw (io_mem + sub_addr); /* sub blk id */ + rc_id = readw (io_mem + sub_addr); /* sub blk id */ sub_addr += 2; if (rc_id != 0x5243) @@ -330,7 +330,7 @@ int __init ibmphp_access_ebda (void) debug ("info about hpc descriptor---\n"); debug ("hot blk format: %x\n", format); debug ("num of controller: %x\n", num_ctlrs); - debug ("offset of hpc data structure enteries: %x\n ", sub_addr); + debug ("offset of hpc data structure entries: %x\n ", sub_addr); sub_addr = base + re; /* re sub blk */ /* FIXME: rc is never used/checked */ @@ -359,7 +359,7 @@ int __init ibmphp_access_ebda (void) debug ("info about rsrc descriptor---\n"); debug ("format: %x\n", format); debug ("num of rsrc: %x\n", num_entries); - debug ("offset of rsrc data structure enteries: %x\n ", sub_addr); + debug ("offset of rsrc data structure entries: %x\n ", sub_addr); hs_complete = 1; } else { @@ -376,7 +376,7 @@ int __init ibmphp_access_ebda (void) rio_table_ptr->scal_count = readb (io_mem + offset + 1); rio_table_ptr->riodev_count = readb (io_mem + offset + 2); rio_table_ptr->offset = offset +3 ; - + debug("info about rio table hdr ---\n"); debug("ver_num: %x\nscal_count: %x\nriodev_count: %x\noffset of rio table: %x\n ", rio_table_ptr->ver_num, rio_table_ptr->scal_count, @@ -440,12 +440,12 @@ static int __init ebda_rio_table (void) rio_detail_ptr->chassis_num = readb (io_mem + offset + 14); // debug ("rio_node_id: %x\nbbar: %x\nrio_type: %x\nowner_id: %x\nport0_node: %x\nport0_port: %x\nport1_node: %x\nport1_port: %x\nfirst_slot_num: %x\nstatus: %x\n", rio_detail_ptr->rio_node_id, rio_detail_ptr->bbar, rio_detail_ptr->rio_type, rio_detail_ptr->owner_id, rio_detail_ptr->port0_node_connect, rio_detail_ptr->port0_port_connect, rio_detail_ptr->port1_node_connect, rio_detail_ptr->port1_port_connect, rio_detail_ptr->first_slot_num, rio_detail_ptr->status); //create linked list of chassis - if (rio_detail_ptr->rio_type == 4 || rio_detail_ptr->rio_type == 5) + if (rio_detail_ptr->rio_type == 4 || rio_detail_ptr->rio_type == 5) list_add (&rio_detail_ptr->rio_detail_list, &rio_vg_head); - //create linked list of expansion box - else if (rio_detail_ptr->rio_type == 6 || rio_detail_ptr->rio_type == 7) + //create linked list of expansion box + else if (rio_detail_ptr->rio_type == 6 || rio_detail_ptr->rio_type == 7) list_add (&rio_detail_ptr->rio_detail_list, &rio_lo_head); - else + else // not in my concern kfree (rio_detail_ptr); offset += 15; @@ -456,7 +456,7 @@ static int __init ebda_rio_table (void) } /* - * reorganizing linked list of chassis + * reorganizing linked list of chassis */ static struct opt_rio *search_opt_vg (u8 chassis_num) { @@ -464,7 +464,7 @@ static struct opt_rio *search_opt_vg (u8 chassis_num) list_for_each_entry(ptr, &opt_vg_head, opt_rio_list) { if (ptr->chassis_num == chassis_num) return ptr; - } + } return NULL; } @@ -472,7 +472,7 @@ static int __init combine_wpg_for_chassis (void) { struct opt_rio *opt_rio_ptr = NULL; struct rio_detail *rio_detail_ptr = NULL; - + list_for_each_entry(rio_detail_ptr, &rio_vg_head, rio_detail_list) { opt_rio_ptr = search_opt_vg (rio_detail_ptr->chassis_num); if (!opt_rio_ptr) { @@ -484,14 +484,14 @@ static int __init combine_wpg_for_chassis (void) opt_rio_ptr->first_slot_num = rio_detail_ptr->first_slot_num; opt_rio_ptr->middle_num = rio_detail_ptr->first_slot_num; list_add (&opt_rio_ptr->opt_rio_list, &opt_vg_head); - } else { + } else { opt_rio_ptr->first_slot_num = min (opt_rio_ptr->first_slot_num, rio_detail_ptr->first_slot_num); opt_rio_ptr->middle_num = max (opt_rio_ptr->middle_num, rio_detail_ptr->first_slot_num); - } + } } print_opt_vg (); - return 0; -} + return 0; +} /* * reorganizing linked list of expansion box @@ -502,7 +502,7 @@ static struct opt_rio_lo *search_opt_lo (u8 chassis_num) list_for_each_entry(ptr, &opt_lo_head, opt_rio_lo_list) { if (ptr->chassis_num == chassis_num) return ptr; - } + } return NULL; } @@ -510,7 +510,7 @@ static int combine_wpg_for_expansion (void) { struct opt_rio_lo *opt_rio_lo_ptr = NULL; struct rio_detail *rio_detail_ptr = NULL; - + list_for_each_entry(rio_detail_ptr, &rio_lo_head, rio_detail_list) { opt_rio_lo_ptr = search_opt_lo (rio_detail_ptr->chassis_num); if (!opt_rio_lo_ptr) { @@ -522,22 +522,22 @@ static int combine_wpg_for_expansion (void) opt_rio_lo_ptr->first_slot_num = rio_detail_ptr->first_slot_num; opt_rio_lo_ptr->middle_num = rio_detail_ptr->first_slot_num; opt_rio_lo_ptr->pack_count = 1; - + list_add (&opt_rio_lo_ptr->opt_rio_lo_list, &opt_lo_head); - } else { + } else { opt_rio_lo_ptr->first_slot_num = min (opt_rio_lo_ptr->first_slot_num, rio_detail_ptr->first_slot_num); opt_rio_lo_ptr->middle_num = max (opt_rio_lo_ptr->middle_num, rio_detail_ptr->first_slot_num); opt_rio_lo_ptr->pack_count = 2; - } + } } - return 0; + return 0; } - + /* Since we don't know the max slot number per each chassis, hence go * through the list of all chassis to find out the range - * Arguments: slot_num, 1st slot number of the chassis we think we are on, - * var (0 = chassis, 1 = expansion box) + * Arguments: slot_num, 1st slot number of the chassis we think we are on, + * var (0 = chassis, 1 = expansion box) */ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var) { @@ -547,7 +547,7 @@ static int first_slot_num (u8 slot_num, u8 first_slot, u8 var) if (!var) { list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) { - if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { + if ((first_slot < opt_vg_ptr->first_slot_num) && (slot_num >= opt_vg_ptr->first_slot_num)) { rc = -ENODEV; break; } @@ -569,7 +569,7 @@ static struct opt_rio_lo * find_rxe_num (u8 slot_num) list_for_each_entry(opt_lo_ptr, &opt_lo_head, opt_rio_lo_list) { //check to see if this slot_num belongs to expansion box - if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) + if ((slot_num >= opt_lo_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_lo_ptr->first_slot_num, 1))) return opt_lo_ptr; } return NULL; @@ -580,8 +580,8 @@ static struct opt_rio * find_chassis_num (u8 slot_num) struct opt_rio *opt_vg_ptr; list_for_each_entry(opt_vg_ptr, &opt_vg_head, opt_rio_list) { - //check to see if this slot_num belongs to chassis - if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) + //check to see if this slot_num belongs to chassis + if ((slot_num >= opt_vg_ptr->first_slot_num) && (!first_slot_num (slot_num, opt_vg_ptr->first_slot_num, 0))) return opt_vg_ptr; } return NULL; @@ -594,13 +594,13 @@ static u8 calculate_first_slot (u8 slot_num) { u8 first_slot = 1; struct slot * slot_cur; - + list_for_each_entry(slot_cur, &ibmphp_slot_head, ibm_slot_list) { if (slot_cur->ctrl) { - if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) + if ((slot_cur->ctrl->ctlr_type != 4) && (slot_cur->ctrl->ending_slot_num > first_slot) && (slot_num > slot_cur->ctrl->ending_slot_num)) first_slot = slot_cur->ctrl->ending_slot_num; } - } + } return first_slot + 1; } @@ -622,11 +622,11 @@ static char *create_file_name (struct slot * slot_cur) err ("Structure passed is empty\n"); return NULL; } - + slot_num = slot_cur->number; memset (str, 0, sizeof(str)); - + if (rio_table_ptr) { if (rio_table_ptr->ver_num == 3) { opt_vg_ptr = find_chassis_num (slot_num); @@ -660,7 +660,7 @@ static char *create_file_name (struct slot * slot_cur) /* if both NULL and we DO have correct RIO table in BIOS */ return NULL; } - } + } if (!flag) { if (slot_cur->ctrl->ctlr_type == 4) { first_slot = calculate_first_slot (slot_num); @@ -798,7 +798,7 @@ static int __init ebda_rsrc_controller (void) slot_ptr->ctl_index = readb (io_mem + addr_slot + 2*slot_num); slot_ptr->slot_cap = readb (io_mem + addr_slot + 3*slot_num); - // create bus_info lined list --- if only one slot per bus: slot_min = slot_max + // create bus_info lined list --- if only one slot per bus: slot_min = slot_max bus_info_ptr2 = ibmphp_find_same_bus_num (slot_ptr->slot_bus_num); if (!bus_info_ptr2) { @@ -814,9 +814,9 @@ static int __init ebda_rsrc_controller (void) bus_info_ptr1->index = bus_index++; bus_info_ptr1->current_speed = 0xff; bus_info_ptr1->current_bus_mode = 0xff; - + bus_info_ptr1->controller_id = hpc_ptr->ctlr_id; - + list_add_tail (&bus_info_ptr1->bus_info_list, &bus_info_head); } else { @@ -851,7 +851,7 @@ static int __init ebda_rsrc_controller (void) bus_info_ptr2->slots_at_66_conv = bus_ptr->slots_at_66_conv; bus_info_ptr2->slots_at_66_pcix = bus_ptr->slots_at_66_pcix; bus_info_ptr2->slots_at_100_pcix = bus_ptr->slots_at_100_pcix; - bus_info_ptr2->slots_at_133_pcix = bus_ptr->slots_at_133_pcix; + bus_info_ptr2->slots_at_133_pcix = bus_ptr->slots_at_133_pcix; } bus_ptr++; } @@ -864,7 +864,7 @@ static int __init ebda_rsrc_controller (void) hpc_ptr->u.pci_ctlr.dev_fun = readb (io_mem + addr + 1); hpc_ptr->irq = readb (io_mem + addr + 2); addr += 3; - debug ("ctrl bus = %x, ctlr devfun = %x, irq = %x\n", + debug ("ctrl bus = %x, ctlr devfun = %x, irq = %x\n", hpc_ptr->u.pci_ctlr.bus, hpc_ptr->u.pci_ctlr.dev_fun, hpc_ptr->irq); break; @@ -932,7 +932,7 @@ static int __init ebda_rsrc_controller (void) tmp_slot->supported_speed = 2; else if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_66_MAX) == EBDA_SLOT_66_MAX) tmp_slot->supported_speed = 1; - + if ((hpc_ptr->slots[index].slot_cap & EBDA_SLOT_PCIX_CAP) == EBDA_SLOT_PCIX_CAP) tmp_slot->supported_bus_mode = 1; else @@ -1000,7 +1000,7 @@ error_no_hpc: return rc; } -/* +/* * map info (bus, devfun, start addr, end addr..) of i/o, memory, * pfm from the physical addr to a list of resource. */ @@ -1057,7 +1057,7 @@ static int __init ebda_rsrc_rsrc (void) addr += 10; debug ("rsrc from mem or pfm ---\n"); - debug ("rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", + debug ("rsrc type: %x bus#: %x dev_func: %x start addr: %x end addr: %x\n", rsrc_ptr->rsrc_type, rsrc_ptr->bus_num, rsrc_ptr->dev_fun, rsrc_ptr->start_addr, rsrc_ptr->end_addr); list_add (&rsrc_ptr->ebda_pci_rsrc_list, &ibmphp_ebda_pci_rsrc_head); @@ -1096,7 +1096,7 @@ struct bus_info *ibmphp_find_same_bus_num (u32 num) struct bus_info *ptr; list_for_each_entry(ptr, &bus_info_head, bus_info_list) { - if (ptr->busno == num) + if (ptr->busno == num) return ptr; } return NULL; @@ -1110,7 +1110,7 @@ int ibmphp_get_bus_index (u8 num) struct bus_info *ptr; list_for_each_entry(ptr, &bus_info_head, bus_info_list) { - if (ptr->busno == num) + if (ptr->busno == num) return ptr->index; } return -ENODEV; @@ -1168,7 +1168,7 @@ static struct pci_device_id id_table[] = { .subdevice = HPC_SUBSYSTEM_ID, .class = ((PCI_CLASS_SYSTEM_PCI_HOTPLUG << 8) | 0x00), }, {} -}; +}; MODULE_DEVICE_TABLE(pci, id_table); @@ -1197,7 +1197,7 @@ static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids) struct controller *ctrl; debug ("inside ibmphp_probe\n"); - + list_for_each_entry(ctrl, &ebda_hpc_head, ebda_hpc_list) { if (ctrl->ctlr_type == 1) { if ((dev->devfn == ctrl->u.pci_ctlr.dev_fun) && (dev->bus->number == ctrl->u.pci_ctlr.bus)) { @@ -1210,4 +1210,3 @@ static int ibmphp_probe (struct pci_dev * dev, const struct pci_device_id *ids) } return -ENODEV; } - diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c index f59ed30..5fc7a08 100644 --- a/drivers/pci/hotplug/ibmphp_hpc.c +++ b/drivers/pci/hotplug/ibmphp_hpc.c @@ -258,7 +258,7 @@ static u8 i2c_ctrl_write (struct controller *ctlr_ptr, void __iomem *WPGBbar, u8 { u8 rc; void __iomem *wpg_addr; // base addr + offset - unsigned long wpg_data; // data to/from WPG LOHI format + unsigned long wpg_data; // data to/from WPG LOHI format unsigned long ultemp; unsigned long data; // actual data HILO format int i; @@ -351,7 +351,7 @@ static u8 i2c_ctrl_write (struct controller *ctlr_ptr, void __iomem *WPGBbar, u8 } //------------------------------------------------------------ -// Read from ISA type HPC +// Read from ISA type HPC //------------------------------------------------------------ static u8 isa_ctrl_read (struct controller *ctlr_ptr, u8 offset) { @@ -372,7 +372,7 @@ static void isa_ctrl_write (struct controller *ctlr_ptr, u8 offset, u8 data) { u16 start_address; u16 port_address; - + start_address = ctlr_ptr->u.isa_ctlr.io_start; port_address = start_address + (u16) offset; outb (data, port_address); @@ -656,11 +656,11 @@ int ibmphp_hpc_readslot (struct slot * pslot, u8 cmd, u8 * pstatus) //-------------------------------------------------------------------- // cleanup //-------------------------------------------------------------------- - + // remove physical to logical address mapping if ((ctlr_ptr->ctlr_type == 2) || (ctlr_ptr->ctlr_type == 4)) iounmap (wpg_bbar); - + free_hpc_access (); debug_polling ("%s - Exit rc[%d]\n", __func__, rc); @@ -835,7 +835,7 @@ static int poll_hpc(void *data) down (&semOperations); switch (poll_state) { - case POLL_LATCH_REGISTER: + case POLL_LATCH_REGISTER: oldlatchlow = curlatchlow; ctrl_count = 0x00; list_for_each (pslotlist, &ibmphp_slot_head) { @@ -892,16 +892,16 @@ static int poll_hpc(void *data) if (kthread_should_stop()) goto out_sleep; - + down (&semOperations); - + if (poll_count >= POLL_LATCH_CNT) { poll_count = 0; poll_state = POLL_SLOTS; } else poll_state = POLL_LATCH_REGISTER; break; - } + } /* give up the hardware semaphore */ up (&semOperations); /* sleep for a short time just for good measure */ @@ -958,7 +958,7 @@ static int process_changeinstatus (struct slot *pslot, struct slot *poldslot) // bit 5 - HPC_SLOT_PWRGD if ((pslot->status & 0x20) != (poldslot->status & 0x20)) // OFF -> ON: ignore, ON -> OFF: disable slot - if ((poldslot->status & 0x20) && (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status))) + if ((poldslot->status & 0x20) && (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status))) disable = 1; // bit 6 - HPC_SLOT_BUS_SPEED @@ -980,7 +980,7 @@ static int process_changeinstatus (struct slot *pslot, struct slot *poldslot) pslot->status &= ~HPC_SLOT_POWER; } } - // CLOSE -> OPEN + // CLOSE -> OPEN else if ((SLOT_PWRGD (poldslot->status) == HPC_SLOT_PWRGD_GOOD) && (SLOT_CONNECT (poldslot->status) == HPC_SLOT_CONNECTED) && (SLOT_PRESENT (poldslot->status))) { disable = 1; @@ -1075,7 +1075,7 @@ void __exit ibmphp_hpc_stop_poll_thread (void) debug ("before locking operations \n"); ibmphp_lock_operations (); debug ("after locking operations \n"); - + // wait for poll thread to exit debug ("before sem_exit down \n"); down (&sem_exit); diff --git a/drivers/pci/hotplug/ibmphp_pci.c b/drivers/pci/hotplug/ibmphp_pci.c index c60f5f3..639ea3a 100644 --- a/drivers/pci/hotplug/ibmphp_pci.c +++ b/drivers/pci/hotplug/ibmphp_pci.c @@ -1,8 +1,8 @@ /* * IBM Hot Plug Controller Driver - * + * * Written By: Irene Zubarev, IBM Corporation - * + * * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2001,2002 IBM Corp. * @@ -42,7 +42,7 @@ static u8 find_sec_number (u8 primary_busno, u8 slotno); /* * NOTE..... If BIOS doesn't provide default routing, we assign: - * 9 for SCSI, 10 for LAN adapters, and 11 for everything else. + * 9 for SCSI, 10 for LAN adapters, and 11 for everything else. * If adapter is bridged, then we assign 11 to it and devices behind it. * We also assign the same irq numbers for multi function devices. * These are PIC mode, so shouldn't matter n.e.ways (hopefully) @@ -71,11 +71,11 @@ static void assign_alt_irq (struct pci_func * cur_func, u8 class_code) * Configures the device to be added (will allocate needed resources if it * can), the device can be a bridge or a regular pci device, can also be * multi-functional - * + * * Input: function to be added - * + * * TO DO: The error case with Multifunction device or multi function bridge, - * if there is an error, will need to go through all previous functions and + * if there is an error, will need to go through all previous functions and * unconfigure....or can add some code into unconfigure_card.... */ int ibmphp_configure_card (struct pci_func *func, u8 slotno) @@ -98,7 +98,7 @@ int ibmphp_configure_card (struct pci_func *func, u8 slotno) cur_func = func; /* We only get bus and device from IRQ routing table. So at this point, - * func->busno is correct, and func->device contains only device (at the 5 + * func->busno is correct, and func->device contains only device (at the 5 * highest bits) */ @@ -151,7 +151,7 @@ int ibmphp_configure_card (struct pci_func *func, u8 slotno) cur_func->device, cur_func->busno); cleanup_count = 6; goto error; - } + } cur_func->next = NULL; function = 0x8; break; @@ -339,7 +339,7 @@ error: } /* - * This function configures the pci BARs of a single device. + * This function configures the pci BARs of a single device. * Input: pointer to the pci_func * Output: configured PCI, 0, or error */ @@ -371,17 +371,17 @@ static int configure_device (struct pci_func *func) for (count = 0; address[count]; count++) { /* for 6 BARs */ - /* not sure if i need this. per scott, said maybe need smth like this + /* not sure if i need this. per scott, said maybe need * something like this if devices don't adhere 100% to the spec, so don't want to write to the reserved bits - pcibios_read_config_byte(cur_func->busno, cur_func->device, + pcibios_read_config_byte(cur_func->busno, cur_func->device, PCI_BASE_ADDRESS_0 + 4 * count, &tmp); if (tmp & 0x01) // IO - pcibios_write_config_dword(cur_func->busno, cur_func->device, + pcibios_write_config_dword(cur_func->busno, cur_func->device, PCI_BASE_ADDRESS_0 + 4 * count, 0xFFFFFFFD); else // Memory - pcibios_write_config_dword(cur_func->busno, cur_func->device, + pcibios_write_config_dword(cur_func->busno, cur_func->device, PCI_BASE_ADDRESS_0 + 4 * count, 0xFFFFFFFF); */ pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], 0xFFFFFFFF); @@ -421,8 +421,8 @@ static int configure_device (struct pci_func *func) return -EIO; } pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], func->io[count]->start); - - /* _______________This is for debugging purposes only_____________________ */ + + /* _______________This is for debugging purposes only_____________________ */ debug ("b4 writing, the IO address is %x\n", func->io[count]->start); pci_bus_read_config_dword (ibmphp_pci_bus, devfn, address[count], &bar[count]); debug ("after writing.... the start address is %x\n", bar[count]); @@ -484,7 +484,7 @@ static int configure_device (struct pci_func *func) pci_bus_write_config_dword (ibmphp_pci_bus, devfn, address[count], func->pfmem[count]->start); - /*_______________This is for debugging purposes only______________________________*/ + /*_______________This is for debugging purposes only______________________________*/ debug ("b4 writing, start address is %x\n", func->pfmem[count]->start); pci_bus_read_config_dword (ibmphp_pci_bus, devfn, address[count], &bar[count]); debug ("after writing, start address is %x\n", bar[count]); @@ -559,7 +559,7 @@ static int configure_device (struct pci_func *func) /****************************************************************************** * This routine configures a PCI-2-PCI bridge and the functions behind it * Parameters: pci_func - * Returns: + * Returns: ******************************************************************************/ static int configure_bridge (struct pci_func **func_passed, u8 slotno) { @@ -622,7 +622,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) debug ("AFTER FIND_SEC_NUMBER, func->busno IS %x\n", func->busno); pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, sec_number); - + /* __________________For debugging purposes only __________________________________ pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_number); debug ("sec_number after write/read is %x\n", sec_number); @@ -644,7 +644,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!NEED TO ADD!!! FAST BACK-TO-BACK ENABLE!!!!!!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!NEED TO ADD!!! FAST BACK-TO-BACK ENABLE!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ @@ -670,7 +670,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) debug ("len[count] in IO = %x\n", len[count]); bus_io[count] = kzalloc(sizeof(struct resource_node), GFP_KERNEL); - + if (!bus_io[count]) { err ("out of system memory\n"); retval = -ENOMEM; @@ -735,7 +735,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) ibmphp_add_pfmem_from_mem (bus_pfmem[count]); func->pfmem[count] = bus_pfmem[count]; } else { - err ("cannot allocate requested pfmem for bus %x, device %x, len %x\n", + err ("cannot allocate requested pfmem for bus %x, device %x, len %x\n", func->busno, func->device, len[count]); kfree (mem_tmp); kfree (bus_pfmem[count]); @@ -805,7 +805,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) debug ("amount_needed->mem = %x\n", amount_needed->mem); debug ("amount_needed->pfmem = %x\n", amount_needed->pfmem); - if (amount_needed->not_correct) { + if (amount_needed->not_correct) { debug ("amount_needed is not correct\n"); for (count = 0; address[count]; count++) { /* for 2 BARs */ @@ -830,7 +830,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) } else { debug ("it wants %x IO behind the bridge\n", amount_needed->io); io = kzalloc(sizeof(*io), GFP_KERNEL); - + if (!io) { err ("out of system memory\n"); retval = -ENOMEM; @@ -959,7 +959,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) if (bus->noIORanges) { pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_BASE, 0x00 | bus->rangeIO->start >> 8); - pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_LIMIT, 0x00 | bus->rangeIO->end >> 8); + pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_IO_LIMIT, 0x00 | bus->rangeIO->end >> 8); /* _______________This is for debugging purposes only ____________________ pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_IO_BASE, &temp); @@ -980,7 +980,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) if (bus->noMemRanges) { pci_bus_write_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, 0x0000 | bus->rangeMem->start >> 16); pci_bus_write_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, 0x0000 | bus->rangeMem->end >> 16); - + /* ____________________This is for debugging purposes only ________________________ pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, &temp); debug ("mem_base = %x\n", (temp & PCI_MEMORY_RANGE_TYPE_MASK) << 16); @@ -1017,7 +1017,7 @@ static int configure_bridge (struct pci_func **func_passed, u8 slotno) pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_INTERRUPT_PIN, &irq); if ((irq > 0x00) && (irq < 0x05)) pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_INTERRUPT_LINE, func->irq[irq - 1]); - /* + /* pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, ctrl); pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_PARITY); pci_bus_write_config_byte (ibmphp_pci_bus, devfn, PCI_BRIDGE_CONTROL, PCI_BRIDGE_CTL_SERR); @@ -1071,7 +1071,7 @@ error: * This function adds up the amount of resources needed behind the PPB bridge * and passes it to the configure_bridge function * Input: bridge function - * Ouput: amount of resources needed + * Output: amount of resources needed *****************************************************************************/ static struct res_needed *scan_behind_bridge (struct pci_func * func, u8 busno) { @@ -1204,9 +1204,9 @@ static struct res_needed *scan_behind_bridge (struct pci_func * func, u8 busno) return amount; } -/* The following 3 unconfigure_boot_ routines deal with the case when we had the card - * upon bootup in the system, since we don't allocate func to such case, we need to read - * the start addresses from pci config space and then find the corresponding entries in +/* The following 3 unconfigure_boot_ routines deal with the case when we had the card + * upon bootup in the system, since we don't allocate func to such case, we need to read + * the start addresses from pci config space and then find the corresponding entries in * our resource lists. The functions return either 0, -ENODEV, or -1 (general failure) * Change: we also call these functions even if we configured the card ourselves (i.e., not * the bootup case), since it should work same way @@ -1561,8 +1561,8 @@ static int unconfigure_boot_card (struct slot *slot_cur) * unconfiguring the device * TO DO: will probably need to add some code in case there was some resource, * to remove it... this is from when we have errors in the configure_card... - * !!!!!!!!!!!!!!!!!!!!!!!!!FOR BUSES!!!!!!!!!!!! - * Returns: 0, -1, -ENODEV + * !!!!!!!!!!!!!!!!!!!!!!!!!FOR BUSES!!!!!!!!!!!! + * Returns: 0, -1, -ENODEV */ int ibmphp_unconfigure_card (struct slot **slot_cur, int the_end) { @@ -1634,7 +1634,7 @@ int ibmphp_unconfigure_card (struct slot **slot_cur, int the_end) * Input: bus and the amount of resources needed (we know we can assign those, * since they've been checked already * Output: bus added to the correct spot - * 0, -1, error + * 0, -1, error */ static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct resource_node *mem, struct resource_node *pfmem, u8 parent_busno) { @@ -1650,7 +1650,7 @@ static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct r err ("strange, cannot find bus which is supposed to be at the system... something is terribly wrong...\n"); return -ENODEV; } - + list_add (&bus->bus_list, &cur_bus->bus_list); } if (io) { @@ -1679,7 +1679,7 @@ static int add_new_bus (struct bus_node *bus, struct resource_node *io, struct r } if (pfmem) { pfmem_range = kzalloc(sizeof(*pfmem_range), GFP_KERNEL); - if (!pfmem_range) { + if (!pfmem_range) { err ("out of system memory\n"); return -ENOMEM; } @@ -1726,4 +1726,3 @@ static u8 find_sec_number (u8 primary_busno, u8 slotno) return busno; return 0xff; } - diff --git a/drivers/pci/hotplug/ibmphp_res.c b/drivers/pci/hotplug/ibmphp_res.c index e2dc289..a265acb 100644 --- a/drivers/pci/hotplug/ibmphp_res.c +++ b/drivers/pci/hotplug/ibmphp_res.c @@ -72,7 +72,7 @@ static struct bus_node * __init alloc_error_bus (struct ebda_pci_rsrc * curr, u8 static struct resource_node * __init alloc_resources (struct ebda_pci_rsrc * curr) { struct resource_node *rs; - + if (!curr) { err ("NULL passed to allocate\n"); return NULL; @@ -128,7 +128,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node } newrange->start = curr->start_addr; newrange->end = curr->end_addr; - + if (first_bus || (!num_ranges)) newrange->rangeno = 1; else { @@ -162,7 +162,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node newbus->rangePFMem = newrange; if (first_bus) newbus->noPFMemRanges = 1; - else { + else { debug ("1st PFMemory Primary on Bus %x [%x - %x]\n", newbus->busno, newrange->start, newrange->end); ++newbus->noPFMemRanges; fix_resources (newbus); @@ -190,7 +190,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node * This is the Resource Management initialization function. It will go through * the Resource list taken from EBDA and fill in this module's data structures * - * THIS IS NOT TAKING INTO CONSIDERATION IO RESTRICTIONS OF PRIMARY BUSES, + * THIS IS NOT TAKING INTO CONSIDERATION IO RESTRICTIONS OF PRIMARY BUSES, * SINCE WE'RE GOING TO ASSUME FOR NOW WE DON'T HAVE THOSE ON OUR BUSES FOR NOW * * Input: ptr to the head of the resource list from EBDA @@ -382,7 +382,7 @@ int __init ibmphp_rsrc_init (void) * pci devices' resources for the appropriate resource * * Input: type of the resource, range to add, current bus - * Output: 0 or -1, bus and range ptrs + * Output: 0 or -1, bus and range ptrs ********************************************************************************/ static int add_bus_range (int type, struct range_node *range, struct bus_node *bus_cur) { @@ -466,7 +466,7 @@ static void update_resources (struct bus_node *bus_cur, int type, int rangeno) switch (type) { case MEM: - if (bus_cur->firstMem) + if (bus_cur->firstMem) res = bus_cur->firstMem; break; case PFMEM: @@ -583,7 +583,7 @@ static void fix_resources (struct bus_node *bus_cur) } /******************************************************************************* - * This routine adds a resource to the list of resources to the appropriate bus + * This routine adds a resource to the list of resources to the appropriate bus * based on their resource type and sorted by their starting addresses. It assigns * the ptrs to next and nextRange if needed. * @@ -605,11 +605,11 @@ int ibmphp_add_resource (struct resource_node *res) err ("NULL passed to add\n"); return -ENODEV; } - + bus_cur = find_bus_wprev (res->busno, NULL, 0); - + if (!bus_cur) { - /* didn't find a bus, smth's wrong!!! */ + /* didn't find a bus, something's wrong!!! */ debug ("no bus in the system, either pci_dev's wrong or allocation failed\n"); return -ENODEV; } @@ -648,7 +648,7 @@ int ibmphp_add_resource (struct resource_node *res) if (!range_cur) { switch (res->type) { case IO: - ++bus_cur->needIOUpdate; + ++bus_cur->needIOUpdate; break; case MEM: ++bus_cur->needMemUpdate; @@ -659,13 +659,13 @@ int ibmphp_add_resource (struct resource_node *res) } res->rangeno = -1; } - + debug ("The range is %d\n", res->rangeno); if (!res_start) { /* no first{IO,Mem,Pfmem} on the bus, 1st IO/Mem/Pfmem resource ever */ switch (res->type) { case IO: - bus_cur->firstIO = res; + bus_cur->firstIO = res; break; case MEM: bus_cur->firstMem = res; @@ -673,7 +673,7 @@ int ibmphp_add_resource (struct resource_node *res) case PFMEM: bus_cur->firstPFMem = res; break; - } + } res->next = NULL; res->nextRange = NULL; } else { @@ -770,7 +770,7 @@ int ibmphp_add_resource (struct resource_node *res) * This routine will remove the resource from the list of resources * * Input: io, mem, and/or pfmem resource to be deleted - * Ouput: modified resource list + * Output: modified resource list * 0 or error code ****************************************************************************/ int ibmphp_remove_resource (struct resource_node *res) @@ -825,7 +825,7 @@ int ibmphp_remove_resource (struct resource_node *res) if (!res_cur) { if (res->type == PFMEM) { - /* + /* * case where pfmem might be in the PFMemFromMem list * so will also need to remove the corresponding mem * entry @@ -961,12 +961,12 @@ static struct range_node * find_range (struct bus_node *bus_cur, struct resource } /***************************************************************************** - * This routine will check to make sure the io/mem/pfmem->len that the device asked for + * This routine will check to make sure the io/mem/pfmem->len that the device asked for * can fit w/i our list of available IO/MEM/PFMEM resources. If cannot, returns -EINVAL, * otherwise, returns 0 * * Input: resource - * Ouput: the correct start and end address are inputted into the resource node, + * Output: the correct start and end address are inputted into the resource node, * 0 or -EINVAL *****************************************************************************/ int ibmphp_check_resource (struct resource_node *res, u8 bridge) @@ -996,7 +996,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge) bus_cur = find_bus_wprev (res->busno, NULL, 0); if (!bus_cur) { - /* didn't find a bus, smth's wrong!!! */ + /* didn't find a bus, something's wrong!!! */ debug ("no bus in the system, either pci_dev's wrong or allocation failed\n"); return -EINVAL; } @@ -1066,7 +1066,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge) break; } } - + if (flag && len_cur == res->len) { debug ("but we are not here, right?\n"); res->start = start_cur; @@ -1118,10 +1118,10 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge) if (res_prev) { if (res_prev->rangeno != res_cur->rangeno) { /* 1st device on this range */ - if ((res_cur->start != range->start) && + if ((res_cur->start != range->start) && ((len_tmp = res_cur->start - 1 - range->start) >= res->len)) { if ((len_tmp < len_cur) || (len_cur == 0)) { - if ((range->start % tmp_divide) == 0) { + if ((range->start % tmp_divide) == 0) { /* just perfect, starting address is divisible by length */ flag = 1; len_cur = len_tmp; @@ -1344,7 +1344,7 @@ int ibmphp_check_resource (struct resource_node *res, u8 bridge) * This routine is called from remove_card if the card contained PPB. * It will remove all the resources on the bus as well as the bus itself * Input: Bus - * Ouput: 0, -ENODEV + * Output: 0, -ENODEV ********************************************************************************/ int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno) { @@ -1353,7 +1353,7 @@ int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno) struct bus_node *prev_bus; int rc; - prev_bus = find_bus_wprev (parent_busno, NULL, 0); + prev_bus = find_bus_wprev (parent_busno, NULL, 0); if (!prev_bus) { debug ("something terribly wrong. Cannot find parent bus to the one to remove\n"); @@ -1424,7 +1424,7 @@ int ibmphp_remove_bus (struct bus_node *bus, u8 parent_busno) } /****************************************************************************** - * This routine deletes the ranges from a given bus, and the entries from the + * This routine deletes the ranges from a given bus, and the entries from the * parent's bus in the resources * Input: current bus, previous bus * Output: 0, -EINVAL @@ -1453,7 +1453,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev) if (bus_cur->noMemRanges) { range_cur = bus_cur->rangeMem; for (i = 0; i < bus_cur->noMemRanges; i++) { - if (ibmphp_find_resource (bus_prev, range_cur->start, &res, MEM) < 0) + if (ibmphp_find_resource (bus_prev, range_cur->start, &res, MEM) < 0) return -EINVAL; ibmphp_remove_resource (res); @@ -1467,7 +1467,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev) if (bus_cur->noPFMemRanges) { range_cur = bus_cur->rangePFMem; for (i = 0; i < bus_cur->noPFMemRanges; i++) { - if (ibmphp_find_resource (bus_prev, range_cur->start, &res, PFMEM) < 0) + if (ibmphp_find_resource (bus_prev, range_cur->start, &res, PFMEM) < 0) return -EINVAL; ibmphp_remove_resource (res); @@ -1482,7 +1482,7 @@ static int remove_ranges (struct bus_node *bus_cur, struct bus_node *bus_prev) } /* - * find the resource node in the bus + * find the resource node in the bus * Input: Resource needed, start address of the resource, type of resource */ int ibmphp_find_resource (struct bus_node *bus, u32 start_address, struct resource_node **res, int flag) @@ -1512,7 +1512,7 @@ int ibmphp_find_resource (struct bus_node *bus, u32 start_address, struct resour err ("wrong type of flag\n"); return -EINVAL; } - + while (res_cur) { if (res_cur->start == start_address) { *res = res_cur; @@ -1718,7 +1718,7 @@ static int __init once_over (void) } /* end for pfmem */ } /* end if */ } /* end list_for_each bus */ - return 0; + return 0; } int ibmphp_add_pfmem_from_mem (struct resource_node *pfmem) @@ -1760,9 +1760,9 @@ static struct bus_node *find_bus_wprev (u8 bus_number, struct bus_node **prev, u list_for_each (tmp, &gbuses) { tmp_prev = tmp->prev; bus_cur = list_entry (tmp, struct bus_node, bus_list); - if (flag) + if (flag) *prev = list_entry (tmp_prev, struct bus_node, bus_list); - if (bus_cur->busno == bus_number) + if (bus_cur->busno == bus_number) return bus_cur; } @@ -1776,7 +1776,7 @@ void ibmphp_print_test (void) struct range_node *range; struct resource_node *res; struct list_head *tmp; - + debug_pci ("*****************START**********************\n"); if ((!list_empty(&gbuses)) && flags) { @@ -1906,7 +1906,7 @@ static int range_exists_already (struct range_node * range, struct bus_node * bu return 1; range_cur = range_cur->next; } - + return 0; } @@ -1920,7 +1920,7 @@ static int range_exists_already (struct range_node * range, struct bus_node * bu * Returns: none * Note: this function doesn't take into account IO restrictions etc, * so will only work for bridges with no video/ISA devices behind them It - * also will not work for onboard PPB's that can have more than 1 *bus + * also will not work for onboard PPBs that can have more than 1 *bus * behind them All these are TO DO. * Also need to add more error checkings... (from fnc returns etc) */ @@ -1963,7 +1963,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) case PCI_HEADER_TYPE_BRIDGE: function = 0x8; case PCI_HEADER_TYPE_MULTIBRIDGE: - /* We assume here that only 1 bus behind the bridge + /* We assume here that only 1 bus behind the bridge TO DO: add functionality for several: temp = secondary; while (temp < subordinate) { @@ -1972,7 +1972,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) } */ pci_bus_read_config_byte (ibmphp_pci_bus, devfn, PCI_SECONDARY_BUS, &sec_busno); - bus_sec = find_bus_wprev (sec_busno, NULL, 0); + bus_sec = find_bus_wprev (sec_busno, NULL, 0); /* this bus structure doesn't exist yet, PPB was configured during previous loading of ibmphp */ if (!bus_sec) { bus_sec = alloc_error_bus (NULL, sec_busno, 1); @@ -2028,7 +2028,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) io->len = io->end - io->start + 1; ibmphp_add_resource (io); } - } + } pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_BASE, &start_mem_address); pci_bus_read_config_word (ibmphp_pci_bus, devfn, PCI_MEMORY_LIMIT, &end_mem_address); diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index ec20f74..cfa92a9 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -131,7 +131,7 @@ static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf, } module_put(slot->ops->owner); -exit: +exit: if (retval) return retval; return count; @@ -177,7 +177,7 @@ static ssize_t attention_write_file(struct pci_slot *slot, const char *buf, retval = ops->set_attention_status(slot->hotplug, attention); module_put(ops->owner); -exit: +exit: if (retval) return retval; return count; @@ -247,7 +247,7 @@ static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf, retval = slot->ops->hardware_test(slot, test); module_put(slot->ops->owner); -exit: +exit: if (retval) return retval; return count; @@ -512,7 +512,7 @@ int pci_hp_deregister(struct hotplug_slot *hotplug) * @hotplug: pointer to the slot whose info has changed * @info: pointer to the info copy into the slot's info structure * - * @slot must have been registered with the pci + * @slot must have been registered with the pci * hotplug subsystem previously with a call to pci_hp_register(). * * Returns 0 if successful, anything else for an error. diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 541bbe6..21e865d 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -180,5 +180,5 @@ static inline int pciehp_acpi_slot_detection_check(struct pci_dev *dev) { return 0; } -#endif /* CONFIG_ACPI */ +#endif /* CONFIG_ACPI */ #endif /* _PCIEHP_H */ diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index cff7cad..eddddd4 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -78,7 +78,7 @@ static int __initdata dup_slot_id; static int __initdata acpi_slot_detected; static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); -/* Dummy driver for dumplicate name detection */ +/* Dummy driver for duplicate name detection */ static int __init dummy_probe(struct pcie_device *dev) { u32 slot_cap; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index f4a18f5..bbd48bb 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -351,8 +351,8 @@ static int __init pcied_init(void) pciehp_firmware_init(); retval = pcie_port_service_register(&hpdriver_portdrv); - dbg("pcie_port_service_register = %d\n", retval); - info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); + dbg("pcie_port_service_register = %d\n", retval); + info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); if (retval) dbg("Failure to register service\n"); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 51f56ef..3eea3fd 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -92,7 +92,7 @@ static void start_int_poll_timer(struct controller *ctrl, int sec) { /* Clamp to sane value */ if ((sec <= 0) || (sec > 60)) - sec = 2; + sec = 2; ctrl->poll_timer.function = &int_poll_timeout; ctrl->poll_timer.data = (unsigned long)ctrl; @@ -194,7 +194,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) ctrl_dbg(ctrl, "CMD_COMPLETED not clear after 1 sec\n"); } else if (!NO_CMD_CMPL(ctrl)) { /* - * This controller semms to notify of command completed + * This controller seems to notify of command completed * event even though it supports none of power * controller, attention led, power led and EMI. */ @@ -926,7 +926,7 @@ struct controller *pcie_init(struct pcie_device *dev) if (pciehp_writew(ctrl, PCI_EXP_SLTSTA, 0x1f)) goto abort_ctrl; - /* Disable sotfware notification */ + /* Disable software notification */ pcie_disable_notification(ctrl); ctrl_info(ctrl, "HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n", diff --git a/drivers/pci/hotplug/pcihp_skeleton.c b/drivers/pci/hotplug/pcihp_skeleton.c index 1f00b93..ac69094 100644 --- a/drivers/pci/hotplug/pcihp_skeleton.c +++ b/drivers/pci/hotplug/pcihp_skeleton.c @@ -52,7 +52,7 @@ static LIST_HEAD(slot_list); do { \ if (debug) \ printk (KERN_DEBUG "%s: " format "\n", \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format "\n", MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format "\n", MY_NAME , ## arg) @@ -287,7 +287,7 @@ static int __init init_slots(void) hotplug_slot->release = &release_slot; make_slot_name(slot); hotplug_slot->ops = &skel_hotplug_slot_ops; - + /* * Initialize the slot info structure with some known * good values. @@ -296,7 +296,7 @@ static int __init init_slots(void) get_attention_status(hotplug_slot, &info->attention_status); get_latch_status(hotplug_slot, &info->latch_status); get_adapter_status(hotplug_slot, &info->adapter_status); - + dbg("registering slot %d\n", i); retval = pci_hp_register(slot->hotplug_slot); if (retval) { @@ -336,7 +336,7 @@ static void __exit cleanup_slots(void) pci_hp_deregister(slot->hotplug_slot); } } - + static int __init pcihp_skel_init(void) { int retval; diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index bb7af78..e9c044d 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -217,7 +217,7 @@ static int dlpar_remove_phb(char *drc_name, struct device_node *dn) if (!pcibios_find_pci_bus(dn)) return -EINVAL; - /* If pci slot is hotplugable, use hotplug to remove it */ + /* If pci slot is hotpluggable, use hotplug to remove it */ slot = find_php_slot(dn); if (slot && rpaphp_deregister_slot(slot)) { printk(KERN_ERR "%s: unable to remove hotplug slot %s\n", diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h index 3135856..b2593e8 100644 --- a/drivers/pci/hotplug/rpaphp.h +++ b/drivers/pci/hotplug/rpaphp.h @@ -49,9 +49,9 @@ extern bool rpaphp_debug; #define dbg(format, arg...) \ do { \ - if (rpaphp_debug) \ + if (rpaphp_debug) \ printk(KERN_DEBUG "%s: " format, \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg) @@ -99,5 +99,5 @@ void dealloc_slot_struct(struct slot *slot); struct slot *alloc_slot_struct(struct device_node *dn, int drc_index, char *drc_name, int power_domain); int rpaphp_register_slot(struct slot *slot); int rpaphp_deregister_slot(struct slot *slot); - + #endif /* _PPC64PHP_H */ diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 127d6e6..b7fc5c9 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -226,7 +226,7 @@ int rpaphp_get_drc_props(struct device_node *dn, int *drc_index, for (i = 0; i < indexes[0]; i++) { if ((unsigned int) indexes[i + 1] == *my_index) { if (drc_name) - *drc_name = name_tmp; + *drc_name = name_tmp; if (drc_type) *drc_type = type_tmp; if (drc_index) @@ -289,7 +289,7 @@ static int is_php_dn(struct device_node *dn, const int **indexes, * rpaphp_add_slot -- declare a hotplug slot to the hotplug subsystem. * @dn: device node of slot * - * This subroutine will register a hotplugable slot with the + * This subroutine will register a hotpluggable slot with the * PCI hotplug infrastructure. This routine is typically called * during boot time, if the hotplug slots are present at boot time, * or is called later, by the dlpar add code, if the slot is @@ -328,7 +328,7 @@ int rpaphp_add_slot(struct device_node *dn) return -ENOMEM; slot->type = simple_strtoul(type, NULL, 10); - + dbg("Found drc-index:0x%x drc-name:%s drc-type:%s\n", indexes[i + 1], name, type); @@ -356,7 +356,7 @@ static void __exit cleanup_slots(void) /* * Unregister all of our slots with the pci_hotplug subsystem, * and free up all memory that we had allocated. - * memory will be freed in release_slot callback. + * memory will be freed in release_slot callback. */ list_for_each_safe(tmp, n, &rpaphp_slot_head) { diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c index 513e1e2..9243f3e7 100644 --- a/drivers/pci/hotplug/rpaphp_pci.c +++ b/drivers/pci/hotplug/rpaphp_pci.c @@ -44,7 +44,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state) dbg("%s: slot must be power up to get sensor-state\n", __func__); - /* some slots have to be powered up + /* some slots have to be powered up * before get-sensor will succeed. */ rc = rtas_set_power_level(slot->power_domain, POWER_ON, @@ -133,4 +133,3 @@ int rpaphp_enable_slot(struct slot *slot) return 0; } - diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c index b283bbe..a6082cc 100644 --- a/drivers/pci/hotplug/rpaphp_slot.c +++ b/drivers/pci/hotplug/rpaphp_slot.c @@ -1,5 +1,5 @@ /* - * RPA Virtual I/O device functions + * RPA Virtual I/O device functions * Copyright (C) 2004 Linda Xie <lxie@us.ibm.com> * * All rights reserved. @@ -51,27 +51,27 @@ struct slot *alloc_slot_struct(struct device_node *dn, int drc_index, char *drc_name, int power_domain) { struct slot *slot; - + slot = kzalloc(sizeof(struct slot), GFP_KERNEL); if (!slot) goto error_nomem; slot->hotplug_slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL); if (!slot->hotplug_slot) - goto error_slot; + goto error_slot; slot->hotplug_slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL); if (!slot->hotplug_slot->info) goto error_hpslot; slot->name = kstrdup(drc_name, GFP_KERNEL); if (!slot->name) - goto error_info; + goto error_info; slot->dn = dn; slot->index = drc_index; slot->power_domain = power_domain; slot->hotplug_slot->private = slot; slot->hotplug_slot->ops = &rpaphp_hotplug_slot_ops; slot->hotplug_slot->release = &rpaphp_release_slot; - + return (slot); error_info: @@ -91,7 +91,7 @@ static int is_registered(struct slot *slot) list_for_each_entry(tmp_slot, &rpaphp_slot_head, rpaphp_slot_list) { if (!strcmp(tmp_slot->name, slot->name)) return 1; - } + } return 0; } @@ -104,7 +104,7 @@ int rpaphp_deregister_slot(struct slot *slot) __func__, slot->name); list_del(&slot->rpaphp_slot_list); - + retval = pci_hp_deregister(php_slot); if (retval) err("Problem unregistering a slot %s\n", slot->name); @@ -120,7 +120,7 @@ int rpaphp_register_slot(struct slot *slot) int retval; int slotno; - dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", + dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", __func__, slot->dn->full_name, slot->index, slot->name, slot->power_domain, slot->type); @@ -128,7 +128,7 @@ int rpaphp_register_slot(struct slot *slot) if (is_registered(slot)) { err("rpaphp_register_slot: slot[%s] is already registered\n", slot->name); return -EAGAIN; - } + } if (slot->dn->child) slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn); @@ -145,4 +145,3 @@ int rpaphp_register_slot(struct slot *slot) info("Slot [%s] registered\n", slot->name); return 0; } - diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index d876e4b..6152909 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -216,13 +216,13 @@ struct ctrl_reg { /* offsets to the controller registers based on the above structure layout */ enum ctrl_offsets { - BASE_OFFSET = offsetof(struct ctrl_reg, base_offset), - SLOT_AVAIL1 = offsetof(struct ctrl_reg, slot_avail1), + BASE_OFFSET = offsetof(struct ctrl_reg, base_offset), + SLOT_AVAIL1 = offsetof(struct ctrl_reg, slot_avail1), SLOT_AVAIL2 = offsetof(struct ctrl_reg, slot_avail2), - SLOT_CONFIG = offsetof(struct ctrl_reg, slot_config), + SLOT_CONFIG = offsetof(struct ctrl_reg, slot_config), SEC_BUS_CONFIG = offsetof(struct ctrl_reg, sec_bus_config), MSI_CTRL = offsetof(struct ctrl_reg, msi_ctrl), - PROG_INTERFACE = offsetof(struct ctrl_reg, prog_interface), + PROG_INTERFACE = offsetof(struct ctrl_reg, prog_interface), CMD = offsetof(struct ctrl_reg, cmd), CMD_STATUS = offsetof(struct ctrl_reg, cmd_status), INTR_LOC = offsetof(struct ctrl_reg, intr_loc), diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c index d3f757d..faf13ab 100644 --- a/drivers/pci/hotplug/shpchp_core.c +++ b/drivers/pci/hotplug/shpchp_core.c @@ -143,11 +143,11 @@ static int init_slots(struct controller *ctrl) snprintf(name, SLOT_NAME_SIZE, "%d", slot->number); hotplug_slot->ops = &shpchp_hotplug_slot_ops; - ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x " - "hp_slot=%x sun=%x slot_device_offset=%x\n", - pci_domain_nr(ctrl->pci_dev->subordinate), - slot->bus, slot->device, slot->hp_slot, slot->number, - ctrl->slot_device_offset); + ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x " + "hp_slot=%x sun=%x slot_device_offset=%x\n", + pci_domain_nr(ctrl->pci_dev->subordinate), + slot->bus, slot->device, slot->hp_slot, slot->number, + ctrl->slot_device_offset); retval = pci_hp_register(slot->hotplug_slot, ctrl->pci_dev->subordinate, slot->device, name); if (retval) { diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index 75ba231..2d7f474 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -116,7 +116,7 @@ #define SLOT_REG_RSVDZ_MASK ((1 << 15) | (7 << 21)) /* - * SHPC Command Code definitnions + * SHPC Command Code definitions * * Slot Operation 00h - 3Fh * Set Bus Segment Speed/Mode A 40h - 47h diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 21a7182..1fe2d6f 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -610,7 +610,7 @@ resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) struct resource tmp; enum pci_bar_type type; int reg = pci_iov_resource_bar(dev, resno, &type); - + if (!reg) return 0; diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c index b008cf8..6684f15 100644 --- a/drivers/pci/irq.c +++ b/drivers/pci/irq.c @@ -25,7 +25,7 @@ static void pci_note_irq_problem(struct pci_dev *pdev, const char *reason) /** * pci_lost_interrupt - reports a lost PCI interrupt * @pdev: device whose interrupt is lost - * + * * The primary function of this routine is to report a lost interrupt * in a standard way which users can recognise (instead of blaming the * driver). diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5e63645..3fcd67a 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -784,7 +784,7 @@ error: * @nvec: how many MSIs have been requested ? * @type: are we checking for MSI or MSI-X ? * - * Look at global flags, the device itself, and its parent busses + * Look at global flags, the device itself, and its parent buses * to determine if MSI/-X are supported for the device. If MSI/-X is * supported return 0, else return an error code. **/ diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index f166126..577074e 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -141,7 +141,7 @@ phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle) * if (_PRW at S-state x) * choose from highest power _SxD to lowest power _SxW * else // no _PRW at S-state x - * choose highest power _SxD or any lower power + * choose highest power _SxD or any lower power */ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 4548535..9042fdb 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -312,7 +312,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, * __pci_device_probe - check if a driver wants to claim a specific PCI device * @drv: driver to call to check if it wants the PCI device * @pci_dev: PCI device being probed - * + * * returns 0 on success, else error. * side-effect: pci_dev->driver is set to drv when drv claims pci_dev. */ @@ -378,7 +378,7 @@ static int pci_device_remove(struct device * dev) * We would love to complain here if pci_dev->is_enabled is set, that * the driver should have called pci_disable_device(), but the * unfortunate fact is there are too many odd BIOS and bridge setups - * that don't like drivers doing that all of the time. + * that don't like drivers doing that all of the time. * Oh well, we can dream of sane hardware when we sleep, no matter how * horrible the crap we have to deal with is when we are awake... */ @@ -1156,10 +1156,10 @@ static const struct dev_pm_ops pci_dev_pm_ops = { * @drv: the driver structure to register * @owner: owner module of drv * @mod_name: module name string - * + * * Adds the driver structure to the list of registered drivers. - * Returns a negative value on error, otherwise 0. - * If no error occurred, the driver remains registered even if + * Returns a negative value on error, otherwise 0. + * If no error occurred, the driver remains registered even if * no device was claimed during registration. */ int __pci_register_driver(struct pci_driver *drv, struct module *owner, @@ -1181,7 +1181,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, /** * pci_unregister_driver - unregister a pci driver * @drv: the driver structure to unregister - * + * * Deletes the driver structure from the list of registered PCI drivers, * gives it a chance to clean up by calling its remove() function for * each device it was responsible for, and marks those devices as @@ -1203,7 +1203,7 @@ static struct pci_driver pci_compat_driver = { * pci_dev_driver - get the pci_driver of a device * @dev: the device to query * - * Returns the appropriate pci_driver structure or %NULL if there is no + * Returns the appropriate pci_driver structure or %NULL if there is no * registered driver for the device. */ struct pci_driver * @@ -1224,7 +1224,7 @@ pci_dev_driver(const struct pci_dev *dev) * pci_bus_match - Tell if a PCI device structure has a matching PCI device id structure * @dev: the PCI device structure to match against * @drv: the device driver to search for matching PCI device id structures - * + * * Used by a driver to check whether a PCI device present in the * system is in its list of supported devices. Returns the matching * pci_device_id structure or %NULL if there is no match. diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c index 6e47c51..2ff7750 100644 --- a/drivers/pci/pci-stub.c +++ b/drivers/pci/pci-stub.c @@ -2,13 +2,13 @@ * * Copyright (C) 2008 Red Hat, Inc. * Author: - * Chris Wright + * Chris Wright * * This work is licensed under the terms of the GNU GPL, version 2. * * Usage is simple, allocate a new id to the stub driver and bind the * device to it. For example: - * + * * # echo "8086 10f5" > /sys/bus/pci/drivers/pci-stub/new_id * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/pci-stub/bind diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 2aaa83c..c91e6c1 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -10,7 +10,7 @@ * * File attributes for PCI devices * - * Modeled after usb's driverfs.c + * Modeled after usb's driverfs.c * */ @@ -270,13 +270,17 @@ msi_bus_store(struct device *dev, struct device_attribute *attr, if (kstrtoul(buf, 0, &val) < 0) return -EINVAL; - /* bad things may happen if the no_msi flag is changed - * while some drivers are loaded */ + /* + * Bad things may happen if the no_msi flag is changed + * while drivers are loaded. + */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - /* Maybe pci devices without subordinate busses shouldn't even have this - * attribute in the first place? */ + /* + * Maybe devices without subordinate buses shouldn't have this + * attribute in the first place? + */ if (!pdev->subordinate) return count; @@ -670,7 +674,7 @@ pci_write_config(struct file* filp, struct kobject *kobj, size = dev->cfg_size - off; count = size; } - + pci_config_pm_runtime_get(dev); if ((off & 1) && size) { @@ -678,7 +682,7 @@ pci_write_config(struct file* filp, struct kobject *kobj, off++; size--; } - + if ((off & 3) && size > 2) { u16 val = data[off - init_off]; val |= (u16) data[off - init_off + 1] << 8; @@ -696,7 +700,7 @@ pci_write_config(struct file* filp, struct kobject *kobj, off += 4; size -= 4; } - + if (size >= 2) { u16 val = data[off - init_off]; val |= (u16) data[off - init_off + 1] << 8; @@ -1229,21 +1233,21 @@ pci_read_rom(struct file *filp, struct kobject *kobj, if (!pdev->rom_attr_enabled) return -EINVAL; - + rom = pci_map_rom(pdev, &size); /* size starts out as PCI window size */ if (!rom || !size) return -EIO; - + if (off >= size) count = 0; else { if (off + count > size) count = size - off; - + memcpy_fromio(buf, rom + off, count); } pci_unmap_rom(pdev, rom); - + return count; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b127fbda..33120d1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -198,7 +198,7 @@ static int __pci_bus_find_cap_start(struct pci_bus *bus, } /** - * pci_find_capability - query for devices' capabilities + * pci_find_capability - query for devices' capabilities * @dev: PCI device to query * @cap: capability code * @@ -207,12 +207,12 @@ static int __pci_bus_find_cap_start(struct pci_bus *bus, * device's PCI configuration space or 0 in case the device does not * support it. Possible values for @cap: * - * %PCI_CAP_ID_PM Power Management - * %PCI_CAP_ID_AGP Accelerated Graphics Port - * %PCI_CAP_ID_VPD Vital Product Data - * %PCI_CAP_ID_SLOTID Slot Identification + * %PCI_CAP_ID_PM Power Management + * %PCI_CAP_ID_AGP Accelerated Graphics Port + * %PCI_CAP_ID_VPD Vital Product Data + * %PCI_CAP_ID_SLOTID Slot Identification * %PCI_CAP_ID_MSI Message Signalled Interrupts - * %PCI_CAP_ID_CHSWP CompactPCI HotSwap + * %PCI_CAP_ID_CHSWP CompactPCI HotSwap * %PCI_CAP_ID_PCIX PCI-X * %PCI_CAP_ID_EXP PCI Express */ @@ -228,13 +228,13 @@ int pci_find_capability(struct pci_dev *dev, int cap) } /** - * pci_bus_find_capability - query for devices' capabilities + * pci_bus_find_capability - query for devices' capabilities * @bus: the PCI bus to query * @devfn: PCI device to query * @cap: capability code * * Like pci_find_capability() but works for pci devices that do not have a - * pci_dev structure set up yet. + * pci_dev structure set up yet. * * Returns the address of the requested capability structure within the * device's PCI configuration space or 0 in case the device does not @@ -515,7 +515,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) return -EINVAL; /* Validate current state: - * Can enter D0 from any state, but if we can only go deeper + * Can enter D0 from any state, but if we can only go deeper * to sleep if we're already in a low power state */ if (state != PCI_D0 && dev->current_state <= PCI_D3cold @@ -998,7 +998,7 @@ static void pci_restore_config_space(struct pci_dev *pdev) } } -/** +/** * pci_restore_state - Restore the saved state of a PCI device * @dev: - PCI device that we're dealing with */ @@ -1030,7 +1030,7 @@ struct pci_saved_state { * the device saved state. * @dev: PCI device that we're dealing with * - * Rerturn NULL if no state or error. + * Return NULL if no state or error. */ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) { @@ -1880,7 +1880,7 @@ int pci_finish_runtime_suspend(struct pci_dev *dev) * pci_dev_run_wake - Check if device can generate run-time wake-up events. * @dev: Device to check. * - * Return true if the device itself is cabable of generating wake-up events + * Return true if the device itself is capable of generating wake-up events * (through the platform or using the native PCIe PME) or if the device supports * PME and one of its upstream bridges can generate wake-up events. */ @@ -2447,7 +2447,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) switch (pci_pcie_type(pdev)) { /* * PCI/X-to-PCIe bridges are not specifically mentioned by the spec, - * but since their primary inteface is PCI/X, we conservatively + * but since their primary interface is PCI/X, we conservatively * handle them as we would a non-PCIe device. */ case PCI_EXP_TYPE_PCIE_BRIDGE: @@ -2471,7 +2471,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) /* * PCIe 3.0, 6.12.1.2 specifies ACS capabilities that should be * implemented by the remaining PCIe types to indicate peer-to-peer - * capabilities, but only when they are part of a multifunciton + * capabilities, but only when they are part of a multifunction * device. The footnote for section 6.12 indicates the specific * PCIe types included here. */ @@ -2486,7 +2486,7 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) } /* - * PCIe 3.0, 6.12.1.3 specifies no ACS capabilties are applicable + * PCIe 3.0, 6.12.1.3 specifies no ACS capabilities are applicable * to single function devices with the exception of downstream ports. */ return true; @@ -2622,7 +2622,7 @@ void pci_release_region(struct pci_dev *pdev, int bar) * * If @exclusive is set, then the region is marked so that userspace * is explicitly not allowed to map the resource via /dev/mem or - * sysfs MMIO access. + * sysfs MMIO access. * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. @@ -2634,7 +2634,7 @@ static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_n if (pci_resource_len(pdev, bar) == 0) return 0; - + if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) { if (!request_region(pci_resource_start(pdev, bar), pci_resource_len(pdev, bar), res_name)) @@ -2694,7 +2694,7 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) * * The key difference that _exclusive makes it that userspace is * explicitly not allowed to map the resource via /dev/mem or - * sysfs. + * sysfs. */ int pci_request_region_exclusive(struct pci_dev *pdev, int bar, const char *res_name) { @@ -2799,7 +2799,7 @@ int pci_request_regions(struct pci_dev *pdev, const char *res_name) * successfully. * * pci_request_regions_exclusive() will mark the region so that - * /dev/mem and the sysfs MMIO access will not be allowed. + * /dev/mem and the sysfs MMIO access will not be allowed. * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. @@ -2967,7 +2967,7 @@ pci_set_mwi(struct pci_dev *dev) cmd |= PCI_COMMAND_INVALIDATE; pci_write_config_word(dev, PCI_COMMAND, cmd); } - + return 0; } @@ -3292,7 +3292,7 @@ clear: * * NOTE: This causes the caller to sleep for twice the device power transition * cooldown period, which for the D0->D3hot and D3hot->D0 transitions is 10 ms - * by devault (i.e. unless the @dev's d3_delay field has a different value). + * by default (i.e. unless the @dev's d3_delay field has a different value). * Moreover, only devices in D0 can be reset by this function. */ static int pci_pm_reset(struct pci_dev *dev, int probe) @@ -3341,7 +3341,7 @@ void pci_reset_bridge_secondary_bus(struct pci_dev *dev) pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); /* * PCI spec v3.0 7.6.4.2 requires minimum Trst of 1ms. Double - * this to 2ms to ensure that we meet the minium requirement. + * this to 2ms to ensure that we meet the minimum requirement. */ msleep(2); @@ -3998,7 +3998,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps) return -EINVAL; v = ffs(mps) - 8; - if (v > dev->pcie_mpss) + if (v > dev->pcie_mpss) return -EINVAL; v <<= 5; diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 6b3a958..b2c8881 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -525,7 +525,7 @@ static void handle_error_source(struct pcie_device *aerdev, if (info->severity == AER_CORRECTABLE) { /* - * Correctable error does not need software intevention. + * Correctable error does not need software intervention. * No need to go through error recovery process. */ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 403a443..f1272dc 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -548,7 +548,7 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) /* * pcie_aspm_init_link_state: Initiate PCI express link state. - * It is called after the pcie and its children devices are scaned. + * It is called after the pcie and its children devices are scanned. * @pdev: the root port or switch downstream port */ void pcie_aspm_init_link_state(struct pci_dev *pdev) diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index e56e594..bbc3bdd 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -419,8 +419,8 @@ static void pcie_pme_remove(struct pcie_device *srv) static struct pcie_port_service_driver pcie_pme_driver = { .name = "pcie_pme", - .port_type = PCI_EXP_TYPE_ROOT_PORT, - .service = PCIE_PORT_SERVICE_PME, + .port_type = PCI_EXP_TYPE_ROOT_PORT, + .service = PCIE_PORT_SERVICE_PME, .probe = pcie_pme_probe, .suspend = pcie_pme_suspend, diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index d2eb80a..d525548 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -14,7 +14,7 @@ #define PCIE_PORT_DEVICE_MAXSERVICES 4 /* * According to the PCI Express Base Specification 2.0, the indices of - * the MSI-X table entires used by port services must not exceed 31 + * the MSI-X table entries used by port services must not exceed 31 */ #define PCIE_PORT_MAX_MSIX_ENTRIES 32 diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index 67be55a..87e79a6 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -18,8 +18,8 @@ static int pcie_port_bus_match(struct device *dev, struct device_driver *drv); struct bus_type pcie_port_bus_type = { - .name = "pci_express", - .match = pcie_port_bus_match, + .name = "pci_express", + .match = pcie_port_bus_match, }; EXPORT_SYMBOL_GPL(pcie_port_bus_type); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 08d131f..0b6e766 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -46,7 +46,7 @@ static void release_pcie_device(struct device *dev) * pcie_port_msix_add_entry - add entry to given array of MSI-X entries * @entries: Array of MSI-X entries * @new_entry: Index of the entry to add to the array - * @nr_entries: Number of entries aleady in the array + * @nr_entries: Number of entries already in the array * * Return value: Position of the added entry in the array */ diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 696caed..0d8fdc4 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -223,7 +223,6 @@ static int pcie_portdrv_probe(struct pci_dev *dev, static void pcie_portdrv_remove(struct pci_dev *dev) { pcie_port_device_remove(dev); - pci_disable_device(dev); } static int error_detected_iter(struct device *device, void *data) @@ -390,9 +389,9 @@ static struct pci_driver pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, - .err_handler = &pcie_portdrv_err_handler, + .err_handler = &pcie_portdrv_err_handler, - .driver.pm = PCIE_PORTDRV_PM_OPS, + .driver.pm = PCIE_PORTDRV_PM_OPS, }; static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) @@ -412,7 +411,7 @@ static struct dmi_system_id __initdata pcie_portdrv_dmi_table[] = { .ident = "MSI Wind U-100", .matches = { DMI_MATCH(DMI_SYS_VENDOR, - "MICRO-STAR INTERNATIONAL CO., LTD"), + "MICRO-STAR INTERNATIONAL CO., LTD"), DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), }, }, diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5e14f5a..38e403d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -582,7 +582,7 @@ static enum pci_bus_speed agp_speed(int agp3, int agpstat) index = 1; else goto out; - + if (agp3) { index += 2; if (index == 5) @@ -789,7 +789,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass) } /* Disable MasterAbortMode during probing to avoid reporting - of bus errors (in some architectures) */ + of bus errors (in some architectures) */ pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl); pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT); @@ -1005,7 +1005,7 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev) * pci_setup_device - fill in class and map information of a device * @dev: the device structure to fill * - * Initialize the device structure with information about the device's + * Initialize the device structure with information about the device's * vendor,class,memory and IO-space addresses,IRQ lines etc. * Called at initialisation of the PCI subsystem and by CardBus services. * Returns 0 on success and negative if unknown type of device (not normal, @@ -1111,7 +1111,7 @@ int pci_setup_device(struct pci_dev *dev) goto bad; /* The PCI-to-PCI bridge spec requires that subtractive decoding (i.e. transparent) bridge must have programming - interface code of 0x01. */ + interface code of 0x01. */ pci_read_irq(dev); dev->transparent = ((dev->class & 0xff) == 1); pci_read_bases(dev, 2, PCI_ROM_ADDRESS1); @@ -1570,7 +1570,7 @@ static void pcie_write_mrrs(struct pci_dev *dev) * subsequent read will verify if the value is acceptable or not. * If the MRRS value provided is not acceptable (e.g., too large), * shrink the value until it is acceptable to the HW. - */ + */ while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { rc = pcie_set_readrq(dev, mrrs); if (!rc) diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index cdc7836..46d1378 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -222,7 +222,7 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, default: ret = -EINVAL; break; - }; + } return ret; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 9149045..b3b1b9a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -55,7 +55,7 @@ static void quirk_mellanox_tavor(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR,quirk_mellanox_tavor); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,quirk_mellanox_tavor); -/* Deal with broken BIOS'es that neglect to enable passive release, +/* Deal with broken BIOSes that neglect to enable passive release, which can cause problems in combination with the 82441FX/PPro MTRRs */ static void quirk_passive_release(struct pci_dev *dev) { @@ -78,11 +78,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_p /* The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a workaround but VIA don't answer queries. If you happen to have good contacts at VIA - ask them for me please -- Alan - - This appears to be BIOS not version dependent. So presumably there is a + ask them for me please -- Alan + + This appears to be BIOS not version dependent. So presumably there is a chipset level fix */ - + static void quirk_isa_dma_hangs(struct pci_dev *dev) { if (!isa_dma_bridge_buggy) { @@ -97,7 +97,7 @@ static void quirk_isa_dma_hangs(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, quirk_isa_dma_hangs); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, quirk_isa_dma_hangs); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_1, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_2, quirk_isa_dma_hangs); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_3, quirk_isa_dma_hangs); @@ -157,10 +157,10 @@ static void quirk_triton(struct pci_dev *dev) pci_pci_problems |= PCIPCI_TRITON; } } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437, quirk_triton); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437VX, quirk_triton); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439, quirk_triton); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quirk_triton); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437, quirk_triton); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437VX, quirk_triton); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439, quirk_triton); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quirk_triton); /* * VIA Apollo KT133 needs PCI latency patch @@ -171,7 +171,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quir * the info on which Mr Breese based his work. * * Updated based on further information from the site and also on - * information provided by VIA + * information provided by VIA */ static void quirk_vialatency(struct pci_dev *dev) { @@ -179,7 +179,7 @@ static void quirk_vialatency(struct pci_dev *dev) u8 busarb; /* Ok we have a potential problem chipset here. Now see if we have a buggy southbridge */ - + p = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL); if (p!=NULL) { /* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */ @@ -194,9 +194,9 @@ static void quirk_vialatency(struct pci_dev *dev) if (p->revision < 0x10 || p->revision > 0x12) goto exit; } - + /* - * Ok we have the problem. Now set the PCI master grant to + * Ok we have the problem. Now set the PCI master grant to * occur every master grant. The apparent bug is that under high * PCI load (quite common in Linux of course) you can get data * loss when the CPU is held off the bus for 3 bus master requests @@ -209,7 +209,7 @@ static void quirk_vialatency(struct pci_dev *dev) */ pci_read_config_byte(dev, 0x76, &busarb); - /* Set bit 4 and bi 5 of byte 76 to 0x01 + /* Set bit 4 and bi 5 of byte 76 to 0x01 "Master priority rotation on every PCI master grant */ busarb &= ~(1<<5); busarb |= (1<<4); @@ -252,7 +252,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576, quirk_vsfx) * that DMA to AGP space. Latency must be set to 0xA and triton * workaround applied too * [Info kindly provided by ALi] - */ + */ static void quirk_alimagik(struct pci_dev *dev) { if ((pci_pci_problems&PCIPCI_ALIMAGIK)==0) { @@ -260,8 +260,8 @@ static void quirk_alimagik(struct pci_dev *dev) pci_pci_problems |= PCIPCI_ALIMAGIK|PCIPCI_TRITON; } } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1647, quirk_alimagik); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1651, quirk_alimagik); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1647, quirk_alimagik); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1651, quirk_alimagik); /* * Natoma has some interesting boundary conditions with Zoran stuff @@ -274,12 +274,12 @@ static void quirk_natoma(struct pci_dev *dev) pci_pci_problems |= PCIPCI_NATOMA; } } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_0, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_1, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_1, quirk_natoma); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_0, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_1, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_1, quirk_natoma); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2, quirk_natoma); /* * This chip can cause PCI parity errors if config register 0xA0 is read @@ -400,7 +400,7 @@ static void piix4_io_quirk(struct pci_dev *dev, const char *name, unsigned int p /* * For now we only print it out. Eventually we'll want to * reserve it (at least if it's in the 0x1000+ range), but - * let's get enough confirmation reports first. + * let's get enough confirmation reports first. */ base &= -size; dev_info(&dev->dev, "%s PIO at %04x-%04x\n", name, base, base + size - 1); @@ -425,7 +425,7 @@ static void piix4_mem_quirk(struct pci_dev *dev, const char *name, unsigned int } /* * For now we only print it out. Eventually we'll want to - * reserve it, but let's get enough confirmation reports first. + * reserve it, but let's get enough confirmation reports first. */ base &= -size; dev_info(&dev->dev, "%s MMIO at %04x-%04x\n", name, base, base + size - 1); @@ -682,7 +682,7 @@ static void quirk_xio2000a(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XIO2000A, quirk_xio2000a); -#ifdef CONFIG_X86_IO_APIC +#ifdef CONFIG_X86_IO_APIC #include <asm/io_apic.h> @@ -696,12 +696,12 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XIO2000A, static void quirk_via_ioapic(struct pci_dev *dev) { u8 tmp; - + if (nr_ioapics < 1) tmp = 0; /* nothing routed to external APIC */ else tmp = 0x1f; /* all known bits (4-0) routed to external APIC */ - + dev_info(&dev->dev, "%sbling VIA external APIC routing\n", tmp == 0 ? "Disa" : "Ena"); @@ -712,7 +712,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_i DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic); /* - * VIA 8237: Some BIOSs don't set the 'Bypass APIC De-Assert Message' Bit. + * VIA 8237: Some BIOSes don't set the 'Bypass APIC De-Assert Message' Bit. * This leads to doubled level interrupt rates. * Set this bit to get rid of cycle wastage. * Otherwise uncritical. @@ -986,7 +986,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_PCI_MASTER, qu static void quirk_disable_pxb(struct pci_dev *pdev) { u16 config; - + if (pdev->revision != 0x04) /* Only C0 requires this */ return; pci_read_config_word(pdev, 0x40, &config); @@ -1094,11 +1094,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82375, quirk_e * On ASUS P4B boards, the SMBus PCI Device within the ICH2/4 southbridge * is not activated. The myth is that Asus said that they do not want the * users to be irritated by just another PCI Device in the Win98 device - * manager. (see the file prog/hotplug/README.p4b in the lm_sensors + * manager. (see the file prog/hotplug/README.p4b in the lm_sensors * package 2.7.0 for details) * - * The SMBus PCI Device can be activated by setting a bit in the ICH LPC - * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it + * The SMBus PCI Device can be activated by setting a bit in the ICH LPC + * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it * becomes necessary to do this tweak in two steps -- the chosen trigger * is either the Host bridge (preferred) or on-board VGA controller. * @@ -1253,7 +1253,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asu static void asus_hides_smbus_lpc(struct pci_dev *dev) { u16 val; - + if (likely(!asus_hides_smbus)) return; @@ -1640,8 +1640,8 @@ static void quirk_disable_intel_boot_interrupt(struct pci_dev *dev) dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); /* * disable boot interrupts on HT-1000 @@ -1673,8 +1673,8 @@ static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev) dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); /* * disable boot interrupts on AMD and ATI chipsets @@ -1730,8 +1730,8 @@ static void quirk_disable_amd_8111_boot_interrupt(struct pci_dev *dev) dev_info(&dev->dev, "disabled boot interrupts on device [%04x:%04x]\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); #endif /* CONFIG_X86_IO_APIC */ /* @@ -2127,8 +2127,8 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1); #ifdef CONFIG_PCI_MSI /* Some chipsets do not support MSI. We cannot easily rely on setting * PCI_BUS_FLAGS_NO_MSI in its bus flags because there are actually - * some other busses controlled by the chipset even if Linux is not - * aware of it. Instead of setting the flag on all busses in the + * some other buses controlled by the chipset even if Linux is not + * aware of it. Instead of setting the flag on all buses in the * machine, simply disable MSI globally. */ static void quirk_disable_all_msi(struct pci_dev *dev) @@ -2288,14 +2288,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, nvenet_msi_disable); /* - * Some versions of the MCP55 bridge from nvidia have a legacy irq routing - * config register. This register controls the routing of legacy interrupts - * from devices that route through the MCP55. If this register is misprogramed - * interrupts are only sent to the bsp, unlike conventional systems where the - * irq is broadxast to all online cpus. Not having this register set - * properly prevents kdump from booting up properly, so lets make sure that - * we have it set correctly. - * Note this is an undocumented register. + * Some versions of the MCP55 bridge from Nvidia have a legacy IRQ routing + * config register. This register controls the routing of legacy + * interrupts from devices that route through the MCP55. If this register + * is misprogrammed, interrupts are only sent to the BSP, unlike + * conventional systems where the IRQ is broadcast to all online CPUs. Not + * having this register set properly prevents kdump from booting up + * properly, so let's make sure that we have it set correctly. + * Note that this is an undocumented register. */ static void nvbridge_check_legacy_irq_routing(struct pci_dev *dev) { @@ -2626,7 +2626,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, 0xe091, /* Allow manual resource allocation for PCI hotplug bridges * via pci=hpmemsize=nnM and pci=hpiosize=nnM parameters. For * some PCI-PCI hotplug bridges, like PLX 6254 (former HINT HB6), - * kernel fails to allocate resources when hotplug device is + * kernel fails to allocate resources when hotplug device is * inserted and PCI bus is rescanned. */ static void quirk_hotplug_bridge(struct pci_dev *dev) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 8fc54b7..1576851 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -7,7 +7,7 @@ static void pci_free_resources(struct pci_dev *dev) { int i; - msi_remove_pci_irq_vectors(dev); + msi_remove_pci_irq_vectors(dev); pci_cleanup_rom(dev); for (i = 0; i < PCI_NUM_RESOURCES; i++) { diff --git a/drivers/pci/search.c b/drivers/pci/search.c index d0627fa..3ff2ac7 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -1,5 +1,5 @@ /* - * PCI searching functions. + * PCI searching functions. * * Copyright (C) 1993 -- 1997 Drew Eckhardt, Frederic Potter, * David Mosberger-Tang @@ -96,12 +96,12 @@ struct pci_bus * pci_find_bus(int domain, int busnr) * pci_find_next_bus - begin or continue searching for a PCI bus * @from: Previous PCI bus found, or %NULL for new search. * - * Iterates through the list of known PCI busses. A new search is + * Iterates through the list of known PCI buses. A new search is * initiated by passing %NULL as the @from argument. Otherwise if * @from is not %NULL, searches continue from next device on the * global list. */ -struct pci_bus * +struct pci_bus * pci_find_next_bus(const struct pci_bus *from) { struct list_head *n; @@ -119,11 +119,11 @@ pci_find_next_bus(const struct pci_bus *from) /** * pci_get_slot - locate PCI device for a given PCI slot * @bus: PCI bus on which desired PCI device resides - * @devfn: encodes number of PCI slot in which the desired PCI - * device resides and the logical device number within that slot + * @devfn: encodes number of PCI slot in which the desired PCI + * device resides and the logical device number within that slot * in case of multi-function devices. * - * Given a PCI bus and slot/function number, the desired PCI device + * Given a PCI bus and slot/function number, the desired PCI device * is located in the list of PCI devices. * If the device is found, its reference count is increased and this * function returns a pointer to its data structure. The caller must diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4ce83b2..219a410 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -292,8 +292,8 @@ static void assign_requested_resources_sorted(struct list_head *head, (!(res->flags & IORESOURCE_ROM_ENABLE)))) add_to_list(fail_head, dev_res->dev, res, - 0 /* dont care */, - 0 /* dont care */); + 0 /* don't care */, + 0 /* don't care */); } reset_resource(res); } @@ -667,9 +667,9 @@ static void pci_bridge_check_ranges(struct pci_bus *bus) if (!io) { pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0); pci_read_config_word(bridge, PCI_IO_BASE, &io); - pci_write_config_word(bridge, PCI_IO_BASE, 0x0); - } - if (io) + pci_write_config_word(bridge, PCI_IO_BASE, 0x0); + } + if (io) b_res[0].flags |= IORESOURCE_IO; /* DECchip 21050 pass 2 errata: the bridge may miss an address disconnect boundary by one PCI data phase. @@ -819,7 +819,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, resource_size_t min_align, align; if (!b_res) - return; + return; min_align = window_alignment(bus, IORESOURCE_IO); list_for_each_entry(dev, &bus->devices, bus_list) { @@ -950,7 +950,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (realloc_head && i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END) { r->end = r->start - 1; - add_to_list(realloc_head, dev, r, r_size, 0/* dont' care */); + add_to_list(realloc_head, dev, r, r_size, 0/* don't care */); children_add_size += r_size; continue; } @@ -1456,8 +1456,8 @@ static enum enable_type pci_realloc_detect(struct pci_bus *bus, /* * first try will not touch pci bridge res - * second and later try will clear small leaf bridge res - * will stop till to the max deepth if can not find good one + * second and later try will clear small leaf bridge res + * will stop till to the max depth if can not find good one */ void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus) { diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 07f2edd..83c4d3b 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -159,7 +159,7 @@ resource_size_t __weak pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) return 0; } -static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, +static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, int resno, resource_size_t size) { struct resource *root, *conflict; diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index c1e9284..448ca56 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -53,7 +53,7 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf) static const char *pci_bus_speed_strings[] = { "33 MHz PCI", /* 0x00 */ "66 MHz PCI", /* 0x01 */ - "66 MHz PCI-X", /* 0x02 */ + "66 MHz PCI-X", /* 0x02 */ "100 MHz PCI-X", /* 0x03 */ "133 MHz PCI-X", /* 0x04 */ NULL, /* 0x05 */ diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index e1c1ec5..24750a1 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -44,7 +44,7 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn, default: err = -EINVAL; goto error; - }; + } err = -EIO; if (cfg_ret != PCIBIOS_SUCCESSFUL) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 15f166a..0077302 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -626,7 +626,7 @@ comment "Platform RTC drivers" config RTC_DRV_CMOS tristate "PC-style 'CMOS'" - depends on X86 || ALPHA || ARM || M32R || ATARI || PPC || MIPS || SPARC64 + depends on X86 || ARM || M32R || ATARI || PPC || MIPS || SPARC64 default y if X86 help Say "yes" here to get direct support for the real time clock @@ -643,6 +643,14 @@ config RTC_DRV_CMOS This driver can also be built as a module. If so, the module will be called rtc-cmos. +config RTC_DRV_ALPHA + bool "Alpha PC-style CMOS" + depends on ALPHA + default y + help + Direct support for the real-time clock found on every Alpha + system, specifically MC146818 compatibles. If in doubt, say Y. + config RTC_DRV_VRTC tristate "Virtual RTC for Intel MID platforms" depends on X86_INTEL_MID diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 8b2cd8a..c0da95e 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -428,6 +428,14 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) return 0; } +static void at91_rtc_shutdown(struct platform_device *pdev) +{ + /* Disable all interrupts */ + at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | + AT91_RTC_SECEV | AT91_RTC_TIMEV | + AT91_RTC_CALEV); +} + #ifdef CONFIG_PM_SLEEP /* AT91RM9200 RTC Power management control */ @@ -466,6 +474,7 @@ static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume); static struct platform_driver at91_rtc_driver = { .remove = __exit_p(at91_rtc_remove), + .shutdown = at91_rtc_shutdown, .driver = { .name = "at91_rtc", .owner = THIS_MODULE, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index f85b9e5..7eb19be 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -330,7 +330,7 @@ static int tcm_qla2xxx_check_demo_mode(struct se_portal_group *se_tpg) struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->generate_node_acls; + return tpg->tpg_attrib.generate_node_acls; } static int tcm_qla2xxx_check_demo_mode_cache(struct se_portal_group *se_tpg) @@ -338,7 +338,7 @@ static int tcm_qla2xxx_check_demo_mode_cache(struct se_portal_group *se_tpg) struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->cache_dynamic_acls; + return tpg->tpg_attrib.cache_dynamic_acls; } static int tcm_qla2xxx_check_demo_write_protect(struct se_portal_group *se_tpg) @@ -346,7 +346,7 @@ static int tcm_qla2xxx_check_demo_write_protect(struct se_portal_group *se_tpg) struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->demo_mode_write_protect; + return tpg->tpg_attrib.demo_mode_write_protect; } static int tcm_qla2xxx_check_prod_write_protect(struct se_portal_group *se_tpg) @@ -354,7 +354,7 @@ static int tcm_qla2xxx_check_prod_write_protect(struct se_portal_group *se_tpg) struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->prod_mode_write_protect; + return tpg->tpg_attrib.prod_mode_write_protect; } static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg) @@ -362,7 +362,7 @@ static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); - return QLA_TPG_ATTRIB(tpg)->demo_mode_login_only; + return tpg->tpg_attrib.demo_mode_login_only; } static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl( @@ -847,7 +847,7 @@ static ssize_t tcm_qla2xxx_tpg_attrib_show_##name( \ struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, \ struct tcm_qla2xxx_tpg, se_tpg); \ \ - return sprintf(page, "%u\n", QLA_TPG_ATTRIB(tpg)->name); \ + return sprintf(page, "%u\n", tpg->tpg_attrib.name); \ } \ \ static ssize_t tcm_qla2xxx_tpg_attrib_store_##name( \ @@ -1027,10 +1027,10 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( * By default allow READ-ONLY TPG demo-mode access w/ cached dynamic * NodeACLs */ - QLA_TPG_ATTRIB(tpg)->generate_node_acls = 1; - QLA_TPG_ATTRIB(tpg)->demo_mode_write_protect = 1; - QLA_TPG_ATTRIB(tpg)->cache_dynamic_acls = 1; - QLA_TPG_ATTRIB(tpg)->demo_mode_login_only = 1; + tpg->tpg_attrib.generate_node_acls = 1; + tpg->tpg_attrib.demo_mode_write_protect = 1; + tpg->tpg_attrib.cache_dynamic_acls = 1; + tpg->tpg_attrib.demo_mode_login_only = 1; ret = core_tpg_register(&tcm_qla2xxx_fabric_configfs->tf_ops, wwn, &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); @@ -1830,16 +1830,16 @@ static int tcm_qla2xxx_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = tcm_qla2xxx_tpg_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_qla2xxx_tpg_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = tcm_qla2xxx_tpg_attrib_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; /* * Register the fabric for use within TCM */ @@ -1870,15 +1870,15 @@ static int tcm_qla2xxx_register_configfs(void) /* * Setup default attribute lists for various npiv_fabric->tf_cit_tmpl */ - TF_CIT_TMPL(npiv_fabric)->tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(npiv_fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; + npiv_fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; /* * Register the npiv_fabric for use within TCM */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 3293275..771f7b8 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -45,8 +45,6 @@ struct tcm_qla2xxx_tpg { struct se_portal_group se_tpg; }; -#define QLA_TPG_ATTRIB(tpg) (&(tpg)->tpg_attrib) - struct tcm_qla2xxx_fc_loopid { struct se_node_acl *se_nacl; }; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 38e44b9..d70e911 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -805,14 +805,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int iscsi_task_attr; int sam_task_attr; - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->cmd_pdus++; - if (conn->sess->se_sess->se_node_acl) { - spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); - conn->sess->se_sess->se_node_acl->num_cmds++; - spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); - } - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_inc(&conn->sess->cmd_pdus); hdr = (struct iscsi_scsi_req *) buf; payload_length = ntoh24(hdr->dlength); @@ -1254,20 +1247,12 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, int rc; if (!payload_length) { - pr_err("DataOUT payload is ZERO, protocol error.\n"); - return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, - buf); + pr_warn("DataOUT payload is ZERO, ignoring.\n"); + return 0; } /* iSCSI write */ - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->rx_data_octets += payload_length; - if (conn->sess->se_sess->se_node_acl) { - spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); - conn->sess->se_sess->se_node_acl->write_bytes += payload_length; - spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); - } - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_add(payload_length, &conn->sess->rx_data_octets); if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { pr_err("DataSegmentLength: %u is greater than" @@ -1486,7 +1471,7 @@ EXPORT_SYMBOL(iscsit_check_dataout_payload); static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf) { - struct iscsi_cmd *cmd; + struct iscsi_cmd *cmd = NULL; struct iscsi_data *hdr = (struct iscsi_data *)buf; int rc; bool data_crc_failed = false; @@ -1954,6 +1939,13 @@ iscsit_setup_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, (unsigned char *)hdr); } + if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL) || + (hdr->flags & ISCSI_FLAG_TEXT_CONTINUE)) { + pr_err("Multi sequence text commands currently not supported\n"); + return iscsit_reject_cmd(cmd, ISCSI_REASON_CMD_NOT_SUPPORTED, + (unsigned char *)hdr); + } + pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x," " ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn, hdr->exp_statsn, payload_length); @@ -2630,14 +2622,7 @@ static int iscsit_send_datain(struct iscsi_cmd *cmd, struct iscsi_conn *conn) return -1; } - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->tx_data_octets += datain.length; - if (conn->sess->se_sess->se_node_acl) { - spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); - conn->sess->se_sess->se_node_acl->read_bytes += datain.length; - spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); - } - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_add(datain.length, &conn->sess->tx_data_octets); /* * Special case for successfully execution w/ both DATAIN * and Sense Data. @@ -3162,9 +3147,7 @@ void iscsit_build_rsp_pdu(struct iscsi_cmd *cmd, struct iscsi_conn *conn, if (inc_stat_sn) cmd->stat_sn = conn->stat_sn++; - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->rsp_pdus++; - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_inc(&conn->sess->rsp_pdus); memset(hdr, 0, ISCSI_HDR_LEN); hdr->opcode = ISCSI_OP_SCSI_CMD_RSP; @@ -3374,6 +3357,7 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) struct iscsi_tiqn *tiqn; struct iscsi_tpg_np *tpg_np; int buffer_len, end_of_buf = 0, len = 0, payload_len = 0; + int target_name_printed; unsigned char buf[ISCSI_IQN_LEN+12]; /* iqn + "TargetName=" + \0 */ unsigned char *text_in = cmd->text_in_ptr, *text_ptr = NULL; @@ -3411,19 +3395,23 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) continue; } - len = sprintf(buf, "TargetName=%s", tiqn->tiqn); - len += 1; - - if ((len + payload_len) > buffer_len) { - end_of_buf = 1; - goto eob; - } - memcpy(payload + payload_len, buf, len); - payload_len += len; + target_name_printed = 0; spin_lock(&tiqn->tiqn_tpg_lock); list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { + /* If demo_mode_discovery=0 and generate_node_acls=0 + * (demo mode dislabed) do not return + * TargetName+TargetAddress unless a NodeACL exists. + */ + + if ((tpg->tpg_attrib.generate_node_acls == 0) && + (tpg->tpg_attrib.demo_mode_discovery == 0) && + (!core_tpg_get_initiator_node_acl(&tpg->tpg_se_tpg, + cmd->conn->sess->sess_ops->InitiatorName))) { + continue; + } + spin_lock(&tpg->tpg_state_lock); if ((tpg->tpg_state == TPG_STATE_FREE) || (tpg->tpg_state == TPG_STATE_INACTIVE)) { @@ -3438,6 +3426,22 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); + if (!target_name_printed) { + len = sprintf(buf, "TargetName=%s", + tiqn->tiqn); + len += 1; + + if ((len + payload_len) > buffer_len) { + spin_unlock(&tpg->tpg_np_lock); + spin_unlock(&tiqn->tiqn_tpg_lock); + end_of_buf = 1; + goto eob; + } + memcpy(payload + payload_len, buf, len); + payload_len += len; + target_name_printed = 1; + } + len = sprintf(buf, "TargetAddress=" "%s:%hu,%hu", (inaddr_any == false) ? @@ -4092,9 +4096,7 @@ restart: * hit default in the switch below. */ memset(buffer, 0xff, ISCSI_HDR_LEN); - spin_lock_bh(&conn->sess->session_stats_lock); - conn->sess->conn_digest_errors++; - spin_unlock_bh(&conn->sess->session_stats_lock); + atomic_long_inc(&conn->sess->conn_digest_errors); } else { pr_debug("Got HeaderDigest CRC32C" " 0x%08x\n", checksum); @@ -4381,7 +4383,7 @@ int iscsit_close_connection( int iscsit_close_session(struct iscsi_session *sess) { - struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; if (atomic_read(&sess->nconn)) { diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index 7505fdd..de77d9a 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -111,7 +111,7 @@ static struct iscsi_chap *chap_server_open( /* * Set Identifier. */ - chap->id = ISCSI_TPG_C(conn)->tpg_chap_id++; + chap->id = conn->tpg->tpg_chap_id++; *aic_len += sprintf(aic_str + *aic_len, "CHAP_I=%d", chap->id); *aic_len += 1; pr_debug("[server] Sending CHAP_I=%d\n", chap->id); @@ -146,6 +146,7 @@ static int chap_server_compute_md5( unsigned char client_digest[MD5_SIGNATURE_SIZE]; unsigned char server_digest[MD5_SIGNATURE_SIZE]; unsigned char chap_n[MAX_CHAP_N_SIZE], chap_r[MAX_RESPONSE_LENGTH]; + size_t compare_len; struct iscsi_chap *chap = conn->auth_protocol; struct crypto_hash *tfm; struct hash_desc desc; @@ -184,7 +185,9 @@ static int chap_server_compute_md5( goto out; } - if (memcmp(chap_n, auth->userid, strlen(auth->userid)) != 0) { + /* Include the terminating NULL in the compare */ + compare_len = strlen(auth->userid) + 1; + if (strncmp(chap_n, auth->userid, compare_len) != 0) { pr_err("CHAP_N values do not match!\n"); goto out; } diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index fd14525..e3318ed 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -372,7 +372,7 @@ static ssize_t iscsi_nacl_attrib_show_##name( \ struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \ se_node_acl); \ \ - return sprintf(page, "%u\n", ISCSI_NODE_ATTRIB(nacl)->name); \ + return sprintf(page, "%u\n", nacl->node_attrib.name); \ } \ \ static ssize_t iscsi_nacl_attrib_store_##name( \ @@ -897,7 +897,7 @@ static struct se_node_acl *lio_target_make_nodeacl( if (!se_nacl_new) return ERR_PTR(-ENOMEM); - cmdsn_depth = ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth; + cmdsn_depth = tpg->tpg_attrib.default_cmdsn_depth; /* * se_nacl_new may be released by core_tpg_add_initiator_node_acl() * when converting a NdoeACL from demo mode -> explict @@ -920,9 +920,9 @@ static struct se_node_acl *lio_target_make_nodeacl( return ERR_PTR(-ENOMEM); } - stats_cg->default_groups[0] = &NODE_STAT_GRPS(acl)->iscsi_sess_stats_group; + stats_cg->default_groups[0] = &acl->node_stat_grps.iscsi_sess_stats_group; stats_cg->default_groups[1] = NULL; - config_group_init_type_name(&NODE_STAT_GRPS(acl)->iscsi_sess_stats_group, + config_group_init_type_name(&acl->node_stat_grps.iscsi_sess_stats_group, "iscsi_sess_stats", &iscsi_stat_sess_cit); return se_nacl; @@ -967,7 +967,7 @@ static ssize_t iscsi_tpg_attrib_show_##name( \ if (iscsit_get_tpg(tpg) < 0) \ return -EINVAL; \ \ - rb = sprintf(page, "%u\n", ISCSI_TPG_ATTRIB(tpg)->name); \ + rb = sprintf(page, "%u\n", tpg->tpg_attrib.name); \ iscsit_put_tpg(tpg); \ return rb; \ } \ @@ -1041,6 +1041,16 @@ TPG_ATTR(demo_mode_write_protect, S_IRUGO | S_IWUSR); */ DEF_TPG_ATTRIB(prod_mode_write_protect); TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_demo_mode_discovery, + */ +DEF_TPG_ATTRIB(demo_mode_discovery); +TPG_ATTR(demo_mode_discovery, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_default_erl + */ +DEF_TPG_ATTRIB(default_erl); +TPG_ATTR(default_erl, S_IRUGO | S_IWUSR); static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_authentication.attr, @@ -1051,6 +1061,8 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { &iscsi_tpg_attrib_cache_dynamic_acls.attr, &iscsi_tpg_attrib_demo_mode_write_protect.attr, &iscsi_tpg_attrib_prod_mode_write_protect.attr, + &iscsi_tpg_attrib_demo_mode_discovery.attr, + &iscsi_tpg_attrib_default_erl.attr, NULL, }; @@ -1514,21 +1526,21 @@ static struct se_wwn *lio_target_call_coreaddtiqn( return ERR_PTR(-ENOMEM); } - stats_cg->default_groups[0] = &WWN_STAT_GRPS(tiqn)->iscsi_instance_group; - stats_cg->default_groups[1] = &WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group; - stats_cg->default_groups[2] = &WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group; - stats_cg->default_groups[3] = &WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group; - stats_cg->default_groups[4] = &WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group; + stats_cg->default_groups[0] = &tiqn->tiqn_stat_grps.iscsi_instance_group; + stats_cg->default_groups[1] = &tiqn->tiqn_stat_grps.iscsi_sess_err_group; + stats_cg->default_groups[2] = &tiqn->tiqn_stat_grps.iscsi_tgt_attr_group; + stats_cg->default_groups[3] = &tiqn->tiqn_stat_grps.iscsi_login_stats_group; + stats_cg->default_groups[4] = &tiqn->tiqn_stat_grps.iscsi_logout_stats_group; stats_cg->default_groups[5] = NULL; - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_instance_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_instance_group, "iscsi_instance", &iscsi_stat_instance_cit); - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_sess_err_group, "iscsi_sess_err", &iscsi_stat_sess_err_cit); - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_tgt_attr_group, "iscsi_tgt_attr", &iscsi_stat_tgt_attr_cit); - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_login_stats_group, "iscsi_login_stats", &iscsi_stat_login_cit); - config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group, + config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_logout_stats_group, "iscsi_logout_stats", &iscsi_stat_logout_cit); pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn); @@ -1784,6 +1796,11 @@ static int lio_queue_status(struct se_cmd *se_cmd) struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); cmd->i_state = ISTATE_SEND_STATUS; + + if (cmd->se_cmd.scsi_status || cmd->sense_reason) { + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + return 0; + } cmd->conn->conn_transport->iscsit_queue_status(cmd->conn, cmd); return 0; @@ -1815,21 +1832,21 @@ static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg) { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth; + return tpg->tpg_attrib.default_cmdsn_depth; } static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg) { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->generate_node_acls; + return tpg->tpg_attrib.generate_node_acls; } static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg) { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->cache_dynamic_acls; + return tpg->tpg_attrib.cache_dynamic_acls; } static int lio_tpg_check_demo_mode_write_protect( @@ -1837,7 +1854,7 @@ static int lio_tpg_check_demo_mode_write_protect( { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->demo_mode_write_protect; + return tpg->tpg_attrib.demo_mode_write_protect; } static int lio_tpg_check_prod_mode_write_protect( @@ -1845,7 +1862,7 @@ static int lio_tpg_check_prod_mode_write_protect( { struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - return ISCSI_TPG_ATTRIB(tpg)->prod_mode_write_protect; + return tpg->tpg_attrib.prod_mode_write_protect; } static void lio_tpg_release_fabric_acl( @@ -1908,9 +1925,12 @@ static void lio_set_default_node_attributes(struct se_node_acl *se_acl) { struct iscsi_node_acl *acl = container_of(se_acl, struct iscsi_node_acl, se_node_acl); + struct se_portal_group *se_tpg = se_acl->se_tpg; + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); - ISCSI_NODE_ATTRIB(acl)->nacl = acl; - iscsit_set_default_node_attribues(acl); + acl->node_attrib.nacl = acl; + iscsit_set_default_node_attribues(acl, tpg); } static int lio_check_stop_free(struct se_cmd *se_cmd) @@ -1995,17 +2015,17 @@ int iscsi_target_register_configfs(void) * Setup default attribute lists for various fabric->tf_cit_tmpl * sturct config_item_type's */ - TF_CIT_TMPL(fabric)->tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs; - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_auth_cit.ct_attrs = lio_target_tpg_auth_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs; + fabric->tf_cit_tmpl.tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs; + fabric->tf_cit_tmpl.tfc_tpg_auth_cit.ct_attrs = lio_target_tpg_auth_attrs; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs; ret = target_fabric_configfs_register(fabric); if (ret < 0) { diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 9a5721b..48f7b3b 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -37,9 +37,6 @@ #define NA_RANDOM_DATAIN_PDU_OFFSETS 0 #define NA_RANDOM_DATAIN_SEQ_OFFSETS 0 #define NA_RANDOM_R2T_OFFSETS 0 -#define NA_DEFAULT_ERL 0 -#define NA_DEFAULT_ERL_MAX 2 -#define NA_DEFAULT_ERL_MIN 0 /* struct iscsi_tpg_attrib sanity values */ #define TA_AUTHENTICATION 1 @@ -58,6 +55,8 @@ #define TA_DEMO_MODE_WRITE_PROTECT 1 /* Disabled by default in production mode w/ explict ACLs */ #define TA_PROD_MODE_WRITE_PROTECT 0 +#define TA_DEMO_MODE_DISCOVERY 1 +#define TA_DEFAULT_ERL 0 #define TA_CACHE_CORE_NPS 0 @@ -192,6 +191,7 @@ enum recover_cmdsn_ret_table { CMDSN_NORMAL_OPERATION = 0, CMDSN_LOWER_THAN_EXP = 1, CMDSN_HIGHER_THAN_EXP = 2, + CMDSN_MAXCMDSN_OVERRUN = 3, }; /* Used for iscsi_handle_immediate_data() return values */ @@ -650,14 +650,13 @@ struct iscsi_session { /* Used for session reference counting */ int session_usage_count; int session_waiting_on_uc; - u32 cmd_pdus; - u32 rsp_pdus; - u64 tx_data_octets; - u64 rx_data_octets; - u32 conn_digest_errors; - u32 conn_timeout_errors; + atomic_long_t cmd_pdus; + atomic_long_t rsp_pdus; + atomic_long_t tx_data_octets; + atomic_long_t rx_data_octets; + atomic_long_t conn_digest_errors; + atomic_long_t conn_timeout_errors; u64 creation_time; - spinlock_t session_stats_lock; /* Number of active connections */ atomic_t nconn; atomic_t session_continuation; @@ -755,11 +754,6 @@ struct iscsi_node_acl { struct se_node_acl se_node_acl; }; -#define NODE_STAT_GRPS(nacl) (&(nacl)->node_stat_grps) - -#define ISCSI_NODE_ATTRIB(t) (&(t)->node_attrib) -#define ISCSI_NODE_AUTH(t) (&(t)->node_auth) - struct iscsi_tpg_attrib { u32 authentication; u32 login_timeout; @@ -769,6 +763,8 @@ struct iscsi_tpg_attrib { u32 default_cmdsn_depth; u32 demo_mode_write_protect; u32 prod_mode_write_protect; + u32 demo_mode_discovery; + u32 default_erl; struct iscsi_portal_group *tpg; }; @@ -835,12 +831,6 @@ struct iscsi_portal_group { struct list_head tpg_list; } ____cacheline_aligned; -#define ISCSI_TPG_C(c) ((struct iscsi_portal_group *)(c)->tpg) -#define ISCSI_TPG_LUN(c, l) ((iscsi_tpg_list_t *)(c)->tpg->tpg_lun_list_t[l]) -#define ISCSI_TPG_S(s) ((struct iscsi_portal_group *)(s)->tpg) -#define ISCSI_TPG_ATTRIB(t) (&(t)->tpg_attrib) -#define SE_TPG(tpg) (&(tpg)->tpg_se_tpg) - struct iscsi_wwn_stat_grps { struct config_group iscsi_stat_group; struct config_group iscsi_instance_group; @@ -871,8 +861,6 @@ struct iscsi_tiqn { struct iscsi_logout_stats logout_stats; } ____cacheline_aligned; -#define WWN_STAT_GRPS(tiqn) (&(tiqn)->tiqn_stat_grps) - struct iscsit_global { /* In core shutdown */ u32 in_shutdown; diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c index 6c7a510..7087c73 100644 --- a/drivers/target/iscsi/iscsi_target_device.c +++ b/drivers/target/iscsi/iscsi_target_device.c @@ -58,11 +58,7 @@ void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess cmd->maxcmdsn_inc = 1; - if (!mutex_trylock(&sess->cmdsn_mutex)) { - sess->max_cmd_sn += 1; - pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn); - return; - } + mutex_lock(&sess->cmdsn_mutex); sess->max_cmd_sn += 1; pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn); mutex_unlock(&sess->cmdsn_mutex); diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 41052e5..0d1e6ee 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -757,7 +757,7 @@ int iscsit_check_post_dataout( static void iscsit_handle_time2retain_timeout(unsigned long data) { struct iscsi_session *sess = (struct iscsi_session *) data; - struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; spin_lock_bh(&se_tpg->session_lock); @@ -785,7 +785,7 @@ static void iscsit_handle_time2retain_timeout(unsigned long data) tiqn->sess_err_stats.last_sess_failure_type = ISCSI_SESS_ERR_CXN_TIMEOUT; tiqn->sess_err_stats.cxn_timeout_errors++; - sess->conn_timeout_errors++; + atomic_long_inc(&sess->conn_timeout_errors); spin_unlock(&tiqn->sess_err_stats.lock); } } @@ -801,9 +801,9 @@ void iscsit_start_time2retain_handler(struct iscsi_session *sess) * Only start Time2Retain timer when the associated TPG is still in * an ACTIVE (eg: not disabled or shutdown) state. */ - spin_lock(&ISCSI_TPG_S(sess)->tpg_state_lock); - tpg_active = (ISCSI_TPG_S(sess)->tpg_state == TPG_STATE_ACTIVE); - spin_unlock(&ISCSI_TPG_S(sess)->tpg_state_lock); + spin_lock(&sess->tpg->tpg_state_lock); + tpg_active = (sess->tpg->tpg_state == TPG_STATE_ACTIVE); + spin_unlock(&sess->tpg->tpg_state_lock); if (!tpg_active) return; @@ -829,7 +829,7 @@ void iscsit_start_time2retain_handler(struct iscsi_session *sess) */ int iscsit_stop_time2retain_timer(struct iscsi_session *sess) { - struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; if (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 1794c75..4eb93b2 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -305,7 +305,6 @@ static int iscsi_login_zero_tsih_s1( } sess->creation_time = get_jiffies_64(); - spin_lock_init(&sess->session_stats_lock); /* * The FFP CmdSN window values will be allocated from the TPG's * Initiator Node's ACL once the login has been successfully completed. @@ -347,15 +346,15 @@ static int iscsi_login_zero_tsih_s2( * Assign a new TPG Session Handle. Note this is protected with * struct iscsi_portal_group->np_login_sem from iscsit_access_np(). */ - sess->tsih = ++ISCSI_TPG_S(sess)->ntsih; + sess->tsih = ++sess->tpg->ntsih; if (!sess->tsih) - sess->tsih = ++ISCSI_TPG_S(sess)->ntsih; + sess->tsih = ++sess->tpg->ntsih; /* * Create the default params from user defined values.. */ if (iscsi_copy_param_list(&conn->param_list, - ISCSI_TPG_C(conn)->param_list, 1) < 0) { + conn->tpg->param_list, 1) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); return -1; @@ -380,7 +379,7 @@ static int iscsi_login_zero_tsih_s2( * In our case, we have already located the struct iscsi_tiqn at this point. */ memset(buf, 0, 32); - sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt); + sprintf(buf, "TargetPortalGroupTag=%hu", sess->tpg->tpgt); if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); @@ -575,7 +574,7 @@ static int iscsi_login_non_zero_tsih_s2( iscsi_login_set_conn_values(sess, conn, pdu->cid); if (iscsi_copy_param_list(&conn->param_list, - ISCSI_TPG_C(conn)->param_list, 0) < 0) { + conn->tpg->param_list, 0) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); return -1; @@ -593,7 +592,7 @@ static int iscsi_login_non_zero_tsih_s2( * In our case, we have already located the struct iscsi_tiqn at this point. */ memset(buf, 0, 32); - sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt); + sprintf(buf, "TargetPortalGroupTag=%hu", sess->tpg->tpgt); if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); @@ -691,7 +690,7 @@ int iscsi_post_login_handler( int stop_timer = 0; struct iscsi_session *sess = conn->sess; struct se_session *se_sess = sess->se_sess; - struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct iscsi_portal_group *tpg = sess->tpg; struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; struct iscsi_thread_set *ts; @@ -1154,7 +1153,7 @@ old_sess_out: spin_lock_bh(&conn->sess->conn_lock); if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { struct se_portal_group *se_tpg = - &ISCSI_TPG_C(conn)->tpg_se_tpg; + &conn->tpg->tpg_se_tpg; atomic_set(&conn->sess->session_continuation, 0); spin_unlock_bh(&conn->sess->conn_lock); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index ef6d836..83c965c 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -88,7 +88,7 @@ int extract_param( if (len < 0) return -1; - if (len > max_length) { + if (len >= max_length) { pr_err("Length of input: %d exceeds max_length:" " %d\n", len, max_length); return -1; @@ -140,7 +140,7 @@ static u32 iscsi_handle_authentication( iscsi_nacl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl); - auth = ISCSI_NODE_AUTH(iscsi_nacl); + auth = &iscsi_nacl->node_auth; } } else { /* @@ -789,7 +789,7 @@ static int iscsi_target_handle_csg_zero( return -1; if (!iscsi_check_negotiated_keys(conn->param_list)) { - if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication && + if (conn->tpg->tpg_attrib.authentication && !strncmp(param->value, NONE, 4)) { pr_err("Initiator sent AuthMethod=None but" " Target is enforcing iSCSI Authentication," @@ -799,7 +799,7 @@ static int iscsi_target_handle_csg_zero( return -1; } - if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication && + if (conn->tpg->tpg_attrib.authentication && !login->auth_complete) return 0; @@ -862,7 +862,7 @@ static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_log } if (!login->auth_complete && - ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication) { + conn->tpg->tpg_attrib.authentication) { pr_err("Initiator is requesting CSG: 1, has not been" " successfully authenticated, and the Target is" " enforcing iSCSI Authentication, login failed.\n"); diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c index 93bdc47..16454a9 100644 --- a/drivers/target/iscsi/iscsi_target_nodeattrib.c +++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c @@ -33,7 +33,8 @@ static inline char *iscsit_na_get_initiatorname( } void iscsit_set_default_node_attribues( - struct iscsi_node_acl *acl) + struct iscsi_node_acl *acl, + struct iscsi_portal_group *tpg) { struct iscsi_node_attrib *a = &acl->node_attrib; @@ -44,7 +45,7 @@ void iscsit_set_default_node_attribues( a->random_datain_pdu_offsets = NA_RANDOM_DATAIN_PDU_OFFSETS; a->random_datain_seq_offsets = NA_RANDOM_DATAIN_SEQ_OFFSETS; a->random_r2t_offsets = NA_RANDOM_R2T_OFFSETS; - a->default_erl = NA_DEFAULT_ERL; + a->default_erl = tpg->tpg_attrib.default_erl; } int iscsit_na_dataout_timeout( diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.h b/drivers/target/iscsi/iscsi_target_nodeattrib.h index c970b326..0c69a46 100644 --- a/drivers/target/iscsi/iscsi_target_nodeattrib.h +++ b/drivers/target/iscsi/iscsi_target_nodeattrib.h @@ -1,7 +1,8 @@ #ifndef ISCSI_TARGET_NODEATTRIB_H #define ISCSI_TARGET_NODEATTRIB_H -extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *); +extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *, + struct iscsi_portal_group *); extern int iscsit_na_dataout_timeout(struct iscsi_node_acl *, u32); extern int iscsit_na_dataout_timeout_retries(struct iscsi_node_acl *, u32); extern int iscsit_na_nopin_timeout(struct iscsi_node_acl *, u32); diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c index f788e8b..1033955 100644 --- a/drivers/target/iscsi/iscsi_target_stat.c +++ b/drivers/target/iscsi/iscsi_target_stat.c @@ -792,7 +792,8 @@ static ssize_t iscsi_stat_sess_show_attr_cmd_pdus( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%u\n", sess->cmd_pdus); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->cmd_pdus)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -815,7 +816,8 @@ static ssize_t iscsi_stat_sess_show_attr_rsp_pdus( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%u\n", sess->rsp_pdus); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->rsp_pdus)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -838,8 +840,8 @@ static ssize_t iscsi_stat_sess_show_attr_txdata_octs( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%llu\n", - (unsigned long long)sess->tx_data_octets); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->tx_data_octets)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -862,8 +864,8 @@ static ssize_t iscsi_stat_sess_show_attr_rxdata_octs( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%llu\n", - (unsigned long long)sess->rx_data_octets); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->rx_data_octets)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -886,8 +888,8 @@ static ssize_t iscsi_stat_sess_show_attr_conn_digest_errors( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%u\n", - sess->conn_digest_errors); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->conn_digest_errors)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); @@ -910,8 +912,8 @@ static ssize_t iscsi_stat_sess_show_attr_conn_timeout_errors( if (se_sess) { sess = se_sess->fabric_sess_ptr; if (sess) - ret = snprintf(page, PAGE_SIZE, "%u\n", - sess->conn_timeout_errors); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&sess->conn_timeout_errors)); } spin_unlock_bh(&se_nacl->nacl_sess_lock); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 4faeb47..3976183 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -223,6 +223,8 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) a->cache_dynamic_acls = TA_CACHE_DYNAMIC_ACLS; a->demo_mode_write_protect = TA_DEMO_MODE_WRITE_PROTECT; a->prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT; + a->demo_mode_discovery = TA_DEMO_MODE_DISCOVERY; + a->default_erl = TA_DEFAULT_ERL; } int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) @@ -237,7 +239,7 @@ int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_gro if (iscsi_create_default_params(&tpg->param_list) < 0) goto err_out; - ISCSI_TPG_ATTRIB(tpg)->tpg = tpg; + tpg->tpg_attrib.tpg = tpg; spin_lock(&tpg->tpg_state_lock); tpg->tpg_state = TPG_STATE_INACTIVE; @@ -330,7 +332,7 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg) return -EINVAL; } - if (ISCSI_TPG_ATTRIB(tpg)->authentication) { + if (tpg->tpg_attrib.authentication) { if (!strcmp(param->value, NONE)) { ret = iscsi_update_param_value(param, CHAP); if (ret) @@ -820,3 +822,39 @@ int iscsit_ta_prod_mode_write_protect( return 0; } + +int iscsit_ta_demo_mode_discovery( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->demo_mode_discovery = flag; + pr_debug("iSCSI_TPG[%hu] - Demo Mode Discovery bit:" + " %s\n", tpg->tpgt, (a->demo_mode_discovery) ? + "ON" : "OFF"); + + return 0; +} + +int iscsit_ta_default_erl( + struct iscsi_portal_group *tpg, + u32 default_erl) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((default_erl != 0) && (default_erl != 1) && (default_erl != 2)) { + pr_err("Illegal value for default_erl: %u\n", default_erl); + return -EINVAL; + } + + a->default_erl = default_erl; + pr_debug("iSCSI_TPG[%hu] - DefaultERL: %u\n", tpg->tpgt, a->default_erl); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index b77693e..213c0fc 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -37,5 +37,7 @@ extern int iscsit_ta_default_cmdsn_depth(struct iscsi_portal_group *, u32); extern int iscsit_ta_cache_dynamic_acls(struct iscsi_portal_group *, u32); extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32); extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32); +extern int iscsit_ta_demo_mode_discovery(struct iscsi_portal_group *, u32); +extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32); #endif /* ISCSI_TARGET_TPG_H */ diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b0cac0c..0819e68 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -242,9 +242,9 @@ static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cm */ if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) { pr_err("Received CmdSN: 0x%08x is greater than" - " MaxCmdSN: 0x%08x, protocol error.\n", cmdsn, + " MaxCmdSN: 0x%08x, ignoring.\n", cmdsn, sess->max_cmd_sn); - ret = CMDSN_ERROR_CANNOT_RECOVER; + ret = CMDSN_MAXCMDSN_OVERRUN; } else if (cmdsn == sess->exp_cmd_sn) { sess->exp_cmd_sn++; @@ -303,14 +303,16 @@ int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, ret = CMDSN_HIGHER_THAN_EXP; break; case CMDSN_LOWER_THAN_EXP: + case CMDSN_MAXCMDSN_OVERRUN: + default: cmd->i_state = ISTATE_REMOVE; iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); - ret = cmdsn_ret; - break; - default: - reason = ISCSI_REASON_PROTOCOL_ERROR; - reject = true; - ret = cmdsn_ret; + /* + * Existing callers for iscsit_sequence_cmd() will silently + * ignore commands with CMDSN_LOWER_THAN_EXP, so force this + * return for CMDSN_MAXCMDSN_OVERRUN as well.. + */ + ret = CMDSN_LOWER_THAN_EXP; break; } mutex_unlock(&conn->sess->cmdsn_mutex); @@ -980,7 +982,7 @@ static void iscsit_handle_nopin_response_timeout(unsigned long data) tiqn->sess_err_stats.last_sess_failure_type = ISCSI_SESS_ERR_CXN_TIMEOUT; tiqn->sess_err_stats.cxn_timeout_errors++; - conn->sess->conn_timeout_errors++; + atomic_long_inc(&conn->sess->conn_timeout_errors); spin_unlock_bh(&tiqn->sess_err_stats.lock); } } diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 0f6d69d..1b41e67 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -135,6 +135,21 @@ static int tcm_loop_change_queue_depth( return sdev->queue_depth; } +static int tcm_loop_change_queue_type(struct scsi_device *sdev, int tag) +{ + if (sdev->tagged_supported) { + scsi_set_tag_type(sdev, tag); + + if (tag) + scsi_activate_tcq(sdev, sdev->queue_depth); + else + scsi_deactivate_tcq(sdev, sdev->queue_depth); + } else + tag = 0; + + return tag; +} + /* * Locate the SAM Task Attr from struct scsi_cmnd * */ @@ -178,7 +193,10 @@ static void tcm_loop_submission_work(struct work_struct *work) set_host_byte(sc, DID_NO_CONNECT); goto out_done; } - + if (tl_tpg->tl_transport_status == TCM_TRANSPORT_OFFLINE) { + set_host_byte(sc, DID_TRANSPORT_DISRUPTED); + goto out_done; + } tl_nexus = tl_hba->tl_nexus; if (!tl_nexus) { scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus" @@ -233,6 +251,7 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) } tl_cmd->sc = sc; + tl_cmd->sc_cmd_tag = sc->tag; INIT_WORK(&tl_cmd->work, tcm_loop_submission_work); queue_work(tcm_loop_workqueue, &tl_cmd->work); return 0; @@ -242,41 +261,21 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) * Called from SCSI EH process context to issue a LUN_RESET TMR * to struct scsi_device */ -static int tcm_loop_device_reset(struct scsi_cmnd *sc) +static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, + struct tcm_loop_nexus *tl_nexus, + int lun, int task, enum tcm_tmreq_table tmr) { struct se_cmd *se_cmd = NULL; - struct se_portal_group *se_tpg; struct se_session *se_sess; + struct se_portal_group *se_tpg; struct tcm_loop_cmd *tl_cmd = NULL; - struct tcm_loop_hba *tl_hba; - struct tcm_loop_nexus *tl_nexus; struct tcm_loop_tmr *tl_tmr = NULL; - struct tcm_loop_tpg *tl_tpg; - int ret = FAILED, rc; - /* - * Locate the tcm_loop_hba_t pointer - */ - tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); - /* - * Locate the tl_nexus and se_sess pointers - */ - tl_nexus = tl_hba->tl_nexus; - if (!tl_nexus) { - pr_err("Unable to perform device reset without" - " active I_T Nexus\n"); - return FAILED; - } - se_sess = tl_nexus->se_sess; - /* - * Locate the tl_tpg and se_tpg pointers from TargetID in sc->device->id - */ - tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - se_tpg = &tl_tpg->tl_se_tpg; + int ret = TMR_FUNCTION_FAILED, rc; tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL); if (!tl_cmd) { pr_err("Unable to allocate memory for tl_cmd\n"); - return FAILED; + return ret; } tl_tmr = kzalloc(sizeof(struct tcm_loop_tmr), GFP_KERNEL); @@ -287,6 +286,8 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) init_waitqueue_head(&tl_tmr->tl_tmr_wait); se_cmd = &tl_cmd->tl_se_cmd; + se_tpg = &tl_tpg->tl_se_tpg; + se_sess = tl_nexus->se_sess; /* * Initialize struct se_cmd descriptor from target_core_mod infrastructure */ @@ -294,17 +295,23 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) DMA_NONE, MSG_SIMPLE_TAG, &tl_cmd->tl_sense_buf[0]); - rc = core_tmr_alloc_req(se_cmd, tl_tmr, TMR_LUN_RESET, GFP_KERNEL); + rc = core_tmr_alloc_req(se_cmd, tl_tmr, tmr, GFP_KERNEL); if (rc < 0) goto release; + + if (tmr == TMR_ABORT_TASK) + se_cmd->se_tmr_req->ref_task_tag = task; + /* - * Locate the underlying TCM struct se_lun from sc->device->lun + * Locate the underlying TCM struct se_lun */ - if (transport_lookup_tmr_lun(se_cmd, sc->device->lun) < 0) + if (transport_lookup_tmr_lun(se_cmd, lun) < 0) { + ret = TMR_LUN_DOES_NOT_EXIST; goto release; + } /* - * Queue the TMR to TCM Core and sleep waiting for tcm_loop_queue_tm_rsp() - * to wake us up. + * Queue the TMR to TCM Core and sleep waiting for + * tcm_loop_queue_tm_rsp() to wake us up. */ transport_generic_handle_tmr(se_cmd); wait_event(tl_tmr->tl_tmr_wait, atomic_read(&tl_tmr->tmr_complete)); @@ -312,8 +319,7 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) * The TMR LUN_RESET has completed, check the response status and * then release allocations. */ - ret = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ? - SUCCESS : FAILED; + ret = se_cmd->se_tmr_req->response; release: if (se_cmd) transport_generic_free_cmd(se_cmd, 1); @@ -323,6 +329,94 @@ release: return ret; } +static int tcm_loop_abort_task(struct scsi_cmnd *sc) +{ + struct tcm_loop_hba *tl_hba; + struct tcm_loop_nexus *tl_nexus; + struct tcm_loop_tpg *tl_tpg; + int ret = FAILED; + + /* + * Locate the tcm_loop_hba_t pointer + */ + tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + /* + * Locate the tl_nexus and se_sess pointers + */ + tl_nexus = tl_hba->tl_nexus; + if (!tl_nexus) { + pr_err("Unable to perform device reset without" + " active I_T Nexus\n"); + return FAILED; + } + + /* + * Locate the tl_tpg pointer from TargetID in sc->device->id + */ + tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; + ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, + sc->tag, TMR_ABORT_TASK); + return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; +} + +/* + * Called from SCSI EH process context to issue a LUN_RESET TMR + * to struct scsi_device + */ +static int tcm_loop_device_reset(struct scsi_cmnd *sc) +{ + struct tcm_loop_hba *tl_hba; + struct tcm_loop_nexus *tl_nexus; + struct tcm_loop_tpg *tl_tpg; + int ret = FAILED; + + /* + * Locate the tcm_loop_hba_t pointer + */ + tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + /* + * Locate the tl_nexus and se_sess pointers + */ + tl_nexus = tl_hba->tl_nexus; + if (!tl_nexus) { + pr_err("Unable to perform device reset without" + " active I_T Nexus\n"); + return FAILED; + } + /* + * Locate the tl_tpg pointer from TargetID in sc->device->id + */ + tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; + ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, + 0, TMR_LUN_RESET); + return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; +} + +static int tcm_loop_target_reset(struct scsi_cmnd *sc) +{ + struct tcm_loop_hba *tl_hba; + struct tcm_loop_tpg *tl_tpg; + + /* + * Locate the tcm_loop_hba_t pointer + */ + tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); + if (!tl_hba) { + pr_err("Unable to perform device reset without" + " active I_T Nexus\n"); + return FAILED; + } + /* + * Locate the tl_tpg pointer from TargetID in sc->device->id + */ + tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; + if (tl_tpg) { + tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; + return SUCCESS; + } + return FAILED; +} + static int tcm_loop_slave_alloc(struct scsi_device *sd) { set_bit(QUEUE_FLAG_BIDI, &sd->request_queue->queue_flags); @@ -331,6 +425,15 @@ static int tcm_loop_slave_alloc(struct scsi_device *sd) static int tcm_loop_slave_configure(struct scsi_device *sd) { + if (sd->tagged_supported) { + scsi_activate_tcq(sd, sd->queue_depth); + scsi_adjust_queue_depth(sd, MSG_SIMPLE_TAG, + sd->host->cmd_per_lun); + } else { + scsi_adjust_queue_depth(sd, 0, + sd->host->cmd_per_lun); + } + return 0; } @@ -340,7 +443,10 @@ static struct scsi_host_template tcm_loop_driver_template = { .name = "TCM_Loopback", .queuecommand = tcm_loop_queuecommand, .change_queue_depth = tcm_loop_change_queue_depth, + .change_queue_type = tcm_loop_change_queue_type, + .eh_abort_handler = tcm_loop_abort_task, .eh_device_reset_handler = tcm_loop_device_reset, + .eh_target_reset_handler = tcm_loop_target_reset, .can_queue = 1024, .this_id = -1, .sg_tablesize = 256, @@ -699,7 +805,10 @@ static void tcm_loop_set_default_node_attributes(struct se_node_acl *se_acl) static u32 tcm_loop_get_task_tag(struct se_cmd *se_cmd) { - return 1; + struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, + struct tcm_loop_cmd, tl_se_cmd); + + return tl_cmd->sc_cmd_tag; } static int tcm_loop_get_cmd_state(struct se_cmd *se_cmd) @@ -932,7 +1041,10 @@ static int tcm_loop_drop_nexus( struct tcm_loop_nexus *tl_nexus; struct tcm_loop_hba *tl_hba = tpg->tl_hba; - tl_nexus = tpg->tl_hba->tl_nexus; + if (!tl_hba) + return -ENODEV; + + tl_nexus = tl_hba->tl_nexus; if (!tl_nexus) return -ENODEV; @@ -1061,8 +1173,56 @@ check_newline: TF_TPG_BASE_ATTR(tcm_loop, nexus, S_IRUGO | S_IWUSR); +static ssize_t tcm_loop_tpg_show_transport_status( + struct se_portal_group *se_tpg, + char *page) +{ + struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, + struct tcm_loop_tpg, tl_se_tpg); + const char *status = NULL; + ssize_t ret = -EINVAL; + + switch (tl_tpg->tl_transport_status) { + case TCM_TRANSPORT_ONLINE: + status = "online"; + break; + case TCM_TRANSPORT_OFFLINE: + status = "offline"; + break; + default: + break; + } + + if (status) + ret = snprintf(page, PAGE_SIZE, "%s\n", status); + + return ret; +} + +static ssize_t tcm_loop_tpg_store_transport_status( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, + struct tcm_loop_tpg, tl_se_tpg); + + if (!strncmp(page, "online", 6)) { + tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; + return count; + } + if (!strncmp(page, "offline", 7)) { + tl_tpg->tl_transport_status = TCM_TRANSPORT_OFFLINE; + return count; + } + return -EINVAL; +} + +TF_TPG_BASE_ATTR(tcm_loop, transport_status, S_IRUGO | S_IWUSR); + static struct configfs_attribute *tcm_loop_tpg_attrs[] = { &tcm_loop_tpg_nexus.attr, + &tcm_loop_tpg_transport_status.attr, NULL, }; @@ -1334,11 +1494,11 @@ static int tcm_loop_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = tcm_loop_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = tcm_loop_tpg_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_loop_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_loop_tpg_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; /* * Once fabric->tf_ops has been setup, now register the fabric for * use within TCM diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h index dd7a84e..54c59d0 100644 --- a/drivers/target/loopback/tcm_loop.h +++ b/drivers/target/loopback/tcm_loop.h @@ -10,6 +10,8 @@ struct tcm_loop_cmd { /* State of Linux/SCSI CDB+Data descriptor */ u32 sc_cmd_state; + /* Tagged command queueing */ + u32 sc_cmd_tag; /* Pointer to the CDB+Data descriptor from Linux/SCSI subsystem */ struct scsi_cmnd *sc; /* The TCM I/O descriptor that is accessed via container_of() */ @@ -40,8 +42,12 @@ struct tcm_loop_nacl { struct se_node_acl se_node_acl; }; +#define TCM_TRANSPORT_ONLINE 0 +#define TCM_TRANSPORT_OFFLINE 1 + struct tcm_loop_tpg { unsigned short tl_tpgt; + unsigned short tl_transport_status; atomic_t tl_tpg_port_count; struct se_portal_group tl_se_tpg; struct tcm_loop_hba *tl_hba; diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index e51b09a..24884ca 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -2556,15 +2556,15 @@ static int sbp_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = sbp_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = sbp_tpg_base_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = sbp_tpg_attrib_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = sbp_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = sbp_tpg_base_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = sbp_tpg_attrib_attrs; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; ret = target_fabric_configfs_register(fabric); if (ret < 0) { diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 47244102..fdcee32 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -44,7 +44,7 @@ static sense_reason_t core_alua_check_transition(int state, int *primary); static int core_alua_set_tg_pt_secondary_state( struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct se_port *port, int explict, int offline); + struct se_port *port, int explicit, int offline); static u16 alua_lu_gps_counter; static u32 alua_lu_gps_count; @@ -117,12 +117,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) /* * Set supported ASYMMETRIC ACCESS State bits */ - buf[off] = 0x80; /* T_SUP */ - buf[off] |= 0x40; /* O_SUP */ - buf[off] |= 0x8; /* U_SUP */ - buf[off] |= 0x4; /* S_SUP */ - buf[off] |= 0x2; /* AN_SUP */ - buf[off++] |= 0x1; /* AO_SUP */ + buf[off++] |= tg_pt_gp->tg_pt_gp_alua_supported_states; /* * TARGET PORT GROUP */ @@ -175,7 +170,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) if (ext_hdr != 0) { buf[4] = 0x10; /* - * Set the implict transition time (in seconds) for the application + * Set the implicit transition time (in seconds) for the application * client to use as a base for it's transition timeout value. * * Use the current tg_pt_gp_mem -> tg_pt_gp membership from the LUN @@ -188,7 +183,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; if (tg_pt_gp) - buf[5] = tg_pt_gp->tg_pt_gp_implict_trans_secs; + buf[5] = tg_pt_gp->tg_pt_gp_implicit_trans_secs; spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); } } @@ -199,7 +194,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) } /* - * SET_TARGET_PORT_GROUPS for explict ALUA operation. + * SET_TARGET_PORT_GROUPS for explicit ALUA operation. * * See spc4r17 section 6.35 */ @@ -232,7 +227,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; /* - * Determine if explict ALUA via SET_TARGET_PORT_GROUPS is allowed + * Determine if explicit ALUA via SET_TARGET_PORT_GROUPS is allowed * for the local tg_pt_gp. */ l_tg_pt_gp_mem = l_port->sep_alua_tg_pt_gp_mem; @@ -251,9 +246,9 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) } spin_unlock(&l_tg_pt_gp_mem->tg_pt_gp_mem_lock); - if (!(l_tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICT_ALUA)) { + if (!(l_tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA)) { pr_debug("Unable to process SET_TARGET_PORT_GROUPS" - " while TPGS_EXPLICT_ALUA is disabled\n"); + " while TPGS_EXPLICIT_ALUA is disabled\n"); rc = TCM_UNSUPPORTED_SCSI_OPCODE; goto out; } @@ -330,7 +325,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) spin_unlock(&dev->t10_alua.tg_pt_gps_lock); } else { /* - * Extact the RELATIVE TARGET PORT IDENTIFIER to identify + * Extract the RELATIVE TARGET PORT IDENTIFIER to identify * the Target Port in question for the the incoming * SET_TARGET_PORT_GROUPS op. */ @@ -487,7 +482,7 @@ static inline int core_alua_state_transition( u8 *alua_ascq) { /* - * Allowed CDBs for ALUA_ACCESS_STATE_TRANSITIO as defined by + * Allowed CDBs for ALUA_ACCESS_STATE_TRANSITION as defined by * spc4r17 section 5.9.2.5 */ switch (cdb[0]) { @@ -515,9 +510,9 @@ static inline int core_alua_state_transition( } /* - * return 1: Is used to signal LUN not accecsable, and check condition/not ready + * return 1: Is used to signal LUN not accessible, and check condition/not ready * return 0: Used to signal success - * reutrn -1: Used to signal failure, and invalid cdb field + * return -1: Used to signal failure, and invalid cdb field */ sense_reason_t target_alua_state_check(struct se_cmd *cmd) @@ -566,12 +561,12 @@ target_alua_state_check(struct se_cmd *cmd) nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs; spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); /* - * Process ALUA_ACCESS_STATE_ACTIVE_OPTMIZED in a separate conditional + * Process ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED in a separate conditional * statement so the compiler knows explicitly to check this case first. * For the Optimized ALUA access state case, we want to process the * incoming fabric cmd ASAP.. */ - if (out_alua_state == ALUA_ACCESS_STATE_ACTIVE_OPTMIZED) + if (out_alua_state == ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED) return 0; switch (out_alua_state) { @@ -620,13 +615,13 @@ out: } /* - * Check implict and explict ALUA state change request. + * Check implicit and explicit ALUA state change request. */ static sense_reason_t core_alua_check_transition(int state, int *primary) { switch (state) { - case ALUA_ACCESS_STATE_ACTIVE_OPTMIZED: + case ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED: case ALUA_ACCESS_STATE_ACTIVE_NON_OPTIMIZED: case ALUA_ACCESS_STATE_STANDBY: case ALUA_ACCESS_STATE_UNAVAILABLE: @@ -654,7 +649,7 @@ core_alua_check_transition(int state, int *primary) static char *core_alua_dump_state(int state) { switch (state) { - case ALUA_ACCESS_STATE_ACTIVE_OPTMIZED: + case ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED: return "Active/Optimized"; case ALUA_ACCESS_STATE_ACTIVE_NON_OPTIMIZED: return "Active/NonOptimized"; @@ -676,10 +671,10 @@ char *core_alua_dump_status(int status) switch (status) { case ALUA_STATUS_NONE: return "None"; - case ALUA_STATUS_ALTERED_BY_EXPLICT_STPG: - return "Altered by Explict STPG"; - case ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA: - return "Altered by Implict ALUA"; + case ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG: + return "Altered by Explicit STPG"; + case ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA: + return "Altered by Implicit ALUA"; default: return "Unknown"; } @@ -770,7 +765,7 @@ static int core_alua_do_transition_tg_pt( struct se_node_acl *nacl, unsigned char *md_buf, int new_state, - int explict) + int explicit) { struct se_dev_entry *se_deve; struct se_lun_acl *lacl; @@ -784,9 +779,9 @@ static int core_alua_do_transition_tg_pt( old_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_TRANSITION); - tg_pt_gp->tg_pt_gp_alua_access_status = (explict) ? - ALUA_STATUS_ALTERED_BY_EXPLICT_STPG : - ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA; + tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ? + ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : + ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; /* * Check for the optional ALUA primary state transition delay */ @@ -802,7 +797,7 @@ static int core_alua_do_transition_tg_pt( * change, a device server shall establish a unit attention * condition for the initiator port associated with every I_T * nexus with the additional sense code set to ASYMMETRIC - * ACCESS STATE CHAGED. + * ACCESS STATE CHANGED. * * After an explicit target port asymmetric access state * change, a device server shall establish a unit attention @@ -821,12 +816,12 @@ static int core_alua_do_transition_tg_pt( lacl = se_deve->se_lun_acl; /* * se_deve->se_lun_acl pointer may be NULL for a - * entry created without explict Node+MappedLUN ACLs + * entry created without explicit Node+MappedLUN ACLs */ if (!lacl) continue; - if (explict && + if (explicit && (nacl != NULL) && (nacl == lacl->se_lun_nacl) && (l_port != NULL) && (l_port == port)) continue; @@ -866,8 +861,8 @@ static int core_alua_do_transition_tg_pt( atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, new_state); pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu" - " from primary access state %s to %s\n", (explict) ? "explict" : - "implict", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), + " from primary access state %s to %s\n", (explicit) ? "explicit" : + "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), tg_pt_gp->tg_pt_gp_id, core_alua_dump_state(old_state), core_alua_dump_state(new_state)); @@ -880,7 +875,7 @@ int core_alua_do_port_transition( struct se_port *l_port, struct se_node_acl *l_nacl, int new_state, - int explict) + int explicit) { struct se_device *dev; struct se_port *port; @@ -917,7 +912,7 @@ int core_alua_do_port_transition( * success. */ core_alua_do_transition_tg_pt(l_tg_pt_gp, l_port, l_nacl, - md_buf, new_state, explict); + md_buf, new_state, explicit); atomic_dec(&lu_gp->lu_gp_ref_cnt); smp_mb__after_atomic_dec(); kfree(md_buf); @@ -946,7 +941,7 @@ int core_alua_do_port_transition( continue; /* * If the target behavior port asymmetric access state - * is changed for any target port group accessiable via + * is changed for any target port group accessible via * a logical unit within a LU group, the target port * behavior group asymmetric access states for the same * target port group accessible via other logical units @@ -970,7 +965,7 @@ int core_alua_do_port_transition( * success. */ core_alua_do_transition_tg_pt(tg_pt_gp, port, - nacl, md_buf, new_state, explict); + nacl, md_buf, new_state, explicit); spin_lock(&dev->t10_alua.tg_pt_gps_lock); atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); @@ -987,7 +982,7 @@ int core_alua_do_port_transition( pr_debug("Successfully processed LU Group: %s all ALUA TG PT" " Group IDs: %hu %s transition to primary state: %s\n", config_item_name(&lu_gp->lu_gp_group.cg_item), - l_tg_pt_gp->tg_pt_gp_id, (explict) ? "explict" : "implict", + l_tg_pt_gp->tg_pt_gp_id, (explicit) ? "explicit" : "implicit", core_alua_dump_state(new_state)); atomic_dec(&lu_gp->lu_gp_ref_cnt); @@ -1034,7 +1029,7 @@ static int core_alua_update_tpg_secondary_metadata( static int core_alua_set_tg_pt_secondary_state( struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, struct se_port *port, - int explict, + int explicit, int offline) { struct t10_alua_tg_pt_gp *tg_pt_gp; @@ -1061,13 +1056,13 @@ static int core_alua_set_tg_pt_secondary_state( atomic_set(&port->sep_tg_pt_secondary_offline, 0); md_buf_len = tg_pt_gp->tg_pt_gp_md_buf_len; - port->sep_tg_pt_secondary_stat = (explict) ? - ALUA_STATUS_ALTERED_BY_EXPLICT_STPG : - ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA; + port->sep_tg_pt_secondary_stat = (explicit) ? + ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : + ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu" - " to secondary access state: %s\n", (explict) ? "explict" : - "implict", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), + " to secondary access state: %s\n", (explicit) ? "explicit" : + "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), tg_pt_gp->tg_pt_gp_id, (offline) ? "OFFLINE" : "ONLINE"); spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); @@ -1232,7 +1227,7 @@ void core_alua_free_lu_gp(struct t10_alua_lu_gp *lu_gp) * struct se_device is released via core_alua_free_lu_gp_mem(). * * If the passed lu_gp does NOT match the default_lu_gp, assume - * we want to re-assocate a given lu_gp_mem with default_lu_gp. + * we want to re-associate a given lu_gp_mem with default_lu_gp. */ spin_lock(&lu_gp_mem->lu_gp_mem_lock); if (lu_gp != default_lu_gp) @@ -1354,18 +1349,25 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, tg_pt_gp->tg_pt_gp_dev = dev; tg_pt_gp->tg_pt_gp_md_buf_len = ALUA_MD_BUF_LEN; atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, - ALUA_ACCESS_STATE_ACTIVE_OPTMIZED); + ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); /* - * Enable both explict and implict ALUA support by default + * Enable both explicit and implicit ALUA support by default */ tg_pt_gp->tg_pt_gp_alua_access_type = - TPGS_EXPLICT_ALUA | TPGS_IMPLICT_ALUA; + TPGS_EXPLICIT_ALUA | TPGS_IMPLICIT_ALUA; /* * Set the default Active/NonOptimized Delay in milliseconds */ tg_pt_gp->tg_pt_gp_nonop_delay_msecs = ALUA_DEFAULT_NONOP_DELAY_MSECS; tg_pt_gp->tg_pt_gp_trans_delay_msecs = ALUA_DEFAULT_TRANS_DELAY_MSECS; - tg_pt_gp->tg_pt_gp_implict_trans_secs = ALUA_DEFAULT_IMPLICT_TRANS_SECS; + tg_pt_gp->tg_pt_gp_implicit_trans_secs = ALUA_DEFAULT_IMPLICIT_TRANS_SECS; + + /* + * Enable all supported states + */ + tg_pt_gp->tg_pt_gp_alua_supported_states = + ALUA_T_SUP | ALUA_O_SUP | + ALUA_U_SUP | ALUA_S_SUP | ALUA_AN_SUP | ALUA_AO_SUP; if (def_group) { spin_lock(&dev->t10_alua.tg_pt_gps_lock); @@ -1465,7 +1467,7 @@ void core_alua_free_tg_pt_gp( * been called from target_core_alua_drop_tg_pt_gp(). * * Here we remove *tg_pt_gp from the global list so that - * no assications *OR* explict ALUA via SET_TARGET_PORT_GROUPS + * no associations *OR* explicit ALUA via SET_TARGET_PORT_GROUPS * can be made while we are releasing struct t10_alua_tg_pt_gp. */ spin_lock(&dev->t10_alua.tg_pt_gps_lock); @@ -1501,7 +1503,7 @@ void core_alua_free_tg_pt_gp( * core_alua_free_tg_pt_gp_mem(). * * If the passed tg_pt_gp does NOT match the default_tg_pt_gp, - * assume we want to re-assocate a given tg_pt_gp_mem with + * assume we want to re-associate a given tg_pt_gp_mem with * default_tg_pt_gp. */ spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); @@ -1740,13 +1742,13 @@ ssize_t core_alua_show_access_type( struct t10_alua_tg_pt_gp *tg_pt_gp, char *page) { - if ((tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICT_ALUA) && - (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICT_ALUA)) - return sprintf(page, "Implict and Explict\n"); - else if (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICT_ALUA) - return sprintf(page, "Implict\n"); - else if (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICT_ALUA) - return sprintf(page, "Explict\n"); + if ((tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA) && + (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICIT_ALUA)) + return sprintf(page, "Implicit and Explicit\n"); + else if (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICIT_ALUA) + return sprintf(page, "Implicit\n"); + else if (tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA) + return sprintf(page, "Explicit\n"); else return sprintf(page, "None\n"); } @@ -1771,11 +1773,11 @@ ssize_t core_alua_store_access_type( } if (tmp == 3) tg_pt_gp->tg_pt_gp_alua_access_type = - TPGS_IMPLICT_ALUA | TPGS_EXPLICT_ALUA; + TPGS_IMPLICIT_ALUA | TPGS_EXPLICIT_ALUA; else if (tmp == 2) - tg_pt_gp->tg_pt_gp_alua_access_type = TPGS_EXPLICT_ALUA; + tg_pt_gp->tg_pt_gp_alua_access_type = TPGS_EXPLICIT_ALUA; else if (tmp == 1) - tg_pt_gp->tg_pt_gp_alua_access_type = TPGS_IMPLICT_ALUA; + tg_pt_gp->tg_pt_gp_alua_access_type = TPGS_IMPLICIT_ALUA; else tg_pt_gp->tg_pt_gp_alua_access_type = 0; @@ -1844,14 +1846,14 @@ ssize_t core_alua_store_trans_delay_msecs( return count; } -ssize_t core_alua_show_implict_trans_secs( +ssize_t core_alua_show_implicit_trans_secs( struct t10_alua_tg_pt_gp *tg_pt_gp, char *page) { - return sprintf(page, "%d\n", tg_pt_gp->tg_pt_gp_implict_trans_secs); + return sprintf(page, "%d\n", tg_pt_gp->tg_pt_gp_implicit_trans_secs); } -ssize_t core_alua_store_implict_trans_secs( +ssize_t core_alua_store_implicit_trans_secs( struct t10_alua_tg_pt_gp *tg_pt_gp, const char *page, size_t count) @@ -1861,16 +1863,16 @@ ssize_t core_alua_store_implict_trans_secs( ret = kstrtoul(page, 0, &tmp); if (ret < 0) { - pr_err("Unable to extract implict_trans_secs\n"); + pr_err("Unable to extract implicit_trans_secs\n"); return ret; } - if (tmp > ALUA_MAX_IMPLICT_TRANS_SECS) { - pr_err("Passed implict_trans_secs: %lu, exceeds" - " ALUA_MAX_IMPLICT_TRANS_SECS: %d\n", tmp, - ALUA_MAX_IMPLICT_TRANS_SECS); + if (tmp > ALUA_MAX_IMPLICIT_TRANS_SECS) { + pr_err("Passed implicit_trans_secs: %lu, exceeds" + " ALUA_MAX_IMPLICIT_TRANS_SECS: %d\n", tmp, + ALUA_MAX_IMPLICIT_TRANS_SECS); return -EINVAL; } - tg_pt_gp->tg_pt_gp_implict_trans_secs = (int)tmp; + tg_pt_gp->tg_pt_gp_implicit_trans_secs = (int)tmp; return count; } @@ -1970,8 +1972,8 @@ ssize_t core_alua_store_secondary_status( return ret; } if ((tmp != ALUA_STATUS_NONE) && - (tmp != ALUA_STATUS_ALTERED_BY_EXPLICT_STPG) && - (tmp != ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA)) { + (tmp != ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG) && + (tmp != ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA)) { pr_err("Illegal value for alua_tg_pt_status: %lu\n", tmp); return -EINVAL; diff --git a/drivers/target/target_core_alua.h b/drivers/target/target_core_alua.h index e539c3e..88e2e83 100644 --- a/drivers/target/target_core_alua.h +++ b/drivers/target/target_core_alua.h @@ -7,15 +7,15 @@ * from spc4r17 section 6.4.2 Table 135 */ #define TPGS_NO_ALUA 0x00 -#define TPGS_IMPLICT_ALUA 0x10 -#define TPGS_EXPLICT_ALUA 0x20 +#define TPGS_IMPLICIT_ALUA 0x10 +#define TPGS_EXPLICIT_ALUA 0x20 /* * ASYMMETRIC ACCESS STATE field * * from spc4r17 section 6.27 Table 245 */ -#define ALUA_ACCESS_STATE_ACTIVE_OPTMIZED 0x0 +#define ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED 0x0 #define ALUA_ACCESS_STATE_ACTIVE_NON_OPTIMIZED 0x1 #define ALUA_ACCESS_STATE_STANDBY 0x2 #define ALUA_ACCESS_STATE_UNAVAILABLE 0x3 @@ -23,13 +23,24 @@ #define ALUA_ACCESS_STATE_TRANSITION 0xf /* + * from spc4r36j section 6.37 Table 306 + */ +#define ALUA_T_SUP 0x80 +#define ALUA_O_SUP 0x40 +#define ALUA_LBD_SUP 0x10 +#define ALUA_U_SUP 0x08 +#define ALUA_S_SUP 0x04 +#define ALUA_AN_SUP 0x02 +#define ALUA_AO_SUP 0x01 + +/* * REPORT_TARGET_PORT_GROUP STATUS CODE * * from spc4r17 section 6.27 Table 246 */ #define ALUA_STATUS_NONE 0x00 -#define ALUA_STATUS_ALTERED_BY_EXPLICT_STPG 0x01 -#define ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA 0x02 +#define ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG 0x01 +#define ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA 0x02 /* * From spc4r17, Table D.1: ASC and ASCQ Assignement @@ -46,17 +57,17 @@ #define ALUA_DEFAULT_NONOP_DELAY_MSECS 100 #define ALUA_MAX_NONOP_DELAY_MSECS 10000 /* 10 seconds */ /* - * Used for implict and explict ALUA transitional delay, that is disabled + * Used for implicit and explicit ALUA transitional delay, that is disabled * by default, and is intended to be used for debugging client side ALUA code. */ #define ALUA_DEFAULT_TRANS_DELAY_MSECS 0 #define ALUA_MAX_TRANS_DELAY_MSECS 30000 /* 30 seconds */ /* - * Used for the recommended application client implict transition timeout + * Used for the recommended application client implicit transition timeout * in seconds, returned by the REPORT_TARGET_PORT_GROUPS w/ extended header. */ -#define ALUA_DEFAULT_IMPLICT_TRANS_SECS 0 -#define ALUA_MAX_IMPLICT_TRANS_SECS 255 +#define ALUA_DEFAULT_IMPLICIT_TRANS_SECS 0 +#define ALUA_MAX_IMPLICIT_TRANS_SECS 255 /* * Used by core_alua_update_tpg_primary_metadata() and * core_alua_update_tpg_secondary_metadata() @@ -113,9 +124,9 @@ extern ssize_t core_alua_show_trans_delay_msecs(struct t10_alua_tg_pt_gp *, char *); extern ssize_t core_alua_store_trans_delay_msecs(struct t10_alua_tg_pt_gp *, const char *, size_t); -extern ssize_t core_alua_show_implict_trans_secs(struct t10_alua_tg_pt_gp *, +extern ssize_t core_alua_show_implicit_trans_secs(struct t10_alua_tg_pt_gp *, char *); -extern ssize_t core_alua_store_implict_trans_secs(struct t10_alua_tg_pt_gp *, +extern ssize_t core_alua_store_implicit_trans_secs(struct t10_alua_tg_pt_gp *, const char *, size_t); extern ssize_t core_alua_show_preferred_bit(struct t10_alua_tg_pt_gp *, char *); diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 82e81c5..272755d 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -177,16 +177,16 @@ static struct config_group *target_core_register_fabric( * struct target_fabric_configfs *tf will contain a usage reference. */ pr_debug("Target_Core_ConfigFS: REGISTER tfc_wwn_cit -> %p\n", - &TF_CIT_TMPL(tf)->tfc_wwn_cit); + &tf->tf_cit_tmpl.tfc_wwn_cit); tf->tf_group.default_groups = tf->tf_default_groups; tf->tf_group.default_groups[0] = &tf->tf_disc_group; tf->tf_group.default_groups[1] = NULL; config_group_init_type_name(&tf->tf_group, name, - &TF_CIT_TMPL(tf)->tfc_wwn_cit); + &tf->tf_cit_tmpl.tfc_wwn_cit); config_group_init_type_name(&tf->tf_disc_group, "discovery_auth", - &TF_CIT_TMPL(tf)->tfc_discovery_cit); + &tf->tf_cit_tmpl.tfc_discovery_cit); pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); @@ -2036,7 +2036,7 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state( int new_state, ret; if (!tg_pt_gp->tg_pt_gp_valid_id) { - pr_err("Unable to do implict ALUA on non valid" + pr_err("Unable to do implicit ALUA on non valid" " tg_pt_gp ID: %hu\n", tg_pt_gp->tg_pt_gp_valid_id); return -EINVAL; } @@ -2049,9 +2049,9 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state( } new_state = (int)tmp; - if (!(tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICT_ALUA)) { - pr_err("Unable to process implict configfs ALUA" - " transition while TPGS_IMPLICT_ALUA is disabled\n"); + if (!(tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICIT_ALUA)) { + pr_err("Unable to process implicit configfs ALUA" + " transition while TPGS_IMPLICIT_ALUA is disabled\n"); return -EINVAL; } @@ -2097,8 +2097,8 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_status( new_status = (int)tmp; if ((new_status != ALUA_STATUS_NONE) && - (new_status != ALUA_STATUS_ALTERED_BY_EXPLICT_STPG) && - (new_status != ALUA_STATUS_ALTERED_BY_IMPLICT_ALUA)) { + (new_status != ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG) && + (new_status != ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA)) { pr_err("Illegal ALUA access status: 0x%02x\n", new_status); return -EINVAL; @@ -2131,6 +2131,90 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_type( SE_DEV_ALUA_TG_PT_ATTR(alua_access_type, S_IRUGO | S_IWUSR); /* + * alua_supported_states + */ + +#define SE_DEV_ALUA_SUPPORT_STATE_SHOW(_name, _var, _bit) \ +static ssize_t target_core_alua_tg_pt_gp_show_attr_alua_support_##_name( \ + struct t10_alua_tg_pt_gp *t, char *p) \ +{ \ + return sprintf(p, "%d\n", !!(t->_var & _bit)); \ +} + +#define SE_DEV_ALUA_SUPPORT_STATE_STORE(_name, _var, _bit) \ +static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_support_##_name(\ + struct t10_alua_tg_pt_gp *t, const char *p, size_t c) \ +{ \ + unsigned long tmp; \ + int ret; \ + \ + if (!t->tg_pt_gp_valid_id) { \ + pr_err("Unable to do set ##_name ALUA state on non" \ + " valid tg_pt_gp ID: %hu\n", \ + t->tg_pt_gp_valid_id); \ + return -EINVAL; \ + } \ + \ + ret = kstrtoul(p, 0, &tmp); \ + if (ret < 0) { \ + pr_err("Invalid value '%s', must be '0' or '1'\n", p); \ + return -EINVAL; \ + } \ + if (tmp > 1) { \ + pr_err("Invalid value '%ld', must be '0' or '1'\n", tmp); \ + return -EINVAL; \ + } \ + if (!tmp) \ + t->_var |= _bit; \ + else \ + t->_var &= ~_bit; \ + \ + return c; \ +} + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(transitioning, + tg_pt_gp_alua_supported_states, ALUA_T_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(transitioning, + tg_pt_gp_alua_supported_states, ALUA_T_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_transitioning, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(offline, + tg_pt_gp_alua_supported_states, ALUA_O_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(offline, + tg_pt_gp_alua_supported_states, ALUA_O_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_offline, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(lba_dependent, + tg_pt_gp_alua_supported_states, ALUA_LBD_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(lba_dependent, + tg_pt_gp_alua_supported_states, ALUA_LBD_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_lba_dependent, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(unavailable, + tg_pt_gp_alua_supported_states, ALUA_U_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(unavailable, + tg_pt_gp_alua_supported_states, ALUA_U_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_unavailable, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(standby, + tg_pt_gp_alua_supported_states, ALUA_S_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(standby, + tg_pt_gp_alua_supported_states, ALUA_S_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_standby, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(active_optimized, + tg_pt_gp_alua_supported_states, ALUA_AO_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(active_optimized, + tg_pt_gp_alua_supported_states, ALUA_AO_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_active_optimized, S_IRUGO | S_IWUSR); + +SE_DEV_ALUA_SUPPORT_STATE_SHOW(active_nonoptimized, + tg_pt_gp_alua_supported_states, ALUA_AN_SUP); +SE_DEV_ALUA_SUPPORT_STATE_STORE(active_nonoptimized, + tg_pt_gp_alua_supported_states, ALUA_AN_SUP); +SE_DEV_ALUA_TG_PT_ATTR(alua_support_active_nonoptimized, S_IRUGO | S_IWUSR); + +/* * alua_write_metadata */ static ssize_t target_core_alua_tg_pt_gp_show_attr_alua_write_metadata( @@ -2210,24 +2294,24 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_trans_delay_msecs( SE_DEV_ALUA_TG_PT_ATTR(trans_delay_msecs, S_IRUGO | S_IWUSR); /* - * implict_trans_secs + * implicit_trans_secs */ -static ssize_t target_core_alua_tg_pt_gp_show_attr_implict_trans_secs( +static ssize_t target_core_alua_tg_pt_gp_show_attr_implicit_trans_secs( struct t10_alua_tg_pt_gp *tg_pt_gp, char *page) { - return core_alua_show_implict_trans_secs(tg_pt_gp, page); + return core_alua_show_implicit_trans_secs(tg_pt_gp, page); } -static ssize_t target_core_alua_tg_pt_gp_store_attr_implict_trans_secs( +static ssize_t target_core_alua_tg_pt_gp_store_attr_implicit_trans_secs( struct t10_alua_tg_pt_gp *tg_pt_gp, const char *page, size_t count) { - return core_alua_store_implict_trans_secs(tg_pt_gp, page, count); + return core_alua_store_implicit_trans_secs(tg_pt_gp, page, count); } -SE_DEV_ALUA_TG_PT_ATTR(implict_trans_secs, S_IRUGO | S_IWUSR); +SE_DEV_ALUA_TG_PT_ATTR(implicit_trans_secs, S_IRUGO | S_IWUSR); /* * preferred @@ -2350,10 +2434,17 @@ static struct configfs_attribute *target_core_alua_tg_pt_gp_attrs[] = { &target_core_alua_tg_pt_gp_alua_access_state.attr, &target_core_alua_tg_pt_gp_alua_access_status.attr, &target_core_alua_tg_pt_gp_alua_access_type.attr, + &target_core_alua_tg_pt_gp_alua_support_transitioning.attr, + &target_core_alua_tg_pt_gp_alua_support_offline.attr, + &target_core_alua_tg_pt_gp_alua_support_lba_dependent.attr, + &target_core_alua_tg_pt_gp_alua_support_unavailable.attr, + &target_core_alua_tg_pt_gp_alua_support_standby.attr, + &target_core_alua_tg_pt_gp_alua_support_active_nonoptimized.attr, + &target_core_alua_tg_pt_gp_alua_support_active_optimized.attr, &target_core_alua_tg_pt_gp_alua_write_metadata.attr, &target_core_alua_tg_pt_gp_nonop_delay_msecs.attr, &target_core_alua_tg_pt_gp_trans_delay_msecs.attr, - &target_core_alua_tg_pt_gp_implict_trans_secs.attr, + &target_core_alua_tg_pt_gp_implicit_trans_secs.attr, &target_core_alua_tg_pt_gp_preferred.attr, &target_core_alua_tg_pt_gp_tg_pt_gp_id.attr, &target_core_alua_tg_pt_gp_members.attr, diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index d90dbb0..207b340 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -92,6 +92,9 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; + + percpu_ref_get(&se_lun->lun_ref); + se_cmd->lun_ref_active = true; } spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags); @@ -119,24 +122,20 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) se_cmd->se_lun = &se_sess->se_tpg->tpg_virt_lun0; se_cmd->orig_fe_lun = 0; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; + + percpu_ref_get(&se_lun->lun_ref); + se_cmd->lun_ref_active = true; } /* Directly associate cmd with se_dev */ se_cmd->se_dev = se_lun->lun_se_dev; - /* TODO: get rid of this and use atomics for stats */ dev = se_lun->lun_se_dev; - spin_lock_irqsave(&dev->stats_lock, flags); - dev->num_cmds++; + atomic_long_inc(&dev->num_cmds); if (se_cmd->data_direction == DMA_TO_DEVICE) - dev->write_bytes += se_cmd->data_length; + atomic_long_add(se_cmd->data_length, &dev->write_bytes); else if (se_cmd->data_direction == DMA_FROM_DEVICE) - dev->read_bytes += se_cmd->data_length; - spin_unlock_irqrestore(&dev->stats_lock, flags); - - spin_lock_irqsave(&se_lun->lun_cmd_lock, flags); - list_add_tail(&se_cmd->se_lun_node, &se_lun->lun_cmd_list); - spin_unlock_irqrestore(&se_lun->lun_cmd_lock, flags); + atomic_long_add(se_cmd->data_length, &dev->read_bytes); return 0; } @@ -314,14 +313,14 @@ int core_enable_device_list_for_node( deve = nacl->device_list[mapped_lun]; /* - * Check if the call is handling demo mode -> explict LUN ACL + * Check if the call is handling demo mode -> explicit LUN ACL * transition. This transition must be for the same struct se_lun * + mapped_lun that was setup in demo mode.. */ if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { if (deve->se_lun_acl != NULL) { pr_err("struct se_dev_entry->se_lun_acl" - " already set for demo mode -> explict" + " already set for demo mode -> explicit" " LUN ACL transition\n"); spin_unlock_irq(&nacl->device_list_lock); return -EINVAL; @@ -329,7 +328,7 @@ int core_enable_device_list_for_node( if (deve->se_lun != lun) { pr_err("struct se_dev_entry->se_lun does" " match passed struct se_lun for demo mode" - " -> explict LUN ACL transition\n"); + " -> explicit LUN ACL transition\n"); spin_unlock_irq(&nacl->device_list_lock); return -EINVAL; } @@ -1407,6 +1406,7 @@ static void scsi_dump_inquiry(struct se_device *dev) struct se_device *target_alloc_device(struct se_hba *hba, const char *name) { struct se_device *dev; + struct se_lun *xcopy_lun; dev = hba->transport->alloc_device(hba, name); if (!dev) @@ -1423,7 +1423,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&dev->state_list); INIT_LIST_HEAD(&dev->qf_cmd_list); INIT_LIST_HEAD(&dev->g_dev_node); - spin_lock_init(&dev->stats_lock); spin_lock_init(&dev->execute_task_lock); spin_lock_init(&dev->delayed_cmd_lock); spin_lock_init(&dev->dev_reservation_lock); @@ -1469,6 +1468,14 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.fabric_max_sectors = DA_FABRIC_MAX_SECTORS; dev->dev_attrib.optimal_sectors = DA_FABRIC_MAX_SECTORS; + xcopy_lun = &dev->xcopy_lun; + xcopy_lun->lun_se_dev = dev; + init_completion(&xcopy_lun->lun_shutdown_comp); + INIT_LIST_HEAD(&xcopy_lun->lun_acl_list); + spin_lock_init(&xcopy_lun->lun_acl_lock); + spin_lock_init(&xcopy_lun->lun_sep_lock); + init_completion(&xcopy_lun->lun_ref_comp); + return dev; } diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 3503996..dae2ad6 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -385,9 +385,9 @@ static struct config_group *target_fabric_make_mappedlun( } config_group_init_type_name(&lacl->se_lun_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_mappedlun_cit); + &tf->tf_cit_tmpl.tfc_tpg_mappedlun_cit); config_group_init_type_name(&lacl->ml_stat_grps.stat_group, - "statistics", &TF_CIT_TMPL(tf)->tfc_tpg_mappedlun_stat_cit); + "statistics", &tf->tf_cit_tmpl.tfc_tpg_mappedlun_stat_cit); lacl_cg->default_groups[0] = &lacl->ml_stat_grps.stat_group; lacl_cg->default_groups[1] = NULL; @@ -504,16 +504,16 @@ static struct config_group *target_fabric_make_nodeacl( nacl_cg->default_groups[4] = NULL; config_group_init_type_name(&se_nacl->acl_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_base_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_base_cit); config_group_init_type_name(&se_nacl->acl_attrib_group, "attrib", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_attrib_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit); config_group_init_type_name(&se_nacl->acl_auth_group, "auth", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_auth_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_auth_cit); config_group_init_type_name(&se_nacl->acl_param_group, "param", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_param_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_param_cit); config_group_init_type_name(&se_nacl->acl_fabric_stat_group, "fabric_statistics", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_stat_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_stat_cit); return &se_nacl->acl_group; } @@ -595,7 +595,7 @@ static struct config_group *target_fabric_make_np( se_tpg_np->tpg_np_parent = se_tpg; config_group_init_type_name(&se_tpg_np->tpg_np_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_np_base_cit); + &tf->tf_cit_tmpl.tfc_tpg_np_base_cit); return &se_tpg_np->tpg_np_group; } @@ -899,9 +899,9 @@ static struct config_group *target_fabric_make_lun( } config_group_init_type_name(&lun->lun_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_port_cit); + &tf->tf_cit_tmpl.tfc_tpg_port_cit); config_group_init_type_name(&lun->port_stat_grps.stat_group, - "statistics", &TF_CIT_TMPL(tf)->tfc_tpg_port_stat_cit); + "statistics", &tf->tf_cit_tmpl.tfc_tpg_port_stat_cit); lun_cg->default_groups[0] = &lun->port_stat_grps.stat_group; lun_cg->default_groups[1] = NULL; @@ -1056,19 +1056,19 @@ static struct config_group *target_fabric_make_tpg( se_tpg->tpg_group.default_groups[6] = NULL; config_group_init_type_name(&se_tpg->tpg_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_base_cit); + &tf->tf_cit_tmpl.tfc_tpg_base_cit); config_group_init_type_name(&se_tpg->tpg_lun_group, "lun", - &TF_CIT_TMPL(tf)->tfc_tpg_lun_cit); + &tf->tf_cit_tmpl.tfc_tpg_lun_cit); config_group_init_type_name(&se_tpg->tpg_np_group, "np", - &TF_CIT_TMPL(tf)->tfc_tpg_np_cit); + &tf->tf_cit_tmpl.tfc_tpg_np_cit); config_group_init_type_name(&se_tpg->tpg_acl_group, "acls", - &TF_CIT_TMPL(tf)->tfc_tpg_nacl_cit); + &tf->tf_cit_tmpl.tfc_tpg_nacl_cit); config_group_init_type_name(&se_tpg->tpg_attrib_group, "attrib", - &TF_CIT_TMPL(tf)->tfc_tpg_attrib_cit); + &tf->tf_cit_tmpl.tfc_tpg_attrib_cit); config_group_init_type_name(&se_tpg->tpg_auth_group, "auth", - &TF_CIT_TMPL(tf)->tfc_tpg_auth_cit); + &tf->tf_cit_tmpl.tfc_tpg_auth_cit); config_group_init_type_name(&se_tpg->tpg_param_group, "param", - &TF_CIT_TMPL(tf)->tfc_tpg_param_cit); + &tf->tf_cit_tmpl.tfc_tpg_param_cit); return &se_tpg->tpg_group; } @@ -1155,9 +1155,9 @@ static struct config_group *target_fabric_make_wwn( wwn->wwn_group.default_groups[1] = NULL; config_group_init_type_name(&wwn->wwn_group, name, - &TF_CIT_TMPL(tf)->tfc_tpg_cit); + &tf->tf_cit_tmpl.tfc_tpg_cit); config_group_init_type_name(&wwn->fabric_stat_group, "fabric_statistics", - &TF_CIT_TMPL(tf)->tfc_wwn_fabric_stats_cit); + &tf->tf_cit_tmpl.tfc_wwn_fabric_stats_cit); return &wwn->wwn_group; } diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index b662f89..0e34cda 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -562,7 +562,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } else { ret = fd_do_rw(cmd, sgl, sgl_nents, 1); /* - * Perform implict vfs_fsync_range() for fd_do_writev() ops + * Perform implicit vfs_fsync_range() for fd_do_writev() ops * for SCSI WRITEs with Forced Unit Access (FUA) set. * Allow this to happen independent of WCE=0 setting. */ diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index b9a3394..c87959f 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -710,6 +710,45 @@ static sector_t iblock_get_blocks(struct se_device *dev) return iblock_emulate_read_cap_with_block_size(dev, bd, q); } +static sector_t iblock_get_alignment_offset_lbas(struct se_device *dev) +{ + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct block_device *bd = ib_dev->ibd_bd; + int ret; + + ret = bdev_alignment_offset(bd); + if (ret == -1) + return 0; + + /* convert offset-bytes to offset-lbas */ + return ret / bdev_logical_block_size(bd); +} + +static unsigned int iblock_get_lbppbe(struct se_device *dev) +{ + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct block_device *bd = ib_dev->ibd_bd; + int logs_per_phys = bdev_physical_block_size(bd) / bdev_logical_block_size(bd); + + return ilog2(logs_per_phys); +} + +static unsigned int iblock_get_io_min(struct se_device *dev) +{ + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct block_device *bd = ib_dev->ibd_bd; + + return bdev_io_min(bd); +} + +static unsigned int iblock_get_io_opt(struct se_device *dev) +{ + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct block_device *bd = ib_dev->ibd_bd; + + return bdev_io_opt(bd); +} + static struct sbc_ops iblock_sbc_ops = { .execute_rw = iblock_execute_rw, .execute_sync_cache = iblock_execute_sync_cache, @@ -749,6 +788,10 @@ static struct se_subsystem_api iblock_template = { .show_configfs_dev_params = iblock_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = iblock_get_blocks, + .get_alignment_offset_lbas = iblock_get_alignment_offset_lbas, + .get_lbppbe = iblock_get_lbppbe, + .get_io_min = iblock_get_io_min, + .get_io_opt = iblock_get_io_opt, .get_write_cache = iblock_get_write_cache, }; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 579128a..47b63b0 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -75,8 +75,6 @@ extern struct se_device *g_lun0_dev; struct se_node_acl *__core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, const char *); -struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, - unsigned char *); void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *); void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *); struct se_lun *core_tpg_pre_addlun(struct se_portal_group *, u32); @@ -102,7 +100,7 @@ int transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int); bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags); -int transport_clear_lun_from_sessions(struct se_lun *); +int transport_clear_lun_ref(struct se_lun *); void transport_send_task_abort(struct se_cmd *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index d1ae4c5..2f5d779 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -474,7 +474,7 @@ static int core_scsi3_pr_seq_non_holder( * statement. */ if (!ret && !other_cdb) { - pr_debug("Allowing explict CDB: 0x%02x for %s" + pr_debug("Allowing explicit CDB: 0x%02x for %s" " reservation holder\n", cdb[0], core_scsi3_pr_dump_type(pr_reg_type)); @@ -507,7 +507,7 @@ static int core_scsi3_pr_seq_non_holder( */ if (!registered_nexus) { - pr_debug("Allowing implict CDB: 0x%02x" + pr_debug("Allowing implicit CDB: 0x%02x" " for %s reservation on unregistered" " nexus\n", cdb[0], core_scsi3_pr_dump_type(pr_reg_type)); @@ -522,7 +522,7 @@ static int core_scsi3_pr_seq_non_holder( * allow commands from registered nexuses. */ - pr_debug("Allowing implict CDB: 0x%02x for %s" + pr_debug("Allowing implicit CDB: 0x%02x for %s" " reservation\n", cdb[0], core_scsi3_pr_dump_type(pr_reg_type)); @@ -683,7 +683,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( alua_port_list) { /* * This pointer will be NULL for demo mode MappedLUNs - * that have not been make explict via a ConfigFS + * that have not been make explicit via a ConfigFS * MappedLUN group for the SCSI Initiator Node ACL. */ if (!deve_tmp->se_lun_acl) @@ -1158,7 +1158,7 @@ static void core_scsi3_put_pr_reg(struct t10_pr_registration *pr_reg) smp_mb__after_atomic_dec(); } -static int core_scsi3_check_implict_release( +static int core_scsi3_check_implicit_release( struct se_device *dev, struct t10_pr_registration *pr_reg) { @@ -1174,7 +1174,7 @@ static int core_scsi3_check_implict_release( } if (pr_res_holder == pr_reg) { /* - * Perform an implict RELEASE if the registration that + * Perform an implicit RELEASE if the registration that * is being released is holding the reservation. * * From spc4r17, section 5.7.11.1: @@ -1192,7 +1192,7 @@ static int core_scsi3_check_implict_release( * For 'All Registrants' reservation types, all existing * registrations are still processed as reservation holders * in core_scsi3_pr_seq_non_holder() after the initial - * reservation holder is implictly released here. + * reservation holder is implicitly released here. */ } else if (pr_reg->pr_reg_all_tg_pt && (!strcmp(pr_res_holder->pr_reg_nacl->initiatorname, @@ -2125,7 +2125,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, /* * sa_res_key=0 Unregister Reservation Key for registered I_T Nexus. */ - pr_holder = core_scsi3_check_implict_release( + pr_holder = core_scsi3_check_implicit_release( cmd->se_dev, pr_reg); if (pr_holder < 0) { ret = TCM_RESERVATION_CONFLICT; @@ -2402,7 +2402,7 @@ static void __core_scsi3_complete_pro_release( struct se_device *dev, struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, - int explict) + int explicit) { struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo; char i_buf[PR_REG_ISID_ID_LEN]; @@ -2416,7 +2416,7 @@ static void __core_scsi3_complete_pro_release( pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared" " reservation holder TYPE: %s ALL_TG_PT: %d\n", - tfo->get_fabric_name(), (explict) ? "explict" : "implict", + tfo->get_fabric_name(), (explicit) ? "explicit" : "implicit", core_scsi3_pr_dump_type(pr_reg->pr_res_type), (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n", @@ -2692,7 +2692,7 @@ static void __core_scsi3_complete_pro_preempt( memset(i_buf, 0, PR_REG_ISID_ID_LEN); core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); /* - * Do an implict RELEASE of the existing reservation. + * Do an implicit RELEASE of the existing reservation. */ if (dev->dev_pr_res_holder) __core_scsi3_complete_pro_release(dev, nacl, @@ -2845,7 +2845,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, * 5.7.11.4 Preempting, Table 52 and Figure 7. * * For a ZERO SA Reservation key, release - * all other registrations and do an implict + * all other registrations and do an implicit * release of active persistent reservation. * * For a non-ZERO SA Reservation key, only diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 131327a..4ffe5f2 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -27,7 +27,6 @@ #include <linux/string.h> #include <linux/parser.h> #include <linux/timer.h> -#include <linux/blkdev.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <scsi/scsi.h> diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index d9b92b2..52ae54e 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -105,12 +105,22 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd) buf[9] = (dev->dev_attrib.block_size >> 16) & 0xff; buf[10] = (dev->dev_attrib.block_size >> 8) & 0xff; buf[11] = dev->dev_attrib.block_size & 0xff; + + if (dev->transport->get_lbppbe) + buf[13] = dev->transport->get_lbppbe(dev) & 0x0f; + + if (dev->transport->get_alignment_offset_lbas) { + u16 lalba = dev->transport->get_alignment_offset_lbas(dev); + buf[14] = (lalba >> 8) & 0x3f; + buf[15] = lalba & 0xff; + } + /* * Set Thin Provisioning Enable bit following sbc3r22 in section * READ CAPACITY (16) byte 14 if emulate_tpu or emulate_tpws is enabled. */ if (dev->dev_attrib.emulate_tpu || dev->dev_attrib.emulate_tpws) - buf[14] = 0x80; + buf[14] |= 0x80; rbuf = transport_kmap_data_sg(cmd); if (rbuf) { diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 0745395..021c3f4 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -48,7 +48,7 @@ static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) buf[5] = 0x80; /* - * Set TPGS field for explict and/or implict ALUA access type + * Set TPGS field for explicit and/or implicit ALUA access type * and opteration. * * See spc4r17 section 6.4.2 Table 135 @@ -452,6 +452,7 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) struct se_device *dev = cmd->se_dev; u32 max_sectors; int have_tp = 0; + int opt, min; /* * Following spc3r22 section 6.5.3 Block Limits VPD page, when @@ -475,7 +476,10 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* * Set OPTIMAL TRANSFER LENGTH GRANULARITY */ - put_unaligned_be16(1, &buf[6]); + if (dev->transport->get_io_min && (min = dev->transport->get_io_min(dev))) + put_unaligned_be16(min / dev->dev_attrib.block_size, &buf[6]); + else + put_unaligned_be16(1, &buf[6]); /* * Set MAXIMUM TRANSFER LENGTH @@ -487,7 +491,10 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* * Set OPTIMAL TRANSFER LENGTH */ - put_unaligned_be32(dev->dev_attrib.optimal_sectors, &buf[12]); + if (dev->transport->get_io_opt && (opt = dev->transport->get_io_opt(dev))) + put_unaligned_be32(opt / dev->dev_attrib.block_size, &buf[12]); + else + put_unaligned_be32(dev->dev_attrib.optimal_sectors, &buf[12]); /* * Exit now if we don't support TP. @@ -1250,7 +1257,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) *size = (cdb[3] << 8) + cdb[4]; /* - * Do implict HEAD_OF_QUEUE processing for INQUIRY. + * Do implicit HEAD_OF_QUEUE processing for INQUIRY. * See spc4r17 section 5.3 */ cmd->sam_task_attr = MSG_HEAD_TAG; @@ -1284,7 +1291,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) cmd->execute_cmd = spc_emulate_report_luns; *size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9]; /* - * Do implict HEAD_OF_QUEUE processing for REPORT_LUNS + * Do implicit HEAD_OF_QUEUE processing for REPORT_LUNS * See spc4r17 section 5.3 */ cmd->sam_task_attr = MSG_HEAD_TAG; diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 9c642e0..0353899 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -32,7 +32,6 @@ #include <linux/utsname.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/blkdev.h> #include <linux/configfs.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -214,7 +213,8 @@ static ssize_t target_stat_scsi_tgt_dev_show_attr_resets( struct se_device *dev = container_of(sgrps, struct se_device, dev_stat_grps); - return snprintf(page, PAGE_SIZE, "%u\n", dev->num_resets); + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&dev->num_resets)); } DEV_STAT_SCSI_TGT_DEV_ATTR_RO(resets); @@ -397,8 +397,8 @@ static ssize_t target_stat_scsi_lu_show_attr_num_cmds( container_of(sgrps, struct se_device, dev_stat_grps); /* scsiLuNumCommands */ - return snprintf(page, PAGE_SIZE, "%llu\n", - (unsigned long long)dev->num_cmds); + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&dev->num_cmds)); } DEV_STAT_SCSI_LU_ATTR_RO(num_cmds); @@ -409,7 +409,8 @@ static ssize_t target_stat_scsi_lu_show_attr_read_mbytes( container_of(sgrps, struct se_device, dev_stat_grps); /* scsiLuReadMegaBytes */ - return snprintf(page, PAGE_SIZE, "%u\n", (u32)(dev->read_bytes >> 20)); + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&dev->read_bytes) >> 20); } DEV_STAT_SCSI_LU_ATTR_RO(read_mbytes); @@ -420,7 +421,8 @@ static ssize_t target_stat_scsi_lu_show_attr_write_mbytes( container_of(sgrps, struct se_device, dev_stat_grps); /* scsiLuWrittenMegaBytes */ - return snprintf(page, PAGE_SIZE, "%u\n", (u32)(dev->write_bytes >> 20)); + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&dev->write_bytes) >> 20); } DEV_STAT_SCSI_LU_ATTR_RO(write_mbytes); @@ -431,7 +433,7 @@ static ssize_t target_stat_scsi_lu_show_attr_resets( container_of(sgrps, struct se_device, dev_stat_grps); /* scsiLuInResets */ - return snprintf(page, PAGE_SIZE, "%u\n", dev->num_resets); + return snprintf(page, PAGE_SIZE, "%lu\n", atomic_long_read(&dev->num_resets)); } DEV_STAT_SCSI_LU_ATTR_RO(resets); diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 2500099..70c638f 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -386,9 +386,7 @@ int core_tmr_lun_reset( pr_debug("LUN_RESET: SCSI-2 Released reservation\n"); } - spin_lock_irq(&dev->stats_lock); - dev->num_resets++; - spin_unlock_irq(&dev->stats_lock); + atomic_long_inc(&dev->num_resets); pr_debug("LUN_RESET: %s for [%s] Complete\n", (preempt_and_abort_list) ? "Preempt" : "TMR", diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index b9a6ec0..f697f8b 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -116,6 +116,7 @@ struct se_node_acl *core_tpg_get_initiator_node_acl( return acl; } +EXPORT_SYMBOL(core_tpg_get_initiator_node_acl); /* core_tpg_add_node_to_devs(): * @@ -633,6 +634,13 @@ int core_tpg_set_initiator_node_tag( } EXPORT_SYMBOL(core_tpg_set_initiator_node_tag); +static void core_tpg_lun_ref_release(struct percpu_ref *ref) +{ + struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); + + complete(&lun->lun_ref_comp); +} + static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) { /* Set in core_dev_setup_virtual_lun0() */ @@ -646,15 +654,20 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); INIT_LIST_HEAD(&lun->lun_acl_list); - INIT_LIST_HEAD(&lun->lun_cmd_list); spin_lock_init(&lun->lun_acl_lock); - spin_lock_init(&lun->lun_cmd_lock); spin_lock_init(&lun->lun_sep_lock); + init_completion(&lun->lun_ref_comp); - ret = core_tpg_post_addlun(se_tpg, lun, lun_access, dev); + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); if (ret < 0) return ret; + ret = core_tpg_post_addlun(se_tpg, lun, lun_access, dev); + if (ret < 0) { + percpu_ref_cancel_init(&lun->lun_ref); + return ret; + } + return 0; } @@ -691,10 +704,9 @@ int core_tpg_register( atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); INIT_LIST_HEAD(&lun->lun_acl_list); - INIT_LIST_HEAD(&lun->lun_cmd_list); spin_lock_init(&lun->lun_acl_lock); - spin_lock_init(&lun->lun_cmd_lock); spin_lock_init(&lun->lun_sep_lock); + init_completion(&lun->lun_ref_comp); } se_tpg->se_tpg_type = se_tpg_type; @@ -815,10 +827,16 @@ int core_tpg_post_addlun( { int ret; - ret = core_dev_export(lun_ptr, tpg, lun); + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); if (ret < 0) return ret; + ret = core_dev_export(lun_ptr, tpg, lun); + if (ret < 0) { + percpu_ref_cancel_init(&lun->lun_ref); + return ret; + } + spin_lock(&tpg->tpg_lun_lock); lun->lun_access = lun_access; lun->lun_status = TRANSPORT_LUN_STATUS_ACTIVE; @@ -827,14 +845,6 @@ int core_tpg_post_addlun( return 0; } -static void core_tpg_shutdown_lun( - struct se_portal_group *tpg, - struct se_lun *lun) -{ - core_clear_lun_from_tpg(lun, tpg); - transport_clear_lun_from_sessions(lun); -} - struct se_lun *core_tpg_pre_dellun( struct se_portal_group *tpg, u32 unpacked_lun) @@ -869,7 +879,8 @@ int core_tpg_post_dellun( struct se_portal_group *tpg, struct se_lun *lun) { - core_tpg_shutdown_lun(tpg, lun); + core_clear_lun_from_tpg(lun, tpg); + transport_clear_lun_ref(lun); core_dev_unexport(lun->lun_se_dev, tpg, lun); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 81e945e..91953da 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -28,7 +28,6 @@ #include <linux/string.h> #include <linux/timer.h> #include <linux/slab.h> -#include <linux/blkdev.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/in.h> @@ -473,7 +472,7 @@ void transport_deregister_session(struct se_session *se_sess) pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n", se_tpg->se_tpg_tfo->get_fabric_name()); /* - * If last kref is dropping now for an explict NodeACL, awake sleeping + * If last kref is dropping now for an explicit NodeACL, awake sleeping * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group * removal context. */ @@ -515,23 +514,6 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, if (write_pending) cmd->t_state = TRANSPORT_WRITE_PENDING; - /* - * Determine if IOCTL context caller in requesting the stopping of this - * command for LUN shutdown purposes. - */ - if (cmd->transport_state & CMD_T_LUN_STOP) { - pr_debug("%s:%d CMD_T_LUN_STOP for ITT: 0x%08x\n", - __func__, __LINE__, cmd->se_tfo->get_task_tag(cmd)); - - cmd->transport_state &= ~CMD_T_ACTIVE; - if (remove_from_lists) - target_remove_from_state_list(cmd); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - - complete(&cmd->transport_lun_stop_comp); - return 1; - } - if (remove_from_lists) { target_remove_from_state_list(cmd); @@ -585,15 +567,11 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) static void transport_lun_remove_cmd(struct se_cmd *cmd) { struct se_lun *lun = cmd->se_lun; - unsigned long flags; - if (!lun) + if (!lun || !cmd->lun_ref_active) return; - spin_lock_irqsave(&lun->lun_cmd_lock, flags); - if (!list_empty(&cmd->se_lun_node)) - list_del_init(&cmd->se_lun_node); - spin_unlock_irqrestore(&lun->lun_cmd_lock, flags); + percpu_ref_put(&lun->lun_ref); } void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) @@ -668,7 +646,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) cmd->transport_state |= CMD_T_FAILED; /* - * Check for case where an explict ABORT_TASK has been received + * Check for case where an explicit ABORT_TASK has been received * and transport_wait_for_tasks() will be waiting for completion.. */ if (cmd->transport_state & CMD_T_ABORTED && @@ -1092,13 +1070,10 @@ void transport_init_se_cmd( int task_attr, unsigned char *sense_buffer) { - INIT_LIST_HEAD(&cmd->se_lun_node); INIT_LIST_HEAD(&cmd->se_delayed_node); INIT_LIST_HEAD(&cmd->se_qf_node); INIT_LIST_HEAD(&cmd->se_cmd_list); INIT_LIST_HEAD(&cmd->state_list); - init_completion(&cmd->transport_lun_fe_stop_comp); - init_completion(&cmd->transport_lun_stop_comp); init_completion(&cmd->t_transport_stop_comp); init_completion(&cmd->cmd_wait_comp); init_completion(&cmd->task_stop_comp); @@ -1719,29 +1694,14 @@ void target_execute_cmd(struct se_cmd *cmd) /* * If the received CDB has aleady been aborted stop processing it here. */ - if (transport_check_aborted_status(cmd, 1)) { - complete(&cmd->transport_lun_stop_comp); + if (transport_check_aborted_status(cmd, 1)) return; - } /* - * Determine if IOCTL context caller in requesting the stopping of this - * command for LUN shutdown purposes. - */ - spin_lock_irq(&cmd->t_state_lock); - if (cmd->transport_state & CMD_T_LUN_STOP) { - pr_debug("%s:%d CMD_T_LUN_STOP for ITT: 0x%08x\n", - __func__, __LINE__, cmd->se_tfo->get_task_tag(cmd)); - - cmd->transport_state &= ~CMD_T_ACTIVE; - spin_unlock_irq(&cmd->t_state_lock); - complete(&cmd->transport_lun_stop_comp); - return; - } - /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. */ + spin_lock_irq(&cmd->t_state_lock); if (cmd->transport_state & CMD_T_STOP) { pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n", __func__, __LINE__, @@ -2404,164 +2364,23 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) } EXPORT_SYMBOL(target_wait_for_sess_cmds); -/* transport_lun_wait_for_tasks(): - * - * Called from ConfigFS context to stop the passed struct se_cmd to allow - * an struct se_lun to be successfully shutdown. - */ -static int transport_lun_wait_for_tasks(struct se_cmd *cmd, struct se_lun *lun) -{ - unsigned long flags; - int ret = 0; - - /* - * If the frontend has already requested this struct se_cmd to - * be stopped, we can safely ignore this struct se_cmd. - */ - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (cmd->transport_state & CMD_T_STOP) { - cmd->transport_state &= ~CMD_T_LUN_STOP; - - pr_debug("ConfigFS ITT[0x%08x] - CMD_T_STOP, skipping\n", - cmd->se_tfo->get_task_tag(cmd)); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - transport_cmd_check_stop(cmd, false, false); - return -EPERM; - } - cmd->transport_state |= CMD_T_LUN_FE_STOP; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - - // XXX: audit task_flags checks. - spin_lock_irqsave(&cmd->t_state_lock, flags); - if ((cmd->transport_state & CMD_T_BUSY) && - (cmd->transport_state & CMD_T_SENT)) { - if (!target_stop_cmd(cmd, &flags)) - ret++; - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - - pr_debug("ConfigFS: cmd: %p stop tasks ret:" - " %d\n", cmd, ret); - if (!ret) { - pr_debug("ConfigFS: ITT[0x%08x] - stopping cmd....\n", - cmd->se_tfo->get_task_tag(cmd)); - wait_for_completion(&cmd->transport_lun_stop_comp); - pr_debug("ConfigFS: ITT[0x%08x] - stopped cmd....\n", - cmd->se_tfo->get_task_tag(cmd)); - } - - return 0; -} - -static void __transport_clear_lun_from_sessions(struct se_lun *lun) -{ - struct se_cmd *cmd = NULL; - unsigned long lun_flags, cmd_flags; - /* - * Do exception processing and return CHECK_CONDITION status to the - * Initiator Port. - */ - spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); - while (!list_empty(&lun->lun_cmd_list)) { - cmd = list_first_entry(&lun->lun_cmd_list, - struct se_cmd, se_lun_node); - list_del_init(&cmd->se_lun_node); - - spin_lock(&cmd->t_state_lock); - pr_debug("SE_LUN[%d] - Setting cmd->transport" - "_lun_stop for ITT: 0x%08x\n", - cmd->se_lun->unpacked_lun, - cmd->se_tfo->get_task_tag(cmd)); - cmd->transport_state |= CMD_T_LUN_STOP; - spin_unlock(&cmd->t_state_lock); - - spin_unlock_irqrestore(&lun->lun_cmd_lock, lun_flags); - - if (!cmd->se_lun) { - pr_err("ITT: 0x%08x, [i,t]_state: %u/%u\n", - cmd->se_tfo->get_task_tag(cmd), - cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); - BUG(); - } - /* - * If the Storage engine still owns the iscsi_cmd_t, determine - * and/or stop its context. - */ - pr_debug("SE_LUN[%d] - ITT: 0x%08x before transport" - "_lun_wait_for_tasks()\n", cmd->se_lun->unpacked_lun, - cmd->se_tfo->get_task_tag(cmd)); - - if (transport_lun_wait_for_tasks(cmd, cmd->se_lun) < 0) { - spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); - continue; - } - - pr_debug("SE_LUN[%d] - ITT: 0x%08x after transport_lun" - "_wait_for_tasks(): SUCCESS\n", - cmd->se_lun->unpacked_lun, - cmd->se_tfo->get_task_tag(cmd)); - - spin_lock_irqsave(&cmd->t_state_lock, cmd_flags); - if (!(cmd->transport_state & CMD_T_DEV_ACTIVE)) { - spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags); - goto check_cond; - } - cmd->transport_state &= ~CMD_T_DEV_ACTIVE; - target_remove_from_state_list(cmd); - spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags); - - /* - * The Storage engine stopped this struct se_cmd before it was - * send to the fabric frontend for delivery back to the - * Initiator Node. Return this SCSI CDB back with an - * CHECK_CONDITION status. - */ -check_cond: - transport_send_check_condition_and_sense(cmd, - TCM_NON_EXISTENT_LUN, 0); - /* - * If the fabric frontend is waiting for this iscsi_cmd_t to - * be released, notify the waiting thread now that LU has - * finished accessing it. - */ - spin_lock_irqsave(&cmd->t_state_lock, cmd_flags); - if (cmd->transport_state & CMD_T_LUN_FE_STOP) { - pr_debug("SE_LUN[%d] - Detected FE stop for" - " struct se_cmd: %p ITT: 0x%08x\n", - lun->unpacked_lun, - cmd, cmd->se_tfo->get_task_tag(cmd)); - - spin_unlock_irqrestore(&cmd->t_state_lock, - cmd_flags); - transport_cmd_check_stop(cmd, false, false); - complete(&cmd->transport_lun_fe_stop_comp); - spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); - continue; - } - pr_debug("SE_LUN[%d] - ITT: 0x%08x finished processing\n", - lun->unpacked_lun, cmd->se_tfo->get_task_tag(cmd)); - - spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags); - spin_lock_irqsave(&lun->lun_cmd_lock, lun_flags); - } - spin_unlock_irqrestore(&lun->lun_cmd_lock, lun_flags); -} - -static int transport_clear_lun_thread(void *p) +static int transport_clear_lun_ref_thread(void *p) { struct se_lun *lun = p; - __transport_clear_lun_from_sessions(lun); + percpu_ref_kill(&lun->lun_ref); + + wait_for_completion(&lun->lun_ref_comp); complete(&lun->lun_shutdown_comp); return 0; } -int transport_clear_lun_from_sessions(struct se_lun *lun) +int transport_clear_lun_ref(struct se_lun *lun) { struct task_struct *kt; - kt = kthread_run(transport_clear_lun_thread, lun, + kt = kthread_run(transport_clear_lun_ref_thread, lun, "tcm_cl_%u", lun->unpacked_lun); if (IS_ERR(kt)) { pr_err("Unable to start clear_lun thread\n"); @@ -2595,43 +2414,6 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) spin_unlock_irqrestore(&cmd->t_state_lock, flags); return false; } - /* - * If we are already stopped due to an external event (ie: LUN shutdown) - * sleep until the connection can have the passed struct se_cmd back. - * The cmd->transport_lun_stopped_sem will be upped by - * transport_clear_lun_from_sessions() once the ConfigFS context caller - * has completed its operation on the struct se_cmd. - */ - if (cmd->transport_state & CMD_T_LUN_STOP) { - pr_debug("wait_for_tasks: Stopping" - " wait_for_completion(&cmd->t_tasktransport_lun_fe" - "_stop_comp); for ITT: 0x%08x\n", - cmd->se_tfo->get_task_tag(cmd)); - /* - * There is a special case for WRITES where a FE exception + - * LUN shutdown means ConfigFS context is still sleeping on - * transport_lun_stop_comp in transport_lun_wait_for_tasks(). - * We go ahead and up transport_lun_stop_comp just to be sure - * here. - */ - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - complete(&cmd->transport_lun_stop_comp); - wait_for_completion(&cmd->transport_lun_fe_stop_comp); - spin_lock_irqsave(&cmd->t_state_lock, flags); - - target_remove_from_state_list(cmd); - /* - * At this point, the frontend who was the originator of this - * struct se_cmd, now owns the structure and can be released through - * normal means below. - */ - pr_debug("wait_for_tasks: Stopped" - " wait_for_completion(&cmd->t_tasktransport_lun_fe_" - "stop_comp); for ITT: 0x%08x\n", - cmd->se_tfo->get_task_tag(cmd)); - - cmd->transport_state &= ~CMD_T_LUN_STOP; - } if (!(cmd->transport_state & CMD_T_ACTIVE)) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -2910,6 +2692,7 @@ int transport_check_aborted_status(struct se_cmd *cmd, int send_status) cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd)); cmd->se_cmd_flags |= SCF_SENT_DELAYED_TAS; + cmd->scsi_status = SAM_STAT_TASK_ABORTED; trace_target_cmd_complete(cmd); cmd->se_tfo->queue_status(cmd); @@ -2938,6 +2721,7 @@ void transport_send_task_abort(struct se_cmd *cmd) if (cmd->se_tfo->write_pending_status(cmd) != 0) { cmd->transport_state |= CMD_T_ABORTED; smp_mb__after_atomic_inc(); + return; } } cmd->scsi_status = SAM_STAT_TASK_ABORTED; diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h index 0204952..be912b3 100644 --- a/drivers/target/target_core_ua.h +++ b/drivers/target/target_core_ua.h @@ -19,7 +19,7 @@ #define ASCQ_2AH_RESERVATIONS_RELEASED 0x04 #define ASCQ_2AH_REGISTRATIONS_PREEMPTED 0x05 #define ASCQ_2AH_ASYMMETRIC_ACCESS_STATE_CHANGED 0x06 -#define ASCQ_2AH_IMPLICT_ASYMMETRIC_ACCESS_STATE_TRANSITION_FAILED 0x07 +#define ASCQ_2AH_IMPLICIT_ASYMMETRIC_ACCESS_STATE_TRANSITION_FAILED 0x07 #define ASCQ_2AH_PRIORITY_CHANGED 0x08 #define ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS 0x09 diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 474cd44..6b88a99 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -405,9 +405,6 @@ static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, struct xcopy_pt_cmd, se_cmd); - if (xpt_cmd->remote_port) - kfree(se_cmd->se_lun); - kfree(xpt_cmd); } @@ -572,22 +569,10 @@ static int target_xcopy_init_pt_lun( return 0; } - pt_cmd->se_lun = kzalloc(sizeof(struct se_lun), GFP_KERNEL); - if (!pt_cmd->se_lun) { - pr_err("Unable to allocate pt_cmd->se_lun\n"); - return -ENOMEM; - } - init_completion(&pt_cmd->se_lun->lun_shutdown_comp); - INIT_LIST_HEAD(&pt_cmd->se_lun->lun_cmd_list); - INIT_LIST_HEAD(&pt_cmd->se_lun->lun_acl_list); - spin_lock_init(&pt_cmd->se_lun->lun_acl_lock); - spin_lock_init(&pt_cmd->se_lun->lun_cmd_lock); - spin_lock_init(&pt_cmd->se_lun->lun_sep_lock); - + pt_cmd->se_lun = &se_dev->xcopy_lun; pt_cmd->se_dev = se_dev; pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev); - pt_cmd->se_lun->lun_se_dev = se_dev; pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH; pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n", @@ -658,8 +643,6 @@ static int target_xcopy_setup_pt_cmd( return 0; out: - if (remote_port == true) - kfree(cmd->se_lun); return ret; } diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index 0dd54a4..752863a 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -22,6 +22,7 @@ #define FT_NAMELEN 32 /* length of ASCII WWPNs including pad */ #define FT_TPG_NAMELEN 32 /* max length of TPG name */ #define FT_LUN_NAMELEN 32 /* max length of LUN name */ +#define TCM_FC_DEFAULT_TAGS 512 /* tags used for per-session preallocation */ struct ft_transport_id { __u8 format; diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 0e5a1cae..479ec56 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -28,6 +28,7 @@ #include <linux/configfs.h> #include <linux/ctype.h> #include <linux/hash.h> +#include <linux/percpu_ida.h> #include <asm/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_host.h> @@ -89,16 +90,18 @@ static void ft_free_cmd(struct ft_cmd *cmd) { struct fc_frame *fp; struct fc_lport *lport; + struct se_session *se_sess; if (!cmd) return; + se_sess = cmd->sess->se_sess; fp = cmd->req_frame; lport = fr_dev(fp); if (fr_seq(fp)) lport->tt.seq_release(fr_seq(fp)); fc_frame_free(fp); + percpu_ida_free(&se_sess->sess_tag_pool, cmd->se_cmd.map_tag); ft_sess_put(cmd->sess); /* undo get from lookup at recv */ - kfree(cmd); } void ft_release_cmd(struct se_cmd *se_cmd) @@ -432,14 +435,21 @@ static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp) { struct ft_cmd *cmd; struct fc_lport *lport = sess->tport->lport; + struct se_session *se_sess = sess->se_sess; + int tag; - cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); - if (!cmd) + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + if (tag < 0) goto busy; + + cmd = &((struct ft_cmd *)se_sess->sess_cmd_map)[tag]; + memset(cmd, 0, sizeof(struct ft_cmd)); + + cmd->se_cmd.map_tag = tag; cmd->sess = sess; cmd->seq = lport->tt.seq_assign(lport, fp); if (!cmd->seq) { - kfree(cmd); + percpu_ida_free(&se_sess->sess_tag_pool, tag); goto busy; } cmd->req_frame = fp; /* hold frame during cmd */ diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 4e00508..c6932fb 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -571,16 +571,16 @@ int ft_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = ft_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = ft_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = ft_nacl_base_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; /* * register the fabric for use within TCM */ diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 4859505..ae52c08 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -210,7 +210,8 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id, if (!sess) return NULL; - sess->se_sess = transport_init_session(); + sess->se_sess = transport_init_session_tags(TCM_FC_DEFAULT_TAGS, + sizeof(struct ft_cmd)); if (IS_ERR(sess->se_sess)) { kfree(sess); return NULL; diff --git a/drivers/usb/gadget/tcm_usb_gadget.c b/drivers/usb/gadget/tcm_usb_gadget.c index eccea1d..6c3d795 100644 --- a/drivers/usb/gadget/tcm_usb_gadget.c +++ b/drivers/usb/gadget/tcm_usb_gadget.c @@ -1923,15 +1923,15 @@ static int usbg_register_configfs(void) } fabric->tf_ops = usbg_ops; - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = usbg_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = usbg_base_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = usbg_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = usbg_base_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; ret = target_fabric_configfs_register(fabric); if (ret < 0) { printk(KERN_ERR "target_fabric_configfs_register() failed" diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e663921..f175629 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -2168,15 +2168,15 @@ static int tcm_vhost_register_configfs(void) /* * Setup default attribute lists for various fabric->tf_cit_tmpl */ - TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = tcm_vhost_wwn_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = tcm_vhost_tpg_attrs; - TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL; - TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_vhost_wwn_attrs; + fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_vhost_tpg_attrs; + fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; /* * Register the fabric for use within TCM */ diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f039b10..b03dd23 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -43,23 +43,6 @@ #include "fid.h" /** - * v9fs_dentry_delete - called when dentry refcount equals 0 - * @dentry: dentry in question - * - * By returning 1 here we should remove cacheing of unused - * dentry components. - * - */ - -static int v9fs_dentry_delete(const struct dentry *dentry) -{ - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); - - return 1; -} - -/** * v9fs_cached_dentry_delete - called when dentry refcount equals 0 * @dentry: dentry in question * @@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { }; const struct dentry_operations v9fs_dentry_operations = { - .d_delete = v9fs_dentry_delete, + .d_delete = always_delete_dentry, .d_release = v9fs_dentry_release, }; @@ -80,6 +80,8 @@ struct kioctx { struct percpu_ref users; atomic_t dead; + struct percpu_ref reqs; + unsigned long user_id; struct __percpu kioctx_cpu *cpu; @@ -107,7 +109,6 @@ struct kioctx { struct page **ring_pages; long nr_pages; - struct rcu_head rcu_head; struct work_struct free_work; struct { @@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx) put_aio_ring_file(ctx); - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) + if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { kfree(ctx->ring_pages); + ctx->ring_pages = NULL; + } } static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) @@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) return cancel(kiocb); } -static void free_ioctx_rcu(struct rcu_head *head) +static void free_ioctx(struct work_struct *work) { - struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); + struct kioctx *ctx = container_of(work, struct kioctx, free_work); + pr_debug("freeing %p\n", ctx); + + aio_free_ring(ctx); free_percpu(ctx->cpu); kmem_cache_free(kioctx_cachep, ctx); } +static void free_ioctx_reqs(struct percpu_ref *ref) +{ + struct kioctx *ctx = container_of(ref, struct kioctx, reqs); + + INIT_WORK(&ctx->free_work, free_ioctx); + schedule_work(&ctx->free_work); +} + /* * When this function runs, the kioctx has been removed from the "hash table" * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ -static void free_ioctx(struct work_struct *work) +static void free_ioctx_users(struct percpu_ref *ref) { - struct kioctx *ctx = container_of(work, struct kioctx, free_work); - struct aio_ring *ring; + struct kioctx *ctx = container_of(ref, struct kioctx, users); struct kiocb *req; - unsigned cpu, avail; - DEFINE_WAIT(wait); spin_lock_irq(&ctx->ctx_lock); @@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work) spin_unlock_irq(&ctx->ctx_lock); - for_each_possible_cpu(cpu) { - struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); - - atomic_add(kcpu->reqs_available, &ctx->reqs_available); - kcpu->reqs_available = 0; - } - - while (1) { - prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE); - - ring = kmap_atomic(ctx->ring_pages[0]); - avail = (ring->head <= ring->tail) - ? ring->tail - ring->head - : ctx->nr_events - ring->head + ring->tail; - - atomic_add(avail, &ctx->reqs_available); - ring->head = ring->tail; - kunmap_atomic(ring); - - if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1) - break; - - schedule(); - } - finish_wait(&ctx->wait, &wait); - - WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1); - - aio_free_ring(ctx); - - pr_debug("freeing %p\n", ctx); - - /* - * Here the call_rcu() is between the wait_event() for reqs_active to - * hit 0, and freeing the ioctx. - * - * aio_complete() decrements reqs_active, but it has to touch the ioctx - * after to issue a wakeup so we use rcu. - */ - call_rcu(&ctx->rcu_head, free_ioctx_rcu); -} - -static void free_ioctx_ref(struct percpu_ref *ref) -{ - struct kioctx *ctx = container_of(ref, struct kioctx, users); - - INIT_WORK(&ctx->free_work, free_ioctx); - schedule_work(&ctx->free_work); + percpu_ref_kill(&ctx->reqs); + percpu_ref_put(&ctx->reqs); } static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) @@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) } } +static void aio_nr_sub(unsigned nr) +{ + spin_lock(&aio_nr_lock); + if (WARN_ON(aio_nr - nr > aio_nr)) + aio_nr = 0; + else + aio_nr -= nr; + spin_unlock(&aio_nr_lock); +} + /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ @@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->max_reqs = nr_events; - if (percpu_ref_init(&ctx->users, free_ioctx_ref)) - goto out_freectx; + if (percpu_ref_init(&ctx->users, free_ioctx_users)) + goto err; + + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) + goto err; spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->completion_lock); @@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->cpu = alloc_percpu(struct kioctx_cpu); if (!ctx->cpu) - goto out_freeref; + goto err; if (aio_setup_ring(ctx) < 0) - goto out_freepcpu; + goto err; atomic_set(&ctx->reqs_available, ctx->nr_events - 1); ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); @@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) if (aio_nr + nr_events > (aio_max_nr * 2UL) || aio_nr + nr_events < aio_nr) { spin_unlock(&aio_nr_lock); - goto out_cleanup; + err = -EAGAIN; + goto err; } aio_nr += ctx->max_reqs; spin_unlock(&aio_nr_lock); @@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err = ioctx_add_table(ctx, mm); if (err) - goto out_cleanup_put; + goto err_cleanup; pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, mm, ctx->nr_events); return ctx; -out_cleanup_put: - percpu_ref_put(&ctx->users); -out_cleanup: - err = -EAGAIN; - aio_free_ring(ctx); -out_freepcpu: +err_cleanup: + aio_nr_sub(ctx->max_reqs); +err: free_percpu(ctx->cpu); -out_freeref: + free_percpu(ctx->reqs.pcpu_count); free_percpu(ctx->users.pcpu_count); -out_freectx: - put_aio_ring_file(ctx); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); @@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) * -EAGAIN with no ioctxs actually in use (as far as userspace * could tell). */ - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - ctx->max_reqs > aio_nr); - aio_nr -= ctx->max_reqs; - spin_unlock(&aio_nr_lock); + aio_nr_sub(ctx->max_reqs); if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); @@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) if (unlikely(!req)) goto out_put; + percpu_ref_get(&ctx->reqs); + req->ki_ctx = ctx; return req; out_put: @@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) return; } - /* - * Take rcu_read_lock() in case the kioctx is being destroyed, as we - * need to issue a wakeup after incrementing reqs_available. - */ - rcu_read_lock(); - if (iocb->ki_list.next) { unsigned long flags; @@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - rcu_read_unlock(); + percpu_ref_put(&ctx->reqs); } EXPORT_SYMBOL(aio_complete); @@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, return 0; out_put_req: put_reqs_available(ctx, 1); + percpu_ref_put(&ctx->reqs); kiocb_free(req); return ret; } diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index f9d5094..aa976ec 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -9,12 +9,17 @@ config BTRFS_FS select XOR_BLOCKS help - Btrfs is a new filesystem with extents, writable snapshotting, - support for multiple devices and many more features. + Btrfs is a general purpose copy-on-write filesystem with extents, + writable snapshotting, support for multiple devices and many more + features focused on fault tolerance, repair and easy administration. - Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET - FINALIZED. You should say N here unless you are interested in - testing Btrfs with non-critical data. + The filesystem disk format is no longer unstable, and it's not + expected to change unless there are strong reasons to do so. If there + is a format change, file systems with a unchanged format will + continue to be mountable and usable by newer kernels. + + For more information, please see the web pages at + http://btrfs.wiki.kernel.org. To compile this file system support as a module, choose M here. The module will be called btrfs. diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8aec751..c1e0b0c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) spin_lock_irq(&workers->lock); if (workers->stopping) { spin_unlock_irq(&workers->lock); + ret = -EINVAL; goto fail_kthread; } list_add_tail(&worker->worker_list, &workers->idle_list); diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e0aab44..b50764b 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -77,6 +77,15 @@ * the integrity of (super)-block write requests, do not * enable the config option BTRFS_FS_CHECK_INTEGRITY to * include and compile the integrity check tool. + * + * Expect millions of lines of information in the kernel log with an + * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the + * kernel config to at least 26 (which is 64MB). Usually the value is + * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be + * changed like this before LOG_BUF_SHIFT can be set to a high value: + * config LOG_BUF_SHIFT + * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" + * range 12 30 */ #include <linux/sched.h> @@ -124,6 +133,7 @@ #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 +#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000 struct btrfsic_dev_state; struct btrfsic_state; @@ -3015,6 +3025,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) (rw & WRITE) && NULL != bio->bi_io_vec) { unsigned int i; u64 dev_bytenr; + u64 cur_bytenr; int bio_is_patched; char **mapped_datav; @@ -3033,6 +3044,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) GFP_NOFS); if (!mapped_datav) goto leave; + cur_bytenr = dev_bytenr; for (i = 0; i < bio->bi_vcnt; i++) { BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); @@ -3044,16 +3056,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio) kfree(mapped_datav); goto leave; } - if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | - BTRFSIC_PRINT_MASK_VERBOSE) == - (dev_state->state->print_mask & - (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | - BTRFSIC_PRINT_MASK_VERBOSE))) + if (dev_state->state->print_mask & + BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) printk(KERN_INFO - "#%u: page=%p, len=%u, offset=%u\n", - i, bio->bi_io_vec[i].bv_page, - bio->bi_io_vec[i].bv_len, + "#%u: bytenr=%llu, len=%u, offset=%u\n", + i, cur_bytenr, bio->bi_io_vec[i].bv_len, bio->bi_io_vec[i].bv_offset); + cur_bytenr += bio->bi_io_vec[i].bv_len; } btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, bio->bi_vcnt, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f9aeb27..54ab861 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig); -int btrfs_csum_truncate(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 isize); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); /* inode.c */ @@ -3744,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); -int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, - u64 start, u64 end, int skip_pinned, - int modified); extern const struct file_operations btrfs_file_operations; int __btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 342f9fd..2cfc3dff 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -366,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, dev_replace->tgtdev = tgt_device; printk_in_rcu(KERN_INFO - "btrfs: dev_replace from %s (devid %llu) to %s) started\n", + "btrfs: dev_replace from %s (devid %llu) to %s started\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c4ed0b..8072cfa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3517,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) int btrfs_commit_super(struct btrfs_root *root) { struct btrfs_trans_handle *trans; - int ret; mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); @@ -3531,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - /* run commit again to drop the original snapshot */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - ret = btrfs_write_and_wait_transaction(NULL, root); - if (ret) { - btrfs_error(root->fs_info, ret, - "Failed to sync btree inode to disk."); - return ret; - } - - ret = write_ctree_super(NULL, root, 0); - return ret; + return btrfs_commit_transaction(trans, root); } int close_ctree(struct btrfs_root *root) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 856bc2b..8e457fc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1980,6 +1980,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; int ret; + ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); BUG_ON(!mirror_num); /* we can't repair anything in raid56 yet */ @@ -2036,6 +2037,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); int ret = 0; + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, @@ -2057,12 +2061,12 @@ static int clean_io_failure(u64 start, struct page *page) u64 private; u64 private_failure; struct io_failure_record *failrec; - struct btrfs_fs_info *fs_info; + struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct extent_state *state; int num_copies; int did_repair = 0; int ret; - struct inode *inode = page->mapping->host; private = 0; ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, @@ -2085,6 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page) did_repair = 1; goto out; } + if (fs_info->sb->s_flags & MS_RDONLY) + goto out; spin_lock(&BTRFS_I(inode)->io_tree.lock); state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, @@ -2094,7 +2100,6 @@ static int clean_io_failure(u64 start, struct page *page) if (state && state->start <= failrec->start && state->end >= failrec->start + failrec->len - 1) { - fs_info = BTRFS_I(inode)->root->fs_info; num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da8d2f6..f1a7744 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2129,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, old->extent_offset, fs_info, path, record_one_backref, old); - BUG_ON(ret < 0 && ret != -ENOENT); + if (ret < 0 && ret != -ENOENT) + return false; /* no backref to be processed for this extent */ if (!old->count) { @@ -6186,8 +6187,7 @@ insert: write_unlock(&em_tree->lock); out: - if (em) - trace_btrfs_get_extent(root, em); + trace_btrfs_get_extent(root, em); if (path) btrfs_free_path(path); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 25a8f38..69582d5 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -638,6 +638,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) WARN_ON(nr < 0); } } + list_splice_tail(&splice, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); } @@ -803,7 +804,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) btrfs_put_ordered_extent(ordered); break; } - if (ordered->file_offset + ordered->len < start) { + if (ordered->file_offset + ordered->len <= start) { btrfs_put_ordered_extent(ordered); break; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2544805..561e2f1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -938,8 +938,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) BTRFS_DEV_STAT_CORRUPTION_ERRS); } - if (sctx->readonly && !sctx->is_dev_replace) - goto did_not_correct_error; + if (sctx->readonly) { + ASSERT(!sctx->is_dev_replace); + goto out; + } if (!is_metadata && !have_csum) { struct scrub_fixup_nodatasum *fixup_nodatasum; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 57c16b4..c6a872a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1480,7 +1480,7 @@ static void do_async_commit(struct work_struct *work) * We've got freeze protection passed with the transaction. * Tell lockdep about it. */ - if (ac->newtrans->type < TRANS_JOIN_NOLOCK) + if (ac->newtrans->type & __TRANS_FREEZABLE) rwsem_acquire_read( &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 0, 1, _THIS_IP_); @@ -1521,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, * Tell lockdep we've released the freeze rwsem, since the * async commit thread will be the one to unlock it. */ - if (trans->type < TRANS_JOIN_NOLOCK) + if (ac->newtrans->type & __TRANS_FREEZABLE) rwsem_release( &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 1, _THIS_IP_); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 744553c..9f7fc51 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3697,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags)) { + &BTRFS_I(inode)->runtime_flags) || + inode_only == LOG_INODE_EXISTS) { if (inode_only == LOG_INODE_ALL) fast_search = true; max_key.type = BTRFS_XATTR_ITEM_KEY; @@ -3801,7 +3802,7 @@ log_extents: err = ret; goto out_unlock; } - } else { + } else if (inode_only == LOG_INODE_ALL) { struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em, *n; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0db6370..92303f4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5394,7 +5394,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, { struct bio_vec *prev; struct request_queue *q = bdev_get_queue(bdev); - unsigned short max_sectors = queue_max_sectors(q); + unsigned int max_sectors = queue_max_sectors(q); struct bvec_merge_data bvm = { .bi_bdev = bdev, .bi_sector = sector, diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 277bd1b..e081acb 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -56,29 +56,28 @@ static void configfs_d_iput(struct dentry * dentry, struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { - BUG_ON(sd->s_dentry != dentry); /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); - sd->s_dentry = NULL; + /* Coordinate with configfs_attach_attr where will increase + * sd->s_count and update sd->s_dentry to new allocated one. + * Only set sd->dentry to null when this dentry is the only + * sd owner. + * If not do so, configfs_d_iput may run just after + * configfs_attach_attr and set sd->s_dentry to null + * even it's still in use. + */ + if (atomic_read(&sd->s_count) <= 2) + sd->s_dentry = NULL; + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } iput(inode); } -/* - * We _must_ delete our dentries on last dput, as the chain-to-parent - * behavior is required to clear the parents of default_groups. - */ -static int configfs_d_delete(const struct dentry *dentry) -{ - return 1; -} - const struct dentry_operations configfs_dentry_ops = { .d_iput = configfs_d_iput, - /* simple_delete_dentry() isn't exported */ - .d_delete = configfs_d_delete, + .d_delete = always_delete_dentry, }; #ifdef CONFIG_LOCKDEP @@ -426,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den struct configfs_attribute * attr = sd->s_element; int error; + spin_lock(&configfs_dirent_lock); dentry->d_fsdata = configfs_get(sd); sd->s_dentry = dentry; + spin_unlock(&configfs_dirent_lock); + error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, configfs_init_file); if (error) { diff --git a/fs/coredump.c b/fs/coredump.c index 62406b6..bc3fbcd 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -695,7 +695,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) while (nr) { if (dump_interrupted()) return 0; - n = vfs_write(file, addr, nr, &pos); + n = __kernel_write(file, addr, nr, &pos); if (n <= 0) return 0; file->f_pos = pos; @@ -733,7 +733,7 @@ int dump_align(struct coredump_params *cprm, int align) { unsigned mod = cprm->written & (align - 1); if (align & (align - 1)) - return -EINVAL; - return mod ? dump_skip(cprm, align - mod) : 0; + return 0; + return mod ? dump_skip(cprm, align - mod) : 1; } EXPORT_SYMBOL(dump_align); diff --git a/fs/dcache.c b/fs/dcache.c index 0a38ef8..4bdb300 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; -/** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked - * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. - */ -static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) -{ - if (!(*seq & 1)) /* Even */ - *seq = read_seqbegin(lock); - else /* Odd */ - read_seqlock_excl(lock); -} - -static inline int need_seqretry(seqlock_t *lock, int seq) -{ - return !(seq & 1) && read_seqretry(lock, seq); -} - -static inline void done_seqretry(seqlock_t *lock, int seq) -{ - if (seq & 1) - read_sequnlock_excl(lock); -} - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq) * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ -#define D_HASHBITS d_hash_shift -#define D_HASHMASK d_hash_mask static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; @@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); + hash = hash + (hash >> d_hash_shift); + return dentry_hashtable + (hash & d_hash_mask); } /* Statistics gathering. */ @@ -469,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) { list_del(&dentry->d_u.d_child); /* - * Inform try_to_ascend() that we are no longer attached to the + * Inform d_walk() that we are no longer attached to the * dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; @@ -1069,34 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb) } EXPORT_SYMBOL(shrink_dcache_sb); -/* - * This tries to ascend one level of parenthood, but - * we can race with renaming, so we need to re-check - * the parenthood after dropping the lock and check - * that the sequence number still matches. - */ -static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) -{ - struct dentry *new = old->d_parent; - - rcu_read_lock(); - spin_unlock(&old->d_lock); - spin_lock(&new->d_lock); - - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (new != old->d_parent || - (old->d_flags & DCACHE_DENTRY_KILLED) || - need_seqretry(&rename_lock, seq)) { - spin_unlock(&new->d_lock); - new = NULL; - } - rcu_read_unlock(); - return new; -} - /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk @@ -1185,9 +1126,24 @@ resume: */ if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, seq); - if (!this_parent) + this_parent = child->d_parent; + + rcu_read_lock(); + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (this_parent != child->d_parent || + (child->d_flags & DCACHE_DENTRY_KILLED) || + need_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); goto rename_retry; + } + rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2229a74..b1eaa7a 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -313,11 +313,9 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) static long ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct file *lower_file = NULL; + struct file *lower_file = ecryptfs_file_to_lower(file); long rc = -ENOTTY; - if (ecryptfs_file_to_private(file)) - lower_file = ecryptfs_file_to_lower(file); if (lower_file->f_op->unlocked_ioctl) rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); return rc; @@ -327,11 +325,9 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static long ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct file *lower_file = NULL; + struct file *lower_file = ecryptfs_file_to_lower(file); long rc = -ENOIOCTLCMD; - if (ecryptfs_file_to_private(file)) - lower_file = ecryptfs_file_to_lower(file); if (lower_file->f_op && lower_file->f_op->compat_ioctl) rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); return rc; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index a8766b8..becc725 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) return 0; } -/* - * Retaining negative dentries for an in-memory filesystem just wastes - * memory and lookup time: arrange for them to be deleted immediately. - */ -static int efivarfs_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - static struct dentry_operations efivarfs_d_ops = { .d_compare = efivarfs_d_compare, .d_hash = efivarfs_d_hash, - .d_delete = efivarfs_delete_dentry, + .d_delete = always_delete_dentry, }; static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) @@ -1380,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval) return retval; - retval = audit_bprm(bprm); - if (retval) - return retval; - retval = -ENOENT; retry: read_lock(&binfmt_lock); @@ -1431,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm) ret = search_binary_handler(bprm); if (ret >= 0) { + audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); current->did_exec = 1; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e66a800..c8420f7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1899,7 +1899,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gi->nhash = 0; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref)); + } while (gi->sdp != gi->gl->gl_sbd || + __lockref_is_dead(&gi->gl->gl_lockref)); return 0; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1615df1..7119504 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1171,8 +1171,11 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (d != NULL) dentry = d; if (dentry->d_inode) { - if (!(*opened & FILE_OPENED)) + if (!(*opened & FILE_OPENED)) { + if (d == NULL) + dget(dentry); return finish_no_open(file, dentry); + } dput(d); return 0; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c8423d6..2a6ba06 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl) static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); - memcpy(&gen, lvb_bits, sizeof(uint32_t)); + memcpy(&gen, lvb_bits, sizeof(__le32)); *lvb_gen = le32_to_cpu(gen); } static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); gen = cpu_to_le32(lvb_gen); - memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); + memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); } static int all_jid_bits_clear(char *lvb) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 453b50e..98236d0 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -667,7 +667,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct buffer_head *bh; struct page *page; void *kaddr, *ptr; - struct gfs2_quota q, *qp; + struct gfs2_quota q; int err, nbytes; u64 size; @@ -683,28 +683,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, return err; err = -EIO; - qp = &q; - qp->qu_value = be64_to_cpu(qp->qu_value); - qp->qu_value += change; - qp->qu_value = cpu_to_be64(qp->qu_value); - qd->qd_qb.qb_value = qp->qu_value; + be64_add_cpu(&q.qu_value, change); + qd->qd_qb.qb_value = q.qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_warn = qp->qu_warn; + q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_warn = q.qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_limit = qp->qu_limit; + q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_limit = q.qu_limit; } if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_value = qp->qu_value; + q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_value = q.qu_value; } } /* Write the quota into the quota file on disk */ - ptr = qp; + ptr = &q; nbytes = sizeof(struct gfs2_quota); get_a_page: page = find_or_create_page(mapping, index, GFP_NOFS); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d83abd..c8d6161 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1127,7 +1127,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; } - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); @@ -1161,7 +1161,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd) if (rgd->rd_flags & GFS2_RDF_UPTODATE) return 0; - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) return gfs2_rgrp_bh_get(rgd); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2543728..db23ce1 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) -static int hostfs_d_delete(const struct dentry *dentry) -{ - return 1; -} - -static const struct dentry_operations hostfs_dentry_ops = { - .d_delete = hostfs_d_delete, -}; - /* Changed in hostfs_args before the kernel starts running */ static char *root_ino = ""; static int append = 0; @@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; - sb->s_d_op = &hostfs_dentry_ops; + sb->s_d_op = &simple_dentry_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as <NULL> by sprintf: avoid that. */ @@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs); * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ -static int simple_delete_dentry(const struct dentry *dentry) +int always_delete_dentry(const struct dentry *dentry) { return 1; } +EXPORT_SYMBOL(always_delete_dentry); + +const struct dentry_operations simple_dentry_operations = { + .d_delete = always_delete_dentry, +}; +EXPORT_SYMBOL(simple_dentry_operations); /* * Lookup the data. This is trivial - if the dentry didn't already @@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry) */ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - static const struct dentry_operations simple_dentry_operations = { - .d_delete = simple_delete_dentry, - }; - if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (!dentry->d_sb->s_d_op) @@ -2435,6 +2435,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) */ static inline int may_create(struct inode *dir, struct dentry *child) { + audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 088de13..ee7237f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -141,8 +141,8 @@ xdr_error: \ static void next_decode_page(struct nfsd4_compoundargs *argp) { - argp->pagelist++; argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; @@ -1229,6 +1229,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) len -= pages * PAGE_SIZE; argp->p = (__be32 *)page_address(argp->pagelist[0]); + argp->pagelist++; argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } argp->p += XDR_QUADLEN(len); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 94b5f5d..7eea63c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -298,41 +298,12 @@ commit_metadata(struct svc_fh *fhp) } /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects. */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - struct dentry *dentry; - struct inode *inode; - int accmode = NFSD_MAY_SATTR; - umode_t ftype = 0; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; - - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - goto out; - /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -342,8 +313,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. It is only - * valid if the filesystem does not define it's own i_op->setattr. + * it is not an interface that we should be using. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -369,30 +339,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~BOTH_TIME_SET; } } - - /* - * The size case is special. - * It changes the file as well as the attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, - NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserr; - - size_change = 1; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) { - put_write_access(inode); - goto out_nfserr; - } - } /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { @@ -415,32 +361,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } } +} - /* Change the attributes. */ +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + int host_err; - iap->ia_valid |= ATTR_CTIME; + if (iap->ia_size < inode->i_size) { + __be32 err; - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - host_err = nfsd_break_lease(inode); - if (host_err) - goto out_nfserr; - fh_lock(fhp); + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; + } - host_err = notify_change(dentry, iap, NULL); - err = nfserrno(host_err); - fh_unlock(fhp); + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; + + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; + +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); +} + +/* + * Set various file attributes. After this call fhp needs an fh_put. + */ +__be32 +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) +{ + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; + int host_err; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + + if (!iap->ia_valid) + goto out; + + nfsd_sanitize_attrs(inode, iap); + + /* + * The size case is special, it changes the file in addition to the + * attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; } + + iap->ia_valid |= ATTR_CTIME; + + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; + } + + host_err = nfsd_break_lease(inode); + if (host_err) + goto out_put_write_access_nfserror; + + fh_lock(fhp); + host_err = notify_change(dentry, iap, NULL); + fh_unlock(fhp); + +out_put_write_access_nfserror: + err = nfserrno(host_err); +out_put_write_access: if (size_change) put_write_access(inode); if (!err) commit_metadata(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } #if defined(CONFIG_NFSD_V2_ACL) || \ diff --git a/fs/proc/base.c b/fs/proc/base.c index 1485e38..03c8d74 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1151,10 +1151,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - kloginuid = make_kuid(file->f_cred->user_ns, loginuid); - if (!uid_valid(kloginuid)) { - length = -EINVAL; - goto out_free_page; + + /* is userspace tring to explicitly UNSET the loginuid? */ + if (loginuid == AUDIT_UID_UNSET) { + kloginuid = INVALID_UID; + } else { + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; + } } length = audit_set_loginuid(kloginuid); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 737e156..cca93b6 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -175,22 +175,6 @@ static const struct inode_operations proc_link_inode_operations = { }; /* - * As some entries in /proc are volatile, we want to - * get rid of unused dentries. This could be made - * smarter: we could keep a "volatile" flag in the - * inode to indicate which ones to keep. - */ -static int proc_delete_dentry(const struct dentry * dentry) -{ - return 1; -} - -static const struct dentry_operations proc_dentry_operations = -{ - .d_delete = proc_delete_dentry, -}; - -/* * Don't create negative dentries here, return -ENOENT by hand * instead. */ @@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_dentry_operations); + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 49a7fff..9ae46b8 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = { .setattr = proc_setattr, }; -static int ns_delete_dentry(const struct dentry *dentry) -{ - /* Don't cache namespace inodes when not in use */ - return 1; -} - static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) { struct inode *inode = dentry->d_inode; @@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) const struct dentry_operations ns_dentry_operations = { - .d_delete = ns_delete_dentry, + .d_delete = always_delete_dentry, .d_dname = ns_dname, }; diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index c70111e..b6fa865 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,78 @@ config SQUASHFS If unsure, say N. +choice + prompt "File decompression options" + depends on SQUASHFS + help + Squashfs now supports two options for decompressing file + data. Traditionally Squashfs has decompressed into an + intermediate buffer and then memcopied it into the page cache. + Squashfs now supports the ability to decompress directly into + the page cache. + + If unsure, select "Decompress file data into an intermediate buffer" + +config SQUASHFS_FILE_CACHE + bool "Decompress file data into an intermediate buffer" + help + Decompress file data into an intermediate buffer and then + memcopy it into the page cache. + +config SQUASHFS_FILE_DIRECT + bool "Decompress files directly into the page cache" + help + Directly decompress file data into the page cache. + Doing so can significantly improve performance because + it eliminates a memcpy and it also removes the lock contention + on the single buffer. + +endchoice + +choice + prompt "Decompressor parallelisation options" + depends on SQUASHFS + help + Squashfs now supports three parallelisation options for + decompression. Each one exhibits various trade-offs between + decompression performance and CPU and memory usage. + + If in doubt, select "Single threaded compression" + +config SQUASHFS_DECOMP_SINGLE + bool "Single threaded compression" + help + Traditionally Squashfs has used single-threaded decompression. + Only one block (data or metadata) can be decompressed at any + one time. This limits CPU and memory usage to a minimum. + +config SQUASHFS_DECOMP_MULTI + bool "Use multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + If you have a parallel I/O workload and your system has enough memory, + using this option may improve overall I/O performance. + + This decompressor implementation uses up to two parallel + decompressors per core. It dynamically allocates decompressors + on a demand basis. + +config SQUASHFS_DECOMP_MULTI_PERCPU + bool "Use percpu multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + This decompressor implementation uses a maximum of one + decompressor per core. It uses percpu variables to ensure + decompression is load-balanced across the cores. + +endchoice + config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 110b047..4132520 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,6 +5,11 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 41d108e..0cea9b9 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -36,6 +36,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" /* * Read the metadata block length, this is stored in the first two @@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb, * generated a larger block - this does occasionally happen with compression * algorithms). */ -int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, - int length, u64 *next_index, int srclength, int pages) +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head **bh; int offset = index & ((1 << msblk->devblksize_log2) - 1); u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, page = 0, avail; + int bytes, compressed, b = 0, k = 0, avail, i; - bh = kcalloc(((srclength + msblk->devblksize - 1) + bh = kcalloc(((output->length + msblk->devblksize - 1) >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); if (bh == NULL) return -ENOMEM; @@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, *next_index = index + length; TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", - index, compressed ? "" : "un", length, srclength); + index, compressed ? "" : "un", length, output->length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto read_failure; @@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed ? "" : "un", length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto block_release; @@ -158,9 +159,15 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, ll_rw_block(READ, b - 1, bh + 1); } + for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + } + if (compressed) { - length = squashfs_decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + length = squashfs_decompress(msblk, bh, b, offset, length, + output); if (length < 0) goto read_failure; } else { @@ -168,22 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, * Block is uncompressed. */ int in, pg_offset = 0; + void *data = squashfs_first_page(output); for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { - page++; + data = squashfs_next_page(output); pg_offset = 0; } avail = min_t(int, in, PAGE_CACHE_SIZE - pg_offset); - memcpy(buffer[page] + pg_offset, - bh[k]->b_data + offset, avail); + memcpy(data + pg_offset, bh[k]->b_data + offset, + avail); in -= avail; pg_offset += avail; offset += avail; @@ -191,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, offset = 0; put_bh(bh[k]); } + squashfs_finish_page(output); } kfree(bh); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index af0b738..1cb70a0 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -56,6 +56,7 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" +#include "page_actor.h" /* * Look-up block in cache, and increment usage count. If not in cache, read @@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, entry->error = 0; spin_unlock(&cache->lock); - entry->length = squashfs_read_data(sb, entry->data, - block, length, &entry->next_index, - cache->block_size, cache->pages); + entry->length = squashfs_read_data(sb, block, length, + &entry->next_index, entry->actor); spin_lock(&cache->lock); @@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) kfree(cache->entry[i].data[j]); kfree(cache->entry[i].data); } + kfree(cache->entry[i].actor); } kfree(cache->entry); @@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, goto cleanup; } } + + entry->actor = squashfs_page_actor_init(entry->data, + cache->pages, 0); + if (entry->actor == NULL) { + ERROR("Failed to allocate %s cache entry\n", name); + goto cleanup; + } } return cache; @@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; int i, res; void *table, *buffer, **data; + struct squashfs_page_actor *actor; table = buffer = kmalloc(length, GFP_KERNEL); if (table == NULL) @@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) goto failed; } + actor = squashfs_page_actor_init(data, pages, length); + if (actor == NULL) { + res = -ENOMEM; + goto failed2; + } + for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) data[i] = buffer; - res = squashfs_read_data(sb, data, block, length | - SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); + res = squashfs_read_data(sb, block, length | + SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); kfree(data); + kfree(actor); if (res < 0) goto failed; return table; +failed2: + kfree(data); failed: kfree(table); return ERR_PTR(res); diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 3f6271d..ac22fe7 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -30,6 +30,7 @@ #include "squashfs_fs_sb.h" #include "decompressor.h" #include "squashfs.h" +#include "page_actor.h" /* * This file (and decompressor.h) implements a decompressor framework for @@ -37,29 +38,29 @@ */ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { - NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 + NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 }; #ifndef CONFIG_SQUASHFS_LZO static const struct squashfs_decompressor squashfs_lzo_comp_ops = { - NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 + NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 }; #endif #ifndef CONFIG_SQUASHFS_XZ static const struct squashfs_decompressor squashfs_xz_comp_ops = { - NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 + NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 }; #endif #ifndef CONFIG_SQUASHFS_ZLIB static const struct squashfs_decompressor squashfs_zlib_comp_ops = { - NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 + NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 }; #endif static const struct squashfs_decompressor squashfs_unknown_comp_ops = { - NULL, NULL, NULL, 0, "unknown", 0 + NULL, NULL, NULL, NULL, 0, "unknown", 0 }; static const struct squashfs_decompressor *decompressor[] = { @@ -83,10 +84,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) } -void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) +static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *strm, *buffer = NULL; + void *buffer = NULL, *comp_opts; + struct squashfs_page_actor *actor = NULL; int length = 0; /* @@ -94,23 +96,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) */ if (SQUASHFS_COMP_OPTS(flags)) { buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (buffer == NULL) - return ERR_PTR(-ENOMEM); + if (buffer == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + actor = squashfs_page_actor_init(&buffer, 1, 0); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } - length = squashfs_read_data(sb, &buffer, - sizeof(struct squashfs_super_block), 0, NULL, - PAGE_CACHE_SIZE, 1); + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); if (length < 0) { - strm = ERR_PTR(length); - goto finished; + comp_opts = ERR_PTR(length); + goto out; } } - strm = msblk->decompressor->init(msblk, buffer, length); + comp_opts = squashfs_comp_opts(msblk, buffer, length); -finished: +out: + kfree(actor); kfree(buffer); + return comp_opts; +} + + +void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + void *stream, *comp_opts = get_comp_opts(sb, flags); + + if (IS_ERR(comp_opts)) + return comp_opts; + + stream = squashfs_decompressor_create(msblk, comp_opts); + if (IS_ERR(stream)) + kfree(comp_opts); - return strm; + return stream; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 330073e..af09853 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -24,28 +24,22 @@ */ struct squashfs_decompressor { - void *(*init)(struct squashfs_sb_info *, void *, int); + void *(*init)(struct squashfs_sb_info *, void *); + void *(*comp_opts)(struct squashfs_sb_info *, void *, int); void (*free)(void *); - int (*decompress)(struct squashfs_sb_info *, void **, - struct buffer_head **, int, int, int, int, int); + int (*decompress)(struct squashfs_sb_info *, void *, + struct buffer_head **, int, int, int, + struct squashfs_page_actor *); int id; char *name; int supported; }; -static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, - void *s) +static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int length) { - if (msblk->decompressor) - msblk->decompressor->free(s); -} - -static inline int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) -{ - return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + return msblk->decompressor->comp_opts ? + msblk->decompressor->comp_opts(msblk, buff, length) : NULL; } #ifdef CONFIG_SQUASHFS_XZ diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c new file mode 100644 index 0000000..d6008a6 --- /dev/null +++ b/fs/squashfs/decompressor_multi.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2013 + * Minchan Kim <minchan@kernel.org> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/cpumask.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression in the + * decompressor framework + */ + + +/* + * The reason that multiply two is that a CPU can request new I/O + * while it is waiting previous request. + */ +#define MAX_DECOMPRESSOR (num_online_cpus() * 2) + + +int squashfs_max_decompressors(void) +{ + return MAX_DECOMPRESSOR; +} + + +struct squashfs_stream { + void *comp_opts; + struct list_head strm_list; + struct mutex mutex; + int avail_decomp; + wait_queue_head_t wait; +}; + + +struct decomp_stream { + void *stream; + struct list_head list; +}; + + +static void put_decomp_stream(struct decomp_stream *decomp_strm, + struct squashfs_stream *stream) +{ + mutex_lock(&stream->mutex); + list_add(&decomp_strm->list, &stream->strm_list); + mutex_unlock(&stream->mutex); + wake_up(&stream->wait); +} + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct decomp_stream *decomp_strm = NULL; + int err = -ENOMEM; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + goto out; + + stream->comp_opts = comp_opts; + mutex_init(&stream->mutex); + INIT_LIST_HEAD(&stream->strm_list); + init_waitqueue_head(&stream->wait); + + /* + * We should have a decompressor at least as default + * so if we fail to allocate new decompressor dynamically, + * we could always fall back to default decompressor and + * file system works. + */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto out; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + err = PTR_ERR(decomp_strm->stream); + goto out; + } + + list_add(&decomp_strm->list, &stream->strm_list); + stream->avail_decomp = 1; + return stream; + +out: + kfree(decomp_strm); + kfree(stream); + return ERR_PTR(err); +} + + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + if (stream) { + struct decomp_stream *decomp_strm; + + while (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + msblk->decompressor->free(decomp_strm->stream); + kfree(decomp_strm); + stream->avail_decomp--; + } + WARN_ON(stream->avail_decomp); + kfree(stream->comp_opts); + kfree(stream); + } +} + + +static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, + struct squashfs_stream *stream) +{ + struct decomp_stream *decomp_strm; + + while (1) { + mutex_lock(&stream->mutex); + + /* There is available decomp_stream */ + if (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + mutex_unlock(&stream->mutex); + break; + } + + /* + * If there is no available decomp and already full, + * let's wait for releasing decomp from other users. + */ + if (stream->avail_decomp >= MAX_DECOMPRESSOR) + goto wait; + + /* Let's allocate new decomp */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto wait; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + kfree(decomp_strm); + goto wait; + } + + stream->avail_decomp++; + WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); + + mutex_unlock(&stream->mutex); + break; +wait: + /* + * If system memory is tough, let's for other's + * releasing instead of hurting VM because it could + * make page cache thrashing. + */ + mutex_unlock(&stream->mutex); + wait_event(stream->wait, + !list_empty(&stream->strm_list)); + } + + return decomp_strm; +} + + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); + res = msblk->decompressor->decompress(msblk, decomp_stream->stream, + bh, b, offset, length, output); + put_decomp_stream(decomp_stream, stream); + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + return res; +} diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c new file mode 100644 index 0000000..23a9c28 --- /dev/null +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/percpu.h> +#include <linux/buffer_head.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression using percpu + * variables, one thread per cpu core. + */ + +struct squashfs_stream { + void *stream; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct squashfs_stream __percpu *percpu; + int err, cpu; + + percpu = alloc_percpu(struct squashfs_stream); + if (percpu == NULL) + return ERR_PTR(-ENOMEM); + + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + } + + kfree(comp_opts); + return (__force void *) percpu; + +out: + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + if (!IS_ERR_OR_NULL(stream->stream)) + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream; + int cpu; + + if (msblk->stream) { + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream = get_cpu_ptr(percpu); + int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); + put_cpu_ptr(stream); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return num_possible_cpus(); +} diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c new file mode 100644 index 0000000..a6c7592 --- /dev/null +++ b/fs/squashfs/decompressor_single.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements single-threaded decompression in the + * decompressor framework + */ + +struct squashfs_stream { + void *stream; + struct mutex mutex; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + int err = -ENOMEM; + + stream = kmalloc(sizeof(*stream), GFP_KERNEL); + if (stream == NULL) + goto out; + + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + + kfree(comp_opts); + mutex_init(&stream->mutex); + return stream; + +out: + kfree(stream); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + + if (stream) { + msblk->decompressor->free(stream->stream); + kfree(stream); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + + mutex_lock(&stream->mutex); + res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); + mutex_unlock(&stream->mutex); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return 1; +} diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 8ca62c2..e5c9689 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return le32_to_cpu(size); } - -static int squashfs_readpage(struct file *file, struct page *page) +/* Copy data into page cache */ +void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, + int bytes, int offset) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes, i, offset = 0, sparse = 0; - struct squashfs_cache_entry *buffer = NULL; void *pageaddr; - - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); - int start_index = page->index & ~mask; - int end_index = start_index | mask; - int file_end = i_size_read(inode) >> msblk->block_log; - - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); - - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - goto out; - - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - /* - * Reading a datablock from disk. Need to read block list - * to get location and block size. - */ - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) - goto error_out; - - if (bsize == 0) { /* hole */ - bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - sparse = 1; - } else { - /* - * Read and decompress datablock. - */ - buffer = squashfs_get_datablock(inode->i_sb, - block, bsize); - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x" - "\n", block, bsize); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = buffer->length; - } - } else { - /* - * Datablock is stored inside a fragment (tail-end packed - * block). - */ - buffer = squashfs_get_fragment(inode->i_sb, - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x\n", - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = i_size_read(inode) & (msblk->block_size - 1); - offset = squashfs_i(inode)->fragment_offset; - } + int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = page->index & ~mask, end_index = start_index | mask; /* * Loop copying datablock into pages. As the datablock likely covers @@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page) for (i = start_index; i <= end_index && bytes > 0; i++, bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { struct page *push_page; - int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); + int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0; TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); @@ -475,11 +413,75 @@ skip_page: if (i != page->index) page_cache_release(push_page); } +} + +/* Read datablock stored packed inside a fragment (tail-end packed block) */ +static int squashfs_readpage_fragment(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + else + squashfs_copy_cache(page, buffer, i_size_read(inode) & + (msblk->block_size - 1), + squashfs_i(inode)->fragment_offset); + + squashfs_cache_put(buffer); + return res; +} - if (!sparse) - squashfs_cache_put(buffer); +static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int bytes = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + squashfs_copy_cache(page, NULL, bytes, 0); return 0; +} + +static int squashfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); + int file_end = i_size_read(inode) >> msblk->block_log; + int res; + void *pageaddr; + + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(inode)->start); + + if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + goto out; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; + + if (bsize == 0) + res = squashfs_readpage_sparse(page, index, file_end); + else + res = squashfs_readpage_block(page, block, bsize); + } else + res = squashfs_readpage_fragment(page); + + if (!res) + return 0; error_out: SetPageError(page); diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 0000000..f2310d2 --- /dev/null +++ b/fs/squashfs/file_cache.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* Read separately compressed datablock and memcopy into page cache */ +int squashfs_readpage_block(struct page *page, u64 block, int bsize) +{ + struct inode *i = page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + else + squashfs_copy_cache(page, buffer, buffer->length, 0); + + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c new file mode 100644 index 0000000..2943b2b --- /dev/null +++ b/fs/squashfs/file_direct.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" +#include "page_actor.h" + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page); + +/* Read separately compressed datablock directly into page cache */ +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) + +{ + struct inode *inode = target_page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + + int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = target_page->index & ~mask; + int end_index = start_index | mask; + int i, n, pages, missing_pages, bytes, res = -ENOMEM; + struct page **page; + struct squashfs_page_actor *actor; + void *pageaddr; + + if (end_index > file_end) + end_index = file_end; + + pages = end_index - start_index + 1; + + page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); + if (page == NULL) + return res; + + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(page, pages, 0); + if (actor == NULL) + goto out; + + /* Try to grab all the pages covered by the Squashfs block */ + for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + page[i] = (n == target_page->index) ? target_page : + grab_cache_page_nowait(target_page->mapping, n); + + if (page[i] == NULL) { + missing_pages++; + continue; + } + + if (PageUptodate(page[i])) { + unlock_page(page[i]); + page_cache_release(page[i]); + page[i] = NULL; + missing_pages++; + } + } + + if (missing_pages) { + /* + * Couldn't get one or more pages, this page has either + * been VM reclaimed, but others are still in the page cache + * and uptodate, or we're racing with another thread in + * squashfs_readpage also trying to grab them. Fall back to + * using an intermediate buffer. + */ + res = squashfs_read_cache(target_page, block, bsize, pages, + page); + goto out; + } + + /* Decompress directly into the page cache buffers */ + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + if (res < 0) + goto mark_errored; + + /* Last page may have trailing bytes not filled */ + bytes = res % PAGE_CACHE_SIZE; + if (bytes) { + pageaddr = kmap_atomic(page[pages - 1]); + memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + kunmap_atomic(pageaddr); + } + + /* Mark pages as uptodate, unlock and release */ + for (i = 0; i < pages; i++) { + flush_dcache_page(page[i]); + SetPageUptodate(page[i]); + unlock_page(page[i]); + if (page[i] != target_page) + page_cache_release(page[i]); + } + + kfree(actor); + kfree(page); + + return 0; + +mark_errored: + /* Decompression failed, mark pages as errored. Target_page is + * dealt with by the caller + */ + for (i = 0; i < pages; i++) { + if (page[i] == target_page) + continue; + flush_dcache_page(page[i]); + SetPageError(page[i]); + unlock_page(page[i]); + page_cache_release(page[i]); + } + +out: + kfree(actor); + kfree(page); + return res; +} + + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page) +{ + struct inode *i = target_page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int bytes = buffer->length, res = buffer->error, n, offset = 0; + void *pageaddr; + + if (res) { + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + goto out; + } + + for (n = 0; n < pages && bytes > 0; n++, + bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { + int avail = min_t(int, bytes, PAGE_CACHE_SIZE); + + if (page[n] == NULL) + continue; + + pageaddr = kmap_atomic(page[n]); + squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + kunmap_atomic(pageaddr); + flush_dcache_page(page[n]); + SetPageUptodate(page[n]); + unlock_page(page[n]); + if (page[n] != target_page) + page_cache_release(page[n]); + } + +out: + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 00f4dfc..244b9fb 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -31,13 +31,14 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_lzo { void *input; void *output; }; -static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) +static void *lzo_init(struct squashfs_sb_info *msblk, void *buff) { int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); @@ -74,22 +75,16 @@ static void lzo_free(void *strm) } -static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { - struct squashfs_lzo *stream = msblk->stream; - void *buff = stream->input; + struct squashfs_lzo *stream = strm; + void *buff = stream->input, *data; int avail, i, bytes = length, res; - size_t out_len = srclength; - - mutex_lock(&msblk->read_data_mutex); + size_t out_len = output->length; for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - avail = min(bytes, msblk->devblksize - offset); memcpy(buff, bh[i]->b_data + offset, avail); buff += avail; @@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, goto failed; res = bytes = (int)out_len; - for (i = 0, buff = stream->output; bytes && i < pages; i++) { - avail = min_t(int, bytes, PAGE_CACHE_SIZE); - memcpy(buffer[i], buff, avail); - buff += avail; - bytes -= avail; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } else { + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } } + squashfs_finish_page(output); - mutex_unlock(&msblk->read_data_mutex); return res; -block_release: - for (; i < b; i++) - put_bh(bh[i]); - failed: - mutex_unlock(&msblk->read_data_mutex); - - ERROR("lzo decompression failed, data probably corrupt\n"); return -EIO; } diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c new file mode 100644 index 0000000..5a1c11f --- /dev/null +++ b/fs/squashfs/page_actor.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include "page_actor.h" + +/* + * This file contains implementations of page_actor for decompressing into + * an intermediate buffer, and for decompressing directly into the + * page cache. + * + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). + */ + +/* Implementation of page_actor for decompressing into intermediate buffer */ +static void *cache_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->buffer[0]; +} + +static void *cache_next_page(struct squashfs_page_actor *actor) +{ + if (actor->next_page == actor->pages) + return NULL; + + return actor->buffer[actor->next_page++]; +} + +static void cache_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} + +struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->buffer = buffer; + actor->pages = pages; + actor->next_page = 0; + actor->squashfs_first_page = cache_first_page; + actor->squashfs_next_page = cache_next_page; + actor->squashfs_finish_page = cache_finish_page; + return actor; +} + +/* Implementation of page_actor for decompressing directly into page cache. */ +static void *direct_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->pageaddr = kmap_atomic(actor->page[0]); +} + +static void *direct_next_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); + + return actor->pageaddr = actor->next_page == actor->pages ? NULL : + kmap_atomic(actor->page[actor->next_page++]); +} + +static void direct_finish_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); +} + +struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + actor->pageaddr = NULL; + actor->squashfs_first_page = direct_first_page; + actor->squashfs_next_page = direct_next_page; + actor->squashfs_finish_page = direct_finish_page; + return actor; +} diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h new file mode 100644 index 0000000..26dd820 --- /dev/null +++ b/fs/squashfs/page_actor.h @@ -0,0 +1,81 @@ +#ifndef PAGE_ACTOR_H +#define PAGE_ACTOR_H +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef CONFIG_SQUASHFS_FILE_DIRECT +struct squashfs_page_actor { + void **page; + int pages; + int length; + int next_page; +}; + +static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + return actor; +} + +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->page[0]; +} + +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->next_page == actor->pages ? NULL : + actor->page[actor->next_page++]; +} + +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} +#else +struct squashfs_page_actor { + union { + void **buffer; + struct page **page; + }; + void *pageaddr; + void *(*squashfs_first_page)(struct squashfs_page_actor *); + void *(*squashfs_next_page)(struct squashfs_page_actor *); + void (*squashfs_finish_page)(struct squashfs_page_actor *); + int pages; + int length; + int next_page; +}; + +extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page + **, int, int); +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_first_page(actor); +} +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_next_page(actor); +} +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + actor->squashfs_finish_page(actor); +} +#endif +#endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d126651..9e1bb79 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,8 +28,8 @@ #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) /* block.c */ -extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, - int, int); +extern int squashfs_read_data(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int); /* decompressor.c */ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); -extern void *squashfs_decompressor_init(struct super_block *, unsigned short); +extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); + +/* decompressor_xxx.c */ +extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); +extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); +extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, + int, int, int, struct squashfs_page_actor *); +extern int squashfs_max_decompressors(void); /* export.c */ extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, @@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); +/* file.c */ +void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, + int); + +/* file_xxx.c */ +extern int squashfs_readpage_block(struct page *, u64, int); + /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 52934a2..1da565c 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -50,6 +50,7 @@ struct squashfs_cache_entry { wait_queue_head_t wait_queue; struct squashfs_cache *cache; void **data; + struct squashfs_page_actor *actor; }; struct squashfs_sb_info { @@ -63,10 +64,9 @@ struct squashfs_sb_info { __le64 *id_table; __le64 *fragment_index; __le64 *xattr_id_table; - struct mutex read_data_mutex; struct mutex meta_index_mutex; struct meta_index *meta_index; - void *stream; + struct squashfs_stream *stream; __le64 *inode_lookup_table; u64 inode_table; u64 directory_table; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 60553a9..202df63 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); msblk->devblksize_log2 = ffz(~msblk->devblksize); - mutex_init(&msblk->read_data_mutex); mutex_init(&msblk->meta_index_mutex); /* @@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; /* Allocate read_page block */ - msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); + msblk->read_page = squashfs_cache_init("data", + squashfs_max_decompressors(), msblk->block_size); if (msblk->read_page == NULL) { ERROR("Failed to allocate read_page block\n"); goto failed_mount; } - msblk->stream = squashfs_decompressor_init(sb, flags); + msblk->stream = squashfs_decompressor_setup(sb, flags); if (IS_ERR(msblk->stream)) { err = PTR_ERR(msblk->stream); msblk->stream = NULL; @@ -336,7 +336,7 @@ failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); - squashfs_decompressor_free(msblk, msblk->stream); + squashfs_decompressor_destroy(msblk); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); @@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); - squashfs_decompressor_free(sbi, sbi->stream); + squashfs_decompressor_destroy(sbi); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 1760b7d1..c609624 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -32,44 +32,70 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_xz { struct xz_dec *state; struct xz_buf buf; }; -struct comp_opts { +struct disk_comp_opts { __le32 dictionary_size; __le32 flags; }; -static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, - int len) +struct comp_opts { + int dict_size; +}; + +static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int len) { - struct comp_opts *comp_opts = buff; - struct squashfs_xz *stream; - int dict_size = msblk->block_size; - int err, n; + struct disk_comp_opts *comp_opts = buff; + struct comp_opts *opts; + int err = 0, n; + + opts = kmalloc(sizeof(*opts), GFP_KERNEL); + if (opts == NULL) { + err = -ENOMEM; + goto out2; + } if (comp_opts) { /* check compressor options are the expected length */ if (len < sizeof(*comp_opts)) { err = -EIO; - goto failed; + goto out; } - dict_size = le32_to_cpu(comp_opts->dictionary_size); + opts->dict_size = le32_to_cpu(comp_opts->dictionary_size); /* the dictionary size should be 2^n or 2^n+2^(n+1) */ - n = ffs(dict_size) - 1; - if (dict_size != (1 << n) && dict_size != (1 << n) + + n = ffs(opts->dict_size) - 1; + if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) + (1 << (n + 1))) { err = -EIO; - goto failed; + goto out; } - } + } else + /* use defaults */ + opts->dict_size = max_t(int, msblk->block_size, + SQUASHFS_METADATA_SIZE); + + return opts; + +out: + kfree(opts); +out2: + return ERR_PTR(err); +} + - dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); +static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) +{ + struct comp_opts *comp_opts = buff; + struct squashfs_xz *stream; + int err; stream = kmalloc(sizeof(*stream), GFP_KERNEL); if (stream == NULL) { @@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, goto failed; } - stream->state = xz_dec_init(XZ_PREALLOC, dict_size); + stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size); if (stream->state == NULL) { kfree(stream); err = -ENOMEM; @@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm) } -static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { enum xz_ret xz_err; - int avail, total = 0, k = 0, page = 0; - struct squashfs_xz *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + int avail, total = 0, k = 0; + struct squashfs_xz *stream = strm; xz_dec_reset(stream->state); stream->buf.in_pos = 0; stream->buf.in_size = 0; stream->buf.out_pos = 0; stream->buf.out_size = PAGE_CACHE_SIZE; - stream->buf.out = buffer[page++]; + stream->buf.out = squashfs_first_page(output); do { if (stream->buf.in_pos == stream->buf.in_size && k < b) { avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; offset = 0; } - if (stream->buf.out_pos == stream->buf.out_size - && page < pages) { - stream->buf.out = buffer[page++]; - stream->buf.out_pos = 0; - total += PAGE_CACHE_SIZE; + if (stream->buf.out_pos == stream->buf.out_size) { + stream->buf.out = squashfs_next_page(output); + if (stream->buf.out != NULL) { + stream->buf.out_pos = 0; + total += PAGE_CACHE_SIZE; + } } xz_err = xz_dec_run(stream->state, &stream->buf); @@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (xz_err == XZ_OK); - if (xz_err != XZ_STREAM_END) { - ERROR("xz_dec_run error, data probably corrupt\n"); - goto release_mutex; - } - - if (k < b) { - ERROR("xz_uncompress error, input remaining\n"); - goto release_mutex; - } + squashfs_finish_page(output); - total += stream->buf.out_pos; - mutex_unlock(&msblk->read_data_mutex); - return total; + if (xz_err != XZ_STREAM_END || k < b) + goto out; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return total + stream->buf.out_pos; +out: for (; k < b; k++) put_bh(bh[k]); @@ -172,6 +184,7 @@ release_mutex: const struct squashfs_decompressor squashfs_xz_comp_ops = { .init = squashfs_xz_init, + .comp_opts = squashfs_xz_comp_opts, .free = squashfs_xz_free, .decompress = squashfs_xz_uncompress, .id = XZ_COMPRESSION, diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 55d918f..8727cab 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -32,8 +32,9 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" -static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) +static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) { z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); if (stream == NULL) @@ -61,44 +62,37 @@ static void zlib_free(void *strm) } -static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { - int zlib_err, zlib_init = 0; - int k = 0, page = 0; - z_stream *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + int zlib_err, zlib_init = 0, k = 0; + z_stream *stream = strm; - stream->avail_out = 0; + stream->avail_out = PAGE_CACHE_SIZE; + stream->next_out = squashfs_first_page(output); stream->avail_in = 0; do { if (stream->avail_in == 0 && k < b) { int avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; } - if (stream->avail_out == 0 && page < pages) { - stream->next_out = buffer[page++]; - stream->avail_out = PAGE_CACHE_SIZE; + if (stream->avail_out == 0) { + stream->next_out = squashfs_next_page(output); + if (stream->next_out != NULL) + stream->avail_out = PAGE_CACHE_SIZE; } if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { - ERROR("zlib_inflateInit returned unexpected " - "result 0x%x, srclength %d\n", - zlib_err, srclength); - goto release_mutex; + squashfs_finish_page(output); + goto out; } zlib_init = 1; } @@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (zlib_err == Z_OK); - if (zlib_err != Z_STREAM_END) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + squashfs_finish_page(output); - zlib_err = zlib_inflateEnd(stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + if (zlib_err != Z_STREAM_END) + goto out; - if (k < b) { - ERROR("zlib_uncompress error, data remaining\n"); - goto release_mutex; - } + zlib_err = zlib_inflateEnd(stream); + if (zlib_err != Z_OK) + goto out; - length = stream->total_out; - mutex_unlock(&msblk->read_data_mutex); - return length; + if (k < b) + goto out; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return stream->total_out; +out: for (; k < b; k++) put_bh(bh[k]); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 1c02da8..3ef11b2 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1137,6 +1137,7 @@ xfs_bmap_add_attrfork( int committed; /* xaction was committed */ int logflags; /* logging flags */ int error; /* error return value */ + int cancel_flags = 0; ASSERT(XFS_IFORK_Q(ip) == 0); @@ -1147,19 +1148,20 @@ xfs_bmap_add_attrfork( if (rsvd) tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); - if (error) - goto error0; + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + cancel_flags = XFS_TRANS_RELEASE_LOG_RES; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); - if (error) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); - return error; - } + if (error) + goto trans_cancel; + cancel_flags |= XFS_TRANS_ABORT; if (XFS_IFORK_Q(ip)) - goto error1; + goto trans_cancel; if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { /* * For inodes coming from pre-6.2 filesystems. @@ -1169,7 +1171,7 @@ xfs_bmap_add_attrfork( } ASSERT(ip->i_d.di_anextents == 0); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); switch (ip->i_d.di_format) { @@ -1191,7 +1193,7 @@ xfs_bmap_add_attrfork( default: ASSERT(0); error = XFS_ERROR(EINVAL); - goto error1; + goto trans_cancel; } ASSERT(ip->i_afp == NULL); @@ -1219,7 +1221,7 @@ xfs_bmap_add_attrfork( if (logflags) xfs_trans_log_inode(tp, ip, logflags); if (error) - goto error2; + goto bmap_cancel; if (!xfs_sb_version_hasattr(&mp->m_sb) || (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { __int64_t sbfields = 0; @@ -1242,14 +1244,16 @@ xfs_bmap_add_attrfork( error = xfs_bmap_finish(&tp, &flist, &committed); if (error) - goto error2; - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -error2: + goto bmap_cancel; + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + +bmap_cancel: xfs_bmap_cancel(&flist); -error1: +trans_cancel: + xfs_trans_cancel(tp, cancel_flags); xfs_iunlock(ip, XFS_ILOCK_EXCL); -error0: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index da88f16..02df7b4 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -41,6 +41,7 @@ #include "xfs_fsops.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_dinode.h" #ifdef HAVE_PERCPU_SB @@ -718,8 +719,22 @@ xfs_mountfs( * Set the inode cluster size. * This may still be overridden by the file system * block size if it is larger than the chosen cluster size. + * + * For v5 filesystems, scale the cluster size with the inode size to + * keep a constant ratio of inode per cluster buffer, but only if mkfs + * has set the inode alignment value appropriately for larger cluster + * sizes. */ mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + int new_size = mp->m_inode_cluster_size; + + new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; + if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) + mp->m_inode_cluster_size = new_size; + xfs_info(mp, "Using inode cluster size of %d bytes", + mp->m_inode_cluster_size); + } /* * Set inode alignment fields diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1d8101a..a466c5e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -112,7 +112,7 @@ typedef struct xfs_mount { __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ __uint8_t m_agno_log; /* log #ag's */ __uint8_t m_agino_log; /* #bits for agino in inum */ - __uint16_t m_inode_cluster_size;/* min inode buf size */ + uint m_inode_cluster_size;/* min inode buf size */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 1bba7f6..50c3f56 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -111,12 +111,14 @@ xfs_trans_log_inode( /* * First time we log the inode in a transaction, bump the inode change - * counter if it is configured for this to occur. + * counter if it is configured for this to occur. We don't use + * inode_inc_version() because there is no need for extra locking around + * i_version as we already hold the inode locked exclusively for + * metadata modification. */ if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && IS_I_VERSION(VFS_I(ip))) { - inode_inc_iversion(VFS_I(ip)); - ip->i_d.di_changecount = VFS_I(ip)->i_version; + ip->i_d.di_changecount = ++VFS_I(ip)->i_version; flags |= XFS_ILOG_CORE; } diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index d53d9f0..2fd59c0 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -385,8 +385,7 @@ xfs_calc_ifree_reservation( xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + - MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), - XFS_INODE_CLUSTER_SIZE(mp)) + + max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + xfs_calc_buf_res(1, 0) + xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels, 0) + diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 7b2de02..c602c77 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -100,6 +100,7 @@ enum acpi_hotplug_mode { struct acpi_hotplug_profile { struct kobject kobj; bool enabled:1; + bool ignore:1; enum acpi_hotplug_mode mode; }; diff --git a/include/crypto/hash_info.h b/include/crypto/hash_info.h new file mode 100644 index 0000000..e1e5a3e --- /dev/null +++ b/include/crypto/hash_info.h @@ -0,0 +1,40 @@ +/* + * Hash Info: Hash algorithms information + * + * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _CRYPTO_HASH_INFO_H +#define _CRYPTO_HASH_INFO_H + +#include <crypto/sha.h> +#include <crypto/md5.h> + +#include <uapi/linux/hash_info.h> + +/* not defined in include/crypto/ */ +#define RMD128_DIGEST_SIZE 16 +#define RMD160_DIGEST_SIZE 20 +#define RMD256_DIGEST_SIZE 32 +#define RMD320_DIGEST_SIZE 40 + +/* not defined in include/crypto/ */ +#define WP512_DIGEST_SIZE 64 +#define WP384_DIGEST_SIZE 48 +#define WP256_DIGEST_SIZE 32 + +/* not defined in include/crypto/ */ +#define TGR128_DIGEST_SIZE 16 +#define TGR160_DIGEST_SIZE 20 +#define TGR192_DIGEST_SIZE 24 + +extern const char *const hash_algo_name[HASH_ALGO__LAST]; +extern const int hash_digest_size[HASH_ALGO__LAST]; + +#endif /* _CRYPTO_HASH_INFO_H */ diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index f5b0224..fc09732 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -15,6 +15,7 @@ #define _LINUX_PUBLIC_KEY_H #include <linux/mpi.h> +#include <crypto/hash_info.h> enum pkey_algo { PKEY_ALGO_DSA, @@ -22,21 +23,11 @@ enum pkey_algo { PKEY_ALGO__LAST }; -extern const char *const pkey_algo[PKEY_ALGO__LAST]; +extern const char *const pkey_algo_name[PKEY_ALGO__LAST]; +extern const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST]; -enum pkey_hash_algo { - PKEY_HASH_MD4, - PKEY_HASH_MD5, - PKEY_HASH_SHA1, - PKEY_HASH_RIPE_MD_160, - PKEY_HASH_SHA256, - PKEY_HASH_SHA384, - PKEY_HASH_SHA512, - PKEY_HASH_SHA224, - PKEY_HASH__LAST -}; - -extern const char *const pkey_hash_algo[PKEY_HASH__LAST]; +/* asymmetric key implementation supports only up to SHA224 */ +#define PKEY_HASH__LAST (HASH_ALGO_SHA224 + 1) enum pkey_id_type { PKEY_ID_PGP, /* OpenPGP generated key ID */ @@ -44,7 +35,7 @@ enum pkey_id_type { PKEY_ID_TYPE__LAST }; -extern const char *const pkey_id_type[PKEY_ID_TYPE__LAST]; +extern const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST]; /* * Cryptographic data for the public-key subtype of the asymmetric key type. @@ -59,6 +50,7 @@ struct public_key { #define PKEY_CAN_DECRYPT 0x02 #define PKEY_CAN_SIGN 0x04 #define PKEY_CAN_VERIFY 0x08 + enum pkey_algo pkey_algo : 8; enum pkey_id_type id_type : 8; union { MPI mpi[5]; @@ -88,7 +80,8 @@ struct public_key_signature { u8 *digest; u8 digest_size; /* Number of bytes in digest */ u8 nr_mpi; /* Occupancy of mpi[] */ - enum pkey_hash_algo pkey_hash_algo : 8; + enum pkey_algo pkey_algo : 8; + enum hash_algo pkey_hash_algo : 8; union { MPI mpi[2]; struct { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 751eaff..ee127ec 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -169,6 +169,7 @@ struct ttm_tt; * @offset: The current GPU offset, which can have different meanings * depending on the memory type. For SYSTEM type memory, it should be 0. * @cur_placement: Hint of current placement. + * @wu_mutex: Wait unreserved mutex. * * Base class for TTM buffer object, that deals with data placement and CPU * mappings. GPU mappings are really up to the driver, but for simpler GPUs @@ -250,6 +251,7 @@ struct ttm_buffer_object { struct reservation_object *resv; struct reservation_object ttm_resv; + struct mutex wu_mutex; }; /** @@ -702,5 +704,5 @@ extern ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp, size_t count, loff_t *f_pos, bool write); extern void ttm_bo_swapout_all(struct ttm_bo_device *bdev); - +extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo); #endif diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index ec8a1d3..16db7d0 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -70,7 +70,8 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, /** * function ttm_eu_reserve_buffers * - * @ticket: [out] ww_acquire_ctx returned by call. + * @ticket: [out] ww_acquire_ctx filled in by call, or NULL if only + * non-blocking reserves should be tried. * @list: thread private list of ttm_validate_buffer structs. * * Tries to reserve bos pointed to by the list entries for validation. diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h index fc0cf06..58b0298 100644 --- a/include/drm/ttm/ttm_object.h +++ b/include/drm/ttm/ttm_object.h @@ -41,6 +41,7 @@ #include <drm/drm_hashtab.h> #include <linux/kref.h> #include <linux/rcupdate.h> +#include <linux/dma-buf.h> #include <ttm/ttm_memory.h> /** @@ -77,6 +78,7 @@ enum ttm_object_type { ttm_fence_type, ttm_buffer_type, ttm_lock_type, + ttm_prime_type, ttm_driver_type0 = 256, ttm_driver_type1, ttm_driver_type2, @@ -132,6 +134,30 @@ struct ttm_base_object { enum ttm_ref_type ref_type); }; + +/** + * struct ttm_prime_object - Modified base object that is prime-aware + * + * @base: struct ttm_base_object that we derive from + * @mutex: Mutex protecting the @dma_buf member. + * @size: Size of the dma_buf associated with this object + * @real_type: Type of the underlying object. Needed since we're setting + * the value of @base::object_type to ttm_prime_type + * @dma_buf: Non ref-coutned pointer to a struct dma_buf created from this + * object. + * @refcount_release: The underlying object's release method. Needed since + * we set @base::refcount_release to our own release method. + */ + +struct ttm_prime_object { + struct ttm_base_object base; + struct mutex mutex; + size_t size; + enum ttm_object_type real_type; + struct dma_buf *dma_buf; + void (*refcount_release) (struct ttm_base_object **); +}; + /** * ttm_base_object_init * @@ -248,14 +274,18 @@ extern void ttm_object_file_release(struct ttm_object_file **p_tfile); /** * ttm_object device init - initialize a struct ttm_object_device * + * @mem_glob: struct ttm_mem_global for memory accounting. * @hash_order: Order of hash table used to hash the base objects. + * @ops: DMA buf ops for prime objects of this device. * * This function is typically called on device initialization to prepare * data structures needed for ttm base and ref objects. */ -extern struct ttm_object_device *ttm_object_device_init - (struct ttm_mem_global *mem_glob, unsigned int hash_order); +extern struct ttm_object_device * +ttm_object_device_init(struct ttm_mem_global *mem_glob, + unsigned int hash_order, + const struct dma_buf_ops *ops); /** * ttm_object_device_release - release data held by a ttm_object_device @@ -272,4 +302,31 @@ extern void ttm_object_device_release(struct ttm_object_device **p_tdev); #define ttm_base_object_kfree(__object, __base)\ kfree_rcu(__object, __base.rhead) + +extern int ttm_prime_object_init(struct ttm_object_file *tfile, + size_t size, + struct ttm_prime_object *prime, + bool shareable, + enum ttm_object_type type, + void (*refcount_release) + (struct ttm_base_object **), + void (*ref_obj_release) + (struct ttm_base_object *, + enum ttm_ref_type ref_type)); + +static inline enum ttm_object_type +ttm_base_object_type(struct ttm_base_object *base) +{ + return (base->object_type == ttm_prime_type) ? + container_of(base, struct ttm_prime_object, base)->real_type : + base->object_type; +} +extern int ttm_prime_fd_to_handle(struct ttm_object_file *tfile, + int fd, u32 *handle); +extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, + uint32_t handle, uint32_t flags, + int *prime_fd); + +#define ttm_prime_object_kfree(__obj, __prime) \ + kfree_rcu(__obj, __prime.base.rhead) #endif diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h new file mode 100644 index 0000000..d69bc8a --- /dev/null +++ b/include/keys/big_key-type.h @@ -0,0 +1,25 @@ +/* Big capacity key type. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _KEYS_BIG_KEY_TYPE_H +#define _KEYS_BIG_KEY_TYPE_H + +#include <linux/key-type.h> + +extern struct key_type key_type_big_key; + +extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep); +extern void big_key_revoke(struct key *key); +extern void big_key_destroy(struct key *key); +extern void big_key_describe(const struct key *big_key, struct seq_file *m); +extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen); + +#endif /* _KEYS_BIG_KEY_TYPE_H */ diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h index cf49159..fca5c62 100644 --- a/include/keys/keyring-type.h +++ b/include/keys/keyring-type.h @@ -1,6 +1,6 @@ /* Keyring key type * - * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -13,19 +13,6 @@ #define _KEYS_KEYRING_TYPE_H #include <linux/key.h> -#include <linux/rcupdate.h> - -/* - * the keyring payload contains a list of the keys to which the keyring is - * subscribed - */ -struct keyring_list { - struct rcu_head rcu; /* RCU deletion hook */ - unsigned short maxkeys; /* max keys this list can hold */ - unsigned short nkeys; /* number of keys currently held */ - unsigned short delkey; /* key to be unlinked by RCU */ - struct key __rcu *keys[0]; -}; - +#include <linux/assoc_array.h> #endif /* _KEYS_KEYRING_TYPE_H */ diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h new file mode 100644 index 0000000..8dabc39 --- /dev/null +++ b/include/keys/system_keyring.h @@ -0,0 +1,23 @@ +/* System keyring containing trusted public keys. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _KEYS_SYSTEM_KEYRING_H +#define _KEYS_SYSTEM_KEYRING_H + +#ifdef CONFIG_SYSTEM_TRUSTED_KEYRING + +#include <linux/key.h> + +extern struct key *system_trusted_keyring; + +#endif + +#endif /* _KEYS_SYSTEM_KEYRING_H */ diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h new file mode 100644 index 0000000..9a193b8 --- /dev/null +++ b/include/linux/assoc_array.h @@ -0,0 +1,92 @@ +/* Generic associative array implementation. + * + * See Documentation/assoc_array.txt for information. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_ASSOC_ARRAY_H +#define _LINUX_ASSOC_ARRAY_H + +#ifdef CONFIG_ASSOCIATIVE_ARRAY + +#include <linux/types.h> + +#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */ + +/* + * Generic associative array. + */ +struct assoc_array { + struct assoc_array_ptr *root; /* The node at the root of the tree */ + unsigned long nr_leaves_on_tree; +}; + +/* + * Operations on objects and index keys for use by array manipulation routines. + */ +struct assoc_array_ops { + /* Method to get a chunk of an index key from caller-supplied data */ + unsigned long (*get_key_chunk)(const void *index_key, int level); + + /* Method to get a piece of an object's index key */ + unsigned long (*get_object_key_chunk)(const void *object, int level); + + /* Is this the object we're looking for? */ + bool (*compare_object)(const void *object, const void *index_key); + + /* How different are two objects, to a bit position in their keys? (or + * -1 if they're the same) + */ + int (*diff_objects)(const void *a, const void *b); + + /* Method to free an object. */ + void (*free_object)(void *object); +}; + +/* + * Access and manipulation functions. + */ +struct assoc_array_edit; + +static inline void assoc_array_init(struct assoc_array *array) +{ + array->root = NULL; + array->nr_leaves_on_tree = 0; +} + +extern int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data); +extern void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); +extern void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops); +extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object); +extern void assoc_array_insert_set_object(struct assoc_array_edit *edit, + void *object); +extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key); +extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops); +extern void assoc_array_apply_edit(struct assoc_array_edit *edit); +extern void assoc_array_cancel_edit(struct assoc_array_edit *edit); +extern int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data); + +#endif /* CONFIG_ASSOCIATIVE_ARRAY */ +#endif /* _LINUX_ASSOC_ARRAY_H */ diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h new file mode 100644 index 0000000..711275e --- /dev/null +++ b/include/linux/assoc_array_priv.h @@ -0,0 +1,182 @@ +/* Private definitions for the generic associative array implementation. + * + * See Documentation/assoc_array.txt for information. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_ASSOC_ARRAY_PRIV_H +#define _LINUX_ASSOC_ARRAY_PRIV_H + +#ifdef CONFIG_ASSOCIATIVE_ARRAY + +#include <linux/assoc_array.h> + +#define ASSOC_ARRAY_FAN_OUT 16 /* Number of slots per node */ +#define ASSOC_ARRAY_FAN_MASK (ASSOC_ARRAY_FAN_OUT - 1) +#define ASSOC_ARRAY_LEVEL_STEP (ilog2(ASSOC_ARRAY_FAN_OUT)) +#define ASSOC_ARRAY_LEVEL_STEP_MASK (ASSOC_ARRAY_LEVEL_STEP - 1) +#define ASSOC_ARRAY_KEY_CHUNK_MASK (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1) +#define ASSOC_ARRAY_KEY_CHUNK_SHIFT (ilog2(BITS_PER_LONG)) + +/* + * Undefined type representing a pointer with type information in the bottom + * two bits. + */ +struct assoc_array_ptr; + +/* + * An N-way node in the tree. + * + * Each slot contains one of four things: + * + * (1) Nothing (NULL). + * + * (2) A leaf object (pointer types 0). + * + * (3) A next-level node (pointer type 1, subtype 0). + * + * (4) A shortcut (pointer type 1, subtype 1). + * + * The tree is optimised for search-by-ID, but permits reasonable iteration + * also. + * + * The tree is navigated by constructing an index key consisting of an array of + * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size. + * + * The segments correspond to levels of the tree (the first segment is used at + * level 0, the second at level 1, etc.). + */ +struct assoc_array_node { + struct assoc_array_ptr *back_pointer; + u8 parent_slot; + struct assoc_array_ptr *slots[ASSOC_ARRAY_FAN_OUT]; + unsigned long nr_leaves_on_branch; +}; + +/* + * A shortcut through the index space out to where a collection of nodes/leaves + * with the same IDs live. + */ +struct assoc_array_shortcut { + struct assoc_array_ptr *back_pointer; + int parent_slot; + int skip_to_level; + struct assoc_array_ptr *next_node; + unsigned long index_key[]; +}; + +/* + * Preallocation cache. + */ +struct assoc_array_edit { + struct rcu_head rcu; + struct assoc_array *array; + const struct assoc_array_ops *ops; + const struct assoc_array_ops *ops_for_excised_subtree; + struct assoc_array_ptr *leaf; + struct assoc_array_ptr **leaf_p; + struct assoc_array_ptr *dead_leaf; + struct assoc_array_ptr *new_meta[3]; + struct assoc_array_ptr *excised_meta[1]; + struct assoc_array_ptr *excised_subtree; + struct assoc_array_ptr **set_backpointers[ASSOC_ARRAY_FAN_OUT]; + struct assoc_array_ptr *set_backpointers_to; + struct assoc_array_node *adjust_count_on; + long adjust_count_by; + struct { + struct assoc_array_ptr **ptr; + struct assoc_array_ptr *to; + } set[2]; + struct { + u8 *p; + u8 to; + } set_parent_slot[1]; + u8 segment_cache[ASSOC_ARRAY_FAN_OUT + 1]; +}; + +/* + * Internal tree member pointers are marked in the bottom one or two bits to + * indicate what type they are so that we don't have to look behind every + * pointer to see what it points to. + * + * We provide functions to test type annotations and to create and translate + * the annotated pointers. + */ +#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL +#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL /* Points to leaf (or nowhere) */ +#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL /* Points to node or shortcut */ +#define ASSOC_ARRAY_PTR_SUBTYPE_MASK 0x2UL +#define ASSOC_ARRAY_PTR_NODE_SUBTYPE 0x0UL +#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL + +static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK; +} +static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x) +{ + return !assoc_array_ptr_is_meta(x); +} +static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK; +} +static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x) +{ + return !assoc_array_ptr_is_shortcut(x); +} + +static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x) +{ + return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK); +} + +static inline +unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & + ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK); +} +static inline +struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x) +{ + return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x); +} +static inline +struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x) +{ + return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x); +} + +static inline +struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t) +{ + return (struct assoc_array_ptr *)((unsigned long)p | t); +} +static inline +struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p) +{ + return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE); +} +static inline +struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p) +{ + return __assoc_array_x_to_ptr( + p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE); +} +static inline +struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p) +{ + return __assoc_array_x_to_ptr( + p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE); +} + +#endif /* CONFIG_ASSOCIATIVE_ARRAY */ +#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */ diff --git a/include/linux/audit.h b/include/linux/audit.h index 729a4d1..a406419 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -73,6 +73,8 @@ struct audit_field { void *lsm_rule; }; +extern int is_audit_feature_set(int which); + extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); @@ -207,7 +209,7 @@ static inline int audit_get_sessionid(struct task_struct *tsk) extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); -extern int __audit_bprm(struct linux_binprm *bprm); +extern void __audit_bprm(struct linux_binprm *bprm); extern int __audit_socketcall(int nargs, unsigned long *args); extern int __audit_sockaddr(int len, void *addr); extern void __audit_fd_pair(int fd1, int fd2); @@ -236,11 +238,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid if (unlikely(!audit_dummy_context())) __audit_ipc_set_perm(qbytes, uid, gid, mode); } -static inline int audit_bprm(struct linux_binprm *bprm) +static inline void audit_bprm(struct linux_binprm *bprm) { if (unlikely(!audit_dummy_context())) - return __audit_bprm(bprm); - return 0; + __audit_bprm(bprm); } static inline int audit_socketcall(int nargs, unsigned long *args) { @@ -367,10 +368,8 @@ static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { } -static inline int audit_bprm(struct linux_binprm *bprm) -{ - return 0; -} +static inline void audit_bprm(struct linux_binprm *bprm) +{ } static inline int audit_socketcall(int nargs, unsigned long *args) { return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index bf5d574..121f11f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2622,7 +2622,9 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern int simple_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); +extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index acd2010..9649ff0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); void hugepage_put_subpool(struct hugepage_subpool *spool); int PageHuge(struct page *page); +int PageHeadHuge(struct page *page_head); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); @@ -69,7 +70,6 @@ int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); bool is_hugepage_active(struct page *page); -void copy_huge_page(struct page *dst, struct page *src); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); @@ -104,6 +104,11 @@ static inline int PageHuge(struct page *page) return 0; } +static inline int PageHeadHuge(struct page *page_head) +{ + return 0; +} + static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { } @@ -140,9 +145,6 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page) #define isolate_huge_page(p, l) false #define putback_active_hugepage(p) do {} while (0) #define is_hugepage_active(x) false -static inline void copy_huge_page(struct page *dst, struct page *src) -{ -} static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 518a53a..a74c3a8 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -45,6 +45,7 @@ struct key_preparsed_payload { const void *data; /* Raw data */ size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ + bool trusted; /* True if key is trusted */ }; typedef int (*request_key_actor_t)(struct key_construction *key, @@ -63,6 +64,11 @@ struct key_type { */ size_t def_datalen; + /* Default key search algorithm. */ + unsigned def_lookup_type; +#define KEYRING_SEARCH_LOOKUP_DIRECT 0x0000 /* Direct lookup by description. */ +#define KEYRING_SEARCH_LOOKUP_ITERATE 0x0001 /* Iterative search. */ + /* vet a description */ int (*vet_description)(const char *description); diff --git a/include/linux/key.h b/include/linux/key.h index 4dfde11..80d6774 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -22,6 +22,7 @@ #include <linux/sysctl.h> #include <linux/rwsem.h> #include <linux/atomic.h> +#include <linux/assoc_array.h> #ifdef __KERNEL__ #include <linux/uidgid.h> @@ -82,6 +83,12 @@ struct key_owner; struct keyring_list; struct keyring_name; +struct keyring_index_key { + struct key_type *type; + const char *description; + size_t desc_len; +}; + /*****************************************************************************/ /* * key reference with possession attribute handling @@ -99,7 +106,7 @@ struct keyring_name; typedef struct __key_reference_with_attributes *key_ref_t; static inline key_ref_t make_key_ref(const struct key *key, - unsigned long possession) + bool possession) { return (key_ref_t) ((unsigned long) key | possession); } @@ -109,7 +116,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref) return (struct key *) ((unsigned long) key_ref & ~1UL); } -static inline unsigned long is_key_possessed(const key_ref_t key_ref) +static inline bool is_key_possessed(const key_ref_t key_ref) { return (unsigned long) key_ref & 1UL; } @@ -129,7 +136,6 @@ struct key { struct list_head graveyard_link; struct rb_node serial_node; }; - struct key_type *type; /* type of key */ struct rw_semaphore sem; /* change vs change sem */ struct key_user *user; /* owner of this key */ void *security; /* security data for this key */ @@ -162,13 +168,21 @@ struct key { #define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ +#define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ +#define KEY_FLAG_TRUSTED_ONLY 9 /* set if keyring only accepts links to trusted keys */ - /* the description string - * - this is used to match a key against search criteria - * - this should be a printable string + /* the key type and key description string + * - the desc is used to match a key against search criteria + * - it should be a printable string * - eg: for krb5 AFS, this might be "afs@REDHAT.COM" */ - char *description; + union { + struct keyring_index_key index_key; + struct { + struct key_type *type; /* type of key */ + char *description; + }; + }; /* type specific data * - this is used by the keyring type to index the name @@ -185,11 +199,14 @@ struct key { * whatever */ union { - unsigned long value; - void __rcu *rcudata; - void *data; - struct keyring_list __rcu *subscriptions; - } payload; + union { + unsigned long value; + void __rcu *rcudata; + void *data; + void *data2[2]; + } payload; + struct assoc_array keys; + }; }; extern struct key *key_alloc(struct key_type *type, @@ -203,18 +220,23 @@ extern struct key *key_alloc(struct key_type *type, #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ #define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ +#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); extern void key_put(struct key *key); -static inline struct key *key_get(struct key *key) +static inline struct key *__key_get(struct key *key) { - if (key) - atomic_inc(&key->usage); + atomic_inc(&key->usage); return key; } +static inline struct key *key_get(struct key *key) +{ + return key ? __key_get(key) : key; +} + static inline void key_ref_put(key_ref_t key_ref) { key_put(key_ref_to_ptr(key_ref)); diff --git a/include/linux/mm.h b/include/linux/mm.h index 0548eb2..1cedd00 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1318,7 +1318,6 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #if USE_SPLIT_PTE_PTLOCKS #if BLOATED_SPINLOCKS -void __init ptlock_cache_init(void); extern bool ptlock_alloc(struct page *page); extern void ptlock_free(struct page *page); @@ -1327,7 +1326,6 @@ static inline spinlock_t *ptlock_ptr(struct page *page) return page->ptl; } #else /* BLOATED_SPINLOCKS */ -static inline void ptlock_cache_init(void) {} static inline bool ptlock_alloc(struct page *page) { return true; @@ -1380,17 +1378,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } -static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct page *page) { return true; } static inline void pte_lock_deinit(struct page *page) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ -static inline void pgtable_init(void) -{ - ptlock_cache_init(); - pgtable_cache_init(); -} - static inline bool pgtable_page_ctor(struct page *page) { inc_zone_page_state(page, NR_PAGETABLE); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10f5a72..bd29941 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -44,18 +44,22 @@ struct page { /* First double word block */ unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ - struct address_space *mapping; /* If low bit clear, points to - * inode address_space, or NULL. - * If page mapped as anonymous - * memory, low bit is set, and - * it points to anon_vma object: - * see PAGE_MAPPING_ANON below. - */ + union { + struct address_space *mapping; /* If low bit clear, points to + * inode address_space, or NULL. + * If page mapped as anonymous + * memory, low bit is set, and + * it points to anon_vma object: + * see PAGE_MAPPING_ANON below. + */ + void *s_mem; /* slab first object */ + }; + /* Second double word */ struct { union { pgoff_t index; /* Our offset within mapping. */ - void *freelist; /* slub/slob first free object */ + void *freelist; /* sl[aou]b first free object */ bool pfmemalloc; /* If set by the page allocator, * ALLOC_NO_WATERMARKS was set * and the low watermark was not @@ -65,9 +69,6 @@ struct page { * this page is only used to * free other pages. */ -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page->ptl */ -#endif }; union { @@ -114,6 +115,7 @@ struct page { }; atomic_t _count; /* Usage count, see below. */ }; + unsigned int active; /* SLAB */ }; }; @@ -135,6 +137,12 @@ struct page { struct list_head list; /* slobs list of pages */ struct slab *slab_page; /* slab fields */ + struct rcu_head rcu_head; /* Used by SLAB + * when destroying via RCU + */ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS + pgtable_t pmd_huge_pte; /* protected by page->ptl */ +#endif }; /* Remainder is not double word aligned */ diff --git a/include/linux/msi.h b/include/linux/msi.h index 87cce50..009b024 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -26,11 +26,11 @@ struct msi_desc { struct { __u8 is_msix : 1; __u8 multiple: 3; /* log2 number of messages */ - __u8 maskbit : 1; /* mask-pending bit supported ? */ - __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ - __u8 pos; /* Location of the msi capability */ - __u16 entry_nr; /* specific enabled entry */ - unsigned default_irq; /* default pre-assigned irq */ + __u8 maskbit : 1; /* mask-pending bit supported ? */ + __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ + __u8 pos; /* Location of the msi capability */ + __u16 entry_nr; /* specific enabled entry */ + unsigned default_irq; /* default pre-assigned irq */ } msi_attrib; u32 masked; /* mask bits */ diff --git a/include/linux/net.h b/include/linux/net.h index b292a04..4bcee94 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -164,6 +164,14 @@ struct proto_ops { #endif int (*sendmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); + /* Notes for implementing recvmsg: + * =============================== + * msg->msg_namelen should get updated by the recvmsg handlers + * iff msg_name != NULL. It is by default 0 to prevent + * returning uninitialized memory to user space. The recvfrom + * handlers can assume that msg.msg_name is either NULL or has + * a minimum size of sizeof(struct sockaddr_storage). + */ int (*recvmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags); diff --git a/include/linux/pci.h b/include/linux/pci.h index 835ec7b..1084a15 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -32,7 +32,6 @@ #include <linux/irqreturn.h> #include <uapi/linux/pci.h> -/* Include the ID list */ #include <linux/pci_ids.h> /* @@ -42,9 +41,10 @@ * * 7:3 = slot * 2:0 = function - * PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() are defined uapi/linux/pci.h + * + * PCI_DEVFN(), PCI_SLOT(), and PCI_FUNC() are defined in uapi/linux/pci.h. * In the interest of not exposing interfaces to user-space unnecessarily, - * the following kernel only defines are being added here. + * the following kernel-only defines are being added here. */ #define PCI_DEVID(bus, devfn) ((((u16)bus) << 8) | devfn) /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */ @@ -153,10 +153,10 @@ enum pcie_reset_state { /* Reset is NOT asserted (Use to deassert reset) */ pcie_deassert_reset = (__force pcie_reset_state_t) 1, - /* Use #PERST to reset PCI-E device */ + /* Use #PERST to reset PCIe device */ pcie_warm_reset = (__force pcie_reset_state_t) 2, - /* Use PCI-E Hot Reset to reset device */ + /* Use PCIe Hot Reset to reset device */ pcie_hot_reset = (__force pcie_reset_state_t) 3 }; @@ -259,13 +259,13 @@ struct pci_dev { unsigned int class; /* 3 bytes: (base,sub,prog-if) */ u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ - u8 pcie_cap; /* PCI-E capability offset */ + u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ u8 msix_cap; /* MSI-X capability offset */ - u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ + u8 pcie_mpss:3; /* PCIe Max Payload Size Supported */ u8 rom_base_reg; /* which config register controls the ROM */ - u8 pin; /* which interrupt pin this device uses */ - u16 pcie_flags_reg; /* cached PCI-E Capabilities Register */ + u8 pin; /* which interrupt pin this device uses */ + u16 pcie_flags_reg; /* cached PCIe Capabilities Register */ struct pci_driver *driver; /* which driver has allocated this device */ u64 dma_mask; /* Mask of the bits of bus address this @@ -300,7 +300,7 @@ struct pci_dev { unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ #ifdef CONFIG_PCIEASPM - struct pcie_link_state *link_state; /* ASPM link state. */ + struct pcie_link_state *link_state; /* ASPM link state */ #endif pci_channel_state_t error_state; /* current connectivity state */ @@ -317,7 +317,7 @@ struct pci_dev { bool match_driver; /* Skip attaching driver */ /* These fields are used by common fixups */ - unsigned int transparent:1; /* Transparent PCI bridge */ + unsigned int transparent:1; /* Subtractive decode PCI bridge */ unsigned int multifunction:1;/* Part of multi-function device */ /* keep track of device state */ unsigned int is_added:1; @@ -326,7 +326,7 @@ struct pci_dev { unsigned int block_cfg_access:1; /* config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ - unsigned int msi_enabled:1; + unsigned int msi_enabled:1; unsigned int msix_enabled:1; unsigned int ari_enabled:1; /* ARI forwarding */ unsigned int is_managed:1; @@ -371,7 +371,6 @@ static inline struct pci_dev *pci_physfn(struct pci_dev *dev) if (dev->is_virtfn) dev = dev->physfn; #endif - return dev; } @@ -456,7 +455,7 @@ struct pci_bus { char name[48]; unsigned short bridge_ctl; /* manage NO_ISA/FBB/et al behaviors */ - pci_bus_flags_t bus_flags; /* Inherited by child busses */ + pci_bus_flags_t bus_flags; /* inherited by child buses */ struct device *bridge; struct device dev; struct bin_attribute *legacy_io; /* legacy I/O for this bus */ @@ -468,7 +467,7 @@ struct pci_bus { #define to_pci_bus(n) container_of(n, struct pci_bus, dev) /* - * Returns true if the pci bus is root (behind host-pci bridge), + * Returns true if the PCI bus is root (behind host-PCI bridge), * false otherwise * * Some code assumes that "bus->self == NULL" means that bus is a root bus. @@ -510,7 +509,7 @@ static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; #define PCIBIOS_BUFFER_TOO_SMALL 0x89 /* - * Translate above to generic errno for passing back through non-pci. + * Translate above to generic errno for passing back through non-PCI code. */ static inline int pcibios_err_to_errno(int err) { @@ -561,11 +560,12 @@ struct pci_dynids { struct list_head list; /* for IDs added at runtime */ }; -/* ---------------------------------------------------------------- */ -/** PCI Error Recovery System (PCI-ERS). If a PCI device driver provides - * a set of callbacks in struct pci_error_handlers, then that device driver - * will be notified of PCI bus errors, and will be driven to recovery - * when an error occurs. + +/* + * PCI Error Recovery System (PCI-ERS). If a PCI device driver provides + * a set of callbacks in struct pci_error_handlers, that device driver + * will be notified of PCI bus errors, and will be driven to recovery + * when an error occurs. */ typedef unsigned int __bitwise pci_ers_result_t; @@ -609,7 +609,6 @@ struct pci_error_handlers { void (*resume)(struct pci_dev *dev); }; -/* ---------------------------------------------------------------- */ struct module; struct pci_driver { @@ -713,10 +712,10 @@ extern enum pcie_bus_config_types pcie_bus_config; extern struct bus_type pci_bus_type; -/* Do NOT directly access these two variables, unless you are arch specific pci - * code, or pci core code. */ +/* Do NOT directly access these two variables, unless you are arch-specific PCI + * code, or PCI core code. */ extern struct list_head pci_root_buses; /* list of all known PCI buses */ -/* Some device drivers need know if pci is initiated */ +/* Some device drivers need know if PCI is initiated */ int no_pci_devices(void); void pcibios_resource_survey_bus(struct pci_bus *bus); @@ -724,7 +723,7 @@ void pcibios_add_bus(struct pci_bus *bus); void pcibios_remove_bus(struct pci_bus *bus); void pcibios_fixup_bus(struct pci_bus *); int __must_check pcibios_enable_device(struct pci_dev *, int mask); -/* Architecture specific versions may override this (weak) */ +/* Architecture-specific versions may override this (weak) */ char *pcibios_setup(char *str); /* Used only when drivers/pci/setup.c is used */ @@ -1258,7 +1257,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev); /* * PCI domain support. Sometimes called PCI segment (eg by ACPI), - * a PCI domain is defined to be a set of PCI busses which share + * a PCI domain is defined to be a set of PCI buses which share * configuration space. */ #ifdef CONFIG_PCI_DOMAINS @@ -1672,7 +1671,7 @@ extern u8 pci_cache_line_size; extern unsigned long pci_hotplug_io_size; extern unsigned long pci_hotplug_mem_size; -/* Architecture specific versions may override these (weak) */ +/* Architecture-specific versions may override these (weak) */ int pcibios_add_platform_entries(struct pci_dev *dev); void pcibios_disable_device(struct pci_dev *dev); void pcibios_set_master(struct pci_dev *dev); diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 430dd96..a2e2f1d 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -39,8 +39,8 @@ * @hardware_test: Called to run a specified hardware test on the specified * slot. * @get_power_status: Called to get the current power status of a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. * @get_attention_status: Called to get the current attention status of a slot. * If this field is NULL, the value passed in the struct hotplug_slot_info * will be used when this value is requested by a user. @@ -191,4 +191,3 @@ static inline int pci_get_hp_params(struct pci_dev *dev, void pci_configure_slot(struct pci_dev *dev); #endif - diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 9572669..4f1089f 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -23,7 +23,7 @@ #define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) struct pcie_device { - int irq; /* Service IRQ/MSI/MSI-X Vector */ + int irq; /* Service IRQ/MSI/MSI-X Vector */ struct pci_dev *port; /* Root/Upstream/Downstream Port */ u32 service; /* Port service this device represents */ void *priv_data; /* Service Private Data */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 64ab823..48a4dc3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -559,6 +559,7 @@ static inline int phy_read_status(struct phy_device *phydev) { return phydev->drv->read_status(phydev); } +int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); diff --git a/include/linux/security.h b/include/linux/security.h index 9d37e2b..5623a7f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1052,17 +1052,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @xfrm_policy_delete_security: * @ctx contains the xfrm_sec_ctx. * Authorize deletion of xp->security. - * @xfrm_state_alloc_security: + * @xfrm_state_alloc: * @x contains the xfrm_state being added to the Security Association * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * @secid contains the secid from which to take the mls portion of the context. * Allocate a security structure to the x->security field; the security * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to either sec_ctx or polsec, with the mls portion - * taken from secid in the latter case. - * Return 0 if operation was successful (memory to allocate, legal context). + * context to correspond to sec_ctx. Return 0 if operation was successful + * (memory to allocate, legal context). + * @xfrm_state_alloc_acquire: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @polsec contains the policy's security context. + * @secid contains the secid from which to take the mls portion of the + * context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to secid. Return 0 if operation was successful + * (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. * Deallocate x->security. @@ -1679,9 +1687,11 @@ struct security_operations { int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); - int (*xfrm_state_alloc_security) (struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, - u32 secid); + int (*xfrm_state_alloc) (struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_acquire) (struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, + u32 secid); void (*xfrm_state_free_security) (struct xfrm_state *x); int (*xfrm_state_delete_security) (struct xfrm_state *x); int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 1e8a8b6..cf87a24 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -354,6 +354,35 @@ static inline void read_sequnlock_excl(seqlock_t *sl) spin_unlock(&sl->lock); } +/** + * read_seqbegin_or_lock - begin a sequence number check or locking block + * @lock: sequence lock + * @seq : sequence number to be checked + * + * First try it once optimistically without taking the lock. If that fails, + * take the lock. The sequence number is also used as a marker for deciding + * whether to be a reader (even) or writer (odd). + * N.B. seq must be initialized to an even number to begin with. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + read_seqlock_excl(lock); +} + +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + read_sequnlock_excl(lock); +} + static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); diff --git a/include/linux/slab.h b/include/linux/slab.h index 74f1058..c2bba24 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -53,7 +53,14 @@ * } * rcu_read_unlock(); * - * See also the comment on struct slab_rcu in mm/slab.c. + * This is useful if we need to approach a kernel structure obliquely, + * from its address obtained without the usual locking. We can lock + * the structure to stabilize it and check it's still at the given address, + * only if we can be sure that the memory has not been meanwhile reused + * for some other kind of object (which our subsystem's lock might corrupt). + * + * rcu_read_lock before reading the address, then rcu_read_unlock after + * taking the spinlock within the structure expected at that address. */ #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index e9346b4..09bfffb 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -27,8 +27,8 @@ struct kmem_cache { size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ - struct kmem_cache *slabp_cache; - unsigned int slab_size; + struct kmem_cache *freelist_cache; + unsigned int freelist_size; /* constructor func */ void (*ctor)(void *obj); diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index cc0b67e..f56bfa9 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -11,7 +11,7 @@ enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ - FREE_FASTPATH, /* Free to cpu slub */ + FREE_FASTPATH, /* Free to cpu slab */ FREE_SLOWPATH, /* Freeing not to cpu slab */ FREE_FROZEN, /* Freeing to frozen slab */ FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4db2985..4836ba3 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -27,6 +27,12 @@ struct user_namespace { kuid_t owner; kgid_t group; unsigned int proc_inum; + + /* Register of per-UID persistent keyrings for this namespace */ +#ifdef CONFIG_PERSISTENT_KEYRINGS + struct key *persistent_keyring_register; + struct rw_semaphore persistent_keyring_register_sem; +#endif }; extern struct user_namespace init_user_ns; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ace4abf..1b177ed 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -265,7 +265,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); @@ -283,9 +283,6 @@ static inline int genlmsg_multicast(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) - return -EINVAL; - group = family->mcgrp_offset + group; return genlmsg_multicast_netns(family, &init_net, skb, portid, group, flags); } @@ -387,6 +384,9 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) static inline int genl_set_err(struct genl_family *family, struct net *net, u32 portid, u32 group, int code) { + if (WARN_ON_ONCE(group >= family->n_mcgrps)) + return -EINVAL; + group = family->mcgrp_offset + group; return netlink_set_err(net->genl_sock, portid, group, code); } diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 5ebe21c..39e0114 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -34,6 +34,11 @@ struct se_subsystem_api { sense_reason_t (*parse_cdb)(struct se_cmd *cmd); u32 (*get_device_type)(struct se_device *); sector_t (*get_blocks)(struct se_device *); + sector_t (*get_alignment_offset_lbas)(struct se_device *); + /* lbppbe = logical blocks per physical block exponent. see SBC-3 */ + unsigned int (*get_lbppbe)(struct se_device *); + unsigned int (*get_io_min)(struct se_device *); + unsigned int (*get_io_opt)(struct se_device *); unsigned char *(*get_sense_buffer)(struct se_cmd *); bool (*get_write_cache)(struct se_device *); }; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 5bdb8b7..45412a6 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -227,6 +227,7 @@ enum tcm_tmreq_table { /* fabric independent task management response values */ enum tcm_tmrsp_table { + TMR_FUNCTION_FAILED = 0, TMR_FUNCTION_COMPLETE = 1, TMR_TASK_DOES_NOT_EXIST = 2, TMR_LUN_DOES_NOT_EXIST = 3, @@ -282,11 +283,12 @@ struct t10_alua_lu_gp_member { struct t10_alua_tg_pt_gp { u16 tg_pt_gp_id; int tg_pt_gp_valid_id; + int tg_pt_gp_alua_supported_states; int tg_pt_gp_alua_access_status; int tg_pt_gp_alua_access_type; int tg_pt_gp_nonop_delay_msecs; int tg_pt_gp_trans_delay_msecs; - int tg_pt_gp_implict_trans_secs; + int tg_pt_gp_implicit_trans_secs; int tg_pt_gp_pref; int tg_pt_gp_write_metadata; /* Used by struct t10_alua_tg_pt_gp->tg_pt_gp_md_buf_len */ @@ -442,7 +444,6 @@ struct se_cmd { /* Used for sense data */ void *sense_buffer; struct list_head se_delayed_node; - struct list_head se_lun_node; struct list_head se_qf_node; struct se_device *se_dev; struct se_dev_entry *se_deve; @@ -470,15 +471,11 @@ struct se_cmd { #define CMD_T_SENT (1 << 4) #define CMD_T_STOP (1 << 5) #define CMD_T_FAILED (1 << 6) -#define CMD_T_LUN_STOP (1 << 7) -#define CMD_T_LUN_FE_STOP (1 << 8) -#define CMD_T_DEV_ACTIVE (1 << 9) -#define CMD_T_REQUEST_STOP (1 << 10) -#define CMD_T_BUSY (1 << 11) +#define CMD_T_DEV_ACTIVE (1 << 7) +#define CMD_T_REQUEST_STOP (1 << 8) +#define CMD_T_BUSY (1 << 9) spinlock_t t_state_lock; struct completion t_transport_stop_comp; - struct completion transport_lun_fe_stop_comp; - struct completion transport_lun_stop_comp; struct work_struct work; @@ -498,6 +495,9 @@ struct se_cmd { /* backend private data */ void *priv; + + /* Used for lun->lun_ref counting */ + bool lun_ref_active; }; struct se_ua { @@ -628,6 +628,34 @@ struct se_dev_attrib { struct config_group da_group; }; +struct se_port_stat_grps { + struct config_group stat_group; + struct config_group scsi_port_group; + struct config_group scsi_tgt_port_group; + struct config_group scsi_transport_group; +}; + +struct se_lun { +#define SE_LUN_LINK_MAGIC 0xffff7771 + u32 lun_link_magic; + /* See transport_lun_status_table */ + enum transport_lun_status_table lun_status; + u32 lun_access; + u32 lun_flags; + u32 unpacked_lun; + atomic_t lun_acl_count; + spinlock_t lun_acl_lock; + spinlock_t lun_sep_lock; + struct completion lun_shutdown_comp; + struct list_head lun_acl_list; + struct se_device *lun_se_dev; + struct se_port *lun_sep; + struct config_group lun_group; + struct se_port_stat_grps port_stat_grps; + struct completion lun_ref_comp; + struct percpu_ref lun_ref; +}; + struct se_dev_stat_grps { struct config_group stat_group; struct config_group scsi_dev_group; @@ -656,11 +684,10 @@ struct se_device { /* Pointer to transport specific device structure */ u32 dev_index; u64 creation_time; - u32 num_resets; - u64 num_cmds; - u64 read_bytes; - u64 write_bytes; - spinlock_t stats_lock; + atomic_long_t num_resets; + atomic_long_t num_cmds; + atomic_long_t read_bytes; + atomic_long_t write_bytes; /* Active commands on this virtual SE device */ atomic_t simple_cmds; atomic_t dev_ordered_id; @@ -711,6 +738,7 @@ struct se_device { struct se_subsystem_api *transport; /* Linked list for struct se_hba struct se_device list */ struct list_head dev_list; + struct se_lun xcopy_lun; }; struct se_hba { @@ -730,34 +758,6 @@ struct se_hba { struct se_subsystem_api *transport; }; -struct se_port_stat_grps { - struct config_group stat_group; - struct config_group scsi_port_group; - struct config_group scsi_tgt_port_group; - struct config_group scsi_transport_group; -}; - -struct se_lun { -#define SE_LUN_LINK_MAGIC 0xffff7771 - u32 lun_link_magic; - /* See transport_lun_status_table */ - enum transport_lun_status_table lun_status; - u32 lun_access; - u32 lun_flags; - u32 unpacked_lun; - atomic_t lun_acl_count; - spinlock_t lun_acl_lock; - spinlock_t lun_cmd_lock; - spinlock_t lun_sep_lock; - struct completion lun_shutdown_comp; - struct list_head lun_cmd_list; - struct list_head lun_acl_list; - struct se_device *lun_se_dev; - struct se_port *lun_sep; - struct config_group lun_group; - struct se_port_stat_grps port_stat_grps; -}; - struct scsi_port_stats { u64 cmd_pdus; u64 tx_data_octets; diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h index 713c500..e080138 100644 --- a/include/target/target_core_configfs.h +++ b/include/target/target_core_configfs.h @@ -54,4 +54,3 @@ struct target_fabric_configfs { struct target_fabric_configfs_template tf_cit_tmpl; }; -#define TF_CIT_TMPL(tf) (&(tf)->tf_cit_tmpl) diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 882b650e..4cf4fda 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -137,6 +137,8 @@ void transport_generic_request_failure(struct se_cmd *, sense_reason_t); void __target_execute_cmd(struct se_cmd *); int transport_lookup_tmr_lun(struct se_cmd *, u32); +struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, + unsigned char *); struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *, unsigned char *); void core_tpg_clear_object_luns(struct se_portal_group *); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index f18b3b7..4832d75 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -162,12 +162,14 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, { EXTENT_FLAG_LOGGING, "LOGGING" }, \ { EXTENT_FLAG_FILLING, "FILLING" }) -TRACE_EVENT(btrfs_get_extent, +TRACE_EVENT_CONDITION(btrfs_get_extent, TP_PROTO(struct btrfs_root *root, struct extent_map *map), TP_ARGS(root, map), + TP_CONDITION(map), + TP_STRUCT__entry( __field( u64, root_objectid ) __field( u64, start ) diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 46d41e8..2f3f7ea 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -981,6 +981,8 @@ struct drm_radeon_cs { #define RADEON_INFO_SI_TILE_MODE_ARRAY 0x16 /* query if CP DMA is supported on the compute ring */ #define RADEON_INFO_SI_CP_DMA_COMPUTE 0x17 +/* CIK macrotile mode array */ +#define RADEON_INFO_CIK_MACROTILE_MODE_ARRAY 0x18 struct drm_radeon_info { diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index db0b825..44b05a0 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -68,6 +68,9 @@ #define AUDIT_MAKE_EQUIV 1015 /* Append to watched tree */ #define AUDIT_TTY_GET 1016 /* Get TTY auditing status */ #define AUDIT_TTY_SET 1017 /* Set TTY auditing status */ +#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */ +#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */ +#define AUDIT_FEATURE_CHANGE 1020 /* audit log listing feature changes */ #define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */ #define AUDIT_USER_AVC 1107 /* We filter this differently */ @@ -357,6 +360,12 @@ enum { #define AUDIT_PERM_READ 4 #define AUDIT_PERM_ATTR 8 +/* MAX_AUDIT_MESSAGE_LENGTH is set in audit:lib/libaudit.h as: + * 8970 // PATH_MAX*2+CONTEXT_SIZE*2+11+256+1 + * max header+body+tailer: 44 + 29 + 32 + 262 + 7 + pad + */ +#define AUDIT_MESSAGE_TEXT_MAX 8560 + struct audit_status { __u32 mask; /* Bit mask for valid entries */ __u32 enabled; /* 1 = enabled, 0 = disabled */ @@ -368,11 +377,28 @@ struct audit_status { __u32 backlog; /* messages waiting in queue */ }; +struct audit_features { +#define AUDIT_FEATURE_VERSION 1 + __u32 vers; + __u32 mask; /* which bits we are dealing with */ + __u32 features; /* which feature to enable/disable */ + __u32 lock; /* which features to lock */ +}; + +#define AUDIT_FEATURE_ONLY_UNSET_LOGINUID 0 +#define AUDIT_FEATURE_LOGINUID_IMMUTABLE 1 +#define AUDIT_LAST_FEATURE AUDIT_FEATURE_LOGINUID_IMMUTABLE + +#define audit_feature_valid(x) ((x) >= 0 && (x) <= AUDIT_LAST_FEATURE) +#define AUDIT_FEATURE_TO_MASK(x) (1 << ((x) & 31)) /* mask for __u32 */ + struct audit_tty_status { __u32 enabled; /* 1 = enabled, 0 = disabled */ __u32 log_passwd; /* 1 = enabled, 0 = disabled */ }; +#define AUDIT_UID_UNSET (unsigned int)-1 + /* audit_rule_data supports filter rules with both integer and string * fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and * AUDIT_LIST_RULES requests. diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h new file mode 100644 index 0000000..ca18c45 --- /dev/null +++ b/include/uapi/linux/hash_info.h @@ -0,0 +1,37 @@ +/* + * Hash Info: Hash algorithms information + * + * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _UAPI_LINUX_HASH_INFO_H +#define _UAPI_LINUX_HASH_INFO_H + +enum hash_algo { + HASH_ALGO_MD4, + HASH_ALGO_MD5, + HASH_ALGO_SHA1, + HASH_ALGO_RIPE_MD_160, + HASH_ALGO_SHA256, + HASH_ALGO_SHA384, + HASH_ALGO_SHA512, + HASH_ALGO_SHA224, + HASH_ALGO_RIPE_MD_128, + HASH_ALGO_RIPE_MD_256, + HASH_ALGO_RIPE_MD_320, + HASH_ALGO_WP_256, + HASH_ALGO_WP_384, + HASH_ALGO_WP_512, + HASH_ALGO_TGR_128, + HASH_ALGO_TGR_160, + HASH_ALGO_TGR_192, + HASH_ALGO__LAST +}; + +#endif /* _UAPI_LINUX_HASH_INFO_H */ diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index c9b7f4fa..840cb99 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -56,5 +56,6 @@ #define KEYCTL_REJECT 19 /* reject a partially constructed key */ #define KEYCTL_INSTANTIATE_IOV 20 /* instantiate a partially constructed key */ #define KEYCTL_INVALIDATE 21 /* invalidate a key */ +#define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */ #endif /* _LINUX_KEYCTL_H */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 0890556..4a98e85 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -13,10 +13,10 @@ * PCI to PCI Bridge Specification * PCI System Design Guide * - * For hypertransport information, please consult the following manuals - * from http://www.hypertransport.org + * For HyperTransport information, please consult the following manuals + * from http://www.hypertransport.org * - * The Hypertransport I/O Link Specification + * The HyperTransport I/O Link Specification */ #ifndef LINUX_PCI_REGS_H @@ -37,7 +37,7 @@ #define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ #define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ #define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ -#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ #define PCI_COMMAND_SERR 0x100 /* Enable SERR */ #define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ #define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ @@ -45,7 +45,7 @@ #define PCI_STATUS 0x06 /* 16 bits */ #define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */ #define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ -#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 MHz PCI 2.1 bus */ #define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ #define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ #define PCI_STATUS_PARITY 0x100 /* Detected parity error */ @@ -205,14 +205,14 @@ #define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ #define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ #define PCI_CAP_ID_HT 0x08 /* HyperTransport */ -#define PCI_CAP_ID_VNDR 0x09 /* Vendor specific */ +#define PCI_CAP_ID_VNDR 0x09 /* Vendor-Specific */ #define PCI_CAP_ID_DBG 0x0A /* Debug port */ #define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */ -#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ +#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ #define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ #define PCI_CAP_ID_SECDEV 0x0F /* Secure Device */ -#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ #define PCI_CAP_ID_SATA 0x12 /* SATA Data/Index Conf. */ #define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ @@ -268,8 +268,8 @@ #define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ #define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ #define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ -#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ -#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ #define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ #define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ #define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ @@ -321,7 +321,7 @@ #define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ #define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ -/* MSI-X entry's format */ +/* MSI-X Table entry format */ #define PCI_MSIX_ENTRY_SIZE 16 #define PCI_MSIX_ENTRY_LOWER_ADDR 0 #define PCI_MSIX_ENTRY_UPPER_ADDR 4 @@ -372,7 +372,7 @@ #define PCI_X_CMD_SPLIT_16 0x0060 /* Max 16 */ #define PCI_X_CMD_SPLIT_32 0x0070 /* Max 32 */ #define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ -#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ #define PCI_X_STATUS 4 /* PCI-X capabilities */ #define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ #define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ @@ -407,8 +407,8 @@ /* PCI Bridge Subsystem ID registers */ -#define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */ -#define PCI_SSVID_DEVICE_ID 6 /* PCI-Bridge subsystem device id register */ +#define PCI_SSVID_VENDOR_ID 4 /* PCI Bridge subsystem vendor ID */ +#define PCI_SSVID_DEVICE_ID 6 /* PCI Bridge subsystem device ID */ /* PCI Express capability registers */ @@ -484,12 +484,12 @@ #define PCI_EXP_LNKCTL_CLKREQ_EN 0x0100 /* Enable clkreq */ #define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ #define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ -#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Lnk Autonomous Bandwidth Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ #define PCI_EXP_LNKSTA 18 /* Link Status */ #define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ -#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Nogotiated Link Width */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ #define PCI_EXP_LNKSTA_NLW_SHIFT 4 /* start of NLW mask in link status */ #define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ #define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ @@ -593,7 +593,7 @@ #define PCI_EXT_CAP_ID_MFVC 0x08 /* Multi-Function VC Capability */ #define PCI_EXT_CAP_ID_VC9 0x09 /* same as _VC */ #define PCI_EXT_CAP_ID_RCRB 0x0A /* Root Complex RB? */ -#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor Specific */ +#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor-Specific */ #define PCI_EXT_CAP_ID_CAC 0x0C /* Config Access - obsolete */ #define PCI_EXT_CAP_ID_ACS 0x0D /* Access Control Services */ #define PCI_EXT_CAP_ID_ARI 0x0E /* Alternate Routing ID */ @@ -602,12 +602,12 @@ #define PCI_EXT_CAP_ID_MRIOV 0x11 /* Multi Root I/O Virtualization */ #define PCI_EXT_CAP_ID_MCAST 0x12 /* Multicast */ #define PCI_EXT_CAP_ID_PRI 0x13 /* Page Request Interface */ -#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* reserved for AMD */ -#define PCI_EXT_CAP_ID_REBAR 0x15 /* resizable BAR */ -#define PCI_EXT_CAP_ID_DPA 0x16 /* dynamic power alloc */ -#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH request */ -#define PCI_EXT_CAP_ID_LTR 0x18 /* latency tolerance reporting */ -#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe */ +#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* Reserved for AMD */ +#define PCI_EXT_CAP_ID_REBAR 0x15 /* Resizable BAR */ +#define PCI_EXT_CAP_ID_DPA 0x16 /* Dynamic Power Allocation */ +#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH Requester */ +#define PCI_EXT_CAP_ID_LTR 0x18 /* Latency Tolerance Reporting */ +#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe Capability */ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PASID @@ -667,9 +667,9 @@ #define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ /* Multi ERR_COR Received */ #define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 -/* ERR_FATAL/NONFATAL Recevied */ +/* ERR_FATAL/NONFATAL Received */ #define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 -/* Multi ERR_FATAL/NONFATAL Recevied */ +/* Multi ERR_FATAL/NONFATAL Received */ #define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 #define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ #define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ @@ -678,7 +678,7 @@ /* Virtual Channel */ #define PCI_VC_PORT_REG1 4 -#define PCI_VC_REG1_EVCC 0x7 /* extended vc count */ +#define PCI_VC_REG1_EVCC 0x7 /* extended VC count */ #define PCI_VC_PORT_REG2 8 #define PCI_VC_REG2_32_PHASE 0x2 #define PCI_VC_REG2_64_PHASE 0x4 @@ -711,7 +711,7 @@ #define PCI_VNDR_HEADER_LEN(x) (((x) >> 20) & 0xfff) /* - * Hypertransport sub capability types + * HyperTransport sub capability types * * Unfortunately there are both 3 bit and 5 bit capability types defined * in the HT spec, catering for that is a little messy. You probably don't @@ -739,8 +739,8 @@ #define HT_CAPTYPE_DIRECT_ROUTE 0xB0 /* Direct routing configuration */ #define HT_CAPTYPE_VCSET 0xB8 /* Virtual Channel configuration */ #define HT_CAPTYPE_ERROR_RETRY 0xC0 /* Retry on error configuration */ -#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 hypertransport configuration */ -#define HT_CAPTYPE_PM 0xE0 /* Hypertransport powermanagement configuration */ +#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 HyperTransport configuration */ +#define HT_CAPTYPE_PM 0xE0 /* HyperTransport power management configuration */ #define HT_CAP_SIZEOF_LONG 28 /* slave & primary */ #define HT_CAP_SIZEOF_SHORT 24 /* host & secondary */ @@ -777,14 +777,14 @@ #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ #define PCI_EXT_CAP_PRI_SIZEOF 16 -/* PASID capability */ +/* Process Address Space ID */ #define PCI_PASID_CAP 0x04 /* PASID feature register */ #define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */ -#define PCI_PASID_CAP_PRIV 0x04 /* Priviledge Mode Supported */ +#define PCI_PASID_CAP_PRIV 0x04 /* Privilege Mode Supported */ #define PCI_PASID_CTRL 0x06 /* PASID control register */ #define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */ #define PCI_PASID_CTRL_EXEC 0x02 /* Exec permissions Enable */ -#define PCI_PASID_CTRL_PRIV 0x04 /* Priviledge Mode Enable */ +#define PCI_PASID_CTRL_PRIV 0x04 /* Privilege Mode Enable */ #define PCI_EXT_CAP_PASID_SIZEOF 8 /* Single Root I/O Virtualization */ @@ -839,22 +839,22 @@ #define PCI_ACS_CTRL 0x06 /* ACS Control Register */ #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ -#define PCI_VSEC_HDR 4 /* extended cap - vendor specific */ +#define PCI_VSEC_HDR 4 /* extended cap - vendor-specific */ #define PCI_VSEC_HDR_LEN_SHIFT 20 /* shift for length field */ -/* sata capability */ +/* SATA capability */ #define PCI_SATA_REGS 4 /* SATA REGs specifier */ #define PCI_SATA_REGS_MASK 0xF /* location - BAR#/inline */ #define PCI_SATA_REGS_INLINE 0xF /* REGS in config space */ #define PCI_SATA_SIZEOF_SHORT 8 #define PCI_SATA_SIZEOF_LONG 16 -/* resizable BARs */ +/* Resizable BARs */ #define PCI_REBAR_CTRL 8 /* control register */ #define PCI_REBAR_CTRL_NBAR_MASK (7 << 5) /* mask for # bars */ #define PCI_REBAR_CTRL_NBAR_SHIFT 5 /* shift for # bars */ -/* dynamic power allocation */ +/* Dynamic Power Allocation */ #define PCI_DPA_CAP 4 /* capability register */ #define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ #define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ diff --git a/init/Kconfig b/init/Kconfig index 3fc8a2f..79383d3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -301,20 +301,6 @@ config AUDIT_TREE depends on AUDITSYSCALL select FSNOTIFY -config AUDIT_LOGINUID_IMMUTABLE - bool "Make audit loginuid immutable" - depends on AUDIT - help - The config option toggles if a task setting its loginuid requires - CAP_SYS_AUDITCONTROL or if that task should require no special permissions - but should instead only allow setting its loginuid if it was never - previously set. On systems which use systemd or a similar central - process to restart login services this should be set to true. On older - systems in which an admin would typically have to directly stop and - start processes this should be set to false. Setting this to true allows - one to drop potentially dangerous capabilites from the login tasks, - but may not be backwards compatible with older init systems. - source "kernel/irq/Kconfig" source "kernel/time/Kconfig" @@ -1669,6 +1655,18 @@ config BASE_SMALL default 0 if BASE_FULL default 1 if !BASE_FULL +config SYSTEM_TRUSTED_KEYRING + bool "Provide system-wide ring of trusted keys" + depends on KEYS + help + Provide a system keyring to which trusted keys can be added. Keys in + the keyring are considered to be trusted. Keys may be added at will + by the kernel from compiled-in data and from hardware key stores, but + userspace may only add extra keys if those keys can be verified by + keys already in the keyring. + + Keys in this keyring are used by module signature checking. + menuconfig MODULES bool "Enable loadable module support" option modules @@ -1742,6 +1740,7 @@ config MODULE_SRCVERSION_ALL config MODULE_SIG bool "Module signature verification" depends on MODULES + select SYSTEM_TRUSTED_KEYRING select KEYS select CRYPTO select ASYMMETRIC_KEY_TYPE diff --git a/init/main.c b/init/main.c index 01573fd..febc511 100644 --- a/init/main.c +++ b/init/main.c @@ -476,7 +476,7 @@ static void __init mm_init(void) mem_init(); kmem_cache_init(); percpu_init_late(); - pgtable_init(); + pgtable_cache_init(); vmalloc_init(); } @@ -208,15 +208,18 @@ static void shm_open(struct vm_area_struct *vma) */ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) { + struct file *shm_file; + + shm_file = shp->shm_file; + shp->shm_file = NULL; ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shm_rmid(ns, shp); shm_unlock(shp); - if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0, shp->mlock_user); + if (!is_file_hugepages(shm_file)) + shmem_lock(shm_file, 0, shp->mlock_user); else if (shp->mlock_user) - user_shm_unlock(file_inode(shp->shm_file)->i_size, - shp->mlock_user); - fput (shp->shm_file); + user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user); + fput(shm_file); ipc_rcu_putref(shp, shm_rcu_free); } @@ -974,15 +977,25 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) ipc_lock_object(&shp->shm_perm); if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { kuid_t euid = current_euid(); - err = -EPERM; if (!uid_eq(euid, shp->shm_perm.uid) && - !uid_eq(euid, shp->shm_perm.cuid)) + !uid_eq(euid, shp->shm_perm.cuid)) { + err = -EPERM; goto out_unlock0; - if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) + } + if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { + err = -EPERM; goto out_unlock0; + } } shm_file = shp->shm_file; + + /* check if shm_destroy() is tearing down shp */ + if (shm_file == NULL) { + err = -EIDRM; + goto out_unlock0; + } + if (is_file_hugepages(shm_file)) goto out_unlock0; @@ -1101,6 +1114,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto out_unlock; ipc_lock_object(&shp->shm_perm); + + /* check if shm_destroy() is tearing down shp */ + if (shp->shm_file == NULL) { + ipc_unlock_object(&shp->shm_perm); + err = -EIDRM; + goto out_unlock; + } + path = shp->shm_file->f_path; path_get(&path); shp->shm_nattch++; diff --git a/kernel/Makefile b/kernel/Makefile index 09a9c94..bbaf7d5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -41,8 +41,9 @@ ifneq ($(CONFIG_SMP),y) obj-y += up.o endif obj-$(CONFIG_UID16) += uid16.o +obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o +obj-$(CONFIG_MODULE_SIG) += module_signing.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o @@ -122,19 +123,52 @@ targets += timeconst.h $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE $(call if_changed,bc) -ifeq ($(CONFIG_MODULE_SIG),y) +############################################################################### +# +# Roll all the X.509 certificates that we can find together and pull them into +# the kernel so that they get loaded into the system trusted keyring during +# boot. # -# Pull the signing certificate and any extra certificates into the kernel +# We look in the source root and the build root for all files whose name ends +# in ".x509". Unfortunately, this will generate duplicate filenames, so we +# have make canonicalise the pathnames and then sort them to discard the +# duplicates. # +############################################################################### +ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y) +X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509) +X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509 +X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \ + $(or $(realpath $(CERT)),$(CERT)))) + +ifeq ($(X509_CERTIFICATES),) +$(warning *** No X.509 certificates found ***) +endif + +ifneq ($(wildcard $(obj)/.x509.list),) +ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES)) +$(info X.509 certificate list changed) +$(shell rm $(obj)/.x509.list) +endif +endif + +kernel/system_certificates.o: $(obj)/x509_certificate_list -quiet_cmd_touch = TOUCH $@ - cmd_touch = touch $@ +quiet_cmd_x509certs = CERTS $@ + cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo " - Including cert $(X509)") -extra_certificates: - $(call cmd,touch) +targets += $(obj)/x509_certificate_list +$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list + $(call if_changed,x509certs) -kernel/modsign_certificate.o: signing_key.x509 extra_certificates +targets += $(obj)/.x509.list +$(obj)/.x509.list: + @echo $(X509_CERTIFICATES) >$@ +clean-files := x509_certificate_list .x509.list +endif + +ifeq ($(CONFIG_MODULE_SIG),y) ############################################################################### # # If module signing is requested, say by allyesconfig, but a key has not been diff --git a/kernel/audit.c b/kernel/audit.c index 7b0e23a..906ae5a0 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -60,7 +60,6 @@ #ifdef CONFIG_SECURITY #include <linux/security.h> #endif -#include <net/netlink.h> #include <linux/freezer.h> #include <linux/tty.h> #include <linux/pid_namespace.h> @@ -140,6 +139,17 @@ static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); +static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION, + .mask = -1, + .features = 0, + .lock = 0,}; + +static char *audit_feature_names[2] = { + "only_unset_loginuid", + "loginuid_immutable", +}; + + /* Serialize requests from userspace. */ DEFINE_MUTEX(audit_cmd_mutex); @@ -584,6 +594,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) return -EOPNOTSUPP; case AUDIT_GET: case AUDIT_SET: + case AUDIT_GET_FEATURE: + case AUDIT_SET_FEATURE: case AUDIT_LIST_RULES: case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: @@ -613,7 +625,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type) int rc = 0; uid_t uid = from_kuid(&init_user_ns, current_uid()); - if (!audit_enabled) { + if (!audit_enabled && msg_type != AUDIT_USER_AVC) { *ab = NULL; return rc; } @@ -628,6 +640,94 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type) return rc; } +int is_audit_feature_set(int i) +{ + return af.features & AUDIT_FEATURE_TO_MASK(i); +} + + +static int audit_get_feature(struct sk_buff *skb) +{ + u32 seq; + + seq = nlmsg_hdr(skb)->nlmsg_seq; + + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, + &af, sizeof(af)); + + return 0; +} + +static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature, + u32 old_lock, u32 new_lock, int res) +{ + struct audit_buffer *ab; + + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE); + audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d", + audit_feature_names[which], !!old_feature, !!new_feature, + !!old_lock, !!new_lock, res); + audit_log_end(ab); +} + +static int audit_set_feature(struct sk_buff *skb) +{ + struct audit_features *uaf; + int i; + + BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0])); + uaf = nlmsg_data(nlmsg_hdr(skb)); + + /* if there is ever a version 2 we should handle that here */ + + for (i = 0; i <= AUDIT_LAST_FEATURE; i++) { + u32 feature = AUDIT_FEATURE_TO_MASK(i); + u32 old_feature, new_feature, old_lock, new_lock; + + /* if we are not changing this feature, move along */ + if (!(feature & uaf->mask)) + continue; + + old_feature = af.features & feature; + new_feature = uaf->features & feature; + new_lock = (uaf->lock | af.lock) & feature; + old_lock = af.lock & feature; + + /* are we changing a locked feature? */ + if ((af.lock & feature) && (new_feature != old_feature)) { + audit_log_feature_change(i, old_feature, new_feature, + old_lock, new_lock, 0); + return -EPERM; + } + } + /* nothing invalid, do the changes */ + for (i = 0; i <= AUDIT_LAST_FEATURE; i++) { + u32 feature = AUDIT_FEATURE_TO_MASK(i); + u32 old_feature, new_feature, old_lock, new_lock; + + /* if we are not changing this feature, move along */ + if (!(feature & uaf->mask)) + continue; + + old_feature = af.features & feature; + new_feature = uaf->features & feature; + old_lock = af.lock & feature; + new_lock = (uaf->lock | af.lock) & feature; + + if (new_feature != old_feature) + audit_log_feature_change(i, old_feature, new_feature, + old_lock, new_lock, 1); + + if (new_feature) + af.features |= feature; + else + af.features &= ~feature; + af.lock |= new_lock; + } + + return 0; +} + static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; @@ -659,6 +759,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) switch (msg_type) { case AUDIT_GET: + memset(&status_set, 0, sizeof(status_set)); status_set.enabled = audit_enabled; status_set.failure = audit_failure; status_set.pid = audit_pid; @@ -670,7 +771,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) &status_set, sizeof(status_set)); break; case AUDIT_SET: - if (nlh->nlmsg_len < sizeof(struct audit_status)) + if (nlmsg_len(nlh) < sizeof(struct audit_status)) return -EINVAL; status_get = (struct audit_status *)data; if (status_get->mask & AUDIT_STATUS_ENABLED) { @@ -699,6 +800,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) err = audit_set_backlog_limit(status_get->backlog_limit); break; + case AUDIT_GET_FEATURE: + err = audit_get_feature(skb); + if (err) + return err; + break; + case AUDIT_SET_FEATURE: + err = audit_set_feature(skb); + if (err) + return err; + break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: @@ -715,7 +826,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } audit_log_common_recv_msg(&ab, msg_type); if (msg_type != AUDIT_USER_TTY) - audit_log_format(ab, " msg='%.1024s'", + audit_log_format(ab, " msg='%.*s'", + AUDIT_MESSAGE_TEXT_MAX, (char *)data); else { int size; @@ -818,7 +930,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct task_struct *tsk = current; spin_lock(&tsk->sighand->siglock); - s.enabled = tsk->signal->audit_tty != 0; + s.enabled = tsk->signal->audit_tty; s.log_passwd = tsk->signal->audit_tty_log_passwd; spin_unlock(&tsk->sighand->siglock); @@ -832,7 +944,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len)); + memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) return -EINVAL; @@ -1067,13 +1179,6 @@ static void wait_for_auditd(unsigned long sleep_time) remove_wait_queue(&audit_backlog_wait, &wait); } -/* Obtain an audit buffer. This routine does locking to obtain the - * audit buffer, but then no locking is required for calls to - * audit_log_*format. If the tsk is a task that is currently in a - * syscall, then the syscall is marked as auditable and an audit record - * will be written at syscall exit. If there is no associated task, tsk - * should be NULL. */ - /** * audit_log_start - obtain an audit buffer * @ctx: audit_context (may be NULL) @@ -1389,7 +1494,7 @@ void audit_log_session_info(struct audit_buffer *ab) u32 sessionid = audit_get_sessionid(current); uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current)); - audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid); + audit_log_format(ab, " auid=%u ses=%u", auid, sessionid); } void audit_log_key(struct audit_buffer *ab, char *key) @@ -1536,6 +1641,26 @@ void audit_log_name(struct audit_context *context, struct audit_names *n, } } + /* log the audit_names record type */ + audit_log_format(ab, " nametype="); + switch(n->type) { + case AUDIT_TYPE_NORMAL: + audit_log_format(ab, "NORMAL"); + break; + case AUDIT_TYPE_PARENT: + audit_log_format(ab, "PARENT"); + break; + case AUDIT_TYPE_CHILD_DELETE: + audit_log_format(ab, "DELETE"); + break; + case AUDIT_TYPE_CHILD_CREATE: + audit_log_format(ab, "CREATE"); + break; + default: + audit_log_format(ab, "UNKNOWN"); + break; + } + audit_log_fcaps(ab, n); audit_log_end(ab); } diff --git a/kernel/audit.h b/kernel/audit.h index 123c9b7..b779642 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -197,6 +197,9 @@ struct audit_context { int fd; int flags; } mmap; + struct { + int argc; + } execve; }; int fds[2]; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index f7aee8b..51f3fd4 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -343,6 +343,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) case AUDIT_DEVMINOR: case AUDIT_EXIT: case AUDIT_SUCCESS: + case AUDIT_INODE: /* bit ops are only useful on syscall args */ if (f->op == Audit_bitmask || f->op == Audit_bittest) return -EINVAL; @@ -423,7 +424,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->lsm_rule = NULL; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) { + if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; f->val = 0; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9845cb3..90594c9 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -95,13 +95,6 @@ struct audit_aux_data { /* Number of target pids per aux struct. */ #define AUDIT_AUX_PIDS 16 -struct audit_aux_data_execve { - struct audit_aux_data d; - int argc; - int envc; - struct mm_struct *mm; -}; - struct audit_aux_data_pids { struct audit_aux_data d; pid_t target_pid[AUDIT_AUX_PIDS]; @@ -121,12 +114,6 @@ struct audit_aux_data_bprm_fcaps { struct audit_cap_data new_pcap; }; -struct audit_aux_data_capset { - struct audit_aux_data d; - pid_t pid; - struct audit_cap_data cap; -}; - struct audit_tree_refs { struct audit_tree_refs *next; struct audit_chunk *c[31]; @@ -566,7 +553,7 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_INODE: if (name) - result = (name->ino == f->val); + result = audit_comparator(name->ino, f->op, f->val); else if (ctx) { list_for_each_entry(n, &ctx->names_list, list) { if (audit_comparator(n->ino, f->op, f->val)) { @@ -943,8 +930,10 @@ int audit_alloc(struct task_struct *tsk) return 0; /* Return if not auditing. */ state = audit_filter_task(tsk, &key); - if (state == AUDIT_DISABLED) + if (state == AUDIT_DISABLED) { + clear_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT); return 0; + } if (!(context = audit_alloc_context(state))) { kfree(key); @@ -1149,20 +1138,16 @@ static int audit_log_single_execve_arg(struct audit_context *context, } static void audit_log_execve_info(struct audit_context *context, - struct audit_buffer **ab, - struct audit_aux_data_execve *axi) + struct audit_buffer **ab) { int i, len; size_t len_sent = 0; const char __user *p; char *buf; - if (axi->mm != current->mm) - return; /* execve failed, no additional info */ - - p = (const char __user *)axi->mm->arg_start; + p = (const char __user *)current->mm->arg_start; - audit_log_format(*ab, "argc=%d", axi->argc); + audit_log_format(*ab, "argc=%d", context->execve.argc); /* * we need some kernel buffer to hold the userspace args. Just @@ -1176,7 +1161,7 @@ static void audit_log_execve_info(struct audit_context *context, return; } - for (i = 0; i < axi->argc; i++) { + for (i = 0; i < context->execve.argc; i++) { len = audit_log_single_execve_arg(context, ab, i, &len_sent, p, buf); if (len <= 0) @@ -1279,6 +1264,9 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd, context->mmap.flags); break; } + case AUDIT_EXECVE: { + audit_log_execve_info(context, &ab); + break; } } audit_log_end(ab); } @@ -1325,11 +1313,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts switch (aux->type) { - case AUDIT_EXECVE: { - struct audit_aux_data_execve *axi = (void *)aux; - audit_log_execve_info(context, &ab, axi); - break; } - case AUDIT_BPRM_FCAPS: { struct audit_aux_data_bprm_fcaps *axs = (void *)aux; audit_log_format(ab, "fver=%x", axs->fcap_ver); @@ -1964,6 +1947,43 @@ int auditsc_get_stamp(struct audit_context *ctx, /* global counter which is incremented every time something logs in */ static atomic_t session_id = ATOMIC_INIT(0); +static int audit_set_loginuid_perm(kuid_t loginuid) +{ + /* if we are unset, we don't need privs */ + if (!audit_loginuid_set(current)) + return 0; + /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/ + if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE)) + return -EPERM; + /* it is set, you need permission */ + if (!capable(CAP_AUDIT_CONTROL)) + return -EPERM; + /* reject if this is not an unset and we don't allow that */ + if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid)) + return -EPERM; + return 0; +} + +static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, + unsigned int oldsessionid, unsigned int sessionid, + int rc) +{ + struct audit_buffer *ab; + uid_t uid, ologinuid, nloginuid; + + uid = from_kuid(&init_user_ns, task_uid(current)); + ologinuid = from_kuid(&init_user_ns, koldloginuid); + nloginuid = from_kuid(&init_user_ns, kloginuid), + + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); + if (!ab) + return; + audit_log_format(ab, "pid=%d uid=%u old auid=%u new auid=%u old " + "ses=%u new ses=%u res=%d", current->pid, uid, ologinuid, + nloginuid, oldsessionid, sessionid, !rc); + audit_log_end(ab); +} + /** * audit_set_loginuid - set current task's audit_context loginuid * @loginuid: loginuid value @@ -1975,37 +1995,26 @@ static atomic_t session_id = ATOMIC_INIT(0); int audit_set_loginuid(kuid_t loginuid) { struct task_struct *task = current; - struct audit_context *context = task->audit_context; - unsigned int sessionid; + unsigned int oldsessionid, sessionid = (unsigned int)-1; + kuid_t oldloginuid; + int rc; -#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE - if (audit_loginuid_set(task)) - return -EPERM; -#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ - if (!capable(CAP_AUDIT_CONTROL)) - return -EPERM; -#endif /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ + oldloginuid = audit_get_loginuid(current); + oldsessionid = audit_get_sessionid(current); - sessionid = atomic_inc_return(&session_id); - if (context && context->in_syscall) { - struct audit_buffer *ab; + rc = audit_set_loginuid_perm(loginuid); + if (rc) + goto out; + + /* are we setting or clearing? */ + if (uid_valid(loginuid)) + sessionid = atomic_inc_return(&session_id); - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); - if (ab) { - audit_log_format(ab, "login pid=%d uid=%u " - "old auid=%u new auid=%u" - " old ses=%u new ses=%u", - task->pid, - from_kuid(&init_user_ns, task_uid(task)), - from_kuid(&init_user_ns, task->loginuid), - from_kuid(&init_user_ns, loginuid), - task->sessionid, sessionid); - audit_log_end(ab); - } - } task->sessionid = sessionid; task->loginuid = loginuid; - return 0; +out: + audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc); + return rc; } /** @@ -2126,22 +2135,12 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo context->ipc.has_perm = 1; } -int __audit_bprm(struct linux_binprm *bprm) +void __audit_bprm(struct linux_binprm *bprm) { - struct audit_aux_data_execve *ax; struct audit_context *context = current->audit_context; - ax = kmalloc(sizeof(*ax), GFP_KERNEL); - if (!ax) - return -ENOMEM; - - ax->argc = bprm->argc; - ax->envc = bprm->envc; - ax->mm = bprm->mm; - ax->d.type = AUDIT_EXECVE; - ax->d.next = context->aux; - context->aux = (void *)ax; - return 0; + context->type = AUDIT_EXECVE; + context->execve.argc = bprm->argc; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e0839bcd..4c62513 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -895,11 +895,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) iput(inode); } -static int cgroup_delete(const struct dentry *d) -{ - return 1; -} - static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -1486,7 +1481,7 @@ static int cgroup_get_rootdir(struct super_block *sb) { static const struct dentry_operations cgroup_dops = { .d_iput = cgroup_diput, - .d_delete = cgroup_delete, + .d_delete = always_delete_dentry, }; struct inode *inode = diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S deleted file mode 100644 index 4a9a86d..0000000 --- a/kernel/modsign_certificate.S +++ /dev/null @@ -1,12 +0,0 @@ -#include <linux/export.h> - -#define GLOBAL(name) \ - .globl VMLINUX_SYMBOL(name); \ - VMLINUX_SYMBOL(name): - - .section ".init.data","aw" - -GLOBAL(modsign_certificate_list) - .incbin "signing_key.x509" - .incbin "extra_certificates" -GLOBAL(modsign_certificate_list_end) diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c deleted file mode 100644 index 7cbd450..0000000 --- a/kernel/modsign_pubkey.c +++ /dev/null @@ -1,104 +0,0 @@ -/* Public keys for module signature verification - * - * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/cred.h> -#include <linux/err.h> -#include <keys/asymmetric-type.h> -#include "module-internal.h" - -struct key *modsign_keyring; - -extern __initconst const u8 modsign_certificate_list[]; -extern __initconst const u8 modsign_certificate_list_end[]; - -/* - * We need to make sure ccache doesn't cache the .o file as it doesn't notice - * if modsign.pub changes. - */ -static __initconst const char annoy_ccache[] = __TIME__ "foo"; - -/* - * Load the compiled-in keys - */ -static __init int module_verify_init(void) -{ - pr_notice("Initialise module verification\n"); - - modsign_keyring = keyring_alloc(".module_sign", - KUIDT_INIT(0), KGIDT_INIT(0), - current_cred(), - ((KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA, NULL); - if (IS_ERR(modsign_keyring)) - panic("Can't allocate module signing keyring\n"); - - return 0; -} - -/* - * Must be initialised before we try and load the keys into the keyring. - */ -device_initcall(module_verify_init); - -/* - * Load the compiled-in keys - */ -static __init int load_module_signing_keys(void) -{ - key_ref_t key; - const u8 *p, *end; - size_t plen; - - pr_notice("Loading module verification certificates\n"); - - end = modsign_certificate_list_end; - p = modsign_certificate_list; - while (p < end) { - /* Each cert begins with an ASN.1 SEQUENCE tag and must be more - * than 256 bytes in size. - */ - if (end - p < 4) - goto dodgy_cert; - if (p[0] != 0x30 && - p[1] != 0x82) - goto dodgy_cert; - plen = (p[2] << 8) | p[3]; - plen += 4; - if (plen > end - p) - goto dodgy_cert; - - key = key_create_or_update(make_key_ref(modsign_keyring, 1), - "asymmetric", - NULL, - p, - plen, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW, - KEY_ALLOC_NOT_IN_QUOTA); - if (IS_ERR(key)) - pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n", - PTR_ERR(key)); - else - pr_notice("MODSIGN: Loaded cert '%s'\n", - key_ref_to_ptr(key)->description); - p += plen; - } - - return 0; - -dodgy_cert: - pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n"); - return 0; -} -late_initcall(load_module_signing_keys); diff --git a/kernel/module-internal.h b/kernel/module-internal.h index 24f9247..915e123 100644 --- a/kernel/module-internal.h +++ b/kernel/module-internal.h @@ -9,6 +9,4 @@ * 2 of the Licence, or (at your option) any later version. */ -extern struct key *modsign_keyring; - extern int mod_verify_sig(const void *mod, unsigned long *_modlen); diff --git a/kernel/module_signing.c b/kernel/module_signing.c index f2970bd..be5b8fa 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -14,6 +14,7 @@ #include <crypto/public_key.h> #include <crypto/hash.h> #include <keys/asymmetric-type.h> +#include <keys/system_keyring.h> #include "module-internal.h" /* @@ -28,7 +29,7 @@ */ struct module_signature { u8 algo; /* Public-key crypto algorithm [enum pkey_algo] */ - u8 hash; /* Digest algorithm [enum pkey_hash_algo] */ + u8 hash; /* Digest algorithm [enum hash_algo] */ u8 id_type; /* Key identifier type [enum pkey_id_type] */ u8 signer_len; /* Length of signer's name */ u8 key_id_len; /* Length of key identifier */ @@ -39,7 +40,7 @@ struct module_signature { /* * Digest the module contents. */ -static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash, +static struct public_key_signature *mod_make_digest(enum hash_algo hash, const void *mod, unsigned long modlen) { @@ -54,7 +55,7 @@ static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash, /* Allocate the hashing algorithm we're going to need and find out how * big the hash operational data will be. */ - tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0); + tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0); if (IS_ERR(tfm)) return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm); @@ -157,7 +158,7 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len, pr_debug("Look up: \"%s\"\n", id); - key = keyring_search(make_key_ref(modsign_keyring, 1), + key = keyring_search(make_key_ref(system_trusted_keyring, 1), &key_type_asymmetric, id); if (IS_ERR(key)) pr_warn("Request for unknown module key '%s' err %ld\n", @@ -217,7 +218,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen) return -ENOPKG; if (ms.hash >= PKEY_HASH__LAST || - !pkey_hash_algo[ms.hash]) + !hash_algo_name[ms.hash]) return -ENOPKG; key = request_asymmetric_key(sig, ms.signer_len, diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S new file mode 100644 index 0000000..4aef390 --- /dev/null +++ b/kernel/system_certificates.S @@ -0,0 +1,10 @@ +#include <linux/export.h> +#include <linux/init.h> + + __INITRODATA + + .globl VMLINUX_SYMBOL(system_certificate_list) +VMLINUX_SYMBOL(system_certificate_list): + .incbin "kernel/x509_certificate_list" + .globl VMLINUX_SYMBOL(system_certificate_list_end) +VMLINUX_SYMBOL(system_certificate_list_end): diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c new file mode 100644 index 0000000..564dd93 --- /dev/null +++ b/kernel/system_keyring.c @@ -0,0 +1,105 @@ +/* System trusted keyring for trusted public keys + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/cred.h> +#include <linux/err.h> +#include <keys/asymmetric-type.h> +#include <keys/system_keyring.h> +#include "module-internal.h" + +struct key *system_trusted_keyring; +EXPORT_SYMBOL_GPL(system_trusted_keyring); + +extern __initconst const u8 system_certificate_list[]; +extern __initconst const u8 system_certificate_list_end[]; + +/* + * Load the compiled-in keys + */ +static __init int system_trusted_keyring_init(void) +{ + pr_notice("Initialise system trusted keyring\n"); + + system_trusted_keyring = + keyring_alloc(".system_keyring", + KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH), + KEY_ALLOC_NOT_IN_QUOTA, NULL); + if (IS_ERR(system_trusted_keyring)) + panic("Can't allocate system trusted keyring\n"); + + set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags); + return 0; +} + +/* + * Must be initialised before we try and load the keys into the keyring. + */ +device_initcall(system_trusted_keyring_init); + +/* + * Load the compiled-in list of X.509 certificates. + */ +static __init int load_system_certificate_list(void) +{ + key_ref_t key; + const u8 *p, *end; + size_t plen; + + pr_notice("Loading compiled-in X.509 certificates\n"); + + end = system_certificate_list_end; + p = system_certificate_list; + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. + */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(system_trusted_keyring, 1), + "asymmetric", + NULL, + p, + plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_TRUSTED); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return 0; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); + return 0; +} +late_initcall(load_system_certificate_list); diff --git a/kernel/user.c b/kernel/user.c index 5bbb919..a3a0dbf 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,10 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, +#ifdef CONFIG_KEYS_KERBEROS_CACHE + .krb_cache_register_sem = + __RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem), +#endif }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 13fb113..240fb62 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -101,6 +101,9 @@ int create_user_ns(struct cred *new) set_cred_user_ns(new, ns); +#ifdef CONFIG_PERSISTENT_KEYRINGS + init_rwsem(&ns->persistent_keyring_register_sem); +#endif return 0; } @@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns) do { parent = ns->parent; +#ifdef CONFIG_PERSISTENT_KEYRINGS + key_put(ns->persistent_keyring_register); +#endif proc_free_inum(ns->proc_inum); kmem_cache_free(user_ns_cachep, ns); ns = parent; diff --git a/lib/Kconfig b/lib/Kconfig index 06dc742..991c98b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -322,6 +322,20 @@ config TEXTSEARCH_FSM config BTREE boolean +config ASSOCIATIVE_ARRAY + bool + help + Generic associative array. Can be searched and iterated over whilst + it is being modified. It is also reasonably quick to search and + modify. The algorithms are non-recursive, and the trees are highly + capacious. + + See: + + Documentation/assoc_array.txt + + for more information. + config HAS_IOMEM boolean depends on !NO_IOMEM diff --git a/lib/Makefile b/lib/Makefile index d480a8c..a459c31 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o percpu-refcount.o percpu_ida.o + earlycpio.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu_ida.o + percpu-refcount.o percpu_ida.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o @@ -47,6 +47,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o +obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o diff --git a/lib/assoc_array.c b/lib/assoc_array.c new file mode 100644 index 0000000..17edeaf --- /dev/null +++ b/lib/assoc_array.c @@ -0,0 +1,1746 @@ +/* Generic associative array implementation. + * + * See Documentation/assoc_array.txt for information. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +//#define DEBUG +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/assoc_array_priv.h> + +/* + * Iterate over an associative array. The caller must hold the RCU read lock + * or better. + */ +static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, + const struct assoc_array_ptr *stop, + int (*iterator)(const void *leaf, + void *iterator_data), + void *iterator_data) +{ + const struct assoc_array_shortcut *shortcut; + const struct assoc_array_node *node; + const struct assoc_array_ptr *cursor, *ptr, *parent; + unsigned long has_meta; + int slot, ret; + + cursor = root; + +begin_node: + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + shortcut = assoc_array_ptr_to_shortcut(cursor); + smp_read_barrier_depends(); + cursor = ACCESS_ONCE(shortcut->next_node); + } + + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + slot = 0; + + /* We perform two passes of each node. + * + * The first pass does all the leaves in this node. This means we + * don't miss any leaves if the node is split up by insertion whilst + * we're iterating over the branches rooted here (we may, however, see + * some leaves twice). + */ + has_meta = 0; + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + has_meta |= (unsigned long)ptr; + if (ptr && assoc_array_ptr_is_leaf(ptr)) { + /* We need a barrier between the read of the pointer + * and dereferencing the pointer - but only if we are + * actually going to dereference it. + */ + smp_read_barrier_depends(); + + /* Invoke the callback */ + ret = iterator(assoc_array_ptr_to_leaf(ptr), + iterator_data); + if (ret) + return ret; + } + } + + /* The second pass attends to all the metadata pointers. If we follow + * one of these we may find that we don't come back here, but rather go + * back to a replacement node with the leaves in a different layout. + * + * We are guaranteed to make progress, however, as the slot number for + * a particular portion of the key space cannot change - and we + * continue at the back pointer + 1. + */ + if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) + goto finished_node; + slot = 0; + +continue_node: + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + if (assoc_array_ptr_is_meta(ptr)) { + cursor = ptr; + goto begin_node; + } + } + +finished_node: + /* Move up to the parent (may need to skip back over a shortcut) */ + parent = ACCESS_ONCE(node->back_pointer); + slot = node->parent_slot; + if (parent == stop) + return 0; + + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + smp_read_barrier_depends(); + cursor = parent; + parent = ACCESS_ONCE(shortcut->back_pointer); + slot = shortcut->parent_slot; + if (parent == stop) + return 0; + } + + /* Ascend to next slot in parent node */ + cursor = parent; + slot++; + goto continue_node; +} + +/** + * assoc_array_iterate - Pass all objects in the array to a callback + * @array: The array to iterate over. + * @iterator: The callback function. + * @iterator_data: Private data for the callback function. + * + * Iterate over all the objects in an associative array. Each one will be + * presented to the iterator function. + * + * If the array is being modified concurrently with the iteration then it is + * possible that some objects in the array will be passed to the iterator + * callback more than once - though every object should be passed at least + * once. If this is undesirable then the caller must lock against modification + * for the duration of this function. + * + * The function will return 0 if no objects were in the array or else it will + * return the result of the last iterator function called. Iteration stops + * immediately if any call to the iteration function results in a non-zero + * return. + * + * The caller should hold the RCU read lock or better if concurrent + * modification is possible. + */ +int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data) +{ + struct assoc_array_ptr *root = ACCESS_ONCE(array->root); + + if (!root) + return 0; + return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); +} + +enum assoc_array_walk_status { + assoc_array_walk_tree_empty, + assoc_array_walk_found_terminal_node, + assoc_array_walk_found_wrong_shortcut, +} status; + +struct assoc_array_walk_result { + struct { + struct assoc_array_node *node; /* Node in which leaf might be found */ + int level; + int slot; + } terminal_node; + struct { + struct assoc_array_shortcut *shortcut; + int level; + int sc_level; + unsigned long sc_segments; + unsigned long dissimilarity; + } wrong_shortcut; +}; + +/* + * Navigate through the internal tree looking for the closest node to the key. + */ +static enum assoc_array_walk_status +assoc_array_walk(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *ptr; + unsigned long sc_segments, dissimilarity; + unsigned long segments; + int level, sc_level, next_sc_level; + int slot; + + pr_devel("-->%s()\n", __func__); + + cursor = ACCESS_ONCE(array->root); + if (!cursor) + return assoc_array_walk_tree_empty; + + level = 0; + + /* Use segments from the key for the new leaf to navigate through the + * internal tree, skipping through nodes and shortcuts that are on + * route to the destination. Eventually we'll come to a slot that is + * either empty or contains a leaf at which point we've found a node in + * which the leaf we're looking for might be found or into which it + * should be inserted. + */ +jumped: + segments = ops->get_key_chunk(index_key, level); + pr_devel("segments[%d]: %lx\n", level, segments); + + if (assoc_array_ptr_is_shortcut(cursor)) + goto follow_shortcut; + +consider_node: + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + + slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + slot &= ASSOC_ARRAY_FAN_MASK; + ptr = ACCESS_ONCE(node->slots[slot]); + + pr_devel("consider slot %x [ix=%d type=%lu]\n", + slot, level, (unsigned long)ptr & 3); + + if (!assoc_array_ptr_is_meta(ptr)) { + /* The node doesn't have a node/shortcut pointer in the slot + * corresponding to the index key that we have to follow. + */ + result->terminal_node.node = node; + result->terminal_node.level = level; + result->terminal_node.slot = slot; + pr_devel("<--%s() = terminal_node\n", __func__); + return assoc_array_walk_found_terminal_node; + } + + if (assoc_array_ptr_is_node(ptr)) { + /* There is a pointer to a node in the slot corresponding to + * this index key segment, so we need to follow it. + */ + cursor = ptr; + level += ASSOC_ARRAY_LEVEL_STEP; + if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) + goto consider_node; + goto jumped; + } + + /* There is a shortcut in the slot corresponding to the index key + * segment. We follow the shortcut if its partial index key matches + * this leaf's. Otherwise we need to split the shortcut. + */ + cursor = ptr; +follow_shortcut: + shortcut = assoc_array_ptr_to_shortcut(cursor); + smp_read_barrier_depends(); + pr_devel("shortcut to %d\n", shortcut->skip_to_level); + sc_level = level + ASSOC_ARRAY_LEVEL_STEP; + BUG_ON(sc_level > shortcut->skip_to_level); + + do { + /* Check the leaf against the shortcut's index key a word at a + * time, trimming the final word (the shortcut stores the index + * key completely from the root to the shortcut's target). + */ + if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) + segments = ops->get_key_chunk(index_key, sc_level); + + sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; + dissimilarity = segments ^ sc_segments; + + if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { + /* Trim segments that are beyond the shortcut */ + int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; + dissimilarity &= ~(ULONG_MAX << shift); + next_sc_level = shortcut->skip_to_level; + } else { + next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; + next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + } + + if (dissimilarity != 0) { + /* This shortcut points elsewhere */ + result->wrong_shortcut.shortcut = shortcut; + result->wrong_shortcut.level = level; + result->wrong_shortcut.sc_level = sc_level; + result->wrong_shortcut.sc_segments = sc_segments; + result->wrong_shortcut.dissimilarity = dissimilarity; + return assoc_array_walk_found_wrong_shortcut; + } + + sc_level = next_sc_level; + } while (sc_level < shortcut->skip_to_level); + + /* The shortcut matches the leaf's index to this point. */ + cursor = ACCESS_ONCE(shortcut->next_node); + if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { + level = sc_level; + goto jumped; + } else { + level = sc_level; + goto consider_node; + } +} + +/** + * assoc_array_find - Find an object by index key + * @array: The associative array to search. + * @ops: The operations to use. + * @index_key: The key to the object. + * + * Find an object in an associative array by walking through the internal tree + * to the node that should contain the object and then searching the leaves + * there. NULL is returned if the requested object was not found in the array. + * + * The caller must hold the RCU read lock or better. + */ +void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key) +{ + struct assoc_array_walk_result result; + const struct assoc_array_node *node; + const struct assoc_array_ptr *ptr; + const void *leaf; + int slot; + + if (assoc_array_walk(array, ops, index_key, &result) != + assoc_array_walk_found_terminal_node) + return NULL; + + node = result.terminal_node.node; + smp_read_barrier_depends(); + + /* If the target key is available to us, it's has to be pointed to by + * the terminal node. + */ + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + if (ptr && assoc_array_ptr_is_leaf(ptr)) { + /* We need a barrier between the read of the pointer + * and dereferencing the pointer - but only if we are + * actually going to dereference it. + */ + leaf = assoc_array_ptr_to_leaf(ptr); + smp_read_barrier_depends(); + if (ops->compare_object(leaf, index_key)) + return (void *)leaf; + } + } + + return NULL; +} + +/* + * Destructively iterate over an associative array. The caller must prevent + * other simultaneous accesses. + */ +static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, + const struct assoc_array_ops *ops) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *parent = NULL; + int slot = -1; + + pr_devel("-->%s()\n", __func__); + + cursor = root; + if (!cursor) { + pr_devel("empty\n"); + return; + } + +move_to_meta: + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + pr_devel("[%d] shortcut\n", slot); + BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); + shortcut = assoc_array_ptr_to_shortcut(cursor); + BUG_ON(shortcut->back_pointer != parent); + BUG_ON(slot != -1 && shortcut->parent_slot != slot); + parent = cursor; + cursor = shortcut->next_node; + slot = -1; + BUG_ON(!assoc_array_ptr_is_node(cursor)); + } + + pr_devel("[%d] node\n", slot); + node = assoc_array_ptr_to_node(cursor); + BUG_ON(node->back_pointer != parent); + BUG_ON(slot != -1 && node->parent_slot != slot); + slot = 0; + +continue_node: + pr_devel("Node %p [back=%p]\n", node, node->back_pointer); + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_ptr *ptr = node->slots[slot]; + if (!ptr) + continue; + if (assoc_array_ptr_is_meta(ptr)) { + parent = cursor; + cursor = ptr; + goto move_to_meta; + } + + if (ops) { + pr_devel("[%d] free leaf\n", slot); + ops->free_object(assoc_array_ptr_to_leaf(ptr)); + } + } + + parent = node->back_pointer; + slot = node->parent_slot; + pr_devel("free node\n"); + kfree(node); + if (!parent) + return; /* Done */ + + /* Move back up to the parent (may need to free a shortcut on + * the way up) */ + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + BUG_ON(shortcut->next_node != cursor); + cursor = parent; + parent = shortcut->back_pointer; + slot = shortcut->parent_slot; + pr_devel("free shortcut\n"); + kfree(shortcut); + if (!parent) + return; + + BUG_ON(!assoc_array_ptr_is_node(parent)); + } + + /* Ascend to next slot in parent node */ + pr_devel("ascend to %p[%d]\n", parent, slot); + cursor = parent; + node = assoc_array_ptr_to_node(cursor); + slot++; + goto continue_node; +} + +/** + * assoc_array_destroy - Destroy an associative array + * @array: The array to destroy. + * @ops: The operations to use. + * + * Discard all metadata and free all objects in an associative array. The + * array will be empty and ready to use again upon completion. This function + * cannot fail. + * + * The caller must prevent all other accesses whilst this takes place as no + * attempt is made to adjust pointers gracefully to permit RCU readlock-holding + * accesses to continue. On the other hand, no memory allocation is required. + */ +void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops) +{ + assoc_array_destroy_subtree(array->root, ops); + array->root = NULL; +} + +/* + * Handle insertion into an empty tree. + */ +static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) +{ + struct assoc_array_node *new_n0; + + pr_devel("-->%s()\n", __func__); + + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + edit->leaf_p = &new_n0->slots[0]; + edit->adjust_count_on = new_n0; + edit->set[0].ptr = &edit->array->root; + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + + pr_devel("<--%s() = ok [no root]\n", __func__); + return true; +} + +/* + * Handle insertion into a terminal node. + */ +static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, + const struct assoc_array_ops *ops, + const void *index_key, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut, *new_s0; + struct assoc_array_node *node, *new_n0, *new_n1, *side; + struct assoc_array_ptr *ptr; + unsigned long dissimilarity, base_seg, blank; + size_t keylen; + bool have_meta; + int level, diff; + int slot, next_slot, free_slot, i, j; + + node = result->terminal_node.node; + level = result->terminal_node.level; + edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; + + pr_devel("-->%s()\n", __func__); + + /* We arrived at a node which doesn't have an onward node or shortcut + * pointer that we have to follow. This means that (a) the leaf we + * want must go here (either by insertion or replacement) or (b) we + * need to split this node and insert in one of the fragments. + */ + free_slot = -1; + + /* Firstly, we have to check the leaves in this node to see if there's + * a matching one we should replace in place. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (!ptr) { + free_slot = i; + continue; + } + if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { + pr_devel("replace in slot %d\n", i); + edit->leaf_p = &node->slots[i]; + edit->dead_leaf = node->slots[i]; + pr_devel("<--%s() = ok [replace]\n", __func__); + return true; + } + } + + /* If there is a free slot in this node then we can just insert the + * leaf here. + */ + if (free_slot >= 0) { + pr_devel("insert in free slot %d\n", free_slot); + edit->leaf_p = &node->slots[free_slot]; + edit->adjust_count_on = node; + pr_devel("<--%s() = ok [insert]\n", __func__); + return true; + } + + /* The node has no spare slots - so we're either going to have to split + * it or insert another node before it. + * + * Whatever, we're going to need at least two new nodes - so allocate + * those now. We may also need a new shortcut, but we deal with that + * when we need it. + */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n1) + return false; + edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); + + /* We need to find out how similar the leaves are. */ + pr_devel("no spare slots\n"); + have_meta = false; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (assoc_array_ptr_is_meta(ptr)) { + edit->segment_cache[i] = 0xff; + have_meta = true; + continue; + } + base_seg = ops->get_object_key_chunk( + assoc_array_ptr_to_leaf(ptr), level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; + } + + if (have_meta) { + pr_devel("have meta\n"); + goto split_node; + } + + /* The node contains only leaves */ + dissimilarity = 0; + base_seg = edit->segment_cache[0]; + for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) + dissimilarity |= edit->segment_cache[i] ^ base_seg; + + pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); + + if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { + /* The old leaves all cluster in the same slot. We will need + * to insert a shortcut if the new node wants to cluster with them. + */ + if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) + goto all_leaves_cluster_together; + + /* Otherwise we can just insert a new node ahead of the old + * one. + */ + goto present_leaves_cluster_but_not_new_leaf; + } + +split_node: + pr_devel("split node\n"); + + /* We need to split the current node; we know that the node doesn't + * simply contain a full set of leaves that cluster together (it + * contains meta pointers and/or non-clustering leaves). + * + * We need to expel at least two leaves out of a set consisting of the + * leaves in the node and the new leaf. + * + * We need a new node (n0) to replace the current one and a new node to + * take the expelled nodes (n1). + */ + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = -1; /* Need to calculate this */ + +do_split_node: + pr_devel("do_split_node\n"); + + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + new_n1->nr_leaves_on_branch = 0; + + /* Begin by finding two matching leaves. There have to be at least two + * that match - even if there are meta pointers - because any leaf that + * would match a slot with a meta pointer in it must be somewhere + * behind that meta pointer and cannot be here. Further, given N + * remaining leaf slots, we now have N+1 leaves to go in them. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + slot = edit->segment_cache[i]; + if (slot != 0xff) + for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) + if (edit->segment_cache[j] == slot) + goto found_slot_for_multiple_occupancy; + } +found_slot_for_multiple_occupancy: + pr_devel("same slot: %x %x [%02x]\n", i, j, slot); + BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); + BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); + BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); + + new_n1->parent_slot = slot; + + /* Metadata pointers cannot change slot */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) + if (assoc_array_ptr_is_meta(node->slots[i])) + new_n0->slots[i] = node->slots[i]; + else + new_n0->slots[i] = NULL; + BUG_ON(new_n0->slots[slot] != NULL); + new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); + + /* Filter the leaf pointers between the new nodes */ + free_slot = -1; + next_slot = 0; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + if (assoc_array_ptr_is_meta(node->slots[i])) + continue; + if (edit->segment_cache[i] == slot) { + new_n1->slots[next_slot++] = node->slots[i]; + new_n1->nr_leaves_on_branch++; + } else { + do { + free_slot++; + } while (new_n0->slots[free_slot] != NULL); + new_n0->slots[free_slot] = node->slots[i]; + } + } + + pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); + + if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { + do { + free_slot++; + } while (new_n0->slots[free_slot] != NULL); + edit->leaf_p = &new_n0->slots[free_slot]; + edit->adjust_count_on = new_n0; + } else { + edit->leaf_p = &new_n1->slots[next_slot++]; + edit->adjust_count_on = new_n1; + } + + BUG_ON(next_slot <= 1); + + edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + if (edit->segment_cache[i] == 0xff) { + ptr = node->slots[i]; + BUG_ON(assoc_array_ptr_is_leaf(ptr)); + if (assoc_array_ptr_is_node(ptr)) { + side = assoc_array_ptr_to_node(ptr); + edit->set_backpointers[i] = &side->back_pointer; + } else { + shortcut = assoc_array_ptr_to_shortcut(ptr); + edit->set_backpointers[i] = &shortcut->back_pointer; + } + } + } + + ptr = node->back_pointer; + if (!ptr) + edit->set[0].ptr = &edit->array->root; + else if (assoc_array_ptr_is_node(ptr)) + edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; + else + edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; + edit->excised_meta[0] = assoc_array_node_to_ptr(node); + pr_devel("<--%s() = ok [split node]\n", __func__); + return true; + +present_leaves_cluster_but_not_new_leaf: + /* All the old leaves cluster in the same slot, but the new leaf wants + * to go into a different slot, so we create a new node to hold the new + * leaf and a pointer to a new node holding all the old leaves. + */ + pr_devel("present leaves cluster but not new leaf\n"); + + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = edit->segment_cache[0]; + new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; + edit->adjust_count_on = new_n0; + + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) + new_n1->slots[i] = node->slots[i]; + + new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); + edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; + + edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + edit->excised_meta[0] = assoc_array_node_to_ptr(node); + pr_devel("<--%s() = ok [insert node before]\n", __func__); + return true; + +all_leaves_cluster_together: + /* All the leaves, new and old, want to cluster together in this node + * in the same slot, so we have to replace this node with a shortcut to + * skip over the identical parts of the key and then place a pair of + * nodes, one inside the other, at the end of the shortcut and + * distribute the keys between them. + * + * Firstly we need to work out where the leaves start diverging as a + * bit position into their keys so that we know how big the shortcut + * needs to be. + * + * We only need to make a single pass of N of the N+1 leaves because if + * any keys differ between themselves at bit X then at least one of + * them must also differ with the base key at bit X or before. + */ + pr_devel("all leaves cluster together\n"); + diff = INT_MAX; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf), + assoc_array_ptr_to_leaf(node->slots[i])); + if (x < diff) { + BUG_ON(x < 0); + diff = x; + } + } + BUG_ON(diff == INT_MAX); + BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); + + keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s0) + return false; + edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); + + edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); + new_s0->back_pointer = node->back_pointer; + new_s0->parent_slot = node->parent_slot; + new_s0->next_node = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); + new_n0->parent_slot = 0; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = -1; /* Need to calculate this */ + + new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; + pr_devel("skip_to_level = %d [diff %d]\n", level, diff); + BUG_ON(level <= 0); + + for (i = 0; i < keylen; i++) + new_s0->index_key[i] = + ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); + + blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); + new_s0->index_key[keylen - 1] &= ~blank; + + /* This now reduces to a node splitting exercise for which we'll need + * to regenerate the disparity table. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), + level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; + } + + base_seg = ops->get_key_chunk(index_key, level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; + goto do_split_node; +} + +/* + * Handle insertion into the middle of a shortcut. + */ +static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, + const struct assoc_array_ops *ops, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; + struct assoc_array_node *node, *new_n0, *side; + unsigned long sc_segments, dissimilarity, blank; + size_t keylen; + int level, sc_level, diff; + int sc_slot; + + shortcut = result->wrong_shortcut.shortcut; + level = result->wrong_shortcut.level; + sc_level = result->wrong_shortcut.sc_level; + sc_segments = result->wrong_shortcut.sc_segments; + dissimilarity = result->wrong_shortcut.dissimilarity; + + pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", + __func__, level, dissimilarity, sc_level); + + /* We need to split a shortcut and insert a node between the two + * pieces. Zero-length pieces will be dispensed with entirely. + * + * First of all, we need to find out in which level the first + * difference was. + */ + diff = __ffs(dissimilarity); + diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; + diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; + pr_devel("diff=%d\n", diff); + + if (!shortcut->back_pointer) { + edit->set[0].ptr = &edit->array->root; + } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { + node = assoc_array_ptr_to_node(shortcut->back_pointer); + edit->set[0].ptr = &node->slots[shortcut->parent_slot]; + } else { + BUG(); + } + + edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); + + /* Create a new node now since we're going to need it anyway */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + edit->adjust_count_on = new_n0; + + /* Insert a new shortcut before the new node if this segment isn't of + * zero length - otherwise we just connect the new node directly to the + * parent. + */ + level += ASSOC_ARRAY_LEVEL_STEP; + if (diff > level) { + pr_devel("pre-shortcut %d...%d\n", level, diff); + keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s0) + return false; + edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); + edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); + new_s0->back_pointer = shortcut->back_pointer; + new_s0->parent_slot = shortcut->parent_slot; + new_s0->next_node = assoc_array_node_to_ptr(new_n0); + new_s0->skip_to_level = diff; + + new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); + new_n0->parent_slot = 0; + + memcpy(new_s0->index_key, shortcut->index_key, + keylen * sizeof(unsigned long)); + + blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); + new_s0->index_key[keylen - 1] &= ~blank; + } else { + pr_devel("no pre-shortcut\n"); + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = shortcut->back_pointer; + new_n0->parent_slot = shortcut->parent_slot; + } + + side = assoc_array_ptr_to_node(shortcut->next_node); + new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; + + /* We need to know which slot in the new node is going to take a + * metadata pointer. + */ + sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); + sc_slot &= ASSOC_ARRAY_FAN_MASK; + + pr_devel("new slot %lx >> %d -> %d\n", + sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); + + /* Determine whether we need to follow the new node with a replacement + * for the current shortcut. We could in theory reuse the current + * shortcut if its parent slot number doesn't change - but that's a + * 1-in-16 chance so not worth expending the code upon. + */ + level = diff + ASSOC_ARRAY_LEVEL_STEP; + if (level < shortcut->skip_to_level) { + pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); + keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s1) + return false; + edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); + + new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_s1->parent_slot = sc_slot; + new_s1->next_node = shortcut->next_node; + new_s1->skip_to_level = shortcut->skip_to_level; + + new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); + + memcpy(new_s1->index_key, shortcut->index_key, + keylen * sizeof(unsigned long)); + + edit->set[1].ptr = &side->back_pointer; + edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); + } else { + pr_devel("no post-shortcut\n"); + + /* We don't have to replace the pointed-to node as long as we + * use memory barriers to make sure the parent slot number is + * changed before the back pointer (the parent slot number is + * irrelevant to the old parent shortcut). + */ + new_n0->slots[sc_slot] = shortcut->next_node; + edit->set_parent_slot[0].p = &side->parent_slot; + edit->set_parent_slot[0].to = sc_slot; + edit->set[1].ptr = &side->back_pointer; + edit->set[1].to = assoc_array_node_to_ptr(new_n0); + } + + /* Install the new leaf in a spare slot in the new node. */ + if (sc_slot == 0) + edit->leaf_p = &new_n0->slots[1]; + else + edit->leaf_p = &new_n0->slots[0]; + + pr_devel("<--%s() = ok [split shortcut]\n", __func__); + return edit; +} + +/** + * assoc_array_insert - Script insertion of an object into an associative array + * @array: The array to insert into. + * @ops: The operations to use. + * @index_key: The key to insert at. + * @object: The object to insert. + * + * Precalculate and preallocate a script for the insertion or replacement of an + * object in an associative array. This results in an edit script that can + * either be applied or cancelled. + * + * The function returns a pointer to an edit script or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object) +{ + struct assoc_array_walk_result result; + struct assoc_array_edit *edit; + + pr_devel("-->%s()\n", __func__); + + /* The leaf pointer we're given must not have the bottom bit set as we + * use those for type-marking the pointer. NULL pointers are also not + * allowed as they indicate an empty slot but we have to allow them + * here as they can be updated later. + */ + BUG_ON(assoc_array_ptr_is_meta(object)); + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->leaf = assoc_array_leaf_to_ptr(object); + edit->adjust_count_by = 1; + + switch (assoc_array_walk(array, ops, index_key, &result)) { + case assoc_array_walk_tree_empty: + /* Allocate a root node if there isn't one yet */ + if (!assoc_array_insert_in_empty_tree(edit)) + goto enomem; + return edit; + + case assoc_array_walk_found_terminal_node: + /* We found a node that doesn't have a node/shortcut pointer in + * the slot corresponding to the index key that we have to + * follow. + */ + if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, + &result)) + goto enomem; + return edit; + + case assoc_array_walk_found_wrong_shortcut: + /* We found a shortcut that didn't match our key in a slot we + * needed to follow. + */ + if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) + goto enomem; + return edit; + } + +enomem: + /* Clean up after an out of memory error */ + pr_devel("enomem\n"); + assoc_array_cancel_edit(edit); + return ERR_PTR(-ENOMEM); +} + +/** + * assoc_array_insert_set_object - Set the new object pointer in an edit script + * @edit: The edit script to modify. + * @object: The object pointer to set. + * + * Change the object to be inserted in an edit script. The object pointed to + * by the old object is not freed. This must be done prior to applying the + * script. + */ +void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) +{ + BUG_ON(!object); + edit->leaf = assoc_array_leaf_to_ptr(object); +} + +struct assoc_array_delete_collapse_context { + struct assoc_array_node *node; + const void *skip_leaf; + int slot; +}; + +/* + * Subtree collapse to node iterator. + */ +static int assoc_array_delete_collapse_iterator(const void *leaf, + void *iterator_data) +{ + struct assoc_array_delete_collapse_context *collapse = iterator_data; + + if (leaf == collapse->skip_leaf) + return 0; + + BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); + + collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); + return 0; +} + +/** + * assoc_array_delete - Script deletion of an object from an associative array + * @array: The array to search. + * @ops: The operations to use. + * @index_key: The key to the object. + * + * Precalculate and preallocate a script for the deletion of an object from an + * associative array. This results in an edit script that can either be + * applied or cancelled. + * + * The function returns a pointer to an edit script if the object was found, + * NULL if the object was not found or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key) +{ + struct assoc_array_delete_collapse_context collapse; + struct assoc_array_walk_result result; + struct assoc_array_node *node, *new_n0; + struct assoc_array_edit *edit; + struct assoc_array_ptr *ptr; + bool has_meta; + int slot, i; + + pr_devel("-->%s()\n", __func__); + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->adjust_count_by = -1; + + switch (assoc_array_walk(array, ops, index_key, &result)) { + case assoc_array_walk_found_terminal_node: + /* We found a node that should contain the leaf we've been + * asked to remove - *if* it's in the tree. + */ + pr_devel("terminal_node\n"); + node = result.terminal_node.node; + + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = node->slots[slot]; + if (ptr && + assoc_array_ptr_is_leaf(ptr) && + ops->compare_object(assoc_array_ptr_to_leaf(ptr), + index_key)) + goto found_leaf; + } + case assoc_array_walk_tree_empty: + case assoc_array_walk_found_wrong_shortcut: + default: + assoc_array_cancel_edit(edit); + pr_devel("not found\n"); + return NULL; + } + +found_leaf: + BUG_ON(array->nr_leaves_on_tree <= 0); + + /* In the simplest form of deletion we just clear the slot and release + * the leaf after a suitable interval. + */ + edit->dead_leaf = node->slots[slot]; + edit->set[0].ptr = &node->slots[slot]; + edit->set[0].to = NULL; + edit->adjust_count_on = node; + + /* If that concludes erasure of the last leaf, then delete the entire + * internal array. + */ + if (array->nr_leaves_on_tree == 1) { + edit->set[1].ptr = &array->root; + edit->set[1].to = NULL; + edit->adjust_count_on = NULL; + edit->excised_subtree = array->root; + pr_devel("all gone\n"); + return edit; + } + + /* However, we'd also like to clear up some metadata blocks if we + * possibly can. + * + * We go for a simple algorithm of: if this node has FAN_OUT or fewer + * leaves in it, then attempt to collapse it - and attempt to + * recursively collapse up the tree. + * + * We could also try and collapse in partially filled subtrees to take + * up space in this node. + */ + if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { + struct assoc_array_node *parent, *grandparent; + struct assoc_array_ptr *ptr; + + /* First of all, we need to know if this node has metadata so + * that we don't try collapsing if all the leaves are already + * here. + */ + has_meta = false; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (assoc_array_ptr_is_meta(ptr)) { + has_meta = true; + break; + } + } + + pr_devel("leaves: %ld [m=%d]\n", + node->nr_leaves_on_branch - 1, has_meta); + + /* Look further up the tree to see if we can collapse this node + * into a more proximal node too. + */ + parent = node; + collapse_up: + pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); + + ptr = parent->back_pointer; + if (!ptr) + goto do_collapse; + if (assoc_array_ptr_is_shortcut(ptr)) { + struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); + ptr = s->back_pointer; + if (!ptr) + goto do_collapse; + } + + grandparent = assoc_array_ptr_to_node(ptr); + if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { + parent = grandparent; + goto collapse_up; + } + + do_collapse: + /* There's no point collapsing if the original node has no meta + * pointers to discard and if we didn't merge into one of that + * node's ancestry. + */ + if (has_meta || parent != node) { + node = parent; + + /* Create a new node to collapse into */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + goto enomem; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + edit->adjust_count_on = new_n0; + + collapse.node = new_n0; + collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); + collapse.slot = 0; + assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), + node->back_pointer, + assoc_array_delete_collapse_iterator, + &collapse); + pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); + BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); + + if (!node->back_pointer) { + edit->set[1].ptr = &array->root; + } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { + BUG(); + } else if (assoc_array_ptr_is_node(node->back_pointer)) { + struct assoc_array_node *p = + assoc_array_ptr_to_node(node->back_pointer); + edit->set[1].ptr = &p->slots[node->parent_slot]; + } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(node->back_pointer); + edit->set[1].ptr = &s->next_node; + } + edit->set[1].to = assoc_array_node_to_ptr(new_n0); + edit->excised_subtree = assoc_array_node_to_ptr(node); + } + } + + return edit; + +enomem: + /* Clean up after an out of memory error */ + pr_devel("enomem\n"); + assoc_array_cancel_edit(edit); + return ERR_PTR(-ENOMEM); +} + +/** + * assoc_array_clear - Script deletion of all objects from an associative array + * @array: The array to clear. + * @ops: The operations to use. + * + * Precalculate and preallocate a script for the deletion of all the objects + * from an associative array. This results in an edit script that can either + * be applied or cancelled. + * + * The function returns a pointer to an edit script if there are objects to be + * deleted, NULL if there are no objects in the array or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops) +{ + struct assoc_array_edit *edit; + + pr_devel("-->%s()\n", __func__); + + if (!array->root) + return NULL; + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->set[1].ptr = &array->root; + edit->set[1].to = NULL; + edit->excised_subtree = array->root; + edit->ops_for_excised_subtree = ops; + pr_devel("all gone\n"); + return edit; +} + +/* + * Handle the deferred destruction after an applied edit. + */ +static void assoc_array_rcu_cleanup(struct rcu_head *head) +{ + struct assoc_array_edit *edit = + container_of(head, struct assoc_array_edit, rcu); + int i; + + pr_devel("-->%s()\n", __func__); + + if (edit->dead_leaf) + edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); + for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) + if (edit->excised_meta[i]) + kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); + + if (edit->excised_subtree) { + BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); + if (assoc_array_ptr_is_node(edit->excised_subtree)) { + struct assoc_array_node *n = + assoc_array_ptr_to_node(edit->excised_subtree); + n->back_pointer = NULL; + } else { + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(edit->excised_subtree); + s->back_pointer = NULL; + } + assoc_array_destroy_subtree(edit->excised_subtree, + edit->ops_for_excised_subtree); + } + + kfree(edit); +} + +/** + * assoc_array_apply_edit - Apply an edit script to an associative array + * @edit: The script to apply. + * + * Apply an edit script to an associative array to effect an insertion, + * deletion or clearance. As the edit script includes preallocated memory, + * this is guaranteed not to fail. + * + * The edit script, dead objects and dead metadata will be scheduled for + * destruction after an RCU grace period to permit those doing read-only + * accesses on the array to continue to do so under the RCU read lock whilst + * the edit is taking place. + */ +void assoc_array_apply_edit(struct assoc_array_edit *edit) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *ptr; + int i; + + pr_devel("-->%s()\n", __func__); + + smp_wmb(); + if (edit->leaf_p) + *edit->leaf_p = edit->leaf; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) + if (edit->set_parent_slot[i].p) + *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) + if (edit->set_backpointers[i]) + *edit->set_backpointers[i] = edit->set_backpointers_to; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set); i++) + if (edit->set[i].ptr) + *edit->set[i].ptr = edit->set[i].to; + + if (edit->array->root == NULL) { + edit->array->nr_leaves_on_tree = 0; + } else if (edit->adjust_count_on) { + node = edit->adjust_count_on; + for (;;) { + node->nr_leaves_on_branch += edit->adjust_count_by; + + ptr = node->back_pointer; + if (!ptr) + break; + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + ptr = shortcut->back_pointer; + if (!ptr) + break; + } + BUG_ON(!assoc_array_ptr_is_node(ptr)); + node = assoc_array_ptr_to_node(ptr); + } + + edit->array->nr_leaves_on_tree += edit->adjust_count_by; + } + + call_rcu(&edit->rcu, assoc_array_rcu_cleanup); +} + +/** + * assoc_array_cancel_edit - Discard an edit script. + * @edit: The script to discard. + * + * Free an edit script and all the preallocated data it holds without making + * any changes to the associative array it was intended for. + * + * NOTE! In the case of an insertion script, this does _not_ release the leaf + * that was to be inserted. That is left to the caller. + */ +void assoc_array_cancel_edit(struct assoc_array_edit *edit) +{ + struct assoc_array_ptr *ptr; + int i; + + pr_devel("-->%s()\n", __func__); + + /* Clean up after an out of memory error */ + for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { + ptr = edit->new_meta[i]; + if (ptr) { + if (assoc_array_ptr_is_node(ptr)) + kfree(assoc_array_ptr_to_node(ptr)); + else + kfree(assoc_array_ptr_to_shortcut(ptr)); + } + } + kfree(edit); +} + +/** + * assoc_array_gc - Garbage collect an associative array. + * @array: The array to clean. + * @ops: The operations to use. + * @iterator: A callback function to pass judgement on each object. + * @iterator_data: Private data for the callback function. + * + * Collect garbage from an associative array and pack down the internal tree to + * save memory. + * + * The iterator function is asked to pass judgement upon each object in the + * array. If it returns false, the object is discard and if it returns true, + * the object is kept. If it returns true, it must increment the object's + * usage count (or whatever it needs to do to retain it) before returning. + * + * This function returns 0 if successful or -ENOMEM if out of memory. In the + * latter case, the array is not changed. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data) +{ + struct assoc_array_shortcut *shortcut, *new_s; + struct assoc_array_node *node, *new_n; + struct assoc_array_edit *edit; + struct assoc_array_ptr *cursor, *ptr; + struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; + unsigned long nr_leaves_on_tree; + int keylen, slot, nr_free, next_slot, i; + + pr_devel("-->%s()\n", __func__); + + if (!array->root) + return 0; + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return -ENOMEM; + edit->array = array; + edit->ops = ops; + edit->ops_for_excised_subtree = ops; + edit->set[0].ptr = &array->root; + edit->excised_subtree = array->root; + + new_root = new_parent = NULL; + new_ptr_pp = &new_root; + cursor = array->root; + +descend: + /* If this point is a shortcut, then we need to duplicate it and + * advance the target cursor. + */ + if (assoc_array_ptr_is_shortcut(cursor)) { + shortcut = assoc_array_ptr_to_shortcut(cursor); + keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + new_s = kmalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s) + goto enomem; + pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); + memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long))); + new_s->back_pointer = new_parent; + new_s->parent_slot = shortcut->parent_slot; + *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); + new_ptr_pp = &new_s->next_node; + cursor = shortcut->next_node; + } + + /* Duplicate the node at this position */ + node = assoc_array_ptr_to_node(cursor); + new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n) + goto enomem; + pr_devel("dup node %p -> %p\n", node, new_n); + new_n->back_pointer = new_parent; + new_n->parent_slot = node->parent_slot; + *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); + new_ptr_pp = NULL; + slot = 0; + +continue_node: + /* Filter across any leaves and gc any subtrees */ + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = node->slots[slot]; + if (!ptr) + continue; + + if (assoc_array_ptr_is_leaf(ptr)) { + if (iterator(assoc_array_ptr_to_leaf(ptr), + iterator_data)) + /* The iterator will have done any reference + * counting on the object for us. + */ + new_n->slots[slot] = ptr; + continue; + } + + new_ptr_pp = &new_n->slots[slot]; + cursor = ptr; + goto descend; + } + + pr_devel("-- compress node %p --\n", new_n); + + /* Count up the number of empty slots in this node and work out the + * subtree leaf count. + */ + new_n->nr_leaves_on_branch = 0; + nr_free = 0; + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = new_n->slots[slot]; + if (!ptr) + nr_free++; + else if (assoc_array_ptr_is_leaf(ptr)) + new_n->nr_leaves_on_branch++; + } + pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); + + /* See what we can fold in */ + next_slot = 0; + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_shortcut *s; + struct assoc_array_node *child; + + ptr = new_n->slots[slot]; + if (!ptr || assoc_array_ptr_is_leaf(ptr)) + continue; + + s = NULL; + if (assoc_array_ptr_is_shortcut(ptr)) { + s = assoc_array_ptr_to_shortcut(ptr); + ptr = s->next_node; + } + + child = assoc_array_ptr_to_node(ptr); + new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; + + if (child->nr_leaves_on_branch <= nr_free + 1) { + /* Fold the child node into this one */ + pr_devel("[%d] fold node %lu/%d [nx %d]\n", + slot, child->nr_leaves_on_branch, nr_free + 1, + next_slot); + + /* We would already have reaped an intervening shortcut + * on the way back up the tree. + */ + BUG_ON(s); + + new_n->slots[slot] = NULL; + nr_free++; + if (slot < next_slot) + next_slot = slot; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + struct assoc_array_ptr *p = child->slots[i]; + if (!p) + continue; + BUG_ON(assoc_array_ptr_is_meta(p)); + while (new_n->slots[next_slot]) + next_slot++; + BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); + new_n->slots[next_slot++] = p; + nr_free--; + } + kfree(child); + } else { + pr_devel("[%d] retain node %lu/%d [nx %d]\n", + slot, child->nr_leaves_on_branch, nr_free + 1, + next_slot); + } + } + + pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); + + nr_leaves_on_tree = new_n->nr_leaves_on_branch; + + /* Excise this node if it is singly occupied by a shortcut */ + if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) + if ((ptr = new_n->slots[slot])) + break; + + if (assoc_array_ptr_is_meta(ptr) && + assoc_array_ptr_is_shortcut(ptr)) { + pr_devel("excise node %p with 1 shortcut\n", new_n); + new_s = assoc_array_ptr_to_shortcut(ptr); + new_parent = new_n->back_pointer; + slot = new_n->parent_slot; + kfree(new_n); + if (!new_parent) { + new_s->back_pointer = NULL; + new_s->parent_slot = 0; + new_root = ptr; + goto gc_complete; + } + + if (assoc_array_ptr_is_shortcut(new_parent)) { + /* We can discard any preceding shortcut also */ + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(new_parent); + + pr_devel("excise preceding shortcut\n"); + + new_parent = new_s->back_pointer = s->back_pointer; + slot = new_s->parent_slot = s->parent_slot; + kfree(s); + if (!new_parent) { + new_s->back_pointer = NULL; + new_s->parent_slot = 0; + new_root = ptr; + goto gc_complete; + } + } + + new_s->back_pointer = new_parent; + new_s->parent_slot = slot; + new_n = assoc_array_ptr_to_node(new_parent); + new_n->slots[slot] = ptr; + goto ascend_old_tree; + } + } + + /* Excise any shortcuts we might encounter that point to nodes that + * only contain leaves. + */ + ptr = new_n->back_pointer; + if (!ptr) + goto gc_complete; + + if (assoc_array_ptr_is_shortcut(ptr)) { + new_s = assoc_array_ptr_to_shortcut(ptr); + new_parent = new_s->back_pointer; + slot = new_s->parent_slot; + + if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { + struct assoc_array_node *n; + + pr_devel("excise shortcut\n"); + new_n->back_pointer = new_parent; + new_n->parent_slot = slot; + kfree(new_s); + if (!new_parent) { + new_root = assoc_array_node_to_ptr(new_n); + goto gc_complete; + } + + n = assoc_array_ptr_to_node(new_parent); + n->slots[slot] = assoc_array_node_to_ptr(new_n); + } + } else { + new_parent = ptr; + } + new_n = assoc_array_ptr_to_node(new_parent); + +ascend_old_tree: + ptr = node->back_pointer; + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + slot = shortcut->parent_slot; + cursor = shortcut->back_pointer; + } else { + slot = node->parent_slot; + cursor = ptr; + } + BUG_ON(!ptr); + node = assoc_array_ptr_to_node(cursor); + slot++; + goto continue_node; + +gc_complete: + edit->set[0].to = new_root; + assoc_array_apply_edit(edit); + edit->array->nr_leaves_on_tree = nr_leaves_on_tree; + return 0; + +enomem: + pr_devel("enomem\n"); + assoc_array_destroy_subtree(new_root, edit->ops); + kfree(edit); + return -ENOMEM; +} diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 657979f..bf076d2 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -121,3 +121,6 @@ void mpi_free(MPI a) kfree(a); } EXPORT_SYMBOL_GPL(mpi_free); + +MODULE_DESCRIPTION("Multiprecision maths library"); +MODULE_LICENSE("GPL"); diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index b0698ea..9d054bf 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -117,8 +117,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool, min(pool->nr_free, pool->percpu_batch_size)); } -static inline unsigned alloc_local_tag(struct percpu_ida *pool, - struct percpu_ida_cpu *tags) +static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) { int tag = -ENOSPC; @@ -159,7 +158,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) tags = this_cpu_ptr(pool->tag_cpu); /* Fastpath */ - tag = alloc_local_tag(pool, tags); + tag = alloc_local_tag(tags); if (likely(tag >= 0)) { local_irq_restore(flags); return tag; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7d57af2..dee6cf4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -476,40 +476,6 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg) return 0; } -static void copy_gigantic_page(struct page *dst, struct page *src) -{ - int i; - struct hstate *h = page_hstate(src); - struct page *dst_base = dst; - struct page *src_base = src; - - for (i = 0; i < pages_per_huge_page(h); ) { - cond_resched(); - copy_highpage(dst, src); - - i++; - dst = mem_map_next(dst, dst_base, i); - src = mem_map_next(src, src_base, i); - } -} - -void copy_huge_page(struct page *dst, struct page *src) -{ - int i; - struct hstate *h = page_hstate(src); - - if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { - copy_gigantic_page(dst, src); - return; - } - - might_sleep(); - for (i = 0; i < pages_per_huge_page(h); i++) { - cond_resched(); - copy_highpage(dst + i, src + i); - } -} - static void enqueue_huge_page(struct hstate *h, struct page *page) { int nid = page_to_nid(page); @@ -736,6 +702,23 @@ int PageHuge(struct page *page) } EXPORT_SYMBOL_GPL(PageHuge); +/* + * PageHeadHuge() only returns true for hugetlbfs head page, but not for + * normal or transparent huge pages. + */ +int PageHeadHuge(struct page *page_head) +{ + compound_page_dtor *dtor; + + if (!PageHead(page_head)) + return 0; + + dtor = get_compound_page_dtor(page_head); + + return dtor == free_huge_page; +} +EXPORT_SYMBOL_GPL(PageHeadHuge); + pgoff_t __basepage_index(struct page *page) { struct page *page_head = compound_head(page); diff --git a/mm/memory.c b/mm/memory.c index 0409e8f..5d9025f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4272,13 +4272,6 @@ void copy_user_huge_page(struct page *dst, struct page *src, #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS -static struct kmem_cache *page_ptl_cachep; -void __init ptlock_cache_init(void) -{ - page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, - SLAB_PANIC, NULL); -} - bool ptlock_alloc(struct page *page) { spinlock_t *ptl; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c4403cd..eca4a31 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2950,7 +2950,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) return; } - p += snprintf(p, maxlen, policy_modes[mode]); + p += snprintf(p, maxlen, "%s", policy_modes[mode]); if (flags & MPOL_MODE_FLAGS) { p += snprintf(p, buffer + maxlen - p, "="); diff --git a/mm/migrate.c b/mm/migrate.c index 316e720..bb94004 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -442,6 +442,54 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, } /* + * Gigantic pages are so large that we do not guarantee that page++ pointer + * arithmetic will work across the entire page. We need something more + * specialized. + */ +static void __copy_gigantic_page(struct page *dst, struct page *src, + int nr_pages) +{ + int i; + struct page *dst_base = dst; + struct page *src_base = src; + + for (i = 0; i < nr_pages; ) { + cond_resched(); + copy_highpage(dst, src); + + i++; + dst = mem_map_next(dst, dst_base, i); + src = mem_map_next(src, src_base, i); + } +} + +static void copy_huge_page(struct page *dst, struct page *src) +{ + int i; + int nr_pages; + + if (PageHuge(src)) { + /* hugetlbfs page */ + struct hstate *h = page_hstate(src); + nr_pages = pages_per_huge_page(h); + + if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) { + __copy_gigantic_page(dst, src, nr_pages); + return; + } + } else { + /* thp page */ + BUG_ON(!PageTransHuge(src)); + nr_pages = hpage_nr_pages(src); + } + + for (i = 0; i < nr_pages; i++) { + cond_resched(); + copy_highpage(dst + i, src + i); + } +} + +/* * Copy the page to its new location */ void migrate_page_copy(struct page *newpage, struct page *page) @@ -164,72 +164,6 @@ static bool pfmemalloc_active __read_mostly; /* - * kmem_bufctl_t: - * - * Bufctl's are used for linking objs within a slab - * linked offsets. - * - * This implementation relies on "struct page" for locating the cache & - * slab an object belongs to. - * This allows the bufctl structure to be small (one int), but limits - * the number of objects a slab (not a cache) can contain when off-slab - * bufctls are used. The limit is the size of the largest general cache - * that does not use off-slab slabs. - * For 32bit archs with 4 kB pages, is this 56. - * This is not serious, as it is only for large objects, when it is unwise - * to have too many per slab. - * Note: This limit can be raised by introducing a general cache whose size - * is less than 512 (PAGE_SIZE<<3), but greater than 256. - */ - -typedef unsigned int kmem_bufctl_t; -#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) -#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) -#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) -#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) - -/* - * struct slab_rcu - * - * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to - * arrange for kmem_freepages to be called via RCU. This is useful if - * we need to approach a kernel structure obliquely, from its address - * obtained without the usual locking. We can lock the structure to - * stabilize it and check it's still at the given address, only if we - * can be sure that the memory has not been meanwhile reused for some - * other kind of object (which our subsystem's lock might corrupt). - * - * rcu_read_lock before reading the address, then rcu_read_unlock after - * taking the spinlock within the structure expected at that address. - */ -struct slab_rcu { - struct rcu_head head; - struct kmem_cache *cachep; - void *addr; -}; - -/* - * struct slab - * - * Manages the objs in a slab. Placed either at the beginning of mem allocated - * for a slab, or allocated from an general cache. - * Slabs are chained into three list: fully used, partial, fully free slabs. - */ -struct slab { - union { - struct { - struct list_head list; - unsigned long colouroff; - void *s_mem; /* including colour offset */ - unsigned int inuse; /* num of objs active in slab */ - kmem_bufctl_t free; - unsigned short nodeid; - }; - struct slab_rcu __slab_cover_slab_rcu; - }; -}; - -/* * struct array_cache * * Purpose: @@ -456,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) return page->slab_cache; } -static inline struct slab *virt_to_slab(const void *obj) -{ - struct page *page = virt_to_head_page(obj); - - VM_BUG_ON(!PageSlab(page)); - return page->slab_page; -} - -static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, +static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, unsigned int idx) { - return slab->s_mem + cache->size * idx; + return page->s_mem + cache->size * idx; } /* @@ -477,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, * reciprocal_divide(offset, cache->reciprocal_buffer_size) */ static inline unsigned int obj_to_index(const struct kmem_cache *cache, - const struct slab *slab, void *obj) + const struct page *page, void *obj) { - u32 offset = (obj - slab->s_mem); + u32 offset = (obj - page->s_mem); return reciprocal_divide(offset, cache->reciprocal_buffer_size); } @@ -641,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) static size_t slab_mgmt_size(size_t nr_objs, size_t align) { - return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); + return ALIGN(nr_objs * sizeof(unsigned int), align); } /* @@ -660,8 +586,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, * on it. For the latter case, the memory allocated for a * slab is used for: * - * - The struct slab - * - One kmem_bufctl_t for each object + * - One unsigned int for each object * - Padding to respect alignment of @align * - @buffer_size bytes for each object * @@ -674,8 +599,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, mgmt_size = 0; nr_objs = slab_size / buffer_size; - if (nr_objs > SLAB_LIMIT) - nr_objs = SLAB_LIMIT; } else { /* * Ignore padding for the initial guess. The padding @@ -685,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, * into the memory allocation when taking the padding * into account. */ - nr_objs = (slab_size - sizeof(struct slab)) / - (buffer_size + sizeof(kmem_bufctl_t)); + nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int)); /* * This calculated number will be either the right @@ -696,9 +618,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, > slab_size) nr_objs--; - if (nr_objs > SLAB_LIMIT) - nr_objs = SLAB_LIMIT; - mgmt_size = slab_mgmt_size(nr_objs, align); } *num = nr_objs; @@ -829,10 +748,8 @@ static struct array_cache *alloc_arraycache(int node, int entries, return nc; } -static inline bool is_slab_pfmemalloc(struct slab *slabp) +static inline bool is_slab_pfmemalloc(struct page *page) { - struct page *page = virt_to_page(slabp->s_mem); - return PageSlabPfmemalloc(page); } @@ -841,23 +758,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep, struct array_cache *ac) { struct kmem_cache_node *n = cachep->node[numa_mem_id()]; - struct slab *slabp; + struct page *page; unsigned long flags; if (!pfmemalloc_active) return; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(slabp, &n->slabs_full, list) - if (is_slab_pfmemalloc(slabp)) + list_for_each_entry(page, &n->slabs_full, lru) + if (is_slab_pfmemalloc(page)) goto out; - list_for_each_entry(slabp, &n->slabs_partial, list) - if (is_slab_pfmemalloc(slabp)) + list_for_each_entry(page, &n->slabs_partial, lru) + if (is_slab_pfmemalloc(page)) goto out; - list_for_each_entry(slabp, &n->slabs_free, list) - if (is_slab_pfmemalloc(slabp)) + list_for_each_entry(page, &n->slabs_free, lru) + if (is_slab_pfmemalloc(page)) goto out; pfmemalloc_active = false; @@ -897,8 +814,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, */ n = cachep->node[numa_mem_id()]; if (!list_empty(&n->slabs_free) && force_refill) { - struct slab *slabp = virt_to_slab(objp); - ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); + struct page *page = virt_to_head_page(objp); + ClearPageSlabPfmemalloc(page); clear_obj_pfmemalloc(&objp); recheck_pfmemalloc_active(cachep, ac); return objp; @@ -1099,8 +1016,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) { - struct slab *slabp = virt_to_slab(objp); - int nodeid = slabp->nodeid; + int nodeid = page_to_nid(virt_to_page(objp)); struct kmem_cache_node *n; struct array_cache *alien = NULL; int node; @@ -1111,7 +1027,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) * Make sure we are not freeing a object from another node to the array * cache on this cpu. */ - if (likely(slabp->nodeid == node)) + if (likely(nodeid == node)) return 0; n = cachep->node[node]; @@ -1512,6 +1428,8 @@ void __init kmem_cache_init(void) { int i; + BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) < + sizeof(struct rcu_head)); kmem_cache = &kmem_cache_boot; setup_node_pointer(kmem_cache); @@ -1687,7 +1605,7 @@ static noinline void slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) { struct kmem_cache_node *n; - struct slab *slabp; + struct page *page; unsigned long flags; int node; @@ -1706,15 +1624,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) continue; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(slabp, &n->slabs_full, list) { + list_for_each_entry(page, &n->slabs_full, lru) { active_objs += cachep->num; active_slabs++; } - list_for_each_entry(slabp, &n->slabs_partial, list) { - active_objs += slabp->inuse; + list_for_each_entry(page, &n->slabs_partial, lru) { + active_objs += page->active; active_slabs++; } - list_for_each_entry(slabp, &n->slabs_free, list) + list_for_each_entry(page, &n->slabs_free, lru) num_slabs++; free_objects += n->free_objects; @@ -1736,19 +1654,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) * did not request dmaable memory, we might get it, but that * would be relatively rare and ignorable. */ -static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) +static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, + int nodeid) { struct page *page; int nr_pages; - int i; - -#ifndef CONFIG_MMU - /* - * Nommu uses slab's for process anonymous memory allocations, and thus - * requires __GFP_COMP to properly refcount higher order allocations - */ - flags |= __GFP_COMP; -#endif flags |= cachep->allocflags; if (cachep->flags & SLAB_RECLAIM_ACCOUNT) @@ -1772,12 +1682,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) else add_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_pages); - for (i = 0; i < nr_pages; i++) { - __SetPageSlab(page + i); - - if (page->pfmemalloc) - SetPageSlabPfmemalloc(page + i); - } + __SetPageSlab(page); + if (page->pfmemalloc) + SetPageSlabPfmemalloc(page); memcg_bind_pages(cachep, cachep->gfporder); if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { @@ -1789,17 +1696,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) kmemcheck_mark_unallocated_pages(page, nr_pages); } - return page_address(page); + return page; } /* * Interface to system's page release. */ -static void kmem_freepages(struct kmem_cache *cachep, void *addr) +static void kmem_freepages(struct kmem_cache *cachep, struct page *page) { - unsigned long i = (1 << cachep->gfporder); - struct page *page = virt_to_page(addr); - const unsigned long nr_freed = i; + const unsigned long nr_freed = (1 << cachep->gfporder); kmemcheck_free_shadow(page, cachep->gfporder); @@ -1809,27 +1714,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) else sub_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_freed); - while (i--) { - BUG_ON(!PageSlab(page)); - __ClearPageSlabPfmemalloc(page); - __ClearPageSlab(page); - page++; - } + + BUG_ON(!PageSlab(page)); + __ClearPageSlabPfmemalloc(page); + __ClearPageSlab(page); + page_mapcount_reset(page); + page->mapping = NULL; memcg_release_pages(cachep, cachep->gfporder); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; - free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder); + __free_memcg_kmem_pages(page, cachep->gfporder); } static void kmem_rcu_free(struct rcu_head *head) { - struct slab_rcu *slab_rcu = (struct slab_rcu *)head; - struct kmem_cache *cachep = slab_rcu->cachep; + struct kmem_cache *cachep; + struct page *page; - kmem_freepages(cachep, slab_rcu->addr); - if (OFF_SLAB(cachep)) - kmem_cache_free(cachep->slabp_cache, slab_rcu); + page = container_of(head, struct page, rcu_head); + cachep = page->slab_cache; + + kmem_freepages(cachep, page); } #if DEBUG @@ -1978,19 +1884,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) /* Print some data about the neighboring objects, if they * exist: */ - struct slab *slabp = virt_to_slab(objp); + struct page *page = virt_to_head_page(objp); unsigned int objnr; - objnr = obj_to_index(cachep, slabp, objp); + objnr = obj_to_index(cachep, page, objp); if (objnr) { - objp = index_to_obj(cachep, slabp, objnr - 1); + objp = index_to_obj(cachep, page, objnr - 1); realobj = (char *)objp + obj_offset(cachep); printk(KERN_ERR "Prev obj: start=%p, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); } if (objnr + 1 < cachep->num) { - objp = index_to_obj(cachep, slabp, objnr + 1); + objp = index_to_obj(cachep, page, objnr + 1); realobj = (char *)objp + obj_offset(cachep); printk(KERN_ERR "Next obj: start=%p, len=%d\n", realobj, size); @@ -2001,11 +1907,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) #endif #if DEBUG -static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy_debugcheck(struct kmem_cache *cachep, + struct page *page) { int i; for (i = 0; i < cachep->num; i++) { - void *objp = index_to_obj(cachep, slabp, i); + void *objp = index_to_obj(cachep, page, i); if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC @@ -2030,7 +1937,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab } } #else -static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy_debugcheck(struct kmem_cache *cachep, + struct page *page) { } #endif @@ -2044,23 +1952,34 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab * Before calling the slab must have been unlinked from the cache. The * cache-lock is not held/needed. */ -static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) +static void slab_destroy(struct kmem_cache *cachep, struct page *page) { - void *addr = slabp->s_mem - slabp->colouroff; + void *freelist; - slab_destroy_debugcheck(cachep, slabp); + freelist = page->freelist; + slab_destroy_debugcheck(cachep, page); if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { - struct slab_rcu *slab_rcu; + struct rcu_head *head; + + /* + * RCU free overloads the RCU head over the LRU. + * slab_page has been overloeaded over the LRU, + * however it is not used from now on so that + * we can use it safely. + */ + head = (void *)&page->rcu_head; + call_rcu(head, kmem_rcu_free); - slab_rcu = (struct slab_rcu *)slabp; - slab_rcu->cachep = cachep; - slab_rcu->addr = addr; - call_rcu(&slab_rcu->head, kmem_rcu_free); } else { - kmem_freepages(cachep, addr); - if (OFF_SLAB(cachep)) - kmem_cache_free(cachep->slabp_cache, slabp); + kmem_freepages(cachep, page); } + + /* + * From now on, we don't use freelist + * although actual page can be freed in rcu context + */ + if (OFF_SLAB(cachep)) + kmem_cache_free(cachep->freelist_cache, freelist); } /** @@ -2097,8 +2016,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, * use off-slab slabs. Needed to avoid a possible * looping condition in cache_grow(). */ - offslab_limit = size - sizeof(struct slab); - offslab_limit /= sizeof(kmem_bufctl_t); + offslab_limit = size; + offslab_limit /= sizeof(unsigned int); if (num > offslab_limit) break; @@ -2220,7 +2139,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) int __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) { - size_t left_over, slab_size, ralign; + size_t left_over, freelist_size, ralign; gfp_t gfp; int err; size_t size = cachep->size; @@ -2339,22 +2258,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (!cachep->num) return -E2BIG; - slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) - + sizeof(struct slab), cachep->align); + freelist_size = + ALIGN(cachep->num * sizeof(unsigned int), cachep->align); /* * If the slab has been placed off-slab, and we have enough space then * move it on-slab. This is at the expense of any extra colouring. */ - if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { + if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) { flags &= ~CFLGS_OFF_SLAB; - left_over -= slab_size; + left_over -= freelist_size; } if (flags & CFLGS_OFF_SLAB) { /* really off slab. No need for manual alignment */ - slab_size = - cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); + freelist_size = cachep->num * sizeof(unsigned int); #ifdef CONFIG_PAGE_POISONING /* If we're going to use the generic kernel_map_pages() @@ -2371,16 +2289,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (cachep->colour_off < cachep->align) cachep->colour_off = cachep->align; cachep->colour = left_over / cachep->colour_off; - cachep->slab_size = slab_size; + cachep->freelist_size = freelist_size; cachep->flags = flags; - cachep->allocflags = 0; + cachep->allocflags = __GFP_COMP; if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) cachep->allocflags |= GFP_DMA; cachep->size = size; cachep->reciprocal_buffer_size = reciprocal_value(size); if (flags & CFLGS_OFF_SLAB) { - cachep->slabp_cache = kmalloc_slab(slab_size, 0u); + cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); /* * This is a possibility for one of the malloc_sizes caches. * But since we go off slab only for object size greater than @@ -2388,7 +2306,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) * this should not happen at all. * But leave a BUG_ON for some lucky dude. */ - BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache)); + BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); } err = setup_cpu_cache(cachep, gfp); @@ -2494,7 +2412,7 @@ static int drain_freelist(struct kmem_cache *cache, { struct list_head *p; int nr_freed; - struct slab *slabp; + struct page *page; nr_freed = 0; while (nr_freed < tofree && !list_empty(&n->slabs_free)) { @@ -2506,18 +2424,18 @@ static int drain_freelist(struct kmem_cache *cache, goto out; } - slabp = list_entry(p, struct slab, list); + page = list_entry(p, struct page, lru); #if DEBUG - BUG_ON(slabp->inuse); + BUG_ON(page->active); #endif - list_del(&slabp->list); + list_del(&page->lru); /* * Safe to drop the lock. The slab is no longer linked * to the cache. */ n->free_objects -= cache->num; spin_unlock_irq(&n->list_lock); - slab_destroy(cache, slabp); + slab_destroy(cache, page); nr_freed++; } out: @@ -2600,52 +2518,42 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) * descriptors in kmem_cache_create, we search through the malloc_sizes array. * If we are creating a malloc_sizes cache here it would not be visible to * kmem_find_general_cachep till the initialization is complete. - * Hence we cannot have slabp_cache same as the original cache. + * Hence we cannot have freelist_cache same as the original cache. */ -static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, - int colour_off, gfp_t local_flags, - int nodeid) +static void *alloc_slabmgmt(struct kmem_cache *cachep, + struct page *page, int colour_off, + gfp_t local_flags, int nodeid) { - struct slab *slabp; + void *freelist; + void *addr = page_address(page); if (OFF_SLAB(cachep)) { /* Slab management obj is off-slab. */ - slabp = kmem_cache_alloc_node(cachep->slabp_cache, + freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - /* - * If the first object in the slab is leaked (it's allocated - * but no one has a reference to it), we want to make sure - * kmemleak does not treat the ->s_mem pointer as a reference - * to the object. Otherwise we will not report the leak. - */ - kmemleak_scan_area(&slabp->list, sizeof(struct list_head), - local_flags); - if (!slabp) + if (!freelist) return NULL; } else { - slabp = objp + colour_off; - colour_off += cachep->slab_size; + freelist = addr + colour_off; + colour_off += cachep->freelist_size; } - slabp->inuse = 0; - slabp->colouroff = colour_off; - slabp->s_mem = objp + colour_off; - slabp->nodeid = nodeid; - slabp->free = 0; - return slabp; + page->active = 0; + page->s_mem = addr + colour_off; + return freelist; } -static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) +static inline unsigned int *slab_freelist(struct page *page) { - return (kmem_bufctl_t *) (slabp + 1); + return (unsigned int *)(page->freelist); } static void cache_init_objs(struct kmem_cache *cachep, - struct slab *slabp) + struct page *page) { int i; for (i = 0; i < cachep->num; i++) { - void *objp = index_to_obj(cachep, slabp, i); + void *objp = index_to_obj(cachep, page, i); #if DEBUG /* need to poison the objs? */ if (cachep->flags & SLAB_POISON) @@ -2681,9 +2589,8 @@ static void cache_init_objs(struct kmem_cache *cachep, if (cachep->ctor) cachep->ctor(objp); #endif - slab_bufctl(slabp)[i] = i + 1; + slab_freelist(page)[i] = i; } - slab_bufctl(slabp)[i - 1] = BUFCTL_END; } static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) @@ -2696,41 +2603,41 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) } } -static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, +static void *slab_get_obj(struct kmem_cache *cachep, struct page *page, int nodeid) { - void *objp = index_to_obj(cachep, slabp, slabp->free); - kmem_bufctl_t next; + void *objp; - slabp->inuse++; - next = slab_bufctl(slabp)[slabp->free]; + objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]); + page->active++; #if DEBUG - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; - WARN_ON(slabp->nodeid != nodeid); + WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); #endif - slabp->free = next; return objp; } -static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, +static void slab_put_obj(struct kmem_cache *cachep, struct page *page, void *objp, int nodeid) { - unsigned int objnr = obj_to_index(cachep, slabp, objp); - + unsigned int objnr = obj_to_index(cachep, page, objp); #if DEBUG + unsigned int i; + /* Verify that the slab belongs to the intended node */ - WARN_ON(slabp->nodeid != nodeid); + WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); - if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { - printk(KERN_ERR "slab: double free detected in cache " - "'%s', objp %p\n", cachep->name, objp); - BUG(); + /* Verify double free bug */ + for (i = page->active; i < cachep->num; i++) { + if (slab_freelist(page)[i] == objnr) { + printk(KERN_ERR "slab: double free detected in cache " + "'%s', objp %p\n", cachep->name, objp); + BUG(); + } } #endif - slab_bufctl(slabp)[objnr] = slabp->free; - slabp->free = objnr; - slabp->inuse--; + page->active--; + slab_freelist(page)[page->active] = objnr; } /* @@ -2738,23 +2645,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, * for the slab allocator to be able to lookup the cache and slab of a * virtual address for kfree, ksize, and slab debugging. */ -static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, - void *addr) +static void slab_map_pages(struct kmem_cache *cache, struct page *page, + void *freelist) { - int nr_pages; - struct page *page; - - page = virt_to_page(addr); - - nr_pages = 1; - if (likely(!PageCompound(page))) - nr_pages <<= cache->gfporder; - - do { - page->slab_cache = cache; - page->slab_page = slab; - page++; - } while (--nr_pages); + page->slab_cache = cache; + page->freelist = freelist; } /* @@ -2762,9 +2657,9 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, * kmem_cache_alloc() when there are no active objs left in a cache. */ static int cache_grow(struct kmem_cache *cachep, - gfp_t flags, int nodeid, void *objp) + gfp_t flags, int nodeid, struct page *page) { - struct slab *slabp; + void *freelist; size_t offset; gfp_t local_flags; struct kmem_cache_node *n; @@ -2805,20 +2700,20 @@ static int cache_grow(struct kmem_cache *cachep, * Get mem for the objs. Attempt to allocate a physical page from * 'nodeid'. */ - if (!objp) - objp = kmem_getpages(cachep, local_flags, nodeid); - if (!objp) + if (!page) + page = kmem_getpages(cachep, local_flags, nodeid); + if (!page) goto failed; /* Get slab management. */ - slabp = alloc_slabmgmt(cachep, objp, offset, + freelist = alloc_slabmgmt(cachep, page, offset, local_flags & ~GFP_CONSTRAINT_MASK, nodeid); - if (!slabp) + if (!freelist) goto opps1; - slab_map_pages(cachep, slabp, objp); + slab_map_pages(cachep, page, freelist); - cache_init_objs(cachep, slabp); + cache_init_objs(cachep, page); if (local_flags & __GFP_WAIT) local_irq_disable(); @@ -2826,13 +2721,13 @@ static int cache_grow(struct kmem_cache *cachep, spin_lock(&n->list_lock); /* Make slab active. */ - list_add_tail(&slabp->list, &(n->slabs_free)); + list_add_tail(&page->lru, &(n->slabs_free)); STATS_INC_GROWN(cachep); n->free_objects += cachep->num; spin_unlock(&n->list_lock); return 1; opps1: - kmem_freepages(cachep, objp); + kmem_freepages(cachep, page); failed: if (local_flags & __GFP_WAIT) local_irq_disable(); @@ -2880,9 +2775,8 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, unsigned long caller) { - struct page *page; unsigned int objnr; - struct slab *slabp; + struct page *page; BUG_ON(virt_to_cache(objp) != cachep); @@ -2890,8 +2784,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, kfree_debugcheck(objp); page = virt_to_head_page(objp); - slabp = page->slab_page; - if (cachep->flags & SLAB_RED_ZONE) { verify_redzone_free(cachep, objp); *dbg_redzone1(cachep, objp) = RED_INACTIVE; @@ -2900,14 +2792,11 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = (void *)caller; - objnr = obj_to_index(cachep, slabp, objp); + objnr = obj_to_index(cachep, page, objp); BUG_ON(objnr >= cachep->num); - BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); + BUG_ON(objp != index_to_obj(cachep, page, objnr)); -#ifdef CONFIG_DEBUG_SLAB_LEAK - slab_bufctl(slabp)[objnr] = BUFCTL_FREE; -#endif if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { @@ -2924,33 +2813,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, return objp; } -static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) -{ - kmem_bufctl_t i; - int entries = 0; - - /* Check slab's freelist to see if this obj is there. */ - for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { - entries++; - if (entries > cachep->num || i >= cachep->num) - goto bad; - } - if (entries != cachep->num - slabp->inuse) { -bad: - printk(KERN_ERR "slab: Internal list corruption detected in " - "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n", - cachep->name, cachep->num, slabp, slabp->inuse, - print_tainted()); - print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp, - sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t), - 1); - BUG(); - } -} #else #define kfree_debugcheck(x) do { } while(0) #define cache_free_debugcheck(x,objp,z) (objp) -#define check_slabp(x,y) do { } while(0) #endif static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, @@ -2989,7 +2854,7 @@ retry: while (batchcount > 0) { struct list_head *entry; - struct slab *slabp; + struct page *page; /* Get slab alloc is to come from. */ entry = n->slabs_partial.next; if (entry == &n->slabs_partial) { @@ -2999,8 +2864,7 @@ retry: goto must_grow; } - slabp = list_entry(entry, struct slab, list); - check_slabp(cachep, slabp); + page = list_entry(entry, struct page, lru); check_spinlock_acquired(cachep); /* @@ -3008,24 +2872,23 @@ retry: * there must be at least one object available for * allocation. */ - BUG_ON(slabp->inuse >= cachep->num); + BUG_ON(page->active >= cachep->num); - while (slabp->inuse < cachep->num && batchcount--) { + while (page->active < cachep->num && batchcount--) { STATS_INC_ALLOCED(cachep); STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, + ac_put_obj(cachep, ac, slab_get_obj(cachep, page, node)); } - check_slabp(cachep, slabp); /* move slabp to correct slabp list: */ - list_del(&slabp->list); - if (slabp->free == BUFCTL_END) - list_add(&slabp->list, &n->slabs_full); + list_del(&page->lru); + if (page->active == cachep->num) + list_add(&page->list, &n->slabs_full); else - list_add(&slabp->list, &n->slabs_partial); + list_add(&page->list, &n->slabs_partial); } must_grow: @@ -3097,16 +2960,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, *dbg_redzone1(cachep, objp) = RED_ACTIVE; *dbg_redzone2(cachep, objp) = RED_ACTIVE; } -#ifdef CONFIG_DEBUG_SLAB_LEAK - { - struct slab *slabp; - unsigned objnr; - - slabp = virt_to_head_page(objp)->slab_page; - objnr = (unsigned)(objp - slabp->s_mem) / cachep->size; - slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; - } -#endif objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); @@ -3248,18 +3101,20 @@ retry: * We may trigger various forms of reclaim on the allowed * set and go into memory reserves if necessary. */ + struct page *page; + if (local_flags & __GFP_WAIT) local_irq_enable(); kmem_flagcheck(cache, flags); - obj = kmem_getpages(cache, local_flags, numa_mem_id()); + page = kmem_getpages(cache, local_flags, numa_mem_id()); if (local_flags & __GFP_WAIT) local_irq_disable(); - if (obj) { + if (page) { /* * Insert into the appropriate per node queues */ - nid = page_to_nid(virt_to_page(obj)); - if (cache_grow(cache, flags, nid, obj)) { + nid = page_to_nid(page); + if (cache_grow(cache, flags, nid, page)) { obj = ____cache_alloc_node(cache, flags | GFP_THISNODE, nid); if (!obj) @@ -3288,7 +3143,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct list_head *entry; - struct slab *slabp; + struct page *page; struct kmem_cache_node *n; void *obj; int x; @@ -3308,26 +3163,24 @@ retry: goto must_grow; } - slabp = list_entry(entry, struct slab, list); + page = list_entry(entry, struct page, lru); check_spinlock_acquired_node(cachep, nodeid); - check_slabp(cachep, slabp); STATS_INC_NODEALLOCS(cachep); STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - BUG_ON(slabp->inuse == cachep->num); + BUG_ON(page->active == cachep->num); - obj = slab_get_obj(cachep, slabp, nodeid); - check_slabp(cachep, slabp); + obj = slab_get_obj(cachep, page, nodeid); n->free_objects--; /* move slabp to correct slabp list: */ - list_del(&slabp->list); + list_del(&page->lru); - if (slabp->free == BUFCTL_END) - list_add(&slabp->list, &n->slabs_full); + if (page->active == cachep->num) + list_add(&page->lru, &n->slabs_full); else - list_add(&slabp->list, &n->slabs_partial); + list_add(&page->lru, &n->slabs_partial); spin_unlock(&n->list_lock); goto done; @@ -3477,23 +3330,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, for (i = 0; i < nr_objects; i++) { void *objp; - struct slab *slabp; + struct page *page; clear_obj_pfmemalloc(&objpp[i]); objp = objpp[i]; - slabp = virt_to_slab(objp); + page = virt_to_head_page(objp); n = cachep->node[node]; - list_del(&slabp->list); + list_del(&page->lru); check_spinlock_acquired_node(cachep, node); - check_slabp(cachep, slabp); - slab_put_obj(cachep, slabp, objp, node); + slab_put_obj(cachep, page, objp, node); STATS_DEC_ACTIVE(cachep); n->free_objects++; - check_slabp(cachep, slabp); /* fixup slab chains */ - if (slabp->inuse == 0) { + if (page->active == 0) { if (n->free_objects > n->free_limit) { n->free_objects -= cachep->num; /* No need to drop any previously held @@ -3502,16 +3353,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, * a different cache, refer to comments before * alloc_slabmgmt. */ - slab_destroy(cachep, slabp); + slab_destroy(cachep, page); } else { - list_add(&slabp->list, &n->slabs_free); + list_add(&page->lru, &n->slabs_free); } } else { /* Unconditionally move a slab to the end of the * partial list on free - maximum time for the * other objects to be freed, too. */ - list_add_tail(&slabp->list, &n->slabs_partial); + list_add_tail(&page->lru, &n->slabs_partial); } } } @@ -3551,10 +3402,10 @@ free_done: p = n->slabs_free.next; while (p != &(n->slabs_free)) { - struct slab *slabp; + struct page *page; - slabp = list_entry(p, struct slab, list); - BUG_ON(slabp->inuse); + page = list_entry(p, struct page, lru); + BUG_ON(page->active); i++; p = p->next; @@ -4158,7 +4009,7 @@ out: #ifdef CONFIG_SLABINFO void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) { - struct slab *slabp; + struct page *page; unsigned long active_objs; unsigned long num_objs; unsigned long active_slabs = 0; @@ -4178,23 +4029,23 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) check_irq_on(); spin_lock_irq(&n->list_lock); - list_for_each_entry(slabp, &n->slabs_full, list) { - if (slabp->inuse != cachep->num && !error) + list_for_each_entry(page, &n->slabs_full, lru) { + if (page->active != cachep->num && !error) error = "slabs_full accounting error"; active_objs += cachep->num; active_slabs++; } - list_for_each_entry(slabp, &n->slabs_partial, list) { - if (slabp->inuse == cachep->num && !error) - error = "slabs_partial inuse accounting error"; - if (!slabp->inuse && !error) - error = "slabs_partial/inuse accounting error"; - active_objs += slabp->inuse; + list_for_each_entry(page, &n->slabs_partial, lru) { + if (page->active == cachep->num && !error) + error = "slabs_partial accounting error"; + if (!page->active && !error) + error = "slabs_partial accounting error"; + active_objs += page->active; active_slabs++; } - list_for_each_entry(slabp, &n->slabs_free, list) { - if (slabp->inuse && !error) - error = "slabs_free/inuse accounting error"; + list_for_each_entry(page, &n->slabs_free, lru) { + if (page->active && !error) + error = "slabs_free accounting error"; num_slabs++; } free_objects += n->free_objects; @@ -4346,15 +4197,27 @@ static inline int add_caller(unsigned long *n, unsigned long v) return 1; } -static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) +static void handle_slab(unsigned long *n, struct kmem_cache *c, + struct page *page) { void *p; - int i; + int i, j; + if (n[0] == n[1]) return; - for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) { - if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) + for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { + bool active = true; + + for (j = page->active; j < c->num; j++) { + /* Skip freed item */ + if (slab_freelist(page)[j] == i) { + active = false; + break; + } + } + if (!active) continue; + if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) return; } @@ -4379,7 +4242,7 @@ static void show_symbol(struct seq_file *m, unsigned long address) static int leaks_show(struct seq_file *m, void *p) { struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); - struct slab *slabp; + struct page *page; struct kmem_cache_node *n; const char *name; unsigned long *x = m->private; @@ -4403,10 +4266,10 @@ static int leaks_show(struct seq_file *m, void *p) check_irq_on(); spin_lock_irq(&n->list_lock); - list_for_each_entry(slabp, &n->slabs_full, list) - handle_slab(x, cachep, slabp); - list_for_each_entry(slabp, &n->slabs_partial, list) - handle_slab(x, cachep, slabp); + list_for_each_entry(page, &n->slabs_full, lru) + handle_slab(x, cachep, page); + list_for_each_entry(page, &n->slabs_partial, lru) + handle_slab(x, cachep, page); spin_unlock_irq(&n->list_lock); } name = cachep->name; @@ -155,7 +155,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) /* * Maximum number of desirable partial slabs. * The existence of more partial slabs makes kmem_cache_shrink - * sort the partial list by the number of objects in the. + * sort the partial list by the number of objects in use. */ #define MAX_PARTIAL 10 @@ -933,6 +933,16 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. */ +static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) +{ + kmemleak_alloc(ptr, size, 1, flags); +} + +static inline void kfree_hook(const void *x) +{ + kmemleak_free(x); +} + static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) { flags &= gfp_allowed_mask; @@ -1217,8 +1227,8 @@ static unsigned long kmem_cache_flags(unsigned long object_size, /* * Enable debugging if selected on the kernel commandline. */ - if (slub_debug && (!slub_debug_slabs || - !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) + if (slub_debug && (!slub_debug_slabs || (name && + !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))) flags |= slub_debug; return flags; @@ -1260,13 +1270,30 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} +static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) +{ + kmemleak_alloc(ptr, size, 1, flags); +} + +static inline void kfree_hook(const void *x) +{ + kmemleak_free(x); +} + static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) { return 0; } static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, - void *object) {} + void *object) +{ + kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, + flags & gfp_allowed_mask); +} -static inline void slab_free_hook(struct kmem_cache *s, void *x) {} +static inline void slab_free_hook(struct kmem_cache *s, void *x) +{ + kmemleak_free_recursive(x, s->flags); +} #endif /* CONFIG_SLUB_DEBUG */ @@ -2829,8 +2856,8 @@ static struct kmem_cache *kmem_cache_node; * slab on the node for this slabcache. There are no concurrent accesses * possible. * - * Note that this function only works on the kmalloc_node_cache - * when allocating for the kmalloc_node_cache. This is used for bootstrapping + * Note that this function only works on the kmem_cache_node + * when allocating for the kmem_cache_node. This is used for bootstrapping * memory on a fresh node that has no slab structures yet. */ static void early_kmem_cache_node_alloc(int node) @@ -3272,7 +3299,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) if (page) ptr = page_address(page); - kmemleak_alloc(ptr, size, 1, flags); + kmalloc_large_node_hook(ptr, size, flags); return ptr; } @@ -3336,7 +3363,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); - kmemleak_free(x); + kfree_hook(x); __free_memcg_kmem_pages(page, compound_order(page)); return; } @@ -82,19 +82,6 @@ static void __put_compound_page(struct page *page) static void put_compound_page(struct page *page) { - /* - * hugetlbfs pages cannot be split from under us. If this is a - * hugetlbfs page, check refcount on head page and release the page if - * the refcount becomes zero. - */ - if (PageHuge(page)) { - page = compound_head(page); - if (put_page_testzero(page)) - __put_compound_page(page); - - return; - } - if (unlikely(PageTail(page))) { /* __split_huge_page_refcount can run under us */ struct page *page_head = compound_trans_head(page); @@ -111,14 +98,31 @@ static void put_compound_page(struct page *page) * still hot on arches that do not support * this_cpu_cmpxchg_double(). */ - if (PageSlab(page_head)) { - if (PageTail(page)) { + if (PageSlab(page_head) || PageHeadHuge(page_head)) { + if (likely(PageTail(page))) { + /* + * __split_huge_page_refcount + * cannot race here. + */ + VM_BUG_ON(!PageHead(page_head)); + atomic_dec(&page->_mapcount); if (put_page_testzero(page_head)) VM_BUG_ON(1); - - atomic_dec(&page->_mapcount); - goto skip_lock_tail; + if (put_page_testzero(page_head)) + __put_compound_page(page_head); + return; } else + /* + * __split_huge_page_refcount + * run before us, "page" was a + * THP tail. The split + * page_head has been freed + * and reallocated as slab or + * hugetlbfs page of smaller + * order (only possible if + * reallocated as slab on + * x86). + */ goto skip_lock; } /* @@ -132,8 +136,27 @@ static void put_compound_page(struct page *page) /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); skip_lock: - if (put_page_testzero(page_head)) - __put_single_page(page_head); + if (put_page_testzero(page_head)) { + /* + * The head page may have been + * freed and reallocated as a + * compound page of smaller + * order and then freed again. + * All we know is that it + * cannot have become: a THP + * page, a compound page of + * higher order, a tail page. + * That is because we still + * hold the refcount of the + * split THP tail and + * page_head was the THP head + * before the split. + */ + if (PageHead(page_head)) + __put_compound_page(page_head); + else + __put_single_page(page_head); + } out_put_single: if (put_page_testzero(page)) __put_single_page(page); @@ -155,7 +178,6 @@ out_put_single: VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); -skip_lock_tail: if (put_page_testzero(page_head)) { if (PageHead(page_head)) __put_compound_page(page_head); @@ -198,51 +220,52 @@ bool __get_page_tail(struct page *page) * proper PT lock that already serializes against * split_huge_page(). */ + unsigned long flags; bool got = false; - struct page *page_head; - - /* - * If this is a hugetlbfs page it cannot be split under us. Simply - * increment refcount for the head page. - */ - if (PageHuge(page)) { - page_head = compound_head(page); - atomic_inc(&page_head->_count); - got = true; - } else { - unsigned long flags; + struct page *page_head = compound_trans_head(page); - page_head = compound_trans_head(page); - if (likely(page != page_head && - get_page_unless_zero(page_head))) { - - /* Ref to put_compound_page() comment. */ - if (PageSlab(page_head)) { - if (likely(PageTail(page))) { - __get_page_tail_foll(page, false); - return true; - } else { - put_page(page_head); - return false; - } - } - - /* - * page_head wasn't a dangling pointer but it - * may not be a head page anymore by the time - * we obtain the lock. That is ok as long as it - * can't be freed from under us. - */ - flags = compound_lock_irqsave(page_head); - /* here __split_huge_page_refcount won't run anymore */ + if (likely(page != page_head && get_page_unless_zero(page_head))) { + /* Ref to put_compound_page() comment. */ + if (PageSlab(page_head) || PageHeadHuge(page_head)) { if (likely(PageTail(page))) { + /* + * This is a hugetlbfs page or a slab + * page. __split_huge_page_refcount + * cannot race here. + */ + VM_BUG_ON(!PageHead(page_head)); __get_page_tail_foll(page, false); - got = true; - } - compound_unlock_irqrestore(page_head, flags); - if (unlikely(!got)) + return true; + } else { + /* + * __split_huge_page_refcount run + * before us, "page" was a THP + * tail. The split page_head has been + * freed and reallocated as slab or + * hugetlbfs page of smaller order + * (only possible if reallocated as + * slab on x86). + */ put_page(page_head); + return false; + } + } + + /* + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time + * we obtain the lock. That is ok as long as it + * can't be freed from under us. + */ + flags = compound_lock_irqsave(page_head); + /* here __split_huge_page_refcount won't run anymore */ + if (likely(PageTail(page))) { + __get_page_tail_foll(page, false); + got = true; } + compound_unlock_irqrestore(page_head, flags); + if (unlikely(!got)) + put_page(page_head); } return got; } diff --git a/net/Kconfig b/net/Kconfig index 0715db6..d334678 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -224,7 +224,7 @@ source "net/hsr/Kconfig" config RPS boolean - depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + depends on SMP && SYSFS default y config RFS_ACCEL @@ -235,7 +235,7 @@ config RFS_ACCEL config XPS boolean - depends on SMP && USE_GENERIC_SMP_HELPERS + depends on SMP default y config NETPRIO_CGROUP diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7fee50d..7d424ac 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1735,7 +1735,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr size_t size, int flags) { struct sock *sk = sock->sk; - struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; int offset = 0; @@ -1764,14 +1763,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); - if (!err) { - if (sat) { - sat->sat_family = AF_APPLETALK; - sat->sat_port = ddp->deh_sport; - sat->sat_addr.s_node = ddp->deh_snode; - sat->sat_addr.s_net = ddp->deh_snet; - } - msg->msg_namelen = sizeof(*sat); + if (!err && msg->msg_name) { + struct sockaddr_at *sat = msg->msg_name; + sat->sat_family = AF_APPLETALK; + sat->sat_port = ddp->deh_sport; + sat->sat_addr.s_node = ddp->deh_snode; + sat->sat_addr.s_net = ddp->deh_snet; + msg->msg_namelen = sizeof(*sat); } skb_free_datagram(sk, skb); /* Free the datagram. */ diff --git a/net/atm/common.c b/net/atm/common.c index 737bef5..7b49100 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -531,8 +531,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; - msg->msg_namelen = 0; - if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a00123e..7bb1605 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1636,11 +1636,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (msg->msg_namelen != 0) { - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; + if (msg->msg_name) { ax25_digi digi; ax25_address src; const unsigned char *mac = skb_mac_header(skb); + struct sockaddr_ax25 *sax = msg->msg_name; memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f6a1671..56ca494 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -224,10 +224,9 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { - if (sk->sk_shutdown & RCV_SHUTDOWN) { - msg->msg_namelen = 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; - } + return err; } @@ -245,8 +244,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, &msg->msg_namelen); - else - msg->msg_namelen = 0; } skb_free_datagram(sk, skb); @@ -295,8 +292,6 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & MSG_OOB) return -EOPNOTSUPP; - msg->msg_namelen = 0; - BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 71f0be1..6a6c8bb 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -856,8 +856,6 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0cef677..4af3821 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2439,6 +2439,9 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, int err; struct sk_buff_head seg_queue; + if (!chan->conn) + return -ENOTCONN; + /* Connectionless channel */ if (chan->chan_type == L2CAP_CHAN_CONN_LESS) { skb = l2cap_create_connless_pdu(chan, msg, len, priority); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 94d06cb..facd8a7 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -694,6 +694,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_family = AF_BLUETOOTH; addr.l2_psm = 0; addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (*err < 0) goto failed; @@ -719,6 +720,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, addr.l2_family = AF_BLUETOOTH; addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); if (*err == 0 || *err == -EINPROGRESS) return s; @@ -1983,6 +1985,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) addr.l2_family = AF_BLUETOOTH; addr.l2_psm = __constant_cpu_to_le16(RFCOMM_PSM); addr.l2_cid = 0; + addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (err < 0) { BT_ERR("Bind failed %d", err); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c4d3d42..3c2d3e4 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -615,7 +615,6 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); - msg->msg_namelen = 0; return 0; } @@ -739,8 +738,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; + struct sock *l2cap_sk; + struct l2cap_conn *conn; struct rfcomm_conninfo cinfo; - struct l2cap_conn *conn = l2cap_pi(sk)->chan->conn; int len, err = 0; u32 opt; @@ -783,6 +783,9 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u break; } + l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; + conn = l2cap_pi(l2cap_sk)->chan->conn; + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 12a0e51..24fa396 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -711,7 +711,6 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { sco_conn_defer_accept(pi->conn->hcon, pi->setting); sk->sk_state = BT_CONFIG; - msg->msg_namelen = 0; release_sock(sk); return 0; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 85a2796c..4b07acb 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -742,6 +742,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); + if (!(conn->hcon->link_mode & HCI_LM_MASTER)) + return SMP_CMD_NOTSUPP; + hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req); if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 6e6194f..4bf02ad 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -172,6 +172,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_fdb_delete_by_port(br, NULL, 1); + br_vlan_flush(br); del_timer_sync(&br->gc_timer); diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 99c8566..17fd5f2 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -48,10 +48,12 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->bitmask & EBT_IP6_TCLASS && FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) return false; - if (FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, - &info->saddr), EBT_IP6_SOURCE) || + if ((info->bitmask & EBT_IP6_SOURCE && + FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr), EBT_IP6_SOURCE)) || + (info->bitmask & EBT_IP6_DEST && FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, - &info->daddr), EBT_IP6_DEST)) + &info->daddr), EBT_IP6_DEST))) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 05a41c7..d6be3ed 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; - m->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; @@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - /* * Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg diff --git a/net/compat.c b/net/compat.c index 8903258..618c6a8 100644 --- a/net/compat.c +++ b/net/compat.c @@ -93,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - kern_msg->msg_name = kern_address; + if (kern_msg->msg_name) + kern_msg->msg_name = kern_address; } else kern_msg->msg_name = NULL; diff --git a/net/core/dev.c b/net/core/dev.c index 7e00a73..ba3b7ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4996,7 +4996,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } diff --git a/net/core/iovec.c b/net/core/iovec.c index 4cdb7c4..b618694 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - m->msg_name = address; + if (m->msg_name) + m->msg_name = address; } else { m->msg_name = NULL; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8cec1e6..2718fed 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2796,6 +2796,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *fskb = skb_shinfo(skb)->frag_list; + skb_frag_t *skb_frag = skb_shinfo(skb)->frags; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2835,16 +2836,38 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags) { - BUG_ON(fskb->len != len); + if (!hsize && i >= nfrags && skb_headlen(fskb) && + (skb_headlen(fskb) == len || sg)) { + BUG_ON(skb_headlen(fskb) > len); + + i = 0; + nfrags = skb_shinfo(fskb)->nr_frags; + skb_frag = skb_shinfo(fskb)->frags; + pos += skb_headlen(fskb); + + while (pos < offset + len) { + BUG_ON(i >= nfrags); + + size = skb_frag_size(skb_frag); + if (pos + size > offset + len) + break; + + i++; + pos += size; + skb_frag++; + } - pos += len; nskb = skb_clone(fskb, GFP_ATOMIC); fskb = fskb->next; if (unlikely(!nskb)) goto err; + if (unlikely(pskb_trim(nskb, len))) { + kfree_skb(nskb); + goto err; + } + hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); @@ -2881,7 +2904,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) nskb->data - tnl_hlen, doffset + tnl_hlen); - if (fskb != skb_shinfo(skb)->frag_list) + if (nskb->len == len + doffset) goto perform_csum_check; if (!sg) { @@ -2899,8 +2922,28 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; - while (pos < offset + len && i < nfrags) { - *frag = skb_shinfo(skb)->frags[i]; + while (pos < offset + len) { + if (i >= nfrags) { + BUG_ON(skb_headlen(fskb)); + + i = 0; + nfrags = skb_shinfo(fskb)->nr_frags; + skb_frag = skb_shinfo(fskb)->frags; + + BUG_ON(!nfrags); + + fskb = fskb->next; + } + + if (unlikely(skb_shinfo(nskb)->nr_frags >= + MAX_SKB_FRAGS)) { + net_warn_ratelimited( + "skb_segment: too many frags: %u %u\n", + pos, mss); + goto err; + } + + *frag = *skb_frag; __skb_frag_ref(frag); size = skb_frag_size(frag); @@ -2913,6 +2956,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (pos + size <= offset + len) { i++; + skb_frag++; pos += size; } else { skb_frag_size_sub(frag, pos + size - (offset + len)); @@ -2922,25 +2966,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) frag++; } - if (pos < offset + len) { - struct sk_buff *fskb2 = fskb; - - BUG_ON(pos + fskb->len != offset + len); - - pos += fskb->len; - fskb = fskb->next; - - if (fskb2->next) { - fskb2 = skb_clone(fskb2, GFP_ATOMIC); - if (!fskb2) - goto err; - } else - skb_get(fskb2); - - SKB_FRAG_ASSERT(nskb); - skb_shinfo(nskb)->frag_list = fskb2; - } - skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 01cffea..f13bd91 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -244,6 +244,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, this_cpu_inc(snet->stats->cookie_valid); opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f428935..f8da282 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1776,8 +1776,12 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (do_cache) - rt_cache_route(&FIB_RES_NH(res), rth); + if (do_cache) { + if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { + rth->dst.flags |= DST_NOCACHE; + rt_add_uncached_list(rth); + } + } skb_dst_set(skb, &rth->dst); err = 0; goto out; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index bf9f612..f78f41a 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -259,6 +259,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, this_cpu_inc(snet->stats->cookie_valid); opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 7a1e0fc..e096025 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1823,8 +1823,6 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb->tstamp.tv64) sk->sk_stamp = skb->tstamp; - msg->msg_namelen = sizeof(*sipx); - if (sipx) { sipx->sipx_family = AF_IPX; sipx->sipx_port = ipx->ipx_source.sock; @@ -1832,6 +1830,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net; sipx->sipx_type = ipx->ipx_type; sipx->sipx_zero = 0; + msg->msg_namelen = sizeof(*sipx); } rc = copied; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 0f67690..de7db23 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1385,8 +1385,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); - msg->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -1451,8 +1449,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); - msg->msg_namelen = 0; - do { int chunk; struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 168aff5..c4b7218 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1324,8 +1324,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int err = 0; u32 offset; - msg->msg_namelen = 0; - if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && diff --git a/net/key/af_key.c b/net/key/af_key.c index 911ef03..545f047 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3616,7 +3616,6 @@ static int pfkey_recvmsg(struct kiocb *kiocb, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - msg->msg_namelen = 0; skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index ffda81e..be5fadf 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -197,8 +197,6 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state & PPPOX_BOUND) goto end; - msg->msg_namelen = 0; - err = 0; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6cba486..7b01b9f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,8 +720,6 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; - msg->msg_namelen = 0; - lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 48acec1..c3398cd 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -909,7 +909,7 @@ config NETFILTER_XT_MATCH_CONNLABEL connection simultaneously. config NETFILTER_XT_MATCH_CONNLIMIT - tristate '"connlimit" match support"' + tristate '"connlimit" match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED ---help--- diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e22d950..43549eb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -764,9 +764,10 @@ void nf_conntrack_free(struct nf_conn *ct) struct net *net = nf_ct_net(ct); nf_ct_ext_destroy(ct); - atomic_dec(&net->ct.count); nf_ct_ext_free(ct); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + smp_mb__before_atomic_dec(); + atomic_dec(&net->ct.count); } EXPORT_SYMBOL_GPL(nf_conntrack_free); diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 5f9bfd0..17c1bcb 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -41,8 +41,8 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, spin_lock_bh(&ct->lock); this_way = &seqadj->seq[dir]; if (this_way->offset_before == this_way->offset_after || - before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; + before(this_way->correction_pos, ntohl(seq))) { + this_way->correction_pos = ntohl(seq); this_way->offset_before = this_way->offset_after; this_way->offset_after += off; } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index cdf4567..9858e3e 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -151,9 +151,10 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, opts->tsecr = opts->tsval; opts->tsval = tcp_time_stamp & ~0x3f; - if (opts->options & XT_SYNPROXY_OPT_WSCALE) - opts->tsval |= info->wscale; - else + if (opts->options & XT_SYNPROXY_OPT_WSCALE) { + opts->tsval |= opts->wscale; + opts->wscale = info->wscale; + } else opts->tsval |= 0xf; if (opts->options & XT_SYNPROXY_OPT_SACK_PERM) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index a82667c..da0c1f4 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -128,7 +128,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, }; -static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) +static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; u32 flags; @@ -148,7 +148,8 @@ static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + return 0; } static int @@ -166,8 +167,11 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); - if (ctx->nla[NFTA_RULE_COMPAT]) - proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + if (ctx->nla[NFTA_RULE_COMPAT]) { + ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); + if (ret < 0) + goto err; + } nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); @@ -356,8 +360,11 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); - if (ctx->nla[NFTA_RULE_COMPAT]) - proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + if (ctx->nla[NFTA_RULE_COMPAT]) { + ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); + if (ret < 0) + goto err; + } nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f0176e1..bca50b9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2335,8 +2335,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } #endif - msg->msg_namelen = 0; - copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 7dbc4f7..4518a57 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1045,7 +1045,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group, flags); @@ -1062,7 +1062,7 @@ void genl_notify(struct genl_family *family, if (nlh) report = nlmsg_report(nlh); - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; group = family->mcgrp_offset + group; nlmsg_notify(sk, skb, portid, group, report, flags); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 698814b..53c19a3 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1179,10 +1179,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); + msg->msg_namelen = sizeof(*sax); } - msg->msg_namelen = sizeof(*sax); - skb_free_datagram(sk, skb); release_sock(sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index d308402b..824c605 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -807,8 +807,6 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index cd958b3..66bcd2e 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -244,8 +244,6 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return rc; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2e8286b..ac27c86 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -244,11 +244,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -266,10 +270,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -2052,12 +2059,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2070,7 +2089,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2084,19 +2103,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2173,8 +2190,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2212,7 +2228,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2228,7 +2243,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2240,19 +2255,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2386,15 +2399,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2614,6 +2626,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -2660,7 +2673,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; - struct sockaddr_ll *sll; int vnet_hdr_len = 0; err = -EINVAL; @@ -2744,22 +2756,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_free; } - /* - * If the address length field is there to be filled in, we fill - * it in now. + /* You lose any data beyond the buffer you gave. If it worries + * a user program they can ask the device for its MTU + * anyway. */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. - */ - copied = skb->len; if (copied > len) { copied = len; @@ -2772,9 +2772,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); - if (msg->msg_name) + if (msg->msg_name) { + /* If the address length field is there to be filled + * in, we fill it in now. + */ + if (sock->type == SOCK_PACKET) { + msg->msg_namelen = sizeof(struct sockaddr_pkt); + } else { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + + offsetof(struct sockaddr_ll, sll_addr); + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); + } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; diff --git a/net/packet/internal.h b/net/packet/internal.h index c4e4b45..1035fa2 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -113,6 +113,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/rds/recv.c b/net/rds/recv.c index 9f0f17c..de339b2 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); - msg->msg_namelen = 0; - if (msg_flags & MSG_OOB) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index e98fcfb..33af772 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1216,7 +1216,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name; size_t copied; unsigned char *asmptr; struct sk_buff *skb; @@ -1252,8 +1251,11 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (srose != NULL) { - memset(srose, 0, msg->msg_namelen); + if (msg->msg_name) { + struct sockaddr_rose *srose; + + memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); + srose = msg->msg_name; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4b48687..898492a 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* copy the peer address and timestamp */ if (!continue_call) { - if (msg->msg_name && msg->msg_namelen > 0) + if (msg->msg_name) { + size_t len = + sizeof(call->conn->trans->peer->srx); memcpy(msg->msg_name, - &call->conn->trans->peer->srx, - sizeof(call->conn->trans->peer->srx)); + &call->conn->trans->peer->srx, len); + msg->msg_namelen = len; + } sock_recv_ts_and_drops(msg, &rx->sk, skb); } diff --git a/net/socket.c b/net/socket.c index c226ace..0b18693 100644 --- a/net/socket.c +++ b/net/socket.c @@ -221,12 +221,13 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, int err; int len; + BUG_ON(klen > sizeof(struct sockaddr_storage)); err = get_user(len, ulen); if (err) return err; if (len > klen) len = klen; - if (len < 0 || len > sizeof(struct sockaddr_storage)) + if (len < 0) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) @@ -1840,8 +1841,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = sizeof(address); + /* Save some cycles and don't copy the address if not needed */ + msg.msg_name = addr ? (struct sockaddr *)&address : NULL; + /* We assume all kernel code knows the size of sockaddr_storage */ + msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); @@ -2221,16 +2224,14 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, goto out; } - /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + /* Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); - if (MSG_CMSG_COMPAT & flags) { + if (MSG_CMSG_COMPAT & flags) err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); - } else + else err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; @@ -2239,6 +2240,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + /* We assume all kernel code knows the size of sockaddr_storage */ + msg_sys->msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index d0d14a0..bf04b30 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -471,15 +471,6 @@ struct rpc_filelist { umode_t mode; }; -static int rpc_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - -static const struct dentry_operations rpc_dentry_operations = { - .d_delete = rpc_delete_dentry, -}; - static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { @@ -1266,7 +1257,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; - sb->s_d_op = &rpc_dentry_operations; + sb->s_d_op = &simple_dentry_operations; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3906527..3b61851 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -980,9 +980,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1091,9 +1088,6 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c1f403b..01625ccc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1754,7 +1754,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); - msg->msg_namelen = 0; if (u->addr) { msg->msg_namelen = u->addr->len; memcpy(msg->msg_name, u->addr->name, u->addr->len); @@ -1778,8 +1777,6 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - err = mutex_lock_interruptible(&u->readlock); if (err) { err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); @@ -1924,8 +1921,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - msg->msg_namelen = 0; - /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 545c08b..5adfd94 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1662,8 +1662,6 @@ vsock_stream_recvmsg(struct kiocb *kiocb, vsk = vsock_sk(sk); err = 0; - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state != SS_CONNECTED) { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 9d69866..687360d 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1746,8 +1746,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; - msg->msg_namelen = 0; - /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); diff --git a/net/wimax/stack.c b/net/wimax/stack.c index ef2191b..ec8b577 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -610,7 +610,6 @@ int __init wimax_subsys_init(void) d_fnend(4, NULL, "() = 0\n"); return 0; - genl_unregister_family(&wimax_gnl_family); error_register_family: d_fnend(4, NULL, "() = %d\n", result); return result; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 45a3ab5..7622789 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1340,10 +1340,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (sx25) { sx25->sx25_family = AF_X25; sx25->sx25_addr = x25->dest_addr; + msg->msg_namelen = sizeof(*sx25); } - msg->msg_namelen = sizeof(struct sockaddr_x25); - x25_check_rbuf(sk); rc = copied; out_free_dgram: diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c index db0e5cd..91c4117 100644 --- a/scripts/asn1_compiler.c +++ b/scripts/asn1_compiler.c @@ -1353,6 +1353,8 @@ static void render_out_of_line_list(FILE *out) render_opcode(out, "ASN1_OP_END_SET_OF%s,\n", act); render_opcode(out, "_jump_target(%u),\n", entry); break; + default: + break; } if (e->action) render_opcode(out, "_action(ACT_%s),\n", diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 61090e0..9c98100 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3289,6 +3289,7 @@ sub process { } } if (!defined $suppress_whiletrailers{$linenr} && + defined($stat) && defined($cond) && $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) { my ($s, $c) = ($stat, $cond); diff --git a/security/Makefile b/security/Makefile index c26c81e..a5918e0 100644 --- a/security/Makefile +++ b/security/Makefile @@ -16,7 +16,6 @@ obj-$(CONFIG_MMU) += min_addr.o # Object file lists obj-$(CONFIG_SECURITY) += security.o capability.o obj-$(CONFIG_SECURITYFS) += inode.o -# Must precede capability.o in order to stack properly. obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o obj-$(CONFIG_AUDIT) += lsm_audit.o diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 031d2d9..89c7865 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -111,7 +111,6 @@ static const char *const aa_audit_type[] = { static void audit_pre(struct audit_buffer *ab, void *ca) { struct common_audit_data *sa = ca; - struct task_struct *tsk = sa->aad->tsk ? sa->aad->tsk : current; if (aa_g_audit_header) { audit_log_format(ab, "apparmor="); @@ -132,11 +131,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca) if (sa->aad->profile) { struct aa_profile *profile = sa->aad->profile; - pid_t pid; - rcu_read_lock(); - pid = rcu_dereference(tsk->real_parent)->pid; - rcu_read_unlock(); - audit_log_format(ab, " parent=%d", pid); if (profile->ns != root_ns) { audit_log_format(ab, " namespace="); audit_log_untrustedstring(ab, profile->ns->base.hname); @@ -149,12 +143,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca) audit_log_format(ab, " name="); audit_log_untrustedstring(ab, sa->aad->name); } - - if (sa->aad->tsk) { - audit_log_format(ab, " pid=%d comm=", tsk->pid); - audit_log_untrustedstring(ab, tsk->comm); - } - } /** @@ -212,7 +200,7 @@ int aa_audit(int type, struct aa_profile *profile, gfp_t gfp, if (sa->aad->type == AUDIT_APPARMOR_KILL) (void)send_sig_info(SIGKILL, NULL, - sa->aad->tsk ? sa->aad->tsk : current); + sa->u.tsk ? sa->u.tsk : current); if (sa->aad->type == AUDIT_APPARMOR_ALLOWED) return complain_error(sa->aad->error); diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 84d1f5f..1101c6f 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -53,8 +53,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) /** * audit_caps - audit a capability - * @profile: profile confining task (NOT NULL) - * @task: task capability test was performed against (NOT NULL) + * @profile: profile being tested for confinement (NOT NULL) * @cap: capability tested * @error: error code returned by test * @@ -63,8 +62,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) * * Returns: 0 or sa->error on success, error code on failure */ -static int audit_caps(struct aa_profile *profile, struct task_struct *task, - int cap, int error) +static int audit_caps(struct aa_profile *profile, int cap, int error) { struct audit_cache *ent; int type = AUDIT_APPARMOR_AUTO; @@ -73,7 +71,6 @@ static int audit_caps(struct aa_profile *profile, struct task_struct *task, sa.type = LSM_AUDIT_DATA_CAP; sa.aad = &aad; sa.u.cap = cap; - sa.aad->tsk = task; sa.aad->op = OP_CAPABLE; sa.aad->error = error; @@ -124,8 +121,7 @@ static int profile_capable(struct aa_profile *profile, int cap) /** * aa_capable - test permission to use capability - * @task: task doing capability test against (NOT NULL) - * @profile: profile confining @task (NOT NULL) + * @profile: profile being tested against (NOT NULL) * @cap: capability to be tested * @audit: whether an audit record should be generated * @@ -133,8 +129,7 @@ static int profile_capable(struct aa_profile *profile, int cap) * * Returns: 0 on success, or else an error code. */ -int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap, - int audit) +int aa_capable(struct aa_profile *profile, int cap, int audit) { int error = profile_capable(profile, cap); @@ -144,5 +139,5 @@ int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap, return error; } - return audit_caps(profile, task, cap, error); + return audit_caps(profile, cap, error); } diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 26c607c..452567d 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -50,23 +50,21 @@ void aa_free_domain_entries(struct aa_domain *domain) /** * may_change_ptraced_domain - check if can change profile on ptraced task - * @task: task we want to change profile of (NOT NULL) * @to_profile: profile to change to (NOT NULL) * - * Check if the task is ptraced and if so if the tracing task is allowed + * Check if current is ptraced and if so if the tracing task is allowed * to trace the new domain * * Returns: %0 or error if change not allowed */ -static int may_change_ptraced_domain(struct task_struct *task, - struct aa_profile *to_profile) +static int may_change_ptraced_domain(struct aa_profile *to_profile) { struct task_struct *tracer; struct aa_profile *tracerp = NULL; int error = 0; rcu_read_lock(); - tracer = ptrace_parent(task); + tracer = ptrace_parent(current); if (tracer) /* released below */ tracerp = aa_get_task_profile(tracer); @@ -75,7 +73,7 @@ static int may_change_ptraced_domain(struct task_struct *task, if (!tracer || unconfined(tracerp)) goto out; - error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH); + error = aa_may_ptrace(tracerp, to_profile, PTRACE_MODE_ATTACH); out: rcu_read_unlock(); @@ -477,7 +475,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm) } if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { - error = may_change_ptraced_domain(current, new_profile); + error = may_change_ptraced_domain(new_profile); if (error) { aa_put_profile(new_profile); goto audit; @@ -690,7 +688,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) } } - error = may_change_ptraced_domain(current, hat); + error = may_change_ptraced_domain(hat); if (error) { info = "ptraced"; error = -EPERM; @@ -829,7 +827,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec, } /* check if tracing task is allowed to trace target domain */ - error = may_change_ptraced_domain(current, target); + error = may_change_ptraced_domain(target); if (error) { info = "ptrace prevents transition"; goto audit; diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 30e8d76..ba3dfd1 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -109,7 +109,6 @@ struct apparmor_audit_data { void *profile; const char *name; const char *info; - struct task_struct *tsk; union { void *target; struct { diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h index 2e7c9d6..fc3fa38 100644 --- a/security/apparmor/include/capability.h +++ b/security/apparmor/include/capability.h @@ -4,7 +4,7 @@ * This file contains AppArmor capability mediation definitions. * * Copyright (C) 1998-2008 Novell/SUSE - * Copyright 2009-2010 Canonical Ltd. + * Copyright 2009-2013 Canonical Ltd. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -38,8 +38,7 @@ struct aa_caps { extern struct aa_fs_entry aa_fs_entry_caps[]; -int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap, - int audit); +int aa_capable(struct aa_profile *profile, int cap, int audit); static inline void aa_free_cap_rules(struct aa_caps *caps) { diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h index aeda0fbc..288ca76 100644 --- a/security/apparmor/include/ipc.h +++ b/security/apparmor/include/ipc.h @@ -19,8 +19,8 @@ struct aa_profile; -int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer, - struct aa_profile *tracee, unsigned int mode); +int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee, + unsigned int mode); int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee, unsigned int mode); diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index c51d226..777ac1c 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -54,15 +54,14 @@ static int aa_audit_ptrace(struct aa_profile *profile, /** * aa_may_ptrace - test if tracer task can trace the tracee - * @tracer_task: task who will do the tracing (NOT NULL) * @tracer: profile of the task doing the tracing (NOT NULL) * @tracee: task to be traced * @mode: whether PTRACE_MODE_READ || PTRACE_MODE_ATTACH * * Returns: %0 else error code if permission denied or error */ -int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer, - struct aa_profile *tracee, unsigned int mode) +int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee, + unsigned int mode) { /* TODO: currently only based on capability, not extended ptrace * rules, @@ -72,7 +71,7 @@ int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer, if (unconfined(tracer) || tracer == tracee) return 0; /* log this capability request */ - return aa_capable(tracer_task, tracer, CAP_SYS_PTRACE, 1); + return aa_capable(tracer, CAP_SYS_PTRACE, 1); } /** @@ -101,7 +100,7 @@ int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee, if (!unconfined(tracer_p)) { struct aa_profile *tracee_p = aa_get_task_profile(tracee); - error = aa_may_ptrace(tracer, tracer_p, tracee_p, mode); + error = aa_may_ptrace(tracer_p, tracee_p, mode); error = aa_audit_ptrace(tracer_p, tracee_p, error); aa_put_profile(tracee_p); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index fb99e18..4257b7e 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -145,7 +145,7 @@ static int apparmor_capable(const struct cred *cred, struct user_namespace *ns, if (!error) { profile = aa_cred_profile(cred); if (!unconfined(profile)) - error = aa_capable(current, profile, cap, audit); + error = aa_capable(profile, cap, audit); } return error; } diff --git a/security/capability.c b/security/capability.c index dbeb9bc..8b4f24a 100644 --- a/security/capability.c +++ b/security/capability.c @@ -777,9 +777,15 @@ static int cap_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx) return 0; } -static int cap_xfrm_state_alloc_security(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, - u32 secid) +static int cap_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static int cap_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, + u32 secid) { return 0; } @@ -1101,7 +1107,8 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, xfrm_policy_clone_security); set_to_cap_if_null(ops, xfrm_policy_free_security); set_to_cap_if_null(ops, xfrm_policy_delete_security); - set_to_cap_if_null(ops, xfrm_state_alloc_security); + set_to_cap_if_null(ops, xfrm_state_alloc); + set_to_cap_if_null(ops, xfrm_state_alloc_acquire); set_to_cap_if_null(ops, xfrm_state_free_security); set_to_cap_if_null(ops, xfrm_state_delete_security); set_to_cap_if_null(ops, xfrm_policy_lookup); diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 0b759e1..77ca965 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -13,7 +13,9 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/err.h> +#include <linux/sched.h> #include <linux/rbtree.h> +#include <linux/cred.h> #include <linux/key-type.h> #include <linux/digsig.h> @@ -21,21 +23,29 @@ static struct key *keyring[INTEGRITY_KEYRING_MAX]; +#ifdef CONFIG_IMA_TRUSTED_KEYRING +static const char *keyring_name[INTEGRITY_KEYRING_MAX] = { + ".evm", + ".module", + ".ima", +}; +#else static const char *keyring_name[INTEGRITY_KEYRING_MAX] = { "_evm", "_module", "_ima", }; +#endif int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, - const char *digest, int digestlen) + const char *digest, int digestlen) { if (id >= INTEGRITY_KEYRING_MAX) return -EINVAL; if (!keyring[id]) { keyring[id] = - request_key(&key_type_keyring, keyring_name[id], NULL); + request_key(&key_type_keyring, keyring_name[id], NULL); if (IS_ERR(keyring[id])) { int err = PTR_ERR(keyring[id]); pr_err("no %s keyring: %d\n", keyring_name[id], err); @@ -44,9 +54,10 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, } } - switch (sig[0]) { + switch (sig[1]) { case 1: - return digsig_verify(keyring[id], sig, siglen, + /* v1 API expect signature without xattr type */ + return digsig_verify(keyring[id], sig + 1, siglen - 1, digest, digestlen); case 2: return asymmetric_verify(keyring[id], sig, siglen, @@ -55,3 +66,21 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, return -EOPNOTSUPP; } + +int integrity_init_keyring(const unsigned int id) +{ + const struct cred *cred = current_cred(); + const struct user_struct *user = cred->user; + + keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0), + KGIDT_INIT(0), cred, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA, user->uid_keyring); + if (!IS_ERR(keyring[id])) + set_bit(KEY_FLAG_TRUSTED_ONLY, &keyring[id]->flags); + else + pr_info("Can't allocate %s keyring (%ld)\n", + keyring_name[id], PTR_ERR(keyring[id])); + return 0; +} diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c index b475466..9eae480 100644 --- a/security/integrity/digsig_asymmetric.c +++ b/security/integrity/digsig_asymmetric.c @@ -20,17 +20,6 @@ #include "integrity.h" /* - * signature format v2 - for using with asymmetric keys - */ -struct signature_v2_hdr { - uint8_t version; /* signature format version */ - uint8_t hash_algo; /* Digest algorithm [enum pkey_hash_algo] */ - uint32_t keyid; /* IMA key identifier - not X509/PGP specific*/ - uint16_t sig_size; /* signature size */ - uint8_t sig[0]; /* signature payload */ -} __packed; - -/* * Request an asymmetric key. */ static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index af9b685..336b3dd 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -123,7 +123,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, goto out; } - xattr_len = rc - 1; + xattr_len = rc; /* check value type */ switch (xattr_data->type) { @@ -143,7 +143,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, if (rc) break; rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM, - xattr_data->digest, xattr_len, + (const char *)xattr_data, xattr_len, calc.digest, sizeof(calc.digest)); if (!rc) { /* we probably want to replace rsa with hmac here */ diff --git a/security/integrity/evm/evm_posix_acl.c b/security/integrity/evm/evm_posix_acl.c index b1753e9..46408b9 100644 --- a/security/integrity/evm/evm_posix_acl.c +++ b/security/integrity/evm/evm_posix_acl.c @@ -11,8 +11,9 @@ #include <linux/module.h> #include <linux/xattr.h> +#include <linux/evm.h> -int posix_xattr_acl(char *xattr) +int posix_xattr_acl(const char *xattr) { int xattr_len = strlen(xattr); diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 74522db..c49d3f1 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -70,6 +70,8 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode) static void iint_free(struct integrity_iint_cache *iint) { + kfree(iint->ima_hash); + iint->ima_hash = NULL; iint->version = 0; iint->flags = 0UL; iint->ima_file_status = INTEGRITY_UNKNOWN; diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 39196ab..dad8d4c 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -9,6 +9,7 @@ config IMA select CRYPTO_HMAC select CRYPTO_MD5 select CRYPTO_SHA1 + select CRYPTO_HASH_INFO select TCG_TPM if HAS_IOMEM && !UML select TCG_TIS if TCG_TPM && X86 select TCG_IBMVTPM if TCG_TPM && PPC64 @@ -45,6 +46,69 @@ config IMA_LSM_RULES help Disabling this option will disregard LSM based policy rules. +choice + prompt "Default template" + default IMA_NG_TEMPLATE + depends on IMA + help + Select the default IMA measurement template. + + The original 'ima' measurement list template contains a + hash, defined as 20 bytes, and a null terminated pathname, + limited to 255 characters. The 'ima-ng' measurement list + template permits both larger hash digests and longer + pathnames. + + config IMA_TEMPLATE + bool "ima" + config IMA_NG_TEMPLATE + bool "ima-ng (default)" + config IMA_SIG_TEMPLATE + bool "ima-sig" +endchoice + +config IMA_DEFAULT_TEMPLATE + string + depends on IMA + default "ima" if IMA_TEMPLATE + default "ima-ng" if IMA_NG_TEMPLATE + default "ima-sig" if IMA_SIG_TEMPLATE + +choice + prompt "Default integrity hash algorithm" + default IMA_DEFAULT_HASH_SHA1 + depends on IMA + help + Select the default hash algorithm used for the measurement + list, integrity appraisal and audit log. The compiled default + hash algorithm can be overwritten using the kernel command + line 'ima_hash=' option. + + config IMA_DEFAULT_HASH_SHA1 + bool "SHA1 (default)" + depends on CRYPTO_SHA1 + + config IMA_DEFAULT_HASH_SHA256 + bool "SHA256" + depends on CRYPTO_SHA256 && !IMA_TEMPLATE + + config IMA_DEFAULT_HASH_SHA512 + bool "SHA512" + depends on CRYPTO_SHA512 && !IMA_TEMPLATE + + config IMA_DEFAULT_HASH_WP512 + bool "WP512" + depends on CRYPTO_WP512 && !IMA_TEMPLATE +endchoice + +config IMA_DEFAULT_HASH + string + depends on IMA + default "sha1" if IMA_DEFAULT_HASH_SHA1 + default "sha256" if IMA_DEFAULT_HASH_SHA256 + default "sha512" if IMA_DEFAULT_HASH_SHA512 + default "wp512" if IMA_DEFAULT_HASH_WP512 + config IMA_APPRAISE bool "Appraise integrity measurements" depends on IMA @@ -59,3 +123,11 @@ config IMA_APPRAISE For more information on integrity appraisal refer to: <http://linux-ima.sourceforge.net> If unsure, say N. + +config IMA_TRUSTED_KEYRING + bool "Require all keys on the _ima keyring be signed" + depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING + default y + help + This option requires that all keys added to the _ima + keyring be signed by a key on the system trusted keyring. diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile index 56dfee7..d79263d 100644 --- a/security/integrity/ima/Makefile +++ b/security/integrity/ima/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_IMA) += ima.o ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \ - ima_policy.o + ima_policy.o ima_template.o ima_template_lib.o ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index b3dd616..bf03c6a 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -36,23 +36,48 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 }; #define IMA_HASH_BITS 9 #define IMA_MEASURE_HTABLE_SIZE (1 << IMA_HASH_BITS) +#define IMA_TEMPLATE_FIELD_ID_MAX_LEN 16 +#define IMA_TEMPLATE_NUM_FIELDS_MAX 15 + +#define IMA_TEMPLATE_IMA_NAME "ima" +#define IMA_TEMPLATE_IMA_FMT "d|n" + /* set during initialization */ extern int ima_initialized; extern int ima_used_chip; -extern char *ima_hash; +extern int ima_hash_algo; extern int ima_appraise; -/* IMA inode template definition */ -struct ima_template_data { - u8 digest[IMA_DIGEST_SIZE]; /* sha1/md5 measurement hash */ - char file_name[IMA_EVENT_NAME_LEN_MAX + 1]; /* name + \0 */ +/* IMA template field data definition */ +struct ima_field_data { + u8 *data; + u32 len; +}; + +/* IMA template field definition */ +struct ima_template_field { + const char field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN]; + int (*field_init) (struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_field_data *field_data); + void (*field_show) (struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +}; + +/* IMA template descriptor definition */ +struct ima_template_desc { + char *name; + char *fmt; + int num_fields; + struct ima_template_field **fields; }; struct ima_template_entry { - u8 digest[IMA_DIGEST_SIZE]; /* sha1 or md5 measurement hash */ - const char *template_name; - int template_len; - struct ima_template_data template; + u8 digest[TPM_DIGEST_SIZE]; /* sha1 or md5 measurement hash */ + struct ima_template_desc *template_desc; /* template descriptor */ + u32 template_data_len; + struct ima_field_data template_data[0]; /* template related data */ }; struct ima_queue_entry { @@ -69,13 +94,21 @@ int ima_fs_init(void); void ima_fs_cleanup(void); int ima_inode_alloc(struct inode *inode); int ima_add_template_entry(struct ima_template_entry *entry, int violation, - const char *op, struct inode *inode); -int ima_calc_file_hash(struct file *file, char *digest); -int ima_calc_buffer_hash(const void *data, int len, char *digest); -int ima_calc_boot_aggregate(char *digest); -void ima_add_violation(struct inode *inode, const unsigned char *filename, + const char *op, struct inode *inode, + const unsigned char *filename); +int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash); +int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields, + struct ima_digest_data *hash); +int __init ima_calc_boot_aggregate(struct ima_digest_data *hash); +void ima_add_violation(struct file *file, const unsigned char *filename, const char *op, const char *cause); int ima_init_crypto(void); +void ima_putc(struct seq_file *m, void *data, int datalen); +void ima_print_digest(struct seq_file *m, u8 *digest, int size); +struct ima_template_desc *ima_template_desc_current(void); +int ima_init_template(void); + +int ima_init_template(void); /* * used to protect h_table and sha_table @@ -98,14 +131,21 @@ static inline unsigned long ima_hash_key(u8 *digest) int ima_get_action(struct inode *inode, int mask, int function); int ima_must_measure(struct inode *inode, int mask, int function); int ima_collect_measurement(struct integrity_iint_cache *iint, - struct file *file); + struct file *file, + struct evm_ima_xattr_data **xattr_value, + int *xattr_len); void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename); + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len); void ima_audit_measurement(struct integrity_iint_cache *iint, const unsigned char *filename); +int ima_alloc_init_template(struct integrity_iint_cache *iint, + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_template_entry **entry); int ima_store_template(struct ima_template_entry *entry, int violation, - struct inode *inode); -void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show); + struct inode *inode, const unsigned char *filename); const char *ima_d_path(struct path *path, char **pathbuf); /* rbtree tree calls to lookup, insert, delete @@ -131,17 +171,25 @@ void ima_delete_rules(void); #ifdef CONFIG_IMA_APPRAISE int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename); + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len); int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file); enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, int func); +void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_digest_data *hash); +int ima_read_xattr(struct dentry *dentry, + struct evm_ima_xattr_data **xattr_value); #else static inline int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename) + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len) { return INTEGRITY_UNKNOWN; } @@ -162,6 +210,19 @@ static inline enum integrity_status ima_get_cache_status(struct integrity_iint_c { return INTEGRITY_UNKNOWN; } + +static inline void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, + int xattr_len, + struct ima_digest_data *hash) +{ +} + +static inline int ima_read_xattr(struct dentry *dentry, + struct evm_ima_xattr_data **xattr_value) +{ + return 0; +} + #endif /* LSM based policy rules require audit */ diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 1c03e8f1..0e75408 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -18,9 +18,46 @@ #include <linux/fs.h> #include <linux/xattr.h> #include <linux/evm.h> +#include <crypto/hash_info.h> #include "ima.h" -static const char *IMA_TEMPLATE_NAME = "ima"; +/* + * ima_alloc_init_template - create and initialize a new template entry + */ +int ima_alloc_init_template(struct integrity_iint_cache *iint, + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_template_entry **entry) +{ + struct ima_template_desc *template_desc = ima_template_desc_current(); + int i, result = 0; + + *entry = kzalloc(sizeof(**entry) + template_desc->num_fields * + sizeof(struct ima_field_data), GFP_NOFS); + if (!*entry) + return -ENOMEM; + + for (i = 0; i < template_desc->num_fields; i++) { + struct ima_template_field *field = template_desc->fields[i]; + u32 len; + + result = field->field_init(iint, file, filename, + xattr_value, xattr_len, + &((*entry)->template_data[i])); + if (result != 0) + goto out; + + len = (*entry)->template_data[i].len; + (*entry)->template_data_len += sizeof(len); + (*entry)->template_data_len += len; + } + (*entry)->template_desc = template_desc; + return 0; +out: + kfree(*entry); + *entry = NULL; + return result; +} /* * ima_store_template - store ima template measurements @@ -39,28 +76,34 @@ static const char *IMA_TEMPLATE_NAME = "ima"; * Returns 0 on success, error code otherwise */ int ima_store_template(struct ima_template_entry *entry, - int violation, struct inode *inode) + int violation, struct inode *inode, + const unsigned char *filename) { const char *op = "add_template_measure"; const char *audit_cause = "hashing_error"; + char *template_name = entry->template_desc->name; int result; - - memset(entry->digest, 0, sizeof(entry->digest)); - entry->template_name = IMA_TEMPLATE_NAME; - entry->template_len = sizeof(entry->template); + struct { + struct ima_digest_data hdr; + char digest[TPM_DIGEST_SIZE]; + } hash; if (!violation) { - result = ima_calc_buffer_hash(&entry->template, - entry->template_len, - entry->digest); + int num_fields = entry->template_desc->num_fields; + + /* this function uses default algo */ + hash.hdr.algo = HASH_ALGO_SHA1; + result = ima_calc_field_array_hash(&entry->template_data[0], + num_fields, &hash.hdr); if (result < 0) { integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, - entry->template_name, op, + template_name, op, audit_cause, result, 0); return result; } + memcpy(entry->digest, hash.hdr.digest, hash.hdr.length); } - result = ima_add_template_entry(entry, violation, op, inode); + result = ima_add_template_entry(entry, violation, op, inode, filename); return result; } @@ -71,24 +114,24 @@ int ima_store_template(struct ima_template_entry *entry, * By extending the PCR with 0xFF's instead of with zeroes, the PCR * value is invalidated. */ -void ima_add_violation(struct inode *inode, const unsigned char *filename, +void ima_add_violation(struct file *file, const unsigned char *filename, const char *op, const char *cause) { struct ima_template_entry *entry; + struct inode *inode = file->f_dentry->d_inode; int violation = 1; int result; /* can overflow, only indicator */ atomic_long_inc(&ima_htable.violations); - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { + result = ima_alloc_init_template(NULL, file, filename, + NULL, 0, &entry); + if (result < 0) { result = -ENOMEM; goto err_out; } - memset(&entry->template, 0, sizeof(entry->template)); - strncpy(entry->template.file_name, filename, IMA_EVENT_NAME_LEN_MAX); - result = ima_store_template(entry, violation, inode); + result = ima_store_template(entry, violation, inode, filename); if (result < 0) kfree(entry); err_out: @@ -138,20 +181,42 @@ int ima_must_measure(struct inode *inode, int mask, int function) * Return 0 on success, error code otherwise */ int ima_collect_measurement(struct integrity_iint_cache *iint, - struct file *file) + struct file *file, + struct evm_ima_xattr_data **xattr_value, + int *xattr_len) { struct inode *inode = file_inode(file); const char *filename = file->f_dentry->d_name.name; int result = 0; + struct { + struct ima_digest_data hdr; + char digest[IMA_MAX_DIGEST_SIZE]; + } hash; + + if (xattr_value) + *xattr_len = ima_read_xattr(file->f_dentry, xattr_value); if (!(iint->flags & IMA_COLLECTED)) { u64 i_version = file_inode(file)->i_version; - iint->ima_xattr.type = IMA_XATTR_DIGEST; - result = ima_calc_file_hash(file, iint->ima_xattr.digest); + /* use default hash algorithm */ + hash.hdr.algo = ima_hash_algo; + + if (xattr_value) + ima_get_hash_algo(*xattr_value, *xattr_len, &hash.hdr); + + result = ima_calc_file_hash(file, &hash.hdr); if (!result) { - iint->version = i_version; - iint->flags |= IMA_COLLECTED; + int length = sizeof(hash.hdr) + hash.hdr.length; + void *tmpbuf = krealloc(iint->ima_hash, length, + GFP_NOFS); + if (tmpbuf) { + iint->ima_hash = tmpbuf; + memcpy(iint->ima_hash, &hash, length); + iint->version = i_version; + iint->flags |= IMA_COLLECTED; + } else + result = -ENOMEM; } } if (result) @@ -177,7 +242,9 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, * Must be called with iint->mutex held. */ void ima_store_measurement(struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename) + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len) { const char *op = "add_template_measure"; const char *audit_cause = "ENOMEM"; @@ -189,19 +256,15 @@ void ima_store_measurement(struct integrity_iint_cache *iint, if (iint->flags & IMA_MEASURED) return; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { + result = ima_alloc_init_template(iint, file, filename, + xattr_value, xattr_len, &entry); + if (result < 0) { integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename, op, audit_cause, result, 0); return; } - memset(&entry->template, 0, sizeof(entry->template)); - memcpy(entry->template.digest, iint->ima_xattr.digest, IMA_DIGEST_SIZE); - strcpy(entry->template.file_name, - (strlen(filename) > IMA_EVENT_NAME_LEN_MAX) ? - file->f_dentry->d_name.name : filename); - result = ima_store_template(entry, violation, inode); + result = ima_store_template(entry, violation, inode, filename); if (!result || result == -EEXIST) iint->flags |= IMA_MEASURED; if (result < 0) @@ -212,14 +275,16 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, const unsigned char *filename) { struct audit_buffer *ab; - char hash[(IMA_DIGEST_SIZE * 2) + 1]; + char hash[(iint->ima_hash->length * 2) + 1]; + const char *algo_name = hash_algo_name[iint->ima_hash->algo]; + char algo_hash[sizeof(hash) + strlen(algo_name) + 2]; int i; if (iint->flags & IMA_AUDITED) return; - for (i = 0; i < IMA_DIGEST_SIZE; i++) - hex_byte_pack(hash + (i * 2), iint->ima_xattr.digest[i]); + for (i = 0; i < iint->ima_hash->length; i++) + hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]); hash[i * 2] = '\0'; ab = audit_log_start(current->audit_context, GFP_KERNEL, @@ -230,7 +295,8 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, audit_log_format(ab, "file="); audit_log_untrustedstring(ab, filename); audit_log_format(ab, " hash="); - audit_log_untrustedstring(ab, hash); + snprintf(algo_hash, sizeof(algo_hash), "%s:%s", algo_name, hash); + audit_log_untrustedstring(ab, algo_hash); audit_log_task_info(ab, current); audit_log_end(ab); diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 2d4beca..46353ee 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -15,6 +15,7 @@ #include <linux/magic.h> #include <linux/ima.h> #include <linux/evm.h> +#include <crypto/hash_info.h> #include "ima.h" @@ -43,19 +44,31 @@ int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func) } static int ima_fix_xattr(struct dentry *dentry, - struct integrity_iint_cache *iint) + struct integrity_iint_cache *iint) { - iint->ima_xattr.type = IMA_XATTR_DIGEST; - return __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA, - (u8 *)&iint->ima_xattr, - sizeof(iint->ima_xattr), 0); + int rc, offset; + u8 algo = iint->ima_hash->algo; + + if (algo <= HASH_ALGO_SHA1) { + offset = 1; + iint->ima_hash->xattr.sha1.type = IMA_XATTR_DIGEST; + } else { + offset = 0; + iint->ima_hash->xattr.ng.type = IMA_XATTR_DIGEST_NG; + iint->ima_hash->xattr.ng.algo = algo; + } + rc = __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA, + &iint->ima_hash->xattr.data[offset], + (sizeof(iint->ima_hash->xattr) - offset) + + iint->ima_hash->length, 0); + return rc; } /* Return specific func appraised cached result */ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, int func) { - switch(func) { + switch (func) { case MMAP_CHECK: return iint->ima_mmap_status; case BPRM_CHECK: @@ -71,7 +84,7 @@ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, static void ima_set_cache_status(struct integrity_iint_cache *iint, int func, enum integrity_status status) { - switch(func) { + switch (func) { case MMAP_CHECK: iint->ima_mmap_status = status; break; @@ -90,7 +103,7 @@ static void ima_set_cache_status(struct integrity_iint_cache *iint, static void ima_cache_flags(struct integrity_iint_cache *iint, int func) { - switch(func) { + switch (func) { case MMAP_CHECK: iint->flags |= (IMA_MMAP_APPRAISED | IMA_APPRAISED); break; @@ -107,6 +120,50 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func) } } +void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_digest_data *hash) +{ + struct signature_v2_hdr *sig; + + if (!xattr_value || xattr_len < 2) + return; + + switch (xattr_value->type) { + case EVM_IMA_XATTR_DIGSIG: + sig = (typeof(sig))xattr_value; + if (sig->version != 2 || xattr_len <= sizeof(*sig)) + return; + hash->algo = sig->hash_algo; + break; + case IMA_XATTR_DIGEST_NG: + hash->algo = xattr_value->digest[0]; + break; + case IMA_XATTR_DIGEST: + /* this is for backward compatibility */ + if (xattr_len == 21) { + unsigned int zero = 0; + if (!memcmp(&xattr_value->digest[16], &zero, 4)) + hash->algo = HASH_ALGO_MD5; + else + hash->algo = HASH_ALGO_SHA1; + } else if (xattr_len == 17) + hash->algo = HASH_ALGO_MD5; + break; + } +} + +int ima_read_xattr(struct dentry *dentry, + struct evm_ima_xattr_data **xattr_value) +{ + struct inode *inode = dentry->d_inode; + + if (!inode->i_op->getxattr) + return 0; + + return vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)xattr_value, + 0, GFP_NOFS); +} + /* * ima_appraise_measurement - appraise file measurement * @@ -116,23 +173,22 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func) * Return 0 on success, error code otherwise */ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename) + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len) { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - struct evm_ima_xattr_data *xattr_value = NULL; enum integrity_status status = INTEGRITY_UNKNOWN; const char *op = "appraise_data"; char *cause = "unknown"; - int rc; + int rc = xattr_len, hash_start = 0; if (!ima_appraise) return 0; if (!inode->i_op->getxattr) return INTEGRITY_UNKNOWN; - rc = vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)&xattr_value, - 0, GFP_NOFS); if (rc <= 0) { if (rc && rc != -ENODATA) goto out; @@ -153,14 +209,25 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, goto out; } switch (xattr_value->type) { + case IMA_XATTR_DIGEST_NG: + /* first byte contains algorithm id */ + hash_start = 1; case IMA_XATTR_DIGEST: if (iint->flags & IMA_DIGSIG_REQUIRED) { cause = "IMA signature required"; status = INTEGRITY_FAIL; break; } - rc = memcmp(xattr_value->digest, iint->ima_xattr.digest, - IMA_DIGEST_SIZE); + if (xattr_len - sizeof(xattr_value->type) - hash_start >= + iint->ima_hash->length) + /* xattr length may be longer. md5 hash in previous + version occupied 20 bytes in xattr, instead of 16 + */ + rc = memcmp(&xattr_value->digest[hash_start], + iint->ima_hash->digest, + iint->ima_hash->length); + else + rc = -EINVAL; if (rc) { cause = "invalid-hash"; status = INTEGRITY_FAIL; @@ -171,9 +238,9 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, case EVM_IMA_XATTR_DIGSIG: iint->flags |= IMA_DIGSIG; rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA, - xattr_value->digest, rc - 1, - iint->ima_xattr.digest, - IMA_DIGEST_SIZE); + (const char *)xattr_value, rc, + iint->ima_hash->digest, + iint->ima_hash->length); if (rc == -EOPNOTSUPP) { status = INTEGRITY_UNKNOWN; } else if (rc) { @@ -203,7 +270,6 @@ out: ima_cache_flags(iint, func); } ima_set_cache_status(iint, func, status); - kfree(xattr_value); return status; } @@ -219,7 +285,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file) if (iint->flags & IMA_DIGSIG) return; - rc = ima_collect_measurement(iint, file); + rc = ima_collect_measurement(iint, file, NULL, NULL); if (rc < 0) return; @@ -315,3 +381,14 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name) } return result; } + +#ifdef CONFIG_IMA_TRUSTED_KEYRING +static int __init init_ima_keyring(void) +{ + int ret; + + ret = integrity_init_keyring(INTEGRITY_KEYRING_IMA); + return 0; +} +late_initcall(init_ima_keyring); +#endif diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index a02e079..676e029 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -20,6 +20,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <crypto/hash.h> +#include <crypto/hash_info.h> #include "ima.h" static struct crypto_shash *ima_shash_tfm; @@ -28,31 +29,58 @@ int ima_init_crypto(void) { long rc; - ima_shash_tfm = crypto_alloc_shash(ima_hash, 0, 0); + ima_shash_tfm = crypto_alloc_shash(hash_algo_name[ima_hash_algo], 0, 0); if (IS_ERR(ima_shash_tfm)) { rc = PTR_ERR(ima_shash_tfm); - pr_err("Can not allocate %s (reason: %ld)\n", ima_hash, rc); + pr_err("Can not allocate %s (reason: %ld)\n", + hash_algo_name[ima_hash_algo], rc); return rc; } return 0; } +static struct crypto_shash *ima_alloc_tfm(enum hash_algo algo) +{ + struct crypto_shash *tfm = ima_shash_tfm; + int rc; + + if (algo != ima_hash_algo && algo < HASH_ALGO__LAST) { + tfm = crypto_alloc_shash(hash_algo_name[algo], 0, 0); + if (IS_ERR(tfm)) { + rc = PTR_ERR(tfm); + pr_err("Can not allocate %s (reason: %d)\n", + hash_algo_name[algo], rc); + } + } + return tfm; +} + +static void ima_free_tfm(struct crypto_shash *tfm) +{ + if (tfm != ima_shash_tfm) + crypto_free_shash(tfm); +} + /* * Calculate the MD5/SHA1 file digest */ -int ima_calc_file_hash(struct file *file, char *digest) +static int ima_calc_file_hash_tfm(struct file *file, + struct ima_digest_data *hash, + struct crypto_shash *tfm) { loff_t i_size, offset = 0; char *rbuf; int rc, read = 0; struct { struct shash_desc shash; - char ctx[crypto_shash_descsize(ima_shash_tfm)]; + char ctx[crypto_shash_descsize(tfm)]; } desc; - desc.shash.tfm = ima_shash_tfm; + desc.shash.tfm = tfm; desc.shash.flags = 0; + hash->length = crypto_shash_digestsize(tfm); + rc = crypto_shash_init(&desc.shash); if (rc != 0) return rc; @@ -85,27 +113,83 @@ int ima_calc_file_hash(struct file *file, char *digest) } kfree(rbuf); if (!rc) - rc = crypto_shash_final(&desc.shash, digest); + rc = crypto_shash_final(&desc.shash, hash->digest); if (read) file->f_mode &= ~FMODE_READ; out: return rc; } +int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) +{ + struct crypto_shash *tfm; + int rc; + + tfm = ima_alloc_tfm(hash->algo); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + rc = ima_calc_file_hash_tfm(file, hash, tfm); + + ima_free_tfm(tfm); + + return rc; +} + /* - * Calculate the hash of a given buffer + * Calculate the hash of template data */ -int ima_calc_buffer_hash(const void *data, int len, char *digest) +static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data, + int num_fields, + struct ima_digest_data *hash, + struct crypto_shash *tfm) { struct { struct shash_desc shash; - char ctx[crypto_shash_descsize(ima_shash_tfm)]; + char ctx[crypto_shash_descsize(tfm)]; } desc; + int rc, i; - desc.shash.tfm = ima_shash_tfm; + desc.shash.tfm = tfm; desc.shash.flags = 0; - return crypto_shash_digest(&desc.shash, data, len, digest); + hash->length = crypto_shash_digestsize(tfm); + + rc = crypto_shash_init(&desc.shash); + if (rc != 0) + return rc; + + for (i = 0; i < num_fields; i++) { + rc = crypto_shash_update(&desc.shash, + (const u8 *) &field_data[i].len, + sizeof(field_data[i].len)); + rc = crypto_shash_update(&desc.shash, field_data[i].data, + field_data[i].len); + if (rc) + break; + } + + if (!rc) + rc = crypto_shash_final(&desc.shash, hash->digest); + + return rc; +} + +int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields, + struct ima_digest_data *hash) +{ + struct crypto_shash *tfm; + int rc; + + tfm = ima_alloc_tfm(hash->algo); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + rc = ima_calc_field_array_hash_tfm(field_data, num_fields, hash, tfm); + + ima_free_tfm(tfm); + + return rc; } static void __init ima_pcrread(int idx, u8 *pcr) @@ -120,16 +204,17 @@ static void __init ima_pcrread(int idx, u8 *pcr) /* * Calculate the boot aggregate hash */ -int __init ima_calc_boot_aggregate(char *digest) +static int __init ima_calc_boot_aggregate_tfm(char *digest, + struct crypto_shash *tfm) { - u8 pcr_i[IMA_DIGEST_SIZE]; + u8 pcr_i[TPM_DIGEST_SIZE]; int rc, i; struct { struct shash_desc shash; - char ctx[crypto_shash_descsize(ima_shash_tfm)]; + char ctx[crypto_shash_descsize(tfm)]; } desc; - desc.shash.tfm = ima_shash_tfm; + desc.shash.tfm = tfm; desc.shash.flags = 0; rc = crypto_shash_init(&desc.shash); @@ -140,9 +225,26 @@ int __init ima_calc_boot_aggregate(char *digest) for (i = TPM_PCR0; i < TPM_PCR8; i++) { ima_pcrread(i, pcr_i); /* now accumulate with current aggregate */ - rc = crypto_shash_update(&desc.shash, pcr_i, IMA_DIGEST_SIZE); + rc = crypto_shash_update(&desc.shash, pcr_i, TPM_DIGEST_SIZE); } if (!rc) crypto_shash_final(&desc.shash, digest); return rc; } + +int __init ima_calc_boot_aggregate(struct ima_digest_data *hash) +{ + struct crypto_shash *tfm; + int rc; + + tfm = ima_alloc_tfm(hash->algo); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + hash->length = crypto_shash_digestsize(tfm); + rc = ima_calc_boot_aggregate_tfm(hash->digest, tfm); + + ima_free_tfm(tfm); + + return rc; +} diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 38477c9..d47a7c8 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -88,8 +88,7 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos) * against concurrent list-extension */ rcu_read_lock(); - qe = list_entry_rcu(qe->later.next, - struct ima_queue_entry, later); + qe = list_entry_rcu(qe->later.next, struct ima_queue_entry, later); rcu_read_unlock(); (*pos)++; @@ -100,7 +99,7 @@ static void ima_measurements_stop(struct seq_file *m, void *v) { } -static void ima_putc(struct seq_file *m, void *data, int datalen) +void ima_putc(struct seq_file *m, void *data, int datalen) { while (datalen--) seq_putc(m, *(char *)data++); @@ -111,6 +110,7 @@ static void ima_putc(struct seq_file *m, void *data, int datalen) * char[20]=template digest * 32bit-le=template name size * char[n]=template name + * [eventdata length] * eventdata[n]=template specific data */ static int ima_measurements_show(struct seq_file *m, void *v) @@ -120,6 +120,7 @@ static int ima_measurements_show(struct seq_file *m, void *v) struct ima_template_entry *e; int namelen; u32 pcr = CONFIG_IMA_MEASURE_PCR_IDX; + int i; /* get entry */ e = qe->entry; @@ -134,18 +135,25 @@ static int ima_measurements_show(struct seq_file *m, void *v) ima_putc(m, &pcr, sizeof pcr); /* 2nd: template digest */ - ima_putc(m, e->digest, IMA_DIGEST_SIZE); + ima_putc(m, e->digest, TPM_DIGEST_SIZE); /* 3rd: template name size */ - namelen = strlen(e->template_name); + namelen = strlen(e->template_desc->name); ima_putc(m, &namelen, sizeof namelen); /* 4th: template name */ - ima_putc(m, (void *)e->template_name, namelen); + ima_putc(m, e->template_desc->name, namelen); + + /* 5th: template length (except for 'ima' template) */ + if (strcmp(e->template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0) + ima_putc(m, &e->template_data_len, + sizeof(e->template_data_len)); - /* 5th: template specific data */ - ima_template_show(m, (struct ima_template_data *)&e->template, - IMA_SHOW_BINARY); + /* 6th: template specific data */ + for (i = 0; i < e->template_desc->num_fields; i++) { + e->template_desc->fields[i]->field_show(m, IMA_SHOW_BINARY, + &e->template_data[i]); + } return 0; } @@ -168,41 +176,21 @@ static const struct file_operations ima_measurements_ops = { .release = seq_release, }; -static void ima_print_digest(struct seq_file *m, u8 *digest) +void ima_print_digest(struct seq_file *m, u8 *digest, int size) { int i; - for (i = 0; i < IMA_DIGEST_SIZE; i++) + for (i = 0; i < size; i++) seq_printf(m, "%02x", *(digest + i)); } -void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show) -{ - struct ima_template_data *entry = e; - int namelen; - - switch (show) { - case IMA_SHOW_ASCII: - ima_print_digest(m, entry->digest); - seq_printf(m, " %s\n", entry->file_name); - break; - case IMA_SHOW_BINARY: - ima_putc(m, entry->digest, IMA_DIGEST_SIZE); - - namelen = strlen(entry->file_name); - ima_putc(m, &namelen, sizeof namelen); - ima_putc(m, entry->file_name, namelen); - default: - break; - } -} - /* print in ascii */ static int ima_ascii_measurements_show(struct seq_file *m, void *v) { /* the list never shrinks, so we don't need a lock here */ struct ima_queue_entry *qe = v; struct ima_template_entry *e; + int i; /* get entry */ e = qe->entry; @@ -213,14 +201,21 @@ static int ima_ascii_measurements_show(struct seq_file *m, void *v) seq_printf(m, "%2d ", CONFIG_IMA_MEASURE_PCR_IDX); /* 2nd: SHA1 template hash */ - ima_print_digest(m, e->digest); + ima_print_digest(m, e->digest, TPM_DIGEST_SIZE); /* 3th: template name */ - seq_printf(m, " %s ", e->template_name); + seq_printf(m, " %s", e->template_desc->name); /* 4th: template specific data */ - ima_template_show(m, (struct ima_template_data *)&e->template, - IMA_SHOW_ASCII); + for (i = 0; i < e->template_desc->num_fields; i++) { + seq_puts(m, " "); + if (e->template_data[i].len == 0) + continue; + + e->template_desc->fields[i]->field_show(m, IMA_SHOW_ASCII, + &e->template_data[i]); + } + seq_puts(m, "\n"); return 0; } diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index 162ea72..15f34bd 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -18,6 +18,7 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/err.h> +#include <crypto/hash_info.h> #include "ima.h" /* name for boot aggregate entry */ @@ -42,28 +43,38 @@ int ima_used_chip; static void __init ima_add_boot_aggregate(void) { struct ima_template_entry *entry; + struct integrity_iint_cache tmp_iint, *iint = &tmp_iint; const char *op = "add_boot_aggregate"; const char *audit_cause = "ENOMEM"; int result = -ENOMEM; - int violation = 1; + int violation = 0; + struct { + struct ima_digest_data hdr; + char digest[TPM_DIGEST_SIZE]; + } hash; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - goto err_out; + memset(iint, 0, sizeof(*iint)); + memset(&hash, 0, sizeof(hash)); + iint->ima_hash = &hash.hdr; + iint->ima_hash->algo = HASH_ALGO_SHA1; + iint->ima_hash->length = SHA1_DIGEST_SIZE; - memset(&entry->template, 0, sizeof(entry->template)); - strncpy(entry->template.file_name, boot_aggregate_name, - IMA_EVENT_NAME_LEN_MAX); if (ima_used_chip) { - violation = 0; - result = ima_calc_boot_aggregate(entry->template.digest); + result = ima_calc_boot_aggregate(&hash.hdr); if (result < 0) { audit_cause = "hashing_error"; kfree(entry); goto err_out; } } - result = ima_store_template(entry, violation, NULL); + + result = ima_alloc_init_template(iint, NULL, boot_aggregate_name, + NULL, 0, &entry); + if (result < 0) + return; + + result = ima_store_template(entry, violation, NULL, + boot_aggregate_name); if (result < 0) kfree(entry); return; @@ -74,7 +85,7 @@ err_out: int __init ima_init(void) { - u8 pcr_i[IMA_DIGEST_SIZE]; + u8 pcr_i[TPM_DIGEST_SIZE]; int rc; ima_used_chip = 0; @@ -88,6 +99,10 @@ int __init ima_init(void) rc = ima_init_crypto(); if (rc) return rc; + rc = ima_init_template(); + if (rc != 0) + return rc; + ima_add_boot_aggregate(); /* boot aggregate must be first entry */ ima_init_policy(); diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index e9508d5..149ee11 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/xattr.h> #include <linux/ima.h> +#include <crypto/hash_info.h> #include "ima.h" @@ -35,11 +36,33 @@ int ima_appraise = IMA_APPRAISE_ENFORCE; int ima_appraise; #endif -char *ima_hash = "sha1"; +int ima_hash_algo = HASH_ALGO_SHA1; +static int hash_setup_done; + static int __init hash_setup(char *str) { - if (strncmp(str, "md5", 3) == 0) - ima_hash = "md5"; + struct ima_template_desc *template_desc = ima_template_desc_current(); + int i; + + if (hash_setup_done) + return 1; + + if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) { + if (strncmp(str, "sha1", 4) == 0) + ima_hash_algo = HASH_ALGO_SHA1; + else if (strncmp(str, "md5", 3) == 0) + ima_hash_algo = HASH_ALGO_MD5; + goto out; + } + + for (i = 0; i < HASH_ALGO__LAST; i++) { + if (strcmp(str, hash_algo_name[i]) == 0) { + ima_hash_algo = i; + break; + } + } +out: + hash_setup_done = 1; return 1; } __setup("ima_hash=", hash_setup); @@ -92,10 +115,9 @@ out: pathname = dentry->d_name.name; if (send_tomtou) - ima_add_violation(inode, pathname, - "invalid_pcr", "ToMToU"); + ima_add_violation(file, pathname, "invalid_pcr", "ToMToU"); if (send_writers) - ima_add_violation(inode, pathname, + ima_add_violation(file, pathname, "invalid_pcr", "open_writers"); kfree(pathbuf); } @@ -144,9 +166,12 @@ static int process_measurement(struct file *file, const char *filename, { struct inode *inode = file_inode(file); struct integrity_iint_cache *iint; + struct ima_template_desc *template_desc = ima_template_desc_current(); char *pathbuf = NULL; const char *pathname = NULL; int rc = -ENOMEM, action, must_appraise, _func; + struct evm_ima_xattr_data *xattr_value = NULL, **xattr_ptr = NULL; + int xattr_len = 0; if (!ima_initialized || !S_ISREG(inode->i_mode)) return 0; @@ -185,7 +210,13 @@ static int process_measurement(struct file *file, const char *filename, goto out_digsig; } - rc = ima_collect_measurement(iint, file); + if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) { + if (action & IMA_APPRAISE_SUBMASK) + xattr_ptr = &xattr_value; + } else + xattr_ptr = &xattr_value; + + rc = ima_collect_measurement(iint, file, xattr_ptr, &xattr_len); if (rc != 0) goto out_digsig; @@ -194,9 +225,11 @@ static int process_measurement(struct file *file, const char *filename, pathname = (const char *)file->f_dentry->d_name.name; if (action & IMA_MEASURE) - ima_store_measurement(iint, file, pathname); + ima_store_measurement(iint, file, pathname, + xattr_value, xattr_len); if (action & IMA_APPRAISE_SUBMASK) - rc = ima_appraise_measurement(_func, iint, file, pathname); + rc = ima_appraise_measurement(_func, iint, file, pathname, + xattr_value, xattr_len); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); kfree(pathbuf); @@ -205,6 +238,7 @@ out_digsig: rc = -EACCES; out: mutex_unlock(&inode->i_mutex); + kfree(xattr_value); if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE)) return -EACCES; return 0; @@ -244,9 +278,9 @@ int ima_file_mmap(struct file *file, unsigned long prot) int ima_bprm_check(struct linux_binprm *bprm) { return process_measurement(bprm->file, - (strcmp(bprm->filename, bprm->interp) == 0) ? - bprm->filename : bprm->interp, - MAY_EXEC, BPRM_CHECK); + (strcmp(bprm->filename, bprm->interp) == 0) ? + bprm->filename : bprm->interp, + MAY_EXEC, BPRM_CHECK); } /** @@ -263,8 +297,8 @@ int ima_file_check(struct file *file, int mask) { ima_rdwr_violation_check(file); return process_measurement(file, NULL, - mask & (MAY_READ | MAY_WRITE | MAY_EXEC), - FILE_CHECK); + mask & (MAY_READ | MAY_WRITE | MAY_EXEC), + FILE_CHECK); } EXPORT_SYMBOL_GPL(ima_file_check); @@ -294,6 +328,7 @@ static int __init init_ima(void) { int error; + hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); if (!error) ima_initialized = 1; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 399433a..a9c3d3c 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -73,7 +73,6 @@ static struct ima_rule_entry default_rules[] = { {.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC}, - {.action = DONT_MEASURE,.fsmagic = RAMFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = DEVPTS_SUPER_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = BINFMTFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC}, diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c index ff63fe0..d85e997 100644 --- a/security/integrity/ima/ima_queue.c +++ b/security/integrity/ima/ima_queue.c @@ -50,7 +50,7 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value) key = ima_hash_key(digest_value); rcu_read_lock(); hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) { - rc = memcmp(qe->entry->digest, digest_value, IMA_DIGEST_SIZE); + rc = memcmp(qe->entry->digest, digest_value, TPM_DIGEST_SIZE); if (rc == 0) { ret = qe; break; @@ -104,9 +104,10 @@ static int ima_pcr_extend(const u8 *hash) * and extend the pcr. */ int ima_add_template_entry(struct ima_template_entry *entry, int violation, - const char *op, struct inode *inode) + const char *op, struct inode *inode, + const unsigned char *filename) { - u8 digest[IMA_DIGEST_SIZE]; + u8 digest[TPM_DIGEST_SIZE]; const char *audit_cause = "hash_added"; char tpm_audit_cause[AUDIT_CAUSE_LEN_MAX]; int audit_info = 1; @@ -141,8 +142,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation, } out: mutex_unlock(&ima_extend_list_mutex); - integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, - entry->template.file_name, + integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename, op, audit_cause, result, audit_info); return result; } diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c new file mode 100644 index 0000000..4e5da99 --- /dev/null +++ b/security/integrity/ima/ima_template.c @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2013 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Author: Roberto Sassu <roberto.sassu@polito.it> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * File: ima_template.c + * Helpers to manage template descriptors. + */ +#include <crypto/hash_info.h> + +#include "ima.h" +#include "ima_template_lib.h" + +static struct ima_template_desc defined_templates[] = { + {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT}, + {.name = "ima-ng",.fmt = "d-ng|n-ng"}, + {.name = "ima-sig",.fmt = "d-ng|n-ng|sig"}, +}; + +static struct ima_template_field supported_fields[] = { + {.field_id = "d",.field_init = ima_eventdigest_init, + .field_show = ima_show_template_digest}, + {.field_id = "n",.field_init = ima_eventname_init, + .field_show = ima_show_template_string}, + {.field_id = "d-ng",.field_init = ima_eventdigest_ng_init, + .field_show = ima_show_template_digest_ng}, + {.field_id = "n-ng",.field_init = ima_eventname_ng_init, + .field_show = ima_show_template_string}, + {.field_id = "sig",.field_init = ima_eventsig_init, + .field_show = ima_show_template_sig}, +}; + +static struct ima_template_desc *ima_template; +static struct ima_template_desc *lookup_template_desc(const char *name); + +static int __init ima_template_setup(char *str) +{ + struct ima_template_desc *template_desc; + int template_len = strlen(str); + + /* + * Verify that a template with the supplied name exists. + * If not, use CONFIG_IMA_DEFAULT_TEMPLATE. + */ + template_desc = lookup_template_desc(str); + if (!template_desc) + return 1; + + /* + * Verify whether the current hash algorithm is supported + * by the 'ima' template. + */ + if (template_len == 3 && strcmp(str, IMA_TEMPLATE_IMA_NAME) == 0 && + ima_hash_algo != HASH_ALGO_SHA1 && ima_hash_algo != HASH_ALGO_MD5) { + pr_err("IMA: template does not support hash alg\n"); + return 1; + } + + ima_template = template_desc; + return 1; +} +__setup("ima_template=", ima_template_setup); + +static struct ima_template_desc *lookup_template_desc(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(defined_templates); i++) { + if (strcmp(defined_templates[i].name, name) == 0) + return defined_templates + i; + } + + return NULL; +} + +static struct ima_template_field *lookup_template_field(const char *field_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(supported_fields); i++) + if (strncmp(supported_fields[i].field_id, field_id, + IMA_TEMPLATE_FIELD_ID_MAX_LEN) == 0) + return &supported_fields[i]; + return NULL; +} + +static int template_fmt_size(char *template_fmt) +{ + char c; + int template_fmt_len = strlen(template_fmt); + int i = 0, j = 0; + + while (i < template_fmt_len) { + c = template_fmt[i]; + if (c == '|') + j++; + i++; + } + + return j + 1; +} + +static int template_desc_init_fields(char *template_fmt, + struct ima_template_field ***fields, + int *num_fields) +{ + char *c, *template_fmt_ptr = template_fmt; + int template_num_fields = template_fmt_size(template_fmt); + int i, result = 0; + + if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX) + return -EINVAL; + + *fields = kzalloc(template_num_fields * sizeof(*fields), GFP_KERNEL); + if (*fields == NULL) { + result = -ENOMEM; + goto out; + } + for (i = 0; (c = strsep(&template_fmt_ptr, "|")) != NULL && + i < template_num_fields; i++) { + struct ima_template_field *f = lookup_template_field(c); + + if (!f) { + result = -ENOENT; + goto out; + } + (*fields)[i] = f; + } + *num_fields = i; + return 0; +out: + kfree(*fields); + *fields = NULL; + return result; +} + +static int init_defined_templates(void) +{ + int i = 0; + int result = 0; + + /* Init defined templates. */ + for (i = 0; i < ARRAY_SIZE(defined_templates); i++) { + struct ima_template_desc *template = &defined_templates[i]; + + result = template_desc_init_fields(template->fmt, + &(template->fields), + &(template->num_fields)); + if (result < 0) + return result; + } + return result; +} + +struct ima_template_desc *ima_template_desc_current(void) +{ + if (!ima_template) + ima_template = + lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE); + return ima_template; +} + +int ima_init_template(void) +{ + int result; + + result = init_defined_templates(); + if (result < 0) + return result; + + return 0; +} diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c new file mode 100644 index 0000000..6d66ad6 --- /dev/null +++ b/security/integrity/ima/ima_template_lib.c @@ -0,0 +1,347 @@ +/* + * Copyright (C) 2013 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Author: Roberto Sassu <roberto.sassu@polito.it> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * File: ima_template_lib.c + * Library of supported template fields. + */ +#include <crypto/hash_info.h> + +#include "ima_template_lib.h" + +static bool ima_template_hash_algo_allowed(u8 algo) +{ + if (algo == HASH_ALGO_SHA1 || algo == HASH_ALGO_MD5) + return true; + + return false; +} + +enum data_formats { + DATA_FMT_DIGEST = 0, + DATA_FMT_DIGEST_WITH_ALGO, + DATA_FMT_EVENT_NAME, + DATA_FMT_STRING, + DATA_FMT_HEX +}; + +static int ima_write_template_field_data(const void *data, const u32 datalen, + enum data_formats datafmt, + struct ima_field_data *field_data) +{ + u8 *buf, *buf_ptr; + u32 buflen; + + switch (datafmt) { + case DATA_FMT_EVENT_NAME: + buflen = IMA_EVENT_NAME_LEN_MAX + 1; + break; + case DATA_FMT_STRING: + buflen = datalen + 1; + break; + default: + buflen = datalen; + } + + buf = kzalloc(buflen, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy(buf, data, datalen); + + /* + * Replace all space characters with underscore for event names and + * strings. This avoid that, during the parsing of a measurements list, + * filenames with spaces or that end with the suffix ' (deleted)' are + * split into multiple template fields (the space is the delimitator + * character for measurements lists in ASCII format). + */ + if (datafmt == DATA_FMT_EVENT_NAME || datafmt == DATA_FMT_STRING) { + for (buf_ptr = buf; buf_ptr - buf < datalen; buf_ptr++) + if (*buf_ptr == ' ') + *buf_ptr = '_'; + } + + field_data->data = buf; + field_data->len = buflen; + return 0; +} + +static void ima_show_template_data_ascii(struct seq_file *m, + enum ima_show_type show, + enum data_formats datafmt, + struct ima_field_data *field_data) +{ + u8 *buf_ptr = field_data->data, buflen = field_data->len; + + switch (datafmt) { + case DATA_FMT_DIGEST_WITH_ALGO: + buf_ptr = strnchr(field_data->data, buflen, ':'); + if (buf_ptr != field_data->data) + seq_printf(m, "%s", field_data->data); + + /* skip ':' and '\0' */ + buf_ptr += 2; + buflen -= buf_ptr - field_data->data; + case DATA_FMT_DIGEST: + case DATA_FMT_HEX: + if (!buflen) + break; + ima_print_digest(m, buf_ptr, buflen); + break; + case DATA_FMT_STRING: + seq_printf(m, "%s", buf_ptr); + break; + default: + break; + } +} + +static void ima_show_template_data_binary(struct seq_file *m, + enum ima_show_type show, + enum data_formats datafmt, + struct ima_field_data *field_data) +{ + ima_putc(m, &field_data->len, sizeof(u32)); + if (!field_data->len) + return; + ima_putc(m, field_data->data, field_data->len); +} + +static void ima_show_template_field_data(struct seq_file *m, + enum ima_show_type show, + enum data_formats datafmt, + struct ima_field_data *field_data) +{ + switch (show) { + case IMA_SHOW_ASCII: + ima_show_template_data_ascii(m, show, datafmt, field_data); + break; + case IMA_SHOW_BINARY: + ima_show_template_data_binary(m, show, datafmt, field_data); + break; + default: + break; + } +} + +void ima_show_template_digest(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data) +{ + ima_show_template_field_data(m, show, DATA_FMT_DIGEST, field_data); +} + +void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data) +{ + ima_show_template_field_data(m, show, DATA_FMT_DIGEST_WITH_ALGO, + field_data); +} + +void ima_show_template_string(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data) +{ + ima_show_template_field_data(m, show, DATA_FMT_STRING, field_data); +} + +void ima_show_template_sig(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data) +{ + ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data); +} + +static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 hash_algo, + struct ima_field_data *field_data, + bool size_limit) +{ + /* + * digest formats: + * - DATA_FMT_DIGEST: digest + * - DATA_FMT_DIGEST_WITH_ALGO: [<hash algo>] + ':' + '\0' + digest, + * where <hash algo> is provided if the hash algoritm is not + * SHA1 or MD5 + */ + u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 }; + enum data_formats fmt = DATA_FMT_DIGEST; + u32 offset = 0; + + if (!size_limit) { + fmt = DATA_FMT_DIGEST_WITH_ALGO; + if (hash_algo < HASH_ALGO__LAST) + offset += snprintf(buffer, CRYPTO_MAX_ALG_NAME + 1, + "%s", hash_algo_name[hash_algo]); + buffer[offset] = ':'; + offset += 2; + } + + if (digest) + memcpy(buffer + offset, digest, digestsize); + else + /* + * If digest is NULL, the event being recorded is a violation. + * Make room for the digest by increasing the offset of + * IMA_DIGEST_SIZE. + */ + offset += IMA_DIGEST_SIZE; + + return ima_write_template_field_data(buffer, offset + digestsize, + fmt, field_data); +} + +/* + * This function writes the digest of an event (with size limit). + */ +int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data) +{ + struct { + struct ima_digest_data hdr; + char digest[IMA_MAX_DIGEST_SIZE]; + } hash; + u8 *cur_digest = NULL; + u32 cur_digestsize = 0; + struct inode *inode; + int result; + + memset(&hash, 0, sizeof(hash)); + + if (!iint) /* recording a violation. */ + goto out; + + if (ima_template_hash_algo_allowed(iint->ima_hash->algo)) { + cur_digest = iint->ima_hash->digest; + cur_digestsize = iint->ima_hash->length; + goto out; + } + + if (!file) /* missing info to re-calculate the digest */ + return -EINVAL; + + inode = file_inode(file); + hash.hdr.algo = ima_template_hash_algo_allowed(ima_hash_algo) ? + ima_hash_algo : HASH_ALGO_SHA1; + result = ima_calc_file_hash(file, &hash.hdr); + if (result) { + integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, + filename, "collect_data", + "failed", result, 0); + return result; + } + cur_digest = hash.hdr.digest; + cur_digestsize = hash.hdr.length; +out: + return ima_eventdigest_init_common(cur_digest, cur_digestsize, -1, + field_data, true); +} + +/* + * This function writes the digest of an event (without size limit). + */ +int ima_eventdigest_ng_init(struct integrity_iint_cache *iint, + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_field_data *field_data) +{ + u8 *cur_digest = NULL, hash_algo = HASH_ALGO__LAST; + u32 cur_digestsize = 0; + + /* If iint is NULL, we are recording a violation. */ + if (!iint) + goto out; + + cur_digest = iint->ima_hash->digest; + cur_digestsize = iint->ima_hash->length; + + hash_algo = iint->ima_hash->algo; +out: + return ima_eventdigest_init_common(cur_digest, cur_digestsize, + hash_algo, field_data, false); +} + +static int ima_eventname_init_common(struct integrity_iint_cache *iint, + struct file *file, + const unsigned char *filename, + struct ima_field_data *field_data, + bool size_limit) +{ + const char *cur_filename = NULL; + u32 cur_filename_len = 0; + enum data_formats fmt = size_limit ? + DATA_FMT_EVENT_NAME : DATA_FMT_STRING; + + BUG_ON(filename == NULL && file == NULL); + + if (filename) { + cur_filename = filename; + cur_filename_len = strlen(filename); + + if (!size_limit || cur_filename_len <= IMA_EVENT_NAME_LEN_MAX) + goto out; + } + + if (file) { + cur_filename = file->f_dentry->d_name.name; + cur_filename_len = strlen(cur_filename); + } else + /* + * Truncate filename if the latter is too long and + * the file descriptor is not available. + */ + cur_filename_len = IMA_EVENT_NAME_LEN_MAX; +out: + return ima_write_template_field_data(cur_filename, cur_filename_len, + fmt, field_data); +} + +/* + * This function writes the name of an event (with size limit). + */ +int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data) +{ + return ima_eventname_init_common(iint, file, filename, + field_data, true); +} + +/* + * This function writes the name of an event (without size limit). + */ +int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data) +{ + return ima_eventname_init_common(iint, file, filename, + field_data, false); +} + +/* + * ima_eventsig_init - include the file signature as part of the template data + */ +int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data) +{ + enum data_formats fmt = DATA_FMT_HEX; + int rc = 0; + + if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG)) + goto out; + + rc = ima_write_template_field_data(xattr_value, xattr_len, fmt, + field_data); +out: + return rc; +} diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h new file mode 100644 index 0000000..63f6b52 --- /dev/null +++ b/security/integrity/ima/ima_template_lib.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2013 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Author: Roberto Sassu <roberto.sassu@polito.it> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * File: ima_template_lib.h + * Header for the library of supported template fields. + */ +#ifndef __LINUX_IMA_TEMPLATE_LIB_H +#define __LINUX_IMA_TEMPLATE_LIB_H + +#include <linux/seq_file.h> +#include "ima.h" + +void ima_show_template_digest(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +void ima_show_template_string(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +void ima_show_template_sig(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); +int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data); +int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data); +int ima_eventdigest_ng_init(struct integrity_iint_cache *iint, + struct file *file, const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, + int xattr_len, struct ima_field_data *field_data); +int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data); +int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file, + const unsigned char *filename, + struct evm_ima_xattr_data *xattr_value, int xattr_len, + struct ima_field_data *field_data); +#endif /* __LINUX_IMA_TEMPLATE_LIB_H */ diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index c42fb7a..b9e7c13 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -54,25 +54,57 @@ enum evm_ima_xattr_type { IMA_XATTR_DIGEST = 0x01, EVM_XATTR_HMAC, EVM_IMA_XATTR_DIGSIG, + IMA_XATTR_DIGEST_NG, }; struct evm_ima_xattr_data { u8 type; u8 digest[SHA1_DIGEST_SIZE]; -} __attribute__((packed)); +} __packed; + +#define IMA_MAX_DIGEST_SIZE 64 + +struct ima_digest_data { + u8 algo; + u8 length; + union { + struct { + u8 unused; + u8 type; + } sha1; + struct { + u8 type; + u8 algo; + } ng; + u8 data[2]; + } xattr; + u8 digest[0]; +} __packed; + +/* + * signature format v2 - for using with asymmetric keys + */ +struct signature_v2_hdr { + uint8_t type; /* xattr type */ + uint8_t version; /* signature format version */ + uint8_t hash_algo; /* Digest algorithm [enum pkey_hash_algo] */ + uint32_t keyid; /* IMA key identifier - not X509/PGP specific */ + uint16_t sig_size; /* signature size */ + uint8_t sig[0]; /* signature payload */ +} __packed; /* integrity data associated with an inode */ struct integrity_iint_cache { - struct rb_node rb_node; /* rooted in integrity_iint_tree */ + struct rb_node rb_node; /* rooted in integrity_iint_tree */ struct inode *inode; /* back pointer to inode in question */ u64 version; /* track inode changes */ unsigned long flags; - struct evm_ima_xattr_data ima_xattr; enum integrity_status ima_file_status:4; enum integrity_status ima_mmap_status:4; enum integrity_status ima_bprm_status:4; enum integrity_status ima_module_status:4; enum integrity_status evm_status:4; + struct ima_digest_data *ima_hash; }; /* rbtree tree calls to lookup, insert, delete @@ -89,7 +121,7 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode); #ifdef CONFIG_INTEGRITY_SIGNATURE int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, - const char *digest, int digestlen); + const char *digest, int digestlen); #else @@ -105,12 +137,19 @@ static inline int integrity_digsig_verify(const unsigned int id, #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS int asymmetric_verify(struct key *keyring, const char *sig, int siglen, const char *data, int datalen); + +int integrity_init_keyring(const unsigned int id); #else static inline int asymmetric_verify(struct key *keyring, const char *sig, int siglen, const char *data, int datalen) { return -EOPNOTSUPP; } + +static int integrity_init_keyring(const unsigned int id) +{ + return 0; +} #endif #ifdef CONFIG_INTEGRITY_AUDIT diff --git a/security/keys/Kconfig b/security/keys/Kconfig index a90d6d30..a4f3f8c 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -4,6 +4,7 @@ config KEYS bool "Enable access key retention support" + select ASSOCIATIVE_ARRAY help This option provides support for retaining authentication tokens and access keys in the kernel. @@ -19,6 +20,34 @@ config KEYS If you are unsure as to whether this is required, answer N. +config PERSISTENT_KEYRINGS + bool "Enable register of persistent per-UID keyrings" + depends on KEYS + help + This option provides a register of persistent per-UID keyrings, + primarily aimed at Kerberos key storage. The keyrings are persistent + in the sense that they stay around after all processes of that UID + have exited, not that they survive the machine being rebooted. + + A particular keyring may be accessed by either the user whose keyring + it is or by a process with administrative privileges. The active + LSMs gets to rule on which admin-level processes get to access the + cache. + + Keyrings are created and added into the register upon demand and get + removed if they expire (a default timeout is set upon creation). + +config BIG_KEYS + bool "Large payload keys" + depends on KEYS + depends on TMPFS + help + This option provides support for holding large keys within the kernel + (for example Kerberos ticket caches). The data may be stored out to + swapspace by tmpfs. + + If you are unsure as to whether this is required, answer N. + config TRUSTED_KEYS tristate "TRUSTED KEYS" depends on KEYS && TCG_TPM diff --git a/security/keys/Makefile b/security/keys/Makefile index 504aaa0..dfb3a7b 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -18,9 +18,11 @@ obj-y := \ obj-$(CONFIG_KEYS_COMPAT) += compat.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SYSCTL) += sysctl.o +obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o # # Key types # +obj-$(CONFIG_BIG_KEYS) += big_key.o obj-$(CONFIG_TRUSTED_KEYS) += trusted.o obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/ diff --git a/security/keys/big_key.c b/security/keys/big_key.c new file mode 100644 index 0000000..7f44c32 --- /dev/null +++ b/security/keys/big_key.c @@ -0,0 +1,207 @@ +/* Large capacity key type + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/file.h> +#include <linux/shmem_fs.h> +#include <linux/err.h> +#include <keys/user-type.h> +#include <keys/big_key-type.h> + +MODULE_LICENSE("GPL"); + +/* + * If the data is under this limit, there's no point creating a shm file to + * hold it as the permanently resident metadata for the shmem fs will be at + * least as large as the data. + */ +#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry)) + +/* + * big_key defined keys take an arbitrary string as the description and an + * arbitrary blob of data as the payload + */ +struct key_type key_type_big_key = { + .name = "big_key", + .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, + .instantiate = big_key_instantiate, + .match = user_match, + .revoke = big_key_revoke, + .destroy = big_key_destroy, + .describe = big_key_describe, + .read = big_key_read, +}; + +/* + * Instantiate a big key + */ +int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep) +{ + struct path *path = (struct path *)&key->payload.data2; + struct file *file; + ssize_t written; + size_t datalen = prep->datalen; + int ret; + + ret = -EINVAL; + if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) + goto error; + + /* Set an arbitrary quota */ + ret = key_payload_reserve(key, 16); + if (ret < 0) + goto error; + + key->type_data.x[1] = datalen; + + if (datalen > BIG_KEY_FILE_THRESHOLD) { + /* Create a shmem file to store the data in. This will permit the data + * to be swapped out if needed. + * + * TODO: Encrypt the stored data with a temporary key. + */ + file = shmem_file_setup("", datalen, 0); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err_quota; + } + + written = kernel_write(file, prep->data, prep->datalen, 0); + if (written != datalen) { + ret = written; + if (written >= 0) + ret = -ENOMEM; + goto err_fput; + } + + /* Pin the mount and dentry to the key so that we can open it again + * later + */ + *path = file->f_path; + path_get(path); + fput(file); + } else { + /* Just store the data in a buffer */ + void *data = kmalloc(datalen, GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto err_quota; + } + + key->payload.data = memcpy(data, prep->data, prep->datalen); + } + return 0; + +err_fput: + fput(file); +err_quota: + key_payload_reserve(key, 0); +error: + return ret; +} + +/* + * dispose of the links from a revoked keyring + * - called with the key sem write-locked + */ +void big_key_revoke(struct key *key) +{ + struct path *path = (struct path *)&key->payload.data2; + + /* clear the quota */ + key_payload_reserve(key, 0); + if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) + vfs_truncate(path, 0); +} + +/* + * dispose of the data dangling from the corpse of a big_key key + */ +void big_key_destroy(struct key *key) +{ + if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) { + struct path *path = (struct path *)&key->payload.data2; + path_put(path); + path->mnt = NULL; + path->dentry = NULL; + } else { + kfree(key->payload.data); + key->payload.data = NULL; + } +} + +/* + * describe the big_key key + */ +void big_key_describe(const struct key *key, struct seq_file *m) +{ + unsigned long datalen = key->type_data.x[1]; + + seq_puts(m, key->description); + + if (key_is_instantiated(key)) + seq_printf(m, ": %lu [%s]", + datalen, + datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); +} + +/* + * read the key data + * - the key's semaphore is read-locked + */ +long big_key_read(const struct key *key, char __user *buffer, size_t buflen) +{ + unsigned long datalen = key->type_data.x[1]; + long ret; + + if (!buffer || buflen < datalen) + return datalen; + + if (datalen > BIG_KEY_FILE_THRESHOLD) { + struct path *path = (struct path *)&key->payload.data2; + struct file *file; + loff_t pos; + + file = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(file)) + return PTR_ERR(file); + + pos = 0; + ret = vfs_read(file, buffer, datalen, &pos); + fput(file); + if (ret >= 0 && ret != datalen) + ret = -EIO; + } else { + ret = datalen; + if (copy_to_user(buffer, key->payload.data, datalen) != 0) + ret = -EFAULT; + } + + return ret; +} + +/* + * Module stuff + */ +static int __init big_key_init(void) +{ + return register_key_type(&key_type_big_key); +} + +static void __exit big_key_cleanup(void) +{ + unregister_key_type(&key_type_big_key); +} + +module_init(big_key_init); +module_exit(big_key_cleanup); diff --git a/security/keys/compat.c b/security/keys/compat.c index d65fa7f..bbd32c7 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option, case KEYCTL_INVALIDATE: return keyctl_invalidate_key(arg2); + case KEYCTL_GET_PERSISTENT: + return keyctl_get_persistent(arg2, arg3); + default: return -EOPNOTSUPP; } diff --git a/security/keys/gc.c b/security/keys/gc.c index d67c97b..d3222b6 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -131,50 +131,6 @@ void key_gc_keytype(struct key_type *ktype) } /* - * Garbage collect pointers from a keyring. - * - * Not called with any locks held. The keyring's key struct will not be - * deallocated under us as only our caller may deallocate it. - */ -static void key_gc_keyring(struct key *keyring, time_t limit) -{ - struct keyring_list *klist; - int loop; - - kenter("%x", key_serial(keyring)); - - if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - goto dont_gc; - - /* scan the keyring looking for dead keys */ - rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (!klist) - goto unlock_dont_gc; - - loop = klist->nkeys; - smp_rmb(); - for (loop--; loop >= 0; loop--) { - struct key *key = rcu_dereference(klist->keys[loop]); - if (key_is_dead(key, limit)) - goto do_gc; - } - -unlock_dont_gc: - rcu_read_unlock(); -dont_gc: - kleave(" [no gc]"); - return; - -do_gc: - rcu_read_unlock(); - - keyring_gc(keyring, limit); - kleave(" [gc]"); -} - -/* * Garbage collect a list of unreferenced, detached keys */ static noinline void key_gc_unused_keys(struct list_head *keys) @@ -392,8 +348,7 @@ found_unreferenced_key: */ found_keyring: spin_unlock(&key_serial_lock); - kdebug("scan keyring %d", key->serial); - key_gc_keyring(key, limit); + keyring_gc(key, limit); goto maybe_resched; /* We found a dead key that is still referenced. Reset its type and diff --git a/security/keys/internal.h b/security/keys/internal.h index d4f1468..80b2aac 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -89,42 +89,53 @@ extern struct key_type *key_type_lookup(const char *type); extern void key_type_put(struct key_type *ktype); extern int __key_link_begin(struct key *keyring, - const struct key_type *type, - const char *description, - unsigned long *_prealloc); + const struct keyring_index_key *index_key, + struct assoc_array_edit **_edit); extern int __key_link_check_live_key(struct key *keyring, struct key *key); -extern void __key_link(struct key *keyring, struct key *key, - unsigned long *_prealloc); +extern void __key_link(struct key *key, struct assoc_array_edit **_edit); extern void __key_link_end(struct key *keyring, - struct key_type *type, - unsigned long prealloc); + const struct keyring_index_key *index_key, + struct assoc_array_edit *edit); -extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, - const struct key_type *type, - const char *description, - key_perm_t perm); +extern key_ref_t find_key_to_update(key_ref_t keyring_ref, + const struct keyring_index_key *index_key); extern struct key *keyring_search_instkey(struct key *keyring, key_serial_t target_id); +extern int iterate_over_keyring(const struct key *keyring, + int (*func)(const struct key *key, void *data), + void *data); + typedef int (*key_match_func_t)(const struct key *, const void *); +struct keyring_search_context { + struct keyring_index_key index_key; + const struct cred *cred; + key_match_func_t match; + const void *match_data; + unsigned flags; +#define KEYRING_SEARCH_LOOKUP_TYPE 0x0001 /* [as type->def_lookup_type] */ +#define KEYRING_SEARCH_NO_STATE_CHECK 0x0002 /* Skip state checks */ +#define KEYRING_SEARCH_DO_STATE_CHECK 0x0004 /* Override NO_STATE_CHECK */ +#define KEYRING_SEARCH_NO_UPDATE_TIME 0x0008 /* Don't update times */ +#define KEYRING_SEARCH_NO_CHECK_PERM 0x0010 /* Don't check permissions */ +#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020 /* Give an error on excessive depth */ + + int (*iterator)(const void *object, void *iterator_data); + + /* Internal stuff */ + int skipped_ret; + bool possessed; + key_ref_t result; + struct timespec now; +}; + extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, - const struct cred *cred, - struct key_type *type, - const void *description, - key_match_func_t match, - bool no_state_check); - -extern key_ref_t search_my_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - bool no_state_check, - const struct cred *cred); -extern key_ref_t search_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - const struct cred *cred); + struct keyring_search_context *ctx); + +extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx); +extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx); extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); @@ -202,7 +213,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id); /* * Determine whether a key is dead. */ -static inline bool key_is_dead(struct key *key, time_t limit) +static inline bool key_is_dead(const struct key *key, time_t limit) { return key->flags & ((1 << KEY_FLAG_DEAD) | @@ -244,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t); extern long keyctl_instantiate_key_common(key_serial_t, const struct iovec *, unsigned, size_t, key_serial_t); +#ifdef CONFIG_PERSISTENT_KEYRINGS +extern long keyctl_get_persistent(uid_t, key_serial_t); +extern unsigned persistent_keyring_expiry; +#else +static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring) +{ + return -EOPNOTSUPP; +} +#endif /* * Debugging key validation diff --git a/security/keys/key.c b/security/keys/key.c index 8fb7c7b..55d110f 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, } } - desclen = strlen(desc) + 1; - quotalen = desclen + type->def_datalen; + desclen = strlen(desc); + quotalen = desclen + 1 + type->def_datalen; /* get hold of the key tracking for this user */ user = key_user_lookup(uid); @@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, goto no_memory_2; if (desc) { - key->description = kmemdup(desc, desclen, GFP_KERNEL); + key->index_key.desc_len = desclen; + key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL); if (!key->description) goto no_memory_3; } @@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, atomic_set(&key->usage, 1); init_rwsem(&key->sem); lockdep_set_class(&key->sem, &type->lock_class); - key->type = type; + key->index_key.type = type; key->user = user; key->quotalen = quotalen; key->datalen = type->def_datalen; @@ -299,6 +300,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; + if (flags & KEY_ALLOC_TRUSTED) + key->flags |= 1 << KEY_FLAG_TRUSTED; memset(&key->type_data, 0, sizeof(key->type_data)); @@ -408,7 +411,7 @@ static int __key_instantiate_and_link(struct key *key, struct key_preparsed_payload *prep, struct key *keyring, struct key *authkey, - unsigned long *_prealloc) + struct assoc_array_edit **_edit) { int ret, awaken; @@ -435,7 +438,7 @@ static int __key_instantiate_and_link(struct key *key, /* and link it into the destination keyring */ if (keyring) - __key_link(keyring, key, _prealloc); + __key_link(key, _edit); /* disable the authorisation key */ if (authkey) @@ -475,7 +478,7 @@ int key_instantiate_and_link(struct key *key, struct key *authkey) { struct key_preparsed_payload prep; - unsigned long prealloc; + struct assoc_array_edit *edit; int ret; memset(&prep, 0, sizeof(prep)); @@ -489,17 +492,15 @@ int key_instantiate_and_link(struct key *key, } if (keyring) { - ret = __key_link_begin(keyring, key->type, key->description, - &prealloc); + ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret < 0) goto error_free_preparse; } - ret = __key_instantiate_and_link(key, &prep, keyring, authkey, - &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit); if (keyring) - __key_link_end(keyring, key->type, prealloc); + __key_link_end(keyring, &key->index_key, edit); error_free_preparse: if (key->type->preparse) @@ -537,7 +538,7 @@ int key_reject_and_link(struct key *key, struct key *keyring, struct key *authkey) { - unsigned long prealloc; + struct assoc_array_edit *edit; struct timespec now; int ret, awaken, link_ret = 0; @@ -548,8 +549,7 @@ int key_reject_and_link(struct key *key, ret = -EBUSY; if (keyring) - link_ret = __key_link_begin(keyring, key->type, - key->description, &prealloc); + link_ret = __key_link_begin(keyring, &key->index_key, &edit); mutex_lock(&key_construction_mutex); @@ -557,9 +557,10 @@ int key_reject_and_link(struct key *key, if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { /* mark the key as being negatively instantiated */ atomic_inc(&key->user->nikeys); + key->type_data.reject_error = -error; + smp_wmb(); set_bit(KEY_FLAG_NEGATIVE, &key->flags); set_bit(KEY_FLAG_INSTANTIATED, &key->flags); - key->type_data.reject_error = -error; now = current_kernel_time(); key->expiry = now.tv_sec + timeout; key_schedule_gc(key->expiry + key_gc_delay); @@ -571,7 +572,7 @@ int key_reject_and_link(struct key *key, /* and link it into the destination keyring */ if (keyring && link_ret == 0) - __key_link(keyring, key, &prealloc); + __key_link(key, &edit); /* disable the authorisation key */ if (authkey) @@ -581,7 +582,7 @@ int key_reject_and_link(struct key *key, mutex_unlock(&key_construction_mutex); if (keyring) - __key_link_end(keyring, key->type, prealloc); + __key_link_end(keyring, &key->index_key, edit); /* wake up anyone waiting for a key to be constructed */ if (awaken) @@ -645,7 +646,7 @@ found: /* this races with key_put(), but that doesn't matter since key_put() * doesn't actually change the key */ - atomic_inc(&key->usage); + __key_get(key); error: spin_unlock(&key_serial_lock); @@ -780,25 +781,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_perm_t perm, unsigned long flags) { - unsigned long prealloc; + struct keyring_index_key index_key = { + .description = description, + }; struct key_preparsed_payload prep; + struct assoc_array_edit *edit; const struct cred *cred = current_cred(); - struct key_type *ktype; struct key *keyring, *key = NULL; key_ref_t key_ref; int ret; /* look up the key type to see if it's one of the registered kernel * types */ - ktype = key_type_lookup(type); - if (IS_ERR(ktype)) { + index_key.type = key_type_lookup(type); + if (IS_ERR(index_key.type)) { key_ref = ERR_PTR(-ENODEV); goto error; } key_ref = ERR_PTR(-EINVAL); - if (!ktype->match || !ktype->instantiate || - (!description && !ktype->preparse)) + if (!index_key.type->match || !index_key.type->instantiate || + (!index_key.description && !index_key.type->preparse)) goto error_put_type; keyring = key_ref_to_ptr(keyring_ref); @@ -812,21 +815,28 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, memset(&prep, 0, sizeof(prep)); prep.data = payload; prep.datalen = plen; - prep.quotalen = ktype->def_datalen; - if (ktype->preparse) { - ret = ktype->preparse(&prep); + prep.quotalen = index_key.type->def_datalen; + prep.trusted = flags & KEY_ALLOC_TRUSTED; + if (index_key.type->preparse) { + ret = index_key.type->preparse(&prep); if (ret < 0) { key_ref = ERR_PTR(ret); goto error_put_type; } - if (!description) - description = prep.description; + if (!index_key.description) + index_key.description = prep.description; key_ref = ERR_PTR(-EINVAL); - if (!description) + if (!index_key.description) goto error_free_prep; } + index_key.desc_len = strlen(index_key.description); + + key_ref = ERR_PTR(-EPERM); + if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags)) + goto error_free_prep; + flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0; - ret = __key_link_begin(keyring, ktype, description, &prealloc); + ret = __key_link_begin(keyring, &index_key, &edit); if (ret < 0) { key_ref = ERR_PTR(ret); goto error_free_prep; @@ -844,10 +854,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, * key of the same type and description in the destination keyring and * update that instead if possible */ - if (ktype->update) { - key_ref = __keyring_search_one(keyring_ref, ktype, description, - 0); - if (!IS_ERR(key_ref)) + if (index_key.type->update) { + key_ref = find_key_to_update(keyring_ref, &index_key); + if (key_ref) goto found_matching_key; } @@ -856,23 +865,24 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; perm |= KEY_USR_VIEW; - if (ktype->read) + if (index_key.type->read) perm |= KEY_POS_READ; - if (ktype == &key_type_keyring || ktype->update) + if (index_key.type == &key_type_keyring || + index_key.type->update) perm |= KEY_POS_WRITE; } /* allocate a new key */ - key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred, - perm, flags); + key = key_alloc(index_key.type, index_key.description, + cred->fsuid, cred->fsgid, cred, perm, flags); if (IS_ERR(key)) { key_ref = ERR_CAST(key); goto error_link_end; } /* instantiate it and link it into the target keyring */ - ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit); if (ret < 0) { key_put(key); key_ref = ERR_PTR(ret); @@ -882,12 +892,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); error_link_end: - __key_link_end(keyring, ktype, prealloc); + __key_link_end(keyring, &index_key, edit); error_free_prep: - if (ktype->preparse) - ktype->free_preparse(&prep); + if (index_key.type->preparse) + index_key.type->free_preparse(&prep); error_put_type: - key_type_put(ktype); + key_type_put(index_key.type); error: return key_ref; @@ -895,7 +905,7 @@ error: /* we found a matching key, so we're going to try to update it * - we can drop the locks first as we have the key pinned */ - __key_link_end(keyring, ktype, prealloc); + __key_link_end(keyring, &index_key, edit); key_ref = __key_update(key_ref, &prep); goto error_free_prep; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 33cfd27b..cee72ce 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, case KEYCTL_INVALIDATE: return keyctl_invalidate_key((key_serial_t) arg2); + case KEYCTL_GET_PERSISTENT: + return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 6ece7f2..69f0cb7 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1,6 +1,6 @@ /* Keyring handling * - * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -17,25 +17,11 @@ #include <linux/seq_file.h> #include <linux/err.h> #include <keys/keyring-type.h> +#include <keys/user-type.h> +#include <linux/assoc_array_priv.h> #include <linux/uaccess.h> #include "internal.h" -#define rcu_dereference_locked_keyring(keyring) \ - (rcu_dereference_protected( \ - (keyring)->payload.subscriptions, \ - rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem))) - -#define rcu_deref_link_locked(klist, index, keyring) \ - (rcu_dereference_protected( \ - (klist)->keys[index], \ - rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem))) - -#define MAX_KEYRING_LINKS \ - min_t(size_t, USHRT_MAX - 1, \ - ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *))) - -#define KEY_LINK_FIXQUOTA 1UL - /* * When plumbing the depths of the key tree, this sets a hard limit * set on how deep we're willing to go. @@ -47,6 +33,28 @@ */ #define KEYRING_NAME_HASH_SIZE (1 << 5) +/* + * We mark pointers we pass to the associative array with bit 1 set if + * they're keyrings and clear otherwise. + */ +#define KEYRING_PTR_SUBTYPE 0x2UL + +static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & KEYRING_PTR_SUBTYPE; +} +static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x) +{ + void *object = assoc_array_ptr_to_leaf(x); + return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE); +} +static inline void *keyring_key_to_ptr(struct key *key) +{ + if (key->type == &key_type_keyring) + return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE); + return key; +} + static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE]; static DEFINE_RWLOCK(keyring_name_lock); @@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc) */ static int keyring_instantiate(struct key *keyring, struct key_preparsed_payload *prep); -static int keyring_match(const struct key *keyring, const void *criterion); static void keyring_revoke(struct key *keyring); static void keyring_destroy(struct key *keyring); static void keyring_describe(const struct key *keyring, struct seq_file *m); @@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring, struct key_type key_type_keyring = { .name = "keyring", - .def_datalen = sizeof(struct keyring_list), + .def_datalen = 0, .instantiate = keyring_instantiate, - .match = keyring_match, + .match = user_match, .revoke = keyring_revoke, .destroy = keyring_destroy, .describe = keyring_describe, @@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring, ret = -EINVAL; if (prep->datalen == 0) { + assoc_array_init(&keyring->keys); /* make the keyring available by name if it has one */ keyring_publish_name(keyring); ret = 0; @@ -136,15 +144,226 @@ static int keyring_instantiate(struct key *keyring, } /* - * Match keyrings on their name + * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit. Ideally we'd + * fold the carry back too, but that requires inline asm. + */ +static u64 mult_64x32_and_fold(u64 x, u32 y) +{ + u64 hi = (u64)(u32)(x >> 32) * y; + u64 lo = (u64)(u32)(x) * y; + return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32); +} + +/* + * Hash a key type and description. + */ +static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key) +{ + const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP; + const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK; + const char *description = index_key->description; + unsigned long hash, type; + u32 piece; + u64 acc; + int n, desc_len = index_key->desc_len; + + type = (unsigned long)index_key->type; + + acc = mult_64x32_and_fold(type, desc_len + 13); + acc = mult_64x32_and_fold(acc, 9207); + for (;;) { + n = desc_len; + if (n <= 0) + break; + if (n > 4) + n = 4; + piece = 0; + memcpy(&piece, description, n); + description += n; + desc_len -= n; + acc = mult_64x32_and_fold(acc, piece); + acc = mult_64x32_and_fold(acc, 9207); + } + + /* Fold the hash down to 32 bits if need be. */ + hash = acc; + if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32) + hash ^= acc >> 32; + + /* Squidge all the keyrings into a separate part of the tree to + * ordinary keys by making sure the lowest level segment in the hash is + * zero for keyrings and non-zero otherwise. + */ + if (index_key->type != &key_type_keyring && (hash & level_mask) == 0) + return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; + if (index_key->type == &key_type_keyring && (hash & level_mask) != 0) + return (hash + (hash << level_shift)) & ~level_mask; + return hash; +} + +/* + * Build the next index key chunk. + * + * On 32-bit systems the index key is laid out as: + * + * 0 4 5 9... + * hash desclen typeptr desc[] + * + * On 64-bit systems: + * + * 0 8 9 17... + * hash desclen typeptr desc[] + * + * We return it one word-sized chunk at a time. */ -static int keyring_match(const struct key *keyring, const void *description) +static unsigned long keyring_get_key_chunk(const void *data, int level) +{ + const struct keyring_index_key *index_key = data; + unsigned long chunk = 0; + long offset = 0; + int desc_len = index_key->desc_len, n = sizeof(chunk); + + level /= ASSOC_ARRAY_KEY_CHUNK_SIZE; + switch (level) { + case 0: + return hash_key_type_and_desc(index_key); + case 1: + return ((unsigned long)index_key->type << 8) | desc_len; + case 2: + if (desc_len == 0) + return (u8)((unsigned long)index_key->type >> + (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); + n--; + offset = 1; + default: + offset += sizeof(chunk) - 1; + offset += (level - 3) * sizeof(chunk); + if (offset >= desc_len) + return 0; + desc_len -= offset; + if (desc_len > n) + desc_len = n; + offset += desc_len; + do { + chunk <<= 8; + chunk |= ((u8*)index_key->description)[--offset]; + } while (--desc_len > 0); + + if (level == 2) { + chunk <<= 8; + chunk |= (u8)((unsigned long)index_key->type >> + (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); + } + return chunk; + } +} + +static unsigned long keyring_get_object_key_chunk(const void *object, int level) +{ + const struct key *key = keyring_ptr_to_key(object); + return keyring_get_key_chunk(&key->index_key, level); +} + +static bool keyring_compare_object(const void *object, const void *data) { - return keyring->description && - strcmp(keyring->description, description) == 0; + const struct keyring_index_key *index_key = data; + const struct key *key = keyring_ptr_to_key(object); + + return key->index_key.type == index_key->type && + key->index_key.desc_len == index_key->desc_len && + memcmp(key->index_key.description, index_key->description, + index_key->desc_len) == 0; } /* + * Compare the index keys of a pair of objects and determine the bit position + * at which they differ - if they differ. + */ +static int keyring_diff_objects(const void *_a, const void *_b) +{ + const struct key *key_a = keyring_ptr_to_key(_a); + const struct key *key_b = keyring_ptr_to_key(_b); + const struct keyring_index_key *a = &key_a->index_key; + const struct keyring_index_key *b = &key_b->index_key; + unsigned long seg_a, seg_b; + int level, i; + + level = 0; + seg_a = hash_key_type_and_desc(a); + seg_b = hash_key_type_and_desc(b); + if ((seg_a ^ seg_b) != 0) + goto differ; + + /* The number of bits contributed by the hash is controlled by a + * constant in the assoc_array headers. Everything else thereafter we + * can deal with as being machine word-size dependent. + */ + level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8; + seg_a = a->desc_len; + seg_b = b->desc_len; + if ((seg_a ^ seg_b) != 0) + goto differ; + + /* The next bit may not work on big endian */ + level++; + seg_a = (unsigned long)a->type; + seg_b = (unsigned long)b->type; + if ((seg_a ^ seg_b) != 0) + goto differ; + + level += sizeof(unsigned long); + if (a->desc_len == 0) + goto same; + + i = 0; + if (((unsigned long)a->description | (unsigned long)b->description) & + (sizeof(unsigned long) - 1)) { + do { + seg_a = *(unsigned long *)(a->description + i); + seg_b = *(unsigned long *)(b->description + i); + if ((seg_a ^ seg_b) != 0) + goto differ_plus_i; + i += sizeof(unsigned long); + } while (i < (a->desc_len & (sizeof(unsigned long) - 1))); + } + + for (; i < a->desc_len; i++) { + seg_a = *(unsigned char *)(a->description + i); + seg_b = *(unsigned char *)(b->description + i); + if ((seg_a ^ seg_b) != 0) + goto differ_plus_i; + } + +same: + return -1; + +differ_plus_i: + level += i; +differ: + i = level * 8 + __ffs(seg_a ^ seg_b); + return i; +} + +/* + * Free an object after stripping the keyring flag off of the pointer. + */ +static void keyring_free_object(void *object) +{ + key_put(keyring_ptr_to_key(object)); +} + +/* + * Operations for keyring management by the index-tree routines. + */ +static const struct assoc_array_ops keyring_assoc_array_ops = { + .get_key_chunk = keyring_get_key_chunk, + .get_object_key_chunk = keyring_get_object_key_chunk, + .compare_object = keyring_compare_object, + .diff_objects = keyring_diff_objects, + .free_object = keyring_free_object, +}; + +/* * Clean up a keyring when it is destroyed. Unpublish its name if it had one * and dispose of its data. * @@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description) */ static void keyring_destroy(struct key *keyring) { - struct keyring_list *klist; - int loop; - if (keyring->description) { write_lock(&keyring_name_lock); @@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring) write_unlock(&keyring_name_lock); } - klist = rcu_access_pointer(keyring->payload.subscriptions); - if (klist) { - for (loop = klist->nkeys - 1; loop >= 0; loop--) - key_put(rcu_access_pointer(klist->keys[loop])); - kfree(klist); - } + assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops); } /* @@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring) */ static void keyring_describe(const struct key *keyring, struct seq_file *m) { - struct keyring_list *klist; - if (keyring->description) seq_puts(m, keyring->description); else seq_puts(m, "[anon]"); if (key_is_instantiated(keyring)) { - rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) - seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); + if (keyring->keys.nr_leaves_on_tree != 0) + seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); else seq_puts(m, ": empty"); - rcu_read_unlock(); } } +struct keyring_read_iterator_context { + size_t qty; + size_t count; + key_serial_t __user *buffer; +}; + +static int keyring_read_iterator(const void *object, void *data) +{ + struct keyring_read_iterator_context *ctx = data; + const struct key *key = keyring_ptr_to_key(object); + int ret; + + kenter("{%s,%d},,{%zu/%zu}", + key->type->name, key->serial, ctx->count, ctx->qty); + + if (ctx->count >= ctx->qty) + return 1; + + ret = put_user(key->serial, ctx->buffer); + if (ret < 0) + return ret; + ctx->buffer++; + ctx->count += sizeof(key->serial); + return 0; +} + /* * Read a list of key IDs from the keyring's contents in binary form * - * The keyring's semaphore is read-locked by the caller. + * The keyring's semaphore is read-locked by the caller. This prevents someone + * from modifying it under us - which could cause us to read key IDs multiple + * times. */ static long keyring_read(const struct key *keyring, char __user *buffer, size_t buflen) { - struct keyring_list *klist; - struct key *key; - size_t qty, tmp; - int loop, ret; + struct keyring_read_iterator_context ctx; + unsigned long nr_keys; + int ret; - ret = 0; - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* calculate how much data we could return */ - qty = klist->nkeys * sizeof(key_serial_t); - - if (buffer && buflen > 0) { - if (buflen > qty) - buflen = qty; - - /* copy the IDs of the subscribed keys into the - * buffer */ - ret = -EFAULT; - - for (loop = 0; loop < klist->nkeys; loop++) { - key = rcu_deref_link_locked(klist, loop, - keyring); - - tmp = sizeof(key_serial_t); - if (tmp > buflen) - tmp = buflen; - - if (copy_to_user(buffer, - &key->serial, - tmp) != 0) - goto error; - - buflen -= tmp; - if (buflen == 0) - break; - buffer += tmp; - } - } + kenter("{%d},,%zu", key_serial(keyring), buflen); + + if (buflen & (sizeof(key_serial_t) - 1)) + return -EINVAL; + + nr_keys = keyring->keys.nr_leaves_on_tree; + if (nr_keys == 0) + return 0; - ret = qty; + /* Calculate how much data we could return */ + ctx.qty = nr_keys * sizeof(key_serial_t); + + if (!buffer || !buflen) + return ctx.qty; + + if (buflen > ctx.qty) + ctx.qty = buflen; + + /* Copy the IDs of the subscribed keys into the buffer */ + ctx.buffer = (key_serial_t __user *)buffer; + ctx.count = 0; + ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); + if (ret < 0) { + kleave(" = %d [iterate]", ret); + return ret; } -error: - return ret; + kleave(" = %zu [ok]", ctx.count); + return ctx.count; } /* @@ -277,227 +500,361 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, } EXPORT_SYMBOL(keyring_alloc); -/** - * keyring_search_aux - Search a keyring tree for a key matching some criteria - * @keyring_ref: A pointer to the keyring with possession indicator. - * @cred: The credentials to use for permissions checks. - * @type: The type of key to search for. - * @description: Parameter for @match. - * @match: Function to rule on whether or not a key is the one required. - * @no_state_check: Don't check if a matching key is bad - * - * Search the supplied keyring tree for a key that matches the criteria given. - * The root keyring and any linked keyrings must grant Search permission to the - * caller to be searchable and keys can only be found if they too grant Search - * to the caller. The possession flag on the root keyring pointer controls use - * of the possessor bits in permissions checking of the entire tree. In - * addition, the LSM gets to forbid keyring searches and key matches. - * - * The search is performed as a breadth-then-depth search up to the prescribed - * limit (KEYRING_SEARCH_MAX_DEPTH). - * - * Keys are matched to the type provided and are then filtered by the match - * function, which is given the description to use in any way it sees fit. The - * match function may use any attributes of a key that it wishes to to - * determine the match. Normally the match function from the key type would be - * used. - * - * RCU is used to prevent the keyring key lists from disappearing without the - * need to take lots of locks. - * - * Returns a pointer to the found key and increments the key usage count if - * successful; -EAGAIN if no matching keys were found, or if expired or revoked - * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the - * specified keyring wasn't a keyring. - * - * In the case of a successful return, the possession attribute from - * @keyring_ref is propagated to the returned key reference. +/* + * Iteration function to consider each key found. */ -key_ref_t keyring_search_aux(key_ref_t keyring_ref, - const struct cred *cred, - struct key_type *type, - const void *description, - key_match_func_t match, - bool no_state_check) +static int keyring_search_iterator(const void *object, void *iterator_data) { - struct { - /* Need a separate keylist pointer for RCU purposes */ - struct key *keyring; - struct keyring_list *keylist; - int kix; - } stack[KEYRING_SEARCH_MAX_DEPTH]; - - struct keyring_list *keylist; - struct timespec now; - unsigned long possessed, kflags; - struct key *keyring, *key; - key_ref_t key_ref; - long err; - int sp, nkeys, kix; + struct keyring_search_context *ctx = iterator_data; + const struct key *key = keyring_ptr_to_key(object); + unsigned long kflags = key->flags; - keyring = key_ref_to_ptr(keyring_ref); - possessed = is_key_possessed(keyring_ref); - key_check(keyring); + kenter("{%d}", key->serial); - /* top keyring must have search permission to begin the search */ - err = key_task_permission(keyring_ref, cred, KEY_SEARCH); - if (err < 0) { - key_ref = ERR_PTR(err); - goto error; + /* ignore keys not of this type */ + if (key->type != ctx->index_key.type) { + kleave(" = 0 [!type]"); + return 0; } - key_ref = ERR_PTR(-ENOTDIR); - if (keyring->type != &key_type_keyring) - goto error; + /* skip invalidated, revoked and expired keys */ + if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { + if (kflags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) { + ctx->result = ERR_PTR(-EKEYREVOKED); + kleave(" = %d [invrev]", ctx->skipped_ret); + goto skipped; + } - rcu_read_lock(); + if (key->expiry && ctx->now.tv_sec >= key->expiry) { + ctx->result = ERR_PTR(-EKEYEXPIRED); + kleave(" = %d [expire]", ctx->skipped_ret); + goto skipped; + } + } - now = current_kernel_time(); - err = -EAGAIN; - sp = 0; - - /* firstly we should check to see if this top-level keyring is what we - * are looking for */ - key_ref = ERR_PTR(-EAGAIN); - kflags = keyring->flags; - if (keyring->type == type && match(keyring, description)) { - key = keyring; - if (no_state_check) - goto found; + /* keys that don't match */ + if (!ctx->match(key, ctx->match_data)) { + kleave(" = 0 [!match]"); + return 0; + } - /* check it isn't negative and hasn't expired or been - * revoked */ - if (kflags & (1 << KEY_FLAG_REVOKED)) - goto error_2; - if (key->expiry && now.tv_sec >= key->expiry) - goto error_2; - key_ref = ERR_PTR(key->type_data.reject_error); - if (kflags & (1 << KEY_FLAG_NEGATIVE)) - goto error_2; - goto found; + /* key must have search permissions */ + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && + key_task_permission(make_key_ref(key, ctx->possessed), + ctx->cred, KEY_SEARCH) < 0) { + ctx->result = ERR_PTR(-EACCES); + kleave(" = %d [!perm]", ctx->skipped_ret); + goto skipped; } - /* otherwise, the top keyring must not be revoked, expired, or - * negatively instantiated if we are to search it */ - key_ref = ERR_PTR(-EAGAIN); - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_NEGATIVE)) || - (keyring->expiry && now.tv_sec >= keyring->expiry)) - goto error_2; - - /* start processing a new keyring */ -descend: - kflags = keyring->flags; - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - goto not_this_keyring; + if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { + /* we set a different error code if we pass a negative key */ + if (kflags & (1 << KEY_FLAG_NEGATIVE)) { + smp_rmb(); + ctx->result = ERR_PTR(key->type_data.reject_error); + kleave(" = %d [neg]", ctx->skipped_ret); + goto skipped; + } + } - keylist = rcu_dereference(keyring->payload.subscriptions); - if (!keylist) - goto not_this_keyring; + /* Found */ + ctx->result = make_key_ref(key, ctx->possessed); + kleave(" = 1 [found]"); + return 1; - /* iterate through the keys in this keyring first */ - nkeys = keylist->nkeys; - smp_rmb(); - for (kix = 0; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - kflags = key->flags; +skipped: + return ctx->skipped_ret; +} - /* ignore keys not of this type */ - if (key->type != type) - continue; +/* + * Search inside a keyring for a key. We can search by walking to it + * directly based on its index-key or we can iterate over the entire + * tree looking for it, based on the match function. + */ +static int search_keyring(struct key *keyring, struct keyring_search_context *ctx) +{ + if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) == + KEYRING_SEARCH_LOOKUP_DIRECT) { + const void *object; + + object = assoc_array_find(&keyring->keys, + &keyring_assoc_array_ops, + &ctx->index_key); + return object ? ctx->iterator(object, ctx) : 0; + } + return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx); +} - /* skip invalidated, revoked and expired keys */ - if (!no_state_check) { - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - continue; +/* + * Search a tree of keyrings that point to other keyrings up to the maximum + * depth. + */ +static bool search_nested_keyrings(struct key *keyring, + struct keyring_search_context *ctx) +{ + struct { + struct key *keyring; + struct assoc_array_node *node; + int slot; + } stack[KEYRING_SEARCH_MAX_DEPTH]; - if (key->expiry && now.tv_sec >= key->expiry) - continue; - } + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *ptr; + struct key *key; + int sp = 0, slot; - /* keys that don't match */ - if (!match(key, description)) - continue; + kenter("{%d},{%s,%s}", + keyring->serial, + ctx->index_key.type->name, + ctx->index_key.description); - /* key must have search permissions */ - if (key_task_permission(make_key_ref(key, possessed), - cred, KEY_SEARCH) < 0) - continue; + if (ctx->index_key.description) + ctx->index_key.desc_len = strlen(ctx->index_key.description); - if (no_state_check) + /* Check to see if this top-level keyring is what we are looking for + * and whether it is valid or not. + */ + if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE || + keyring_compare_object(keyring, &ctx->index_key)) { + ctx->skipped_ret = 2; + ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK; + switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) { + case 1: goto found; - - /* we set a different error code if we pass a negative key */ - if (kflags & (1 << KEY_FLAG_NEGATIVE)) { - err = key->type_data.reject_error; - continue; + case 2: + return false; + default: + break; } + } + + ctx->skipped_ret = 0; + if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) + ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK; + /* Start processing a new keyring */ +descend_to_keyring: + kdebug("descend to %d", keyring->serial); + if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) + goto not_this_keyring; + + /* Search through the keys in this keyring before its searching its + * subtrees. + */ + if (search_keyring(keyring, ctx)) goto found; - } - /* search through the keyrings nested in this one */ - kix = 0; -ascend: - nkeys = keylist->nkeys; - smp_rmb(); - for (; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - if (key->type != &key_type_keyring) - continue; + /* Then manually iterate through the keyrings nested in this one. + * + * Start from the root node of the index tree. Because of the way the + * hash function has been set up, keyrings cluster on the leftmost + * branch of the root node (root slot 0) or in the root node itself. + * Non-keyrings avoid the leftmost branch of the root entirely (root + * slots 1-15). + */ + ptr = ACCESS_ONCE(keyring->keys.root); + if (!ptr) + goto not_this_keyring; - /* recursively search nested keyrings - * - only search keyrings for which we have search permission + if (assoc_array_ptr_is_shortcut(ptr)) { + /* If the root is a shortcut, either the keyring only contains + * keyring pointers (everything clusters behind root slot 0) or + * doesn't contain any keyring pointers. */ - if (sp >= KEYRING_SEARCH_MAX_DEPTH) + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0) + goto not_this_keyring; + + ptr = ACCESS_ONCE(shortcut->next_node); + node = assoc_array_ptr_to_node(ptr); + goto begin_node; + } + + node = assoc_array_ptr_to_node(ptr); + smp_read_barrier_depends(); + + ptr = node->slots[0]; + if (!assoc_array_ptr_is_meta(ptr)) + goto begin_node; + +descend_to_node: + /* Descend to a more distal node in this keyring's content tree and go + * through that. + */ + kdebug("descend"); + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + ptr = ACCESS_ONCE(shortcut->next_node); + BUG_ON(!assoc_array_ptr_is_node(ptr)); + node = assoc_array_ptr_to_node(ptr); + } + +begin_node: + kdebug("begin_node"); + smp_read_barrier_depends(); + slot = 0; +ascend_to_node: + /* Go through the slots in a node */ + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + + if (assoc_array_ptr_is_meta(ptr) && node->back_pointer) + goto descend_to_node; + + if (!keyring_ptr_is_keyring(ptr)) continue; - if (key_task_permission(make_key_ref(key, possessed), - cred, KEY_SEARCH) < 0) + key = keyring_ptr_to_key(ptr); + + if (sp >= KEYRING_SEARCH_MAX_DEPTH) { + if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) { + ctx->result = ERR_PTR(-ELOOP); + return false; + } + goto not_this_keyring; + } + + /* Search a nested keyring */ + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && + key_task_permission(make_key_ref(key, ctx->possessed), + ctx->cred, KEY_SEARCH) < 0) continue; /* stack the current position */ stack[sp].keyring = keyring; - stack[sp].keylist = keylist; - stack[sp].kix = kix; + stack[sp].node = node; + stack[sp].slot = slot; sp++; /* begin again with the new keyring */ keyring = key; - goto descend; + goto descend_to_keyring; } - /* the keyring we're looking at was disqualified or didn't contain a - * matching key */ + /* We've dealt with all the slots in the current node, so now we need + * to ascend to the parent and continue processing there. + */ + ptr = ACCESS_ONCE(node->back_pointer); + slot = node->parent_slot; + + if (ptr && assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + ptr = ACCESS_ONCE(shortcut->back_pointer); + slot = shortcut->parent_slot; + } + if (!ptr) + goto not_this_keyring; + node = assoc_array_ptr_to_node(ptr); + smp_read_barrier_depends(); + slot++; + + /* If we've ascended to the root (zero backpointer), we must have just + * finished processing the leftmost branch rather than the root slots - + * so there can't be any more keyrings for us to find. + */ + if (node->back_pointer) { + kdebug("ascend %d", slot); + goto ascend_to_node; + } + + /* The keyring we're looking at was disqualified or didn't contain a + * matching key. + */ not_this_keyring: - if (sp > 0) { - /* resume the processing of a keyring higher up in the tree */ - sp--; - keyring = stack[sp].keyring; - keylist = stack[sp].keylist; - kix = stack[sp].kix + 1; - goto ascend; + kdebug("not_this_keyring %d", sp); + if (sp <= 0) { + kleave(" = false"); + return false; } - key_ref = ERR_PTR(err); - goto error_2; + /* Resume the processing of a keyring higher up in the tree */ + sp--; + keyring = stack[sp].keyring; + node = stack[sp].node; + slot = stack[sp].slot + 1; + kdebug("ascend to %d [%d]", keyring->serial, slot); + goto ascend_to_node; - /* we found a viable match */ + /* We found a viable match */ found: - atomic_inc(&key->usage); - key->last_used_at = now.tv_sec; - keyring->last_used_at = now.tv_sec; - while (sp > 0) - stack[--sp].keyring->last_used_at = now.tv_sec; + key = key_ref_to_ptr(ctx->result); key_check(key); - key_ref = make_key_ref(key, possessed); -error_2: + if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) { + key->last_used_at = ctx->now.tv_sec; + keyring->last_used_at = ctx->now.tv_sec; + while (sp > 0) + stack[--sp].keyring->last_used_at = ctx->now.tv_sec; + } + kleave(" = true"); + return true; +} + +/** + * keyring_search_aux - Search a keyring tree for a key matching some criteria + * @keyring_ref: A pointer to the keyring with possession indicator. + * @ctx: The keyring search context. + * + * Search the supplied keyring tree for a key that matches the criteria given. + * The root keyring and any linked keyrings must grant Search permission to the + * caller to be searchable and keys can only be found if they too grant Search + * to the caller. The possession flag on the root keyring pointer controls use + * of the possessor bits in permissions checking of the entire tree. In + * addition, the LSM gets to forbid keyring searches and key matches. + * + * The search is performed as a breadth-then-depth search up to the prescribed + * limit (KEYRING_SEARCH_MAX_DEPTH). + * + * Keys are matched to the type provided and are then filtered by the match + * function, which is given the description to use in any way it sees fit. The + * match function may use any attributes of a key that it wishes to to + * determine the match. Normally the match function from the key type would be + * used. + * + * RCU can be used to prevent the keyring key lists from disappearing without + * the need to take lots of locks. + * + * Returns a pointer to the found key and increments the key usage count if + * successful; -EAGAIN if no matching keys were found, or if expired or revoked + * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the + * specified keyring wasn't a keyring. + * + * In the case of a successful return, the possession attribute from + * @keyring_ref is propagated to the returned key reference. + */ +key_ref_t keyring_search_aux(key_ref_t keyring_ref, + struct keyring_search_context *ctx) +{ + struct key *keyring; + long err; + + ctx->iterator = keyring_search_iterator; + ctx->possessed = is_key_possessed(keyring_ref); + ctx->result = ERR_PTR(-EAGAIN); + + keyring = key_ref_to_ptr(keyring_ref); + key_check(keyring); + + if (keyring->type != &key_type_keyring) + return ERR_PTR(-ENOTDIR); + + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) { + err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH); + if (err < 0) + return ERR_PTR(err); + } + + rcu_read_lock(); + ctx->now = current_kernel_time(); + if (search_nested_keyrings(keyring, ctx)) + __key_get(key_ref_to_ptr(ctx->result)); rcu_read_unlock(); -error: - return key_ref; + return ctx->result; } /** @@ -507,77 +864,73 @@ error: * @description: The name of the keyring we want to find. * * As keyring_search_aux() above, but using the current task's credentials and - * type's default matching function. + * type's default matching function and preferred search method. */ key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, const char *description) { - if (!type->match) + struct keyring_search_context ctx = { + .index_key.type = type, + .index_key.description = description, + .cred = current_cred(), + .match = type->match, + .match_data = description, + .flags = (type->def_lookup_type | + KEYRING_SEARCH_DO_STATE_CHECK), + }; + + if (!ctx.match) return ERR_PTR(-ENOKEY); - return keyring_search_aux(keyring, current->cred, - type, description, type->match, false); + return keyring_search_aux(keyring, &ctx); } EXPORT_SYMBOL(keyring_search); /* - * Search the given keyring only (no recursion). + * Search the given keyring for a key that might be updated. * * The caller must guarantee that the keyring is a keyring and that the - * permission is granted to search the keyring as no check is made here. - * - * RCU is used to make it unnecessary to lock the keyring key list here. + * permission is granted to modify the keyring as no check is made here. The + * caller must also hold a lock on the keyring semaphore. * * Returns a pointer to the found key with usage count incremented if - * successful and returns -ENOKEY if not found. Revoked keys and keys not - * providing the requested permission are skipped over. + * successful and returns NULL if not found. Revoked and invalidated keys are + * skipped over. * * If successful, the possession indicator is propagated from the keyring ref * to the returned key reference. */ -key_ref_t __keyring_search_one(key_ref_t keyring_ref, - const struct key_type *ktype, - const char *description, - key_perm_t perm) +key_ref_t find_key_to_update(key_ref_t keyring_ref, + const struct keyring_index_key *index_key) { - struct keyring_list *klist; - unsigned long possessed; struct key *keyring, *key; - int nkeys, loop; + const void *object; keyring = key_ref_to_ptr(keyring_ref); - possessed = is_key_possessed(keyring_ref); - rcu_read_lock(); + kenter("{%d},{%s,%s}", + keyring->serial, index_key->type->name, index_key->description); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) { - nkeys = klist->nkeys; - smp_rmb(); - for (loop = 0; loop < nkeys ; loop++) { - key = rcu_dereference(klist->keys[loop]); - if (key->type == ktype && - (!key->type->match || - key->type->match(key, description)) && - key_permission(make_key_ref(key, possessed), - perm) == 0 && - !(key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - ) - goto found; - } - } + object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, + index_key); - rcu_read_unlock(); - return ERR_PTR(-ENOKEY); + if (object) + goto found; + + kleave(" = NULL"); + return NULL; found: - atomic_inc(&key->usage); - keyring->last_used_at = key->last_used_at = - current_kernel_time().tv_sec; - rcu_read_unlock(); - return make_key_ref(key, possessed); + key = keyring_ptr_to_key(object); + if (key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) { + kleave(" = NULL [x]"); + return NULL; + } + __key_get(key); + kleave(" = {%d}", key->serial); + return make_key_ref(key, is_key_possessed(keyring_ref)); } /* @@ -640,6 +993,19 @@ out: return keyring; } +static int keyring_detect_cycle_iterator(const void *object, + void *iterator_data) +{ + struct keyring_search_context *ctx = iterator_data; + const struct key *key = keyring_ptr_to_key(object); + + kenter("{%d}", key->serial); + + BUG_ON(key != ctx->match_data); + ctx->result = ERR_PTR(-EDEADLK); + return 1; +} + /* * See if a cycle will will be created by inserting acyclic tree B in acyclic * tree A at the topmost level (ie: as a direct child of A). @@ -649,116 +1015,39 @@ out: */ static int keyring_detect_cycle(struct key *A, struct key *B) { - struct { - struct keyring_list *keylist; - int kix; - } stack[KEYRING_SEARCH_MAX_DEPTH]; - - struct keyring_list *keylist; - struct key *subtree, *key; - int sp, nkeys, kix, ret; + struct keyring_search_context ctx = { + .index_key = A->index_key, + .match_data = A, + .iterator = keyring_detect_cycle_iterator, + .flags = (KEYRING_SEARCH_LOOKUP_DIRECT | + KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_NO_UPDATE_TIME | + KEYRING_SEARCH_NO_CHECK_PERM | + KEYRING_SEARCH_DETECT_TOO_DEEP), + }; rcu_read_lock(); - - ret = -EDEADLK; - if (A == B) - goto cycle_detected; - - subtree = B; - sp = 0; - - /* start processing a new keyring */ -descend: - if (test_bit(KEY_FLAG_REVOKED, &subtree->flags)) - goto not_this_keyring; - - keylist = rcu_dereference(subtree->payload.subscriptions); - if (!keylist) - goto not_this_keyring; - kix = 0; - -ascend: - /* iterate through the remaining keys in this keyring */ - nkeys = keylist->nkeys; - smp_rmb(); - for (; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - - if (key == A) - goto cycle_detected; - - /* recursively check nested keyrings */ - if (key->type == &key_type_keyring) { - if (sp >= KEYRING_SEARCH_MAX_DEPTH) - goto too_deep; - - /* stack the current position */ - stack[sp].keylist = keylist; - stack[sp].kix = kix; - sp++; - - /* begin again with the new keyring */ - subtree = key; - goto descend; - } - } - - /* the keyring we're looking at was disqualified or didn't contain a - * matching key */ -not_this_keyring: - if (sp > 0) { - /* resume the checking of a keyring higher up in the tree */ - sp--; - keylist = stack[sp].keylist; - kix = stack[sp].kix + 1; - goto ascend; - } - - ret = 0; /* no cycles detected */ - -error: + search_nested_keyrings(B, &ctx); rcu_read_unlock(); - return ret; - -too_deep: - ret = -ELOOP; - goto error; - -cycle_detected: - ret = -EDEADLK; - goto error; -} - -/* - * Dispose of a keyring list after the RCU grace period, freeing the unlinked - * key - */ -static void keyring_unlink_rcu_disposal(struct rcu_head *rcu) -{ - struct keyring_list *klist = - container_of(rcu, struct keyring_list, rcu); - - if (klist->delkey != USHRT_MAX) - key_put(rcu_access_pointer(klist->keys[klist->delkey])); - kfree(klist); + return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result); } /* * Preallocate memory so that a key can be linked into to a keyring. */ -int __key_link_begin(struct key *keyring, const struct key_type *type, - const char *description, unsigned long *_prealloc) +int __key_link_begin(struct key *keyring, + const struct keyring_index_key *index_key, + struct assoc_array_edit **_edit) __acquires(&keyring->sem) __acquires(&keyring_serialise_link_sem) { - struct keyring_list *klist, *nklist; - unsigned long prealloc; - unsigned max; - time_t lowest_lru; - size_t size; - int loop, lru, ret; + struct assoc_array_edit *edit; + int ret; + + kenter("%d,%s,%s,", + keyring->serial, index_key->type->name, index_key->description); - kenter("%d,%s,%s,", key_serial(keyring), type->name, description); + BUG_ON(index_key->desc_len == 0); if (keyring->type != &key_type_keyring) return -ENOTDIR; @@ -771,100 +1060,39 @@ int __key_link_begin(struct key *keyring, const struct key_type *type, /* serialise link/link calls to prevent parallel calls causing a cycle * when linking two keyring in opposite orders */ - if (type == &key_type_keyring) + if (index_key->type == &key_type_keyring) down_write(&keyring_serialise_link_sem); - klist = rcu_dereference_locked_keyring(keyring); - - /* see if there's a matching key we can displace */ - lru = -1; - if (klist && klist->nkeys > 0) { - lowest_lru = TIME_T_MAX; - for (loop = klist->nkeys - 1; loop >= 0; loop--) { - struct key *key = rcu_deref_link_locked(klist, loop, - keyring); - if (key->type == type && - strcmp(key->description, description) == 0) { - /* Found a match - we'll replace the link with - * one to the new key. We record the slot - * position. - */ - klist->delkey = loop; - prealloc = 0; - goto done; - } - if (key->last_used_at < lowest_lru) { - lowest_lru = key->last_used_at; - lru = loop; - } - } - } - - /* If the keyring is full then do an LRU discard */ - if (klist && - klist->nkeys == klist->maxkeys && - klist->maxkeys >= MAX_KEYRING_LINKS) { - kdebug("LRU discard %d\n", lru); - klist->delkey = lru; - prealloc = 0; - goto done; - } - - /* check that we aren't going to overrun the user's quota */ - ret = key_payload_reserve(keyring, - keyring->datalen + KEYQUOTA_LINK_BYTES); - if (ret < 0) + /* Create an edit script that will insert/replace the key in the + * keyring tree. + */ + edit = assoc_array_insert(&keyring->keys, + &keyring_assoc_array_ops, + index_key, + NULL); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); goto error_sem; + } - if (klist && klist->nkeys < klist->maxkeys) { - /* there's sufficient slack space to append directly */ - klist->delkey = klist->nkeys; - prealloc = KEY_LINK_FIXQUOTA; - } else { - /* grow the key list */ - max = 4; - if (klist) { - max += klist->maxkeys; - if (max > MAX_KEYRING_LINKS) - max = MAX_KEYRING_LINKS; - BUG_ON(max <= klist->maxkeys); - } - - size = sizeof(*klist) + sizeof(struct key *) * max; - - ret = -ENOMEM; - nklist = kmalloc(size, GFP_KERNEL); - if (!nklist) - goto error_quota; - - nklist->maxkeys = max; - if (klist) { - memcpy(nklist->keys, klist->keys, - sizeof(struct key *) * klist->nkeys); - nklist->delkey = klist->nkeys; - nklist->nkeys = klist->nkeys + 1; - klist->delkey = USHRT_MAX; - } else { - nklist->nkeys = 1; - nklist->delkey = 0; - } - - /* add the key into the new space */ - RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL); - prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA; + /* If we're not replacing a link in-place then we're going to need some + * extra quota. + */ + if (!edit->dead_leaf) { + ret = key_payload_reserve(keyring, + keyring->datalen + KEYQUOTA_LINK_BYTES); + if (ret < 0) + goto error_cancel; } -done: - *_prealloc = prealloc; + *_edit = edit; kleave(" = 0"); return 0; -error_quota: - /* undo the quota changes */ - key_payload_reserve(keyring, - keyring->datalen - KEYQUOTA_LINK_BYTES); +error_cancel: + assoc_array_cancel_edit(edit); error_sem: - if (type == &key_type_keyring) + if (index_key->type == &key_type_keyring) up_write(&keyring_serialise_link_sem); error_krsem: up_write(&keyring->sem); @@ -895,60 +1123,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key) * holds at most one link to any given key of a particular type+description * combination. */ -void __key_link(struct key *keyring, struct key *key, - unsigned long *_prealloc) +void __key_link(struct key *key, struct assoc_array_edit **_edit) { - struct keyring_list *klist, *nklist; - struct key *discard; - - nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA); - *_prealloc = 0; - - kenter("%d,%d,%p", keyring->serial, key->serial, nklist); - - klist = rcu_dereference_locked_keyring(keyring); - - atomic_inc(&key->usage); - keyring->last_used_at = key->last_used_at = - current_kernel_time().tv_sec; - - /* there's a matching key we can displace or an empty slot in a newly - * allocated list we can fill */ - if (nklist) { - kdebug("reissue %hu/%hu/%hu", - nklist->delkey, nklist->nkeys, nklist->maxkeys); - - RCU_INIT_POINTER(nklist->keys[nklist->delkey], key); - - rcu_assign_pointer(keyring->payload.subscriptions, nklist); - - /* dispose of the old keyring list and, if there was one, the - * displaced key */ - if (klist) { - kdebug("dispose %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - call_rcu(&klist->rcu, keyring_unlink_rcu_disposal); - } - } else if (klist->delkey < klist->nkeys) { - kdebug("replace %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - - discard = rcu_dereference_protected( - klist->keys[klist->delkey], - rwsem_is_locked(&keyring->sem)); - rcu_assign_pointer(klist->keys[klist->delkey], key); - /* The garbage collector will take care of RCU - * synchronisation */ - key_put(discard); - } else { - /* there's sufficient slack space to append directly */ - kdebug("append %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - - RCU_INIT_POINTER(klist->keys[klist->delkey], key); - smp_wmb(); - klist->nkeys++; - } + __key_get(key); + assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key)); + assoc_array_apply_edit(*_edit); + *_edit = NULL; } /* @@ -956,24 +1136,22 @@ void __key_link(struct key *keyring, struct key *key, * * Must be called with __key_link_begin() having being called. */ -void __key_link_end(struct key *keyring, struct key_type *type, - unsigned long prealloc) +void __key_link_end(struct key *keyring, + const struct keyring_index_key *index_key, + struct assoc_array_edit *edit) __releases(&keyring->sem) __releases(&keyring_serialise_link_sem) { - BUG_ON(type == NULL); - BUG_ON(type->name == NULL); - kenter("%d,%s,%lx", keyring->serial, type->name, prealloc); + BUG_ON(index_key->type == NULL); + kenter("%d,%s,", keyring->serial, index_key->type->name); - if (type == &key_type_keyring) + if (index_key->type == &key_type_keyring) up_write(&keyring_serialise_link_sem); - if (prealloc) { - if (prealloc & KEY_LINK_FIXQUOTA) - key_payload_reserve(keyring, - keyring->datalen - - KEYQUOTA_LINK_BYTES); - kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA)); + if (edit && !edit->dead_leaf) { + key_payload_reserve(keyring, + keyring->datalen - KEYQUOTA_LINK_BYTES); + assoc_array_cancel_edit(edit); } up_write(&keyring->sem); } @@ -1000,20 +1178,28 @@ void __key_link_end(struct key *keyring, struct key_type *type, */ int key_link(struct key *keyring, struct key *key) { - unsigned long prealloc; + struct assoc_array_edit *edit; int ret; + kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage)); + key_check(keyring); key_check(key); - ret = __key_link_begin(keyring, key->type, key->description, &prealloc); + if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) && + !test_bit(KEY_FLAG_TRUSTED, &key->flags)) + return -EPERM; + + ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret == 0) { + kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage)); ret = __key_link_check_live_key(keyring, key); if (ret == 0) - __key_link(keyring, key, &prealloc); - __key_link_end(keyring, key->type, prealloc); + __key_link(key, &edit); + __key_link_end(keyring, &key->index_key, edit); } + kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage)); return ret; } EXPORT_SYMBOL(key_link); @@ -1037,90 +1223,37 @@ EXPORT_SYMBOL(key_link); */ int key_unlink(struct key *keyring, struct key *key) { - struct keyring_list *klist, *nklist; - int loop, ret; + struct assoc_array_edit *edit; + int ret; key_check(keyring); key_check(key); - ret = -ENOTDIR; if (keyring->type != &key_type_keyring) - goto error; + return -ENOTDIR; down_write(&keyring->sem); - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* search the keyring for the key */ - for (loop = 0; loop < klist->nkeys; loop++) - if (rcu_access_pointer(klist->keys[loop]) == key) - goto key_is_present; + edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, + &key->index_key); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); + goto error; } - - up_write(&keyring->sem); ret = -ENOENT; - goto error; - -key_is_present: - /* we need to copy the key list for RCU purposes */ - nklist = kmalloc(sizeof(*klist) + - sizeof(struct key *) * klist->maxkeys, - GFP_KERNEL); - if (!nklist) - goto nomem; - nklist->maxkeys = klist->maxkeys; - nklist->nkeys = klist->nkeys - 1; - - if (loop > 0) - memcpy(&nklist->keys[0], - &klist->keys[0], - loop * sizeof(struct key *)); - - if (loop < nklist->nkeys) - memcpy(&nklist->keys[loop], - &klist->keys[loop + 1], - (nklist->nkeys - loop) * sizeof(struct key *)); - - /* adjust the user's quota */ - key_payload_reserve(keyring, - keyring->datalen - KEYQUOTA_LINK_BYTES); - - rcu_assign_pointer(keyring->payload.subscriptions, nklist); - - up_write(&keyring->sem); - - /* schedule for later cleanup */ - klist->delkey = loop; - call_rcu(&klist->rcu, keyring_unlink_rcu_disposal); + if (edit == NULL) + goto error; + assoc_array_apply_edit(edit); + key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES); ret = 0; error: - return ret; -nomem: - ret = -ENOMEM; up_write(&keyring->sem); - goto error; + return ret; } EXPORT_SYMBOL(key_unlink); -/* - * Dispose of a keyring list after the RCU grace period, releasing the keys it - * links to. - */ -static void keyring_clear_rcu_disposal(struct rcu_head *rcu) -{ - struct keyring_list *klist; - int loop; - - klist = container_of(rcu, struct keyring_list, rcu); - - for (loop = klist->nkeys - 1; loop >= 0; loop--) - key_put(rcu_access_pointer(klist->keys[loop])); - - kfree(klist); -} - /** * keyring_clear - Clear a keyring * @keyring: The keyring to clear. @@ -1131,33 +1264,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu) */ int keyring_clear(struct key *keyring) { - struct keyring_list *klist; + struct assoc_array_edit *edit; int ret; - ret = -ENOTDIR; - if (keyring->type == &key_type_keyring) { - /* detach the pointer block with the locks held */ - down_write(&keyring->sem); - - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* adjust the quota */ - key_payload_reserve(keyring, - sizeof(struct keyring_list)); - - rcu_assign_pointer(keyring->payload.subscriptions, - NULL); - } - - up_write(&keyring->sem); + if (keyring->type != &key_type_keyring) + return -ENOTDIR; - /* free the keys after the locks have been dropped */ - if (klist) - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); + down_write(&keyring->sem); + edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); + } else { + if (edit) + assoc_array_apply_edit(edit); + key_payload_reserve(keyring, 0); ret = 0; } + up_write(&keyring->sem); return ret; } EXPORT_SYMBOL(keyring_clear); @@ -1169,111 +1294,68 @@ EXPORT_SYMBOL(keyring_clear); */ static void keyring_revoke(struct key *keyring) { - struct keyring_list *klist; + struct assoc_array_edit *edit; + + edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); + if (!IS_ERR(edit)) { + if (edit) + assoc_array_apply_edit(edit); + key_payload_reserve(keyring, 0); + } +} + +static bool keyring_gc_select_iterator(void *object, void *iterator_data) +{ + struct key *key = keyring_ptr_to_key(object); + time_t *limit = iterator_data; - klist = rcu_dereference_locked_keyring(keyring); + if (key_is_dead(key, *limit)) + return false; + key_get(key); + return true; +} - /* adjust the quota */ - key_payload_reserve(keyring, 0); +static int keyring_gc_check_iterator(const void *object, void *iterator_data) +{ + const struct key *key = keyring_ptr_to_key(object); + time_t *limit = iterator_data; - if (klist) { - rcu_assign_pointer(keyring->payload.subscriptions, NULL); - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); - } + key_check(key); + return key_is_dead(key, *limit); } /* - * Collect garbage from the contents of a keyring, replacing the old list with - * a new one with the pointers all shuffled down. + * Garbage collect pointers from a keyring. * - * Dead keys are classed as oned that are flagged as being dead or are revoked, - * expired or negative keys that were revoked or expired before the specified - * limit. + * Not called with any locks held. The keyring's key struct will not be + * deallocated under us as only our caller may deallocate it. */ void keyring_gc(struct key *keyring, time_t limit) { - struct keyring_list *klist, *new; - struct key *key; - int loop, keep, max; - - kenter("{%x,%s}", key_serial(keyring), keyring->description); - - down_write(&keyring->sem); - - klist = rcu_dereference_locked_keyring(keyring); - if (!klist) - goto no_klist; - - /* work out how many subscriptions we're keeping */ - keep = 0; - for (loop = klist->nkeys - 1; loop >= 0; loop--) - if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring), - limit)) - keep++; - - if (keep == klist->nkeys) - goto just_return; - - /* allocate a new keyring payload */ - max = roundup(keep, 4); - new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *), - GFP_KERNEL); - if (!new) - goto nomem; - new->maxkeys = max; - new->nkeys = 0; - new->delkey = 0; - - /* install the live keys - * - must take care as expired keys may be updated back to life - */ - keep = 0; - for (loop = klist->nkeys - 1; loop >= 0; loop--) { - key = rcu_deref_link_locked(klist, loop, keyring); - if (!key_is_dead(key, limit)) { - if (keep >= max) - goto discard_new; - RCU_INIT_POINTER(new->keys[keep++], key_get(key)); - } - } - new->nkeys = keep; - - /* adjust the quota */ - key_payload_reserve(keyring, - sizeof(struct keyring_list) + - KEYQUOTA_LINK_BYTES * keep); + int result; - if (keep == 0) { - rcu_assign_pointer(keyring->payload.subscriptions, NULL); - kfree(new); - } else { - rcu_assign_pointer(keyring->payload.subscriptions, new); - } + kenter("%x{%s}", keyring->serial, keyring->description ?: ""); - up_write(&keyring->sem); + if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) + goto dont_gc; - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); - kleave(" [yes]"); - return; - -discard_new: - new->nkeys = keep; - keyring_clear_rcu_disposal(&new->rcu); - up_write(&keyring->sem); - kleave(" [discard]"); - return; - -just_return: - up_write(&keyring->sem); - kleave(" [no dead]"); - return; + /* scan the keyring looking for dead keys */ + rcu_read_lock(); + result = assoc_array_iterate(&keyring->keys, + keyring_gc_check_iterator, &limit); + rcu_read_unlock(); + if (result == true) + goto do_gc; -no_klist: - up_write(&keyring->sem); - kleave(" [no_klist]"); +dont_gc: + kleave(" [no gc]"); return; -nomem: +do_gc: + down_write(&keyring->sem); + assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops, + keyring_gc_select_iterator, &limit); up_write(&keyring->sem); - kleave(" [oom]"); + kleave(" [gc]"); } diff --git a/security/keys/persistent.c b/security/keys/persistent.c new file mode 100644 index 0000000..0ad3ee2 --- /dev/null +++ b/security/keys/persistent.c @@ -0,0 +1,167 @@ +/* General persistent per-UID keyrings register + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/user_namespace.h> +#include "internal.h" + +unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */ + +/* + * Create the persistent keyring register for the current user namespace. + * + * Called with the namespace's sem locked for writing. + */ +static int key_create_persistent_register(struct user_namespace *ns) +{ + struct key *reg = keyring_alloc(".persistent_register", + KUIDT_INIT(0), KGIDT_INIT(0), + current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA, NULL); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + ns->persistent_keyring_register = reg; + return 0; +} + +/* + * Create the persistent keyring for the specified user. + * + * Called with the namespace's sem locked for writing. + */ +static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid, + struct keyring_index_key *index_key) +{ + struct key *persistent; + key_ref_t reg_ref, persistent_ref; + + if (!ns->persistent_keyring_register) { + long err = key_create_persistent_register(ns); + if (err < 0) + return ERR_PTR(err); + } else { + reg_ref = make_key_ref(ns->persistent_keyring_register, true); + persistent_ref = find_key_to_update(reg_ref, index_key); + if (persistent_ref) + return persistent_ref; + } + + persistent = keyring_alloc(index_key->description, + uid, INVALID_GID, current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA, + ns->persistent_keyring_register); + if (IS_ERR(persistent)) + return ERR_CAST(persistent); + + return make_key_ref(persistent, true); +} + +/* + * Get the persistent keyring for a specific UID and link it to the nominated + * keyring. + */ +static long key_get_persistent(struct user_namespace *ns, kuid_t uid, + key_ref_t dest_ref) +{ + struct keyring_index_key index_key; + struct key *persistent; + key_ref_t reg_ref, persistent_ref; + char buf[32]; + long ret; + + /* Look in the register if it exists */ + index_key.type = &key_type_keyring; + index_key.description = buf; + index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid)); + + if (ns->persistent_keyring_register) { + reg_ref = make_key_ref(ns->persistent_keyring_register, true); + down_read(&ns->persistent_keyring_register_sem); + persistent_ref = find_key_to_update(reg_ref, &index_key); + up_read(&ns->persistent_keyring_register_sem); + + if (persistent_ref) + goto found; + } + + /* It wasn't in the register, so we'll need to create it. We might + * also need to create the register. + */ + down_write(&ns->persistent_keyring_register_sem); + persistent_ref = key_create_persistent(ns, uid, &index_key); + up_write(&ns->persistent_keyring_register_sem); + if (!IS_ERR(persistent_ref)) + goto found; + + return PTR_ERR(persistent_ref); + +found: + ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK); + if (ret == 0) { + persistent = key_ref_to_ptr(persistent_ref); + ret = key_link(key_ref_to_ptr(dest_ref), persistent); + if (ret == 0) { + key_set_timeout(persistent, persistent_keyring_expiry); + ret = persistent->serial; + } + } + + key_ref_put(persistent_ref); + return ret; +} + +/* + * Get the persistent keyring for a specific UID and link it to the nominated + * keyring. + */ +long keyctl_get_persistent(uid_t _uid, key_serial_t destid) +{ + struct user_namespace *ns = current_user_ns(); + key_ref_t dest_ref; + kuid_t uid; + long ret; + + /* -1 indicates the current user */ + if (_uid == (uid_t)-1) { + uid = current_uid(); + } else { + uid = make_kuid(ns, _uid); + if (!uid_valid(uid)) + return -EINVAL; + + /* You can only see your own persistent cache if you're not + * sufficiently privileged. + */ + if (!uid_eq(uid, current_uid()) && + !uid_eq(uid, current_euid()) && + !ns_capable(ns, CAP_SETUID)) + return -EPERM; + } + + /* There must be a destination keyring */ + dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE); + if (IS_ERR(dest_ref)) + return PTR_ERR(dest_ref); + if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) { + ret = -ENOTDIR; + goto out_put_dest; + } + + ret = key_get_persistent(ns, uid, dest_ref); + +out_put_dest: + key_ref_put(dest_ref); + return ret; +} diff --git a/security/keys/proc.c b/security/keys/proc.c index 217b685..88e9a46 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v) static int proc_keys_show(struct seq_file *m, void *v) { - const struct cred *cred = current_cred(); struct rb_node *_p = v; struct key *key = rb_entry(_p, struct key, serial_node); struct timespec now; @@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v) char xbuf[12]; int rc; + struct keyring_search_context ctx = { + .index_key.type = key->type, + .index_key.description = key->description, + .cred = current_cred(), + .match = lookup_user_key_possessed, + .match_data = key, + .flags = (KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_LOOKUP_DIRECT), + }; + key_ref = make_key_ref(key, 0); /* determine if the key is possessed by this process (a test we can * skip if the key does not indicate the possessor can view it */ if (key->perm & KEY_POS_VIEW) { - skey_ref = search_my_process_keyrings(key->type, key, - lookup_user_key_possessed, - true, cred); + skey_ref = search_my_process_keyrings(&ctx); if (!IS_ERR(skey_ref)) { key_ref_put(skey_ref); key_ref = make_key_ref(key, 1); @@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v) * - the caller holds a spinlock, and thus the RCU read lock, making our * access to __current_cred() safe */ - rc = key_task_permission(key_ref, cred, KEY_VIEW); + rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW); if (rc < 0) return 0; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 42defae..0cf8a13 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) if (IS_ERR(keyring)) return PTR_ERR(keyring); } else { - atomic_inc(&keyring->usage); + __key_get(keyring); } /* install the keyring */ @@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk) * In the case of a successful return, the possession attribute is set on the * returned key reference. */ -key_ref_t search_my_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - bool no_state_check, - const struct cred *cred) +key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx) { key_ref_t key_ref, ret, err; @@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (cred->thread_keyring) { + if (ctx->cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->thread_keyring, 1), - cred, type, description, match, no_state_check); + make_key_ref(ctx->cred->thread_keyring, 1), ctx); if (!IS_ERR(key_ref)) goto found; @@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (cred->process_keyring) { + if (ctx->cred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->process_keyring, 1), - cred, type, description, match, no_state_check); + make_key_ref(ctx->cred->process_keyring, 1), ctx); if (!IS_ERR(key_ref)) goto found; @@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (cred->session_keyring) { + if (ctx->cred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( - make_key_ref(rcu_dereference(cred->session_keyring), 1), - cred, type, description, match, no_state_check); + make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1), + ctx); rcu_read_unlock(); if (!IS_ERR(key_ref)) @@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if (cred->user->session_keyring) { + else if (ctx->cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->user->session_keyring, 1), - cred, type, description, match, no_state_check); + make_key_ref(ctx->cred->user->session_keyring, 1), + ctx); if (!IS_ERR(key_ref)) goto found; @@ -437,18 +431,14 @@ found: * * Return same as search_my_process_keyrings(). */ -key_ref_t search_process_keyrings(struct key_type *type, - const void *description, - key_match_func_t match, - const struct cred *cred) +key_ref_t search_process_keyrings(struct keyring_search_context *ctx) { struct request_key_auth *rka; key_ref_t key_ref, ret = ERR_PTR(-EACCES), err; might_sleep(); - key_ref = search_my_process_keyrings(type, description, match, - false, cred); + key_ref = search_my_process_keyrings(ctx); if (!IS_ERR(key_ref)) goto found; err = key_ref; @@ -457,18 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (cred->request_key_auth && - cred == current_cred() && - type != &key_type_request_key_auth + if (ctx->cred->request_key_auth && + ctx->cred == current_cred() && + ctx->index_key.type != &key_type_request_key_auth ) { + const struct cred *cred = ctx->cred; + /* defend against the auth key being revoked */ down_read(&cred->request_key_auth->sem); - if (key_validate(cred->request_key_auth) == 0) { - rka = cred->request_key_auth->payload.data; + if (key_validate(ctx->cred->request_key_auth) == 0) { + rka = ctx->cred->request_key_auth->payload.data; - key_ref = search_process_keyrings(type, description, - match, rka->cred); + ctx->cred = rka->cred; + key_ref = search_process_keyrings(ctx); + ctx->cred = cred; up_read(&cred->request_key_auth->sem); @@ -522,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target) key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, key_perm_t perm) { + struct keyring_search_context ctx = { + .match = lookup_user_key_possessed, + .flags = (KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_LOOKUP_DIRECT), + }; struct request_key_auth *rka; - const struct cred *cred; struct key *key; key_ref_t key_ref, skey_ref; int ret; try_again: - cred = get_current_cred(); + ctx.cred = get_current_cred(); key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: - if (!cred->thread_keyring) { + if (!ctx.cred->thread_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; @@ -546,13 +543,13 @@ try_again: goto reget_creds; } - key = cred->thread_keyring; - atomic_inc(&key->usage); + key = ctx.cred->thread_keyring; + __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_PROCESS_KEYRING: - if (!cred->process_keyring) { + if (!ctx.cred->process_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; @@ -564,13 +561,13 @@ try_again: goto reget_creds; } - key = cred->process_keyring; - atomic_inc(&key->usage); + key = ctx.cred->process_keyring; + __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!cred->session_keyring) { + if (!ctx.cred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -580,13 +577,13 @@ try_again: ret = join_session_keyring(NULL); else ret = install_session_keyring( - cred->user->session_keyring); + ctx.cred->user->session_keyring); if (ret < 0) goto error; goto reget_creds; - } else if (cred->session_keyring == - cred->user->session_keyring && + } else if (ctx.cred->session_keyring == + ctx.cred->user->session_keyring && lflags & KEY_LOOKUP_CREATE) { ret = join_session_keyring(NULL); if (ret < 0) @@ -595,33 +592,33 @@ try_again: } rcu_read_lock(); - key = rcu_dereference(cred->session_keyring); - atomic_inc(&key->usage); + key = rcu_dereference(ctx.cred->session_keyring); + __key_get(key); rcu_read_unlock(); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_KEYRING: - if (!cred->user->uid_keyring) { + if (!ctx.cred->user->uid_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = cred->user->uid_keyring; - atomic_inc(&key->usage); + key = ctx.cred->user->uid_keyring; + __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!cred->user->session_keyring) { + if (!ctx.cred->user->session_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = cred->user->session_keyring; - atomic_inc(&key->usage); + key = ctx.cred->user->session_keyring; + __key_get(key); key_ref = make_key_ref(key, 1); break; @@ -631,29 +628,29 @@ try_again: goto error; case KEY_SPEC_REQKEY_AUTH_KEY: - key = cred->request_key_auth; + key = ctx.cred->request_key_auth; if (!key) goto error; - atomic_inc(&key->usage); + __key_get(key); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_REQUESTOR_KEYRING: - if (!cred->request_key_auth) + if (!ctx.cred->request_key_auth) goto error; - down_read(&cred->request_key_auth->sem); + down_read(&ctx.cred->request_key_auth->sem); if (test_bit(KEY_FLAG_REVOKED, - &cred->request_key_auth->flags)) { + &ctx.cred->request_key_auth->flags)) { key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { - rka = cred->request_key_auth->payload.data; + rka = ctx.cred->request_key_auth->payload.data; key = rka->dest_keyring; - atomic_inc(&key->usage); + __key_get(key); } - up_read(&cred->request_key_auth->sem); + up_read(&ctx.cred->request_key_auth->sem); if (!key) goto error; key_ref = make_key_ref(key, 1); @@ -673,9 +670,13 @@ try_again: key_ref = make_key_ref(key, 0); /* check to see if we possess the key */ - skey_ref = search_process_keyrings(key->type, key, - lookup_user_key_possessed, - cred); + ctx.index_key.type = key->type; + ctx.index_key.description = key->description; + ctx.index_key.desc_len = strlen(key->description); + ctx.match_data = key; + kdebug("check possessed"); + skey_ref = search_process_keyrings(&ctx); + kdebug("possessed=%p", skey_ref); if (!IS_ERR(skey_ref)) { key_put(key); @@ -715,14 +716,14 @@ try_again: goto invalid_key; /* check the permissions */ - ret = key_task_permission(key_ref, cred, perm); + ret = key_task_permission(key_ref, ctx.cred, perm); if (ret < 0) goto invalid_key; key->last_used_at = current_kernel_time().tv_sec; error: - put_cred(cred); + put_cred(ctx.cred); return key_ref; invalid_key: @@ -733,7 +734,7 @@ invalid_key: /* if we attempted to install a keyring, then it may have caused new * creds to be installed */ reget_creds: - put_cred(cred); + put_cred(ctx.cred); goto try_again; } @@ -856,3 +857,13 @@ void key_change_session_keyring(struct callback_head *twork) commit_creds(new); } + +/* + * Make sure that root's user and user-session keyrings exist. + */ +static int __init init_root_keyring(void) +{ + return install_user_keyrings(); +} + +late_initcall(init_root_keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index c411f9b..3814119 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -345,33 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) * May return a key that's already under construction instead if there was a * race between two thread calling request_key(). */ -static int construct_alloc_key(struct key_type *type, - const char *description, +static int construct_alloc_key(struct keyring_search_context *ctx, struct key *dest_keyring, unsigned long flags, struct key_user *user, struct key **_key) { - const struct cred *cred = current_cred(); - unsigned long prealloc; + struct assoc_array_edit *edit; struct key *key; key_perm_t perm; key_ref_t key_ref; int ret; - kenter("%s,%s,,,", type->name, description); + kenter("%s,%s,,,", + ctx->index_key.type->name, ctx->index_key.description); *_key = NULL; mutex_lock(&user->cons_lock); perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; perm |= KEY_USR_VIEW; - if (type->read) + if (ctx->index_key.type->read) perm |= KEY_POS_READ; - if (type == &key_type_keyring || type->update) + if (ctx->index_key.type == &key_type_keyring || + ctx->index_key.type->update) perm |= KEY_POS_WRITE; - key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, + key = key_alloc(ctx->index_key.type, ctx->index_key.description, + ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred, perm, flags); if (IS_ERR(key)) goto alloc_failed; @@ -379,8 +380,7 @@ static int construct_alloc_key(struct key_type *type, set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); if (dest_keyring) { - ret = __key_link_begin(dest_keyring, type, description, - &prealloc); + ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit); if (ret < 0) goto link_prealloc_failed; } @@ -390,16 +390,16 @@ static int construct_alloc_key(struct key_type *type, * waited for locks */ mutex_lock(&key_construction_mutex); - key_ref = search_process_keyrings(type, description, type->match, cred); + key_ref = search_process_keyrings(ctx); if (!IS_ERR(key_ref)) goto key_already_present; if (dest_keyring) - __key_link(dest_keyring, key, &prealloc); + __key_link(key, &edit); mutex_unlock(&key_construction_mutex); if (dest_keyring) - __key_link_end(dest_keyring, type, prealloc); + __key_link_end(dest_keyring, &ctx->index_key, edit); mutex_unlock(&user->cons_lock); *_key = key; kleave(" = 0 [%d]", key_serial(key)); @@ -414,8 +414,8 @@ key_already_present: if (dest_keyring) { ret = __key_link_check_live_key(dest_keyring, key); if (ret == 0) - __key_link(dest_keyring, key, &prealloc); - __key_link_end(dest_keyring, type, prealloc); + __key_link(key, &edit); + __key_link_end(dest_keyring, &ctx->index_key, edit); if (ret < 0) goto link_check_failed; } @@ -444,8 +444,7 @@ alloc_failed: /* * Commence key construction. */ -static struct key *construct_key_and_link(struct key_type *type, - const char *description, +static struct key *construct_key_and_link(struct keyring_search_context *ctx, const char *callout_info, size_t callout_len, void *aux, @@ -464,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type, construct_get_dest_keyring(&dest_keyring); - ret = construct_alloc_key(type, description, dest_keyring, flags, user, - &key); + ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key); key_user_put(user); if (ret == 0) { @@ -529,17 +527,24 @@ struct key *request_key_and_link(struct key_type *type, struct key *dest_keyring, unsigned long flags) { - const struct cred *cred = current_cred(); + struct keyring_search_context ctx = { + .index_key.type = type, + .index_key.description = description, + .cred = current_cred(), + .match = type->match, + .match_data = description, + .flags = KEYRING_SEARCH_LOOKUP_DIRECT, + }; struct key *key; key_ref_t key_ref; int ret; kenter("%s,%s,%p,%zu,%p,%p,%lx", - type->name, description, callout_info, callout_len, aux, - dest_keyring, flags); + ctx.index_key.type->name, ctx.index_key.description, + callout_info, callout_len, aux, dest_keyring, flags); /* search all the process keyrings for a key */ - key_ref = search_process_keyrings(type, description, type->match, cred); + key_ref = search_process_keyrings(&ctx); if (!IS_ERR(key_ref)) { key = key_ref_to_ptr(key_ref); @@ -562,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type, if (!callout_info) goto error; - key = construct_key_and_link(type, description, callout_info, - callout_len, aux, dest_keyring, - flags); + key = construct_key_and_link(&ctx, callout_info, callout_len, + aux, dest_keyring, flags); } error: @@ -592,8 +596,10 @@ int wait_for_key_construction(struct key *key, bool intr) intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret < 0) return ret; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { + smp_rmb(); return key->type_data.reject_error; + } return key_validate(key); } EXPORT_SYMBOL(wait_for_key_construction); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 85730d5..7495a93 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <asm/uaccess.h> #include "internal.h" +#include <keys/user-type.h> static int request_key_auth_instantiate(struct key *, struct key_preparsed_payload *); @@ -222,32 +223,26 @@ error_alloc: } /* - * See if an authorisation key is associated with a particular key. - */ -static int key_get_instantiation_authkey_match(const struct key *key, - const void *_id) -{ - struct request_key_auth *rka = key->payload.data; - key_serial_t id = (key_serial_t)(unsigned long) _id; - - return rka->target_key->serial == id; -} - -/* * Search the current process's keyrings for the authorisation key for * instantiation of a key. */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { - const struct cred *cred = current_cred(); + char description[16]; + struct keyring_search_context ctx = { + .index_key.type = &key_type_request_key_auth, + .index_key.description = description, + .cred = current_cred(), + .match = user_match, + .match_data = description, + .flags = KEYRING_SEARCH_LOOKUP_DIRECT, + }; struct key *authkey; key_ref_t authkey_ref; - authkey_ref = search_process_keyrings( - &key_type_request_key_auth, - (void *) (unsigned long) target_id, - key_get_instantiation_authkey_match, - cred); + sprintf(description, "%x", target_id); + + authkey_ref = search_process_keyrings(&ctx); if (IS_ERR(authkey_ref)) { authkey = ERR_CAST(authkey_ref); diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c index ee32d18..8c0af08 100644 --- a/security/keys/sysctl.c +++ b/security/keys/sysctl.c @@ -61,5 +61,16 @@ ctl_table key_sysctls[] = { .extra1 = (void *) &zero, .extra2 = (void *) &max, }, +#ifdef CONFIG_PERSISTENT_KEYRINGS + { + .procname = "persistent_keyring_expiry", + .data = &persistent_keyring_expiry, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *) &zero, + .extra2 = (void *) &max, + }, +#endif { } }; diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 55dc889..faa2cae 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc); * arbitrary blob of data as the payload */ struct key_type key_type_user = { - .name = "user", - .instantiate = user_instantiate, - .update = user_update, - .match = user_match, - .revoke = user_revoke, - .destroy = user_destroy, - .describe = user_describe, - .read = user_read, + .name = "user", + .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, + .instantiate = user_instantiate, + .update = user_update, + .match = user_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = user_describe, + .read = user_read, }; EXPORT_SYMBOL_GPL(key_type_user); @@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user); */ struct key_type key_type_logon = { .name = "logon", + .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .instantiate = user_instantiate, .update = user_update, .match = user_match, diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 234bc2a..9a62045 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -397,7 +397,8 @@ void common_lsm_audit(struct common_audit_data *a, if (a == NULL) return; /* we use GFP_ATOMIC so we won't sleep */ - ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); + ab = audit_log_start(current->audit_context, GFP_ATOMIC | __GFP_NOWARN, + AUDIT_AVC); if (ab == NULL) return; diff --git a/security/security.c b/security/security.c index 4dc31f4..15b6928 100644 --- a/security/security.c +++ b/security/security.c @@ -1340,22 +1340,17 @@ int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) return security_ops->xfrm_policy_delete_security(ctx); } -int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_state_alloc_security(x, sec_ctx, 0); + return security_ops->xfrm_state_alloc(x, sec_ctx); } EXPORT_SYMBOL(security_xfrm_state_alloc); int security_xfrm_state_alloc_acquire(struct xfrm_state *x, struct xfrm_sec_ctx *polsec, u32 secid) { - if (!polsec) - return 0; - /* - * We want the context to be taken from secid which is usually - * from the sock. - */ - return security_ops->xfrm_state_alloc_security(x, NULL, secid); + return security_ops->xfrm_state_alloc_acquire(x, polsec, secid); } int security_xfrm_state_delete(struct xfrm_state *x) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c540795..794c3ca 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -95,7 +95,9 @@ #include "audit.h" #include "avc_ss.h" -#define NUM_SEL_MNT_OPTS 5 +#define SB_TYPE_FMT "%s%s%s" +#define SB_SUBTYPE(sb) (sb->s_subtype && sb->s_subtype[0]) +#define SB_TYPE_ARGS(sb) sb->s_type->name, SB_SUBTYPE(sb) ? "." : "", SB_SUBTYPE(sb) ? sb->s_subtype : "" extern struct security_operations *security_ops; @@ -139,12 +141,28 @@ static struct kmem_cache *sel_inode_cache; * This function checks the SECMARK reference counter to see if any SECMARK * targets are currently configured, if the reference counter is greater than * zero SECMARK is considered to be enabled. Returns true (1) if SECMARK is - * enabled, false (0) if SECMARK is disabled. + * enabled, false (0) if SECMARK is disabled. If the always_check_network + * policy capability is enabled, SECMARK is always considered enabled. * */ static int selinux_secmark_enabled(void) { - return (atomic_read(&selinux_secmark_refcount) > 0); + return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount)); +} + +/** + * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled + * + * Description: + * This function checks if NetLabel or labeled IPSEC is enabled. Returns true + * (1) if any are enabled or false (0) if neither are enabled. If the + * always_check_network policy capability is enabled, peer labeling + * is always considered enabled. + * + */ +static int selinux_peerlbl_enabled(void) +{ + return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled()); } /* @@ -309,8 +327,11 @@ enum { Opt_defcontext = 3, Opt_rootcontext = 4, Opt_labelsupport = 5, + Opt_nextmntopt = 6, }; +#define NUM_SEL_MNT_OPTS (Opt_nextmntopt - 1) + static const match_table_t tokens = { {Opt_context, CONTEXT_STR "%s"}, {Opt_fscontext, FSCONTEXT_STR "%s"}, @@ -355,6 +376,29 @@ static int may_context_mount_inode_relabel(u32 sid, return rc; } +static int selinux_is_sblabel_mnt(struct super_block *sb) +{ + struct superblock_security_struct *sbsec = sb->s_security; + + if (sbsec->behavior == SECURITY_FS_USE_XATTR || + sbsec->behavior == SECURITY_FS_USE_TRANS || + sbsec->behavior == SECURITY_FS_USE_TASK) + return 1; + + /* Special handling for sysfs. Is genfs but also has setxattr handler*/ + if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) + return 1; + + /* + * Special handling for rootfs. Is genfs but supports + * setting SELinux context on in-core inodes. + */ + if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) + return 1; + + return 0; +} + static int sb_finish_set_opts(struct super_block *sb) { struct superblock_security_struct *sbsec = sb->s_security; @@ -369,8 +413,8 @@ static int sb_finish_set_opts(struct super_block *sb) the first boot of the SELinux kernel before we have assigned xattr values to the filesystem. */ if (!root_inode->i_op->getxattr) { - printk(KERN_WARNING "SELinux: (dev %s, type %s) has no " - "xattr support\n", sb->s_id, sb->s_type->name); + printk(KERN_WARNING "SELinux: (dev %s, type "SB_TYPE_FMT") has no " + "xattr support\n", sb->s_id, SB_TYPE_ARGS(sb)); rc = -EOPNOTSUPP; goto out; } @@ -378,35 +422,27 @@ static int sb_finish_set_opts(struct super_block *sb) if (rc < 0 && rc != -ENODATA) { if (rc == -EOPNOTSUPP) printk(KERN_WARNING "SELinux: (dev %s, type " - "%s) has no security xattr handler\n", - sb->s_id, sb->s_type->name); + SB_TYPE_FMT") has no security xattr handler\n", + sb->s_id, SB_TYPE_ARGS(sb)); else printk(KERN_WARNING "SELinux: (dev %s, type " - "%s) getxattr errno %d\n", sb->s_id, - sb->s_type->name, -rc); + SB_TYPE_FMT") getxattr errno %d\n", sb->s_id, + SB_TYPE_ARGS(sb), -rc); goto out; } } - sbsec->flags |= (SE_SBINITIALIZED | SE_SBLABELSUPP); - if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) - printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n", - sb->s_id, sb->s_type->name); + printk(KERN_ERR "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), unknown behavior\n", + sb->s_id, SB_TYPE_ARGS(sb)); else - printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n", - sb->s_id, sb->s_type->name, + printk(KERN_DEBUG "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), %s\n", + sb->s_id, SB_TYPE_ARGS(sb), labeling_behaviors[sbsec->behavior-1]); - if (sbsec->behavior == SECURITY_FS_USE_GENFS || - sbsec->behavior == SECURITY_FS_USE_MNTPOINT || - sbsec->behavior == SECURITY_FS_USE_NONE || - sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) - sbsec->flags &= ~SE_SBLABELSUPP; - - /* Special handling for sysfs. Is genfs but also has setxattr handler*/ - if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) - sbsec->flags |= SE_SBLABELSUPP; + sbsec->flags |= SE_SBINITIALIZED; + if (selinux_is_sblabel_mnt(sb)) + sbsec->flags |= SBLABEL_MNT; /* Initialize the root inode. */ rc = inode_doinit_with_dentry(root_inode, root); @@ -460,15 +496,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb, if (!ss_initialized) return -EINVAL; + /* make sure we always check enough bits to cover the mask */ + BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS)); + tmp = sbsec->flags & SE_MNTMASK; /* count the number of mount options for this sb */ - for (i = 0; i < 8; i++) { + for (i = 0; i < NUM_SEL_MNT_OPTS; i++) { if (tmp & 0x01) opts->num_mnt_opts++; tmp >>= 1; } /* Check if the Label support flag is set */ - if (sbsec->flags & SE_SBLABELSUPP) + if (sbsec->flags & SBLABEL_MNT) opts->num_mnt_opts++; opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC); @@ -515,9 +554,9 @@ static int selinux_get_mnt_opts(const struct super_block *sb, opts->mnt_opts[i] = context; opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT; } - if (sbsec->flags & SE_SBLABELSUPP) { + if (sbsec->flags & SBLABEL_MNT) { opts->mnt_opts[i] = NULL; - opts->mnt_opts_flags[i++] = SE_SBLABELSUPP; + opts->mnt_opts_flags[i++] = SBLABEL_MNT; } BUG_ON(i != opts->num_mnt_opts); @@ -561,7 +600,6 @@ static int selinux_set_mnt_opts(struct super_block *sb, const struct cred *cred = current_cred(); int rc = 0, i; struct superblock_security_struct *sbsec = sb->s_security; - const char *name = sb->s_type->name; struct inode *inode = sbsec->sb->s_root->d_inode; struct inode_security_struct *root_isec = inode->i_security; u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; @@ -614,14 +652,14 @@ static int selinux_set_mnt_opts(struct super_block *sb, for (i = 0; i < num_opts; i++) { u32 sid; - if (flags[i] == SE_SBLABELSUPP) + if (flags[i] == SBLABEL_MNT) continue; rc = security_context_to_sid(mount_options[i], strlen(mount_options[i]), &sid); if (rc) { printk(KERN_WARNING "SELinux: security_context_to_sid" - "(%s) failed for (dev %s, type %s) errno=%d\n", - mount_options[i], sb->s_id, name, rc); + "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n", + mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc); goto out; } switch (flags[i]) { @@ -685,9 +723,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, * Determine the labeling behavior to use for this * filesystem type. */ - rc = security_fs_use((sbsec->flags & SE_SBPROC) ? - "proc" : sb->s_type->name, - &sbsec->behavior, &sbsec->sid); + rc = security_fs_use(sb); if (rc) { printk(KERN_WARNING "%s: security_fs_use(%s) returned %d\n", @@ -770,7 +806,8 @@ out: out_double_mount: rc = -EINVAL; printk(KERN_WARNING "SELinux: mount invalid. Same superblock, different " - "security settings for (dev %s, type %s)\n", sb->s_id, name); + "security settings for (dev %s, type "SB_TYPE_FMT")\n", sb->s_id, + SB_TYPE_ARGS(sb)); goto out; } @@ -1037,7 +1074,7 @@ static void selinux_write_opts(struct seq_file *m, case DEFCONTEXT_MNT: prefix = DEFCONTEXT_STR; break; - case SE_SBLABELSUPP: + case SBLABEL_MNT: seq_putc(m, ','); seq_puts(m, LABELSUPP_STR); continue; @@ -1649,7 +1686,7 @@ static int may_create(struct inode *dir, if (rc) return rc; - if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) { + if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { rc = security_transition_sid(sid, dsec->sid, tclass, &dentry->d_name, &newsid); if (rc) @@ -2437,14 +2474,14 @@ static int selinux_sb_remount(struct super_block *sb, void *data) u32 sid; size_t len; - if (flags[i] == SE_SBLABELSUPP) + if (flags[i] == SBLABEL_MNT) continue; len = strlen(mount_options[i]); rc = security_context_to_sid(mount_options[i], len, &sid); if (rc) { printk(KERN_WARNING "SELinux: security_context_to_sid" - "(%s) failed for (dev %s, type %s) errno=%d\n", - mount_options[i], sb->s_id, sb->s_type->name, rc); + "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n", + mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc); goto out_free_opts; } rc = -EINVAL; @@ -2482,8 +2519,8 @@ out_free_secdata: return rc; out_bad_option: printk(KERN_WARNING "SELinux: unable to change security options " - "during remount (dev %s, type=%s)\n", sb->s_id, - sb->s_type->name); + "during remount (dev %s, type "SB_TYPE_FMT")\n", sb->s_id, + SB_TYPE_ARGS(sb)); goto out_free_opts; } @@ -2606,7 +2643,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, if ((sbsec->flags & SE_SBINITIALIZED) && (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)) newsid = sbsec->mntpoint_sid; - else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) { + else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) { rc = security_transition_sid(sid, dsec->sid, inode_mode_to_security_class(inode->i_mode), qstr, &newsid); @@ -2628,7 +2665,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, isec->initialized = 1; } - if (!ss_initialized || !(sbsec->flags & SE_SBLABELSUPP)) + if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT)) return -EOPNOTSUPP; if (name) @@ -2830,7 +2867,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, return selinux_inode_setotherxattr(dentry, name); sbsec = inode->i_sb->s_security; - if (!(sbsec->flags & SE_SBLABELSUPP)) + if (!(sbsec->flags & SBLABEL_MNT)) return -EOPNOTSUPP; if (!inode_owner_or_capable(inode)) @@ -3791,8 +3828,12 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid) u32 nlbl_sid; u32 nlbl_type; - selinux_skb_xfrm_sid(skb, &xfrm_sid); - selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid); + err = selinux_skb_xfrm_sid(skb, &xfrm_sid); + if (unlikely(err)) + return -EACCES; + err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid); + if (unlikely(err)) + return -EACCES; err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid); if (unlikely(err)) { @@ -4246,7 +4287,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) return selinux_sock_rcv_skb_compat(sk, skb, family); secmark_active = selinux_secmark_enabled(); - peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); + peerlbl_active = selinux_peerlbl_enabled(); if (!secmark_active && !peerlbl_active) return 0; @@ -4628,7 +4669,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, secmark_active = selinux_secmark_enabled(); netlbl_active = netlbl_enabled(); - peerlbl_active = netlbl_active || selinux_xfrm_enabled(); + peerlbl_active = selinux_peerlbl_enabled(); if (!secmark_active && !peerlbl_active) return NF_ACCEPT; @@ -4780,7 +4821,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, return NF_ACCEPT; #endif secmark_active = selinux_secmark_enabled(); - peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); + peerlbl_active = selinux_peerlbl_enabled(); if (!secmark_active && !peerlbl_active) return NF_ACCEPT; @@ -5784,7 +5825,8 @@ static struct security_operations selinux_ops = { .xfrm_policy_clone_security = selinux_xfrm_policy_clone, .xfrm_policy_free_security = selinux_xfrm_policy_free, .xfrm_policy_delete_security = selinux_xfrm_policy_delete, - .xfrm_state_alloc_security = selinux_xfrm_state_alloc, + .xfrm_state_alloc = selinux_xfrm_state_alloc, + .xfrm_state_alloc_acquire = selinux_xfrm_state_alloc_acquire, .xfrm_state_free_security = selinux_xfrm_state_free, .xfrm_state_delete_security = selinux_xfrm_state_delete, .xfrm_policy_lookup = selinux_xfrm_policy_lookup, diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index aa47bca..b1dfe10 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -58,8 +58,8 @@ struct superblock_security_struct { u32 sid; /* SID of file system superblock */ u32 def_sid; /* default SID for labeling */ u32 mntpoint_sid; /* SECURITY_FS_USE_MNTPOINT context for files */ - unsigned int behavior; /* labeling behavior */ - unsigned char flags; /* which mount options were specified */ + unsigned short behavior; /* labeling behavior */ + unsigned short flags; /* which mount options were specified */ struct mutex lock; struct list_head isec_head; spinlock_t isec_lock; diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 8fd8e18..fe341ae 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -45,14 +45,15 @@ /* Mask for just the mount related flags */ #define SE_MNTMASK 0x0f /* Super block security struct flags for mount options */ +/* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */ #define CONTEXT_MNT 0x01 #define FSCONTEXT_MNT 0x02 #define ROOTCONTEXT_MNT 0x04 #define DEFCONTEXT_MNT 0x08 +#define SBLABEL_MNT 0x10 /* Non-mount related flags */ -#define SE_SBINITIALIZED 0x10 -#define SE_SBPROC 0x20 -#define SE_SBLABELSUPP 0x40 +#define SE_SBINITIALIZED 0x0100 +#define SE_SBPROC 0x0200 #define CONTEXT_STR "context=" #define FSCONTEXT_STR "fscontext=" @@ -68,12 +69,15 @@ extern int selinux_enabled; enum { POLICYDB_CAPABILITY_NETPEER, POLICYDB_CAPABILITY_OPENPERM, + POLICYDB_CAPABILITY_REDHAT1, + POLICYDB_CAPABILITY_ALWAYSNETWORK, __POLICYDB_CAPABILITY_MAX }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) extern int selinux_policycap_netpeer; extern int selinux_policycap_openperm; +extern int selinux_policycap_alwaysnetwork; /* * type_datum properties @@ -172,8 +176,7 @@ int security_get_allow_unknown(void); #define SECURITY_FS_USE_NATIVE 7 /* use native label support */ #define SECURITY_FS_USE_MAX 7 /* Highest SECURITY_FS_USE_XXX */ -int security_fs_use(const char *fstype, unsigned int *behavior, - u32 *sid); +int security_fs_use(struct super_block *sb); int security_genfs_sid(const char *fstype, char *name, u16 sclass, u32 *sid); diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 6713f04..0dec76c 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -10,29 +10,21 @@ #include <net/flow.h> int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx); + struct xfrm_user_sec_ctx *uctx); int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp); void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx); int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx); int selinux_xfrm_state_alloc(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, u32 secid); + struct xfrm_user_sec_ctx *uctx); +int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid); void selinux_xfrm_state_free(struct xfrm_state *x); int selinux_xfrm_state_delete(struct xfrm_state *x); int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl); - -/* - * Extract the security blob from the sock (it's actually on the socket) - */ -static inline struct inode_security_struct *get_sock_isec(struct sock *sk) -{ - if (!sk->sk_socket) - return NULL; - - return SOCK_INODE(sk->sk_socket)->i_security; -} + struct xfrm_policy *xp, + const struct flowi *fl); #ifdef CONFIG_SECURITY_NETWORK_XFRM extern atomic_t selinux_xfrm_refcount; @@ -42,10 +34,10 @@ static inline int selinux_xfrm_enabled(void) return (atomic_read(&selinux_xfrm_refcount) > 0); } -int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, - struct common_audit_data *ad); -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad, u8 proto); +int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad); +int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad, u8 proto); int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall); static inline void selinux_xfrm_notify_policyload(void) @@ -64,19 +56,21 @@ static inline int selinux_xfrm_enabled(void) return 0; } -static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad) +static inline int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad) { return 0; } -static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad, u8 proto) +static inline int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad, + u8 proto) { return 0; } -static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) +static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, + int ckall) { *sid = SECSID_NULL; return 0; @@ -87,10 +81,9 @@ static inline void selinux_xfrm_notify_policyload(void) } #endif -static inline void selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid) +static inline int selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid) { - int err = selinux_xfrm_decode_session(skb, sid, 0); - BUG_ON(err); + return selinux_xfrm_decode_session(skb, sid, 0); } #endif /* _SELINUX_XFRM_H_ */ diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index da4b8b2..6235d05 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -442,8 +442,7 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr) sksec->nlbl_state != NLBL_CONNLABELED) return 0; - local_bh_disable(); - bh_lock_sock_nested(sk); + lock_sock(sk); /* connected sockets are allowed to disconnect when the address family * is set to AF_UNSPEC, if that is what is happening we want to reset @@ -464,7 +463,6 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr) sksec->nlbl_state = NLBL_CONNLABELED; socket_connect_return: - bh_unlock_sock(sk); - local_bh_enable(); + release_sock(sk); return rc; } diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index c5454c0..03a72c3 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -166,6 +166,7 @@ static void sel_netnode_insert(struct sel_netnode *node) break; default: BUG(); + return; } /* we need to impose a limit on the growth of the hash table so check @@ -225,6 +226,7 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid) break; default: BUG(); + ret = -EINVAL; } if (ret != 0) goto out; diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 855e464..332ac8a 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -116,6 +116,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] = { AUDIT_MAKE_EQUIV, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT }, + { AUDIT_GET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_READ }, + { AUDIT_SET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, }; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index ff42773..5122aff 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -44,7 +44,9 @@ /* Policy capability filenames */ static char *policycap_names[] = { "network_peer_controls", - "open_perms" + "open_perms", + "redhat1", + "always_check_network" }; unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 30f119b..820313a 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -213,7 +213,12 @@ netlbl_import_failure: } #endif /* CONFIG_NETLABEL */ -int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) +/* + * Check to see if all the bits set in e2 are also set in e1. Optionally, + * if last_e2bit is non-zero, the highest set bit in e2 cannot exceed + * last_e2bit. + */ +int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit) { struct ebitmap_node *n1, *n2; int i; @@ -223,14 +228,25 @@ int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) n1 = e1->node; n2 = e2->node; + while (n1 && n2 && (n1->startbit <= n2->startbit)) { if (n1->startbit < n2->startbit) { n1 = n1->next; continue; } - for (i = 0; i < EBITMAP_UNIT_NUMS; i++) { + for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i]; ) + i--; /* Skip trailing NULL map entries */ + if (last_e2bit && (i >= 0)) { + u32 lastsetbit = n2->startbit + i * EBITMAP_UNIT_SIZE + + __fls(n2->maps[i]); + if (lastsetbit > last_e2bit) + return 0; + } + + while (i >= 0) { if ((n1->maps[i] & n2->maps[i]) != n2->maps[i]) return 0; + i--; } n1 = n1->next; diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index 922f8af..712c8a7 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -16,7 +16,13 @@ #include <net/netlabel.h> -#define EBITMAP_UNIT_NUMS ((32 - sizeof(void *) - sizeof(u32)) \ +#ifdef CONFIG_64BIT +#define EBITMAP_NODE_SIZE 64 +#else +#define EBITMAP_NODE_SIZE 32 +#endif + +#define EBITMAP_UNIT_NUMS ((EBITMAP_NODE_SIZE-sizeof(void *)-sizeof(u32))\ / sizeof(unsigned long)) #define EBITMAP_UNIT_SIZE BITS_PER_LONG #define EBITMAP_SIZE (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE) @@ -117,7 +123,7 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n, int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src); -int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2); +int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit); int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); void ebitmap_destroy(struct ebitmap *e); diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 40de8d3..c85bc1e 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -160,8 +160,6 @@ void mls_sid_to_context(struct context *context, int mls_level_isvalid(struct policydb *p, struct mls_level *l) { struct level_datum *levdatum; - struct ebitmap_node *node; - int i; if (!l->sens || l->sens > p->p_levels.nprim) return 0; @@ -170,19 +168,13 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l) if (!levdatum) return 0; - ebitmap_for_each_positive_bit(&l->cat, node, i) { - if (i > p->p_cats.nprim) - return 0; - if (!ebitmap_get_bit(&levdatum->level->cat, i)) { - /* - * Category may not be associated with - * sensitivity. - */ - return 0; - } - } - - return 1; + /* + * Return 1 iff all the bits set in l->cat are also be set in + * levdatum->level->cat and no bit in l->cat is larger than + * p->p_cats.nprim. + */ + return ebitmap_contains(&levdatum->level->cat, &l->cat, + p->p_cats.nprim); } int mls_range_isvalid(struct policydb *p, struct mls_range *r) diff --git a/security/selinux/ss/mls_types.h b/security/selinux/ss/mls_types.h index 03bed52..e936487 100644 --- a/security/selinux/ss/mls_types.h +++ b/security/selinux/ss/mls_types.h @@ -35,7 +35,7 @@ static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2) static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2) { return ((l1->sens >= l2->sens) && - ebitmap_contains(&l1->cat, &l2->cat)); + ebitmap_contains(&l1->cat, &l2->cat, 0)); } #define mls_level_incomp(l1, l2) \ diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index c8adde3..f6195eb 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -3203,9 +3203,8 @@ static int range_write_helper(void *key, void *data, void *ptr) static int range_write(struct policydb *p, void *fp) { - size_t nel; __le32 buf[1]; - int rc; + int rc, nel; struct policy_data pd; pd.p = p; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b4feecc..ee470a0 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -72,6 +72,7 @@ int selinux_policycap_netpeer; int selinux_policycap_openperm; +int selinux_policycap_alwaysnetwork; static DEFINE_RWLOCK(policy_rwlock); @@ -1812,6 +1813,8 @@ static void security_load_policycaps(void) POLICYDB_CAPABILITY_NETPEER); selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps, POLICYDB_CAPABILITY_OPENPERM); + selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps, + POLICYDB_CAPABILITY_ALWAYSNETWORK); } static int security_preserve_bools(struct policydb *p); @@ -2323,43 +2326,74 @@ out: /** * security_fs_use - Determine how to handle labeling for a filesystem. - * @fstype: filesystem type - * @behavior: labeling behavior - * @sid: SID for filesystem (superblock) + * @sb: superblock in question */ -int security_fs_use( - const char *fstype, - unsigned int *behavior, - u32 *sid) +int security_fs_use(struct super_block *sb) { int rc = 0; struct ocontext *c; + struct superblock_security_struct *sbsec = sb->s_security; + const char *fstype = sb->s_type->name; + const char *subtype = (sb->s_subtype && sb->s_subtype[0]) ? sb->s_subtype : NULL; + struct ocontext *base = NULL; read_lock(&policy_rwlock); - c = policydb.ocontexts[OCON_FSUSE]; - while (c) { - if (strcmp(fstype, c->u.name) == 0) + for (c = policydb.ocontexts[OCON_FSUSE]; c; c = c->next) { + char *sub; + int baselen; + + baselen = strlen(fstype); + + /* if base does not match, this is not the one */ + if (strncmp(fstype, c->u.name, baselen)) + continue; + + /* if there is no subtype, this is the one! */ + if (!subtype) + break; + + /* skip past the base in this entry */ + sub = c->u.name + baselen; + + /* entry is only a base. save it. keep looking for subtype */ + if (sub[0] == '\0') { + base = c; + continue; + } + + /* entry is not followed by a subtype, so it is not a match */ + if (sub[0] != '.') + continue; + + /* whew, we found a subtype of this fstype */ + sub++; /* move past '.' */ + + /* exact match of fstype AND subtype */ + if (!strcmp(subtype, sub)) break; - c = c->next; } + /* in case we had found an fstype match but no subtype match */ + if (!c) + c = base; + if (c) { - *behavior = c->v.behavior; + sbsec->behavior = c->v.behavior; if (!c->sid[0]) { rc = sidtab_context_to_sid(&sidtab, &c->context[0], &c->sid[0]); if (rc) goto out; } - *sid = c->sid[0]; + sbsec->sid = c->sid[0]; } else { - rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid); + rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, &sbsec->sid); if (rc) { - *behavior = SECURITY_FS_USE_NONE; + sbsec->behavior = SECURITY_FS_USE_NONE; rc = 0; } else { - *behavior = SECURITY_FS_USE_GENFS; + sbsec->behavior = SECURITY_FS_USE_GENFS; } } diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d030818..a91d205 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -56,7 +56,7 @@ atomic_t selinux_xfrm_refcount = ATOMIC_INIT(0); /* - * Returns true if an LSM/SELinux context + * Returns true if the context is an LSM/SELinux context. */ static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx) { @@ -66,7 +66,7 @@ static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx) } /* - * Returns true if the xfrm contains a security blob for SELinux + * Returns true if the xfrm contains a security blob for SELinux. */ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) { @@ -74,48 +74,111 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) } /* - * LSM hook implementation that authorizes that a flow can use - * a xfrm policy rule. + * Allocates a xfrm_sec_state and populates it using the supplied security + * xfrm_user_sec_ctx context. */ -int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) +static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *uctx) { int rc; - u32 sel_sid; + const struct task_security_struct *tsec = current_security(); + struct xfrm_sec_ctx *ctx = NULL; + u32 str_len; - /* Context sid is either set to label or ANY_ASSOC */ - if (ctx) { - if (!selinux_authorizable_ctx(ctx)) - return -EINVAL; - - sel_sid = ctx->ctx_sid; - } else - /* - * All flows should be treated as polmatch'ing an - * otherwise applicable "non-labeled" policy. This - * would prevent inadvertent "leaks". - */ - return 0; + if (ctxp == NULL || uctx == NULL || + uctx->ctx_doi != XFRM_SC_DOI_LSM || + uctx->ctx_alg != XFRM_SC_ALG_SELINUX) + return -EINVAL; - rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__POLMATCH, - NULL); + str_len = uctx->ctx_len; + if (str_len >= PAGE_SIZE) + return -ENOMEM; - if (rc == -EACCES) - return -ESRCH; + ctx = kmalloc(sizeof(*ctx) + str_len + 1, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->ctx_doi = XFRM_SC_DOI_LSM; + ctx->ctx_alg = XFRM_SC_ALG_SELINUX; + ctx->ctx_len = str_len; + memcpy(ctx->ctx_str, &uctx[1], str_len); + ctx->ctx_str[str_len] = '\0'; + rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid); + if (rc) + goto err; + + rc = avc_has_perm(tsec->sid, ctx->ctx_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL); + if (rc) + goto err; + + *ctxp = ctx; + atomic_inc(&selinux_xfrm_refcount); + return 0; + +err: + kfree(ctx); return rc; } /* + * Free the xfrm_sec_ctx structure. + */ +static void selinux_xfrm_free(struct xfrm_sec_ctx *ctx) +{ + if (!ctx) + return; + + atomic_dec(&selinux_xfrm_refcount); + kfree(ctx); +} + +/* + * Authorize the deletion of a labeled SA or policy rule. + */ +static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx) +{ + const struct task_security_struct *tsec = current_security(); + + if (!ctx) + return 0; + + return avc_has_perm(tsec->sid, ctx->ctx_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, + NULL); +} + +/* + * LSM hook implementation that authorizes that a flow can use a xfrm policy + * rule. + */ +int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) +{ + int rc; + + /* All flows should be treated as polmatch'ing an otherwise applicable + * "non-labeled" policy. This would prevent inadvertent "leaks". */ + if (!ctx) + return 0; + + /* Context sid is either set to label or ANY_ASSOC */ + if (!selinux_authorizable_ctx(ctx)) + return -EINVAL; + + rc = avc_has_perm(fl_secid, ctx->ctx_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL); + return (rc == -EACCES ? -ESRCH : rc); +} + +/* * LSM hook implementation that authorizes that a state matches * the given policy, flow combo. */ - -int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl) +int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, + const struct flowi *fl) { u32 state_sid; - int rc; if (!xp->security) if (x->security) @@ -138,187 +201,80 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * if (fl->flowi_secid != state_sid) return 0; - rc = avc_has_perm(fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, - NULL)? 0:1; - - /* - * We don't need a separate SA Vs. policy polmatch check - * since the SA is now of the same label as the flow and - * a flow Vs. policy polmatch check had already happened - * in selinux_xfrm_policy_lookup() above. - */ - - return rc; + /* We don't need a separate SA Vs. policy polmatch check since the SA + * is now of the same label as the flow and a flow Vs. policy polmatch + * check had already happened in selinux_xfrm_policy_lookup() above. */ + return (avc_has_perm(fl->flowi_secid, state_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, + NULL) ? 0 : 1); } /* * LSM hook implementation that checks and/or returns the xfrm sid for the * incoming packet. */ - int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) { + u32 sid_session = SECSID_NULL; struct sec_path *sp; - *sid = SECSID_NULL; - if (skb == NULL) - return 0; + goto out; sp = skb->sp; if (sp) { - int i, sid_set = 0; + int i; - for (i = sp->len-1; i >= 0; i--) { + for (i = sp->len - 1; i >= 0; i--) { struct xfrm_state *x = sp->xvec[i]; if (selinux_authorizable_xfrm(x)) { struct xfrm_sec_ctx *ctx = x->security; - if (!sid_set) { - *sid = ctx->ctx_sid; - sid_set = 1; - + if (sid_session == SECSID_NULL) { + sid_session = ctx->ctx_sid; if (!ckall) - break; - } else if (*sid != ctx->ctx_sid) + goto out; + } else if (sid_session != ctx->ctx_sid) { + *sid = SECSID_NULL; return -EINVAL; + } } } } - return 0; -} - -/* - * Security blob allocation for xfrm_policy and xfrm_state - * CTX does not have a meaningful value on input - */ -static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx, u32 sid) -{ - int rc = 0; - const struct task_security_struct *tsec = current_security(); - struct xfrm_sec_ctx *ctx = NULL; - char *ctx_str = NULL; - u32 str_len; - - BUG_ON(uctx && sid); - - if (!uctx) - goto not_from_user; - - if (uctx->ctx_alg != XFRM_SC_ALG_SELINUX) - return -EINVAL; - - str_len = uctx->ctx_len; - if (str_len >= PAGE_SIZE) - return -ENOMEM; - - *ctxp = ctx = kmalloc(sizeof(*ctx) + - str_len + 1, - GFP_KERNEL); - - if (!ctx) - return -ENOMEM; - - ctx->ctx_doi = uctx->ctx_doi; - ctx->ctx_len = str_len; - ctx->ctx_alg = uctx->ctx_alg; - - memcpy(ctx->ctx_str, - uctx+1, - str_len); - ctx->ctx_str[str_len] = 0; - rc = security_context_to_sid(ctx->ctx_str, - str_len, - &ctx->ctx_sid); - - if (rc) - goto out; - - /* - * Does the subject have permission to set security context? - */ - rc = avc_has_perm(tsec->sid, ctx->ctx_sid, - SECCLASS_ASSOCIATION, - ASSOCIATION__SETCONTEXT, NULL); - if (rc) - goto out; - - return rc; - -not_from_user: - rc = security_sid_to_context(sid, &ctx_str, &str_len); - if (rc) - goto out; - - *ctxp = ctx = kmalloc(sizeof(*ctx) + - str_len, - GFP_ATOMIC); - - if (!ctx) { - rc = -ENOMEM; - goto out; - } - - ctx->ctx_doi = XFRM_SC_DOI_LSM; - ctx->ctx_alg = XFRM_SC_ALG_SELINUX; - ctx->ctx_sid = sid; - ctx->ctx_len = str_len; - memcpy(ctx->ctx_str, - ctx_str, - str_len); - - goto out2; - out: - *ctxp = NULL; - kfree(ctx); -out2: - kfree(ctx_str); - return rc; + *sid = sid_session; + return 0; } /* - * LSM hook implementation that allocs and transfers uctx spec to - * xfrm_policy. + * LSM hook implementation that allocs and transfers uctx spec to xfrm_policy. */ int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx) { - int err; - - BUG_ON(!uctx); - - err = selinux_xfrm_sec_ctx_alloc(ctxp, uctx, 0); - if (err == 0) - atomic_inc(&selinux_xfrm_refcount); - - return err; + return selinux_xfrm_alloc_user(ctxp, uctx); } - /* - * LSM hook implementation that copies security data structure from old to - * new for policy cloning. + * LSM hook implementation that copies security data structure from old to new + * for policy cloning. */ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp) { struct xfrm_sec_ctx *new_ctx; - if (old_ctx) { - new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len, - GFP_ATOMIC); - if (!new_ctx) - return -ENOMEM; + if (!old_ctx) + return 0; + + new_ctx = kmemdup(old_ctx, sizeof(*old_ctx) + old_ctx->ctx_len, + GFP_ATOMIC); + if (!new_ctx) + return -ENOMEM; + atomic_inc(&selinux_xfrm_refcount); + *new_ctxp = new_ctx; - memcpy(new_ctx, old_ctx, sizeof(*new_ctx)); - memcpy(new_ctx->ctx_str, old_ctx->ctx_str, new_ctx->ctx_len); - atomic_inc(&selinux_xfrm_refcount); - *new_ctxp = new_ctx; - } return 0; } @@ -327,8 +283,7 @@ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, */ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) { - atomic_dec(&selinux_xfrm_refcount); - kfree(ctx); + selinux_xfrm_free(ctx); } /* @@ -336,31 +291,55 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - const struct task_security_struct *tsec = current_security(); - - if (!ctx) - return 0; + return selinux_xfrm_delete(ctx); +} - return avc_has_perm(tsec->sid, ctx->ctx_sid, - SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, - NULL); +/* + * LSM hook implementation that allocates a xfrm_sec_state, populates it using + * the supplied security context, and assigns it to the xfrm_state. + */ +int selinux_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *uctx) +{ + return selinux_xfrm_alloc_user(&x->security, uctx); } /* - * LSM hook implementation that allocs and transfers sec_ctx spec to - * xfrm_state. + * LSM hook implementation that allocates a xfrm_sec_state and populates based + * on a secid. */ -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx, - u32 secid) +int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { - int err; + int rc; + struct xfrm_sec_ctx *ctx; + char *ctx_str = NULL; + int str_len; + + if (!polsec) + return 0; - BUG_ON(!x); + if (secid == 0) + return -EINVAL; - err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, secid); - if (err == 0) - atomic_inc(&selinux_xfrm_refcount); - return err; + rc = security_sid_to_context(secid, &ctx_str, &str_len); + if (rc) + return rc; + + ctx = kmalloc(sizeof(*ctx) + str_len, GFP_ATOMIC); + if (!ctx) + return -ENOMEM; + + ctx->ctx_doi = XFRM_SC_DOI_LSM; + ctx->ctx_alg = XFRM_SC_ALG_SELINUX; + ctx->ctx_sid = secid; + ctx->ctx_len = str_len; + memcpy(ctx->ctx_str, ctx_str, str_len); + kfree(ctx_str); + + x->security = ctx; + atomic_inc(&selinux_xfrm_refcount); + return 0; } /* @@ -368,24 +347,15 @@ int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uct */ void selinux_xfrm_state_free(struct xfrm_state *x) { - atomic_dec(&selinux_xfrm_refcount); - kfree(x->security); + selinux_xfrm_free(x->security); } - /* - * LSM hook implementation that authorizes deletion of labeled SAs. - */ +/* + * LSM hook implementation that authorizes deletion of labeled SAs. + */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - const struct task_security_struct *tsec = current_security(); - struct xfrm_sec_ctx *ctx = x->security; - - if (!ctx) - return 0; - - return avc_has_perm(tsec->sid, ctx->ctx_sid, - SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, - NULL); + return selinux_xfrm_delete(x->security); } /* @@ -395,14 +365,12 @@ int selinux_xfrm_state_delete(struct xfrm_state *x) * we need to check for unlabelled access since this may not have * gone thru the IPSec process. */ -int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad) +int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad) { - int i, rc = 0; - struct sec_path *sp; - u32 sel_sid = SECINITSID_UNLABELED; - - sp = skb->sp; + int i; + struct sec_path *sp = skb->sp; + u32 peer_sid = SECINITSID_UNLABELED; if (sp) { for (i = 0; i < sp->len; i++) { @@ -410,23 +378,17 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, if (x && selinux_authorizable_xfrm(x)) { struct xfrm_sec_ctx *ctx = x->security; - sel_sid = ctx->ctx_sid; + peer_sid = ctx->ctx_sid; break; } } } - /* - * This check even when there's no association involved is - * intended, according to Trent Jaeger, to make sure a - * process can't engage in non-ipsec communication unless - * explicitly allowed by policy. - */ - - rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__RECVFROM, ad); - - return rc; + /* This check even when there's no association involved is intended, + * according to Trent Jaeger, to make sure a process can't engage in + * non-IPsec communication unless explicitly allowed by policy. */ + return avc_has_perm(sk_sid, peer_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, ad); } /* @@ -436,49 +398,38 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, * If we do have a authorizable security association, then it has already been * checked in the selinux_xfrm_state_pol_flow_match hook above. */ -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct common_audit_data *ad, u8 proto) +int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb, + struct common_audit_data *ad, u8 proto) { struct dst_entry *dst; - int rc = 0; - - dst = skb_dst(skb); - - if (dst) { - struct dst_entry *dst_test; - - for (dst_test = dst; dst_test != NULL; - dst_test = dst_test->child) { - struct xfrm_state *x = dst_test->xfrm; - - if (x && selinux_authorizable_xfrm(x)) - goto out; - } - } switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: case IPPROTO_COMP: - /* - * We should have already seen this packet once before - * it underwent xfrm(s). No need to subject it to the - * unlabeled check. - */ - goto out; + /* We should have already seen this packet once before it + * underwent xfrm(s). No need to subject it to the unlabeled + * check. */ + return 0; default: break; } - /* - * This check even when there's no association involved is - * intended, according to Trent Jaeger, to make sure a - * process can't engage in non-ipsec communication unless - * explicitly allowed by policy. - */ + dst = skb_dst(skb); + if (dst) { + struct dst_entry *iter; - rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, ad); -out: - return rc; + for (iter = dst; iter != NULL; iter = iter->child) { + struct xfrm_state *x = iter->xfrm; + + if (x && selinux_authorizable_xfrm(x)) + return 0; + } + } + + /* This check even when there's no association involved is intended, + * according to Trent Jaeger, to make sure a process can't engage in + * non-IPsec communication unless explicitly allowed by policy. */ + return avc_has_perm(sk_sid, SECINITSID_UNLABELED, + SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, ad); } diff --git a/security/smack/smack.h b/security/smack/smack.h index 076b8e8..364cc64 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -177,9 +177,13 @@ struct smk_port_label { #define SMACK_CIPSO_MAXCATNUM 184 /* 23 * 8 */ /* - * Flag for transmute access + * Flags for untraditional access modes. + * It shouldn't be necessary to avoid conflicts with definitions + * in fs.h, but do so anyway. */ -#define MAY_TRANSMUTE 64 +#define MAY_TRANSMUTE 0x00001000 /* Controls directory labeling */ +#define MAY_LOCK 0x00002000 /* Locks should be writes, but ... */ + /* * Just to make the common cases easier to deal with */ @@ -188,9 +192,9 @@ struct smk_port_label { #define MAY_NOT 0 /* - * Number of access types used by Smack (rwxat) + * Number of access types used by Smack (rwxatl) */ -#define SMK_NUM_ACCESS_TYPE 5 +#define SMK_NUM_ACCESS_TYPE 6 /* SMACK data */ struct smack_audit_data { diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index b3b59b1..14293cd 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -84,6 +84,8 @@ int log_policy = SMACK_AUDIT_DENIED; * * Do the object check first because that is more * likely to differ. + * + * Allowing write access implies allowing locking. */ int smk_access_entry(char *subject_label, char *object_label, struct list_head *rule_list) @@ -99,6 +101,11 @@ int smk_access_entry(char *subject_label, char *object_label, } } + /* + * MAY_WRITE implies MAY_LOCK. + */ + if ((may & MAY_WRITE) == MAY_WRITE) + may |= MAY_LOCK; return may; } @@ -245,6 +252,7 @@ out_audit: static inline void smack_str_from_perm(char *string, int access) { int i = 0; + if (access & MAY_READ) string[i++] = 'r'; if (access & MAY_WRITE) @@ -255,6 +263,8 @@ static inline void smack_str_from_perm(char *string, int access) string[i++] = 'a'; if (access & MAY_TRANSMUTE) string[i++] = 't'; + if (access & MAY_LOCK) + string[i++] = 'l'; string[i] = '\0'; } /** diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 8825375..b0be893 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -185,7 +185,7 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode) smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); smk_ad_setfield_u_tsk(&ad, ctp); - rc = smk_curacc(skp->smk_known, MAY_READWRITE, &ad); + rc = smk_curacc(skp->smk_known, mode, &ad); return rc; } @@ -1146,7 +1146,7 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd, * @file: the object * @cmd: unused * - * Returns 0 if current has write access, error code otherwise + * Returns 0 if current has lock access, error code otherwise */ static int smack_file_lock(struct file *file, unsigned int cmd) { @@ -1154,7 +1154,7 @@ static int smack_file_lock(struct file *file, unsigned int cmd) smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); - return smk_curacc(file->f_security, MAY_WRITE, &ad); + return smk_curacc(file->f_security, MAY_LOCK, &ad); } /** @@ -1178,8 +1178,13 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, switch (cmd) { case F_GETLK: + break; case F_SETLK: case F_SETLKW: + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); + smk_ad_setfield_u_fs_path(&ad, file->f_path); + rc = smk_curacc(file->f_security, MAY_LOCK, &ad); + break; case F_SETOWN: case F_SETSIG: smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 80f4b4a..160aa08e 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -139,7 +139,7 @@ const char *smack_cipso_option = SMACK_CIPSO_OPTION; * SMK_LOADLEN: Smack rule length */ #define SMK_OACCESS "rwxa" -#define SMK_ACCESS "rwxat" +#define SMK_ACCESS "rwxatl" #define SMK_OACCESSLEN (sizeof(SMK_OACCESS) - 1) #define SMK_ACCESSLEN (sizeof(SMK_ACCESS) - 1) #define SMK_OLOADLEN (SMK_LABELLEN + SMK_LABELLEN + SMK_OACCESSLEN) @@ -282,6 +282,10 @@ static int smk_perm_from_str(const char *string) case 'T': perm |= MAY_TRANSMUTE; break; + case 'l': + case 'L': + perm |= MAY_LOCK; + break; default: return perm; } @@ -452,7 +456,7 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf, /* * Minor hack for backward compatibility */ - if (count != SMK_OLOADLEN && count != SMK_LOADLEN) + if (count < SMK_OLOADLEN || count > SMK_LOADLEN) return -EINVAL; } else { if (count >= PAGE_SIZE) { @@ -592,6 +596,8 @@ static void smk_rule_show(struct seq_file *s, struct smack_rule *srp, int max) seq_putc(s, 'a'); if (srp->smk_access & MAY_TRANSMUTE) seq_putc(s, 't'); + if (srp->smk_access & MAY_LOCK) + seq_putc(s, 'l'); seq_putc(s, '\n'); } diff --git a/sound/firewire/amdtp.h b/sound/firewire/amdtp.h index 839ebf8..2746ecd 100644 --- a/sound/firewire/amdtp.h +++ b/sound/firewire/amdtp.h @@ -4,6 +4,7 @@ #include <linux/err.h> #include <linux/interrupt.h> #include <linux/mutex.h> +#include <sound/asound.h> #include "packets-buffer.h" /** diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 8de66cc..4cdd9de 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -209,8 +209,9 @@ config SND_HDA_CODEC_CA0132 config SND_HDA_CODEC_CA0132_DSP bool "Support new DSP code for CA0132 codec" - depends on SND_HDA_CODEC_CA0132 && FW_LOADER + depends on SND_HDA_CODEC_CA0132 select SND_HDA_DSP_LOADER + select FW_LOADER help Say Y here to enable the DSP for Creative CA0132 for extended features like equalizer or echo cancellation. diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index afb90f4..69178c4 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4000,6 +4000,10 @@ static void hda_call_codec_resume(struct hda_codec *codec) * in the resume / power-save sequence */ hda_keep_power_on(codec); + if (codec->pm_down_notified) { + codec->pm_down_notified = 0; + hda_call_pm_notify(codec->bus, true); + } hda_set_power_state(codec, AC_PWRST_D0); restore_shutup_pins(codec); hda_exec_init_verbs(codec); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 04d1e6b..5e42059 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1512,6 +1512,7 @@ enum { ALC260_FIXUP_KN1, ALC260_FIXUP_FSC_S7020, ALC260_FIXUP_FSC_S7020_JWSE, + ALC260_FIXUP_VAIO_PINS, }; static void alc260_gpio1_automute(struct hda_codec *codec) @@ -1652,6 +1653,24 @@ static const struct hda_fixup alc260_fixups[] = { .chained = true, .chain_id = ALC260_FIXUP_FSC_S7020, }, + [ALC260_FIXUP_VAIO_PINS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + /* Pin configs are missing completely on some VAIOs */ + { 0x0f, 0x01211020 }, + { 0x10, 0x0001003f }, + { 0x11, 0x411111f0 }, + { 0x12, 0x01a15930 }, + { 0x13, 0x411111f0 }, + { 0x14, 0x411111f0 }, + { 0x15, 0x411111f0 }, + { 0x16, 0x411111f0 }, + { 0x17, 0x411111f0 }, + { 0x18, 0x411111f0 }, + { 0x19, 0x411111f0 }, + { } + } + }, }; static const struct snd_pci_quirk alc260_fixup_tbl[] = { @@ -1660,6 +1679,8 @@ static const struct snd_pci_quirk alc260_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x008f, "Acer", ALC260_FIXUP_GPIO1), SND_PCI_QUIRK(0x103c, 0x280a, "HP dc5750", ALC260_FIXUP_HP_DC5750), SND_PCI_QUIRK(0x103c, 0x30ba, "HP Presario B1900", ALC260_FIXUP_HP_B1900), + SND_PCI_QUIRK(0x104d, 0x81bb, "Sony VAIO", ALC260_FIXUP_VAIO_PINS), + SND_PCI_QUIRK(0x104d, 0x81e2, "Sony VAIO TX", ALC260_FIXUP_HP_PIN_0F), SND_PCI_QUIRK(0x10cf, 0x1326, "FSC LifeBook S7020", ALC260_FIXUP_FSC_S7020), SND_PCI_QUIRK(0x1509, 0x4540, "Favorit 100XS", ALC260_FIXUP_GPIO1), SND_PCI_QUIRK(0x152d, 0x0729, "Quanta KN1", ALC260_FIXUP_KN1), @@ -3393,7 +3414,7 @@ static void alc_update_headset_mode_hook(struct hda_codec *codec, static void alc_update_headset_jack_cb(struct hda_codec *codec, struct hda_jack_tbl *jack) { struct alc_spec *spec = codec->spec; - spec->current_headset_type = ALC_HEADSET_MODE_UNKNOWN; + spec->current_headset_type = ALC_HEADSET_TYPE_UNKNOWN; snd_hda_gen_hp_automute(codec, jack); } @@ -3652,9 +3673,29 @@ static void alc290_fixup_mono_speakers(struct hda_codec *codec, #if IS_ENABLED(CONFIG_THINKPAD_ACPI) #include <linux/thinkpad_acpi.h> +#include <acpi/acpi.h> static int (*led_set_func)(int, bool); +static acpi_status acpi_check_cb(acpi_handle handle, u32 lvl, void *context, + void **rv) +{ + bool *found = context; + *found = true; + return AE_OK; +} + +static bool is_thinkpad(struct hda_codec *codec) +{ + bool found = false; + if (codec->subsystem_id >> 16 != 0x17aa) + return false; + if (ACPI_SUCCESS(acpi_get_devices("LEN0068", acpi_check_cb, &found, NULL)) && found) + return true; + found = false; + return ACPI_SUCCESS(acpi_get_devices("IBM0068", acpi_check_cb, &found, NULL)) && found; +} + static void update_tpacpi_mute_led(void *private_data, int enabled) { if (led_set_func) @@ -3680,6 +3721,8 @@ static void alc_fixup_thinkpad_acpi(struct hda_codec *codec, bool removefunc = false; if (action == HDA_FIXUP_ACT_PROBE) { + if (!is_thinkpad(codec)) + return; if (!led_set_func) led_set_func = symbol_request(tpacpi_led_set); if (!led_set_func) { @@ -3923,6 +3966,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_pincfg_no_hp_to_lineout, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI, }, [ALC269_FIXUP_DELL1_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, @@ -4027,6 +4072,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_LIMIT_INT_MIC_BOOST] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_limit_int_mic_boost, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI, }, [ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED] = { .type = HDA_FIXUP_FUNC, @@ -4070,8 +4117,6 @@ static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_THINKPAD_ACPI] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_thinkpad_acpi, - .chained = true, - .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST }, [ALC255_FIXUP_DELL1_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, @@ -4128,6 +4173,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0614, "Dell Inspiron 3135", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_MONO_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x061f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x063f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), @@ -4173,7 +4219,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2208, "Thinkpad T431s", ALC269_FIXUP_LENOVO_DOCK), SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), - SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad", ALC269_FIXUP_THINKPAD_ACPI), + SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), @@ -4181,6 +4227,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), + SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", ALC269_FIXUP_THINKPAD_ACPI), SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ #if 0 @@ -4698,6 +4745,8 @@ enum { ALC668_FIXUP_DELL_MIC_NO_PRESENCE, ALC668_FIXUP_HEADSET_MODE, ALC662_FIXUP_BASS_CHMAP, + ALC662_FIXUP_BASS_1A, + ALC662_FIXUP_BASS_1A_CHMAP, }; static const struct hda_fixup alc662_fixups[] = { @@ -4878,6 +4927,19 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC662_FIXUP_ASUS_MODE4 }, + [ALC662_FIXUP_BASS_1A] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + {0x1a, 0x80106111}, /* bass speaker */ + {} + }, + }, + [ALC662_FIXUP_BASS_1A_CHMAP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc662_fixup_bass_chmap, + .chained = true, + .chain_id = ALC662_FIXUP_BASS_1A, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -4890,8 +4952,10 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0625, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0626, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A_CHMAP), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_CHMAP), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_CHMAP), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c index 21ae8d4..1ad92cb 100644 --- a/sound/soc/codecs/ab8500-codec.c +++ b/sound/soc/codecs/ab8500-codec.c @@ -126,8 +126,6 @@ struct ab8500_codec_drvdata_dbg { /* Private data for AB8500 device-driver */ struct ab8500_codec_drvdata { - struct regmap *regmap; - /* Sidetone */ long *sid_fir_values; enum sid_state sid_status; @@ -168,34 +166,48 @@ static inline const char *amic_type_str(enum amic_type type) */ /* Read a register from the audio-bank of AB8500 */ -static int ab8500_codec_read_reg(void *context, unsigned int reg, - unsigned int *value) +static unsigned int ab8500_codec_read_reg(struct snd_soc_codec *codec, + unsigned int reg) { - struct device *dev = context; int status; + unsigned int value = 0; u8 value8; - status = abx500_get_register_interruptible(dev, AB8500_AUDIO, - reg, &value8); - *value = (unsigned int)value8; + status = abx500_get_register_interruptible(codec->dev, AB8500_AUDIO, + reg, &value8); + if (status < 0) { + dev_err(codec->dev, + "%s: ERROR: Register (0x%02x:0x%02x) read failed (%d).\n", + __func__, (u8)AB8500_AUDIO, (u8)reg, status); + } else { + dev_dbg(codec->dev, + "%s: Read 0x%02x from register 0x%02x:0x%02x\n", + __func__, value8, (u8)AB8500_AUDIO, (u8)reg); + value = (unsigned int)value8; + } - return status; + return value; } /* Write to a register in the audio-bank of AB8500 */ -static int ab8500_codec_write_reg(void *context, unsigned int reg, - unsigned int value) +static int ab8500_codec_write_reg(struct snd_soc_codec *codec, + unsigned int reg, unsigned int value) { - struct device *dev = context; + int status; - return abx500_set_register_interruptible(dev, AB8500_AUDIO, - reg, value); -} + status = abx500_set_register_interruptible(codec->dev, AB8500_AUDIO, + reg, value); + if (status < 0) + dev_err(codec->dev, + "%s: ERROR: Register (%02x:%02x) write failed (%d).\n", + __func__, (u8)AB8500_AUDIO, (u8)reg, status); + else + dev_dbg(codec->dev, + "%s: Wrote 0x%02x into register %02x:%02x\n", + __func__, (u8)value, (u8)AB8500_AUDIO, (u8)reg); -static const struct regmap_config ab8500_codec_regmap = { - .reg_read = ab8500_codec_read_reg, - .reg_write = ab8500_codec_write_reg, -}; + return status; +} /* * Controls - DAPM @@ -2473,13 +2485,9 @@ static int ab8500_codec_probe(struct snd_soc_codec *codec) dev_dbg(dev, "%s: Enter.\n", __func__); - snd_soc_codec_set_cache_io(codec, 0, 0, SND_SOC_REGMAP); - /* Setup AB8500 according to board-settings */ pdata = dev_get_platdata(dev->parent); - codec->control_data = drvdata->regmap; - if (np) { if (!pdata) pdata = devm_kzalloc(dev, @@ -2557,6 +2565,9 @@ static int ab8500_codec_probe(struct snd_soc_codec *codec) static struct snd_soc_codec_driver ab8500_codec_driver = { .probe = ab8500_codec_probe, + .read = ab8500_codec_read_reg, + .write = ab8500_codec_write_reg, + .reg_word_size = sizeof(u8), .controls = ab8500_ctrls, .num_controls = ARRAY_SIZE(ab8500_ctrls), .dapm_widgets = ab8500_dapm_widgets, @@ -2581,15 +2592,6 @@ static int ab8500_codec_driver_probe(struct platform_device *pdev) drvdata->anc_status = ANC_UNCONFIGURED; dev_set_drvdata(&pdev->dev, drvdata); - drvdata->regmap = devm_regmap_init(&pdev->dev, NULL, &pdev->dev, - &ab8500_codec_regmap); - if (IS_ERR(drvdata->regmap)) { - status = PTR_ERR(drvdata->regmap); - dev_err(&pdev->dev, "%s: Failed to allocate regmap: %d\n", - __func__, status); - return status; - } - dev_dbg(&pdev->dev, "%s: Register codec.\n", __func__); status = snd_soc_register_codec(&pdev->dev, &ab8500_codec_driver, ab8500_codec_dai, diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 6f05b17..fea9910 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1529,6 +1529,8 @@ static void arizona_enable_fll(struct arizona_fll *fll, try_wait_for_completion(&fll->ok); regmap_update_bits(arizona->regmap, fll->base + 1, + ARIZONA_FLL1_FREERUN, 0); + regmap_update_bits(arizona->regmap, fll->base + 1, ARIZONA_FLL1_ENA, ARIZONA_FLL1_ENA); if (use_sync) regmap_update_bits(arizona->regmap, fll->base + 0x11, @@ -1546,6 +1548,8 @@ static void arizona_disable_fll(struct arizona_fll *fll) struct arizona *arizona = fll->arizona; bool change; + regmap_update_bits(arizona->regmap, fll->base + 1, + ARIZONA_FLL1_FREERUN, ARIZONA_FLL1_FREERUN); regmap_update_bits_check(arizona->regmap, fll->base + 1, ARIZONA_FLL1_ENA, 0, &change); regmap_update_bits(arizona->regmap, fll->base + 0x11, diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index f2d1094..c3c7396 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -37,6 +37,47 @@ struct wm5110_priv { struct arizona_fll fll[2]; }; +static const struct reg_default wm5110_sysclk_revd_patch[] = { + { 0x3093, 0x1001 }, + { 0x30E3, 0x1301 }, + { 0x3133, 0x1201 }, + { 0x3183, 0x1501 }, + { 0x31D3, 0x1401 }, +}; + +static int wm5110_sysclk_ev(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + struct arizona *arizona = dev_get_drvdata(codec->dev->parent); + struct regmap *regmap = codec->control_data; + const struct reg_default *patch = NULL; + int i, patch_size; + + switch (arizona->rev) { + case 3: + patch = wm5110_sysclk_revd_patch; + patch_size = ARRAY_SIZE(wm5110_sysclk_revd_patch); + break; + default: + return 0; + } + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + if (patch) + for (i = 0; i < patch_size; i++) + regmap_write(regmap, patch[i].reg, + patch[i].def); + break; + + default: + break; + } + + return 0; +} + static DECLARE_TLV_DB_SCALE(ana_tlv, 0, 100, 0); static DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0); static DECLARE_TLV_DB_SCALE(digital_tlv, -6400, 50, 0); @@ -400,7 +441,7 @@ static const struct snd_kcontrol_new wm5110_aec_loopback_mux = static const struct snd_soc_dapm_widget wm5110_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("SYSCLK", ARIZONA_SYSTEM_CLOCK_1, ARIZONA_SYSCLK_ENA_SHIFT, - 0, NULL, 0), + 0, wm5110_sysclk_ev, SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("ASYNCCLK", ARIZONA_ASYNC_CLOCK_1, ARIZONA_ASYNC_CLK_ENA_SHIFT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("OPCLK", ARIZONA_OUTPUT_SYSTEM_CLOCK, diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 78c35b4..b3653d3 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -200,9 +200,8 @@ static void rsnd_dma_do_work(struct work_struct *work) return; } + dma_async_issue_pending(dma->chan); } - - dma_async_issue_pending(dma->chan); } int rsnd_dma_available(struct rsnd_dma *dma) @@ -288,15 +287,13 @@ int rsnd_dai_connect(struct rsnd_dai *rdai, struct rsnd_mod *mod, struct rsnd_dai_stream *io) { - struct rsnd_priv *priv = rsnd_mod_to_priv(mod); - struct device *dev = rsnd_priv_to_dev(priv); - - if (!mod) { - dev_err(dev, "NULL mod\n"); + if (!mod) return -EIO; - } if (!list_empty(&mod->list)) { + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct device *dev = rsnd_priv_to_dev(priv); + dev_err(dev, "%s%d is not empty\n", rsnd_mod_name(mod), rsnd_mod_id(mod)); diff --git a/sound/soc/sh/rcar/scu.c b/sound/soc/sh/rcar/scu.c index f4453e3..fa8fa15 100644 --- a/sound/soc/sh/rcar/scu.c +++ b/sound/soc/sh/rcar/scu.c @@ -68,7 +68,7 @@ static int rsnd_scu_set_route(struct rsnd_priv *priv, return 0; id = rsnd_mod_id(mod); - if (id < 0 || id > ARRAY_SIZE(routes)) + if (id < 0 || id >= ARRAY_SIZE(routes)) return -EIO; /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 662f34c..a0aa84b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1615,8 +1615,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { - return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page, - offset, len); + const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0))); + + return kvm_write_guest_page(kvm, gfn, zero_page, offset, len); } EXPORT_SYMBOL_GPL(kvm_clear_guest_page); |