summaryrefslogtreecommitdiffstats
path: root/include/linux/crush
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2017-06-22 19:44:05 +0200
committerIlya Dryomov <idryomov@gmail.com>2017-07-07 17:25:19 +0200
commit069f3222ca96acfe8c59937e98c401bda5475b48 (patch)
tree28db93df7b122265120acee62403a412295022bd /include/linux/crush
parent1c2e7b451b889bead46cef410a737d1767cd6f0b (diff)
downloadop-kernel-dev-069f3222ca96acfe8c59937e98c401bda5475b48.zip
op-kernel-dev-069f3222ca96acfe8c59937e98c401bda5475b48.tar.gz
crush: implement weight and id overrides for straw2
bucket_straw2_choose needs to use weights that may be different from weight_items. For instance to compensate for an uneven distribution caused by a low number of values. Or to fix the probability biais introduced by conditional probabilities (see http://tracker.ceph.com/issues/15653 for more information). We introduce a weight_set for each straw2 bucket to set the desired weight for a given item at a given position. The weight of a given item when picking the first replica (first position) may be different from the weight the second replica (second position). For instance the weight matrix for a given bucket containing items 3, 7 and 13 could be as follows: position 0 position 1 item 3 0x10000 0x100000 item 7 0x40000 0x10000 item 13 0x40000 0x10000 When crush_do_rule picks the first of two replicas (position 0), item 7, 3 are four times more likely to be choosen by bucket_straw2_choose than item 13. When choosing the second replica (position 1), item 3 is ten times more likely to be choosen than item 7, 13. By default the weight_set of each bucket exactly matches the content of item_weights for each position to ensure backward compatibility. bucket_straw2_choose compares items by using their id. The same ids are also used to index buckets and they must be unique. For each item in a bucket an array of ids can be provided for placement purposes and they are used instead of the ids. If no replacement ids are provided, the legacy behavior is preserved. Reflects ceph.git commit 19537a450fd5c5a0bb8b7830947507a76db2ceca. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'include/linux/crush')
-rw-r--r--include/linux/crush/crush.h58
-rw-r--r--include/linux/crush/mapper.h9
2 files changed, 62 insertions, 5 deletions
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index fbecbd0..d8676e5 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -137,6 +137,64 @@ struct crush_bucket {
};
+/** @ingroup API
+ *
+ * Replacement weights for each item in a bucket. The size of the
+ * array must be exactly the size of the straw2 bucket, just as the
+ * item_weights array.
+ *
+ */
+struct crush_weight_set {
+ __u32 *weights; /*!< 16.16 fixed point weights
+ in the same order as items */
+ __u32 size; /*!< size of the __weights__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for a given straw2 bucket, for
+ * placement purposes.
+ *
+ * When crush_do_rule() chooses the Nth item from a straw2 bucket, the
+ * replacement weights found at __weight_set[N]__ are used instead of
+ * the weights from __item_weights__. If __N__ is greater than
+ * __weight_set_size__, the weights found at __weight_set_size-1__ are
+ * used instead. For instance if __weight_set__ is:
+ *
+ * [ [ 0x10000, 0x20000 ], // position 0
+ * [ 0x20000, 0x40000 ] ] // position 1
+ *
+ * choosing the 0th item will use position 0 weights [ 0x10000, 0x20000 ]
+ * choosing the 1th item will use position 1 weights [ 0x20000, 0x40000 ]
+ * choosing the 2th item will use position 1 weights [ 0x20000, 0x40000 ]
+ * etc.
+ *
+ */
+struct crush_choose_arg {
+ __s32 *ids; /*!< values to use instead of items */
+ __u32 ids_size; /*!< size of the __ids__ array */
+ struct crush_weight_set *weight_set; /*!< weight replacements for
+ a given position */
+ __u32 weight_set_size; /*!< size of the __weight_set__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for each bucket in the crushmap. The
+ * __size__ of the __args__ array must be exactly the same as the
+ * __map->max_buckets__.
+ *
+ * The __crush_choose_arg__ at index N will be used when choosing
+ * an item from the bucket __map->buckets[N]__ bucket, provided it
+ * is a straw2 bucket.
+ *
+ */
+struct crush_choose_arg_map {
+ struct crush_choose_arg *args; /*!< replacement for each bucket
+ in the crushmap */
+ __u32 size; /*!< size of the __args__ array */
+};
+
struct crush_bucket_uniform {
struct crush_bucket h;
__u32 item_weight; /* 16-bit fixed point; all items equally weighted */
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index c95e19e..141edab 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -11,11 +11,10 @@
#include "crush.h"
extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
-extern int crush_do_rule(const struct crush_map *map,
- int ruleno,
- int x, int *result, int result_max,
- const __u32 *weights, int weight_max,
- void *cwin);
+int crush_do_rule(const struct crush_map *map,
+ int ruleno, int x, int *result, int result_max,
+ const __u32 *weight, int weight_max,
+ void *cwin, const struct crush_choose_arg *choose_args);
/*
* Returns the exact amount of workspace that will need to be used
OpenPOWER on IntegriCloud