summaryrefslogtreecommitdiffstats
path: root/net/ceph/osd_client.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph/osd_client.c')
-rw-r--r--net/ceph/osd_client.c59
1 files changed, 12 insertions, 47 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index c1d756c..780caf6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -221,6 +221,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
kref_init(&req->r_kref);
init_completion(&req->r_completion);
init_completion(&req->r_safe_completion);
+ RB_CLEAR_NODE(&req->r_node);
INIT_LIST_HEAD(&req->r_unsafe_item);
INIT_LIST_HEAD(&req->r_linger_item);
INIT_LIST_HEAD(&req->r_linger_osd);
@@ -580,7 +581,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
dout("__kick_osd_requests osd%d\n", osd->o_osd);
err = __reset_osd(osdc, osd);
- if (err == -EAGAIN)
+ if (err)
return;
list_for_each_entry(req, &osd->o_requests, r_osd_item) {
@@ -607,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
}
}
-static void kick_osd_requests(struct ceph_osd_client *osdc,
- struct ceph_osd *kickosd)
-{
- mutex_lock(&osdc->request_mutex);
- __kick_osd_requests(osdc, kickosd);
- mutex_unlock(&osdc->request_mutex);
-}
-
/*
* If the osd connection drops, we need to resubmit all requests.
*/
@@ -628,7 +621,9 @@ static void osd_reset(struct ceph_connection *con)
dout("osd_reset osd%d\n", osd->o_osd);
osdc = osd->o_osdc;
down_read(&osdc->map_sem);
- kick_osd_requests(osdc, osd);
+ mutex_lock(&osdc->request_mutex);
+ __kick_osd_requests(osdc, osd);
+ mutex_unlock(&osdc->request_mutex);
send_queued(osdc);
up_read(&osdc->map_sem);
}
@@ -647,6 +642,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
atomic_set(&osd->o_ref, 1);
osd->o_osdc = osdc;
osd->o_osd = onum;
+ RB_CLEAR_NODE(&osd->o_node);
INIT_LIST_HEAD(&osd->o_requests);
INIT_LIST_HEAD(&osd->o_linger_requests);
INIT_LIST_HEAD(&osd->o_osd_lru);
@@ -750,6 +746,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
if (list_empty(&osd->o_requests) &&
list_empty(&osd->o_linger_requests)) {
__remove_osd(osdc, osd);
+ ret = -ENODEV;
} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
&osd->o_con.peer_addr,
sizeof(osd->o_con.peer_addr)) == 0 &&
@@ -876,9 +873,9 @@ static void __unregister_request(struct ceph_osd_client *osdc,
req->r_osd = NULL;
}
+ list_del_init(&req->r_req_lru_item);
ceph_osdc_put_request(req);
- list_del_init(&req->r_req_lru_item);
if (osdc->num_requests == 0) {
dout(" no requests, canceling timeout\n");
__cancel_osd_timeout(osdc);
@@ -910,8 +907,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
dout("__unregister_linger_request %p\n", req);
+ list_del_init(&req->r_linger_item);
if (req->r_osd) {
- list_del_init(&req->r_linger_item);
list_del_init(&req->r_linger_osd);
if (list_empty(&req->r_osd->o_requests) &&
@@ -1090,12 +1087,10 @@ static void handle_timeout(struct work_struct *work)
{
struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client, timeout_work.work);
- struct ceph_osd_request *req, *last_req = NULL;
+ struct ceph_osd_request *req;
struct ceph_osd *osd;
- unsigned long timeout = osdc->client->options->osd_timeout * HZ;
unsigned long keepalive =
osdc->client->options->osd_keepalive_timeout * HZ;
- unsigned long last_stamp = 0;
struct list_head slow_osds;
dout("timeout\n");
down_read(&osdc->map_sem);
@@ -1105,37 +1100,6 @@ static void handle_timeout(struct work_struct *work)
mutex_lock(&osdc->request_mutex);
/*
- * reset osds that appear to be _really_ unresponsive. this
- * is a failsafe measure.. we really shouldn't be getting to
- * this point if the system is working properly. the monitors
- * should mark the osd as failed and we should find out about
- * it from an updated osd map.
- */
- while (timeout && !list_empty(&osdc->req_lru)) {
- req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
- r_req_lru_item);
-
- /* hasn't been long enough since we sent it? */
- if (time_before(jiffies, req->r_stamp + timeout))
- break;
-
- /* hasn't been long enough since it was acked? */
- if (req->r_request->ack_stamp == 0 ||
- time_before(jiffies, req->r_request->ack_stamp + timeout))
- break;
-
- BUG_ON(req == last_req && req->r_stamp == last_stamp);
- last_req = req;
- last_stamp = req->r_stamp;
-
- osd = req->r_osd;
- BUG_ON(!osd);
- pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
- req->r_tid, osd->o_osd);
- __kick_osd_requests(osdc, osd);
- }
-
- /*
* ping osds that are a bit slow. this ensures that if there
* is a break in the TCP connection we will notice, and reopen
* a connection with that osd (from the fault callback).
@@ -1364,8 +1328,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
req->r_osd ? req->r_osd->o_osd : -1);
- __unregister_linger_request(osdc, req);
__register_request(osdc, req);
+ __unregister_linger_request(osdc, req);
}
mutex_unlock(&osdc->request_mutex);
@@ -1599,6 +1563,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc,
event->data = data;
event->osdc = osdc;
INIT_LIST_HEAD(&event->osd_node);
+ RB_CLEAR_NODE(&event->node);
kref_init(&event->kref); /* one ref for us */
kref_get(&event->kref); /* one ref for the caller */
init_completion(&event->completion);
OpenPOWER on IntegriCloud