diff options
Diffstat (limited to 'net/ceph/osd_client.c')
-rw-r--r-- | net/ceph/osd_client.c | 59 |
1 files changed, 12 insertions, 47 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index c1d756c..780caf6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -221,6 +221,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); + RB_CLEAR_NODE(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); @@ -580,7 +581,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, dout("__kick_osd_requests osd%d\n", osd->o_osd); err = __reset_osd(osdc, osd); - if (err == -EAGAIN) + if (err) return; list_for_each_entry(req, &osd->o_requests, r_osd_item) { @@ -607,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, } } -static void kick_osd_requests(struct ceph_osd_client *osdc, - struct ceph_osd *kickosd) -{ - mutex_lock(&osdc->request_mutex); - __kick_osd_requests(osdc, kickosd); - mutex_unlock(&osdc->request_mutex); -} - /* * If the osd connection drops, we need to resubmit all requests. */ @@ -628,7 +621,9 @@ static void osd_reset(struct ceph_connection *con) dout("osd_reset osd%d\n", osd->o_osd); osdc = osd->o_osdc; down_read(&osdc->map_sem); - kick_osd_requests(osdc, osd); + mutex_lock(&osdc->request_mutex); + __kick_osd_requests(osdc, osd); + mutex_unlock(&osdc->request_mutex); send_queued(osdc); up_read(&osdc->map_sem); } @@ -647,6 +642,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) atomic_set(&osd->o_ref, 1); osd->o_osdc = osdc; osd->o_osd = onum; + RB_CLEAR_NODE(&osd->o_node); INIT_LIST_HEAD(&osd->o_requests); INIT_LIST_HEAD(&osd->o_linger_requests); INIT_LIST_HEAD(&osd->o_osd_lru); @@ -750,6 +746,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); + ret = -ENODEV; } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], &osd->o_con.peer_addr, sizeof(osd->o_con.peer_addr)) == 0 && @@ -876,9 +873,9 @@ static void __unregister_request(struct ceph_osd_client *osdc, req->r_osd = NULL; } + list_del_init(&req->r_req_lru_item); ceph_osdc_put_request(req); - list_del_init(&req->r_req_lru_item); if (osdc->num_requests == 0) { dout(" no requests, canceling timeout\n"); __cancel_osd_timeout(osdc); @@ -910,8 +907,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { dout("__unregister_linger_request %p\n", req); + list_del_init(&req->r_linger_item); if (req->r_osd) { - list_del_init(&req->r_linger_item); list_del_init(&req->r_linger_osd); if (list_empty(&req->r_osd->o_requests) && @@ -1090,12 +1087,10 @@ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); - struct ceph_osd_request *req, *last_req = NULL; + struct ceph_osd_request *req; struct ceph_osd *osd; - unsigned long timeout = osdc->client->options->osd_timeout * HZ; unsigned long keepalive = osdc->client->options->osd_keepalive_timeout * HZ; - unsigned long last_stamp = 0; struct list_head slow_osds; dout("timeout\n"); down_read(&osdc->map_sem); @@ -1105,37 +1100,6 @@ static void handle_timeout(struct work_struct *work) mutex_lock(&osdc->request_mutex); /* - * reset osds that appear to be _really_ unresponsive. this - * is a failsafe measure.. we really shouldn't be getting to - * this point if the system is working properly. the monitors - * should mark the osd as failed and we should find out about - * it from an updated osd map. - */ - while (timeout && !list_empty(&osdc->req_lru)) { - req = list_entry(osdc->req_lru.next, struct ceph_osd_request, - r_req_lru_item); - - /* hasn't been long enough since we sent it? */ - if (time_before(jiffies, req->r_stamp + timeout)) - break; - - /* hasn't been long enough since it was acked? */ - if (req->r_request->ack_stamp == 0 || - time_before(jiffies, req->r_request->ack_stamp + timeout)) - break; - - BUG_ON(req == last_req && req->r_stamp == last_stamp); - last_req = req; - last_stamp = req->r_stamp; - - osd = req->r_osd; - BUG_ON(!osd); - pr_warning(" tid %llu timed out on osd%d, will reset osd\n", - req->r_tid, osd->o_osd); - __kick_osd_requests(osdc, osd); - } - - /* * ping osds that are a bit slow. this ensures that if there * is a break in the TCP connection we will notice, and reopen * a connection with that osd (from the fault callback). @@ -1364,8 +1328,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); - __unregister_linger_request(osdc, req); __register_request(osdc, req); + __unregister_linger_request(osdc, req); } mutex_unlock(&osdc->request_mutex); @@ -1599,6 +1563,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); + RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ init_completion(&event->completion); |