Skip to content

Commit

Permalink
runner: fix state_lock and cfgfs use
Browse files Browse the repository at this point in the history
The kernel can end up taking a configfs lock then call up to userspace,
so we must not have a lock that is taken in this upcall and is taken
when interacting with configfs. As reported by sherlockxiao:

#595

this happens with the state_lock where during deletion the kernel will
hold the state_lock, but some code paths will hold the state_lock while
calling into configfs.

This moves our configfs access out of the state_lock.
  • Loading branch information
Mike Christie committed Oct 17, 2019
1 parent 7a6186b commit 493893b
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 18 deletions.
18 changes: 4 additions & 14 deletions target.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
static struct list_head tpg_recovery_list = LIST_HEAD_INIT(tpg_recovery_list);
/*
* Locking ordering:
* rdev->state_lock
* rdev->state_lock (see tcmur_device.h for more state_lock restrictions)
* tpg_recovery_lock
*/
static pthread_mutex_t tpg_recovery_lock = PTHREAD_MUTEX_INITIALIZER;
Expand Down Expand Up @@ -276,32 +276,23 @@ static void *tgt_port_grp_recovery_thread_fn(void *arg)
return NULL;
}

int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev)
int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev,
struct list_head *alua_list)
{
struct tcmur_device *rdev = tcmu_dev_get_private(dev);
struct list_head alua_list;
struct alua_grp *group;
struct tgt_port_grp *tpg;
struct tgt_port *port, *enabled_port = NULL;
int ret;

pthread_mutex_lock(&tpg_recovery_lock);

list_head_init(&alua_list);
ret = tcmu_get_alua_grps(dev, &alua_list);
if (ret) {
/* User is deleting device so fast fail */
tcmu_dev_warn(dev, "Could not find any tpgs.\n");
goto done;
}

/*
* This assumes a tcmu_dev is only exported though one local
* enabled tpg. The kernel members file only returns
* the one and runner is not passed info about which
* tpg/port IO was received on.
*/
list_for_each(&alua_list, group, entry) {
list_for_each(alua_list, group, entry) {
list_for_each(&group->tgt_ports, port, entry) {
if (port->enabled)
enabled_port = port;
Expand Down Expand Up @@ -340,7 +331,6 @@ int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev)
add_to_list:
list_add(&tpg->devs, &rdev->recovery_entry);
done:
tcmu_release_alua_grps(&alua_list);
pthread_mutex_unlock(&tpg_recovery_lock);
return ret;
}
4 changes: 3 additions & 1 deletion target.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "ccan/list/list.h"

struct tgt_port_grp;
struct list_head;

struct tgt_port {
uint16_t rel_port_id;
Expand All @@ -31,6 +32,7 @@ struct tgt_port {

void tcmu_free_tgt_port(struct tgt_port *port);
struct tgt_port *tcmu_get_tgt_port(char *member_str);
int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev);
int tcmu_add_dev_to_recovery_list(struct tcmu_device *dev,
struct list_head *alua_list);

#endif
15 changes: 13 additions & 2 deletions tcmur_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,16 @@ void tcmu_cancel_recovery(struct tcmu_device *dev)
void tcmu_notify_conn_lost(struct tcmu_device *dev)
{
struct tcmur_device *rdev = tcmu_dev_get_private(dev);
struct list_head alua_list;
int ret;

list_head_init(&alua_list);
ret = tcmu_get_alua_grps(dev, &alua_list);
if (ret) {
/* User is deleting device so fast fail */
tcmu_dev_warn(dev, "Could not find any tpgs.\n");
return;
}

pthread_mutex_lock(&rdev->state_lock);

Expand All @@ -200,10 +210,11 @@ void tcmu_notify_conn_lost(struct tcmu_device *dev)
tcmu_dev_err(dev, "Handler connection lost (lock state %d)\n",
rdev->lock_state);

if (!tcmu_add_dev_to_recovery_list(dev))
if (!tcmu_add_dev_to_recovery_list(dev, &alua_list))
rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY;
unlock:
pthread_mutex_unlock(&rdev->state_lock);
tcmu_release_alua_grps(&alua_list);
}

/**
Expand Down Expand Up @@ -439,11 +450,11 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev, uint16_t tag)

tcmu_dev_info(dev, "Lock acquisition %s\n",
rdev->lock == TCMUR_DEV_LOCK_LOCKED ? "successful" : "unsuccessful");
tcmu_cfgfs_dev_exec_action(dev, "block_dev", 0);

pthread_cond_signal(&rdev->lock_cond);
pthread_mutex_unlock(&rdev->state_lock);

tcmu_cfgfs_dev_exec_action(dev, "block_dev", 0);
return ret;
}

Expand Down
13 changes: 12 additions & 1 deletion tcmur_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,18 @@ struct tcmur_device {
pthread_t lock_thread;
pthread_cond_t lock_cond;

/* General lock for lock state, thread, dev state, etc */
/*
* General lock for lock state, thread, dev state, etc.
*
* Locking order:
* 1. Kernel configfs lock
* 2. state_lock
* 3. tpg_recovery_lock
*
* On deletion the kernel will grab the configfs lock then call into
* userspace, so we must not hold the state_lock then perform a configfs
* operation.
* */
pthread_mutex_t state_lock;
int pending_uas;

Expand Down

0 comments on commit 493893b

Please sign in to comment.