Skip to content

Commit

Permalink
Make lock mechanism quicker and clearer
Browse files Browse the repository at this point in the history
By removing the sleep when locking a task the time between polling the
status and setting a new one is reduced, this is important to lower the
chances of a race condition.

"Taken" is being divided into "Locked" and "Taken", that can also help us
to troubleshoot future issues.

"pending for rerun" status now includes the runner that set it and when
it was set.

Issue: #365

Signed-off-by: Armando Neto <[email protected]>
  • Loading branch information
netoarmando committed Nov 24, 2020
1 parent a94781f commit f9364ad
Showing 1 changed file with 45 additions and 12 deletions.
57 changes: 45 additions & 12 deletions github/internals/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
GITHUB_DESCRIPTION_LIMIT = 139
RACE_TIMEOUT = 17
RERUN_PENDING = "pending for rerun"
RERUN_PENDING_FMT = "pending for rerun by {runner_id} on {date}"
TASK_TAKEN_FMT = "Taken by {runner_id} on {date}"
TASK_LOCKED_FMT = "Locked by {runner_id} on {date}"
SENTRY_URL = (
"https://d24d8d622cbb4e2ea447c9a64f19b81a:"
"[email protected]/193222"
Expand Down Expand Up @@ -165,10 +167,11 @@ def get_rate_limit(self, resource: Text=None) -> RateLimit:
)

def poll_status(
self, pr_number: int, task_name: Text
self, pr_number: int, task_name: Text, no_sleep: bool=False
) -> "Status":
"""Gets commit status on GitHub using GraphQL API"""
sleep(randint(8, 13)) # FIXME: We're polling too concurrently
if not no_sleep:
sleep(randint(8, 13)) # FIXME: We're polling too concurrently
pr_query = queries.make_pull_request_query(
self.repo_owner, self.repo_name, pr_number
)
Expand Down Expand Up @@ -344,27 +347,44 @@ def failed(self) -> bool:
def taken(self) -> bool:
return "taken" in self.description.lower()

@property
def locked(self) -> bool:
return "locked" in self.description.lower()

@property
def unassigned(self) -> bool:
return "unassigned" in self.description.lower()

@property
def rerun_pending(self) -> bool:
return self.description == RERUN_PENDING
return self.description.startswith(RERUN_PENDING)

@property
def processing(self) -> bool:
return any((
self.pending, self.rerun_pending, self.taken, self.unassigned
self.pending,
self.rerun_pending,
self.taken,
self.unassigned,
self.locked,
))

def stalled(self, task: "Task") -> bool:
"""Checks if commit status is timed out"""
now = datetime.now(pytz.UTC)
timeout = timedelta(seconds=task.timeout)
if not timeout:

if self.taken:
format_string = TASK_TAKEN_FMT
timeout = timedelta(seconds=task.timeout)
if not timeout:
return False
elif self.locked:
format_string = TASK_LOCKED_FMT
timeout = timedelta(seconds=60)
else:
return False
parsed = parse.parse(TASK_TAKEN_FMT, self.description)

parsed = parse.parse(format_string, self.description)
if not parsed:
return False

Expand Down Expand Up @@ -652,16 +672,17 @@ def lock(self, world: World) -> None:
)
)

# Locking task
time_now = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
description = TASK_TAKEN_FMT.format(
description = TASK_LOCKED_FMT.format(
runner_id=world.runner_id,
date=time_now
)
world.create_status(self, State.PENDING, description)

sleep(RACE_TIMEOUT)

status = world.poll_status(self.pr_number, self.name)
status = world.poll_status(self.pr_number, self.name, no_sleep=True)

if status.description != description:
raise EnvironmentError(
Expand All @@ -670,6 +691,14 @@ def lock(self, world: World) -> None:
)
)

# Taking task
time_now = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
description = TASK_TAKEN_FMT.format(
runner_id=world.runner_id,
date=time_now
)
world.create_status(self, State.PENDING, description)

self.description = description

def set_unassigned(self, world: World) -> None:
Expand All @@ -690,13 +719,12 @@ def set_unassigned(self, world: World) -> None:
)
)


def set_rerun(self, world: World) -> None:
"""Creates a commit status on GitHub using REST API
Sets the status description to RERUN_PENDING value.
"""
status = world.poll_status(self.pr_number, self.name)
status = world.poll_status(self.pr_number, self.name, no_sleep=True)
if status.succeeded or (status.taken and not status.stalled):
raise EnvironmentError(
"Task {} PR#{} is changed".format(
Expand All @@ -710,7 +738,12 @@ def set_rerun(self, world: World) -> None:
)
)

world.create_status(self, State.PENDING, RERUN_PENDING)
time_now = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
description = RERUN_PENDING_FMT.format(
runner_id=world.runner_id,
date=time_now
)
world.create_status(self, State.PENDING, description)

def execute(self, world: World, statuses: Dict) -> None:
"""Runs the related task class defined in tasks/tasks.py"""
Expand Down

0 comments on commit f9364ad

Please sign in to comment.