Skip to content

Commit

Permalink
fix pause reqs (#741)
Browse files Browse the repository at this point in the history
  • Loading branch information
shihaobai authored Feb 20, 2025
1 parent 971936e commit 57bb6e1
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
6 changes: 3 additions & 3 deletions lightllm/server/router/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ async def _step(self):
paused_reqs = select_paused_reqs(
self.running_batch, self.pause_strategy, self.req_queue, self.max_total_token_num
)
-await self._pause_reqs(self.running_batch, paused_reqs)
+await self._pause_reqs(paused_reqs)
logger.debug(f"pasued req num: {self.req_queue.get_paused_req_num()}")
self.has_wait_tokens = 0
return
Expand Down Expand Up @@ -342,9 +342,9 @@ async def _decode_batch(self, batch: Batch):
)
return

-async def _pause_reqs(self, batch: Batch, pasue_reqs):
+async def _pause_reqs(self, pasue_reqs):
pasue_req_ids = [r.request_id for r in pasue_reqs]
-await self.model_rpc_client.pause_reqs(batch.batch_id, pasue_req_ids)
+await self.model_rpc_client.pause_reqs(pasue_req_ids)
return

def _filter_runing_batch(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def post_handel(self, run_reqs: List[InferReq], next_token_ids, next_token_logpr

req_obj.cur_kv_len = len(req_obj.get_chuncked_input_token_ids())
if req_obj.cur_kv_len < req_obj.get_cur_total_len():
+if self.tp_rank < self.dp_size:
+req_obj.shm_req.shm_cur_kv_len = req_obj.cur_kv_len
continue

req_obj.set_next_gen_token_id(next_token_id, next_token_logprob)
Expand Down

0 comments on commit 57bb6e1

Please sign in to comment.