Skip to content

Commit

Permalink
Merge pull request #166 from Datura-ai/main
Browse files Browse the repository at this point in the history
deploy validator
  • Loading branch information
pyon12 authored Jan 6, 2025
2 parents 3e41588 + 0d5cca3 commit 8263a73
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 21 deletions.
2 changes: 1 addition & 1 deletion neurons/validators/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ RUN echo "export PYTHONPATH=$PYTHONPATH" >> ~/.bash_profile
COPY --from=base-image /root/app/ /root/app/
COPY --from=base-image /opt/pypackages/ /opt/pypackages/

LABEL version="3.3.15"
LABEL version="3.3.16"

CMD ["bash", "run.sh"]
2 changes: 1 addition & 1 deletion neurons/validators/Dockerfile.runner
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ WORKDIR /root/validator
COPY docker-compose.app.yml docker-compose.yml
COPY entrypoint.sh /entrypoint.sh

LABEL version="3.3.15"
LABEL version="3.3.16"

RUN chmod u+x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
45 changes: 29 additions & 16 deletions neurons/validators/src/services/docker_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ async def execute_and_stream_logs(
command: str,
log_tag: str,
):
result = True
status = True
error = ''
async with ssh_client.create_process(command) as process:
async for line in process.stdout:
async with self.lock:
Expand All @@ -98,7 +99,8 @@ async def execute_and_stream_logs(
)

async for line in process.stderr:
result = False
status = False
error = line.strip()
async with self.lock:
self.logs_queue.append(
{
Expand All @@ -108,7 +110,7 @@ async def execute_and_stream_logs(
}
)

return result
return status, error

async def handle_stream_logs(
self,
Expand Down Expand Up @@ -207,6 +209,7 @@ async def create_container(
),
)

log_tag = "container_creation"
custom_options = payload.custom_options

try:
Expand Down Expand Up @@ -249,8 +252,6 @@ async def create_container(
),
)

log_tag = "container_creation"

# set real-time logging
self.log_task = asyncio.create_task(
self.handle_stream_logs(
Expand All @@ -269,15 +270,18 @@ async def create_container(
)

command = f"docker pull {payload.docker_image}"
result = await self.execute_and_stream_logs(
status, error = await self.execute_and_stream_logs(
ssh_client=ssh_client,
command=command,
log_tag=log_tag,
)
if not result:
if not status:
log_text = _m(
"Docker pull failed",
extra=get_extra_info({default_extra}),
extra=get_extra_info({
**default_extra,
"error": error,
}),
)
logger.error(log_text)

Expand Down Expand Up @@ -344,13 +348,16 @@ async def create_container(

volume_name = f"volume_{uuid}"
command = f"docker volume create {volume_name}"
result = await self.execute_and_stream_logs(
status, error = await self.execute_and_stream_logs(
ssh_client=ssh_client, command=command, log_tag="container_creation"
)
if not result:
if not status:
log_text = _m(
"Docker volume creation failed",
extra=get_extra_info({default_extra}),
extra=get_extra_info({
**default_extra,
"error": error
}),
)
logger.error(log_text)

Expand Down Expand Up @@ -387,13 +394,16 @@ async def create_container(
else:
command = f'docker run -d {port_flags} {volume_flags} {entrypoint_flag} -e PUBLIC_KEY="{payload.user_public_key}" {env_flags} --mount source={volume_name},target=/root --gpus all --name {container_name} {payload.docker_image} {startup_commands}'

result = await self.execute_and_stream_logs(
status, error = await self.execute_and_stream_logs(
ssh_client=ssh_client, command=command, log_tag="container_creation"
)
if not result:
if not status:
log_text = _m(
"Docker container creation failed",
extra=get_extra_info(default_extra),
extra=get_extra_info({
**default_extra,
"error": error,
}),
)
logger.error(log_text)

Expand All @@ -410,7 +420,10 @@ async def create_container(
if not await self.check_container_running(ssh_client, container_name):
log_text = _m(
"Run docker run command but container is not running",
extra=get_extra_info({**default_extra, "container_name": container_name}),
extra=get_extra_info({
**default_extra,
"container_name": container_name,
}),
)
logger.error(log_text)

Expand Down Expand Up @@ -450,7 +463,7 @@ async def create_container(
)
except Exception as e:
log_text = _m(
"Docker container creation failed",
"Unknown Error create_container",
extra=get_extra_info({**default_extra, "error": str(e)}),
)
logger.error(log_text, exc_info=True)
Expand Down
5 changes: 2 additions & 3 deletions neurons/validators/src/services/task_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def __init__(
):
self.ssh_service = ssh_service
self.redis_service = redis_service
self.is_valid = True
self.wallet = settings.get_bittensor_wallet()

async def upload_directory(
Expand Down Expand Up @@ -677,8 +676,8 @@ async def create_task(

# if not rented, check docker digests
docker_digests = machine_spec.get("docker", {}).get("containers", [])
self.is_valid = self.validate_digests(docker_digests, docker_hub_digests)
if not self.is_valid:
is_docker_valid = self.validate_digests(docker_digests, docker_hub_digests)
if not is_docker_valid:
log_text = _m(
"Docker digests are not valid",
extra=get_extra_info(
Expand Down

0 comments on commit 8263a73

Please sign in to comment.