Skip to content

Commit

Permalink
Merge pull request #3 from soar-zhengjian/master
Browse files Browse the repository at this point in the history
 Add some operations for UAI Train job
  • Loading branch information
classicsong authored Nov 15, 2017
2 parents 9d4e7a7 + a31c49e commit cc0c3ac
Show file tree
Hide file tree
Showing 26 changed files with 769 additions and 90 deletions.
6 changes: 6 additions & 0 deletions uai/utils/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,9 @@ def format_normal(self):

def format_exception(self, *args):
    """Return the formatted message prefixed with ``"%s%s: "``.

    The prefix is concatenated with the stored message template first.
    Because ``%`` is left-associative, ``args`` is substituted into the
    combined template before the instance's stored args are applied to
    the result.
    """
    # NOTE(review): presumably ``args`` supplies the two values for the
    # "%s%s: " prefix -- confirm against the callers.
    return ("%s%s: " + self.__message) % args % self.__args

def printConsoleOnlyError():
    """Limit the console log handler to ERROR and reload the logging config.

    Mutates the module-level ``LOGGING`` dict in place, re-applies it with
    ``logging.config.dictConfig`` and rebinds the module-global
    ``uai_logger`` to the "uaiservice" logger.
    """
    global uai_logger

    console_handler = LOGGING['handlers']['console']
    console_handler['level'] = 'ERROR'

    # The handler level only takes effect once the config is applied again.
    logging.config.dictConfig(LOGGING)
    uai_logger = logging.getLogger("uaiservice")
2 changes: 0 additions & 2 deletions uai/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import hashlib
import tarfile
import json

GATEWAY_DEFAULT='Default'

def _verfy_ac(private_key, params):
Expand All @@ -17,7 +16,6 @@ def _verfy_ac(private_key, params):
sign = hashlib.sha1()
sign.update(params_data.encode('utf-8')) # must encode to adapt python3
signature = sign.hexdigest()
print("Signature",signature)
return signature

def val_to_str(val):
Expand Down
3 changes: 2 additions & 1 deletion uaitrain/api/base_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def _cmd_common_request(self):
self.cmd_params.pop('Signature')
self.cmd_params['Signature'] = _verfy_ac(self.priv_key,
self.cmd_params)
print (self.cmd_params)
uai_logger.info("Signature: {0}".format(self.cmd_params['Signature']))
uai_logger.info(self.cmd_params)
uai_logger.info("Call http request: {0} ".format(get_request(self.cmd_url, params=self.cmd_params)))
r = requests.get(self.cmd_url, params=self.cmd_params)
rsp = json.loads(r.text, 'utf-8')
Expand Down
51 changes: 51 additions & 0 deletions uaitrain/api/get_train_job_predict_start_time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from uaitrain.api.base_op import BaseUAITrainAPIOp

class GetUAITrainJobStartPredictOp(BaseUAITrainAPIOp):
    """
        GetUAITrainJobStartPredictOp
            Compatible with UAI Train GetUAITrainJobStartPredict API func
        Input:
            pub_key     string(required)      Public key of the user
            priv_key    string(required)      Private key of the user
            project_id  int(optional)         Project ID of the job
            region      string(optional)      Which Region to run the job
            zone        string(optional)      Which Zone in the Region to run the job
            job_id      string(required)      Job id of the job
        Output:
            RetCode     int(required)         Op return code: 0: success, others: error code
            Action      string(required)      Action name
            Message     string(not required)  Message: error description
    """

    ACTION_NAME = "GetUAITrainJobStartPredict"

    def __init__(self, pub_key, priv_key, job_id, project_id="", region="", zone=""):
        """Build the request and store ``job_id`` as the ``TrainJobId`` parameter."""
        super(GetUAITrainJobStartPredictOp, self).__init__(self.ACTION_NAME,
                                                           pub_key,
                                                           priv_key,
                                                           project_id,
                                                           region,
                                                           zone)
        self.cmd_params["TrainJobId"] = job_id

    def _check_args(self):
        """Validate the request parameters.

        Runs the base-class checks, then requires ``TrainJobId`` to be a
        non-empty string.

        Raises:
            RuntimeError: if ``TrainJobId`` is not a string or is empty.
        """
        super(GetUAITrainJobStartPredictOp, self)._check_args()

        job_id = self.cmd_params["TrainJobId"]
        # The original wrote ``type(job_id != str)``: the type of a bool,
        # which is always truthy, so every call raised.  Check the value's
        # own type instead.
        if not isinstance(job_id, str) or job_id == "":
            raise RuntimeError("job_id shoud be <str> and is not nil.")
51 changes: 51 additions & 0 deletions uaitrain/api/get_train_job_running_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from uaitrain.api.base_op import BaseUAITrainAPIOp

class GetUAITrainRunningLogOp(BaseUAITrainAPIOp):
    """
        GetUAITrainRunningLogOp
            Compatible with UAI Train GetUAITrainRunningLog API func
        Input:
            pub_key     string(required)      Public key of the user
            priv_key    string(required)      Private key of the user
            project_id  int(optional)         Project ID of the job
            region      string(optional)      Which Region to run the job
            zone        string(optional)      Which Zone in the Region to run the job
            job_id      string(required)      Job id of the job
        Output:
            RetCode     int(required)         Op return code: 0: success, others: error code
            Action      string(required)      Action name
            Message     string(not required)  Message: error description
            RunningLog  []string              realtime log that train job produces
    """

    ACTION_NAME = "GetUAITrainRunningLog"

    def __init__(self, pub_key, priv_key, job_id, project_id="", region="", zone=""):
        """Build the request and store ``job_id`` as the ``TrainJobId`` parameter."""
        super(GetUAITrainRunningLogOp, self).__init__(self.ACTION_NAME,
                                                      pub_key,
                                                      priv_key,
                                                      project_id,
                                                      region,
                                                      zone)
        self.cmd_params["TrainJobId"] = job_id

    def _check_args(self):
        """Validate the request parameters.

        Runs the base-class checks, then requires ``TrainJobId`` to be a
        non-empty string.

        Raises:
            RuntimeError: if ``TrainJobId`` is not a string or is empty.
        """
        super(GetUAITrainRunningLogOp, self)._check_args()

        job_id = self.cmd_params["TrainJobId"]
        # isinstance (rather than an exact type() comparison) also accepts
        # str subclasses.
        if not isinstance(job_id, str) or job_id == "":
            raise RuntimeError("job_id shoud be str and is not nil.")
130 changes: 65 additions & 65 deletions uaitrain/arch/pytorch/uargs.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,66 @@
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

def add_uai_args(parser):
uai = parser.add_argument_group('uai-args', 'the UAI related args')

'''
Default work dir. The working dir for the traing job, it will contains:
/data/data --data_dir
/data/output --output_dir
Note: DO NOT CHANGE THIS VALUE
UCloud Train Job Executor Will Set it Automatically
'''
uai.add_argument('--work_dir', type=str, default="/data", help='Default work path')

'''
Default data path used in Training, all data will be downloaded into this path
Please use data in this path as input for Training
Note: DO NOT CHANGE THIS VALUE
UCloud Train Job Executor Will Set it Automatically
'''
uai.add_argument("--data_dir", type=str, default="/data/data", help="Default data path")

'''
Default output path used in Training, files in this path will be uploaded to UFile
after training finished.
You can also assume your checkpoint files inside output_path (If you provided
in the UCloud console), files will also be downloaded into this path befor
Training start
Note: DO NOT CHANGE THIS VALUE
UCloud Train Job Executor Will Set it Automatically
'''
uai.add_argument("--output_dir", type=str, default="/data/output", help="Default output path")

'''
Default tensorboard output path used in Training, iles in this path will be uploaded to UFile
after training finished.
This dir is same as output_dir
Note: DO NOT CHANGE THIS VALUE
UCloud Train Job Executor Will Set it Automatically
'''
uai.add_argument("--log_dir", type=str, default="/data/output", help="Default log path")

'''
Define num_gpus for training
Note: DO NOT CHANGE THIS VALUE
UCloud Train Job Executor Will Set it Automatically
'''
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

def add_uai_args(parser):
    """Register the UAI related command line arguments on ``parser``.

    All options are added to an argument group named 'uai-args'.

    Note: DO NOT CHANGE THESE VALUES.
          UCloud Train Job Executor Will Set them Automatically.
    """
    uai_group = parser.add_argument_group('uai-args', 'the UAI related args')

    # String-valued path options as (flag, default, help), registered in order.
    path_options = (
        # Default work dir. The working dir for the training job; it contains:
        #     /data/data    --data_dir
        #     /data/output  --output_dir
        ('--work_dir', "/data", 'Default work path'),
        # Default data path used in Training; all data will be downloaded
        # into this path. Use data in this path as input for Training.
        ("--data_dir", "/data/data", "Default data path"),
        # Default output path used in Training; files in this path will be
        # uploaded to UFile after training finishes. Checkpoint files placed
        # in the output path (if provided in the UCloud console) will also be
        # downloaded into this path before Training starts.
        ("--output_dir", "/data/output", "Default output path"),
        # Default tensorboard output path used in Training; files in this
        # path will be uploaded to UFile after training finishes.
        # This dir is the same as output_dir.
        ("--log_dir", "/data/output", "Default log path"),
    )
    for flag, default_path, help_text in path_options:
        uai_group.add_argument(flag, type=str, default=default_path, help=help_text)

    # Number of GPUs for training; no default is given.
    uai_group.add_argument("--num_gpus", type=int, help="Num of avaliable gpus")
Empty file added uaitrain/arch_conf/__init__.py
Empty file.
Empty file added uaitrain/arch_conf/tf_conf.py
Empty file.
Empty file added uaitrain/cmd/__init__.py
Empty file.
6 changes: 3 additions & 3 deletions uaitrain/operation/base_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,16 @@ def _add_args(self):
def _parse_args(self, args):
self.pub_key = args['public_key']
self.pri_key = args['private_key']
if args['project_id'] != None:
if 'project_id' in args and args['project_id'] != None:
self.project_id = args['project_id']
else:
self.project_id = ""
if args['region'] != None:
if 'region' in args and args['region'] != None:
self.region = args['region']
else:
self.region = ""

if args['zone'] != None:
if 'zone' in args and args['zone'] != None:
self.zone = args['zone']
else:
self.zone = ""
Expand Down
Empty file.
93 changes: 93 additions & 0 deletions uaitrain/operation/get_realtime_log/base_log_op.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright 2017 The UAI-SDK Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import time
from uai.utils.logger import uai_logger
from uai.utils.logger import printConsoleOnlyError
from uaitrain.operation.base_op import BaseUAITrainOp
from uaitrain.api.get_train_job_running_log import GetUAITrainRunningLogOp
from uaitrain.api.get_train_job_list import GetUAITrainJobListOp

class BaseUAITrainGetRealtimeLogOp(BaseUAITrainOp):
    """Command operation that prints the running log of a UAI Train job.

    Registers a ``log`` sub-command taking ``--job_id``.  When run, it
    repeatedly fetches the job's running log, prints every returned line and
    keeps polling until the job reports one of ``FINISHED_STATUSES``.
    """

    # Seconds to sleep between two polling rounds.
    POLL_INTERVAL_SECONDS = 10
    # Job statuses after which polling stops.
    FINISHED_STATUSES = ('Done', 'Stopped', 'Deleted', 'Error')

    def __init__(self, parser):
        super(BaseUAITrainGetRealtimeLogOp, self).__init__(parser)
        # Raise the console log level to ERROR.
        # NOTE(review): presumably so SDK info logs do not interleave with
        # the job log lines printed by cmd_run -- confirm.
        printConsoleOnlyError()

    def _add_job_info_args(self, job_parser):
        """Add the required ``--job_id`` option to ``job_parser``."""
        info_parser = job_parser.add_argument_group(
            'Job Info Params', 'Job Infos')
        info_parser.add_argument(
            '--job_id',
            type=str,
            required=True,
            help='The <job_id> to query')

    def _add_args(self):
        """Create the ``log`` sub-command and register its arguments."""
        parser = self.parser.add_parser('log', help='Get realtime log of UAI Train Job')
        self.job_parser = parser
        self._add_account_args(parser)
        self._add_job_info_args(parser)

    def _parse_args(self, args):
        """Read the account args (base class) and ``job_id`` from ``args``.

        Returns:
            True once the values have been stored on the instance.
        """
        super(BaseUAITrainGetRealtimeLogOp, self)._parse_args(args)

        self.job_id = args['job_id']
        return True

    def _check_job_running(self):
        """Return True while the job has not reached a finished status.

        Returns False when the status query fails, when the response holds
        no job entry, or when the job status is in ``FINISHED_STATUSES``.
        """
        job_op = GetUAITrainJobListOp(
            pub_key=self.pub_key,
            priv_key=self.pri_key,
            job_id=self.job_id,
            project_id=self.project_id,
            region=self.region,
            zone=self.zone)

        succ, resp = job_op.call_api()
        if succ is False:
            print("Error get job status info. job {0} ".format(self.job_id))
            return False

        # A missing or empty DataSet previously raised KeyError/IndexError;
        # treat it like a failed status query instead.
        data_set = resp.get('DataSet') or []
        if not data_set:
            print("Error get job status info. job {0} ".format(self.job_id))
            return False

        return data_set[0]['Status'] not in self.FINISHED_STATUSES

    def cmd_run(self, args):
        """Poll and print the job's running log until the job finishes.

        Returns:
            False if argument parsing reports failure, True after the job
            is no longer running.
        """
        if self._parse_args(args) is False:
            return False

        while True:
            log_op = GetUAITrainRunningLogOp(
                pub_key=self.pub_key,
                priv_key=self.pri_key,
                job_id=self.job_id,
                project_id=self.project_id,
                region=self.region,
                zone=self.zone)

            succ, resp = log_op.call_api()
            if succ is False:
                # NOTE(review): this retries forever, and the console handler
                # was limited to ERROR in __init__, so this warning may not be
                # visible on the console -- confirm this is intended.
                uai_logger.warning("Error get realtime log info. job {0}, check your job_id, it may be not running.".format(self.job_id))
                time.sleep(self.POLL_INTERVAL_SECONDS)
                continue

            # RunningLog may be absent or None when there is nothing to show.
            for log in resp.get('RunningLog') or []:
                print(log)

            if not self._check_job_running():
                break
            time.sleep(self.POLL_INTERVAL_SECONDS)
        return True
Empty file.
Loading

0 comments on commit cc0c3ac

Please sign in to comment.