-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f04d040
commit 586cfb1
Showing
12 changed files
with
991 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# wedpr专家模式用户手册 | ||
|
||
## 配置 | ||
|
||
1. 左侧用户目录中新建配置文件,文件命名为:config.properties | ||
2. 配置信息参考: | ||
|
||
``` | ||
access_key_id= | ||
access_key_secret= | ||
remote_entrypoints=http://139.159.202.235:8005,http://139.159.202.235:8006 | ||
agency_name=SGD | ||
workspace_path=/user/ppc/milestone2/sgd/ | ||
user=test_user | ||
storage_endpoint=http://192.168.0.18:50070 | ||
``` | ||
|
||
3. 通过前端页面登录,例如:http://139.159.202.235:8005/ | ||
4. 创建个人项目空间,通过【打开jupyter】按钮进入专家模式 | ||
|
||
## 基础功能 | ||
|
||
1. 支持通过launcher启动python,jupyter,终端,文本编辑等功能 | ||
2. 支持在用户目录空间创建/修改/删除配置文件,文本文件,bash,python notebook等格式文件 | ||
3. 通过launcher启动python,jupyter,终端后可以正常执行对应的代码功能 | ||
|
||
## hdfs数据功能 | ||
|
||
1. 支持注册dataset,支持两种方式: pd.DataFrame, hdfs_path | ||
2. 支持更新dataset | ||
|
||
* 详细使用说明参考示例文件:【test_dataset.ipynb】 | ||
|
||
## wedpr任务功能 | ||
|
||
1. 支持配置任务参数 | ||
2. 支持提交psi,建模训练,预测等任务 | ||
3. 支持获取任务结果 | ||
4. 支持对任务结果进行明文处理 | ||
|
||
* 详细使用说明参考示例文件:【test_psi.ipynb】和【test_xgboost.ipynb】 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,4 +6,3 @@ agency_name=SGD | |
workspace_path=/user/wedpr/milestone2/sgd/ | ||
user=test_user | ||
storage_endpoint=http://127.0.0.1:50070 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"['/usr/lib/python3/dist-packages/wedpr_ml_toolkit/', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python\\\\wedpr_ml_toolkit', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\python38.zip', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\DLLs', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib', 'c:\\\\Users\\\\yanxi\\\\anaconda3', '', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\win32', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\win32\\\\lib', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\Pythonwin', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\IPython\\\\extensions', 'C:\\\\Users\\\\yanxi\\\\.ipython']\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"from wedpr_ml_toolkit.config.wedpr_ml_config import WeDPRMlConfigBuilder\n", | ||
"from wedpr_ml_toolkit.wedpr_ml_toolkit import WeDPRMlToolkit\n", | ||
"from wedpr_ml_toolkit.toolkit.dataset_toolkit import DatasetToolkit" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# 读取配置文件\n", | ||
"wedpr_config = WeDPRMlConfigBuilder.build_from_properties_file('config.properties')\n", | ||
"wedpr_ml_toolkit = WeDPRMlToolkit(wedpr_config)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"http://139.159.202.235:50070 /user/ppc/milestone2/sgd/test_user SGD\n", | ||
"/user/ppc/milestone2/sgd/test_user\\d-101\n", | ||
" id y x1 x2 x3 x4 x5 x6 \\\n", | ||
"0 0 1 0.954183 0.652034 0.704070 0.180889 0.025025 0.511596 \n", | ||
"1 1 1 0.302088 0.462222 0.435542 0.029966 0.931294 0.848483 \n", | ||
"2 2 1 0.468104 0.430161 0.239322 0.588153 0.470668 0.225856 \n", | ||
"3 3 0 0.152269 0.811666 0.834451 0.354288 0.635447 0.062092 \n", | ||
"4 4 0 0.841470 0.800512 0.451507 0.118651 0.748845 0.557916 \n", | ||
"\n", | ||
" x7 x8 x9 x10 \n", | ||
"0 0.529848 0.759689 0.159081 0.556419 \n", | ||
"1 0.962787 0.224096 0.464418 0.208487 \n", | ||
"2 0.564879 0.730366 0.394245 0.299081 \n", | ||
"3 0.424057 0.202234 0.577448 0.636958 \n", | ||
"4 0.030906 0.514350 0.340864 0.123303 \n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# 注册 dataset,支持两种方式: pd.Dataframe, hdfs_path\n", | ||
"# 1. pd.Dataframe\n", | ||
"df = pd.DataFrame({\n", | ||
" 'id': np.arange(0, 100), # id列,顺序整数\n", | ||
" 'y': np.random.randint(0, 2, size=100),\n", | ||
" # x1到x10列,随机数\n", | ||
" **{f'x{i}': np.random.rand(100) for i in range(1, 11)}\n", | ||
"})\n", | ||
"\n", | ||
"dataset1 = DatasetToolkit(storage_entrypoint=wedpr_ml_toolkit.get_storage_entry_point(),\n", | ||
" storage_workspace=wedpr_config.user_config.get_workspace_path(),\n", | ||
" dataset_owner='flyhuang1',\n", | ||
" agency=wedpr_config.user_config.agency_name,\n", | ||
" values=df,\n", | ||
" is_label_holder=True)\n", | ||
"print(dataset1.storage_client.storage_client.endpoint, dataset1.storage_workspace, dataset1.agency)\n", | ||
"dataset1.storage_client = None # 本地测试时跳过hdfs上传/下载过程\n", | ||
"dataset1.save_values(path='d-101')\n", | ||
"print(dataset1.dataset_path)\n", | ||
"print(dataset1.values.head())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"http://139.159.202.235:50070 /user/ppc/milestone2/sgd/test_user WeBank\n", | ||
"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\n", | ||
"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\n", | ||
" id z1 z2 z3 z4 z5 z6 z7 \\\n", | ||
"0 0 0.597205 0.942475 0.886443 0.560584 0.254432 0.370152 0.076031 \n", | ||
"1 1 0.778616 0.607374 0.616211 0.602282 0.385989 0.816963 0.756814 \n", | ||
"2 2 0.999795 0.596794 0.240741 0.241070 0.857676 0.342412 0.066459 \n", | ||
"3 3 0.968410 0.895163 0.636140 0.978791 0.237098 0.095272 0.938806 \n", | ||
"4 4 0.921513 0.454901 0.004514 0.769216 0.627185 0.676253 0.184952 \n", | ||
"\n", | ||
" z8 z9 z10 \n", | ||
"0 0.587627 0.851390 0.864929 \n", | ||
"1 0.661537 0.865674 0.050091 \n", | ||
"2 0.473916 0.080120 0.477873 \n", | ||
"3 0.452399 0.953515 0.405465 \n", | ||
"4 0.877475 0.316322 0.139290 \n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# 2. hdfs_path\n", | ||
"dataset2 = DatasetToolkit(storage_entrypoint=wedpr_ml_toolkit.get_storage_entry_point(), \n", | ||
" storage_workspace=wedpr_config.user_config.get_workspace_path(), \n", | ||
" dataset_owner='flyhuang',\n", | ||
" dataset_path=\"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\", \n", | ||
" agency=\"WeBank\")\n", | ||
"print(dataset2.storage_client.storage_client.endpoint, dataset2.storage_workspace, dataset2.agency)\n", | ||
"print(dataset2.dataset_path)\n", | ||
"dataset2.storage_client = None # 本地测试时跳过hdfs上传/下载过程\n", | ||
"\n", | ||
"# 提供本地测试数据\n", | ||
"if dataset2.storage_client is None:\n", | ||
" # 支持更新dataset的values数据\n", | ||
" df2 = pd.DataFrame({\n", | ||
" 'id': np.arange(0, 100), # id列,顺序整数\n", | ||
" **{f'z{i}': np.random.rand(100) for i in range(1, 11)} # x1到x10列,随机数\n", | ||
" })\n", | ||
" dataset2.update_values(values=df2)\n", | ||
" dataset2.save_values()\n", | ||
" print(dataset2.dataset_path)\n", | ||
" print(dataset2.values.head())\n", | ||
"\n", | ||
"# 对于己方数据集支持load_values,其他方数据集无需load_values,可直接使用\n", | ||
"if dataset2.storage_client is not None:\n", | ||
" # 仅支持load本机构hdfs的数据集\n", | ||
" dataset2.load_values(header=0)\n", | ||
" print(dataset2.dataset_path)\n", | ||
" print(dataset2.values.head())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"/user/ppc/milestone2/sgd/test_user\\d-101\n", | ||
" id y x1 x2 x3 x4 x5 x6 \\\n", | ||
"0 0 1 0.954183 0.652034 0.704070 0.180889 0.025025 0.511596 \n", | ||
"1 1 1 0.302088 0.462222 0.435542 0.029966 0.931294 0.848483 \n", | ||
"2 2 1 0.468104 0.430161 0.239322 0.588153 0.470668 0.225856 \n", | ||
"3 3 0 0.152269 0.811666 0.834451 0.354288 0.635447 0.062092 \n", | ||
"4 4 0 0.841470 0.800512 0.451507 0.118651 0.748845 0.557916 \n", | ||
"\n", | ||
" x7 x8 x9 x10 \n", | ||
"0 0.529848 0.759689 0.159081 0.556419 \n", | ||
"1 0.962787 0.224096 0.464418 0.208487 \n", | ||
"2 0.564879 0.730366 0.394245 0.299081 \n", | ||
"3 0.424057 0.202234 0.577448 0.636958 \n", | ||
"4 0.030906 0.514350 0.340864 0.123303 \n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# 更新数据集\n", | ||
"if dataset1.storage_client is not None:\n", | ||
" dataset1.update_values(\n", | ||
" path='/user/ppc/milestone2/sgd/flyhuang1/d-9606704699156485')\n", | ||
" dataset1.load_values(header=0)\n", | ||
"print(dataset1.dataset_path)\n", | ||
"print(dataset1.values.head())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "base", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.