Skip to content

Commit

Permalink
update test ml toolkit
Browse files Browse the repository at this point in the history
  • Loading branch information
yanxinyi620 committed Oct 30, 2024
1 parent f04d040 commit 586cfb1
Show file tree
Hide file tree
Showing 12 changed files with 991 additions and 60 deletions.
42 changes: 42 additions & 0 deletions python/wedpr_ml_toolkit/test/UserGuide.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# wedpr专家模式用户手册

## 配置

1. 左侧用户目录中新建配置文件,文件命名为:config.properties
2. 配置信息参考:

```
access_key_id=
access_key_secret=
remote_entrypoints=http://139.159.202.235:8005,http://139.159.202.235:8006
agency_name=SGD
workspace_path=/user/ppc/milestone2/sgd/
user=test_user
storage_endpoint=http://192.168.0.18:50070
```

3. 通过前端页面登录,例如:http://139.159.202.235:8005/
4. 创建个人项目空间,通过【打开jupyter】按钮进入专家模式

## 基础功能

1. 支持通过launcher启动python,jupyter,终端,文本编辑等功能
2. 支持在用户目录空间创建/修改/删除配置文件,文本文件,bash,python notebook等格式文件
3. 通过launcher启动python,jupyter,终端后可以正常执行对应的代码功能

## hdfs数据功能

1. 支持注册dataset,支持两种方式: pd.DataFrame, hdfs_path
2. 支持更新dataset

* 详细使用说明参考示例文件:【test_dataset.ipynb】

## wedpr任务功能

1. 支持配置任务参数
2. 支持提交psi,建模训练,预测等任务
3. 支持获取任务结果
4. 支持对任务结果进行明文处理

* 详细使用说明参考示例文件:【test_psi.ipynb】和【test_xgboost.ipynb】
1 change: 0 additions & 1 deletion python/wedpr_ml_toolkit/test/config.properties
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@ agency_name=SGD
workspace_path=/user/wedpr/milestone2/sgd/
user=test_user
storage_endpoint=http://127.0.0.1:50070

209 changes: 209 additions & 0 deletions python/wedpr_ml_toolkit/test/test_dataset.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['/usr/lib/python3/dist-packages/wedpr_ml_toolkit/', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python\\\\wedpr_ml_toolkit', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python', 'd:\\\\github\\\\wedpr3.0\\\\WeDPR-Component\\\\python', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\python38.zip', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\DLLs', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib', 'c:\\\\Users\\\\yanxi\\\\anaconda3', '', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\win32', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\win32\\\\lib', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\Pythonwin', 'c:\\\\Users\\\\yanxi\\\\anaconda3\\\\lib\\\\site-packages\\\\IPython\\\\extensions', 'C:\\\\Users\\\\yanxi\\\\.ipython']\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from wedpr_ml_toolkit.config.wedpr_ml_config import WeDPRMlConfigBuilder\n",
"from wedpr_ml_toolkit.wedpr_ml_toolkit import WeDPRMlToolkit\n",
"from wedpr_ml_toolkit.toolkit.dataset_toolkit import DatasetToolkit"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# 读取配置文件\n",
"wedpr_config = WeDPRMlConfigBuilder.build_from_properties_file('config.properties')\n",
"wedpr_ml_toolkit = WeDPRMlToolkit(wedpr_config)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://139.159.202.235:50070 /user/ppc/milestone2/sgd/test_user SGD\n",
"/user/ppc/milestone2/sgd/test_user\\d-101\n",
" id y x1 x2 x3 x4 x5 x6 \\\n",
"0 0 1 0.954183 0.652034 0.704070 0.180889 0.025025 0.511596 \n",
"1 1 1 0.302088 0.462222 0.435542 0.029966 0.931294 0.848483 \n",
"2 2 1 0.468104 0.430161 0.239322 0.588153 0.470668 0.225856 \n",
"3 3 0 0.152269 0.811666 0.834451 0.354288 0.635447 0.062092 \n",
"4 4 0 0.841470 0.800512 0.451507 0.118651 0.748845 0.557916 \n",
"\n",
" x7 x8 x9 x10 \n",
"0 0.529848 0.759689 0.159081 0.556419 \n",
"1 0.962787 0.224096 0.464418 0.208487 \n",
"2 0.564879 0.730366 0.394245 0.299081 \n",
"3 0.424057 0.202234 0.577448 0.636958 \n",
"4 0.030906 0.514350 0.340864 0.123303 \n"
]
}
],
"source": [
"# 注册 dataset,支持两种方式: pd.DataFrame, hdfs_path\n",
"# 1. pd.DataFrame\n",
"df = pd.DataFrame({\n",
" 'id': np.arange(0, 100), # id列,顺序整数\n",
" 'y': np.random.randint(0, 2, size=100),\n",
" # x1到x10列,随机数\n",
" **{f'x{i}': np.random.rand(100) for i in range(1, 11)}\n",
"})\n",
"\n",
"dataset1 = DatasetToolkit(storage_entrypoint=wedpr_ml_toolkit.get_storage_entry_point(),\n",
" storage_workspace=wedpr_config.user_config.get_workspace_path(),\n",
" dataset_owner='flyhuang1',\n",
" agency=wedpr_config.user_config.agency_name,\n",
" values=df,\n",
" is_label_holder=True)\n",
"print(dataset1.storage_client.storage_client.endpoint, dataset1.storage_workspace, dataset1.agency)\n",
"dataset1.storage_client = None # 本地测试时跳过hdfs上传/下载过程\n",
"dataset1.save_values(path='d-101')\n",
"print(dataset1.dataset_path)\n",
"print(dataset1.values.head())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://139.159.202.235:50070 /user/ppc/milestone2/sgd/test_user WeBank\n",
"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\n",
"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\n",
" id z1 z2 z3 z4 z5 z6 z7 \\\n",
"0 0 0.597205 0.942475 0.886443 0.560584 0.254432 0.370152 0.076031 \n",
"1 1 0.778616 0.607374 0.616211 0.602282 0.385989 0.816963 0.756814 \n",
"2 2 0.999795 0.596794 0.240741 0.241070 0.857676 0.342412 0.066459 \n",
"3 3 0.968410 0.895163 0.636140 0.978791 0.237098 0.095272 0.938806 \n",
"4 4 0.921513 0.454901 0.004514 0.769216 0.627185 0.676253 0.184952 \n",
"\n",
" z8 z9 z10 \n",
"0 0.587627 0.851390 0.864929 \n",
"1 0.661537 0.865674 0.050091 \n",
"2 0.473916 0.080120 0.477873 \n",
"3 0.452399 0.953515 0.405465 \n",
"4 0.877475 0.316322 0.139290 \n"
]
}
],
"source": [
"# 2. hdfs_path\n",
"dataset2 = DatasetToolkit(storage_entrypoint=wedpr_ml_toolkit.get_storage_entry_point(), \n",
" storage_workspace=wedpr_config.user_config.get_workspace_path(), \n",
" dataset_owner='flyhuang',\n",
" dataset_path=\"/user/ppc/milestone2/webank/flyhuang/d-9606695119693829\", \n",
" agency=\"WeBank\")\n",
"print(dataset2.storage_client.storage_client.endpoint, dataset2.storage_workspace, dataset2.agency)\n",
"print(dataset2.dataset_path)\n",
"dataset2.storage_client = None # 本地测试时跳过hdfs上传/下载过程\n",
"\n",
"# 提供本地测试数据\n",
"if dataset2.storage_client is None:\n",
" # 支持更新dataset的values数据\n",
" df2 = pd.DataFrame({\n",
" 'id': np.arange(0, 100), # id列,顺序整数\n",
" **{f'z{i}': np.random.rand(100) for i in range(1, 11)} # z1到z10列,随机数\n",
" })\n",
" dataset2.update_values(values=df2)\n",
" dataset2.save_values()\n",
" print(dataset2.dataset_path)\n",
" print(dataset2.values.head())\n",
"\n",
"# 对于己方数据集支持load_values,其他方数据集无需load_values,可直接使用\n",
"if dataset2.storage_client is not None:\n",
" # 仅支持load本机构hdfs的数据集\n",
" dataset2.load_values(header=0)\n",
" print(dataset2.dataset_path)\n",
" print(dataset2.values.head())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/user/ppc/milestone2/sgd/test_user\\d-101\n",
" id y x1 x2 x3 x4 x5 x6 \\\n",
"0 0 1 0.954183 0.652034 0.704070 0.180889 0.025025 0.511596 \n",
"1 1 1 0.302088 0.462222 0.435542 0.029966 0.931294 0.848483 \n",
"2 2 1 0.468104 0.430161 0.239322 0.588153 0.470668 0.225856 \n",
"3 3 0 0.152269 0.811666 0.834451 0.354288 0.635447 0.062092 \n",
"4 4 0 0.841470 0.800512 0.451507 0.118651 0.748845 0.557916 \n",
"\n",
" x7 x8 x9 x10 \n",
"0 0.529848 0.759689 0.159081 0.556419 \n",
"1 0.962787 0.224096 0.464418 0.208487 \n",
"2 0.564879 0.730366 0.394245 0.299081 \n",
"3 0.424057 0.202234 0.577448 0.636958 \n",
"4 0.030906 0.514350 0.340864 0.123303 \n"
]
}
],
"source": [
"# 更新数据集\n",
"if dataset1.storage_client is not None:\n",
" dataset1.update_values(\n",
" path='/user/ppc/milestone2/sgd/flyhuang1/d-9606704699156485')\n",
" dataset1.load_values(header=0)\n",
"print(dataset1.dataset_path)\n",
"print(dataset1.values.head())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 586cfb1

Please sign in to comment.