diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index fd45114..6d59da9 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -3,12 +3,10 @@
-
-
-
-
-
-
+
+
+
+
@@ -78,6 +76,10 @@
+
+
+
+
@@ -85,10 +87,6 @@
-
-
-
-
diff --git a/boss_spider/.ipynb_checkpoints/boss_job-checkpoint.ipynb b/boss_spider/.ipynb_checkpoints/boss_job-checkpoint.ipynb
index 646a26f..96e2942 100644
--- a/boss_spider/.ipynb_checkpoints/boss_job-checkpoint.ipynb
+++ b/boss_spider/.ipynb_checkpoints/boss_job-checkpoint.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 167,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -14,9 +14,9 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 9,
"metadata": {
- "collapsed": true
+ "scrolled": true
},
"outputs": [
{
@@ -219,7 +219,7 @@
"[1167 rows x 9 columns]"
]
},
- "execution_count": 2,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -231,7 +231,7 @@
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": 10,
"metadata": {
"scrolled": true
},
@@ -249,7 +249,7 @@
},
{
"cell_type": "code",
- "execution_count": 70,
+ "execution_count": 11,
"metadata": {
"scrolled": true
},
@@ -310,7 +310,7 @@
"cell_type": "code",
"execution_count": 106,
"metadata": {
- "scrolled": true
+ "collapsed": true
},
"outputs": [
{
@@ -389,7 +389,9 @@
{
"cell_type": "code",
"execution_count": 107,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [
{
"data": {
@@ -414,14 +416,14 @@
}
],
"source": [
- "xian_java\n"
+ "xian_java"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
- "scrolled": true
+ "collapsed": true
},
"outputs": [
{
@@ -492,7 +494,9 @@
{
"cell_type": "code",
"execution_count": 109,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [
{
"data": {
@@ -522,7 +526,7 @@
},
{
"cell_type": "code",
- "execution_count": 112,
+ "execution_count": 12,
"metadata": {
"scrolled": false
},
@@ -538,14 +542,14 @@
" });\n",
"\n",
"\n",
- "
\n",
+ " \n",
"\n",
"\n",
"\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 112,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -920,61 +924,122 @@
},
{
"cell_type": "code",
- "execution_count": 130,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
- "provinces = ['北京', '上海', '广州', '深圳', '成都', '杭州', '南京', '武汉', '西安']\n",
- "provinces2 = ['北京', '上海', '广东', '四川', '浙江', '江苏', '湖北', '陕西']"
+ "provinces = ['北京', '上海', '广州', '深圳', '成都', '杭州', '南京', '武汉', '西安']"
]
},
{
"cell_type": "code",
- "execution_count": 131,
+ "execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
- "Python_values = ['25.1',\n",
+ "Python_values = [\n",
+ " '25.1',\n",
"'24.5',\n",
"'14.5',\n",
"'25.3',\n",
+ "'13.9',\n",
"'14.9',\n",
"'14.4',\n",
"'12.4',\n",
- "'13.4',\n",
+ "'13.4'\n",
+ "]\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Java_values = [\n",
+ " '14.7',\n",
+ "'32.4',\n",
+ "'16.5',\n",
+ "'14.5',\n",
+ "'15.9',\n",
+ "'32.8',\n",
+ "'15.4',\n",
+ "'15.3',\n",
+ "'15.7'\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataanalyse_values = [\n",
+ " '24.6',\n",
+ "'15.8',\n",
+ "'14.6',\n",
+ "'32.6',\n",
+ "'5.5',\n",
+ "'14.6',\n",
+ "'9.2',\n",
+ "'5.6',\n",
+ "'7.8'\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "product_values = [\n",
+ "'32.7',\n",
+ "'23.3',\n",
+ "'17.2',\n",
+ "'25.9',\n",
+ "'16.1',\n",
+ "'24.9',\n",
+ "'25',\n",
+ "'18',\n",
+ "'15.6'\n",
"]"
]
},
{
"cell_type": "code",
- "execution_count": 132,
+ "execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[['北京', '25.1'],\n",
- " ['上海', '24.5'],\n",
- " ['广东', '14.5'],\n",
- " ['四川', '25.3'],\n",
- " ['浙江', '14.9'],\n",
- " ['江苏', '14.4'],\n",
- " ['湖北', '12.4'],\n",
- " ['陕西', '13.4']]"
+ "[24.275000000000002,\n",
+ " 24.0,\n",
+ " 15.7,\n",
+ " 24.575000000000003,\n",
+ " 12.85,\n",
+ " 21.799999999999997,\n",
+ " 16.0,\n",
+ " 12.825000000000001,\n",
+ " 13.125]"
]
},
- "execution_count": 132,
+ "execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "[list(z) for z in zip(provinces2, Python_values)]"
+ "total_values = [(float(Python_values[i]) + float(Java_values[i]) + float(dataanalyse_values[i]) + float(product_values[i]))/4 for i in range(0, len(Python_values))]\n",
+ "total_values"
]
},
{
"cell_type": "code",
- "execution_count": 149,
+ "execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
@@ -985,8 +1050,8 @@
" .add(\"\", [list(z) for z in zip(provinces, Python_values)])\n",
" .set_series_opts(label_opts=opts.LabelOpts(is_show=False))\n",
" .set_global_opts(\n",
- " visualmap_opts=opts.VisualMapOpts(is_piecewise=True, max_=26, min_=10),\n",
- " title_opts=opts.TitleOpts(title=\"Python 招聘薪资情况\"),\n",
+ " visualmap_opts=opts.VisualMapOpts(is_piecewise=True, max_=26, min_=12),\n",
+ " title_opts=opts.TitleOpts(title=\"Python 需求情况\"),\n",
" )\n",
" )\n",
" return c"
@@ -994,7 +1059,7 @@
},
{
"cell_type": "code",
- "execution_count": 150,
+ "execution_count": 53,
"metadata": {},
"outputs": [
{
@@ -1008,14 +1073,14 @@
" });\n",
"\n",
"\n",
- " \n",
+ " \n",
"\n",
"\n",
"\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 150,
+ "execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
@@ -1196,7 +1269,7 @@
},
{
"cell_type": "code",
- "execution_count": 153,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
@@ -1204,11 +1277,11 @@
" c = (\n",
" Geo()\n",
" .add_schema(maptype=\"china\")\n",
- " .add(\"\", [list(z) for z in zip(provinces, Python_values)], type_=ChartType.HEATMAP,)\n",
+ " .add(\"\", [list(z) for z in zip(provinces, total_values)], type_=ChartType.HEATMAP,)\n",
" .set_series_opts(label_opts=opts.LabelOpts(is_show=False))\n",
" .set_global_opts(\n",
- " visualmap_opts=opts.VisualMapOpts(is_piecewise=False, max_=26, min_=10),\n",
- " title_opts=opts.TitleOpts(title=\"Python 招聘薪资情况\"),\n",
+ " visualmap_opts=opts.VisualMapOpts(is_piecewise=False, max_=25, min_=12),\n",
+ " title_opts=opts.TitleOpts(title=\"需求热力图\"),\n",
" )\n",
" )\n",
" return c"
@@ -1216,7 +1289,7 @@
},
{
"cell_type": "code",
- "execution_count": 154,
+ "execution_count": 57,
"metadata": {},
"outputs": [
{
@@ -1230,14 +1303,14 @@
" });\n",
"\n",
"\n",
- " \n",
+ " \n",
"\n",
"\n",
"\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 154,
+ "execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
@@ -1417,7 +1498,7 @@
},
{
"cell_type": "code",
- "execution_count": 216,
+ "execution_count": 58,
"metadata": {
"scrolled": true
},
@@ -1443,7 +1524,7 @@
},
{
"cell_type": "code",
- "execution_count": 217,
+ "execution_count": 59,
"metadata": {
"scrolled": true
},
@@ -1459,95 +1540,95 @@
" ('金融产品经理', 4),\n",
" ('后台产品经理', 3),\n",
" ('用户产品经理', 3),\n",
- " ('财务产品经理', 3),\n",
" ('移动产品经理', 3),\n",
+ " ('财务产品经理', 3),\n",
" ('商业化产品经理', 2),\n",
+ " ('资深产品经理', 2),\n",
+ " ('广告产品经理', 2),\n",
+ " ('CRM产品经理', 2),\n",
+ " ('增长产品经理', 2),\n",
" ('搜索产品经理', 2),\n",
+ " ('黄金流程产品经理', 2),\n",
+ " ('平台产品经理', 2),\n",
" ('用户增长产品经理', 2),\n",
+ " ('APP产品经理', 2),\n",
" ('C端产品经理', 2),\n",
" ('社区产品经理', 2),\n",
- " ('平台产品经理', 2),\n",
- " ('黄金流程产品经理', 2),\n",
- " ('资深产品经理', 2),\n",
- " ('APP产品经理', 2),\n",
- " ('CRM产品经理', 2),\n",
- " ('增长产品经理', 2),\n",
- " ('广告产品经理', 2),\n",
- " ('产品经理 (MJ000472)', 1),\n",
- " ('产品经理-北美业务', 1),\n",
- " ('信息化产品经理', 1),\n",
- " ('社区/用户运营产品经理', 1),\n",
- " ('产品经理-电商', 1),\n",
- " ('产品经理-BI方向', 1),\n",
- " ('产品经理(前端)', 1),\n",
- " ('算法产品经理', 1),\n",
- " ('用户端产品经理(金融)', 1),\n",
- " ('策略产品经理(北京)', 1),\n",
- " ('高级产品经理(转化)', 1),\n",
- " ('直播产品经理', 1),\n",
- " ('产品经理-可视化', 1),\n",
- " ('高级客户端产品经理', 1),\n",
- " ('ELD产品经理', 1),\n",
+ " ('产品经理(J16880)', 1),\n",
" ('高级CRM产品经理', 1),\n",
- " ('开放平台产品经理', 1),\n",
- " ('物流产品经理', 1),\n",
- " ('高级社区产品经理', 1),\n",
+ " ('大数据产品经理', 1),\n",
" ('ERP产品经理', 1),\n",
- " ('分期商城产品经理', 1),\n",
- " ('车载产品经理', 1),\n",
- " ('支付产品经理', 1),\n",
+ " ('ELD产品经理', 1),\n",
+ " ('高级地图产品经理', 1),\n",
+ " ('营销产品经理', 1),\n",
" ('高级数据产品经理', 1),\n",
- " ('医疗产品经理', 1),\n",
- " ('京东产品经理', 1),\n",
- " ('调度策略产品经理(北京)', 1),\n",
- " ('产品经理(J16880)', 1),\n",
- " ('文娱-用户产品经理', 1),\n",
- " ('大数据产品经理', 1),\n",
- " ('高级策略产品经理', 1),\n",
- " ('产品经理(边缘计算方向)', 1),\n",
- " ('商业策略产品经理', 1),\n",
- " ('gis产品经理', 1),\n",
- " ('推荐产品经理', 1),\n",
- " ('高级增长产品经理', 1),\n",
" ('BI产品经理', 1),\n",
+ " ('ToB产品经理', 1),\n",
" ('高级用户产品经理', 1),\n",
- " ('安全产品经理', 1),\n",
- " ('视觉算法/产品经理', 1),\n",
- " ('EHR产品经理', 1),\n",
" ('IT产品经理', 1),\n",
- " ('广告高级产品经理', 1),\n",
- " ('会员产品经理', 1),\n",
- " ('Saas后台产品经理', 1),\n",
+ " ('高级客户端产品经理', 1),\n",
+ " ('高级商业产品经理', 1),\n",
+ " ('推荐策略产品经理', 1),\n",
+ " ('AI产品经理', 1),\n",
+ " ('供应链产品经理', 1),\n",
+ " ('推荐产品经理', 1),\n",
+ " ('售前产品经理(安全)', 1),\n",
+ " ('社区/用户运营产品经理', 1),\n",
+ " ('产品经理-收益方向', 1),\n",
+ " ('分期商城产品经理', 1),\n",
" ('初级产品经理', 1),\n",
+ " ('产品经理(边缘计算方向)', 1),\n",
+ " ('产品经理(saas)', 1),\n",
+ " ('资深产品经理(北京)', 1),\n",
+ " ('产品经理-可视化', 1),\n",
+ " ('学生端产品经理 (MJ000132)', 1),\n",
+ " ('信息化产品经理', 1),\n",
+ " ('高级策略产品经理', 1),\n",
+ " ('产品经理-北美业务', 1),\n",
+ " ('安全产品经理', 1),\n",
+ " ('EHR产品经理', 1),\n",
+ " ('支付产品经理', 1),\n",
" ('流量产品经理', 1),\n",
- " ('高级地图产品经理', 1),\n",
- " ('ToB产品经理', 1),\n",
- " ('产品经理-收益方向', 1),\n",
- " ('AI产品经理', 1),\n",
+ " ('策略产品经理(北京)', 1),\n",
+ " ('内容产品经理', 1),\n",
+ " ('中台产品经理', 1),\n",
+ " ('高级产品经理(转化)', 1),\n",
+ " ('调度策略产品经理(北京)', 1),\n",
+ " ('产品经理-BI方向', 1),\n",
+ " ('实习产品经理', 1),\n",
+ " ('会员产品经理', 1),\n",
+ " ('高级社区产品经理', 1),\n",
+ " ('文娱-用户产品经理', 1),\n",
" ('产品经理岗', 1),\n",
- " ('营销产品经理', 1),\n",
- " ('前台产品经理', 1),\n",
+ " ('直播产品经理', 1),\n",
+ " ('京东产品经理', 1),\n",
" ('售前产品经理', 1),\n",
- " ('WMS产品经理', 1),\n",
- " ('推荐策略产品经理', 1),\n",
- " ('供应链产品经理', 1),\n",
- " ('产品经理(saas)', 1),\n",
- " ('售后产品经理', 1),\n",
+ " ('产品经理 (MJ000472)', 1),\n",
+ " ('广告高级产品经理', 1),\n",
" ('音乐榜单高级产品经理', 1),\n",
+ " ('视觉算法/产品经理', 1),\n",
+ " ('产品经理(前端)', 1),\n",
+ " ('产品经理(资管)', 1),\n",
+ " ('售后产品经理', 1),\n",
+ " ('商业策略产品经理', 1),\n",
+ " ('产品经理-电商', 1),\n",
" ('产品经理(金融数据)', 1),\n",
- " ('学生端产品经理 (MJ000132)', 1),\n",
- " ('高级商业产品经理', 1),\n",
- " ('售前产品经理(安全)', 1),\n",
- " ('中台产品经理', 1),\n",
+ " ('开放平台产品经理', 1),\n",
+ " ('高级增长产品经理', 1),\n",
+ " ('gis产品经理', 1),\n",
+ " ('用户端产品经理(金融)', 1),\n",
+ " ('算法产品经理', 1),\n",
+ " ('医疗产品经理', 1),\n",
+ " ('Saas后台产品经理', 1),\n",
+ " ('车载产品经理', 1),\n",
+ " ('WMS产品经理', 1),\n",
" ('后端产品经理', 1),\n",
- " ('资深产品经理(北京)', 1),\n",
- " ('产品经理(资管)', 1),\n",
- " ('实习产品经理', 1),\n",
+ " ('物流产品经理', 1),\n",
" ('硬件产品经理', 1),\n",
- " ('内容产品经理', 1)]"
+ " ('前台产品经理', 1)]"
]
},
- "execution_count": 217,
+ "execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
@@ -1562,6 +1643,13 @@
"beijing_product_jobname_pd_word"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": 203,
@@ -5511,6 +5599,453 @@
"source": [
"wordcloud_base(beijing_product_jobname_pd_word).render_notebook()"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "BOSS直聘 52\n",
+ "京东集团 45\n",
+ "京东数字科技 18\n",
+ "VIPKID 18\n",
+ "旷视MEGVII 14\n",
+ " ..\n",
+ "IMS 1\n",
+ "金吉列留学 1\n",
+ "安一恒通 1\n",
+ "天启慧眼 1\n",
+ "腾信软创科技 1\n",
+ "Name: company_name, Length: 627, dtype: int64"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "beijing['company_name'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(beijing['company_name'].value_counts().index.tolist()[:10])\n",
+ "bar.add_yaxis(\"北京\", beijing['company_name'].value_counts().values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"北京企业招聘岗位数量\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(nanjing['company_name'].value_counts().index.tolist()[:10])\n",
+ "bar.add_yaxis(\"南京\", nanjing['company_name'].value_counts().values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"南京企业招聘岗位数量\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
}
],
"metadata": {
diff --git a/boss_spider/.ipynb_checkpoints/company_of_job-checkpoint.ipynb b/boss_spider/.ipynb_checkpoints/company_of_job-checkpoint.ipynb
new file mode 100644
index 0000000..43c0205
--- /dev/null
+++ b/boss_spider/.ipynb_checkpoints/company_of_job-checkpoint.ipynb
@@ -0,0 +1,5837 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 273,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from pyecharts.charts import Bar, Geo, WordCloud, Grid, Line, Scatter, Pie\n",
+ "from pyecharts import options as opts\n",
+ "from pyecharts.globals import ChartType, SymbolType"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 247,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing = pd.read_csv(\"beijing_data.csv\")\n",
+ "shanghai = pd.read_csv(\"shanghai_data.csv\")\n",
+ "shenzhen = pd.read_csv(\"shenzhen_data.csv\")\n",
+ "guangzhou = pd.read_csv(\"guangzhou_data.csv\")\n",
+ "hangzhou = pd.read_csv(\"hangzhou_data.csv\")\n",
+ "nanjing = pd.read_csv(\"nanjing_data.csv\")\n",
+ "wuhan = pd.read_csv(\"wuhan_data.csv\")\n",
+ "xian = pd.read_csv(\"xian_data.csv\")\n",
+ "chengdu = pd.read_csv(\"chengdu_data.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 255,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " company_name | \n",
+ " uri | \n",
+ " salary | \n",
+ " site | \n",
+ " year | \n",
+ " edu | \n",
+ " job_name | \n",
+ " city | \n",
+ " job_type | \n",
+ " avg_salary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 中国电信云 | \n",
+ " https://www.zhipin.com/job_detail/11266fc18dc1... | \n",
+ " 20-40K·17薪 | \n",
+ " 北京 海淀区 西山 | \n",
+ " 经验不限 | \n",
+ " 本科 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 奇虎360 | \n",
+ " https://www.zhipin.com/job_detail/2a3103941dc2... | \n",
+ " 20-40K·15薪 | \n",
+ " 北京 朝阳区 酒仙桥 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " VIPKID | \n",
+ " https://www.zhipin.com/job_detail/2dd7f2760947... | \n",
+ " 20-40K·14薪 | \n",
+ " 北京 朝阳区 十里堡 | \n",
+ " 5-10年 | \n",
+ " 本科 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 天阳科技 | \n",
+ " https://www.zhipin.com/job_detail/a0c8485a448b... | \n",
+ " 12-24K | \n",
+ " 北京 石景山区 八宝山 | \n",
+ " 1-3年 | \n",
+ " 本科 | \n",
+ " python工程师 | \n",
+ " beijing | \n",
+ " python | \n",
+ " 18 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 武汉佰钧成 | \n",
+ " https://www.zhipin.com/job_detail/d6627bf7c1e2... | \n",
+ " 12-17K | \n",
+ " 北京 朝阳区 三元桥 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " python开发 | \n",
+ " beijing | \n",
+ " python | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 9820 | \n",
+ " 第壹街舞 | \n",
+ " https://www.zhipin.com/job_detail/23f4fcf1e936... | \n",
+ " 15-30K·13薪 | \n",
+ " 成都 武侯区 跳伞塔 | \n",
+ " 3-5年 | \n",
+ " 本科 | \n",
+ " 产品经理-ERP项目经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 9821 | \n",
+ " 易停车 | \n",
+ " https://www.zhipin.com/job_detail/1bde3c5dc4c9... | \n",
+ " 6-8K | \n",
+ " 成都 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " 产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 9822 | \n",
+ " 金瑞麒科技 | \n",
+ " https://www.zhipin.com/job_detail/333b26dcf515... | \n",
+ " 10-15K | \n",
+ " 成都 双流区 华阳 | \n",
+ " 3-5年 | \n",
+ " 本科 | \n",
+ " 产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " 9823 | \n",
+ " 朗培商学院 | \n",
+ " https://www.zhipin.com/job_detail/b711ea9545d2... | \n",
+ " 11-20K | \n",
+ " 成都 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " 中高级移动产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " 9824 | \n",
+ " 微品致远 | \n",
+ " https://www.zhipin.com/job_detail/03168bcffeca... | \n",
+ " 8-11K | \n",
+ " 成都 武侯区 高升桥 | \n",
+ " 1-3年 | \n",
+ " 本科 | \n",
+ " 产品经理(成都) | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
9825 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " company_name uri \\\n",
+ "0 中国电信云 https://www.zhipin.com/job_detail/11266fc18dc1... \n",
+ "1 奇虎360 https://www.zhipin.com/job_detail/2a3103941dc2... \n",
+ "2 VIPKID https://www.zhipin.com/job_detail/2dd7f2760947... \n",
+ "3 天阳科技 https://www.zhipin.com/job_detail/a0c8485a448b... \n",
+ "4 武汉佰钧成 https://www.zhipin.com/job_detail/d6627bf7c1e2... \n",
+ "... ... ... \n",
+ "9820 第壹街舞 https://www.zhipin.com/job_detail/23f4fcf1e936... \n",
+ "9821 易停车 https://www.zhipin.com/job_detail/1bde3c5dc4c9... \n",
+ "9822 金瑞麒科技 https://www.zhipin.com/job_detail/333b26dcf515... \n",
+ "9823 朗培商学院 https://www.zhipin.com/job_detail/b711ea9545d2... \n",
+ "9824 微品致远 https://www.zhipin.com/job_detail/03168bcffeca... \n",
+ "\n",
+ " salary site year edu job_name city job_type \\\n",
+ "0 20-40K·17薪 北京 海淀区 西山 经验不限 本科 Python beijing python \n",
+ "1 20-40K·15薪 北京 朝阳区 酒仙桥 3-5年 大专 Python beijing python \n",
+ "2 20-40K·14薪 北京 朝阳区 十里堡 5-10年 本科 Python beijing python \n",
+ "3 12-24K 北京 石景山区 八宝山 1-3年 本科 python工程师 beijing python \n",
+ "4 12-17K 北京 朝阳区 三元桥 3-5年 大专 python开发 beijing python \n",
+ "... ... ... ... .. ... ... ... \n",
+ "9820 15-30K·13薪 成都 武侯区 跳伞塔 3-5年 本科 产品经理-ERP项目经理 chengdu 产品经理 \n",
+ "9821 6-8K 成都 3-5年 大专 产品经理 chengdu 产品经理 \n",
+ "9822 10-15K 成都 双流区 华阳 3-5年 本科 产品经理 chengdu 产品经理 \n",
+ "9823 11-20K 成都 3-5年 大专 中高级移动产品经理 chengdu 产品经理 \n",
+ "9824 8-11K 成都 武侯区 高升桥 1-3年 本科 产品经理(成都) chengdu 产品经理 \n",
+ "\n",
+ " avg_salary \n",
+ "0 30 \n",
+ "1 30 \n",
+ "2 30 \n",
+ "3 18 \n",
+ "4 14 \n",
+ "... ... \n",
+ "9820 22 \n",
+ "9821 7 \n",
+ "9822 12 \n",
+ "9823 15 \n",
+ "9824 9 \n",
+ "\n",
+ "[9825 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 255,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 合并城市数据\n",
+ "all_data = pd.concat([beijing, shanghai, shenzhen, guangzhou, hangzhou, nanjing, wuhan, xian, chengdu], ignore_index=True)\n",
+ "all_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_num(mystr):\n",
+ " rege = r'(\\d+)-(\\d+)K'\n",
+ " res = re.match(rege, mystr)\n",
+ " result = (int(res.group(1)) + int(res.group(2)))/2\n",
+ " return int(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 257,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " company_name | \n",
+ " uri | \n",
+ " salary | \n",
+ " site | \n",
+ " year | \n",
+ " edu | \n",
+ " job_name | \n",
+ " city | \n",
+ " job_type | \n",
+ " avg_salary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 中国电信云 | \n",
+ " https://www.zhipin.com/job_detail/11266fc18dc1... | \n",
+ " 20-40K·17薪 | \n",
+ " 北京 海淀区 西山 | \n",
+ " 经验不限 | \n",
+ " 本科 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 奇虎360 | \n",
+ " https://www.zhipin.com/job_detail/2a3103941dc2... | \n",
+ " 20-40K·15薪 | \n",
+ " 北京 朝阳区 酒仙桥 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " VIPKID | \n",
+ " https://www.zhipin.com/job_detail/2dd7f2760947... | \n",
+ " 20-40K·14薪 | \n",
+ " 北京 朝阳区 十里堡 | \n",
+ " 5-10年 | \n",
+ " 本科 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 天阳科技 | \n",
+ " https://www.zhipin.com/job_detail/a0c8485a448b... | \n",
+ " 12-24K | \n",
+ " 北京 石景山区 八宝山 | \n",
+ " 1-3年 | \n",
+ " 本科 | \n",
+ " python工程师 | \n",
+ " beijing | \n",
+ " python | \n",
+ " 18 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 武汉佰钧成 | \n",
+ " https://www.zhipin.com/job_detail/d6627bf7c1e2... | \n",
+ " 12-17K | \n",
+ " 北京 朝阳区 三元桥 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " python开发 | \n",
+ " beijing | \n",
+ " python | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 9820 | \n",
+ " 第壹街舞 | \n",
+ " https://www.zhipin.com/job_detail/23f4fcf1e936... | \n",
+ " 15-30K·13薪 | \n",
+ " 成都 武侯区 跳伞塔 | \n",
+ " 3-5年 | \n",
+ " 本科 | \n",
+ " 产品经理-ERP项目经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 9821 | \n",
+ " 易停车 | \n",
+ " https://www.zhipin.com/job_detail/1bde3c5dc4c9... | \n",
+ " 6-8K | \n",
+ " 成都 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " 产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 9822 | \n",
+ " 金瑞麒科技 | \n",
+ " https://www.zhipin.com/job_detail/333b26dcf515... | \n",
+ " 10-15K | \n",
+ " 成都 双流区 华阳 | \n",
+ " 3-5年 | \n",
+ " 本科 | \n",
+ " 产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " 9823 | \n",
+ " 朗培商学院 | \n",
+ " https://www.zhipin.com/job_detail/b711ea9545d2... | \n",
+ " 11-20K | \n",
+ " 成都 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " 中高级移动产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " 9824 | \n",
+ " 微品致远 | \n",
+ " https://www.zhipin.com/job_detail/03168bcffeca... | \n",
+ " 8-11K | \n",
+ " 成都 武侯区 高升桥 | \n",
+ " 1-3年 | \n",
+ " 本科 | \n",
+ " 产品经理(成都) | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
9825 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " company_name uri \\\n",
+ "0 中国电信云 https://www.zhipin.com/job_detail/11266fc18dc1... \n",
+ "1 奇虎360 https://www.zhipin.com/job_detail/2a3103941dc2... \n",
+ "2 VIPKID https://www.zhipin.com/job_detail/2dd7f2760947... \n",
+ "3 天阳科技 https://www.zhipin.com/job_detail/a0c8485a448b... \n",
+ "4 武汉佰钧成 https://www.zhipin.com/job_detail/d6627bf7c1e2... \n",
+ "... ... ... \n",
+ "9820 第壹街舞 https://www.zhipin.com/job_detail/23f4fcf1e936... \n",
+ "9821 易停车 https://www.zhipin.com/job_detail/1bde3c5dc4c9... \n",
+ "9822 金瑞麒科技 https://www.zhipin.com/job_detail/333b26dcf515... \n",
+ "9823 朗培商学院 https://www.zhipin.com/job_detail/b711ea9545d2... \n",
+ "9824 微品致远 https://www.zhipin.com/job_detail/03168bcffeca... \n",
+ "\n",
+ " salary site year edu job_name city job_type \\\n",
+ "0 20-40K·17薪 北京 海淀区 西山 经验不限 本科 Python beijing python \n",
+ "1 20-40K·15薪 北京 朝阳区 酒仙桥 3-5年 大专 Python beijing python \n",
+ "2 20-40K·14薪 北京 朝阳区 十里堡 5-10年 本科 Python beijing python \n",
+ "3 12-24K 北京 石景山区 八宝山 1-3年 本科 python工程师 beijing python \n",
+ "4 12-17K 北京 朝阳区 三元桥 3-5年 大专 python开发 beijing python \n",
+ "... ... ... ... .. ... ... ... \n",
+ "9820 15-30K·13薪 成都 武侯区 跳伞塔 3-5年 本科 产品经理-ERP项目经理 chengdu 产品经理 \n",
+ "9821 6-8K 成都 3-5年 大专 产品经理 chengdu 产品经理 \n",
+ "9822 10-15K 成都 双流区 华阳 3-5年 本科 产品经理 chengdu 产品经理 \n",
+ "9823 11-20K 成都 3-5年 大专 中高级移动产品经理 chengdu 产品经理 \n",
+ "9824 8-11K 成都 武侯区 高升桥 1-3年 本科 产品经理(成都) chengdu 产品经理 \n",
+ "\n",
+ " avg_salary \n",
+ "0 30 \n",
+ "1 30 \n",
+ "2 30 \n",
+ "3 18 \n",
+ "4 14 \n",
+ "... ... \n",
+ "9820 22 \n",
+ "9821 7 \n",
+ "9822 12 \n",
+ "9823 15 \n",
+ "9824 9 \n",
+ "\n",
+ "[9825 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 257,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_data['avg_salary'] = all_data['salary'].apply(get_num)\n",
+ "all_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 264,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " company_name | \n",
+ " counts | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 100教育 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 100课堂 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1药网 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 31会议网 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 360 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4794 | \n",
+ " 齐聚科技 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4795 | \n",
+ " 龙之力 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4796 | \n",
+ " 龙信科技 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4797 | \n",
+ " 龙婴本铺 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4798 | \n",
+ " 龙渊网络科技有限公司 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4799 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " company_name counts\n",
+ "0 100教育 2\n",
+ "1 100课堂 1\n",
+ "2 1药网 2\n",
+ "3 31会议网 2\n",
+ "4 360 1\n",
+ "... ... ...\n",
+ "4794 齐聚科技 1\n",
+ "4795 龙之力 2\n",
+ "4796 龙信科技 1\n",
+ "4797 龙婴本铺 1\n",
+ "4798 龙渊网络科技有限公司 4\n",
+ "\n",
+ "[4799 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 264,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_company_avg_salary = all_data.groupby('company_name').mean()\n",
+ "all_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "all_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "all_job_count = all_data.groupby('company_name').size()\n",
+ "dict_site = {'company_name': all_job_count.index, 'counts': all_job_count.values}\n",
+ "all_job_count_data = pd.DataFrame(dict_site)\n",
+ "all_job_count_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 310,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "all_company_avg_salary['avg_salary'] = all_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "all_company_avg_salary_counts = pd.merge(all_company_avg_salary, all_job_count_data, on='company_name')\n",
+ "all_company = all_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 269,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 269,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(all_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国企业平均招聘薪资\", all_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国企业在招岗位数量\", all_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 305,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 305,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(all_company['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国招聘岗位数量\", all_company['counts'].values.tolist()[:10])\n",
+ "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n",
+ "bar.extend_axis(\n",
+ " yaxis=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ " )\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"全国企业招聘情况\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar2 = Bar().add_xaxis(all_company['company_name'].values.tolist()[:10])\n",
+ "bar2.add_yaxis(\"全国平均招聘薪资\", all_company['avg_salary'].values.tolist()[:10], yaxis_index=1)\n",
+ "bar.overlap(bar2)\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 307,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 307,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(all_company['company_name'].values.tolist()[2:20])\n",
+ "bar.add_yaxis(\"全国招聘岗位数量\", all_company['counts'].values.tolist()[2:20])\n",
+ "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n",
+ "bar.extend_axis(\n",
+ " yaxis=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ " )\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"全国企业招聘情况\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar2 = Bar().add_xaxis(all_company['company_name'].values.tolist()[2:20])\n",
+ "bar2.add_yaxis(\"全国平均招聘薪资\", all_company['avg_salary'].values.tolist()[2:20], yaxis_index=1)\n",
+ "bar.overlap(bar2)\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 313,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "all_sort = all_data.sort_values(by='avg_salary', ascending=False)\n",
+ "all_job_sort_by_salary = all_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n",
+ "all_job_type = all_job_sort_by_salary['job_type'][:30].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 314,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 314,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(all_job_type.index.tolist(), all_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"全国高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 301,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " job_type | \n",
+ " avg_salary | \n",
+ " counts | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " java | \n",
+ " 18.077381 | \n",
+ " 2.688 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 产品经理 | \n",
+ " 17.389509 | \n",
+ " 2.688 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " python | \n",
+ " 16.713082 | \n",
+ " 2.018 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 数据分析 | \n",
+ " 14.804607 | \n",
+ " 2.431 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " job_type avg_salary counts\n",
+ "0 java 18.077381 2.688\n",
+ "1 产品经理 17.389509 2.688\n",
+ "2 python 16.713082 2.018\n",
+ "3 数据分析 14.804607 2.431"
+ ]
+ },
+ "execution_count": 301,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_jobtype_avg_salary = all_data.groupby('job_type').mean()\n",
+ "all_jobtype_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "all_jobtype_counts = all_data.groupby('job_type').size()\n",
+ "dict_site = {'job_type': all_jobtype_counts.index, 'counts': all_jobtype_counts.values/1000}\n",
+ "all_job_counts_data = pd.DataFrame(dict_site)\n",
+ "all_jobtype_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "all_jobtype_data = pd.merge(all_jobtype_avg_salary, all_job_counts_data, on='job_type')\n",
+ "all_jobtype_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 297,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "job_type\n",
+ "java 2688\n",
+ "python 2018\n",
+ "产品经理 2688\n",
+ "数据分析 2431\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 297,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_jobtype_counts = all_data.groupby('job_type').size()\n",
+ "all_jobtype_counts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 303,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 303,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(all_jobtype_data['job_type'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国企业岗位平均招聘薪资\", all_jobtype_data['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国企业岗位招聘数量\", all_jobtype_data['counts'].values.tolist()[:10])\n",
+ "bar.reversal_axis()\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30, formatter=\"{value}K\")),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts()\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True, position=\"right\")\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 分城市分析"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 250,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "beijing['avg_salary'] = beijing['salary'].apply(get_num)\n",
+ "shanghai['avg_salary'] = shanghai['salary'].apply(get_num)\n",
+ "shenzhen['avg_salary'] = shenzhen['salary'].apply(get_num)\n",
+ "hangzhou['avg_salary'] = hangzhou['salary'].apply(get_num)\n",
+ "\n",
+ "guangzhou['avg_salary'] = guangzhou['salary'].apply(get_num)\n",
+ "nanjing['avg_salary'] = nanjing['salary'].apply(get_num)\n",
+ "xian['avg_salary'] = xian['salary'].apply(get_num)\n",
+ "wuhan['avg_salary'] = wuhan['salary'].apply(get_num)\n",
+ "chengdu['avg_salary'] = chengdu['salary'].apply(get_num)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 316,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing_job_count = beijing.groupby('company_name').size()\n",
+ "dict_site = {'company_name': beijing_job_count.index, 'counts': beijing_job_count.values}\n",
+ "beijing_job_count_data = pd.DataFrame(dict_site)\n",
+ "\n",
+ "shanghai_job_count = shanghai.groupby('company_name').size()\n",
+ "dict_site = {'company_name': shanghai_job_count.index, 'counts': shanghai_job_count.values}\n",
+ "shanghai_job_count_data = pd.DataFrame(dict_site)\n",
+ "\n",
+ "shenzhen_job_count = shenzhen.groupby('company_name').size()\n",
+ "dict_site = {'company_name': shenzhen_job_count.index, 'counts': shenzhen_job_count.values}\n",
+ "shenzhen_job_count_data = pd.DataFrame(dict_site)\n",
+ "\n",
+ "hangzhou_job_count = hangzhou.groupby('company_name').size()\n",
+ "dict_site = {'company_name': hangzhou_job_count.index, 'counts': hangzhou_job_count.values}\n",
+ "hangzhou_job_count_data = pd.DataFrame(dict_site)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 256,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing_company_avg_salary = beijing.groupby('company_name').mean()\n",
+ "beijing_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "beijing_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "beijing_company_avg_salary['avg_salary'] = beijing_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "beijing_company_avg_salary_counts = pd.merge(beijing_company_avg_salary, beijing_job_count_data, on='company_name')\n",
+ "beijing_company = beijing_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 317,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "shanghai_company_avg_salary = shanghai.groupby('company_name').mean()\n",
+ "shanghai_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "shanghai_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "shanghai_company_avg_salary['avg_salary'] = shanghai_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "shanghai_company_avg_salary_counts = pd.merge(shanghai_company_avg_salary, shanghai_job_count_data, on='company_name')\n",
+ "shanghai_company = shanghai_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 318,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shenzhen_company_avg_salary = shenzhen.groupby('company_name').mean()\n",
+ "shenzhen_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "shenzhen_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "shenzhen_company_avg_salary['avg_salary'] = shenzhen_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "shenzhen_company_avg_salary_counts = pd.merge(shenzhen_company_avg_salary, shenzhen_job_count_data, on='company_name')\n",
+ "shenzhen_company = shenzhen_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 319,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hangzhou_company_avg_salary = hangzhou.groupby('company_name').mean()\n",
+ "hangzhou_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "hangzhou_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "hangzhou_company_avg_salary['avg_salary'] = hangzhou_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "hangzhou_company_avg_salary_counts = pd.merge(hangzhou_company_avg_salary, hangzhou_job_count_data, on='company_name')\n",
+ "hangzhou_company = hangzhou_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 142,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 142,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(beijing_company['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"招聘岗位数量\", beijing_company['counts'].values.tolist()[:10])\n",
+ "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n",
+ "bar.extend_axis(\n",
+ " yaxis=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ " )\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"北京企业招聘情况\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar2 = Bar().add_xaxis(beijing_company['company_name'].values.tolist()[:10])\n",
+ "bar2.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10], yaxis_index=1)\n",
+ "bar.overlap(bar2)\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " company_name | \n",
+ " avg_salary | \n",
+ " counts | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 悠易互通 | \n",
+ " 50.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 广联达 | \n",
+ " 50.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 宝誉德 | \n",
+ " 47.5 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 网易 | \n",
+ " 45.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 忽客 | \n",
+ " 45.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 622 | \n",
+ " 天地在线 | \n",
+ " 4.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 623 | \n",
+ " 全时天地在线 | \n",
+ " 4.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 624 | \n",
+ " 明大启微 | \n",
+ " 4.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 625 | \n",
+ " 视觉互联 | \n",
+ " 3.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 626 | \n",
+ " 有缘网 | \n",
+ " 2.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
627 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " company_name avg_salary counts\n",
+ "0 悠易互通 50.0 1\n",
+ "1 广联达 50.0 1\n",
+ "2 宝誉德 47.5 2\n",
+ "3 网易 45.0 1\n",
+ "4 忽客 45.0 1\n",
+ ".. ... ... ...\n",
+ "622 天地在线 4.0 1\n",
+ "623 全时天地在线 4.0 1\n",
+ "624 明大启微 4.0 1\n",
+ "625 视觉互联 3.0 1\n",
+ "626 有缘网 2.0 1\n",
+ "\n",
+ "[627 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 144,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "beijing_company_avg_salary_counts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 315,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 315,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(beijing_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"北京平均招聘薪资\", beijing_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"北京在招岗位数量\", beijing_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 321,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 321,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(shanghai_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"上海平均招聘薪资\", shanghai_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"上海在招岗位数量\", shanghai_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 322,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 322,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(shenzhen_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"深圳平均招聘薪资\", shenzhen_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"深圳在招岗位数量\", shenzhen_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 323,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 323,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(hangzhou_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"杭州平均招聘薪资\", hangzhou_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"杭州在招岗位数量\", hangzhou_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 324,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing_sort = beijing.sort_values(by='avg_salary', ascending=False)\n",
+ "shanghai_sort = shanghai.sort_values(by='avg_salary', ascending=False)\n",
+ "shenzhen_sort = shenzhen.sort_values(by='avg_salary', ascending=False)\n",
+ "hangzhou_sort = hangzhou.sort_values(by='avg_salary', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 325,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing_job_sort_by_salary = beijing_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n",
+ "shanghai_job_sort_by_salary = shanghai_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n",
+ "shenzhen_job_sort_by_salary = shenzhen_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n",
+ "hangzhou_job_sort_by_salary = hangzhou_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 329,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "beijing_job_sort_by_salary_salary = beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n",
+ "beijing_job_sort_by_salary_jobname = beijing_job_sort_by_salary['job_name'].values.tolist()[:10]\n",
+ "beijing_job_sort_by_salary_company = beijing_job_sort_by_salary['company_name'].values.tolist()[:10]\n",
+ "beijing_x_index = [beijing_job_sort_by_salary_jobname[i] + \":\" + str(beijing_job_sort_by_salary_salary[i]) for i in range(0, len(beijing_job_sort_by_salary_company))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 330,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shanghai_job_sort_by_salary_salary = shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n",
+ "shanghai_job_sort_by_salary_jobname = shanghai_job_sort_by_salary['job_name'].values.tolist()[:10]\n",
+ "shanghai_job_sort_by_salary_company = shanghai_job_sort_by_salary['company_name'].values.tolist()[:10]\n",
+ "shanghai_x_index = [shanghai_job_sort_by_salary_jobname[i] + \":\" + str(shanghai_job_sort_by_salary_salary[i]) for i in range(0, len(shanghai_job_sort_by_salary_company))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 331,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shenzhen_job_sort_by_salary_salary = shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n",
+ "shenzhen_job_sort_by_salary_jobname = shenzhen_job_sort_by_salary['job_name'].values.tolist()[:10]\n",
+ "shenzhen_job_sort_by_salary_company = shenzhen_job_sort_by_salary['company_name'].values.tolist()[:10]\n",
+ "shenzhen_x_index = [shenzhen_job_sort_by_salary_jobname[i] + \":\" + str(shenzhen_job_sort_by_salary_salary[i]) for i in range(0, len(shenzhen_job_sort_by_salary_company))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 334,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hangzhou_job_sort_by_salary_salary = hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n",
+ "hangzhou_job_sort_by_salary_jobname = hangzhou_job_sort_by_salary['job_name'].values.tolist()[:10]\n",
+ "hangzhou_job_sort_by_salary_company = hangzhou_job_sort_by_salary['company_name'].values.tolist()[:10]\n",
+ "hangzhou_x_index = [hangzhou_job_sort_by_salary_jobname[i] + \":\" + str(hangzhou_job_sort_by_salary_salary[i]) for i in range(0, len(hangzhou_job_sort_by_salary_company))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 336,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 336,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 单个岗位工资排行\n",
+ "bar = Bar()\n",
+ "bar.add_xaxis(beijing_job_sort_by_salary['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"北京招聘薪资排行\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "\n",
+ "# line = Line()\n",
+ "# line.add_xaxis(x_index)\n",
+ "# line.add_yaxis(\"\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "# line.set_global_opts(\n",
+ "# xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ "# title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ "# yaxis_opts=opts.AxisOpts(\n",
+ "# axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ "# ),\n",
+ "# # datazoom_opts=opts.DataZoomOpts(),\n",
+ "# )\n",
+ "\n",
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(beijing_x_index)\n",
+ "scatter.add_yaxis(\"北京高薪岗位\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n",
+ " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=50\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "\n",
+ "grid = Grid()\n",
+ "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n",
+ "# grid.add(line, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 338,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 338,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(shanghai_job_sort_by_salary['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"上海招聘薪资排行\", shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "\n",
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(shanghai_x_index)\n",
+ "scatter.add_yaxis(\"上海高薪岗位\", shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n",
+ " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=40\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "grid = Grid()\n",
+ "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n",
+ "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 339,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 339,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(shenzhen_job_sort_by_salary['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"深圳招聘薪资排行\", shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "\n",
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(shenzhen_x_index)\n",
+ "scatter.add_yaxis(\"深圳高薪岗位\", shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n",
+ " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=40\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "grid = Grid()\n",
+ "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n",
+ "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 340,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 340,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(hangzhou_job_sort_by_salary['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"杭州招聘薪资排行\", hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "\n",
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(hangzhou_x_index)\n",
+ "scatter.add_yaxis(\"杭州高薪岗位\", hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n",
+ " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=40\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "grid = Grid()\n",
+ "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n",
+ "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 233,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 233,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(x_index)\n",
+ "scatter.add_yaxis(\"招聘岗位\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " visualmap_opts=opts.VisualMapOpts(type_=\"size\", max_=70, min_=50),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=50\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "scatter.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 341,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "beijing_job_type = beijing_job_sort_by_salary['job_type'][:10].value_counts()\n",
+ "shanghai_job_type = shanghai_job_sort_by_salary['job_type'][:10].value_counts()\n",
+ "shenzhen_job_type = shenzhen_job_sort_by_salary['job_type'][:10].value_counts()\n",
+ "hangzhou_job_type = hangzhou_job_sort_by_salary['job_type'][:10].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 278,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 278,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(beijing_job_type.index.tolist(), beijing_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"北京高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 343,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 343,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(shanghai_job_type.index.tolist(), shanghai_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"上海高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 344,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 344,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(shenzhen_job_type.index.tolist(), shenzhen_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"深圳高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 345,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 345,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(hangzhou_job_type.index.tolist(), hangzhou_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"杭州高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/boss_spider/boss_job.ipynb b/boss_spider/boss_job.ipynb
index f569428..96e2942 100644
--- a/boss_spider/boss_job.ipynb
+++ b/boss_spider/boss_job.ipynb
@@ -310,7 +310,7 @@
"cell_type": "code",
"execution_count": 106,
"metadata": {
- "scrolled": true
+ "collapsed": true
},
"outputs": [
{
@@ -389,7 +389,9 @@
{
"cell_type": "code",
"execution_count": 107,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [
{
"data": {
@@ -421,7 +423,7 @@
"cell_type": "code",
"execution_count": 108,
"metadata": {
- "scrolled": true
+ "collapsed": true
},
"outputs": [
{
@@ -492,7 +494,9 @@
{
"cell_type": "code",
"execution_count": 109,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [
{
"data": {
@@ -1639,6 +1643,13 @@
"beijing_product_jobname_pd_word"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": 203,
@@ -5588,6 +5599,453 @@
"source": [
"wordcloud_base(beijing_product_jobname_pd_word).render_notebook()"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "BOSS直聘 52\n",
+ "京东集团 45\n",
+ "京东数字科技 18\n",
+ "VIPKID 18\n",
+ "旷视MEGVII 14\n",
+ " ..\n",
+ "IMS 1\n",
+ "金吉列留学 1\n",
+ "安一恒通 1\n",
+ "天启慧眼 1\n",
+ "腾信软创科技 1\n",
+ "Name: company_name, Length: 627, dtype: int64"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "beijing['company_name'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(beijing['company_name'].value_counts().index.tolist()[:10])\n",
+ "bar.add_yaxis(\"北京\", beijing['company_name'].value_counts().values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"北京企业招聘岗位数量\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(nanjing['company_name'].value_counts().index.tolist()[:10])\n",
+ "bar.add_yaxis(\"南京\", nanjing['company_name'].value_counts().values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"南京企业招聘岗位数量\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
}
],
"metadata": {
diff --git a/boss_spider/company_of_job.ipynb b/boss_spider/company_of_job.ipynb
new file mode 100644
index 0000000..43c0205
--- /dev/null
+++ b/boss_spider/company_of_job.ipynb
@@ -0,0 +1,5837 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 273,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from pyecharts.charts import Bar, Geo, WordCloud, Grid, Line, Scatter, Pie\n",
+ "from pyecharts import options as opts\n",
+ "from pyecharts.globals import ChartType, SymbolType"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 247,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing = pd.read_csv(\"beijing_data.csv\")\n",
+ "shanghai = pd.read_csv(\"shanghai_data.csv\")\n",
+ "shenzhen = pd.read_csv(\"shenzhen_data.csv\")\n",
+ "guangzhou = pd.read_csv(\"guangzhou_data.csv\")\n",
+ "hangzhou = pd.read_csv(\"hangzhou_data.csv\")\n",
+ "nanjing = pd.read_csv(\"nanjing_data.csv\")\n",
+ "wuhan = pd.read_csv(\"wuhan_data.csv\")\n",
+ "xian = pd.read_csv(\"xian_data.csv\")\n",
+ "chengdu = pd.read_csv(\"chengdu_data.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 255,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " company_name | \n",
+ " uri | \n",
+ " salary | \n",
+ " site | \n",
+ " year | \n",
+ " edu | \n",
+ " job_name | \n",
+ " city | \n",
+ " job_type | \n",
+ " avg_salary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 中国电信云 | \n",
+ " https://www.zhipin.com/job_detail/11266fc18dc1... | \n",
+ " 20-40K·17薪 | \n",
+ " 北京 海淀区 西山 | \n",
+ " 经验不限 | \n",
+ " 本科 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 奇虎360 | \n",
+ " https://www.zhipin.com/job_detail/2a3103941dc2... | \n",
+ " 20-40K·15薪 | \n",
+ " 北京 朝阳区 酒仙桥 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " VIPKID | \n",
+ " https://www.zhipin.com/job_detail/2dd7f2760947... | \n",
+ " 20-40K·14薪 | \n",
+ " 北京 朝阳区 十里堡 | \n",
+ " 5-10年 | \n",
+ " 本科 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 天阳科技 | \n",
+ " https://www.zhipin.com/job_detail/a0c8485a448b... | \n",
+ " 12-24K | \n",
+ " 北京 石景山区 八宝山 | \n",
+ " 1-3年 | \n",
+ " 本科 | \n",
+ " python工程师 | \n",
+ " beijing | \n",
+ " python | \n",
+ " 18 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 武汉佰钧成 | \n",
+ " https://www.zhipin.com/job_detail/d6627bf7c1e2... | \n",
+ " 12-17K | \n",
+ " 北京 朝阳区 三元桥 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " python开发 | \n",
+ " beijing | \n",
+ " python | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 9820 | \n",
+ " 第壹街舞 | \n",
+ " https://www.zhipin.com/job_detail/23f4fcf1e936... | \n",
+ " 15-30K·13薪 | \n",
+ " 成都 武侯区 跳伞塔 | \n",
+ " 3-5年 | \n",
+ " 本科 | \n",
+ " 产品经理-ERP项目经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 9821 | \n",
+ " 易停车 | \n",
+ " https://www.zhipin.com/job_detail/1bde3c5dc4c9... | \n",
+ " 6-8K | \n",
+ " 成都 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " 产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 9822 | \n",
+ " 金瑞麒科技 | \n",
+ " https://www.zhipin.com/job_detail/333b26dcf515... | \n",
+ " 10-15K | \n",
+ " 成都 双流区 华阳 | \n",
+ " 3-5年 | \n",
+ " 本科 | \n",
+ " 产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " 9823 | \n",
+ " 朗培商学院 | \n",
+ " https://www.zhipin.com/job_detail/b711ea9545d2... | \n",
+ " 11-20K | \n",
+ " 成都 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " 中高级移动产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " 9824 | \n",
+ " 微品致远 | \n",
+ " https://www.zhipin.com/job_detail/03168bcffeca... | \n",
+ " 8-11K | \n",
+ " 成都 武侯区 高升桥 | \n",
+ " 1-3年 | \n",
+ " 本科 | \n",
+ " 产品经理(成都) | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
9825 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " company_name uri \\\n",
+ "0 中国电信云 https://www.zhipin.com/job_detail/11266fc18dc1... \n",
+ "1 奇虎360 https://www.zhipin.com/job_detail/2a3103941dc2... \n",
+ "2 VIPKID https://www.zhipin.com/job_detail/2dd7f2760947... \n",
+ "3 天阳科技 https://www.zhipin.com/job_detail/a0c8485a448b... \n",
+ "4 武汉佰钧成 https://www.zhipin.com/job_detail/d6627bf7c1e2... \n",
+ "... ... ... \n",
+ "9820 第壹街舞 https://www.zhipin.com/job_detail/23f4fcf1e936... \n",
+ "9821 易停车 https://www.zhipin.com/job_detail/1bde3c5dc4c9... \n",
+ "9822 金瑞麒科技 https://www.zhipin.com/job_detail/333b26dcf515... \n",
+ "9823 朗培商学院 https://www.zhipin.com/job_detail/b711ea9545d2... \n",
+ "9824 微品致远 https://www.zhipin.com/job_detail/03168bcffeca... \n",
+ "\n",
+ " salary site year edu job_name city job_type \\\n",
+ "0 20-40K·17薪 北京 海淀区 西山 经验不限 本科 Python beijing python \n",
+ "1 20-40K·15薪 北京 朝阳区 酒仙桥 3-5年 大专 Python beijing python \n",
+ "2 20-40K·14薪 北京 朝阳区 十里堡 5-10年 本科 Python beijing python \n",
+ "3 12-24K 北京 石景山区 八宝山 1-3年 本科 python工程师 beijing python \n",
+ "4 12-17K 北京 朝阳区 三元桥 3-5年 大专 python开发 beijing python \n",
+ "... ... ... ... .. ... ... ... \n",
+ "9820 15-30K·13薪 成都 武侯区 跳伞塔 3-5年 本科 产品经理-ERP项目经理 chengdu 产品经理 \n",
+ "9821 6-8K 成都 3-5年 大专 产品经理 chengdu 产品经理 \n",
+ "9822 10-15K 成都 双流区 华阳 3-5年 本科 产品经理 chengdu 产品经理 \n",
+ "9823 11-20K 成都 3-5年 大专 中高级移动产品经理 chengdu 产品经理 \n",
+ "9824 8-11K 成都 武侯区 高升桥 1-3年 本科 产品经理(成都) chengdu 产品经理 \n",
+ "\n",
+ " avg_salary \n",
+ "0 30 \n",
+ "1 30 \n",
+ "2 30 \n",
+ "3 18 \n",
+ "4 14 \n",
+ "... ... \n",
+ "9820 22 \n",
+ "9821 7 \n",
+ "9822 12 \n",
+ "9823 15 \n",
+ "9824 9 \n",
+ "\n",
+ "[9825 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 255,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Merge the per-city datasets into a single DataFrame\n",
+ "all_data = pd.concat([beijing, shanghai, shenzhen, guangzhou, hangzhou, nanjing, wuhan, xian, chengdu], ignore_index=True)\n",
+ "all_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_num(mystr):\n",
+ " rege = r'(\\d+)-(\\d+)K'\n",
+ " res = re.match(rege, mystr)\n",
+ " result = (int(res.group(1)) + int(res.group(2)))/2\n",
+ " return int(result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 257,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " company_name | \n",
+ " uri | \n",
+ " salary | \n",
+ " site | \n",
+ " year | \n",
+ " edu | \n",
+ " job_name | \n",
+ " city | \n",
+ " job_type | \n",
+ " avg_salary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 中国电信云 | \n",
+ " https://www.zhipin.com/job_detail/11266fc18dc1... | \n",
+ " 20-40K·17薪 | \n",
+ " 北京 海淀区 西山 | \n",
+ " 经验不限 | \n",
+ " 本科 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 奇虎360 | \n",
+ " https://www.zhipin.com/job_detail/2a3103941dc2... | \n",
+ " 20-40K·15薪 | \n",
+ " 北京 朝阳区 酒仙桥 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " VIPKID | \n",
+ " https://www.zhipin.com/job_detail/2dd7f2760947... | \n",
+ " 20-40K·14薪 | \n",
+ " 北京 朝阳区 十里堡 | \n",
+ " 5-10年 | \n",
+ " 本科 | \n",
+ " Python | \n",
+ " beijing | \n",
+ " python | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 天阳科技 | \n",
+ " https://www.zhipin.com/job_detail/a0c8485a448b... | \n",
+ " 12-24K | \n",
+ " 北京 石景山区 八宝山 | \n",
+ " 1-3年 | \n",
+ " 本科 | \n",
+ " python工程师 | \n",
+ " beijing | \n",
+ " python | \n",
+ " 18 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 武汉佰钧成 | \n",
+ " https://www.zhipin.com/job_detail/d6627bf7c1e2... | \n",
+ " 12-17K | \n",
+ " 北京 朝阳区 三元桥 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " python开发 | \n",
+ " beijing | \n",
+ " python | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 9820 | \n",
+ " 第壹街舞 | \n",
+ " https://www.zhipin.com/job_detail/23f4fcf1e936... | \n",
+ " 15-30K·13薪 | \n",
+ " 成都 武侯区 跳伞塔 | \n",
+ " 3-5年 | \n",
+ " 本科 | \n",
+ " 产品经理-ERP项目经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 9821 | \n",
+ " 易停车 | \n",
+ " https://www.zhipin.com/job_detail/1bde3c5dc4c9... | \n",
+ " 6-8K | \n",
+ " 成都 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " 产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 9822 | \n",
+ " 金瑞麒科技 | \n",
+ " https://www.zhipin.com/job_detail/333b26dcf515... | \n",
+ " 10-15K | \n",
+ " 成都 双流区 华阳 | \n",
+ " 3-5年 | \n",
+ " 本科 | \n",
+ " 产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " 9823 | \n",
+ " 朗培商学院 | \n",
+ " https://www.zhipin.com/job_detail/b711ea9545d2... | \n",
+ " 11-20K | \n",
+ " 成都 | \n",
+ " 3-5年 | \n",
+ " 大专 | \n",
+ " 中高级移动产品经理 | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " 9824 | \n",
+ " 微品致远 | \n",
+ " https://www.zhipin.com/job_detail/03168bcffeca... | \n",
+ " 8-11K | \n",
+ " 成都 武侯区 高升桥 | \n",
+ " 1-3年 | \n",
+ " 本科 | \n",
+ " 产品经理(成都) | \n",
+ " chengdu | \n",
+ " 产品经理 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
9825 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " company_name uri \\\n",
+ "0 中国电信云 https://www.zhipin.com/job_detail/11266fc18dc1... \n",
+ "1 奇虎360 https://www.zhipin.com/job_detail/2a3103941dc2... \n",
+ "2 VIPKID https://www.zhipin.com/job_detail/2dd7f2760947... \n",
+ "3 天阳科技 https://www.zhipin.com/job_detail/a0c8485a448b... \n",
+ "4 武汉佰钧成 https://www.zhipin.com/job_detail/d6627bf7c1e2... \n",
+ "... ... ... \n",
+ "9820 第壹街舞 https://www.zhipin.com/job_detail/23f4fcf1e936... \n",
+ "9821 易停车 https://www.zhipin.com/job_detail/1bde3c5dc4c9... \n",
+ "9822 金瑞麒科技 https://www.zhipin.com/job_detail/333b26dcf515... \n",
+ "9823 朗培商学院 https://www.zhipin.com/job_detail/b711ea9545d2... \n",
+ "9824 微品致远 https://www.zhipin.com/job_detail/03168bcffeca... \n",
+ "\n",
+ " salary site year edu job_name city job_type \\\n",
+ "0 20-40K·17薪 北京 海淀区 西山 经验不限 本科 Python beijing python \n",
+ "1 20-40K·15薪 北京 朝阳区 酒仙桥 3-5年 大专 Python beijing python \n",
+ "2 20-40K·14薪 北京 朝阳区 十里堡 5-10年 本科 Python beijing python \n",
+ "3 12-24K 北京 石景山区 八宝山 1-3年 本科 python工程师 beijing python \n",
+ "4 12-17K 北京 朝阳区 三元桥 3-5年 大专 python开发 beijing python \n",
+ "... ... ... ... .. ... ... ... \n",
+ "9820 15-30K·13薪 成都 武侯区 跳伞塔 3-5年 本科 产品经理-ERP项目经理 chengdu 产品经理 \n",
+ "9821 6-8K 成都 3-5年 大专 产品经理 chengdu 产品经理 \n",
+ "9822 10-15K 成都 双流区 华阳 3-5年 本科 产品经理 chengdu 产品经理 \n",
+ "9823 11-20K 成都 3-5年 大专 中高级移动产品经理 chengdu 产品经理 \n",
+ "9824 8-11K 成都 武侯区 高升桥 1-3年 本科 产品经理(成都) chengdu 产品经理 \n",
+ "\n",
+ " avg_salary \n",
+ "0 30 \n",
+ "1 30 \n",
+ "2 30 \n",
+ "3 18 \n",
+ "4 14 \n",
+ "... ... \n",
+ "9820 22 \n",
+ "9821 7 \n",
+ "9822 12 \n",
+ "9823 15 \n",
+ "9824 9 \n",
+ "\n",
+ "[9825 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 257,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_data['avg_salary'] = all_data['salary'].apply(get_num)\n",
+ "all_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 264,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " company_name | \n",
+ " counts | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 100教育 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 100课堂 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1药网 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 31会议网 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 360 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4794 | \n",
+ " 齐聚科技 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4795 | \n",
+ " 龙之力 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4796 | \n",
+ " 龙信科技 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4797 | \n",
+ " 龙婴本铺 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4798 | \n",
+ " 龙渊网络科技有限公司 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4799 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " company_name counts\n",
+ "0 100教育 2\n",
+ "1 100课堂 1\n",
+ "2 1药网 2\n",
+ "3 31会议网 2\n",
+ "4 360 1\n",
+ "... ... ...\n",
+ "4794 齐聚科技 1\n",
+ "4795 龙之力 2\n",
+ "4796 龙信科技 1\n",
+ "4797 龙婴本铺 1\n",
+ "4798 龙渊网络科技有限公司 4\n",
+ "\n",
+ "[4799 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 264,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_company_avg_salary = all_data.groupby('company_name').mean()\n",
+ "all_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "all_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "all_job_count = all_data.groupby('company_name').size()\n",
+ "dict_site = {'company_name': all_job_count.index, 'counts': all_job_count.values}\n",
+ "all_job_count_data = pd.DataFrame(dict_site)\n",
+ "all_job_count_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 310,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "all_company_avg_salary['avg_salary'] = all_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "all_company_avg_salary_counts = pd.merge(all_company_avg_salary, all_job_count_data, on='company_name')\n",
+ "all_company = all_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 269,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 269,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(all_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国企业平均招聘薪资\", all_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国企业在招岗位数量\", all_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 305,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 305,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(all_company['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国招聘岗位数量\", all_company['counts'].values.tolist()[:10])\n",
+ "# Average salary is plotted on the secondary axis via bar2 below, not on this count axis\n",
+ "bar.extend_axis(\n",
+ " yaxis=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ " )\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"全国企业招聘情况\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar2 = Bar().add_xaxis(all_company['company_name'].values.tolist()[:10])\n",
+ "bar2.add_yaxis(\"全国平均招聘薪资\", all_company['avg_salary'].values.tolist()[:10], yaxis_index=1)\n",
+ "bar.overlap(bar2)\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 307,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 307,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(all_company['company_name'].values.tolist()[2:20])\n",
+ "bar.add_yaxis(\"全国招聘岗位数量\", all_company['counts'].values.tolist()[2:20])\n",
+ "# Average salary is plotted on the secondary axis via bar2 below, not on this count axis\n",
+ "bar.extend_axis(\n",
+ " yaxis=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ " )\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"全国企业招聘情况\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar2 = Bar().add_xaxis(all_company['company_name'].values.tolist()[2:20])\n",
+ "bar2.add_yaxis(\"全国平均招聘薪资\", all_company['avg_salary'].values.tolist()[2:20], yaxis_index=1)\n",
+ "bar.overlap(bar2)\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 313,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "all_sort = all_data.sort_values(by='avg_salary', ascending=False)\n",
+ "all_job_sort_by_salary = all_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n",
+ "all_job_type = all_job_sort_by_salary['job_type'][:30].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 314,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 314,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(all_job_type.index.tolist(), all_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"全国高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 301,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " job_type | \n",
+ " avg_salary | \n",
+ " counts | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " java | \n",
+ " 18.077381 | \n",
+ " 2.688 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 产品经理 | \n",
+ " 17.389509 | \n",
+ " 2.688 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " python | \n",
+ " 16.713082 | \n",
+ " 2.018 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 数据分析 | \n",
+ " 14.804607 | \n",
+ " 2.431 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " job_type avg_salary counts\n",
+ "0 java 18.077381 2.688\n",
+ "1 产品经理 17.389509 2.688\n",
+ "2 python 16.713082 2.018\n",
+ "3 数据分析 14.804607 2.431"
+ ]
+ },
+ "execution_count": 301,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_jobtype_avg_salary = all_data.groupby('job_type').mean()\n",
+ "all_jobtype_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "all_jobtype_counts = all_data.groupby('job_type').size()\n",
+ "dict_site = {'job_type': all_jobtype_counts.index, 'counts': all_jobtype_counts.values/1000}\n",
+ "all_job_counts_data = pd.DataFrame(dict_site)\n",
+ "all_jobtype_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "all_jobtype_data = pd.merge(all_jobtype_avg_salary, all_job_counts_data, on='job_type')\n",
+ "all_jobtype_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 297,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "job_type\n",
+ "java 2688\n",
+ "python 2018\n",
+ "产品经理 2688\n",
+ "数据分析 2431\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 297,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_jobtype_counts = all_data.groupby('job_type').size()\n",
+ "all_jobtype_counts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 303,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 303,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(all_jobtype_data['job_type'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国企业岗位平均招聘薪资\", all_jobtype_data['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"全国企业岗位招聘数量\", all_jobtype_data['counts'].values.tolist()[:10])\n",
+ "bar.reversal_axis()\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30, formatter=\"{value}K\")),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts()\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True, position=\"right\")\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Per-city analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 250,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "beijing['avg_salary'] = beijing['salary'].apply(get_num)\n",
+ "shanghai['avg_salary'] = shanghai['salary'].apply(get_num)\n",
+ "shenzhen['avg_salary'] = shenzhen['salary'].apply(get_num)\n",
+ "hangzhou['avg_salary'] = hangzhou['salary'].apply(get_num)\n",
+ "\n",
+ "guangzhou['avg_salary'] = guangzhou['salary'].apply(get_num)\n",
+ "nanjing['avg_salary'] = nanjing['salary'].apply(get_num)\n",
+ "xian['avg_salary'] = xian['salary'].apply(get_num)\n",
+ "wuhan['avg_salary'] = wuhan['salary'].apply(get_num)\n",
+ "chengdu['avg_salary'] = chengdu['salary'].apply(get_num)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 316,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing_job_count = beijing.groupby('company_name').size()\n",
+ "dict_site = {'company_name': beijing_job_count.index, 'counts': beijing_job_count.values}\n",
+ "beijing_job_count_data = pd.DataFrame(dict_site)\n",
+ "\n",
+ "shanghai_job_count = shanghai.groupby('company_name').size()\n",
+ "dict_site = {'company_name': shanghai_job_count.index, 'counts': shanghai_job_count.values}\n",
+ "shanghai_job_count_data = pd.DataFrame(dict_site)\n",
+ "\n",
+ "shenzhen_job_count = shenzhen.groupby('company_name').size()\n",
+ "dict_site = {'company_name': shenzhen_job_count.index, 'counts': shenzhen_job_count.values}\n",
+ "shenzhen_job_count_data = pd.DataFrame(dict_site)\n",
+ "\n",
+ "hangzhou_job_count = hangzhou.groupby('company_name').size()\n",
+ "dict_site = {'company_name': hangzhou_job_count.index, 'counts': hangzhou_job_count.values}\n",
+ "hangzhou_job_count_data = pd.DataFrame(dict_site)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 256,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing_company_avg_salary = beijing.groupby('company_name').mean()\n",
+ "beijing_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "beijing_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "beijing_company_avg_salary['avg_salary'] = beijing_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "beijing_company_avg_salary_counts = pd.merge(beijing_company_avg_salary, beijing_job_count_data, on='company_name')\n",
+ "beijing_company = beijing_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 317,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "shanghai_company_avg_salary = shanghai.groupby('company_name').mean()\n",
+ "shanghai_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "shanghai_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "shanghai_company_avg_salary['avg_salary'] = shanghai_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "shanghai_company_avg_salary_counts = pd.merge(shanghai_company_avg_salary, shanghai_job_count_data, on='company_name')\n",
+ "shanghai_company = shanghai_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 318,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shenzhen_company_avg_salary = shenzhen.groupby('company_name').mean()\n",
+ "shenzhen_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "shenzhen_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "shenzhen_company_avg_salary['avg_salary'] = shenzhen_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "shenzhen_company_avg_salary_counts = pd.merge(shenzhen_company_avg_salary, shenzhen_job_count_data, on='company_name')\n",
+ "shenzhen_company = shenzhen_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 319,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hangzhou_company_avg_salary = hangzhou.groupby('company_name').mean()\n",
+ "hangzhou_company_avg_salary.reset_index(inplace=True)\n",
+ "\n",
+ "hangzhou_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n",
+ "\n",
+ "hangzhou_company_avg_salary['avg_salary'] = hangzhou_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n",
+ "hangzhou_company_avg_salary_counts = pd.merge(hangzhou_company_avg_salary, hangzhou_job_count_data, on='company_name')\n",
+ "hangzhou_company = hangzhou_company_avg_salary_counts.sort_values(by='counts', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 142,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 142,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(beijing_company['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"招聘岗位数量\", beijing_company['counts'].values.tolist()[:10])\n",
+ "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n",
+ "bar.extend_axis(\n",
+ " yaxis=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ " )\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"北京企业招聘情况\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar2 = Bar().add_xaxis(beijing_company['company_name'].values.tolist()[:10])\n",
+ "bar2.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10], yaxis_index=1)\n",
+ "bar.overlap(bar2)\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " company_name | \n",
+ " avg_salary | \n",
+ " counts | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 悠易互通 | \n",
+ " 50.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 广联达 | \n",
+ " 50.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 宝誉德 | \n",
+ " 47.5 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 网易 | \n",
+ " 45.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 忽客 | \n",
+ " 45.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 622 | \n",
+ " 天地在线 | \n",
+ " 4.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 623 | \n",
+ " 全时天地在线 | \n",
+ " 4.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 624 | \n",
+ " 明大启微 | \n",
+ " 4.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 625 | \n",
+ " 视觉互联 | \n",
+ " 3.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 626 | \n",
+ " 有缘网 | \n",
+ " 2.0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
627 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " company_name avg_salary counts\n",
+ "0 悠易互通 50.0 1\n",
+ "1 广联达 50.0 1\n",
+ "2 宝誉德 47.5 2\n",
+ "3 网易 45.0 1\n",
+ "4 忽客 45.0 1\n",
+ ".. ... ... ...\n",
+ "622 天地在线 4.0 1\n",
+ "623 全时天地在线 4.0 1\n",
+ "624 明大启微 4.0 1\n",
+ "625 视觉互联 3.0 1\n",
+ "626 有缘网 2.0 1\n",
+ "\n",
+ "[627 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 144,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "beijing_company_avg_salary_counts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 315,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 315,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(beijing_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"北京平均招聘薪资\", beijing_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"北京在招岗位数量\", beijing_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 321,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 321,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(shanghai_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"上海平均招聘薪资\", shanghai_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"上海在招岗位数量\", shanghai_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 322,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 322,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(shenzhen_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"深圳平均招聘薪资\", shenzhen_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"深圳在招岗位数量\", shenzhen_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 323,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 323,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(hangzhou_company_avg_salary_counts['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"杭州平均招聘薪资\", hangzhou_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"杭州在招岗位数量\", hangzhou_company_avg_salary_counts['counts'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "bar.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 324,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing_sort = beijing.sort_values(by='avg_salary', ascending=False)\n",
+ "shanghai_sort = shanghai.sort_values(by='avg_salary', ascending=False)\n",
+ "shenzhen_sort = shenzhen.sort_values(by='avg_salary', ascending=False)\n",
+ "hangzhou_sort = hangzhou.sort_values(by='avg_salary', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 325,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "beijing_job_sort_by_salary = beijing_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n",
+ "shanghai_job_sort_by_salary = shanghai_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n",
+ "shenzhen_job_sort_by_salary = shenzhen_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n",
+ "hangzhou_job_sort_by_salary = hangzhou_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 329,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "beijing_job_sort_by_salary_salary = beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n",
+ "beijing_job_sort_by_salary_jobname = beijing_job_sort_by_salary['job_name'].values.tolist()[:10]\n",
+ "beijing_job_sort_by_salary_company = beijing_job_sort_by_salary['company_name'].values.tolist()[:10]\n",
+ "beijing_x_index = [beijing_job_sort_by_salary_jobname[i] + \":\" + str(beijing_job_sort_by_salary_salary[i]) for i in range(0, len(beijing_job_sort_by_salary_company))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 330,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shanghai_job_sort_by_salary_salary = shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n",
+ "shanghai_job_sort_by_salary_jobname = shanghai_job_sort_by_salary['job_name'].values.tolist()[:10]\n",
+ "shanghai_job_sort_by_salary_company = shanghai_job_sort_by_salary['company_name'].values.tolist()[:10]\n",
+ "shanghai_x_index = [shanghai_job_sort_by_salary_jobname[i] + \":\" + str(shanghai_job_sort_by_salary_salary[i]) for i in range(0, len(shanghai_job_sort_by_salary_company))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 331,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shenzhen_job_sort_by_salary_salary = shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n",
+ "shenzhen_job_sort_by_salary_jobname = shenzhen_job_sort_by_salary['job_name'].values.tolist()[:10]\n",
+ "shenzhen_job_sort_by_salary_company = shenzhen_job_sort_by_salary['company_name'].values.tolist()[:10]\n",
+ "shenzhen_x_index = [shenzhen_job_sort_by_salary_jobname[i] + \":\" + str(shenzhen_job_sort_by_salary_salary[i]) for i in range(0, len(shenzhen_job_sort_by_salary_company))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 334,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hangzhou_job_sort_by_salary_salary = hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n",
+ "hangzhou_job_sort_by_salary_jobname = hangzhou_job_sort_by_salary['job_name'].values.tolist()[:10]\n",
+ "hangzhou_job_sort_by_salary_company = hangzhou_job_sort_by_salary['company_name'].values.tolist()[:10]\n",
+ "hangzhou_x_index = [hangzhou_job_sort_by_salary_jobname[i] + \":\" + str(hangzhou_job_sort_by_salary_salary[i]) for i in range(0, len(hangzhou_job_sort_by_salary_company))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 336,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 336,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 单个岗位工资排行\n",
+ "bar = Bar()\n",
+ "bar.add_xaxis(beijing_job_sort_by_salary['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"北京招聘薪资排行\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "\n",
+ "# line = Line()\n",
+ "# line.add_xaxis(x_index)\n",
+ "# line.add_yaxis(\"\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "# line.set_global_opts(\n",
+ "# xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ "# title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ "# yaxis_opts=opts.AxisOpts(\n",
+ "# axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ "# ),\n",
+ "# # datazoom_opts=opts.DataZoomOpts(),\n",
+ "# )\n",
+ "\n",
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(beijing_x_index)\n",
+ "scatter.add_yaxis(\"北京高薪岗位\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n",
+ " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=50\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "\n",
+ "grid = Grid()\n",
+ "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n",
+ "# grid.add(line, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 338,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 338,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(shanghai_job_sort_by_salary['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"上海招聘薪资排行\", shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "\n",
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(shanghai_x_index)\n",
+ "scatter.add_yaxis(\"上海高薪岗位\", shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n",
+ " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=40\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "grid = Grid()\n",
+ "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n",
+ "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 339,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 339,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(shenzhen_job_sort_by_salary['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"深圳招聘薪资排行\", shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "\n",
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(shenzhen_x_index)\n",
+ "scatter.add_yaxis(\"深圳高薪岗位\", shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n",
+ " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=40\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "grid = Grid()\n",
+ "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n",
+ "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 340,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 340,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bar = Bar()\n",
+ "bar.add_xaxis(hangzhou_job_sort_by_salary['company_name'].values.tolist()[:10])\n",
+ "bar.add_yaxis(\"杭州招聘薪资排行\", hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "bar.set_global_opts(\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n",
+ " )\n",
+ ")\n",
+ "bar.set_series_opts(\n",
+ " label_opts=opts.LabelOpts(is_show=True)\n",
+ ")\n",
+ "\n",
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(hangzhou_x_index)\n",
+ "scatter.add_yaxis(\"杭州高薪岗位\", hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n",
+ " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=40\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "grid = Grid()\n",
+ "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n",
+ "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n",
+ "grid.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 233,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 233,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "scatter = Scatter()\n",
+ "scatter.add_xaxis(x_index)\n",
+ "scatter.add_yaxis(\"招聘岗位\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n",
+ "scatter.set_global_opts(\n",
+ " visualmap_opts=opts.VisualMapOpts(type_=\"size\", max_=70, min_=50),\n",
+ " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n",
+ " yaxis_opts=opts.AxisOpts(\n",
+ " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n",
+ " min_=50\n",
+ " ),\n",
+ " # datazoom_opts=opts.DataZoomOpts(),\n",
+ ")\n",
+ "scatter.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 341,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "beijing_job_type = beijing_job_sort_by_salary['job_type'][:10].value_counts()\n",
+ "shanghai_job_type = shanghai_job_sort_by_salary['job_type'][:10].value_counts()\n",
+ "shenzhen_job_type = shenzhen_job_sort_by_salary['job_type'][:10].value_counts()\n",
+ "hangzhou_job_type = hangzhou_job_sort_by_salary['job_type'][:10].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 278,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 278,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(beijing_job_type.index.tolist(), beijing_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"北京高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 343,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 343,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(shanghai_job_type.index.tolist(), shanghai_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"上海高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 344,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 344,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(shenzhen_job_type.index.tolist(), shenzhen_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"深圳高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 345,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 345,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pie = Pie()\n",
+ "pie.add(\"\", [list(z) for z in zip(hangzhou_job_type.index.tolist(), hangzhou_job_type.values.tolist())],\n",
+ " radius=[\"30%\", \"75%\"],\n",
+ " center=[\"40%\", \"50%\"],\n",
+ " rosetype=\"radius\")\n",
+ "pie.set_global_opts(\n",
+ " title_opts=opts.TitleOpts(title=\"杭州高薪岗位分布\"),\n",
+ " legend_opts=opts.LegendOpts(\n",
+ " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n",
+ " ),\n",
+ " )\n",
+ "pie.render_notebook()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/boss_spider/job_analyse.py b/boss_spider/job_analyse.py
index 0cd34f4..e8fb3ca 100644
--- a/boss_spider/job_analyse.py
+++ b/boss_spider/job_analyse.py
@@ -5,3 +5,4 @@
@File: job_analyse.py
"""
+# check with jupyter notebook
\ No newline at end of file
diff --git a/college/analyse.py b/college/analyse.py
index 9b191c4..041552e 100644
--- a/college/analyse.py
+++ b/college/analyse.py
@@ -11,7 +11,6 @@
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ChartType, SymbolType
-
df = pd.read_csv("college_data.csv")
df_new = df.drop_duplicates(subset=['name']) # 有重复的数据,需要删除
df_site = df_new[df_new['site'] != '——']