diff --git a/.idea/workspace.xml b/.idea/workspace.xml index fd45114..6d59da9 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -3,12 +3,10 @@ - - - - - - + + + + + + + + @@ -85,10 +87,6 @@ - - - - diff --git a/boss_spider/.ipynb_checkpoints/boss_job-checkpoint.ipynb b/boss_spider/.ipynb_checkpoints/boss_job-checkpoint.ipynb index 646a26f..96e2942 100644 --- a/boss_spider/.ipynb_checkpoints/boss_job-checkpoint.ipynb +++ b/boss_spider/.ipynb_checkpoints/boss_job-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 167, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -14,9 +14,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 9, "metadata": { - "collapsed": true + "scrolled": true }, "outputs": [ { @@ -219,7 +219,7 @@ "[1167 rows x 9 columns]" ] }, - "execution_count": 2, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -231,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 10, "metadata": { "scrolled": true }, @@ -249,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 11, "metadata": { "scrolled": true }, @@ -310,7 +310,7 @@ "cell_type": "code", "execution_count": 106, "metadata": { - "scrolled": true + "collapsed": true }, "outputs": [ { @@ -389,7 +389,9 @@ { "cell_type": "code", "execution_count": 107, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [ { "data": { @@ -414,14 +416,14 @@ } ], "source": [ - "xian_java\n" + "xian_java" ] }, { "cell_type": "code", "execution_count": 108, "metadata": { - "scrolled": true + "collapsed": true }, "outputs": [ { @@ -492,7 +494,9 @@ { "cell_type": "code", "execution_count": 109, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [ { "data": { @@ -522,7 +526,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 12, "metadata": { "scrolled": false }, @@ -538,14 +542,14 @@ " });\n", "\n", "\n", - "
\n", + "
\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 112, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -920,61 +924,122 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ - "provinces = ['北京', '上海', '广州', '深圳', '成都', '杭州', '南京', '武汉', '西安']\n", - "provinces2 = ['北京', '上海', '广东', '四川', '浙江', '江苏', '湖北', '陕西']" + "provinces = ['北京', '上海', '广州', '深圳', '成都', '杭州', '南京', '武汉', '西安']" ] }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ - "Python_values = ['25.1',\n", + "Python_values = [\n", + " '25.1',\n", "'24.5',\n", "'14.5',\n", "'25.3',\n", + "'13.9',\n", "'14.9',\n", "'14.4',\n", "'12.4',\n", - "'13.4',\n", + "'13.4'\n", + "]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "Java_values = [\n", + " '14.7',\n", + "'32.4',\n", + "'16.5',\n", + "'14.5',\n", + "'15.9',\n", + "'32.8',\n", + "'15.4',\n", + "'15.3',\n", + "'15.7'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "dataanalyse_values = [\n", + " '24.6',\n", + "'15.8',\n", + "'14.6',\n", + "'32.6',\n", + "'5.5',\n", + "'14.6',\n", + "'9.2',\n", + "'5.6',\n", + "'7.8'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "product_values = [\n", + "'32.7',\n", + "'23.3',\n", + "'17.2',\n", + "'25.9',\n", + "'16.1',\n", + "'24.9',\n", + "'25',\n", + "'18',\n", + "'15.6'\n", "]" ] }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[['北京', '25.1'],\n", - " ['上海', '24.5'],\n", - " ['广东', '14.5'],\n", - " ['四川', '25.3'],\n", - " ['浙江', '14.9'],\n", - " ['江苏', '14.4'],\n", - " ['湖北', '12.4'],\n", - " ['陕西', '13.4']]" + "[24.275000000000002,\n", + " 24.0,\n", + " 15.7,\n", + " 24.575000000000003,\n", + " 12.85,\n", + " 21.799999999999997,\n", + " 16.0,\n", + " 12.825000000000001,\n", + " 13.125]" ] }, - "execution_count": 132, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "[list(z) for z in zip(provinces2, Python_values)]" + "total_values = [(float(Python_values[i]) + float(Java_values[i]) + float(dataanalyse_values[i]) + float(product_values[i]))/4 for i in range(0, len(Python_values))]\n", + "total_values" ] }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -985,8 +1050,8 @@ " .add(\"\", [list(z) for z in zip(provinces, Python_values)])\n", " .set_series_opts(label_opts=opts.LabelOpts(is_show=False))\n", " .set_global_opts(\n", - " visualmap_opts=opts.VisualMapOpts(is_piecewise=True, max_=26, min_=10),\n", - " title_opts=opts.TitleOpts(title=\"Python 招聘薪资情况\"),\n", + " visualmap_opts=opts.VisualMapOpts(is_piecewise=True, max_=26, min_=12),\n", + " title_opts=opts.TitleOpts(title=\"Python 需求情况\"),\n", " )\n", " )\n", " return c" @@ -994,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 53, "metadata": {}, "outputs": [ { @@ -1008,14 +1073,14 @@ " });\n", "\n", "\n", - "
\n", + "
\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 150, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -1196,7 +1269,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -1204,11 +1277,11 @@ " c = (\n", " Geo()\n", " .add_schema(maptype=\"china\")\n", - " .add(\"\", [list(z) for z in zip(provinces, Python_values)], type_=ChartType.HEATMAP,)\n", + " .add(\"\", [list(z) for z in zip(provinces, total_values)], type_=ChartType.HEATMAP,)\n", " .set_series_opts(label_opts=opts.LabelOpts(is_show=False))\n", " .set_global_opts(\n", - " visualmap_opts=opts.VisualMapOpts(is_piecewise=False, max_=26, min_=10),\n", - " title_opts=opts.TitleOpts(title=\"Python 招聘薪资情况\"),\n", + " visualmap_opts=opts.VisualMapOpts(is_piecewise=False, max_=25, min_=12),\n", + " title_opts=opts.TitleOpts(title=\"需求热力图\"),\n", " )\n", " )\n", " return c" @@ -1216,7 +1289,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 57, "metadata": {}, "outputs": [ { @@ -1230,14 +1303,14 @@ " });\n", "\n", "\n", - "
\n", + "
\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 154, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -1417,7 +1498,7 @@ }, { "cell_type": "code", - "execution_count": 216, + "execution_count": 58, "metadata": { "scrolled": true }, @@ -1443,7 +1524,7 @@ }, { "cell_type": "code", - "execution_count": 217, + "execution_count": 59, "metadata": { "scrolled": true }, @@ -1459,95 +1540,95 @@ " ('金融产品经理', 4),\n", " ('后台产品经理', 3),\n", " ('用户产品经理', 3),\n", - " ('财务产品经理', 3),\n", " ('移动产品经理', 3),\n", + " ('财务产品经理', 3),\n", " ('商业化产品经理', 2),\n", + " ('资深产品经理', 2),\n", + " ('广告产品经理', 2),\n", + " ('CRM产品经理', 2),\n", + " ('增长产品经理', 2),\n", " ('搜索产品经理', 2),\n", + " ('黄金流程产品经理', 2),\n", + " ('平台产品经理', 2),\n", " ('用户增长产品经理', 2),\n", + " ('APP产品经理', 2),\n", " ('C端产品经理', 2),\n", " ('社区产品经理', 2),\n", - " ('平台产品经理', 2),\n", - " ('黄金流程产品经理', 2),\n", - " ('资深产品经理', 2),\n", - " ('APP产品经理', 2),\n", - " ('CRM产品经理', 2),\n", - " ('增长产品经理', 2),\n", - " ('广告产品经理', 2),\n", - " ('产品经理 (MJ000472)', 1),\n", - " ('产品经理-北美业务', 1),\n", - " ('信息化产品经理', 1),\n", - " ('社区/用户运营产品经理', 1),\n", - " ('产品经理-电商', 1),\n", - " ('产品经理-BI方向', 1),\n", - " ('产品经理(前端)', 1),\n", - " ('算法产品经理', 1),\n", - " ('用户端产品经理(金融)', 1),\n", - " ('策略产品经理(北京)', 1),\n", - " ('高级产品经理(转化)', 1),\n", - " ('直播产品经理', 1),\n", - " ('产品经理-可视化', 1),\n", - " ('高级客户端产品经理', 1),\n", - " ('ELD产品经理', 1),\n", + " ('产品经理(J16880)', 1),\n", " ('高级CRM产品经理', 1),\n", - " ('开放平台产品经理', 1),\n", - " ('物流产品经理', 1),\n", - " ('高级社区产品经理', 1),\n", + " ('大数据产品经理', 1),\n", " ('ERP产品经理', 1),\n", - " ('分期商城产品经理', 1),\n", - " ('车载产品经理', 1),\n", - " ('支付产品经理', 1),\n", + " ('ELD产品经理', 1),\n", + " ('高级地图产品经理', 1),\n", + " ('营销产品经理', 1),\n", " ('高级数据产品经理', 1),\n", - " ('医疗产品经理', 1),\n", - " ('京东产品经理', 1),\n", - " ('调度策略产品经理(北京)', 1),\n", - " ('产品经理(J16880)', 1),\n", - " ('文娱-用户产品经理', 1),\n", - " ('大数据产品经理', 1),\n", - " ('高级策略产品经理', 1),\n", - " ('产品经理(边缘计算方向)', 1),\n", - " ('商业策略产品经理', 1),\n", - " ('gis产品经理', 1),\n", - " ('推荐产品经理', 1),\n", - " ('高级增长产品经理', 1),\n", " ('BI产品经理', 1),\n", + " ('ToB产品经理', 1),\n", " ('高级用户产品经理', 1),\n", - " ('安全产品经理', 1),\n", - " ('视觉算法/产品经理', 1),\n", - " ('EHR产品经理', 1),\n", " ('IT产品经理', 1),\n", - " ('广告高级产品经理', 1),\n", - " ('会员产品经理', 1),\n", - " ('Saas后台产品经理', 1),\n", + " ('高级客户端产品经理', 1),\n", + " ('高级商业产品经理', 1),\n", + " ('推荐策略产品经理', 1),\n", + " ('AI产品经理', 1),\n", + " ('供应链产品经理', 1),\n", + " ('推荐产品经理', 1),\n", + " ('售前产品经理(安全)', 1),\n", + " ('社区/用户运营产品经理', 1),\n", + " ('产品经理-收益方向', 1),\n", + " ('分期商城产品经理', 1),\n", " ('初级产品经理', 1),\n", + " ('产品经理(边缘计算方向)', 1),\n", + " ('产品经理(saas)', 1),\n", + " ('资深产品经理(北京)', 1),\n", + " ('产品经理-可视化', 1),\n", + " ('学生端产品经理 (MJ000132)', 1),\n", + " ('信息化产品经理', 1),\n", + " ('高级策略产品经理', 1),\n", + " ('产品经理-北美业务', 1),\n", + " ('安全产品经理', 1),\n", + " ('EHR产品经理', 1),\n", + " ('支付产品经理', 1),\n", " ('流量产品经理', 1),\n", - " ('高级地图产品经理', 1),\n", - " ('ToB产品经理', 1),\n", - " ('产品经理-收益方向', 1),\n", - " ('AI产品经理', 1),\n", + " ('策略产品经理(北京)', 1),\n", + " ('内容产品经理', 1),\n", + " ('中台产品经理', 1),\n", + " ('高级产品经理(转化)', 1),\n", + " ('调度策略产品经理(北京)', 1),\n", + " ('产品经理-BI方向', 1),\n", + " ('实习产品经理', 1),\n", + " ('会员产品经理', 1),\n", + " ('高级社区产品经理', 1),\n", + " ('文娱-用户产品经理', 1),\n", " ('产品经理岗', 1),\n", - " ('营销产品经理', 1),\n", - " ('前台产品经理', 1),\n", + " ('直播产品经理', 1),\n", + " ('京东产品经理', 1),\n", " ('售前产品经理', 1),\n", - " ('WMS产品经理', 1),\n", - " ('推荐策略产品经理', 1),\n", - " ('供应链产品经理', 1),\n", - " ('产品经理(saas)', 1),\n", - " ('售后产品经理', 1),\n", + " ('产品经理 (MJ000472)', 1),\n", + " ('广告高级产品经理', 1),\n", " ('音乐榜单高级产品经理', 1),\n", + " ('视觉算法/产品经理', 1),\n", + " ('产品经理(前端)', 1),\n", + " ('产品经理(资管)', 1),\n", + " ('售后产品经理', 1),\n", + " ('商业策略产品经理', 1),\n", + " ('产品经理-电商', 1),\n", " ('产品经理(金融数据)', 1),\n", - " ('学生端产品经理 (MJ000132)', 1),\n", - " ('高级商业产品经理', 1),\n", - " ('售前产品经理(安全)', 1),\n", - " ('中台产品经理', 1),\n", + " ('开放平台产品经理', 1),\n", + " ('高级增长产品经理', 1),\n", + " ('gis产品经理', 1),\n", + " ('用户端产品经理(金融)', 1),\n", + " ('算法产品经理', 1),\n", + " ('医疗产品经理', 1),\n", + " ('Saas后台产品经理', 1),\n", + " ('车载产品经理', 1),\n", + " ('WMS产品经理', 1),\n", " ('后端产品经理', 1),\n", - " ('资深产品经理(北京)', 1),\n", - " ('产品经理(资管)', 1),\n", - " ('实习产品经理', 1),\n", + " ('物流产品经理', 1),\n", " ('硬件产品经理', 1),\n", - " ('内容产品经理', 1)]" + " ('前台产品经理', 1)]" ] }, - "execution_count": 217, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } @@ -1562,6 +1643,13 @@ "beijing_product_jobname_pd_word" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 203, @@ -5511,6 +5599,453 @@ "source": [ "wordcloud_base(beijing_product_jobname_pd_word).render_notebook()" ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BOSS直聘 52\n", + "京东集团 45\n", + "京东数字科技 18\n", + "VIPKID 18\n", + "旷视MEGVII 14\n", + " ..\n", + "IMS 1\n", + "金吉列留学 1\n", + "安一恒通 1\n", + "天启慧眼 1\n", + "腾信软创科技 1\n", + "Name: company_name, Length: 627, dtype: int64" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "beijing['company_name'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(beijing['company_name'].value_counts().index.tolist()[:10])\n", + "bar.add_yaxis(\"北京\", beijing['company_name'].value_counts().values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"北京企业招聘岗位数量\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(nanjing['company_name'].value_counts().index.tolist()[:10])\n", + "bar.add_yaxis(\"南京\", nanjing['company_name'].value_counts().values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"南京企业招聘岗位数量\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] } ], "metadata": { diff --git a/boss_spider/.ipynb_checkpoints/company_of_job-checkpoint.ipynb b/boss_spider/.ipynb_checkpoints/company_of_job-checkpoint.ipynb new file mode 100644 index 0000000..43c0205 --- /dev/null +++ b/boss_spider/.ipynb_checkpoints/company_of_job-checkpoint.ipynb @@ -0,0 +1,5837 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 273, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pyecharts.charts import Bar, Geo, WordCloud, Grid, Line, Scatter, Pie\n", + "from pyecharts import options as opts\n", + "from pyecharts.globals import ChartType, SymbolType" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing = pd.read_csv(\"beijing_data.csv\")\n", + "shanghai = pd.read_csv(\"shanghai_data.csv\")\n", + "shenzhen = pd.read_csv(\"shenzhen_data.csv\")\n", + "guangzhou = pd.read_csv(\"guangzhou_data.csv\")\n", + "hangzhou = pd.read_csv(\"hangzhou_data.csv\")\n", + "nanjing = pd.read_csv(\"nanjing_data.csv\")\n", + "wuhan = pd.read_csv(\"wuhan_data.csv\")\n", + "xian = pd.read_csv(\"xian_data.csv\")\n", + "chengdu = pd.read_csv(\"chengdu_data.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_nameurisalarysiteyearedujob_namecityjob_typeavg_salary
0中国电信云https://www.zhipin.com/job_detail/11266fc18dc1...20-40K·17薪北京 海淀区 西山经验不限本科Pythonbeijingpython30
1奇虎360https://www.zhipin.com/job_detail/2a3103941dc2...20-40K·15薪北京 朝阳区 酒仙桥3-5年大专Pythonbeijingpython30
2VIPKIDhttps://www.zhipin.com/job_detail/2dd7f2760947...20-40K·14薪北京 朝阳区 十里堡5-10年本科Pythonbeijingpython30
3天阳科技https://www.zhipin.com/job_detail/a0c8485a448b...12-24K北京 石景山区 八宝山1-3年本科python工程师beijingpython18
4武汉佰钧成https://www.zhipin.com/job_detail/d6627bf7c1e2...12-17K北京 朝阳区 三元桥3-5年大专python开发beijingpython14
.................................
9820第壹街舞https://www.zhipin.com/job_detail/23f4fcf1e936...15-30K·13薪成都 武侯区 跳伞塔3-5年本科产品经理-ERP项目经理chengdu产品经理22
9821易停车https://www.zhipin.com/job_detail/1bde3c5dc4c9...6-8K成都3-5年大专产品经理chengdu产品经理7
9822金瑞麒科技https://www.zhipin.com/job_detail/333b26dcf515...10-15K成都 双流区 华阳3-5年本科产品经理chengdu产品经理12
9823朗培商学院https://www.zhipin.com/job_detail/b711ea9545d2...11-20K成都3-5年大专中高级移动产品经理chengdu产品经理15
9824微品致远https://www.zhipin.com/job_detail/03168bcffeca...8-11K成都 武侯区 高升桥1-3年本科产品经理(成都)chengdu产品经理9
\n", + "

9825 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " company_name uri \\\n", + "0 中国电信云 https://www.zhipin.com/job_detail/11266fc18dc1... \n", + "1 奇虎360 https://www.zhipin.com/job_detail/2a3103941dc2... \n", + "2 VIPKID https://www.zhipin.com/job_detail/2dd7f2760947... \n", + "3 天阳科技 https://www.zhipin.com/job_detail/a0c8485a448b... \n", + "4 武汉佰钧成 https://www.zhipin.com/job_detail/d6627bf7c1e2... \n", + "... ... ... \n", + "9820 第壹街舞 https://www.zhipin.com/job_detail/23f4fcf1e936... \n", + "9821 易停车 https://www.zhipin.com/job_detail/1bde3c5dc4c9... \n", + "9822 金瑞麒科技 https://www.zhipin.com/job_detail/333b26dcf515... \n", + "9823 朗培商学院 https://www.zhipin.com/job_detail/b711ea9545d2... \n", + "9824 微品致远 https://www.zhipin.com/job_detail/03168bcffeca... \n", + "\n", + " salary site year edu job_name city job_type \\\n", + "0 20-40K·17薪 北京 海淀区 西山 经验不限 本科 Python beijing python \n", + "1 20-40K·15薪 北京 朝阳区 酒仙桥 3-5年 大专 Python beijing python \n", + "2 20-40K·14薪 北京 朝阳区 十里堡 5-10年 本科 Python beijing python \n", + "3 12-24K 北京 石景山区 八宝山 1-3年 本科 python工程师 beijing python \n", + "4 12-17K 北京 朝阳区 三元桥 3-5年 大专 python开发 beijing python \n", + "... ... ... ... .. ... ... ... \n", + "9820 15-30K·13薪 成都 武侯区 跳伞塔 3-5年 本科 产品经理-ERP项目经理 chengdu 产品经理 \n", + "9821 6-8K 成都 3-5年 大专 产品经理 chengdu 产品经理 \n", + "9822 10-15K 成都 双流区 华阳 3-5年 本科 产品经理 chengdu 产品经理 \n", + "9823 11-20K 成都 3-5年 大专 中高级移动产品经理 chengdu 产品经理 \n", + "9824 8-11K 成都 武侯区 高升桥 1-3年 本科 产品经理(成都) chengdu 产品经理 \n", + "\n", + " avg_salary \n", + "0 30 \n", + "1 30 \n", + "2 30 \n", + "3 18 \n", + "4 14 \n", + "... ... \n", + "9820 22 \n", + "9821 7 \n", + "9822 12 \n", + "9823 15 \n", + "9824 9 \n", + "\n", + "[9825 rows x 10 columns]" + ] + }, + "execution_count": 255, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 合并城市数据\n", + "all_data = pd.concat([beijing, shanghai, shenzhen, guangzhou, hangzhou, nanjing, wuhan, xian, chengdu], ignore_index=True)\n", + "all_data" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "def get_num(mystr):\n", + " rege = r'(\\d+)-(\\d+)K'\n", + " res = re.match(rege, mystr)\n", + " result = (int(res.group(1)) + int(res.group(2)))/2\n", + " return int(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_nameurisalarysiteyearedujob_namecityjob_typeavg_salary
0中国电信云https://www.zhipin.com/job_detail/11266fc18dc1...20-40K·17薪北京 海淀区 西山经验不限本科Pythonbeijingpython30
1奇虎360https://www.zhipin.com/job_detail/2a3103941dc2...20-40K·15薪北京 朝阳区 酒仙桥3-5年大专Pythonbeijingpython30
2VIPKIDhttps://www.zhipin.com/job_detail/2dd7f2760947...20-40K·14薪北京 朝阳区 十里堡5-10年本科Pythonbeijingpython30
3天阳科技https://www.zhipin.com/job_detail/a0c8485a448b...12-24K北京 石景山区 八宝山1-3年本科python工程师beijingpython18
4武汉佰钧成https://www.zhipin.com/job_detail/d6627bf7c1e2...12-17K北京 朝阳区 三元桥3-5年大专python开发beijingpython14
.................................
9820第壹街舞https://www.zhipin.com/job_detail/23f4fcf1e936...15-30K·13薪成都 武侯区 跳伞塔3-5年本科产品经理-ERP项目经理chengdu产品经理22
9821易停车https://www.zhipin.com/job_detail/1bde3c5dc4c9...6-8K成都3-5年大专产品经理chengdu产品经理7
9822金瑞麒科技https://www.zhipin.com/job_detail/333b26dcf515...10-15K成都 双流区 华阳3-5年本科产品经理chengdu产品经理12
9823朗培商学院https://www.zhipin.com/job_detail/b711ea9545d2...11-20K成都3-5年大专中高级移动产品经理chengdu产品经理15
9824微品致远https://www.zhipin.com/job_detail/03168bcffeca...8-11K成都 武侯区 高升桥1-3年本科产品经理(成都)chengdu产品经理9
\n", + "

9825 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " company_name uri \\\n", + "0 中国电信云 https://www.zhipin.com/job_detail/11266fc18dc1... \n", + "1 奇虎360 https://www.zhipin.com/job_detail/2a3103941dc2... \n", + "2 VIPKID https://www.zhipin.com/job_detail/2dd7f2760947... \n", + "3 天阳科技 https://www.zhipin.com/job_detail/a0c8485a448b... \n", + "4 武汉佰钧成 https://www.zhipin.com/job_detail/d6627bf7c1e2... \n", + "... ... ... \n", + "9820 第壹街舞 https://www.zhipin.com/job_detail/23f4fcf1e936... \n", + "9821 易停车 https://www.zhipin.com/job_detail/1bde3c5dc4c9... \n", + "9822 金瑞麒科技 https://www.zhipin.com/job_detail/333b26dcf515... \n", + "9823 朗培商学院 https://www.zhipin.com/job_detail/b711ea9545d2... \n", + "9824 微品致远 https://www.zhipin.com/job_detail/03168bcffeca... \n", + "\n", + " salary site year edu job_name city job_type \\\n", + "0 20-40K·17薪 北京 海淀区 西山 经验不限 本科 Python beijing python \n", + "1 20-40K·15薪 北京 朝阳区 酒仙桥 3-5年 大专 Python beijing python \n", + "2 20-40K·14薪 北京 朝阳区 十里堡 5-10年 本科 Python beijing python \n", + "3 12-24K 北京 石景山区 八宝山 1-3年 本科 python工程师 beijing python \n", + "4 12-17K 北京 朝阳区 三元桥 3-5年 大专 python开发 beijing python \n", + "... ... ... ... .. ... ... ... \n", + "9820 15-30K·13薪 成都 武侯区 跳伞塔 3-5年 本科 产品经理-ERP项目经理 chengdu 产品经理 \n", + "9821 6-8K 成都 3-5年 大专 产品经理 chengdu 产品经理 \n", + "9822 10-15K 成都 双流区 华阳 3-5年 本科 产品经理 chengdu 产品经理 \n", + "9823 11-20K 成都 3-5年 大专 中高级移动产品经理 chengdu 产品经理 \n", + "9824 8-11K 成都 武侯区 高升桥 1-3年 本科 产品经理(成都) chengdu 产品经理 \n", + "\n", + " avg_salary \n", + "0 30 \n", + "1 30 \n", + "2 30 \n", + "3 18 \n", + "4 14 \n", + "... ... \n", + "9820 22 \n", + "9821 7 \n", + "9822 12 \n", + "9823 15 \n", + "9824 9 \n", + "\n", + "[9825 rows x 10 columns]" + ] + }, + "execution_count": 257, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_data['avg_salary'] = all_data['salary'].apply(get_num)\n", + "all_data" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_namecounts
0100教育2
1100课堂1
21药网2
331会议网2
43601
.........
4794齐聚科技1
4795龙之力2
4796龙信科技1
4797龙婴本铺1
4798龙渊网络科技有限公司4
\n", + "

4799 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " company_name counts\n", + "0 100教育 2\n", + "1 100课堂 1\n", + "2 1药网 2\n", + "3 31会议网 2\n", + "4 360 1\n", + "... ... ...\n", + "4794 齐聚科技 1\n", + "4795 龙之力 2\n", + "4796 龙信科技 1\n", + "4797 龙婴本铺 1\n", + "4798 龙渊网络科技有限公司 4\n", + "\n", + "[4799 rows x 2 columns]" + ] + }, + "execution_count": 264, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_company_avg_salary = all_data.groupby('company_name').mean()\n", + "all_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "all_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "all_job_count = all_data.groupby('company_name').size()\n", + "dict_site = {'company_name': all_job_count.index, 'counts': all_job_count.values}\n", + "all_job_count_data = pd.DataFrame(dict_site)\n", + "all_job_count_data" + ] + }, + { + "cell_type": "code", + "execution_count": 310, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "all_company_avg_salary['avg_salary'] = all_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "all_company_avg_salary_counts = pd.merge(all_company_avg_salary, all_job_count_data, on='company_name')\n", + "all_company = all_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 269, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(all_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国企业平均招聘薪资\", all_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国企业在招岗位数量\", all_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 305, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 305, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(all_company['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国招聘岗位数量\", all_company['counts'].values.tolist()[:10])\n", + "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n", + "bar.extend_axis(\n", + " yaxis=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + " )\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"全国企业招聘情况\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar2 = Bar().add_xaxis(all_company['company_name'].values.tolist()[:10])\n", + "bar2.add_yaxis(\"全国平均招聘薪资\", all_company['avg_salary'].values.tolist()[:10], yaxis_index=1)\n", + "bar.overlap(bar2)\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 307, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 307, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(all_company['company_name'].values.tolist()[2:20])\n", + "bar.add_yaxis(\"全国招聘岗位数量\", all_company['counts'].values.tolist()[2:20])\n", + "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n", + "bar.extend_axis(\n", + " yaxis=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + " )\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"全国企业招聘情况\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar2 = Bar().add_xaxis(all_company['company_name'].values.tolist()[2:20])\n", + "bar2.add_yaxis(\"全国平均招聘薪资\", all_company['avg_salary'].values.tolist()[2:20], yaxis_index=1)\n", + "bar.overlap(bar2)\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 313, + "metadata": {}, + "outputs": [], + "source": [ + "all_sort = all_data.sort_values(by='avg_salary', ascending=False)\n", + "all_job_sort_by_salary = all_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n", + "all_job_type = all_job_sort_by_salary['job_type'][:30].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 314, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 314, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(all_job_type.index.tolist(), all_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"全国高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 301, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
job_typeavg_salarycounts
0java18.0773812.688
1产品经理17.3895092.688
2python16.7130822.018
3数据分析14.8046072.431
\n", + "
" + ], + "text/plain": [ + " job_type avg_salary counts\n", + "0 java 18.077381 2.688\n", + "1 产品经理 17.389509 2.688\n", + "2 python 16.713082 2.018\n", + "3 数据分析 14.804607 2.431" + ] + }, + "execution_count": 301, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_jobtype_avg_salary = all_data.groupby('job_type').mean()\n", + "all_jobtype_avg_salary.reset_index(inplace=True)\n", + "\n", + "all_jobtype_counts = all_data.groupby('job_type').size()\n", + "dict_site = {'job_type': all_jobtype_counts.index, 'counts': all_jobtype_counts.values/1000}\n", + "all_job_counts_data = pd.DataFrame(dict_site)\n", + "all_jobtype_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "all_jobtype_data = pd.merge(all_jobtype_avg_salary, all_job_counts_data, on='job_type')\n", + "all_jobtype_data" + ] + }, + { + "cell_type": "code", + "execution_count": 297, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "job_type\n", + "java 2688\n", + "python 2018\n", + "产品经理 2688\n", + "数据分析 2431\n", + "dtype: int64" + ] + }, + "execution_count": 297, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_jobtype_counts = all_data.groupby('job_type').size()\n", + "all_jobtype_counts" + ] + }, + { + "cell_type": "code", + "execution_count": 303, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 303, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(all_jobtype_data['job_type'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国企业岗位平均招聘薪资\", all_jobtype_data['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国企业岗位招聘数量\", all_jobtype_data['counts'].values.tolist()[:10])\n", + "bar.reversal_axis()\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30, formatter=\"{value}K\")),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts()\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True, position=\"right\")\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 分城市分析" + ] + }, + { + "cell_type": "code", + "execution_count": 250, + "metadata": {}, + "outputs": [], + "source": [ + "beijing['avg_salary'] = beijing['salary'].apply(get_num)\n", + "shanghai['avg_salary'] = shanghai['salary'].apply(get_num)\n", + "shenzhen['avg_salary'] = shenzhen['salary'].apply(get_num)\n", + "hangzhou['avg_salary'] = hangzhou['salary'].apply(get_num)\n", + "\n", + "guangzhou['avg_salary'] = guangzhou['salary'].apply(get_num)\n", + "nanjing['avg_salary'] = nanjing['salary'].apply(get_num)\n", + "xian['avg_salary'] = xian['salary'].apply(get_num)\n", + "wuhan['avg_salary'] = wuhan['salary'].apply(get_num)\n", + "chengdu['avg_salary'] = chengdu['salary'].apply(get_num)" + ] + }, + { + "cell_type": "code", + "execution_count": 316, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing_job_count = beijing.groupby('company_name').size()\n", + "dict_site = {'company_name': beijing_job_count.index, 'counts': beijing_job_count.values}\n", + "beijing_job_count_data = pd.DataFrame(dict_site)\n", + "\n", + "shanghai_job_count = shanghai.groupby('company_name').size()\n", + "dict_site = {'company_name': shanghai_job_count.index, 'counts': shanghai_job_count.values}\n", + "shanghai_job_count_data = pd.DataFrame(dict_site)\n", + "\n", + "shenzhen_job_count = shenzhen.groupby('company_name').size()\n", + "dict_site = {'company_name': shenzhen_job_count.index, 'counts': shenzhen_job_count.values}\n", + "shenzhen_job_count_data = pd.DataFrame(dict_site)\n", + "\n", + "hangzhou_job_count = hangzhou.groupby('company_name').size()\n", + "dict_site = {'company_name': hangzhou_job_count.index, 'counts': hangzhou_job_count.values}\n", + "hangzhou_job_count_data = pd.DataFrame(dict_site)" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing_company_avg_salary = beijing.groupby('company_name').mean()\n", + "beijing_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "beijing_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "beijing_company_avg_salary['avg_salary'] = beijing_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "beijing_company_avg_salary_counts = pd.merge(beijing_company_avg_salary, beijing_job_count_data, on='company_name')\n", + "beijing_company = beijing_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 317, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "shanghai_company_avg_salary = shanghai.groupby('company_name').mean()\n", + "shanghai_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "shanghai_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "shanghai_company_avg_salary['avg_salary'] = shanghai_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "shanghai_company_avg_salary_counts = pd.merge(shanghai_company_avg_salary, shanghai_job_count_data, on='company_name')\n", + "shanghai_company = shanghai_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 318, + "metadata": {}, + "outputs": [], + "source": [ + "shenzhen_company_avg_salary = shenzhen.groupby('company_name').mean()\n", + "shenzhen_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "shenzhen_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "shenzhen_company_avg_salary['avg_salary'] = shenzhen_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "shenzhen_company_avg_salary_counts = pd.merge(shenzhen_company_avg_salary, shenzhen_job_count_data, on='company_name')\n", + "shenzhen_company = shenzhen_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 319, + "metadata": {}, + "outputs": [], + "source": [ + "hangzhou_company_avg_salary = hangzhou.groupby('company_name').mean()\n", + "hangzhou_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "hangzhou_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "hangzhou_company_avg_salary['avg_salary'] = hangzhou_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "hangzhou_company_avg_salary_counts = pd.merge(hangzhou_company_avg_salary, hangzhou_job_count_data, on='company_name')\n", + "hangzhou_company = hangzhou_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(beijing_company['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"招聘岗位数量\", beijing_company['counts'].values.tolist()[:10])\n", + "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n", + "bar.extend_axis(\n", + " yaxis=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + " )\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"北京企业招聘情况\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar2 = Bar().add_xaxis(beijing_company['company_name'].values.tolist()[:10])\n", + "bar2.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10], yaxis_index=1)\n", + "bar.overlap(bar2)\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_nameavg_salarycounts
0悠易互通50.01
1广联达50.01
2宝誉德47.52
3网易45.01
4忽客45.01
............
622天地在线4.01
623全时天地在线4.01
624明大启微4.01
625视觉互联3.01
626有缘网2.01
\n", + "

627 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " company_name avg_salary counts\n", + "0 悠易互通 50.0 1\n", + "1 广联达 50.0 1\n", + "2 宝誉德 47.5 2\n", + "3 网易 45.0 1\n", + "4 忽客 45.0 1\n", + ".. ... ... ...\n", + "622 天地在线 4.0 1\n", + "623 全时天地在线 4.0 1\n", + "624 明大启微 4.0 1\n", + "625 视觉互联 3.0 1\n", + "626 有缘网 2.0 1\n", + "\n", + "[627 rows x 3 columns]" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "beijing_company_avg_salary_counts" + ] + }, + { + "cell_type": "code", + "execution_count": 315, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 315, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(beijing_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"北京平均招聘薪资\", beijing_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"北京在招岗位数量\", beijing_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 321, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 321, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(shanghai_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"上海平均招聘薪资\", shanghai_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"上海在招岗位数量\", shanghai_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 322, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 322, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(shenzhen_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"深圳平均招聘薪资\", shenzhen_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"深圳在招岗位数量\", shenzhen_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 323, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 323, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(hangzhou_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"杭州平均招聘薪资\", hangzhou_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"杭州在招岗位数量\", hangzhou_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 324, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing_sort = beijing.sort_values(by='avg_salary', ascending=False)\n", + "shanghai_sort = shanghai.sort_values(by='avg_salary', ascending=False)\n", + "shenzhen_sort = shenzhen.sort_values(by='avg_salary', ascending=False)\n", + "hangzhou_sort = hangzhou.sort_values(by='avg_salary', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 325, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing_job_sort_by_salary = beijing_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n", + "shanghai_job_sort_by_salary = shanghai_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n", + "shenzhen_job_sort_by_salary = shenzhen_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n", + "hangzhou_job_sort_by_salary = hangzhou_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]" + ] + }, + { + "cell_type": "code", + "execution_count": 329, + "metadata": {}, + "outputs": [], + "source": [ + "beijing_job_sort_by_salary_salary = beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n", + "beijing_job_sort_by_salary_jobname = beijing_job_sort_by_salary['job_name'].values.tolist()[:10]\n", + "beijing_job_sort_by_salary_company = beijing_job_sort_by_salary['company_name'].values.tolist()[:10]\n", + "beijing_x_index = [beijing_job_sort_by_salary_jobname[i] + \":\" + str(beijing_job_sort_by_salary_salary[i]) for i in range(0, len(beijing_job_sort_by_salary_company))]" + ] + }, + { + "cell_type": "code", + "execution_count": 330, + "metadata": {}, + "outputs": [], + "source": [ + "shanghai_job_sort_by_salary_salary = shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n", + "shanghai_job_sort_by_salary_jobname = shanghai_job_sort_by_salary['job_name'].values.tolist()[:10]\n", + "shanghai_job_sort_by_salary_company = shanghai_job_sort_by_salary['company_name'].values.tolist()[:10]\n", + "shanghai_x_index = [shanghai_job_sort_by_salary_jobname[i] + \":\" + str(shanghai_job_sort_by_salary_salary[i]) for i in range(0, len(shanghai_job_sort_by_salary_company))]" + ] + }, + { + "cell_type": "code", + "execution_count": 331, + "metadata": {}, + "outputs": [], + "source": [ + "shenzhen_job_sort_by_salary_salary = shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n", + "shenzhen_job_sort_by_salary_jobname = shenzhen_job_sort_by_salary['job_name'].values.tolist()[:10]\n", + "shenzhen_job_sort_by_salary_company = shenzhen_job_sort_by_salary['company_name'].values.tolist()[:10]\n", + "shenzhen_x_index = [shenzhen_job_sort_by_salary_jobname[i] + \":\" + str(shenzhen_job_sort_by_salary_salary[i]) for i in range(0, len(shenzhen_job_sort_by_salary_company))]" + ] + }, + { + "cell_type": "code", + "execution_count": 334, + "metadata": {}, + "outputs": [], + "source": [ + "hangzhou_job_sort_by_salary_salary = hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n", + "hangzhou_job_sort_by_salary_jobname = hangzhou_job_sort_by_salary['job_name'].values.tolist()[:10]\n", + "hangzhou_job_sort_by_salary_company = hangzhou_job_sort_by_salary['company_name'].values.tolist()[:10]\n", + "hangzhou_x_index = [hangzhou_job_sort_by_salary_jobname[i] + \":\" + str(hangzhou_job_sort_by_salary_salary[i]) for i in range(0, len(hangzhou_job_sort_by_salary_company))]" + ] + }, + { + "cell_type": "code", + "execution_count": 336, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 336, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 单个岗位工资排行\n", + "bar = Bar()\n", + "bar.add_xaxis(beijing_job_sort_by_salary['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"北京招聘薪资排行\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "\n", + "# line = Line()\n", + "# line.add_xaxis(x_index)\n", + "# line.add_yaxis(\"\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "# line.set_global_opts(\n", + "# xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + "# title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + "# yaxis_opts=opts.AxisOpts(\n", + "# axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + "# ),\n", + "# # datazoom_opts=opts.DataZoomOpts(),\n", + "# )\n", + "\n", + "scatter = Scatter()\n", + "scatter.add_xaxis(beijing_x_index)\n", + "scatter.add_yaxis(\"北京高薪岗位\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n", + " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=50\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "\n", + "grid = Grid()\n", + "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n", + "# grid.add(line, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 338, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 338, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(shanghai_job_sort_by_salary['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"上海招聘薪资排行\", shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "\n", + "scatter = Scatter()\n", + "scatter.add_xaxis(shanghai_x_index)\n", + "scatter.add_yaxis(\"上海高薪岗位\", shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n", + " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=40\n", + " )\n", + ")\n", + "\n", + "grid = Grid()\n", + "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n", + "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 339, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 339, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(shenzhen_job_sort_by_salary['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"深圳招聘薪资排行\", shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "\n", + "scatter = Scatter()\n", + "scatter.add_xaxis(shenzhen_x_index)\n", + "scatter.add_yaxis(\"深圳高薪岗位\", shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n", + " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=40\n", + " )\n", + ")\n", + "\n", + "grid = Grid()\n", + "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n", + "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 340, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 340, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(hangzhou_job_sort_by_salary['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"杭州招聘薪资排行\", hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "\n", + "scatter = Scatter()\n", + "scatter.add_xaxis(hangzhou_x_index)\n", + "scatter.add_yaxis(\"杭州高薪岗位\", hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n", + " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=40\n", + " )\n", + ")\n", + "\n", + "grid = Grid()\n", + "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n", + "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 233, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scatter = Scatter()\n", + "scatter.add_xaxis(x_index)\n", + "scatter.add_yaxis(\"招聘岗位\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " visualmap_opts=opts.VisualMapOpts(type_=\"size\", max_=70, min_=50),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=50\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "scatter.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 341, + "metadata": {}, + "outputs": [], + "source": [ + "beijing_job_type = beijing_job_sort_by_salary['job_type'][:10].value_counts()\n", + "shanghai_job_type = shanghai_job_sort_by_salary['job_type'][:10].value_counts()\n", + "shenzhen_job_type = shenzhen_job_sort_by_salary['job_type'][:10].value_counts()\n", + "hangzhou_job_type = hangzhou_job_sort_by_salary['job_type'][:10].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 278, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(beijing_job_type.index.tolist(), beijing_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"北京高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 343, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(shanghai_job_type.index.tolist(), shanghai_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"上海高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 344, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 344, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(shenzhen_job_type.index.tolist(), shenzhen_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"深圳高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 345, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 345, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(hangzhou_job_type.index.tolist(), hangzhou_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"杭州高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/boss_spider/boss_job.ipynb b/boss_spider/boss_job.ipynb index f569428..96e2942 100644 --- a/boss_spider/boss_job.ipynb +++ b/boss_spider/boss_job.ipynb @@ -310,7 +310,7 @@ "cell_type": "code", "execution_count": 106, "metadata": { - "scrolled": true + "collapsed": true }, "outputs": [ { @@ -389,7 +389,9 @@ { "cell_type": "code", "execution_count": 107, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [ { "data": { @@ -421,7 +423,7 @@ "cell_type": "code", "execution_count": 108, "metadata": { - "scrolled": true + "collapsed": true }, "outputs": [ { @@ -492,7 +494,9 @@ { "cell_type": "code", "execution_count": 109, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [ { "data": { @@ -1639,6 +1643,13 @@ "beijing_product_jobname_pd_word" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 203, @@ -5588,6 +5599,453 @@ "source": [ "wordcloud_base(beijing_product_jobname_pd_word).render_notebook()" ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BOSS直聘 52\n", + "京东集团 45\n", + "京东数字科技 18\n", + "VIPKID 18\n", + "旷视MEGVII 14\n", + " ..\n", + "IMS 1\n", + "金吉列留学 1\n", + "安一恒通 1\n", + "天启慧眼 1\n", + "腾信软创科技 1\n", + "Name: company_name, Length: 627, dtype: int64" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "beijing['company_name'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(beijing['company_name'].value_counts().index.tolist()[:10])\n", + "bar.add_yaxis(\"北京\", beijing['company_name'].value_counts().values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"北京企业招聘岗位数量\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(nanjing['company_name'].value_counts().index.tolist()[:10])\n", + "bar.add_yaxis(\"南京\", nanjing['company_name'].value_counts().values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"南京企业招聘岗位数量\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] } ], "metadata": { diff --git a/boss_spider/company_of_job.ipynb b/boss_spider/company_of_job.ipynb new file mode 100644 index 0000000..43c0205 --- /dev/null +++ b/boss_spider/company_of_job.ipynb @@ -0,0 +1,5837 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 273, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pyecharts.charts import Bar, Geo, WordCloud, Grid, Line, Scatter, Pie\n", + "from pyecharts import options as opts\n", + "from pyecharts.globals import ChartType, SymbolType" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing = pd.read_csv(\"beijing_data.csv\")\n", + "shanghai = pd.read_csv(\"shanghai_data.csv\")\n", + "shenzhen = pd.read_csv(\"shenzhen_data.csv\")\n", + "guangzhou = pd.read_csv(\"guangzhou_data.csv\")\n", + "hangzhou = pd.read_csv(\"hangzhou_data.csv\")\n", + "nanjing = pd.read_csv(\"nanjing_data.csv\")\n", + "wuhan = pd.read_csv(\"wuhan_data.csv\")\n", + "xian = pd.read_csv(\"xian_data.csv\")\n", + "chengdu = pd.read_csv(\"chengdu_data.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_nameurisalarysiteyearedujob_namecityjob_typeavg_salary
0中国电信云https://www.zhipin.com/job_detail/11266fc18dc1...20-40K·17薪北京 海淀区 西山经验不限本科Pythonbeijingpython30
1奇虎360https://www.zhipin.com/job_detail/2a3103941dc2...20-40K·15薪北京 朝阳区 酒仙桥3-5年大专Pythonbeijingpython30
2VIPKIDhttps://www.zhipin.com/job_detail/2dd7f2760947...20-40K·14薪北京 朝阳区 十里堡5-10年本科Pythonbeijingpython30
3天阳科技https://www.zhipin.com/job_detail/a0c8485a448b...12-24K北京 石景山区 八宝山1-3年本科python工程师beijingpython18
4武汉佰钧成https://www.zhipin.com/job_detail/d6627bf7c1e2...12-17K北京 朝阳区 三元桥3-5年大专python开发beijingpython14
.................................
9820第壹街舞https://www.zhipin.com/job_detail/23f4fcf1e936...15-30K·13薪成都 武侯区 跳伞塔3-5年本科产品经理-ERP项目经理chengdu产品经理22
9821易停车https://www.zhipin.com/job_detail/1bde3c5dc4c9...6-8K成都3-5年大专产品经理chengdu产品经理7
9822金瑞麒科技https://www.zhipin.com/job_detail/333b26dcf515...10-15K成都 双流区 华阳3-5年本科产品经理chengdu产品经理12
9823朗培商学院https://www.zhipin.com/job_detail/b711ea9545d2...11-20K成都3-5年大专中高级移动产品经理chengdu产品经理15
9824微品致远https://www.zhipin.com/job_detail/03168bcffeca...8-11K成都 武侯区 高升桥1-3年本科产品经理(成都)chengdu产品经理9
\n", + "

9825 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " company_name uri \\\n", + "0 中国电信云 https://www.zhipin.com/job_detail/11266fc18dc1... \n", + "1 奇虎360 https://www.zhipin.com/job_detail/2a3103941dc2... \n", + "2 VIPKID https://www.zhipin.com/job_detail/2dd7f2760947... \n", + "3 天阳科技 https://www.zhipin.com/job_detail/a0c8485a448b... \n", + "4 武汉佰钧成 https://www.zhipin.com/job_detail/d6627bf7c1e2... \n", + "... ... ... \n", + "9820 第壹街舞 https://www.zhipin.com/job_detail/23f4fcf1e936... \n", + "9821 易停车 https://www.zhipin.com/job_detail/1bde3c5dc4c9... \n", + "9822 金瑞麒科技 https://www.zhipin.com/job_detail/333b26dcf515... \n", + "9823 朗培商学院 https://www.zhipin.com/job_detail/b711ea9545d2... \n", + "9824 微品致远 https://www.zhipin.com/job_detail/03168bcffeca... \n", + "\n", + " salary site year edu job_name city job_type \\\n", + "0 20-40K·17薪 北京 海淀区 西山 经验不限 本科 Python beijing python \n", + "1 20-40K·15薪 北京 朝阳区 酒仙桥 3-5年 大专 Python beijing python \n", + "2 20-40K·14薪 北京 朝阳区 十里堡 5-10年 本科 Python beijing python \n", + "3 12-24K 北京 石景山区 八宝山 1-3年 本科 python工程师 beijing python \n", + "4 12-17K 北京 朝阳区 三元桥 3-5年 大专 python开发 beijing python \n", + "... ... ... ... .. ... ... ... \n", + "9820 15-30K·13薪 成都 武侯区 跳伞塔 3-5年 本科 产品经理-ERP项目经理 chengdu 产品经理 \n", + "9821 6-8K 成都 3-5年 大专 产品经理 chengdu 产品经理 \n", + "9822 10-15K 成都 双流区 华阳 3-5年 本科 产品经理 chengdu 产品经理 \n", + "9823 11-20K 成都 3-5年 大专 中高级移动产品经理 chengdu 产品经理 \n", + "9824 8-11K 成都 武侯区 高升桥 1-3年 本科 产品经理(成都) chengdu 产品经理 \n", + "\n", + " avg_salary \n", + "0 30 \n", + "1 30 \n", + "2 30 \n", + "3 18 \n", + "4 14 \n", + "... ... \n", + "9820 22 \n", + "9821 7 \n", + "9822 12 \n", + "9823 15 \n", + "9824 9 \n", + "\n", + "[9825 rows x 10 columns]" + ] + }, + "execution_count": 255, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 合并城市数据\n", + "all_data = pd.concat([beijing, shanghai, shenzhen, guangzhou, hangzhou, nanjing, wuhan, xian, chengdu], ignore_index=True)\n", + "all_data" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "def get_num(mystr):\n", + " rege = r'(\\d+)-(\\d+)K'\n", + " res = re.match(rege, mystr)\n", + " result = (int(res.group(1)) + int(res.group(2)))/2\n", + " return int(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_nameurisalarysiteyearedujob_namecityjob_typeavg_salary
0中国电信云https://www.zhipin.com/job_detail/11266fc18dc1...20-40K·17薪北京 海淀区 西山经验不限本科Pythonbeijingpython30
1奇虎360https://www.zhipin.com/job_detail/2a3103941dc2...20-40K·15薪北京 朝阳区 酒仙桥3-5年大专Pythonbeijingpython30
2VIPKIDhttps://www.zhipin.com/job_detail/2dd7f2760947...20-40K·14薪北京 朝阳区 十里堡5-10年本科Pythonbeijingpython30
3天阳科技https://www.zhipin.com/job_detail/a0c8485a448b...12-24K北京 石景山区 八宝山1-3年本科python工程师beijingpython18
4武汉佰钧成https://www.zhipin.com/job_detail/d6627bf7c1e2...12-17K北京 朝阳区 三元桥3-5年大专python开发beijingpython14
.................................
9820第壹街舞https://www.zhipin.com/job_detail/23f4fcf1e936...15-30K·13薪成都 武侯区 跳伞塔3-5年本科产品经理-ERP项目经理chengdu产品经理22
9821易停车https://www.zhipin.com/job_detail/1bde3c5dc4c9...6-8K成都3-5年大专产品经理chengdu产品经理7
9822金瑞麒科技https://www.zhipin.com/job_detail/333b26dcf515...10-15K成都 双流区 华阳3-5年本科产品经理chengdu产品经理12
9823朗培商学院https://www.zhipin.com/job_detail/b711ea9545d2...11-20K成都3-5年大专中高级移动产品经理chengdu产品经理15
9824微品致远https://www.zhipin.com/job_detail/03168bcffeca...8-11K成都 武侯区 高升桥1-3年本科产品经理(成都)chengdu产品经理9
\n", + "

9825 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " company_name uri \\\n", + "0 中国电信云 https://www.zhipin.com/job_detail/11266fc18dc1... \n", + "1 奇虎360 https://www.zhipin.com/job_detail/2a3103941dc2... \n", + "2 VIPKID https://www.zhipin.com/job_detail/2dd7f2760947... \n", + "3 天阳科技 https://www.zhipin.com/job_detail/a0c8485a448b... \n", + "4 武汉佰钧成 https://www.zhipin.com/job_detail/d6627bf7c1e2... \n", + "... ... ... \n", + "9820 第壹街舞 https://www.zhipin.com/job_detail/23f4fcf1e936... \n", + "9821 易停车 https://www.zhipin.com/job_detail/1bde3c5dc4c9... \n", + "9822 金瑞麒科技 https://www.zhipin.com/job_detail/333b26dcf515... \n", + "9823 朗培商学院 https://www.zhipin.com/job_detail/b711ea9545d2... \n", + "9824 微品致远 https://www.zhipin.com/job_detail/03168bcffeca... \n", + "\n", + " salary site year edu job_name city job_type \\\n", + "0 20-40K·17薪 北京 海淀区 西山 经验不限 本科 Python beijing python \n", + "1 20-40K·15薪 北京 朝阳区 酒仙桥 3-5年 大专 Python beijing python \n", + "2 20-40K·14薪 北京 朝阳区 十里堡 5-10年 本科 Python beijing python \n", + "3 12-24K 北京 石景山区 八宝山 1-3年 本科 python工程师 beijing python \n", + "4 12-17K 北京 朝阳区 三元桥 3-5年 大专 python开发 beijing python \n", + "... ... ... ... .. ... ... ... \n", + "9820 15-30K·13薪 成都 武侯区 跳伞塔 3-5年 本科 产品经理-ERP项目经理 chengdu 产品经理 \n", + "9821 6-8K 成都 3-5年 大专 产品经理 chengdu 产品经理 \n", + "9822 10-15K 成都 双流区 华阳 3-5年 本科 产品经理 chengdu 产品经理 \n", + "9823 11-20K 成都 3-5年 大专 中高级移动产品经理 chengdu 产品经理 \n", + "9824 8-11K 成都 武侯区 高升桥 1-3年 本科 产品经理(成都) chengdu 产品经理 \n", + "\n", + " avg_salary \n", + "0 30 \n", + "1 30 \n", + "2 30 \n", + "3 18 \n", + "4 14 \n", + "... ... \n", + "9820 22 \n", + "9821 7 \n", + "9822 12 \n", + "9823 15 \n", + "9824 9 \n", + "\n", + "[9825 rows x 10 columns]" + ] + }, + "execution_count": 257, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_data['avg_salary'] = all_data['salary'].apply(get_num)\n", + "all_data" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_namecounts
0100教育2
1100课堂1
21药网2
331会议网2
43601
.........
4794齐聚科技1
4795龙之力2
4796龙信科技1
4797龙婴本铺1
4798龙渊网络科技有限公司4
\n", + "

4799 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " company_name counts\n", + "0 100教育 2\n", + "1 100课堂 1\n", + "2 1药网 2\n", + "3 31会议网 2\n", + "4 360 1\n", + "... ... ...\n", + "4794 齐聚科技 1\n", + "4795 龙之力 2\n", + "4796 龙信科技 1\n", + "4797 龙婴本铺 1\n", + "4798 龙渊网络科技有限公司 4\n", + "\n", + "[4799 rows x 2 columns]" + ] + }, + "execution_count": 264, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_company_avg_salary = all_data.groupby('company_name').mean()\n", + "all_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "all_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "all_job_count = all_data.groupby('company_name').size()\n", + "dict_site = {'company_name': all_job_count.index, 'counts': all_job_count.values}\n", + "all_job_count_data = pd.DataFrame(dict_site)\n", + "all_job_count_data" + ] + }, + { + "cell_type": "code", + "execution_count": 310, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "all_company_avg_salary['avg_salary'] = all_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "all_company_avg_salary_counts = pd.merge(all_company_avg_salary, all_job_count_data, on='company_name')\n", + "all_company = all_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 269, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(all_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国企业平均招聘薪资\", all_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国企业在招岗位数量\", all_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 305, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 305, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(all_company['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国招聘岗位数量\", all_company['counts'].values.tolist()[:10])\n", + "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n", + "bar.extend_axis(\n", + " yaxis=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + " )\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"全国企业招聘情况\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar2 = Bar().add_xaxis(all_company['company_name'].values.tolist()[:10])\n", + "bar2.add_yaxis(\"全国平均招聘薪资\", all_company['avg_salary'].values.tolist()[:10], yaxis_index=1)\n", + "bar.overlap(bar2)\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 307, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 307, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(all_company['company_name'].values.tolist()[2:20])\n", + "bar.add_yaxis(\"全国招聘岗位数量\", all_company['counts'].values.tolist()[2:20])\n", + "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n", + "bar.extend_axis(\n", + " yaxis=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + " )\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"全国企业招聘情况\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar2 = Bar().add_xaxis(all_company['company_name'].values.tolist()[2:20])\n", + "bar2.add_yaxis(\"全国平均招聘薪资\", all_company['avg_salary'].values.tolist()[2:20], yaxis_index=1)\n", + "bar.overlap(bar2)\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 313, + "metadata": {}, + "outputs": [], + "source": [ + "all_sort = all_data.sort_values(by='avg_salary', ascending=False)\n", + "all_job_sort_by_salary = all_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n", + "all_job_type = all_job_sort_by_salary['job_type'][:30].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 314, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 314, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(all_job_type.index.tolist(), all_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"全国高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 301, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
job_typeavg_salarycounts
0java18.0773812.688
1产品经理17.3895092.688
2python16.7130822.018
3数据分析14.8046072.431
\n", + "
" + ], + "text/plain": [ + " job_type avg_salary counts\n", + "0 java 18.077381 2.688\n", + "1 产品经理 17.389509 2.688\n", + "2 python 16.713082 2.018\n", + "3 数据分析 14.804607 2.431" + ] + }, + "execution_count": 301, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_jobtype_avg_salary = all_data.groupby('job_type').mean()\n", + "all_jobtype_avg_salary.reset_index(inplace=True)\n", + "\n", + "all_jobtype_counts = all_data.groupby('job_type').size()\n", + "dict_site = {'job_type': all_jobtype_counts.index, 'counts': all_jobtype_counts.values/1000}\n", + "all_job_counts_data = pd.DataFrame(dict_site)\n", + "all_jobtype_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "all_jobtype_data = pd.merge(all_jobtype_avg_salary, all_job_counts_data, on='job_type')\n", + "all_jobtype_data" + ] + }, + { + "cell_type": "code", + "execution_count": 297, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "job_type\n", + "java 2688\n", + "python 2018\n", + "产品经理 2688\n", + "数据分析 2431\n", + "dtype: int64" + ] + }, + "execution_count": 297, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_jobtype_counts = all_data.groupby('job_type').size()\n", + "all_jobtype_counts" + ] + }, + { + "cell_type": "code", + "execution_count": 303, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 303, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(all_jobtype_data['job_type'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国企业岗位平均招聘薪资\", all_jobtype_data['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"全国企业岗位招聘数量\", all_jobtype_data['counts'].values.tolist()[:10])\n", + "bar.reversal_axis()\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30, formatter=\"{value}K\")),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts()\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True, position=\"right\")\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 分城市分析" + ] + }, + { + "cell_type": "code", + "execution_count": 250, + "metadata": {}, + "outputs": [], + "source": [ + "beijing['avg_salary'] = beijing['salary'].apply(get_num)\n", + "shanghai['avg_salary'] = shanghai['salary'].apply(get_num)\n", + "shenzhen['avg_salary'] = shenzhen['salary'].apply(get_num)\n", + "hangzhou['avg_salary'] = hangzhou['salary'].apply(get_num)\n", + "\n", + "guangzhou['avg_salary'] = guangzhou['salary'].apply(get_num)\n", + "nanjing['avg_salary'] = nanjing['salary'].apply(get_num)\n", + "xian['avg_salary'] = xian['salary'].apply(get_num)\n", + "wuhan['avg_salary'] = wuhan['salary'].apply(get_num)\n", + "chengdu['avg_salary'] = chengdu['salary'].apply(get_num)" + ] + }, + { + "cell_type": "code", + "execution_count": 316, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing_job_count = beijing.groupby('company_name').size()\n", + "dict_site = {'company_name': beijing_job_count.index, 'counts': beijing_job_count.values}\n", + "beijing_job_count_data = pd.DataFrame(dict_site)\n", + "\n", + "shanghai_job_count = shanghai.groupby('company_name').size()\n", + "dict_site = {'company_name': shanghai_job_count.index, 'counts': shanghai_job_count.values}\n", + "shanghai_job_count_data = pd.DataFrame(dict_site)\n", + "\n", + "shenzhen_job_count = shenzhen.groupby('company_name').size()\n", + "dict_site = {'company_name': shenzhen_job_count.index, 'counts': shenzhen_job_count.values}\n", + "shenzhen_job_count_data = pd.DataFrame(dict_site)\n", + "\n", + "hangzhou_job_count = hangzhou.groupby('company_name').size()\n", + "dict_site = {'company_name': hangzhou_job_count.index, 'counts': hangzhou_job_count.values}\n", + "hangzhou_job_count_data = pd.DataFrame(dict_site)" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing_company_avg_salary = beijing.groupby('company_name').mean()\n", + "beijing_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "beijing_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "beijing_company_avg_salary['avg_salary'] = beijing_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "beijing_company_avg_salary_counts = pd.merge(beijing_company_avg_salary, beijing_job_count_data, on='company_name')\n", + "beijing_company = beijing_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 317, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "shanghai_company_avg_salary = shanghai.groupby('company_name').mean()\n", + "shanghai_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "shanghai_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "shanghai_company_avg_salary['avg_salary'] = shanghai_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "shanghai_company_avg_salary_counts = pd.merge(shanghai_company_avg_salary, shanghai_job_count_data, on='company_name')\n", + "shanghai_company = shanghai_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 318, + "metadata": {}, + "outputs": [], + "source": [ + "shenzhen_company_avg_salary = shenzhen.groupby('company_name').mean()\n", + "shenzhen_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "shenzhen_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "shenzhen_company_avg_salary['avg_salary'] = shenzhen_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "shenzhen_company_avg_salary_counts = pd.merge(shenzhen_company_avg_salary, shenzhen_job_count_data, on='company_name')\n", + "shenzhen_company = shenzhen_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 319, + "metadata": {}, + "outputs": [], + "source": [ + "hangzhou_company_avg_salary = hangzhou.groupby('company_name').mean()\n", + "hangzhou_company_avg_salary.reset_index(inplace=True)\n", + "\n", + "hangzhou_company_avg_salary.sort_values(by='avg_salary', ascending=False, inplace=True)\n", + "\n", + "hangzhou_company_avg_salary['avg_salary'] = hangzhou_company_avg_salary['avg_salary'].apply(lambda x: round(x, 2))\n", + "hangzhou_company_avg_salary_counts = pd.merge(hangzhou_company_avg_salary, hangzhou_job_count_data, on='company_name')\n", + "hangzhou_company = hangzhou_company_avg_salary_counts.sort_values(by='counts', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(beijing_company['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"招聘岗位数量\", beijing_company['counts'].values.tolist()[:10])\n", + "# bar.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10])\n", + "bar.extend_axis(\n", + " yaxis=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + " )\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"北京企业招聘情况\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}个\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar2 = Bar().add_xaxis(beijing_company['company_name'].values.tolist()[:10])\n", + "bar2.add_yaxis(\"平均招聘薪资\", beijing_company['avg_salary'].values.tolist()[:10], yaxis_index=1)\n", + "bar.overlap(bar2)\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
company_nameavg_salarycounts
0悠易互通50.01
1广联达50.01
2宝誉德47.52
3网易45.01
4忽客45.01
............
622天地在线4.01
623全时天地在线4.01
624明大启微4.01
625视觉互联3.01
626有缘网2.01
\n", + "

627 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " company_name avg_salary counts\n", + "0 悠易互通 50.0 1\n", + "1 广联达 50.0 1\n", + "2 宝誉德 47.5 2\n", + "3 网易 45.0 1\n", + "4 忽客 45.0 1\n", + ".. ... ... ...\n", + "622 天地在线 4.0 1\n", + "623 全时天地在线 4.0 1\n", + "624 明大启微 4.0 1\n", + "625 视觉互联 3.0 1\n", + "626 有缘网 2.0 1\n", + "\n", + "[627 rows x 3 columns]" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "beijing_company_avg_salary_counts" + ] + }, + { + "cell_type": "code", + "execution_count": 315, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 315, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(beijing_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"北京平均招聘薪资\", beijing_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"北京在招岗位数量\", beijing_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 321, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 321, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(shanghai_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"上海平均招聘薪资\", shanghai_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"上海在招岗位数量\", shanghai_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 322, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 322, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(shenzhen_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"深圳平均招聘薪资\", shenzhen_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"深圳在招岗位数量\", shenzhen_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 323, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 323, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(hangzhou_company_avg_salary_counts['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"杭州平均招聘薪资\", hangzhou_company_avg_salary_counts['avg_salary'].values.tolist()[:10])\n", + "bar.add_yaxis(\"杭州在招岗位数量\", hangzhou_company_avg_salary_counts['counts'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "bar.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 324, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing_sort = beijing.sort_values(by='avg_salary', ascending=False)\n", + "shanghai_sort = shanghai.sort_values(by='avg_salary', ascending=False)\n", + "shenzhen_sort = shenzhen.sort_values(by='avg_salary', ascending=False)\n", + "hangzhou_sort = hangzhou.sort_values(by='avg_salary', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 325, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "beijing_job_sort_by_salary = beijing_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n", + "shanghai_job_sort_by_salary = shanghai_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n", + "shenzhen_job_sort_by_salary = shenzhen_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]\n", + "hangzhou_job_sort_by_salary = hangzhou_sort[['company_name', 'avg_salary', 'job_name', 'job_type']]" + ] + }, + { + "cell_type": "code", + "execution_count": 329, + "metadata": {}, + "outputs": [], + "source": [ + "beijing_job_sort_by_salary_salary = beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n", + "beijing_job_sort_by_salary_jobname = beijing_job_sort_by_salary['job_name'].values.tolist()[:10]\n", + "beijing_job_sort_by_salary_company = beijing_job_sort_by_salary['company_name'].values.tolist()[:10]\n", + "beijing_x_index = [beijing_job_sort_by_salary_jobname[i] + \":\" + str(beijing_job_sort_by_salary_salary[i]) for i in range(0, len(beijing_job_sort_by_salary_company))]" + ] + }, + { + "cell_type": "code", + "execution_count": 330, + "metadata": {}, + "outputs": [], + "source": [ + "shanghai_job_sort_by_salary_salary = shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n", + "shanghai_job_sort_by_salary_jobname = shanghai_job_sort_by_salary['job_name'].values.tolist()[:10]\n", + "shanghai_job_sort_by_salary_company = shanghai_job_sort_by_salary['company_name'].values.tolist()[:10]\n", + "shanghai_x_index = [shanghai_job_sort_by_salary_jobname[i] + \":\" + str(shanghai_job_sort_by_salary_salary[i]) for i in range(0, len(shanghai_job_sort_by_salary_company))]" + ] + }, + { + "cell_type": "code", + "execution_count": 331, + "metadata": {}, + "outputs": [], + "source": [ + "shenzhen_job_sort_by_salary_salary = shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n", + "shenzhen_job_sort_by_salary_jobname = shenzhen_job_sort_by_salary['job_name'].values.tolist()[:10]\n", + "shenzhen_job_sort_by_salary_company = shenzhen_job_sort_by_salary['company_name'].values.tolist()[:10]\n", + "shenzhen_x_index = [shenzhen_job_sort_by_salary_jobname[i] + \":\" + str(shenzhen_job_sort_by_salary_salary[i]) for i in range(0, len(shenzhen_job_sort_by_salary_company))]" + ] + }, + { + "cell_type": "code", + "execution_count": 334, + "metadata": {}, + "outputs": [], + "source": [ + "hangzhou_job_sort_by_salary_salary = hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10]\n", + "hangzhou_job_sort_by_salary_jobname = hangzhou_job_sort_by_salary['job_name'].values.tolist()[:10]\n", + "hangzhou_job_sort_by_salary_company = hangzhou_job_sort_by_salary['company_name'].values.tolist()[:10]\n", + "hangzhou_x_index = [hangzhou_job_sort_by_salary_jobname[i] + \":\" + str(hangzhou_job_sort_by_salary_salary[i]) for i in range(0, len(hangzhou_job_sort_by_salary_company))]" + ] + }, + { + "cell_type": "code", + "execution_count": 336, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 336, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 单个岗位工资排行\n", + "bar = Bar()\n", + "bar.add_xaxis(beijing_job_sort_by_salary['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"北京招聘薪资排行\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "\n", + "# line = Line()\n", + "# line.add_xaxis(x_index)\n", + "# line.add_yaxis(\"\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "# line.set_global_opts(\n", + "# xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + "# title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + "# yaxis_opts=opts.AxisOpts(\n", + "# axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + "# ),\n", + "# # datazoom_opts=opts.DataZoomOpts(),\n", + "# )\n", + "\n", + "scatter = Scatter()\n", + "scatter.add_xaxis(beijing_x_index)\n", + "scatter.add_yaxis(\"北京高薪岗位\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n", + " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=50\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "\n", + "grid = Grid()\n", + "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n", + "# grid.add(line, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 338, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 338, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(shanghai_job_sort_by_salary['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"上海招聘薪资排行\", shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "\n", + "scatter = Scatter()\n", + "scatter.add_xaxis(shanghai_x_index)\n", + "scatter.add_yaxis(\"上海高薪岗位\", shanghai_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n", + " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=40\n", + " )\n", + ")\n", + "\n", + "grid = Grid()\n", + "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n", + "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 339, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 339, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(shenzhen_job_sort_by_salary['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"深圳招聘薪资排行\", shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "\n", + "scatter = Scatter()\n", + "scatter.add_xaxis(shenzhen_x_index)\n", + "scatter.add_yaxis(\"深圳高薪岗位\", shenzhen_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n", + " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=40\n", + " )\n", + ")\n", + "\n", + "grid = Grid()\n", + "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n", + "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 340, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 340, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bar = Bar()\n", + "bar.add_xaxis(hangzhou_job_sort_by_salary['company_name'].values.tolist()[:10])\n", + "bar.add_yaxis(\"杭州招聘薪资排行\", hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "bar.set_global_opts(\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " title_opts=opts.TitleOpts(title=\"\", subtitle=\"\"),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\")\n", + " )\n", + ")\n", + "bar.set_series_opts(\n", + " label_opts=opts.LabelOpts(is_show=True)\n", + ")\n", + "\n", + "scatter = Scatter()\n", + "scatter.add_xaxis(hangzhou_x_index)\n", + "scatter.add_yaxis(\"杭州高薪岗位\", hangzhou_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"\", pos_top=\"48%\"),\n", + " legend_opts=opts.LegendOpts(pos_top=\"50%\"),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=40\n", + " )\n", + ")\n", + "\n", + "grid = Grid()\n", + "grid.add(bar, grid_opts=opts.GridOpts(pos_bottom=\"60%\"))\n", + "grid.add(scatter, grid_opts=opts.GridOpts(pos_top=\"60%\"))\n", + "grid.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 233, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scatter = Scatter()\n", + "scatter.add_xaxis(x_index)\n", + "scatter.add_yaxis(\"招聘岗位\", beijing_job_sort_by_salary['avg_salary'].values.tolist()[:10])\n", + "scatter.set_global_opts(\n", + " visualmap_opts=opts.VisualMapOpts(type_=\"size\", max_=70, min_=50),\n", + " xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-30)),\n", + " yaxis_opts=opts.AxisOpts(\n", + " axislabel_opts=opts.LabelOpts(formatter=\"{value}K\"),\n", + " min_=50\n", + " ),\n", + " # datazoom_opts=opts.DataZoomOpts(),\n", + ")\n", + "scatter.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 341, + "metadata": {}, + "outputs": [], + "source": [ + "beijing_job_type = beijing_job_sort_by_salary['job_type'][:10].value_counts()\n", + "shanghai_job_type = shanghai_job_sort_by_salary['job_type'][:10].value_counts()\n", + "shenzhen_job_type = shenzhen_job_sort_by_salary['job_type'][:10].value_counts()\n", + "hangzhou_job_type = hangzhou_job_sort_by_salary['job_type'][:10].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 278, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(beijing_job_type.index.tolist(), beijing_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"北京高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 343, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(shanghai_job_type.index.tolist(), shanghai_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"上海高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 344, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 344, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(shenzhen_job_type.index.tolist(), shenzhen_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"深圳高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": 345, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 345, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pie = Pie()\n", + "pie.add(\"\", [list(z) for z in zip(hangzhou_job_type.index.tolist(), hangzhou_job_type.values.tolist())],\n", + " radius=[\"30%\", \"75%\"],\n", + " center=[\"40%\", \"50%\"],\n", + " rosetype=\"radius\")\n", + "pie.set_global_opts(\n", + " title_opts=opts.TitleOpts(title=\"杭州高薪岗位分布\"),\n", + " legend_opts=opts.LegendOpts(\n", + " type_=\"scroll\", pos_left=\"80%\", orient=\"vertical\"\n", + " ),\n", + " )\n", + "pie.render_notebook()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/boss_spider/job_analyse.py b/boss_spider/job_analyse.py index 0cd34f4..e8fb3ca 100644 --- a/boss_spider/job_analyse.py +++ b/boss_spider/job_analyse.py @@ -5,3 +5,4 @@ @File: job_analyse.py """ +# check with jupyter notebook \ No newline at end of file diff --git a/college/analyse.py b/college/analyse.py index 9b191c4..041552e 100644 --- a/college/analyse.py +++ b/college/analyse.py @@ -11,7 +11,6 @@ from pyecharts.commons.utils import JsCode from pyecharts.globals import ChartType, SymbolType - df = pd.read_csv("college_data.csv") df_new = df.drop_duplicates(subset=['name']) # 有重复的数据,需要删除 df_site = df_new[df_new['site'] != '——']