-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
1337 lines (523 loc) · 63.4 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html class="theme-next pisces use-motion" lang="zh-Hans">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<!-- maximum-scale removed: preventing user zoom fails WCAG 1.4.4 (resize text) -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#222">
<meta http-equiv="Cache-Control" content="no-transform">
<meta http-equiv="Cache-Control" content="no-siteapp">
<!-- type="text/css" omitted: it is the default for rel="stylesheet" -->
<link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet">
<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet">
<link href="/css/main.css?v=5.1.4" rel="stylesheet">
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png?v=5.1.4">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png?v=5.1.4">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png?v=5.1.4">
<link rel="mask-icon" href="/images/logo.svg?v=5.1.4" color="#222">
<meta name="keywords" content="Hexo, NexT">
<meta property="og:type" content="website">
<meta property="og:title" content="Yuxiang Coding">
<meta property="og:url" content="https://yuxianglu.github.io/index.html">
<meta property="og:site_name" content="Yuxiang Coding">
<meta property="og:locale" content="zh-Hans">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Yuxiang Coding">
<!-- NexT theme runtime configuration consumed by the theme's JS.
     Values are emitted by the Hexo generator; contents left unchanged. -->
<script id="hexo.configurations">
var NexT = window.NexT || {};
var CONFIG = {
root: '/',
scheme: 'Pisces',
version: '5.1.4',
sidebar: {"position":"left","display":"post","offset":12,"b2t":false,"scrollpercent":false,"onmobile":false},
fancybox: true,
tabs: true,
motion: {"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
duoshuo: {
userId: '0',
author: '博主'
},
algolia: {
applicationID: '',
apiKey: '',
indexName: '',
hits: {"per_page":10},
labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
}
};
</script>
<link rel="canonical" href="https://yuxianglu.github.io/">
<title>Yuxiang Coding</title>
</head>
<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">
<div class="container sidebar-position-left
page-home">
<div class="headband"></div>
<header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
<div class="header-inner"><div class="site-brand-wrapper">
<div class="site-meta ">
<div class="custom-logo-site-title">
<a href="/" class="brand" rel="start">
<span class="logo-line-before"><i></i></span>
<span class="site-title">Yuxiang Coding</span>
<span class="logo-line-after"><i></i></span>
</a>
</div>
<p class="site-subtitle"></p>
</div>
<div class="site-nav-toggle">
<button>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
</button>
</div>
</div>
<nav class="site-nav">
<ul id="menu" class="menu">
<li class="menu-item menu-item-home">
<a href="/" rel="section">
<i class="menu-item-icon fa fa-fw fa-home"></i> <br />
首页
</a>
</li>
<li class="menu-item menu-item-categories">
<a href="/categories/" rel="section">
<i class="menu-item-icon fa fa-fw fa-th"></i> <br />
分类
</a>
</li>
<li class="menu-item menu-item-tags">
<a href="/tags/" rel="section">
<i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
标签
</a>
</li>
<li class="menu-item menu-item-archives">
<a href="/archives/" rel="section">
<i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
归档
</a>
</li>
</ul>
</nav>
</div>
</header>
<main id="main" class="main">
<div class="main-inner">
<div class="content-wrap">
<div id="content" class="content">
<section id="posts" class="posts-expand">
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="https://yuxianglu.github.io/undefined/linux/tar/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Yuxiang Lu">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Yuxiang Coding">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/undefined/linux/tar/" itemprop="url">Linux下的tar压缩解压缩命令详解【转载】</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2018-03-09T00:00:00+08:00">
2018-03-09
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/linux/" itemprop="url" rel="index">
<span itemprop="name">linux</span>
</a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h2 id="tar"><a href="#tar" class="headerlink" title="tar"></a>tar</h2><p>-c: 建立压缩档案<br>-x:解压<br>-t:查看内容<br>-r:向压缩归档文件末尾追加文件<br>-u:更新原压缩包中的文件</p>
<p>这五个是独立的命令,压缩解压都要用到其中一个,可以和别的命令连用但只能用其中一个。下面的参数是根据需要在压缩或解压档案时可选的。<br>-z:有gzip属性的<br>-j:有bz2属性的<br>-Z:有compress属性的<br>-v:显示所有过程<br>-O:将文件解开到标准输出</p>
<p>下面的参数-f是必须的</p>
<p>-f: 使用档案名字,切记,这个参数是最后一个参数,后面只能接档案名。</p>
<p># tar -cf all.tar *.jpg<br>这条命令是将所有.jpg的文件打成一个名为all.tar的包。-c是表示产生新的包,-f指定包的文件名。</p>
<p># tar -rf all.tar *.gif<br>这条命令是将所有.gif的文件增加到all.tar的包里面去。-r是表示增加文件的意思。<br># tar -uf all.tar logo.gif<br>这条命令是更新原来tar包all.tar中logo.gif文件,-u是表示更新文件的意思。<br># tar -tf all.tar<br>这条命令是列出all.tar包中所有文件,-t是列出文件的意思<br># tar -xf all.tar<br>这条命令是解出all.tar包中所有文件,-x是解开的意思</p>
<h2 id="压缩"><a href="#压缩" class="headerlink" title="压缩"></a>压缩</h2><p>tar -cvf jpg.tar *.jpg //将目录里所有jpg文件打包成jpg.tar </p>
<p>tar -czf jpg.tar.gz *.jpg //将目录里所有jpg文件打包成jpg.tar后,并且将其用gzip压缩,生成一个gzip压缩过的包,命名为jpg.tar.gz</p>
<p> tar -cjf jpg.tar.bz2 *.jpg //将目录里所有jpg文件打包成jpg.tar后,并且将其用bzip2压缩,生成一个bzip2压缩过的包,命名为jpg.tar.bz2</p>
<p>tar -cZf jpg.tar.Z *.jpg //将目录里所有jpg文件打包成jpg.tar后,并且将其用compress压缩,生成一个uncompress压缩过的包,命名为jpg.tar.Z</p>
<p>rar a jpg.rar *.jpg //rar格式的压缩,需要先下载rar for linux</p>
<p>zip jpg.zip *.jpg //zip格式的压缩,需要先下载zip for linux</p>
<h2 id="解压"><a href="#解压" class="headerlink" title="解压"></a>解压</h2><p>tar -xvf file.tar //解压 tar包</p>
<p>tar -xzvf file.tar.gz //解压tar.gz</p>
<p>tar -xjvf file.tar.bz2 //解压 tar.bz2</p>
<p>tar -xZvf file.tar.Z //解压tar.Z</p>
<p>unrar e file.rar //解压rar</p>
<p>unzip file.zip //解压zip</p>
<h2 id="总结"><a href="#总结" class="headerlink" title="总结"></a>总结</h2><p>1、*.tar 用 tar -xvf 解压</p>
<p>2、*.gz 用 gzip -d或者gunzip 解压</p>
<p>3、<em>.tar.gz和</em>.tgz 用 tar -xzf 解压</p>
<p>4、*.bz2 用 bzip2 -d或者用bunzip2 解压</p>
<p>5、*.tar.bz2用tar -xjf 解压</p>
<p>6、*.Z 用 uncompress 解压</p>
<p>7、*.tar.Z 用tar -xZf 解压</p>
<p>8、*.rar 用 unrar e解压</p>
<p>9、*.zip 用 unzip 解压</p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="https://yuxianglu.github.io/undefined/hello-world/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Yuxiang Lu">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Yuxiang Coding">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/undefined/hello-world/" itemprop="url">Yuxiang Coding</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2018-01-25T10:40:50+08:00">
2018-01-25
</time>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>我会在这里分享我在<a href="https://yuxianglu.github.io/categories/mac/">使用mac</a>、学习<a href="https://yuxianglu.github.io/categories/nlp/">自然语言处理</a>的过程中的心得。</p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="https://yuxianglu.github.io/undefined/nlp/word2vec/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Yuxiang Lu">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Yuxiang Coding">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/undefined/nlp/word2vec/" itemprop="url">使用word2vec训练词向量</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2018-01-20T15:00:03+08:00">
2018-01-20
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/nlp/" itemprop="url" rel="index">
<span itemprop="name">nlp</span>
</a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>将词转换为向量是目前自然语言处理的基本流程之一。词向量有好几种,本文主要讲述如何利用word2vec将中文词训练为分布式词向量。</p>
<h2 id="语料准备"><a href="#语料准备" class="headerlink" title="语料准备"></a>语料准备</h2><p>本文使用维基百科文章作为训练语料。使用wget命令下载。</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">wget https://dumps.wikimedia.org/zhwiki/latest/zhwiki-latest-pages-articles.xml.bz2</span><br></pre></td></tr></table></figure>
<p>下载完成后,应该得到一个名为 <strong>zhwiki-latest-pages-articles.xml.bz2</strong> 的文件。</p>
<p>这个文件是无法直接使用的,需要将其转换为文本,使用python中的<strong>gensim</strong>模块的<strong>WikiCorpus</strong>函数处理,完整代码如下:</p>
<p><strong>* wiki_to_txt.py</strong></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># -*- coding: utf-8 -*-</span></span><br><span class="line"><span class="keyword">import</span> logging</span><br><span class="line"><span class="keyword">import</span> sys</span><br><span class="line">reload(sys)</span><br><span class="line">sys.setdefaultencoding(<span class="string">'utf-8'</span>)</span><br><span class="line"><span class="keyword">import</span> io</span><br><span class="line"><span class="keyword">from</span> gensim.corpora <span class="keyword">import</span> WikiCorpus</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">main</span><span class="params">()</span>:</span></span><br><span class="line"> <span class="keyword">if</span> len(sys.argv) != <span class="number">2</span>:</span><br><span class="line"> print(<span class="string">"Usage: python3 "</span> + sys.argv[<span class="number">0</span>] + <span class="string">" wiki_data_path"</span>)</span><br><span class="line"> exit()</span><br><span 
class="line"></span><br><span class="line"> logging.basicConfig(format=<span class="string">'%(asctime)s : %(levelname)s : %(message)s'</span>, level=logging.INFO)</span><br><span class="line"> wiki_corpus = WikiCorpus(sys.argv[<span class="number">1</span>], dictionary={})</span><br><span class="line"> texts_num = <span class="number">0</span></span><br><span class="line"></span><br><span class="line"> <span class="keyword">with</span> io.open(<span class="string">"wiki_texts.txt"</span>, <span class="string">'w'</span>, encoding=<span class="string">'utf-8'</span>) <span class="keyword">as</span> output:</span><br><span class="line"> <span class="keyword">for</span> text <span class="keyword">in</span> wiki_corpus.get_texts():</span><br><span class="line"> output.write(<span class="string">b' '</span>.join(text).decode(<span class="string">'utf-8'</span>) + <span class="string">'\n'</span>)</span><br><span class="line"> texts_num += <span class="number">1</span></span><br><span class="line"> <span class="keyword">if</span> texts_num % <span class="number">10000</span> == <span class="number">0</span>:</span><br><span class="line"> logging.info(<span class="string">"已处理 %d 篇文章"</span> % texts_num)</span><br><span class="line"></span><br><span class="line"><span class="keyword">if</span> __name__ == <span class="string">"__main__"</span>:</span><br><span class="line"> main()</span><br></pre></td></tr></table></figure>
<p>执行:<code>python wiki_to_txt.py zhwiki-latest-pages-articles.xml.bz2</code>进行处理。</p>
<p>处理结束后得到一个wiki_texts.txt文件。</p>
<p>查看一下这个文件:</p>
<p><img src="http://wx1.sinaimg.cn/mw690/71cd66b9ly1fnvbsdq2e7j211s0mq4qp.jpg" alt="wiki_txt"></p>
<p>发现文章是繁体字(应该是数据集下错了?),没关系,多加一步将繁体转换为简体:</p>
<p>首先,下载两个python文件:<a href="https://github.com/csdz/nstools/blob/master/zhtools/zh_wiki.py" target="_blank" rel="noopener">zh_wiki.py</a>和<a href="https://github.com/csdz/nstools/blob/master/zhtools/langconv.py" target="_blank" rel="noopener">langconv.py</a>。</p>
<p><strong>* t2s.py</strong></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># -*- coding: utf-8 -*-</span></span><br><span class="line"><span class="keyword">from</span> langconv <span class="keyword">import</span> *</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">t2s</span><span class="params">(line)</span>:</span></span><br><span class="line"> line = Converter(<span class="string">'zh-hans'</span>).convert(line.decode(<span class="string">'utf-8'</span>))</span><br><span class="line"> line = line.encode(<span class="string">'utf-8'</span>)</span><br><span class="line"> <span class="keyword">return</span> line</span><br><span class="line"></span><br><span class="line"><span class="keyword">if</span> __name__ == <span class="string">"__main__"</span>:</span><br><span class="line"> <span class="keyword">with</span> open(<span class="string">'wiki_texts.txt'</span>, <span class="string">'r'</span>) <span class="keyword">as</span> r:</span><br><span class="line"> <span class="keyword">with</span> open(<span class="string">'wiki_zhs.txt'</span>, <span class="string">'w'</span>) <span class="keyword">as</span> w:</span><br><span class="line"> lines = r.readlines()</span><br><span class="line"> <span class="keyword">print</span> <span class="string">'start'</span></span><br><span class="line"> <span class="keyword">for</span> line <span 
class="keyword">in</span> lines:</span><br><span class="line"> w.write(t2s(line.rstrip() + <span class="string">'\n'</span>))</span><br></pre></td></tr></table></figure>
<p><code>python t2s.py</code>运行这个文件后得到简体中文的wiki数据集。处理结果存放在wiki_zhs.txt中。</p>
<h2 id="分词和去除停止词"><a href="#分词和去除停止词" class="headerlink" title="分词和去除停止词"></a>分词和去除停止词</h2><p>使用 <strong>jieba</strong> 分词,在jieba_dict文件夹中的dict.txt.big文件可以自己定义一些想要分的词,在stopwords.txt中放入中文的停止词(可选)。完整代码如下:</p>
<p><strong>* segment.py</strong></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># -*- coding: utf-8 -*-</span></span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> jieba</span><br><span class="line"><span class="keyword">import</span> logging</span><br><span class="line"><span class="keyword">import</span> io</span><br><span class="line"><span class="keyword">import</span> sys</span><br><span class="line">reload(sys)</span><br><span class="line">sys.setdefaultencoding(<span class="string">'utf-8'</span>)</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">main</span><span 
class="params">()</span>:</span></span><br><span class="line"> logging.basicConfig(format=<span class="string">'%(asctime)s : %(levelname)s : %(message)s'</span>, level=logging.INFO)</span><br><span class="line"></span><br><span class="line"> <span class="comment"># jieba custom setting.</span></span><br><span class="line"> jieba.set_dictionary(<span class="string">'jieba_dict/dict.txt.big'</span>)</span><br><span class="line"></span><br><span class="line"> <span class="comment"># load stopwords set</span></span><br><span class="line"> stopword_set = set()</span><br><span class="line"> <span class="keyword">with</span> io.open(<span class="string">'jieba_dict/stopwords.txt'</span>, <span class="string">'r'</span>, encoding=<span class="string">'utf-8'</span>) <span class="keyword">as</span> stopwords:</span><br><span class="line"> <span class="keyword">for</span> stopword <span class="keyword">in</span> stopwords:</span><br><span class="line"> stopword_set.add(stopword.strip(<span class="string">'\n'</span>))</span><br><span class="line"></span><br><span class="line"> output = io.open(<span class="string">'wiki_seg.txt'</span>, <span class="string">'w'</span>, encoding=<span class="string">'utf-8'</span>)</span><br><span class="line"> <span class="keyword">with</span> io.open(<span class="string">'wiki_zhs.txt'</span>, <span class="string">'r'</span>, encoding=<span class="string">'utf-8'</span>) <span class="keyword">as</span> content:</span><br><span class="line"> <span class="keyword">for</span> texts_num, line <span class="keyword">in</span> enumerate(content):</span><br><span class="line"> line = line.strip(<span class="string">'\n'</span>)</span><br><span class="line"> words = jieba.cut(line, cut_all=<span class="keyword">False</span>)</span><br><span class="line"> <span class="keyword">for</span> word <span class="keyword">in</span> words:</span><br><span class="line"> <span class="keyword">if</span> word <span class="keyword">not</span> <span 
class="keyword">in</span> stopword_set:</span><br><span class="line"> output.write(word + <span class="string">' '</span>)</span><br><span class="line"> output.write(unicode(<span class="string">'\n'</span>))</span><br><span class="line"></span><br><span class="line"> <span class="keyword">if</span> (texts_num + <span class="number">1</span>) % <span class="number">10000</span> == <span class="number">0</span>:</span><br><span class="line"> logging.info(<span class="string">"已完成前 %d 行的断词"</span> % (texts_num + <span class="number">1</span>))</span><br><span class="line"> output.close()</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="keyword">if</span> __name__ == <span class="string">'__main__'</span>:</span><br><span class="line"> main()</span><br></pre></td></tr></table></figure>
<p>处理结果存放在wiki_seg.txt中,如下图:</p>
<p><img src="http://wx1.sinaimg.cn/mw690/71cd66b9ly1fnvbs8xu5wj21240no7u8.jpg" alt="wiki_seg"></p>
<h2 id="训练词向量"><a href="#训练词向量" class="headerlink" title="训练词向量"></a>训练词向量</h2><p>主要使用 <strong>gensim</strong> 中的 <strong>word2vec</strong> 训练词向量,完整代码如下:</p>
<p><strong>* train.py</strong></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># -*- coding: utf-8 -*-</span></span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> logging</span><br><span class="line"><span class="keyword">from</span> gensim.models <span class="keyword">import</span> word2vec</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">main</span><span class="params">()</span>:</span></span><br><span class="line"> logging.basicConfig(format=<span class="string">'%(asctime)s : %(levelname)s : %(message)s'</span>, level=logging.INFO)</span><br><span class="line"> sentences = word2vec.LineSentence(<span class="string">"wiki_seg.txt"</span>)</span><br><span class="line"> model = word2vec.Word2Vec(sentences, size=<span class="number">250</span>)</span><br><span class="line"></span><br><span class="line"> <span class="comment"># 保存模型,供日后使用</span></span><br><span class="line"> model.save(<span class="string">u"word2vec.model"</span>)</span><br><span class="line"></span><br><span class="line"> <span class="comment"># 模型读取方式</span></span><br><span class="line"> <span class="comment"># model = word2vec.Word2Vec.load("your_model_name")</span></span><br><span class="line"></span><br><span class="line"><span 
class="keyword">if</span> __name__ == <span class="string">"__main__"</span>:</span><br><span class="line"> main()</span><br></pre></td></tr></table></figure>
<p>训练结果存储在word2vec.model中。</p>
<h2 id="使用"><a href="#使用" class="headerlink" title="使用"></a>使用</h2><p>这里提供3种word2vec的测试场景:(1)一个词的相似词;(2)两个词的相似词;(3)类比推理。</p>
<p>完整代码如下:</p>
<p><strong>* demo.py</strong></p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># -*- coding: utf-8 -*-</span></span><br><span class="line"></span><br><span class="line"><span class="keyword">from</span> gensim.models <span class="keyword">import</span> word2vec</span><br><span class="line"><span class="keyword">from</span> gensim <span class="keyword">import</span> models</span><br><span class="line"><span class="keyword">import</span> logging</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">main</span><span 
class="params">()</span>:</span></span><br><span class="line"> logging.basicConfig(format=<span class="string">'%(asctime)s : %(levelname)s : %(message)s'</span>, level=logging.INFO)</span><br><span class="line"> model = models.Word2Vec.load(<span class="string">'word2vec.model'</span>)</span><br><span class="line"></span><br><span class="line"> print(<span class="string">"提供 3 种测试模式"</span>)</span><br><span class="line"> print(<span class="string">"输入一个词,则去寻找前一百个该词的相似詞"</span>)</span><br><span class="line"> print(<span class="string">"输入两个词,则去计算两个词的余弦相似度"</span>)</span><br><span class="line"> print(<span class="string">"输入三个词,进行类比推理"</span>)</span><br><span class="line"></span><br><span class="line"> <span class="keyword">while</span> <span class="keyword">True</span>:</span><br><span class="line"> query = raw_input(<span class="string">"请输入: "</span>)</span><br><span class="line"> query = query.decode(<span class="string">'utf-8'</span>)</span><br><span class="line"> q_list = query.split()</span><br><span class="line"> <span class="keyword">try</span>:</span><br><span class="line"> <span class="keyword">if</span> len(q_list) == <span class="number">1</span>:</span><br><span class="line"> print(<span class="string">"相似词前 100 排序"</span>)</span><br><span class="line"> res = model.most_similar(q_list[<span class="number">0</span>], topn=<span class="number">100</span>)</span><br><span class="line"> <span class="keyword">for</span> item <span class="keyword">in</span> res:</span><br><span class="line"> print(item[<span class="number">0</span>] + <span class="string">","</span> + str(item[<span class="number">1</span>]))</span><br><span class="line"></span><br><span class="line"> <span class="keyword">elif</span> len(q_list) == <span class="number">2</span>:</span><br><span class="line"> print(<span class="string">"计算 Cosine 相似度"</span>)</span><br><span class="line"> res = model.similarity(q_list[<span class="number">0</span>], q_list[<span 
class="number">1</span>])</span><br><span class="line"> print(res)</span><br><span class="line"> <span class="keyword">else</span>:</span><br><span class="line"> print(<span class="string">"%s之于%s,如%s之于"</span> % (q_list[<span class="number">0</span>], q_list[<span class="number">2</span>], q_list[<span class="number">1</span>]))</span><br><span class="line"> res = model.most_similar([q_list[<span class="number">0</span>], q_list[<span class="number">1</span>]], [q_list[<span class="number">2</span>]], topn=<span class="number">100</span>)</span><br><span class="line"> <span class="keyword">for</span> item <span class="keyword">in</span> res:</span><br><span class="line"> print(item[<span class="number">0</span>] + <span class="string">","</span> + str(item[<span class="number">1</span>]))</span><br><span class="line"> print(<span class="string">"----------------------------"</span>)</span><br><span class="line"> <span class="keyword">except</span> Exception <span class="keyword">as</span> e:</span><br><span class="line"> print(repr(e))</span><br><span class="line"></span><br><span class="line"><span class="keyword">if</span> __name__ == <span class="string">"__main__"</span>:</span><br><span class="line"> main()</span><br></pre></td></tr></table></figure>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="https://yuxianglu.github.io/undefined/nlp/dialogue_system_review/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Yuxiang Lu">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Yuxiang Coding">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/undefined/nlp/dialogue_system_review/" itemprop="url">对话系统综述</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2018-01-19T19:40:00+08:00">
2018-01-19
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/nlp/" itemprop="url" rel="index">
<span itemprop="name">nlp</span>
</a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>本文主要是对论文:<a href="https://arxiv.org/abs/1711.01731" target="_blank" rel="noopener">A Survey on Dialogue Systems: Recent Advances and New Frontiers</a> 的理解和翻译。下文中提及的参考文献编号,均可从原论文中找到对应的文献。</p>
<h2 id="综述"><a href="#综述" class="headerlink" title="综述"></a>综述</h2><p>目前,从应用的角度看,对话系统大致可以分为两类:</p>
<ul>
<li>task-oriented systems(任务型)</li>
<li>non-task-oriented systems(闲聊型)</li>
</ul>
<p>任务型系统主要帮助人们完成确定的任务(例如,订票)。之前广泛应用的方法流程如Figure 1。系统先理解用户的话(natural language understanding,NLU),将其表示成系统可以理解的状态,然后根据策略采取一些action,最后根据这些action转换成自然语言(natural language generation,NLG)回复。这里的NLU使用统计模型来处理。可靠的对话系统仍使用人工特征和规则来表示状态和策略、检测意图等,使得实际使用中的对话系统成本很高,而且很难被用于其他领域。</p>
<p><img src="http://wx3.sinaimg.cn/mw690/71cd66b9ly1fnveptvt51j20ix0a0gnr.jpg" alt="figure1"></p>
<p>最近,很多深度学习算法通过学习高维的分布式特征表示来解决这些问题。聊天型对话系统主要使用了这两种模型:</p>
<ul>
<li>生成模型(generative methods),例如:Seq2Seq model;</li>
<li>检索模型(retrieval-based methods),学习从已有库中选择当前对话对应的回答。</li>
</ul>
<h2 id="任务型对话系统"><a href="#任务型对话系统" class="headerlink" title="任务型对话系统"></a>任务型对话系统</h2><p>任务型对话系统主要可以分为两类:</p>
<ul>
<li>管道型(pipeline)</li>
<li>端到端型(end-to-end)</li>
</ul>
<h3 id="管道型(Pipeline-Methods)"><a href="#管道型(Pipeline-Methods)" class="headerlink" title="管道型(Pipeline Methods)"></a>管道型(Pipeline Methods)</h3><p>管道型对话系统主要包含4个部分:</p>
<ul>
<li>自然语言理解(NLU)</li>
<li>对话状态追踪器(dialogue state tracking)</li>
<li>策略网络(policy learning)</li>
<li>自然语言生成(NLG)</li>
</ul>
<h4 id="自然语言理解"><a href="#自然语言理解" class="headerlink" title="自然语言理解"></a>自然语言理解</h4><p>NLU模块对自然语言进行意图识别和信息抽取,添加到对应的语义槽(semantic slots)。识别意图就是将用户的话语分类到预定义好的类别中。<code>[15,84,112]</code>使用了<strong>深度学习</strong>进行意图识别。<code>[25,29,74]</code>使用了<strong>CNN</strong>进行分类。类似的方法同样适用于类别和领域的分类。槽填充(slot filling)是将用户的话中的词打上语义标签(例如,日期、地点等):<code>[15,17]</code>使用了<strong>深度信念网络(DBNs)</strong>,<code>[51,66,115,113]</code>使用了<strong>RNN</strong>。NLU的结果(intent和slot)会进一步被对话管理器(dialogue management component)进行处理(对话管理器主要包括对话状态追踪器和策略网络)。</p>
<h4 id="对话状态追踪器"><a href="#对话状态追踪器" class="headerlink" title="对话状态追踪器"></a>对话状态追踪器</h4><p>在每一轮对话中估计用户的目标。常用的state结构是slot filling或semantic frame。传统方法是使用人工定义的规则来选择最有可能的结果<code>[23]</code>。最近,<code>[26]</code>提出了单领域的,基于深度学习的belief tracking;<code>[58]</code>提出了基于RNN,多领域的tracking模型;<code>[59]</code>提出了neural belief tracker(NBT)来检测slot-value pairs。</p>
<h4 id="策略网络"><a href="#策略网络" class="headerlink" title="策略网络"></a>策略网络</h4><p>基于状态的表示,policy learning用来生成系统的下一步action。可以使用监督学习和强化学习。<code>[14]</code>使用了<strong>深度强化学习</strong>,得到了很好的表现。</p>
<h4 id="自然语言生成"><a href="#自然语言生成" class="headerlink" title="自然语言生成"></a>自然语言生成</h4><p>根据action生成自然语言。<code>[83,94,95,123]</code>使用了基于<strong>LSTM</strong>的神经网络模型。<code>[20]</code>提出了基于<strong>Seq2Seq</strong>方法的NLG,<code>[19]</code>扩展了<code>[20]</code>,使得模型能适应用户说话的方式,作出合适的回应。</p>
<h3 id="端到端模型(End-to-End-Methods)"><a href="#端到端模型(End-to-End-Methods)" class="headerlink" title="端到端模型(End-to-End Methods)"></a>端到端模型(End-to-End Methods)</h3><p>基于Pipeline方法的系统,有很多在具体领域的人工设计,难以应用到其他领域,并且还有两点局限:一是用户的反馈难以传给模型,二是各个模块间相互依赖(一个模块的输出是另一个模块的输入),适应新环境时修改起来需要很多人力。<code>[7,97]</code>提出了基于神经网络的、端到端的、可训练的任务型对话系统,将对话系统的学习看作是一个从历史对话记录的匹配过程的学习,使用<strong>Encoder-Decoder模型</strong>来训练整个网络,缺点是监督学习需要大量数据,并且不够健壮。<code>[120]</code>首先提出了<strong>端到端的强化学习方法</strong>来训练DM,优化系统的鲁棒性(系统问用户一系列Yes/No问题来确定答案)。<code>[45]</code>将端到端系统训练为task completion neural dialogue,最终目的是完成一项任务,例如,订电影票。</p>
<p>任务型对话系统通常需要查询外部的知识库。先前的系统一般会发出一次符号查询来获得结果<code>[97,103,45]</code>。<code>[21]</code>使用基于<strong>注意力模型(attention)</strong>、键值对的检索机制来增强现有循环网络架构。<code>[18]</code>提出了在知识库上的“soft” posterior distribution来推断用户的兴趣点,取代了符号查询。<code>[102]</code>结合了<strong>RNN</strong>与领域知识编码的软件和系统action模版。</p>
<h2 id="闲聊型对话系统"><a href="#闲聊型对话系统" class="headerlink" title="闲聊型对话系统"></a>闲聊型对话系统</h2><h3 id="基于神经网络的生成模型"><a href="#基于神经网络的生成模型" class="headerlink" title="基于神经网络的生成模型"></a>基于神经网络的生成模型</h3><p><code>[64]</code>提出了(基于phrase-based Statistical Machine Translation<code>[118]</code>的)<strong>概率生成模型</strong>,将应答生成看作翻译,但效果不好。随着深度学习在翻译中的应用(Neural Machine Translation)获得成功,激励了在神经网络中生成对话系统的研究。</p>
<h4 id="Sequence-to-Sequence-Models"><a href="#Sequence-to-Sequence-Models" class="headerlink" title="Sequence-to-Sequence Models"></a>Sequence-to-Sequence Models</h4><p>主要使用Encoder-Decoder结构,输入X,输出Y。得到中间状态的非线性函数可以是<strong>LSTM</strong><code>[27]</code>或<strong>GRU</strong>(gated recurrent unit)<code>[12]</code>。<code>[5]</code>使用了<strong>attention机制</strong>提高了性能。<code>[71,12,87,50]</code>使用了<strong>RNN</strong>的Encoder-Decoder或类似的结构。</p>
<p><img src="http://wx1.sinaimg.cn/mw690/71cd66b9ly1fnvepxprzpj20ji0j5wih.jpg" alt="figure2"></p>
<h4 id="对话上下文(context)"><a href="#对话上下文(context)" class="headerlink" title="对话上下文(context)"></a>对话上下文(context)</h4><p>考虑对话的上下文的能力是建立对话系统的关键。<code>[77]</code>通过连续表示(<strong>continuous representation</strong>)来表示历史对话记录(包括当前信息),解决了基于上下文的应答生成问题。<code>[52,12]</code>使用了RNN作为decoder,<code>[68]</code>使用了<strong>层次(hierarchical)</strong>方法,先捕获用户话语的意思,然后将它们合成一句论述。<code>[109]</code>分别使用词和句层面的<strong>注意力(attention)机制</strong>扩展了hierarchical结构。<code>[82]</code>对比现有方法发现:(1)hierarchical RNN普遍比non-hierarchical性能好,(2)基于上下文信息,神经网络倾向于生成更长更有意义而且多样性的应答。</p>
<p>=============================</p>
<p>未完待续。。。</p>
<h4 id="应答多样性"><a href="#应答多样性" class="headerlink" title="应答多样性"></a>应答多样性</h4><p><code>[77,87,68,38,6,9,114,65,42,86,72,42,38,77,72,57,11,34,33,75,69,68,67,73]</code></p>
<h4 id="主题及个性化"><a href="#主题及个性化" class="headerlink" title="主题及个性化"></a>主题及个性化</h4><p><code>[108,107,13,122,3,86,61,39,119,55]</code></p>
<h4 id="外部知识库"><a href="#外部知识库" class="headerlink" title="外部知识库"></a>外部知识库</h4><p><code>[22,88,116]</code></p>
<h4 id="交互性对话学习"><a href="#交互性对话学习" class="headerlink" title="交互性对话学习"></a>交互性对话学习</h4><p><code>[43,1,104,40,4,41,37]</code></p>
<h4 id="评价"><a href="#评价" class="headerlink" title="评价"></a>评价</h4><p><code>[46,89,56,31,2,46,53,80,47,81,85,32,10,8,44,24,16]</code></p>
<h3 id="检索模型(Retrieval-based-Methods)"><a href="#检索模型(Retrieval-based-Methods)" class="headerlink" title="检索模型(Retrieval-based Methods)"></a>检索模型(Retrieval-based Methods)</h3><h4 id="单轮对话"><a href="#单轮对话" class="headerlink" title="单轮对话"></a>单轮对话</h4><h4 id="多轮对话"><a href="#多轮对话" class="headerlink" title="多轮对话"></a>多轮对话</h4><h4 id="融合方法"><a href="#融合方法" class="headerlink" title="融合方法"></a>融合方法</h4><h2 id="讨论与总结"><a href="#讨论与总结" class="headerlink" title="讨论与总结"></a>讨论与总结</h2>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="https://yuxianglu.github.io/undefined/mac/zsh/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Yuxiang Lu">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Yuxiang Coding">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/undefined/mac/zsh/" itemprop="url">在mac上鼓捣终端</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2017-12-15T00:00:00+08:00">
2017-12-15
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/mac/" itemprop="url" rel="index">
<span itemprop="name">mac</span>
</a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>好看+好用</p>
<p><img src="http://wx1.sinaimg.cn/mw690/71cd66b9ly1fnvbrglzjsj21380p8tvj.jpg" alt="iterm2"></p>
<h2 id="iTerm2"><a href="#iTerm2" class="headerlink" title="iTerm2"></a>iTerm2</h2><p>mac必备,官网地址:<a href="http://iterm2.org/" target="_blank" rel="noopener">http://iterm2.org/</a>,或点击<a href="https://iterm2.com/downloads/stable/latest" target="_blank" rel="noopener">直接下载</a>。</p>
<h2 id="安装zsh"><a href="#安装zsh" class="headerlink" title="安装zsh"></a>安装zsh</h2><p>一般mac会自带zsh,在命令行输入:<code>zsh --version</code>查看版本。</p>
<p>或者直接安装:<code>brew install zsh</code>(<a href="https://brew.sh/" target="_blank" rel="noopener">Homebrew</a>是mac的安装器)。</p>
<p>使zsh成为默认的shell:命令行输入<code>chsh -s $(which zsh)</code>,重启你的iTerm2。</p>
<h2 id="安装Oh-My-Zsh"><a href="#安装Oh-My-Zsh" class="headerlink" title="安装Oh My Zsh"></a>安装Oh My Zsh</h2><p>通过wget安装,首先也要安装wget:<code>brew install wget</code></p>
<p>在命令行输入:</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">sh -c "$(wget https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh -O -)"</span><br></pre></td></tr></table></figure>
<h2 id="主题"><a href="#主题" class="headerlink" title="主题"></a>主题</h2><p>oh my zsh自带各种主题,放在<code>~/.oh-my-zsh/themes/</code>文件夹下。</p>
<p>通过<code>vi ~/.zshrc</code>修改其中<code>ZSH_THEME</code>为你喜欢的主题(我比较喜欢默认主题)。</p>
<p>为你的vim安装<em>Solarized</em>或者<em>Tomorrow</em>主题:</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line"><span class="meta">#</span> git下载Solarized源码</span><br><span class="line">git clone git://github.com/altercation/solarized.git</span><br><span class="line"><span class="meta">#</span> 进入刚刚下载好的目录</span><br><span class="line">cd solarized/vim-colors-solarized/colors</span><br><span class="line"><span class="meta">#</span> 创建vim的配置文件夹</span><br><span class="line">sudo mkdir -p ~/.vim/colors</span><br><span class="line"><span class="meta">#</span> 把刚刚下载好的主题复制过去</span><br><span class="line">sudo cp solarized.vim ~/.vim/colors/</span><br><span class="line"><span class="meta">#</span> 创建.vimrc配置文件并修改</span><br><span class="line">sudo vim ~/.vimrc</span><br><span class="line"><span class="meta">#</span> 在.vimrc文件中加入以下几行</span><br><span class="line">syntax enable</span><br><span class="line">set background=dark</span><br><span class="line">colorscheme solarized</span><br></pre></td></tr></table></figure>
<p>保存.vimrc。</p>
<p>效果如下(不好意思,我用的是Tomorrow主题):</p>