am2aintest.py (forked from ynhacler/RedKindle)
# -*- coding:utf-8 -*-
import os, re, urllib, urlparse, datetime, logging
from datetime import date, timedelta
from config import *
from books.base import BaseFeedBook, BaseUrlBook,WebpageBook
from jinja2 import Environment, PackageLoader
from os import path, listdir, system
from shutil import copy, copytree
from books.ZhihuDaily import ZhihuDaily
from books.DoubanBook import DoubanBook
from books.PaoPao import PaoPao
from books.Economist import Economist
from books.Qiushibaike import Qiushibaike
from books.Lianhezaobao import Lianhezaobao
from books.Lianhe_china import Lianhe_china
from books.Shuwu import Shuwu
from books.Chuansm import Chuansm
def render_and_write(template_name, context, output_name, output_dir):
    """Render `template_name` with `context` and write the result to the file
    `output_dir`/`output_name`."""
    template = templates_env.get_template(template_name)
    f = open(path.join(output_dir, output_name), "w")
    f.write(template.render(**context).encode('utf-8'))
    f.close()
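# Minimal usage sketch (the template and context names here are illustrative only,
# assuming the 'amaintest' package ships a 'templates2/feed.html' template):
#   render_and_write('feed.html', {'title': u'Example'}, 'article_1_1.html', '/tmp/out')
# renders the template and writes it, UTF-8 encoded, to /tmp/out/article_1_1.html.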
def mobi(input_file, exec_path):
    system("%s %s" % (exec_path, input_file))
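# mobi() simply shells out; e.g. mobi('/home/zzh/Desktop/temp/v3/daily.opf', './kindlegen_1.1')
# runs "./kindlegen_1.1 /home/zzh/Desktop/temp/v3/daily.opf", and kindlegen should then
# drop daily.mobi next to the .opf on success.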
log = logging.getLogger()
feeds = [[u'163easynet', "http://www.xinhuanet.com/ent/news_ent.xml"],
         [u'XXXzzhXXX', "http://www.sciencenet.cn/xml/news.aspx?news=0"]]
feeds2 = [[u'XXXzzhXXX', "http://www.sciencenet.cn/xml/news.aspx?news=0"]]
feeds3 = [[u'163easynet', "http://www.xinhuanet.com/ent/news_ent.xml"]]
feeds4 = [[u'3lian', 'http://feed.36kr.com/c/33346/f/566026/index.rss']]
feeds5 = [[u'nytimes', 'http://blog.sina.com.cn/rss/sciam.xml', True]]
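# Each feed entry above is [section title, RSS/Atom URL]; the trailing True in feeds5
# appears to be a full-text flag consumed by BaseFeedBook (an assumption, not confirmed here).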
zzh = BaseFeedBook(log)
zzh2 = ZhihuDaily(log)
zzh3 = DoubanBook(log)
zzh4 = PaoPao(log)
zzh6 = Economist(log)
zzh8 = Qiushibaike(log)
zzh9 = Lianhezaobao(log)
zzh10 = Lianhe_china(log)
zzh11 = Shuwu(log)
zzh12 = Chuansm(log)
zzh.feeds = feeds4
zzh.keep_image = False
zzh2.keep_image = False
zzh3.keep_image = True
zzh12.keep_image = False
#zzh.fulltext_by_readability = False
#zzh.fulltext_by_instapaper = False
#print zzh12.ParseFeedUrls()
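# keep_image presumably controls whether each book downloads article images (matching the
# image/ handling in the main loop below); the commented-out fulltext_* switches look like
# alternative full-text extraction backends. Both readings are assumptions.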
zzhs = []
zzhs.append(zzh11)
#total image count
imgindex_temp = 0
#all the collected data
data = []
feed_number = 1
entry_number = 0
play_order = 0
temp_sec = ''
#output directory
output_dir='/home/zzh/Desktop/temp/v3'
ROOT = path.dirname(path.abspath(__file__))
templates_env = Environment(loader=PackageLoader('amaintest', 'templates2'))
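# Note: in the loop below, zz.Items() yields (sec_or_media, url, title, content, brief)
# tuples; items whose first field starts with 'image/' carry raw image bytes in `content`,
# everything else is an article (inferred from how the tuples are unpacked below, not from
# BaseFeedBook's documentation).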
if __name__ == '__main__':
    img_num = []
    i = -1  # feed (section) counter
    # automatically processed books
    for zz in zzhs:
        zz._imgindex = imgindex_temp
        for sec_or_media, url, title, content, brief in zz.Items():
            if sec_or_media.startswith(r'image/'):
                filename = 'image/' + title
                img_num.append(title)
                fout = open(filename, "wb")
                fout.write(content)
                fout.close()
            else:
                # a new feed (section) starts
                if temp_sec != sec_or_media:
                    temp_sec = sec_or_media
                    feed_number += 1
                    play_order += 1
                    entry_number = 0
                    local = {
                        'number': feed_number,
                        'play_order': play_order,
                        'entries': [],
                        'title': sec_or_media
                    }
                    i += 1
                    data.insert(i, local)
                # process the article
                play_order += 1
                entry_number += 1
                local_entry = {
                    'number': entry_number,
                    'play_order': play_order,
                    'title': title,
                    'description': brief,
                    'content': content,
                    'url': url,
                }
                data[i]['entries'].append(local_entry)
                #raw_input("Input your id plz")
        imgindex_temp = zz._imgindex
    #====================== end for
    # manually processed (kept for reference)
    '''
    filename = 'image/doc/' + str(play_order) + '.html'
    fout = open(filename, "wb+")
    fout.write(content.encode('utf-8'))
    fout.close()
    '''
    wrap = {
        'date': date.today().isoformat(),
        'feeds': data,
        'img_nums': imgindex_temp,
        'img_name': img_num,
    }
    ## TOC (NCX)
    render_and_write('toc.xml', wrap, 'toc.ncx', output_dir)
    ## COVER (HTML)
    render_and_write('cover.html', wrap, 'cover.html', output_dir)
    ## TOC (HTML)
    render_and_write('toc.html', wrap, 'toc.html', output_dir)
    ## OPF
    render_and_write('opf.xml', wrap, 'daily.opf', output_dir)
    #/home/zzh/Desktop/temp/v3
    for feed in data:
        for entry in feed['entries']:
            render_and_write('feed.html', entry,
                             'article_%s_%s.html' % (feed['number'], entry['number']),
                             output_dir)
    for name in listdir(path.join(ROOT, 'image')):
        copy(path.join(ROOT, 'image', name), path.join(output_dir, name))
    copy(path.join(ROOT, 'templates2', 'masthead.jpg'), path.join(output_dir, 'masthead.jpg'))
    copy(path.join(ROOT, 'templates2', 'cover.jpg'), path.join(output_dir, 'cover.jpg'))
    mobi(path.join(output_dir, 'daily.opf'), path.join(ROOT, 'kindlegen_1.1'))
    #copytree(path.join(ROOT, 'image'), path.join(output_dir, 'image'))
    print zzh._imgindex
    print '-=end=-'
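# Rough running notes (inferred from the paths used above, not documented upstream):
# the script expects an image/ directory and a templates2/ directory (with toc.xml,
# toc.html, cover.html, opf.xml, feed.html, masthead.jpg, cover.jpg) next to this file,
# a kindlegen_1.1 binary in the same directory, and an already-existing output_dir.
# Run it under Python 2 as: python am2aintest.py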