-
Notifications
You must be signed in to change notification settings - Fork 0
/
kemono.js
169 lines (153 loc) · 5.99 KB
/
kemono.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
const fs = require('fs')
const path = require('path')
const cheerio = require('cheerio')
const ThreadPool = require('./threadpool-mkz')
const { download, fetch, delay, LOG, fileExists, purifyName }= require('./utils')
const colors = require('colors')
// Default output directory, used by main() when no [outputPath] argument is given.
const DL_Path = 'download'
// Browser-like User-Agent string sent through the shared `headers` object below.
const userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.85 YaBrowser/21.11.1.877 (beta) Yowser/2.5 Safari/537.36'
// Request headers passed to every fetch()/download() call in this file.
// main() replaces `Cookie` with the [cookies] CLI argument when one is supplied.
// NOTE(review): the hard-coded __ddg* values look like captured anti-bot session
// cookies and are probably stale — confirm, and prefer supplying cookies via the CLI.
const headers = {
'Cookie': '__ddgid=QMtberxZJMJjLR3J; __ddg2=XlTpOqpjHhIfmig7; __ddg1=8wY9YWl6GPUOImzSIf7a;',
'User-Agent': userAgent
}
// Accumulates the text of download errors; initPool() writes it to error.log
// when a pool finishes and this string is non-empty.
let errorLog = ''
/**
 * Parses the "exclude extension" CLI argument into a set of extensions.
 *
 * @param {string} [rawList] - Comma- or whitespace-separated extensions, e.g. ".rar,.zip".
 * @returns {Set<string>} Lower-cased extensions, each with a leading dot.
 */
function parseExcludedExtensions(rawList) {
  const extensions = new Set()
  for (const entry of String(rawList || '').split(/[\s,]+/)) {
    const ext = entry.toLowerCase()
    if (ext === '' || ext === '.') continue
    extensions.add(ext.startsWith('.') ? ext : `.${ext}`)
  }
  return extensions
}

/**
 * Tells whether a file should be skipped because of its extension.
 *
 * Matches whole extensions only, so files without an extension are never
 * excluded and ".ra" does not match an excluded ".rar".
 *
 * @param {string} fileName - File name (or path) to test.
 * @param {Set<string>} excludedExtensions - Result of parseExcludedExtensions().
 * @returns {boolean} true when the file's extension is in the exclusion set.
 */
function isExcludedFile(fileName, excludedExtensions) {
  if (excludedExtensions.size === 0) return false
  const ext = path.parse(String(fileName)).ext.toLowerCase()
  return ext !== '' && excludedExtensions.has(ext)
}

/**
 * Removes the trailing "(...)" group (and the one character before it) from
 * the text of a post title node.
 *
 * @param {string} rawTitle - Text content of the `.post__title` element.
 * @returns {string} The trimmed title; returned unchanged when it contains no
 *   "(" after its first character.
 */
function extractPostTitle(rawTitle) {
  const text = rawTitle.trim()
  const parenIndex = text.lastIndexOf('(')
  // Without this guard a title lacking "(" would lose its last two characters.
  return parenIndex > 0 ? text.slice(0, parenIndex - 1) : text
}

/**
 * CLI entry point: walks an artist's post list page by page and queues every
 * attachment and file of every post for download.
 *
 * Arguments are read from process.argv:
 *   <url> [outputPath] [threads] [cookies] [sequence(1/-1)] [exclude extension(.rar,.zip)]
 * A sequence of -1 processes the list from its last post to its first.
 *
 * @returns {Promise<void>} Resolves once all pages have been processed and
 *   their downloads handed to the pool; rejects on file-system errors or when
 *   a post page does not have the expected markup.
 */
async function main() {
  const url = process.argv[2]
  if (!url) return console.log('node kemono.js <url> [outputPath] [threads] [cookies] [sequence(1/-1)] [exclude extension(.rar,.zip)]')

  // Offsets in the list URL (?o=) are advanced in steps of this size.
  const PAGE_SIZE = 25
  const RETRY_DELAY_MS = 5000

  let dst = process.argv[3] || DL_Path
  // Pass a real number to the pool; fall back to 4 on missing/invalid input.
  const requestedThreads = Number.parseInt(process.argv[4], 10)
  const threads = requestedThreads > 0 ? requestedThreads : 4
  headers.Cookie = process.argv[5] || headers.Cookie
  const seq = Number.parseInt(process.argv[6], 10) !== -1
  const excludedExtensions = parseExcludedExtensions(process.argv[7])
  const willBeExcluded = fileName => isExcludedFile(fileName, excludedExtensions)

  // Drop any query string so list and post URLs are built from the same base.
  const baseURL = url.split('?')[0]
  // Text every valid post page is expected to contain (the URL tail after "kemono.part").
  // NOTE(review): this is undefined for URLs on another host, in which case the
  // validation loop below looks for the literal text "undefined" — confirm intent.
  const postMarker = baseURL.split('kemono.part')[1]

  let poolIndex = 1
  let pool = initPool(threads, poolIndex)
  let index = 0
  let count = 0
  let init = false
  let isEnd = false
  // Number of posts still unprocessed on the final page in reverse mode.
  let tailLength = Infinity

  // Queues one download, starting a fresh pool if the current one has finished.
  const queueDownload = (relativeURL, target) => {
    if (pool.isFinished()) pool = initPool(threads, ++poolIndex)
    pool.add(async () => {
      try {
        // Resolving the redirect is inside the try so a failure is logged
        // instead of becoming an unhandled rejection.
        const redirectLink = await fetch(`https://kemono.party${relativeURL}`, headers)
        await download(redirectLink, target, false, { headers })
      } catch (e) {
        errorLog += `${e}\n`
      }
    })
  }

  do {
    if (index < 0) {
      // Reverse mode stepped past the first page: only the posts not covered
      // by the previous page remain.
      tailLength = index + PAGE_SIZE
      if (tailLength <= 0) break
      index = 0
      isEnd = true
    }
    let pageInfo
    try {
      pageInfo = await getPageInfo(baseURL, index)
    } catch (err) {
      console.log(`${LOG.failed} ${LOG.list} ${err}`)
      await delay(RETRY_DELAY_MS)
      // Re-evaluates the loop condition: retries this page, or ends the run
      // when nothing has been listed yet.
      continue
    }
    const { posts, artistName } = pageInfo
    if (!init) {
      dst = `${dst}/${artistName}`
      count = count || pageInfo.count
      init = true
      if (!seq) {
        // Reverse mode starts from the last page of the list.
        index = count - PAGE_SIZE
        continue
      }
    }
    if (!(await fileExists(dst))) await fs.promises.mkdir(dst, { recursive: true })
    if (posts.length === 0) break

    let postIndex = seq ? 0 : Math.min(posts.length, tailLength) - 1
    while (postIndex >= 0 && postIndex < posts.length) {
      const pageURL = `${baseURL}/post/${posts[postIndex]}`
      console.log(`${LOG.fetching} ${LOG.post} ${pageURL}`)
      let post = ''
      try {
        post = await fetch(pageURL, headers)
        while (!post.includes(postMarker)) {
          console.log(`${LOG.retry} ${LOG.post} retry in 5 seconds: ${pageURL}`)
          await delay(RETRY_DELAY_MS)
          post = await fetch(pageURL, headers)
        }
      } catch (err) {
        console.log(`${LOG.failed} ${LOG.post} ${pageURL} ${err}`)
        // Wait before retrying the same post so a persistent failure is not a hot loop.
        await delay(RETRY_DELAY_MS)
        continue
      }

      // Position of this post within the whole list, in processing order.
      const progressCount = seq ? (index + postIndex + 1) : (count - index - postIndex)
      console.log(`${LOG.fetched} ${LOG.post}` + ' ' + ` ${progressCount} / ${count} `.bgBlue.white + ' ' + pageURL)

      const $ = cheerio.load(post)
      // First space-separated token of the published time is used as a name prefix.
      const titleIndex = $('.post__published time').text().trim().split(' ')[0] || ''
      const title = purifyName(`${titleIndex}-` + extractPostTitle($('.post__title').text()))
      const postDir = `${dst}/${title}`
      if (!(await fileExists(postDir))) await fs.promises.mkdir(postDir, { recursive: true })

      const content = $('.post__content').text()
      if (content.length > 0) {
        fs.writeFile(`${postDir}/content.txt`, content.trim(), err => {
          if (err) errorLog += `${err}\n`
        })
      }

      Array.from($('.post__attachments li').map((i, attach) => ({
        // Drops the first 9 characters of the link text (presumably a "Download " label — verify).
        filename: attach.childNodes[1].firstChild.data.trim().slice(9),
        url: attach.childNodes[1].attribs.href
      }))).forEach(attach => {
        if (willBeExcluded(attach.filename)) return
        queueDownload(attach.url, `${postDir}/${attach.filename}`)
      })

      Array.from($('.post__files .post__thumbnail').map((i, file) => {
        const href = file.childNodes[1].attribs.href
        return {
          // Prefer the name carried in the "f=" parameter; otherwise use the path's base name.
          filename: href.split('f=')[1] || path.basename(href.split('?')[0]),
          url: href
        }
      })).forEach((file, fileIndex) => {
        if (willBeExcluded(file.filename)) return
        queueDownload(file.url, `${postDir}/${fileIndex}-${file.filename}`)
      })

      postIndex += seq ? 1 : -1
    }
    index += PAGE_SIZE * (seq ? 1 : -1)
    pool.run()
  } while (index < count && !isEnd)

  /**
   * Fetches one page of the post list, retrying every 5 seconds until the
   * response contains the 'fancy-image' marker.
   *
   * @param {string} url - List URL without a query string.
   * @param {number} index - Offset sent as the `o` query parameter.
   * @returns {Promise<{posts: object, artistName: string, count: number}>}
   *   `posts` is the array-like collection of post ids found on the page,
   *   `count` the total read from the paginator text (or the number of posts
   *   on this page when no usable total is present).
   */
  async function getPageInfo(url, index) {
    const listURL = `${url}?o=${index}`
    let content = await fetch(listURL, headers)
    console.log(`${LOG.fetching} ${LOG.list} ${listURL}`)
    while (!content.includes('fancy-image')) {
      console.log(`${LOG.failed} ${LOG.list} retry in 5 seconds ${listURL}`)
      await delay(RETRY_DELAY_MS)
      console.log(`${LOG.retry} ${listURL}`)
      content = await fetch(listURL, headers)
    }
    console.log(`${LOG.fetched} ${LOG.list} ${listURL}`)
    const $ = cheerio.load(content)
    const postIds = $('.card-list__items article.post-card').map((i, p) => p.attribs['data-id'])

    // The total is the number following "of" in the paginator text.
    const paginatorNode = $('#paginator-top small')[0]
    const paginatorText = paginatorNode && paginatorNode.children[0] ? paginatorNode.children[0].data : ''
    const total = Number(String(paginatorText || '').split('of')[1])

    return {
      posts: postIds,
      artistName: $('meta[name="artist_name"]')[0].attribs.content,
      // Fall back to this page's post count when the paginator is missing or unparsable.
      count: Number.isFinite(total) && total > 0 ? total : postIds.length
    }
  }
}
/**
 * Creates a ThreadPool with console progress and completion reporting.
 *
 * @param {number} threads - Value passed to the ThreadPool constructor.
 * @param {number} index - Pool number shown in the log output.
 * @returns {ThreadPool} The configured pool.
 */
function initPool(threads, index) {
  const pool = new ThreadPool(threads)
  // Progress line; presumably invoked by ThreadPool as tasks complete — verify against ./threadpool-mkz.
  pool.step = () => console.log(` Pool ${index} `.bgBlue.white + ' Progress > '.bgBlue.white + ' ' + ` ${ pool.counter } / ${ pool.sum } ${ pool.status() } `.bgMagenta.white)
  pool.finish(() => {
    console.log(` Pool ${index} Finished `.bgGreen.white)
    if (errorLog.length === 0) return
    fs.writeFile('error.log', errorLog, err => {
      if (err) {
        // Do not claim the log exists when writing it failed.
        console.error(` Failed To Write Error Log: ${err.message} `.bgRed.white)
        return
      }
      console.warn(' Error Occurred. Error Log Generated '.bgRed.white)
    })
  })
  return pool
}
// Start the CLI. Report an uncaught failure and exit non-zero instead of
// leaving the returned promise unhandled.
main().catch(err => {
  console.error(err)
  process.exitCode = 1
})