-
Notifications
You must be signed in to change notification settings - Fork 3
/
sanitize_titles.coffee
executable file
·196 lines (157 loc) · 6.78 KB
/
sanitize_titles.coffee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#!/usr/bin/env coffee
###
# Title Sanitizer
#
# @author David Lasley, [email protected]
# @website https://dlasley.net/blog/projects/remote-makemkv/
# @package remote-makemkv
# @license GPLv3
###
fs = require 'fs',
xml2js = require 'xml2js'
## Sanitize video titles.. could probably be abstracted for more uses
class SanitizeTitles

    # Order of hierarchy used when formatting volume information
    DIR_HIERARCHY: ['season', 'disc', 'episode']

    # Filesystem reserved char replacement map.
    # NOTE: this is a class-level (static) property, so instances must read it
    # through the constructor rather than through `@RESERVED_CHAR_MAP`.
    @RESERVED_CHAR_MAP: {
        '/':' -', '\\':' -', ':':' -', '|':' -',
        '?':' ', '%':' ', '*':' ', '"':' ', '<':' ', '>':' '
    }

    ## Init
    #
    # Sets the static configuration and starts an asynchronous load of
    # `rename_regexes.xml` (next to this file) into @VIDEO_RULES.
    constructor: () ->
        @NO_UPPERCASE = ['the', 'a', 'an', 'of', 'by' , 'up' , 'is' , 'in' , 'at' , 'on' , 'to']
        @DEFAULT_TITLE = 'Title'
        @VID_EXTS = ['mkv', 'mpg', 'avi', 'mp4', 'm4v']
        @SPACE_CHARS = /[ _\-\.\u2013]+/g
        @FORMAT_SEASON = /[, ]+(e|d|s|v|t)(pisode|isc|isk|eason|eries|olume|ol|rack|itle)? ?([0-9]{1,2})/ig

        # Start with empty rules so do_regexes is safe to call before the
        # asynchronous XML load below has finished (or if it fails).
        @VIDEO_RULES = {'regex':[], 'replace':[]}

        # Generate regexes from XML file
        parser = new xml2js.Parser()
        fs.readFile(__dirname + '/rename_regexes.xml', (read_err, data) =>
            if read_err
                console.error('SanitizeTitles: unable to read rename_regexes.xml', read_err)
                return
            parser.parseString(data, (parse_err, result) =>
                raw_regexes = result?.renaming?.videos?[0]
                if parse_err or not raw_regexes
                    console.error('SanitizeTitles: unable to parse rename_regexes.xml', parse_err)
                    return
                # Build into a local first; publish only once fully constructed
                rules = {'regex':[], 'replace':[]}
                for re in (raw_regexes.regex ? [])
                    rules.regex.push(new RegExp(re, 'gi'))
                for re in (raw_regexes.replace ? [])
                    rules.replace.push({
                        'original_r':new RegExp(re.original_r[0], 'g')
                        'change_to':re.change_to[0]
                    })
                @VIDEO_RULES = rules
            )
        )

    ## Fully sanitize an input string
    #
    # Volume information (season/disc/episode) is taken from the title first;
    # any keys the title lacks are filled from the fallbacks, in order.
    #
    # @param  Str  title      Input title
    # @param  list fallbacks  fallback titles to use for S/D/T gathering
    #                         (not modified by this method)
    # @return Str|false  Sanitized title with S#D#E# suffix, or false when no
    #                    usable title was supplied
    do_sanitize: (title, fallbacks=[]) =>
        # Build a fresh list rather than unshifting onto the caller's array
        candidates = if title then [title].concat(fallbacks ? []) else (fallbacks ? [])
        reserved = @constructor.RESERVED_CHAR_MAP
        info = null

        for candidate in candidates when candidate
            # split/join replaces every occurrence; String.replace with a
            # string pattern would only replace the first one
            for change_from, change_to of reserved
                candidate = candidate.split(change_from).join(change_to)

            vi = @volume_info(candidate)

            # Assign to info, or fill missing keys
            if not info
                info = vi
            else
                for key, val of vi
                    if not info[key]
                        info[key] = val

        if info?.sanitized
            # regex-->_strip_spaces->title_case->format_season->return
            info['sanitized'] = @_do_title_case(@_strip_spaces(@do_regexes(info.sanitized)))
            @format_season(info).trim()
        else
            false

    ## Loop regexes from XML, replace
    #
    # Every pattern in @VIDEO_RULES.regex is replaced by a single space, then
    # each @VIDEO_RULES.replace pair is applied in order.
    #
    # @param  Str title Input
    # @return Str Sanitized string
    do_regexes: (title) =>
        for regex in @VIDEO_RULES.regex
            title = title.replace(regex, ' ')
        for replace in @VIDEO_RULES.replace
            title = title.replace(replace.original_r, replace.change_to)
        title

    ## Extract episode/seasons from string.
    ## The returned `sanitized` value is the lower-cased input with the
    ## extracted markers removed.
    #
    # @param  Str  title input string
    # @return Dict {season, disc, episode, sanitized} (only matched keys are
    #              present; numbers are zero padded to two digits)
    volume_info: (title) =>
        # Left pad `value` with zeroes, keeping the last `padding` characters
        lpad = (value, padding=2) ->
            (new Array(padding + 1).join('0') + value).slice(-padding)

        # First letter of the marker --> key in the result
        match_map = {
            'e':@DIR_HIERARCHY[2], 't':@DIR_HIERARCHY[2], 'v':@DIR_HIERARCHY[0],
            's':@DIR_HIERARCHY[0], 'd':@DIR_HIERARCHY[1]
        }
        matched = {}
        sanitized = []
        trim_loc = 0
        title = String(title ? '').toLowerCase()

        # FORMAT_SEASON is a shared /g regex; make sure a previous, interrupted
        # scan cannot make this one start mid-string
        @FORMAT_SEASON.lastIndex = 0
        while match = @FORMAT_SEASON.exec(title)
            if match[1] and match[3] #< If Letter and Number in right spot
                matched[match_map[match[1]]] = lpad(match[3])
            # Keep the text between the previous marker and this one
            sanitized.push(title[trim_loc...match.index])
            trim_loc = match.index + match[0].length

        sanitized.push(title[trim_loc..])
        matched.sanitized = sanitized.join('')
        matched

    ## Format season information to Sanitized S#D#E#
    #
    # @param  Dict season_info   as returned by volume_info
    # @param  Bool inc_disc_num  include disc in out
    # @return Str  `sanitized` followed by a space and the S#D#E# suffix, or
    #              just `sanitized` when there is nothing to append
    format_season: (season_info, inc_disc_num=true) =>
        season_out = []
        for type_ in @DIR_HIERARCHY
            continue unless season_info[type_]
            continue if type_ is 'disc' and not inc_disc_num
            season_out.push(type_[0].toUpperCase(), season_info[type_])

        # An empty array is truthy, so the length has to be checked explicitly
        if season_out.length
            season_info.sanitized + ' ' + season_out.join('')
        else
            season_info.sanitized

    ## Turn all SPACE_CHARS into spaces, multiples into singles
    #
    # @param  Str      str_in   input string
    # @param  Function callback optional; when given it is called with the
    #                           result and its return value is returned
    # @return Str Sanitized String
    _strip_spaces: (str_in, callback=false) ->
        str_ = str_in.replace(@SPACE_CHARS, ' ').trim()
        if callback
            callback(str_)
        else
            str_

    ## Make string title case & move leading `the` to end with a comma.
    #  Also .upper() Roman Numerals
    #
    # @param  Str      str_in   String in
    # @param  Function callback optional; when given it is called with the
    #                           result and its return value is returned
    # @return Str Title cased string
    _do_title_case: (str_in, callback=false) ->
        ROMAN_NUMERAL_REGEX = /^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$/
        capitalize = (word) -> word[0].toUpperCase() + word[1..]

        the_ = false
        out = []

        for word in str_in.toLowerCase().split(' ') when word
            if ROMAN_NUMERAL_REGEX.test(word) #< Cap if Roman Numeral
                out.push(word.toUpperCase())
            else if word not in @NO_UPPERCASE #< Cap first letter of good words
                out.push(capitalize(word))
            else if out.length #< Minor word inside the title, no capital
                out.push(word)
            else if word == 'the' #< Don't add `the` if it is first word
                the_ = true
            else #< Minor word leading the title, cap it
                out.push(capitalize(word))

        joined = out.join(' ')
        if the_
            # Add the at the end; a title that is only `the` stays as-is
            # instead of turning into a dangling ", the"
            joined = if joined then joined + ', the' else 'The'

        if callback
            callback(joined)
        else
            joined
# Expose the SanitizeTitles class as this module's export.
module.exports = SanitizeTitles