From e820de905312647be68bae1df2c5ecaa2df887f5 Mon Sep 17 00:00:00 2001
From: Brawlence <42910943+Brawlence@users.noreply.github.com>
Date: Sun, 2 Aug 2020 04:32:52 +0300
Subject: [PATCH] Long filenames handling hotfix! And more:
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Features:
Introduced cutoff author handle length;
If no author handle fetched and ID present, ignore @{OR};
DB now works even without logging in;

Fixes:
DB, DF, VA now provide IDs correctly;
Decoration is turned off by default;
Updated repo description;
Updated test targets — included descriptions of 4 corner cases
---
 Extension/Background.js | 20 +++++++--------
 Extension/Content.js    | 16 +++++++++---
 Extension/Parsers/DB.js | 11 ++++++---
 Extension/Parsers/DF.js |  6 ++++-
 Extension/Parsers/VA.js |  6 ++++-
 Extension/manifest.json |  2 +-
 README.md               | 54 ++++++++++++++++++++++++++---------------
 package.json            |  2 +-
 test.js                 | 23 +++++++++---------
 9 files changed, 89 insertions(+), 51 deletions(-)

diff --git a/Extension/Background.js b/Extension/Background.js
index 579cfe9..2d30a7d 100644
--- a/Extension/Background.js
+++ b/Extension/Background.js
@@ -28,7 +28,7 @@ const validComboSets = [				// valid combinations of tags origin, image hoster,
 ];
 
 var invokeSaveAs = true,
-	useDecoration = true;
+	useDecoration = false;
 var firefoxEnviroment = false,
 	useIcons = false;
 var fileNameTemplate = "{handle}@{OR} {ID} {name} {caption} {tags}";
@@ -50,14 +50,14 @@ function processURL( /* object */ image, tabId) {
 		filename = url.substring(url.lastIndexOf('/') + 1);
 
 	// substring search with indexOf is the fastest, see https://jsperf.com/substring-test
-	if (filename.indexOf('.') > -1) {
+	if (filename.indexOf('.') > -1) {									// ! GENERAL CASE - dot is present
 		if (filename.indexOf('?') > -1) {
 			filename = filename.substring(0, filename.indexOf('?')); 	//prune the access token from the name if it exists
 		}
 		ext = filename.substring(filename.lastIndexOf('.') + 1); 		// extract extension
 	};
 	
-	if (image.origin === "TW") {			 							// TWITTER
+	if (image.origin === "TW") {			 							// ! TWITTER - special case: extracting EXT
 		if (filename.indexOf('?') > -1) {
 			ext = url.substr( url.indexOf('format=')+7, 3 );			// as far as I know, Twitter uses only png, jpg or svg - all ext are 3-lettered
 			url = url.replace(/(name=[\w\d]+)/g,'name=orig'); 			// force Twitter to serve us with original image
@@ -67,7 +67,7 @@ function processURL( /* object */ image, tabId) {
 		};
 	}; 
 	
-	if (image.origin === "PX") {										// PIXIV — get the page number (since pixiv_xxxx can hold many images)
+	if (image.origin === "PX") {										// ! PIXIV — special case: additional tag 'page_' (since pixiv_xxxx can hold many images)
 		let PXpage = filename.substring(filename.indexOf('_p') + 2, filename.indexOf('.'));
 		let PXthumb = "";
 		
@@ -85,16 +85,16 @@ function processURL( /* object */ image, tabId) {
 }
 
 function generateFilename(image) {
-	if (image.ext.length > 5) image.ext = "maybe.jpeg";					// make sure that extention did not go out of bounds
-	if (image.tags === "") image.tags = "tagme"; 						// make sure the name is not left blank
+	if (image.ext.length > 5) image.ext = "maybe.jpeg";						// make sure that extention did not go out of bounds
+	if (image.tags === "") image.tags = "tagme"; 							// make sure the name is not left blank
 
-	image.tags = image.tags.replace(/[,\\/:*?"<>|\t\n\v\f\r]/g, '')		// make sure the name in general doesn't contain any illegal characters
-						   .replace(/[ ]{2, }/g, ' ') 					// collapse multiple spaces
+	image.tags = image.tags.replace(/[,\\/:*?"<>|\t\n\v\f\r]/g, '')			// make sure the name in general doesn't contain any illegal characters
+						   .replace(/\s{2,}/g, ' ') 						// collapse multiple spaces
 	 					   .trim();
 	
 	if (image.tags.length + image.ext.length + 1 >= 255) {
-		image.tags = image.tags.substr(0, FILENAME_LENGTH_CUTOFF)		// substr - specified amount,
-							   .substring(0, name.lastIndexOf(' '));	// substring - between the specified indices
+		image.tags = image.tags.substr(0, FILENAME_LENGTH_CUTOFF);			// substr - specified amount,
+		if (image.tags.lastIndexOf(' ') > -1) image.tags = image.tags.substring(0, image.tags.lastIndexOf(' '));	// substring - between the specified indices; cut back to the last whole tag, skipped when there is no space because substring(0, -1) would yield ""
 	}
 	
 	image.filename = image.tags + "." + image.ext;
diff --git a/Extension/Content.js b/Extension/Content.js
index ec53cde..d8a2c7c 100644
--- a/Extension/Content.js
+++ b/Extension/Content.js
@@ -2,6 +2,8 @@
 
 "use strict";
 
+const AUTHOR_HANDLE_LENGTH_CUTOFF = 100;
+
 const sirHighlightStyle = String.raw`
 	{
 		border-width: 2px;
@@ -141,15 +143,23 @@ function safeGetByClass(classSelector) {
 
 // relies on defined get...() functions in XX_tagsParser.js
 function getNameBy(template) {
+	let authorHandle = getAuthorHandle() || "",										// a missing handle becomes "" so the .length check below cannot throw
+		pictureID = getPictureID();
+
+	if ( (!authorHandle) && pictureID) template = template.replace(/@\{OR\}/g, ''); // if there is no authorHandle and we have an ID, ignore '@XX' in the template
+	if (authorHandle.length > AUTHOR_HANDLE_LENGTH_CUTOFF) {						// if authorHandle is too big (multiple artists?), trim it
+		authorHandle = authorHandle.substr(0, AUTHOR_HANDLE_LENGTH_CUTOFF);
+		authorHandle = authorHandle.substring(0, authorHandle.lastIndexOf('+') + 1 || AUTHOR_HANDLE_LENGTH_CUTOFF) + "…";	// cut after the last '+' (presumably the artist separator); with no '+' at all keep the whole truncated handle instead of reducing it to "…"
+	}
 
-	template = template.replace(/\{handle\}/g, getAuthorHandle());
+	template = template.replace(/\{handle\}/g, authorHandle);
 	template = template.replace(/\{OR\}/g, tagsOrigin);
-	template = template.replace(/\{ID\}/g, getPictureID());
+	template = template.replace(/\{ID\}/g, pictureID);
 	template = template.replace(/\{name\}/g, getAuthorName());
 	template = template.replace(/\{caption\}/g, getPictureName());
 	template = template.replace(/\{tags\}/g, getTags());
 
-	template = template.replace(/[ ]{1,4}/g, ' ');
+	template = template.replace(/\s{2,}/g, ' ').trim();
 
 	return template;
 };
diff --git a/Extension/Parsers/DB.js b/Extension/Parsers/DB.js
index 918a838..7f542e2 100644
--- a/Extension/Parsers/DB.js
+++ b/Extension/Parsers/DB.js
@@ -19,10 +19,15 @@ function getPictureName() {
 function getTags() {
 	var	tempString = safeQuery('textarea[id="post_tag_string"]').innerHTML;
 	tempString = tempString.replace(/\n/g,'');
-	return tempString.replace(/\s?(\w+?)_\((art|color)ist\)/g, '').replace(/[,\\/:?<>\t\n\v\f\r]/g, '_');
+	let failover_noLogin = safeQuery('section[id="tag-list"]').innerText.replace(/(Copyrights|Characters|Artists|Tags|Meta)\n/g, '').replace(/\? ([\w\:\_\- ]+) [\d\.]+k?$/gmi,'$1').replace(/ /g, '_').replace(/[,\\/:?<>\t\n\v\f\r]/g, ' ');
+	return tempString.replace(/\s?(\w+?)_\((art|color)ist\)/g, '').replace(/[,\\/:?<>\t\n\v\f\r]/g, '_') || failover_noLogin;
 };
 
 function getPictureID() {
-	var lefter = pick("post-information").innerText.trim();
-	return 	"danbooru_" + lefter.substring(lefter.indexOf('ID: ') + 4, lefter.indexOf('\nDate: ')); //add the danboroo_ ID to the tags array
+	let lefter = pick("post-information").innerText.trim();
+	let id_match = lefter.match(/Id: [\d]+$/gim);					// String.match returns null (not an empty array) when no "Id: <digits>" line exists
+	if (id_match) {													// guard BEFORE indexing - null[0] would throw a TypeError
+		return "danbooru_" + id_match[0].substring(4); 			// drop the 4-character "Id: " prefix; add the danbooru_ ID to the tags array
+	}
+	return "";														// no ID found - return an empty ID instead of throwing
 }
\ No newline at end of file
diff --git a/Extension/Parsers/DF.js b/Extension/Parsers/DF.js
index d0e480a..0fee7b1 100644
--- a/Extension/Parsers/DF.js
+++ b/Extension/Parsers/DF.js
@@ -29,5 +29,9 @@ function getTags() {
 
 function getPictureID() {
 	var lefter = safeQuery('div [id="tag_list"]').innerText.trim();
-	return 	"drawfriends_" + lefter.substring(lefter.indexOf('Id: ') + 4, lefter.indexOf('\nPosted: ')); //add the drawfriends_ ID to the tags array
+	let id_match = lefter.match(/Id: [\d]+$/gim);					// String.match returns null (not an empty array) when no "Id: <digits>" line exists
+	if (id_match) {													// guard BEFORE indexing - null[0] would throw a TypeError
+		return "drawfriends_" + id_match[0].substring(4); 			// drop the 4-character "Id: " prefix; add the drawfriends_ ID to the tags array
+	}
+	return "";														// no ID found - return an empty ID instead of throwing
 }
\ No newline at end of file
diff --git a/Extension/Parsers/VA.js b/Extension/Parsers/VA.js
index 882c7ab..fb05f2e 100644
--- a/Extension/Parsers/VA.js
+++ b/Extension/Parsers/VA.js
@@ -29,5 +29,9 @@ function getTags() {
 
 function getPictureID() {
 	var lefter = safeQuery('div [id="tag_list"]').innerText.trim();
-	return 	"vidyart_" + lefter.substring(lefter.indexOf('Id: ') + 4, lefter.indexOf('\nPosted: ')); //add the vidyart_ID to the tags array
+	let id_match = lefter.match(/Id: [\d]+$/gim);					// String.match returns null (not an empty array) when no "Id: <digits>" line exists
+	if (id_match) {													// guard BEFORE indexing - null[0] would throw a TypeError
+		return "vidyart_" + id_match[0].substring(4); 				// drop the 4-character "Id: " prefix; add the vidyart_ ID to the tags array
+	}
+	return "";														// no ID found - return an empty ID instead of throwing
 }
\ No newline at end of file
diff --git a/Extension/manifest.json b/Extension/manifest.json
index ceb107d..164c1d8 100644
--- a/Extension/manifest.json
+++ b/Extension/manifest.json
@@ -7,7 +7,7 @@
 		"48": "SIR_48x48.png",
 		"16": "SIR_16x16.png"
 	},
-	"version": "1.4.2",
+	"version": "1.4.3",
 	"manifest_version": 2,
 	"permissions": ["downloads", "contextMenus"],
 	"content_scripts": [
diff --git a/README.md b/README.md
index 2076b75..c934ee6 100644
--- a/README.md
+++ b/README.md
@@ -5,38 +5,48 @@
 [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3565/badge)](https://bestpractices.coreinfrastructure.org/projects/3565)
 [![pipeline status](https://gitlab.com/Brawlence/SIR/badges/master/pipeline.svg)](https://gitlab.com/Brawlence/SIR/commits/master)
 [![coverage report](https://gitlab.com/Brawlence/SIR/badges/master/coverage.svg)](https://gitlab.com/Brawlence/SIR/commits/master)
+[![GitHub Releases](https://img.shields.io/github/v/release/Brawlence/SIR.svg)](https://github.com/Brawlence/SIR/releases)
+
+## Features ##
+* Customizable naming template
+* Option to bypass 'Save As…' dialog
+* Fetched Tags String preview & copy _(with hotkeys!)_
+* Pixiv thumbnail warning on save request
+* Twitter max quality promoter & navigation prompt on 'View Image…'
+* Automatic handling of long names & multiple artists collaboration
 
 ## Description ##
-**SIR** fetches data from popular image galleries, suggesting informative file names through the usual 'save file' dialog. 
+**SIR** fetches data from popular image galleries, suggesting informative file names through the usual 'Save As…' dialog. 
 
 The naming template is customizable; by default it's set to:
 
-`{handle}@{OR} {name} {caption} {tags}.ext`,  
+`{handle}@{OR} {ID} {name} {caption} {tags}`,  
 
 _{handle}_ represents author's nickname (usually it's a part of the gallery link),
-_{OR}_ is site abbrevation as follows below,
+_{OR}_ is site abbreviation as follows below,
+_{ID}_ is a platform-unique ID of the picture (if present),
 _{name}_ is author's human-readable name (often it's not the same as handle!),
 _{caption}_ is the image title as specified by the creator,
 _{tags}_ is a string of tags, separated by spaces (in-tag spaces are replaced by underscores).
 
-Supported site | OR ('origin')
---------------- | --------------- 
-Artstation | **AS**
-Deviantart | **DA**
-Drawfriends | **DF**
-Hentai-Foundry | **HF**
-Pixiv | **PX**
-Tumblr | **TU**
-Twitter | **TW**
-MedicalWhiskey | **MW**
-VidyArt | **VA**
-Danbooru. | **DB**
+Supported site | OR ('origin') | ID type
+--------------- | --------------- |  --------------- 
+Artstation | **AS** | case-sensitive alphanumeric
+Deviantart | **DA** | numeric
+Drawfriends | **DF** | numeric
+Hentai-Foundry | **HF** | numeric
+Pixiv | **PX** | numeric
+Tumblr | **TU** | none
+Twitter | **TW** | none
+MedicalWhiskey | **MW** | numeric
+VidyArt | **VA** | numeric
+Danbooru | **DB** | numeric
 
 The resulting filename is compatible with https://github.com/0xb8/WiseTagger/issues/1 and can be further tweaked by specifying a *custom template* through the extension context menu.
 
 *Please note:*
 - *On some sites many or all identifiers could be missing (and thus cannot be fetched).*
-- *Unique IDs are not present on Twitter and Tumblr.*
+- *No Unique IDs are known for Twitter and Tumblr. If you do happen to know how to decipher ids for these platforms, please contact me.*
 
 ## Technical details ##
 Every time a new page from the listed domains is loaded, **SIR** adds to it a content script, which responds for pings from the extension.
@@ -49,6 +59,7 @@ By user request (`"SIR Image Renamer"` → `"Download with tags"`), content scri
 One can see what info is discovered by **SIR** (`"SIR Image Renamer"` → `"Highlight fetched tags?"`):
 ![Example of tag highlighting](./Img/tag_highlighting.png)
 
+When opening single images from the `twimg.com` domain (*Twitter* hosting server), **SIR** will prompt you to navigate to their full-sized original counterparts.
 Additionally, if you're on *Pixiv* and are trying to save a thumbnail, **SIR** will halt you (but won't restrict your ability to proceed):
 
 ![Example alert](./Img/thumbnail_warning.png)
@@ -68,12 +79,15 @@ To install and run the latest (non stable) version of this extension, follow the
 
 ## Recommendations ##
 - **SIR** is a self-sufficient extension, yet it works wonders in conjunction with ![WiseTagger](https://github.com/0xb8/WiseTagger).
-- Although a handy tool, it's **not** a batch downloader. You still have to manually save each file.
+- Although a handy tool, it's **not** _(yet?)_ a batch downloader. You still have to manually save each file.
 - Some authors include no tags whatsoever, others fill way too much useless general-sounding clutter, so if you're aiming for maximum resolution, please check and tweak the name before saving.
-- Overabundant tags can sometimes exceed the filename length limit, thus they are currently trimmed to nearest space symbol below 230 symbols.  
+- Overabundant tags can sometimes exceed the filename length limit, thus they will be trimmed to the nearest space below 230 symbols.  
 
-## Planned features and TODO ##
-- Implement an 'Options' page (page action) and store persistent user options
+## Planned features (TODO) ##
+- 'Options' page (as page action) to tweak and store persistent user options
+- Localisation _(…s?)_
+- Integration with Hydrus Network for stealing tags _(maybe?)_
+- Batch downloads _(maybe?)_
 
 ## Known bugs ##
 - *Twitter* - when scrolling through the infinite view, unrelated tags are fetched (since **SIR** fetches tags from the whole visible area). Please save images from individual post page for now.
diff --git a/package.json b/package.json
index 30b32cb..fd39d69 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
     "name": "sir_image_renamer",
-    "version": "1.4.2",
+    "version": "1.4.3",
     "description": "SIR is an Image Renamer.\nProvides meaningful image names when saving on Pixiv, Deviantart, Artstation, etc.",
     "scripts": {
         "test": "web-ext lint --source-dir=./Extension/",
diff --git a/test.js b/test.js
index 8511739..67bc8d4 100644
--- a/test.js
+++ b/test.js
@@ -1,15 +1,15 @@
 // TODO: Plan & implement proper testing suite
 var Links_to_test = [
-    "https://www.artstation.com/artwork/zARRXD",                                                //ID tracking implemented
-    "https://deviantart.com/view/787225844",                                                    //ID tracking implemented Original was "https://www.deviantart.com/chrissiezullo/art/Nejire-Hadou-787225844"
-    "https://danbooru.donmai.us/posts/3278046",                                                 //ID tracking implemented
-    "https://drawfriends.booru.org/index.php?page=post&s=view&id=99115",                        //ID tracking implemented
-    "https://www.hentai-foundry.com/pictures/774805",                                           //ID tracking implemented Original was "https://www.hentai-foundry.com/pictures/user/BBC-Chan/774805/Elyzabeth-1"
-    "http://medicalwhiskey.com/?p=12513",                                                       //ID tracking implemented
-    "https://www.pixiv.net/artworks/79196939",                                                  //ID tracking implemented
-    "https://blurryken.tumblr.com/post/185528821532/do-you-want-to-share-a-bubble-tea-with-me", //NO IDS KNOWN
-    "https://twitter.com/ToyNewsInterna1/status/1222547463002681344",                           //NO IDS KNOWN
-    "https://vidyart.booru.org/index.php?page=post&s=view&id=377421"                            //ID tracking implemented, but bugged
+    "https://www.artstation.com/artwork/zARRXD",                                                // ID is alphanumeric
+    "https://deviantart.com/view/787225844",                                                    // Original was "https://www.deviantart.com/chrissiezullo/art/Nejire-Hadou-787225844"
+    "https://danbooru.donmai.us/posts/3887268",                                                 // Special case: tags string > 230 characters limit
+    "https://drawfriends.booru.org/index.php?page=post&s=view&id=99115",                        //
+    "https://www.hentai-foundry.com/pictures/774805",                                           // Original was "https://www.hentai-foundry.com/pictures/user/BBC-Chan/774805/Elyzabeth-1"
+    "http://medicalwhiskey.com/?p=12513",                                                       //
+    "https://www.pixiv.net/artworks/79196939",                                                  // Special case: thumbnail detection
+    "https://blurryken.tumblr.com/post/185528821532/do-you-want-to-share-a-bubble-tea-with-me", // NO IDS KNOWN - ID tracking not implemented
+    "https://twitter.com/RGVaerialphotos/status/1280334509863579648",                           // NO IDS KNOWN - ID tracking not implemented Special case: image must be ORIG
+    "https://vidyart.booru.org/index.php?page=post&s=view&id=375444"                            // Special cases: >6 artists; very long tags string
 ];
 
 for (let link of Links_to_test) { window.open(link,"_blank"); };
@@ -18,4 +18,5 @@ for (let link of Links_to_test) { window.open(link,"_blank"); };
 //1. CSS injection 
 //2. Get Tags String returning the proper filled string
 //3. Download With Tags actually downloading the file
-//4. File length trimming?
\ No newline at end of file
+//4. File length trimming to below 200 symbols
+//5. Multiple authors are trimmed to below 100 symbols