// Anakunda / GazelleTagManager

// ==UserScript==
// ==UserLibrary==
// @name         GazelleTagManager
// @namespace    https://openuserjs.org/users/Anakunda
// @version      1.01.04
// @author       Anakunda
// @license      GPL-3.0-or-later
// @copyright    2021, Anakunda (https://openuserjs.org/users/Anakunda)
// @exclude      *
// ==/UserScript==
// ==/UserLibrary==

/**
 * Reduces a string to plain ASCII.
 *
 * The string is normalized to NFKD first, so accented letters decompose into a base
 * letter followed by combining marks; after that every C0 control character
 * (U+0000-U+001F) and every UTF-16 code unit from U+0080 upwards is removed.
 *
 * The method is installed with Object.defineProperty so that it is non-enumerable:
 * a plain assignment would make 'toASCII' appear in every `for...in` over a string.
 * It stays writable and configurable, so another script may still redefine it.
 *
 * @returns {string} the ASCII-only version of the string
 */
Object.defineProperty(String.prototype, 'toASCII', {
	value: function toASCII() {
		return this.normalize("NFKD").replace(/[\x00-\x1F\u0080-\uFFFF]/g, '');
	},
	writable: true,
	configurable: true,
	enumerable: false,
});

// Whitelisted tag names, written in the dotted lower-case form that TagManager produces.
// The order of the entries is preserved from the original list.
// NOTE(review): nothing in this file reads tmWhitelist; presumably it is consumed by the
// scripts that load this library - verify against callers before changing it.
const tmWhitelist = [
	'electronic', 'rock', 'pop', 'hip.hop', 'techno',
	'experimental', 'jazz', 'house', 'ambient', 'classical',
	'folk', 'alternative', 'indie', 'metal', 'deep.house',
	'soul', '2010s', 'indie.rock', 'funk', 'world.music',
	'tech.house', 'punk', 'japanese', 'electro', 'country',
	'alternative.rock', 'synthpop', '1990s', 'drum.and.bass', 'pop.rock',
	'2020s', '2000s', 'dance', 'downtempo', 'trance',
	'rhythm.and.blues', 'instrumental', 'psychedelic', 'disco', 'black.metal',
	'blues', 'industrial', 'minimal', 'progressive.rock', 'freely.available',
	'drone', 'latin', 'noise', 'progressive.house', 'psychedelic.rock',
	'1980s', 'psytrance', 'non.fiction', 'reggae', 'idm',
	'hard.rock', 'death.metal', 'vocal', '1970s', 'jpop',
	'indie.pop', 'female.vocalist', 'hardcore.punk', 'singer.songwriter', 'dub',
	'score', 'jam.band', 'lo.fi', 'heavy.metal', 'folk.rock',
	'acoustic', 'new.wave', 'dubstep', 'avant.garde', 'post.rock',
	'hardcore.dance', 'video.game', 'acid', 'breakbeat', 'piano',
	'stage.and.screen', 'post.punk', 'fiction', 'shoegaze', 'doom.metal',
	'dream.pop', 'modern.classical', 'garage.rock', 'dark.ambient', 'breaks',
	'contemporary.jazz', 'free.improvisation', 'blues.rock', 'german', 'minimal.house',
	'fusion', 'new.age', 'free.jazz', 'orchestral', 'progressive.trance',
	'jungle', '1960s', 'bass', 'french', 'trip.hop',
	'baroque.era', 'nu.disco', 'korean', 'synthwave', 'ballad',
	'leftfield', 'contemporary', 'chamber.music', 'thrash.metal', 'swing',
	'dub.techno', 'progressive.metal', 'gospel', 'kpop', 'field.recording',
	'trap', 'ebm', 'rock.and.roll', 'metalcore', 'vaporwave',
	'apps.windows', 'power.pop', 'doujin', 'classic.rock', 'anison',
	'brazilian', 'fantasy', 'glitch', 'australian', 'dark.psytrance',
	'christmas', 'easy.listening', 'art.rock', 'christian', 'uk.garage',
	'epub', 'darkwave', 'bluegrass', 'post.hardcore', 'african',
	'electro.house', 'dancehall', 'gothic', 'sludge.metal', 'opera',
	'new.zealander', 'noise.rock', 'spanish', 'greek', 'tribal',
	'swedish', 'emo', 'jazz.rock', 'italian', 'jazz.funk',
	'americana', 'breakcore', 'comedy', 'ska', 'italo.disco',
	'hard.techno', 'pop.punk', 'indie.dance', 'happy.hardcore', 'grime',
	'hard.trance', 'krautrock', 'chillout', 'gabber', 'finnish',
	'science.fiction', 'power.metal', 'romantic', 'soft.rock', 'space.rock',
	'garage.house', 'chanson', 'beats', 'childrens.music', 'goa.trance',
	'chiptune', 'guitar', 'turkish', 'american', 'math.rock',
	'british', 'future.jazz', 'grindcore', 'male.vocalist', 'europop',
	'canadian', 'psychill', 'history', 'minimal.techno', 'epic',
	'choral', 'roots.reggae', 'big.band', 'post.bop', 'trailer.music',
	'new.york', 'celtic', 'country.rock', 'lounge', 'magazine',
	'romantic.era', '1950s', 'spoken.word', 'deep.tech', 'gangsta',
	'musique.concrete', 'synth', 'acid.house', 'chillwave', 'apps.mac',
	'russian', 'dutch', 'post.metal', 'melodic.death.metal', 'psybient',
	'electroacoustic', 'bossa.nova', 'uplifting.trance', 'mpb', 'smooth.jazz',
	'soul.jazz', 'nazi', 'norwegian', 'holiday', 'chinese',
	'vocal.jazz', 'polish', 'neofolk', 'afrobeat', 'touhou',
	'atmospheric.black.metal', 'ebook', 'business', '20th.century', 'political',
	'traditional', 'latin.jazz', 'argentinian', 'melodic.house', 'electropop',
];
// Country / region names that must not end up as tags.
// Each pattern is appended to tmSubstitutions as an entry without a replacement
// (see the .concat() that closes that table), which makes TagManager.add() drop
// any tag the pattern matches.
const tmExcludedCountries = [
	/^(?:United\s+States|USA?|U\.S\.(?:A\.)?)$/i,
	/^(?:United\s+Kingdom|(?:Great\s+)?Britain|England|GB|G\.B\.|UK|U\.K\.)$/i,
	/^(?:Europe|European\s+Union|EU|E\.U\.)$/i,
	/^(?:Unknown)$/i,
];
// Rewrites applied by TagManager.add() to the raw input string BEFORE it is split on
// separators (comma, slash, semicolon, '>', '|', newline).
// Each entry is [pattern, replacement]; when the pattern matches, the pair is passed to
// String.prototype.replace. None of the patterns is global, so only the first
// occurrence is rewritten.
const tmPresubstitutions = [
	// Slash-joined names are rewritten first because '/' is one of the split separators.
	[/\b(?:Singer\/Songwriter)\b/i, 'singer.songwriter'],
	[/\b(?:Pop\/Rock)\b/i, 'pop.rock'],
	[/\b(?:Folk\/Rock)\b/i, 'folk.rock'],
	// The replacement contains a comma, so the later split yields two separate tags.
	[/^(?:Psy\/Goa\s+Trance)$/i, 'psytrance, goa.trance'],
	// ", and" / ", &" becomes " & " so that the comma no longer acts as a separator.
	[/\s*,\s*(?:&\s*|and\s+)/i, ' & '],
	[/\b(?:Xmas)\b/i, 'Christmas'],
];
// Whole-tag substitutions consulted by TagManager.add() for every separated,
// ASCII-folded tag.
//
// Entry format: [matcher, ...replacementTags]
//   - a string matcher is compared case-insensitively against the whole tag;
//   - a RegExp matcher is tested against the tag;
//   - the first matching entry wins: its replacement tags are added instead of the
//     original tag, and an entry without replacements simply drops the tag.
//
// The patterns from tmExcludedCountries are appended at the end as drop-only entries.
const tmSubstitutions = [
	[/^Pop\s*(?:[\-\−\—\–]\s*)?Rock$/i, 'pop.rock'],
	[/^Rock\s*(?:[\-\−\—\–]\s*)?Pop$/i, 'pop.rock'],
	[/^Rock\s+n\s+Roll$/i, 'rock.and.roll'],
	['AOR', 'album.oriented.rock'],
	[/^(?:Prog)\.?\s*(?:Rock)$/i, 'progressive.rock'],
	[/^Synth[\s\-\−\—\–]+Pop$/i, 'synthpop'],
	//[/^World(?:\s+and\s+|\s*[&+]\s*)Country$/i, 'world.music', 'country'],
	['World', 'world.music'],
	[/^(?:Singer(?:\s+and\s+|\s*[&+]\s*))?Songwriter$/i, 'singer.songwriter'],
	[/^(?:R\s*(?:[\'\’\`][Nn](?:\s+|[\'\’\`]\s*)|&\s*)B|RnB)$/i, 'rhythm.and.blues'],
	[/^(?:Alternat(?:iv)?e)\s+(?:R\s*(?:[\'\’\`][Nn](?:\s+|[\'\’\`]\s*)|&\s*)B|RnB)$/i, 'alternative.rhythm.and.blues'],
	[/\b(?:Soundtracks?)$/i, 'score'],
	['Electro', 'electronic'],
	//['Metal', 'heavy.metal'],
	['NonFiction', 'non.fiction'],
	['Rap', 'hip.hop'],
	['HipHop', 'hip.hop'],
	['NeoSoul', 'neo.soul'],
	['NuJazz', 'nu.jazz'],
	['DreamPop', 'dream.pop'],
	['IndiePop', 'indie.pop'],
	['IndieRock', 'indie.rock'],
	['FolkMetal', 'folk.metal'],
	['deathmetal', 'death.metal'],
	['melodic.deathmetal', 'melodic.death.metal'],
	['dubtechno', 'dub.techno'],
	['mdm', 'melodic.death.metal'],
	['surfrock', 'surf.rock'],
	[/^J[\s\-]Pop$/i, 'jpop'],
	[/^K[\s\-]Pop$/i, 'kpop'],
	[/^J[\s\-]Rock$/i, 'jrock'],
	['Hardcore', 'hardcore.punk'],
	['Game', 'video.game'],
	['Game Music', 'video.game'],
	['game.music', 'video.game'],
	[/^(?:Neo[\s\-\−\—\–]+Classical)$/i, 'neoclassical'],
	[/^(?:Bluesy[\s\-\−\—\–]+Rock)$/i, 'blues.rock'],
	[/^(?:Be[\s\-\−\—\–]+Bop)$/i, 'bebop'],
	[/^(?:Chill)[\s\-\−\—\–]+(?:Out)$/i, 'chillout'],
	[/^(?:Atmospheric)[\s\-\−\—\–]+(?:Black)$/i, 'atmospheric.black.metal'],
	['GoaTrance', 'goa.trance'],
	[/^Female\s+Vocal\w*$/i, 'female.vocalist'],
	['Contemporary R&B', 'contemporary.rhythm.and.blues'],
	// The replacement holds a comma, so add() splits it into two tags.
	[/^(?:Gothic[\-\s]Rock)$/i, 'rock, gothic'],
	['Free Jazz & Avant-Garde', 'free.jazz', 'avant.garde'],
	['electro synthwave new wave pop', 'electro', 'synthwave', 'new wave', 'pop'], // BC - STUB Rec.
	['Dans', 'dance'], // RO → EN
	['Alternativa', 'alternative'],
	['Field recordings', 'field.recording'],
	['field.recordings', 'field.recording'],
	['Experimental music', 'experimental'],
	['experimental.music', 'experimental'],
	// Country aliases
	['Canada', 'canadian'],
	['Australia', 'australian'],
	['New Zealand', 'new.zealander'],
	['Japan', 'japanese'],
	['JP', 'japanese'],
	['Taiwan', 'taiwanese'], // was 'thai', which is the demonym of Thailand
	['China', 'chinese'],
	['Singapore', 'singaporean'],
	// NOTE(review): tags are ASCII-folded before matching, so the Cyrillic alternatives
	// below can presumably never match - confirm before relying on them.
	[/^(?:Russia|Russian\s+Federation|Россия|USSR|СССР)$/i, 'russian'],
	['Turkey', 'turkish'],
	['Israel', 'israeli'],
	['France', 'french'],
	['Germany', 'german'],
	['Spain', 'spanish'],
	['Italy', 'italian'],
	['Sweden', 'swedish'],
	['Norway', 'norwegian'],
	['Finland', 'finnish'],
	['Greece', 'greek'],
	[/^(?:Netherlands|Holland)$/i, 'dutch'],
	['Belgium', 'belgian'],
	['Luxembourg', 'luxembourgish'],
	['Denmark', 'danish'],
	['Switzerland', 'swiss'],
	['Austria', 'austrian'],
	['Portugal', 'portuguese'], // spelling fixed (was 'portugese')
	['Ireland', 'irish'],
	['Scotland', 'scottish'], // spelling fixed (was 'scotish')
	['Iceland', 'icelandic'],
	[/^(?:Czech\s+Republic|Czechia)$/i, 'czech'],
	[/^(?:Slovak\s+Republic|Slovakia)$/i, 'slovak'],
	['Hungary', 'hungarian'],
	['Poland', 'polish'],
	['Estonia', 'estonian'],
	['Latvia', 'latvian'],
	['Lithuania', 'lithuanian'],
	['Moldova', 'moldovan'],
	['Armenia', 'armenian'],
	['Belarus', 'belarussian'],
	['Ukraine', 'ukrainian'],
	['Yugoslavia', 'yugoslav'],
	['Serbia', 'serbian'],
	['Slovenia', 'slovenian'],
	['Croatia', 'croatian'],
	['Macedonia', 'macedonian'],
	['Montenegro', 'montenegrin'],
	['Romania', 'romanian'],
	['Malta', 'maltese'],
	['Brazil', 'brazilian'],
	['Mexico', 'mexican'],
	['Argentina', 'argentinian'], // matches the spelling used in tmWhitelist (was 'argentinean')
	['Jamaica', 'jamaican'],
	['Nigeria', 'nigerian'],
	// Books
	['Beletrie', 'fiction'],
	['Satira', 'satire'],
	['Komiks', 'comics'],
	['Komix', 'comics'],
	// Removals (no replacement => the tag is dropped)
	['Indie Rock/Rock Pop'],
	['Unknown'],
	['Other'],
	['New'],
	['Ostatni'],
	['Knihy'],
	['Audioknihy'],
	['dsbm'],
	[/^(?:Audio\s*kniha|Audio\s*Book)$/i],
].concat(tmExcludedCountries.map(it => [it]));
// Pairs of genres that are split into two tags when they appear joined by "and",
// "&" or "+", in either order (e.g. "Funk & Soul" -> soul, funk).
// TagManager.add() stops at the first pair that matches, so listing a pair twice has
// no effect; the duplicated ['Rock', 'Pop'] entry was removed for that reason.
// The names are interpolated into a RegExp without escaping, so entries must not
// contain regular-expression metacharacters.
const tmSplits = [
	['Alternative', 'Indie'],
	['Rock', 'Pop'],
	['Soul', 'Funk'],
	['Ska', 'Rocksteady'],
	['Jazz Fusion', 'Jazz Rock'],
	['Jazz', 'Funk'],
];
// Implied parent tags.
// Entry format: [pattern, ...extraTags]. When a tag matches the pattern,
// TagManager.add() adds the extra tags and then continues to process the tag itself,
// so the original tag is kept alongside the additions. Every matching entry is applied.
const tmAdditions = [
	// Jazz sub-genres that do not carry the word "jazz" at the end
	[/^(?:(?:(?:Be|Post|Neo)[\s\-\−\—\–]*)?Bop|Modal|Fusion|Free[\s\-\−\—\–]+Improvisation|Modern\s+Creative|Jazz[\s\-\−\—\–]+Fusion|Big[\s\-\−\—\–]*Band)$/i, 'jazz'],
	// "<qualifier> Jazz" sub-genres
	[/^(?:(?:Free|Cool|Avant[\s\-\−\—\–]*Garde|Contemporary|Instrumental|Crossover|Modal|Mainstream|Modern|Soul|Smooth|Piano|Afro[\s\-\−\—\–]*Cuban)[\s\-\−\—\–]+Jazz)$/i, 'jazz'],
	[/^(?:Opera)$/i, 'classical'],
	[/\b(?:Chamber[\s\-\−\—\–]+Music)\b/i, 'classical'],
	[/\b(?:Orchestral[\s\-\−\—\–]+Music)\b/i, 'classical'],
	[/^(?:Symphony)$/i, 'classical'],
	[/^(?:Sacred\s+Vocal)\b/i, 'classical'],
	// Any tag mentioning one of these words additionally gets 'score'
	[/\b(?:Soundtracks?|Films?|Games?|Video|Series?|Theatre|Musical)\b/i, 'score'],
];
// Final tags that are discarded: TagManager.add() compares each fully normalized
// (dotted, lower-cased) tag case-insensitively against this list just before storing it.
const tmRemovals = ['delete.this.tag', 'live', 'vinyl', 'flac', 'party.music'];
// Final normalization pipeline. TagManager.add() feeds every surviving tag through these
// [pattern, replacement] pairs in order (String.prototype.replace) and lower-cases the
// result, which yields the dotted tag form such as 'alternative.rock'.
const tmPostSubstitutions = [
	// Trim leading and trailing dots
	[/^\.+|\.+$/g, ''],
	// "Alt. Rock" / "Alt.Rock" -> "Alternative Rock"
	[/^(?:Alt\.)\s*(\w+)$/i, 'Alternative $1'],
	[/\b(?:Alt\.)(?=\s+)/i, 'Alternative'],
	// Two-digit decades get a century: "80s" -> "1980s", "10s" -> "2010s".
	// '$&' inserts the whole match; JavaScript has no '$0', which was emitted literally.
	[/^[3-9]0s$/i, '19$&'],
	[/^[0-2]0s$/i, '20$&'],
	// "Psy Trance" / "Psy-Chill" -> "PsyTrance" / "PsyChill"
	[/\b(Psy)[\s\-\−\—\–]+(Trance|Core|Chill)\b/i, '$1$2'],
	// First "'n'", "&" or "+" becomes " and "
	[/\s*(?:[\'\’\`][Nn](?:\s+|[\'\’\`]\s*)|[\&\+]\s*)/, ' and '],
	// Any run of whitespace / dash / punctuation separators collapses to a single dot
	[/[\s\-\−\—\–\_\.\,\~]+/g, '.'],
	// Drop everything that is neither a word character nor a dot
	[/[^\w\.]+/g, ''],
	[/^(?:singer\.and\.songwriter)$/i, 'singer.songwriter'],
];

/**
 * An Array of normalized, de-duplicated tags.
 *
 * Raw genre / style strings passed to the constructor or to add() are translated
 * (qbGenreToEnglish), pre-substituted, split on separators, ASCII-folded and then run
 * through the tmSubstitutions / tmAdditions / tmSplits / tmPostSubstitutions /
 * tmRemovals tables before being stored in dotted lower-case form.
 */
class TagManager extends Array {
	// Arrays derived through map/filter/slice/concat must be plain Arrays. With the
	// inherited species those methods would call `new TagManager(length)`, which tries
	// to add the numeric length as a tag.
	static get [Symbol.species]() { return Array; }

	/**
	 * @param {...string} tags raw tag strings to add right away
	 */
	constructor(...tags) {
		super();
		if (tags.length > 0) this.add(...tags);
	}

	/**
	 * Normalizes the given raw tag strings and appends those not stored yet.
	 * Arguments that are not strings, or are blank, are skipped.
	 *
	 * @param {...string} tags raw tag strings; one string may hold several tags
	 *   separated by , / ; > | or a line break
	 * @returns {number} how many tags were actually appended
	 */
	add(...tags) {
		const joiner = '(?:\\s+and\\s+|\\s*[&+]\\s*)';
		// A string matcher equals the tag case-insensitively; a RegExp matcher is tested.
		const matches = (tag, matcher) => typeof matcher === 'string' && tag.toLowerCase() === matcher.toLowerCase()
			|| matcher instanceof RegExp && matcher.test(tag);
		let added = 0;

		// Processes one separated, cleaned-up tag.
		const addSingle = single => {
			if (single.length <= 0 || single === '?') return;
			// Whole-tag substitution: the first matching entry replaces (or drops) the tag.
			for (const subst of tmSubstitutions) {
				if (!matches(single, subst[0])) continue;
				if (subst.length > 1) added += this.add(...subst.slice(1));
				return;
			}
			// Implied tags are added on top; the tag itself is still processed below.
			for (const addition of tmAdditions) {
				if (matches(single, addition[0])) added += this.add(...addition.slice(1));
			}
			// "A & B" style combinations are split into their two parts.
			for (const [first, second] of tmSplits) {
				const isJoinedPair = [[first, second], [second, first]].some(([left, right]) =>
					new RegExp('^' + left + joiner + right + '$', 'i').test(single));
				if (!isJoinedPair) continue;
				added += this.add(first, second);
				return;
			}
			const normalized = tmPostSubstitutions
				.reduce((text, subst) => text.replace(...subst), single).toLowerCase();
			if (normalized.length < 2) return;
			if (tmRemovals.some(removal => matches(normalized, removal))) return;
			if (this.includes(normalized)) return;
			this.push(normalized);
			++added;
		};

		for (const rawTag of tags) {
			// Check the type before touching the value: non-strings have no trim().
			if (typeof rawTag !== 'string') continue;
			const trimmed = rawTag.trim();
			if (!trimmed) continue;
			let tag = qbGenreToEnglish(trimmed);
			for (const subst of tmPresubstitutions) {
				if (subst[0].test(tag)) tag = tag.replace(...subst);
			}
			tag.split(/(?:[\,\/\;\>\|]|\r?\n)+/)
				// Fold to ASCII, strip bracketed remarks, collapse whitespace runs.
				.map(part => part.trim().toASCII()
					.replace(/\s*(?:\(.*?\)|\[.*?\]|\{.*?\})/g, '').replace(/\s+/g, ' '))
				.forEach(addSingle);
		}
		return added;
	}

	/** @returns {string} the tags joined with ', ' in insertion order */
	toString() { return this.join(', ') }

	/** @returns {string} the tags joined with ', ' in default sort order */
	toStringSorted() { return Array.from(this).sort().join(', ') }
}

//////////////////////////////////////////////////////////////////////////
////////////////////////////// SAFE PADDING //////////////////////////////
//////////////////////////////////////////////////////////////////////////