feat: implement Monochrome downloading

This commit is contained in:
2026-03-08 20:14:30 +01:00
parent 6dff83ac61
commit f4dee850f3
8 changed files with 1157 additions and 9 deletions

View File

@@ -0,0 +1,269 @@
"""
Convert Spotify URLs to Monochrome/Tidal track IDs.
Usage:
python spotify_to_ids.py <spotify_url> [<spotify_url>...] [-v] [--threshold N]
Supports track, album, and playlist URLs. Outputs one track ID per line (stdout).
Examples:
python spotify_to_ids.py https://open.spotify.com/track/4PTG3Z6ehGkBFwjybzWkR8
python spotify_to_ids.py -v https://open.spotify.com/album/4aawyAB9vmqN3uQ7FjRGTy
python spotify_to_ids.py https://open.spotify.com/playlist/xxx | xargs -I{} python download.py {}
"""
import argparse
import json
import os
import random
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from monochrome import fetch, fetch_json, discover_instances
# --- Spotify URL parsing ---
def parse_spotify_url(url):
    """Parse a Spotify URL into a (type, id) pair.

    Accepts track, album and playlist URLs, including the optional
    ``intl-xx`` locale path segment. Returns (None, None) when the URL
    does not match.
    """
    pattern = (
        r'https?://open\.spotify\.com/(?:intl-\w+/)?'
        r'(track|album|playlist)/([a-zA-Z0-9]+)'
    )
    m = re.match(pattern, url.strip())
    return (m.group(1), m.group(2)) if m else (None, None)
# --- Spotify metadata extraction ---
def fetch_spotify_embed(sp_type, sp_id):
    """Download Spotify's embed page for an entity and return the
    ``__NEXT_DATA__`` JSON payload as a dict, or None on any failure.

    Diagnostics go to stderr so stdout stays clean for track IDs.
    """
    embed_url = f"https://open.spotify.com/embed/{sp_type}/{sp_id}"
    try:
        with fetch(embed_url, timeout=15, use_ssl_ctx=False) as resp:
            page = resp.read().decode()
    except Exception as exc:
        # Best-effort: callers treat None as "no embed data available".
        print(f"[!] Failed to fetch Spotify embed: {exc}", file=sys.stderr)
        return None
    found = re.search(
        r'<script\s+id="__NEXT_DATA__"\s+type="application/json">\s*({.+?})\s*</script>',
        page, re.DOTALL
    )
    if found:
        try:
            return json.loads(found.group(1))
        except json.JSONDecodeError:
            pass
    print("[!] __NEXT_DATA__ not found in embed page", file=sys.stderr)
    return None
def fetch_spotify_oembed(sp_type, sp_id):
    """Fallback metadata source: Spotify's public oEmbed endpoint.

    Returns the entity title (possibly an empty string), or None when
    the request or response handling fails.
    """
    page_url = f"https://open.spotify.com/{sp_type}/{sp_id}"
    quoted = urllib.parse.quote(page_url, safe='')
    oembed_url = f"https://open.spotify.com/oembed?url={quoted}"
    try:
        payload = fetch_json(oembed_url, timeout=15, use_ssl_ctx=False)
        return payload.get("title", "")
    except Exception:
        return None
def extract_collection_name(embed_data, sp_type):
    """Return the album/playlist name from ``__NEXT_DATA__``, or None.

    Single tracks have no containing collection, so a "track" type (or
    missing/malformed embed data) always yields None.
    """
    if sp_type == "track" or not embed_data:
        return None
    try:
        entity = embed_data["props"]["pageProps"]["state"]["data"]["entity"]
    except (KeyError, TypeError, IndexError):
        return None
    return entity.get("name") or entity.get("title")
def extract_tracks(embed_data, sp_type, sp_id):
    """Build a list of ``{"title", "artist"}`` dicts for a Spotify entity.

    Reads the ``__NEXT_DATA__`` payload when available; otherwise falls
    back to the oEmbed API (single tracks only, artist unknown).
    Returns [] when nothing usable is found.
    """
    if embed_data:
        try:
            entity = embed_data["props"]["pageProps"]["state"]["data"]["entity"]
            if sp_type == "track":
                name = entity.get("name") or entity.get("title", "")
                artist_list = entity.get("artists")
                if isinstance(artist_list, list) and artist_list:
                    artist = artist_list[0].get("name", "")
                else:
                    artist = entity.get("subtitle", "")
                if name:
                    return [{"title": name, "artist": artist}]
            elif sp_type in ("album", "playlist"):
                # Entries without a title are silently dropped.
                collected = [
                    {"title": item.get("title", ""), "artist": item.get("subtitle", "")}
                    for item in entity.get("trackList", [])
                    if item.get("title", "")
                ]
                if collected:
                    return collected
        except (KeyError, TypeError, IndexError):
            pass
    # Fallback: oEmbed offers only a title string, and only for tracks.
    if sp_type == "track":
        title = fetch_spotify_oembed(sp_type, sp_id)
        if title:
            print(f'[*] Using oEmbed fallback: "{title}"', file=sys.stderr)
            return [{"title": title, "artist": ""}]
    return []
# --- Fuzzy matching ---
def normalize(text):
    """Lower-case *text*, strip parenthesized feat./ft. credits and
    remaster tags, replace punctuation with spaces, and collapse runs
    of whitespace to single spaces."""
    cleaned = text.lower()
    for credit in (r'\(feat\.?[^)]*\)', r'\(ft\.?[^)]*\)'):
        cleaned = re.sub(credit, '', cleaned)
    cleaned = re.sub(r'\(remaster(ed)?\)', '', cleaned, flags=re.IGNORECASE)
    cleaned = re.sub(r'[^\w\s]', ' ', cleaned)
    return ' '.join(cleaned.split())
def similarity(a, b):
    """Jaccard index over normalized word tokens.

    Returns 0.0 when either string normalizes to no tokens at all.
    """
    left = set(normalize(a).split())
    right = set(normalize(b).split())
    if left and right:
        return len(left & right) / len(left | right)
    return 0.0
def find_best_match(results, target_title, target_artist, threshold=0.4):
    """Pick the Monochrome search result closest to the target track.

    Each result is scored as 0.6 * title similarity + 0.4 * artist
    similarity; when the target artist is unknown the artist component
    is pinned to a neutral 0.5. Returns (result, score) when the top
    score reaches *threshold*, otherwise (None, 0).
    """
    top = None
    top_score = 0
    for candidate in results:
        cand_title = candidate.get("title", "")
        artist_field = candidate.get("artist", {})
        if isinstance(artist_field, dict):
            cand_artist = artist_field.get("name", "")
        else:
            cand_artist = str(artist_field)
        if target_artist:
            artist_part = similarity(target_artist, cand_artist)
        else:
            artist_part = 0.5
        total = 0.6 * similarity(target_title, cand_title) + 0.4 * artist_part
        if total > top_score:
            top_score = total
            top = candidate
    if top and top_score >= threshold:
        return top, top_score
    return None, 0
# --- Monochrome search ---
def search_monochrome(instances, query, log=None):
    """Query Monochrome instances (in random order) for matching tracks.

    Returns the first responsive instance's result list, unwrapping the
    response envelopes the API may use ({"data": ..., "version": ...},
    {"items": [...]}, a bare list, or {"tracks": [...]}). Returns []
    when every instance fails or returns an unrecognized shape.

    NOTE(review): *log* is accepted and defaulted but currently unused;
    kept for interface compatibility — confirm whether it was meant to
    report per-instance failures.
    """
    if log is None:
        log = print
    candidates = list(instances)
    random.shuffle(candidates)
    term = urllib.parse.quote(query)
    for base in candidates:
        try:
            payload = fetch_json(f"{base}/search/?s={term}", timeout=15)
            if isinstance(payload, dict) and "data" in payload and "version" in payload:
                payload = payload["data"]
            if isinstance(payload, dict) and "items" in payload:
                return payload["items"]
            if isinstance(payload, list):
                return payload
            if isinstance(payload, dict) and "tracks" in payload:
                return payload["tracks"]
        except Exception:
            continue  # best-effort: try the next instance
    return []
# --- Main ---
def main():
    """CLI entry point: resolve Spotify URLs to Monochrome track IDs.

    Matched IDs are written to stdout (one per line, or with metadata
    in verbose mode); all progress and error messages go to stderr so
    the output can be piped into other tools.
    """
    parser = argparse.ArgumentParser(
        description="Convert Spotify URLs to Monochrome/Tidal track IDs"
    )
    parser.add_argument("urls", nargs="+", help="Spotify track/album/playlist URLs")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Show matched title/artist alongside IDs")
    parser.add_argument("--threshold", type=float, default=0.4,
                        help="Minimum match score 0-1 (default: 0.4)")
    args = parser.parse_args()

    instances = discover_instances()
    matched_count = 0
    missed_count = 0
    for url in args.urls:
        sp_type, sp_id = parse_spotify_url(url)
        if sp_type is None:
            print(f"[!] Invalid Spotify URL: {url}", file=sys.stderr)
            continue
        print(f"[*] Fetching Spotify {sp_type}: {sp_id}", file=sys.stderr)
        embed_data = fetch_spotify_embed(sp_type, sp_id)
        tracks = extract_tracks(embed_data, sp_type, sp_id)
        if not tracks:
            print(f"[!] Could not extract tracks from {url}", file=sys.stderr)
            continue
        print(f"[*] Found {len(tracks)} track(s) on Spotify", file=sys.stderr)
        last = len(tracks) - 1
        for idx, track in enumerate(tracks):
            query = f"{track['artist']} {track['title']}".strip()
            print(f"[*] Searching: {query}", file=sys.stderr)
            hits = search_monochrome(instances, query)
            match, score = find_best_match(hits, track["title"], track["artist"], args.threshold)
            if match is None:
                missed_count += 1
                print(f"[!] No match: {track['artist']} - {track['title']}", file=sys.stderr)
            else:
                matched_count += 1
                tid = match.get("id")
                if args.verbose:
                    m_title = match.get("title", "?")
                    artist_field = match.get("artist", {})
                    m_artist = artist_field.get("name", "?") if isinstance(artist_field, dict) else str(artist_field)
                    print(f"{tid}\t{m_artist} - {m_title}\t(score: {score:.2f})")
                else:
                    print(tid)
            # Rate-limit courtesy pause between searches; skipped after
            # the final track of each collection.
            if idx < last:
                time.sleep(0.5)
    print(f"\n[*] Done: {matched_count} matched, {missed_count} missed", file=sys.stderr)
if __name__ == "__main__":
    # Allow running as standalone script.
    # NOTE(review): this sys.path tweak executes only after the
    # module-level `from monochrome import ...` has already run, so it
    # cannot influence that import — presumably `monochrome` resolves
    # via the script's own directory; confirm the parent-dir insert is
    # still needed.
    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    main()