"""
Convert Spotify URLs to Monochrome/Tidal track IDs.

Usage:
    python spotify_to_ids.py <url> [<url> ...] [-v] [--threshold N]

Supports track, album, and playlist URLs.
Outputs one track ID per line (stdout).

Examples:
    python spotify_to_ids.py https://open.spotify.com/track/4PTG3Z6ehGkBFwjybzWkR8
    python spotify_to_ids.py -v https://open.spotify.com/album/4aawyAB9vmqN3uQ7FjRGTy
    python spotify_to_ids.py https://open.spotify.com/playlist/xxx | xargs -I{} python download.py {}
"""

import argparse
import json
import os
import random
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request

from monochrome import fetch, fetch_json, discover_instances


# --- Spotify URL parsing ---

def parse_spotify_url(url):
    """Parse a Spotify URL into (type, id).

    Accepts ``open.spotify.com`` track/album/playlist URLs, optionally with an
    ``intl-xx/`` locale prefix. Anything after the ID (query string, etc.) is
    ignored. Returns (None, None) on failure.
    """
    match = re.match(
        r'https?://open\.spotify\.com/(?:intl-\w+/)?(track|album|playlist)/([a-zA-Z0-9]+)',
        url.strip()
    )
    if not match:
        return None, None
    return match.group(1), match.group(2)


# --- Spotify metadata extraction ---

# Anchored on the script tag so that an unrelated "{...}" fragment elsewhere
# in the page (inline CSS/JS) can never be captured instead of the payload.
_NEXT_DATA_RE = re.compile(
    r'<script[^>]*\bid=["\']__NEXT_DATA__["\'][^>]*>\s*(\{.+?\})\s*</script>',
    re.DOTALL,
)


def _extract_next_data(html):
    """Return the parsed __NEXT_DATA__ JSON from an HTML page, or None."""
    match = _NEXT_DATA_RE.search(html)
    if not match:
        return None
    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError:
        return None


def fetch_spotify_embed(sp_type, sp_id):
    """Fetch Spotify embed page and extract __NEXT_DATA__ JSON.

    Returns the decoded JSON object, or None if the page could not be fetched
    or contains no parseable __NEXT_DATA__ payload. Problems are reported on
    stderr; nothing is raised.
    """
    url = f"https://open.spotify.com/embed/{sp_type}/{sp_id}"
    try:
        with fetch(url, timeout=15, use_ssl_ctx=False) as resp:
            html = resp.read().decode()
    except Exception as e:
        # Best-effort: any fetch/decode failure just means "no embed data".
        print(f"[!] Failed to fetch Spotify embed: {e}", file=sys.stderr)
        return None

    data = _extract_next_data(html)
    if data is None:
        print("[!] __NEXT_DATA__ not found in embed page", file=sys.stderr)
    return data


def fetch_spotify_oembed(sp_type, sp_id):
    """Fallback: use oEmbed API to get at least a title string.

    Returns the "title" field of the oEmbed response ("" if absent), or None
    if the request fails for any reason.
    """
    spotify_url = f"https://open.spotify.com/{sp_type}/{sp_id}"
    oembed_url = f"https://open.spotify.com/oembed?url={urllib.parse.quote(spotify_url, safe='')}"
    try:
        data = fetch_json(oembed_url, timeout=15, use_ssl_ctx=False)
        return data.get("title", "")
    except Exception:
        return None


def _embed_entity(embed_data):
    """Return the entity dict nested inside __NEXT_DATA__ JSON, or None."""
    try:
        entity = embed_data["props"]["pageProps"]["state"]["data"]["entity"]
    except (KeyError, TypeError, IndexError):
        return None
    # A non-dict entity (null, list, ...) is as unusable as a missing one.
    return entity if isinstance(entity, dict) else None


def extract_collection_name(embed_data, sp_type):
    """Extract album/playlist name from __NEXT_DATA__ JSON.

    Returns None for single tracks, and for missing or malformed embed data.
    """
    if not embed_data or sp_type == "track":
        return None
    entity = _embed_entity(embed_data)
    if entity is None:
        return None
    return entity.get("name") or entity.get("title")


def _single_track(entity, sp_id):
    """Build the one-element track list for a track entity ([] if untitled)."""
    title = entity.get("name") or entity.get("title") or ""
    artists = entity.get("artists")
    if artists and isinstance(artists, list):
        first = artists[0]
        artist = (first.get("name") or "") if isinstance(first, dict) else ""
    else:
        artist = entity.get("subtitle") or ""
    if not title:
        return []
    return [{"title": title, "artist": artist, "sp_id": sp_id,
             "duration": entity.get("duration")}]


def _collection_tracks(entity):
    """Build the track list for an album/playlist entity ([] if none usable)."""
    track_list = entity.get("trackList")
    if not isinstance(track_list, list):
        return []
    tracks = []
    for t in track_list:
        if not isinstance(t, dict):
            # Skip a malformed entry rather than discarding the whole list.
            continue
        title = t.get("title") or ""
        if not title:
            continue
        # Prefer uri (contains real Spotify track ID) over uid (internal hex UID)
        uri = t.get("uri")
        track_uid = None
        if isinstance(uri, str) and uri.startswith("spotify:track:"):
            track_uid = uri.split(":")[-1]
        if not track_uid:
            track_uid = t.get("uid")
        tracks.append({"title": title, "artist": t.get("subtitle") or "",
                       "sp_id": track_uid, "duration": t.get("duration")})
    return tracks


def extract_tracks(embed_data, sp_type, sp_id):
    """Extract list of {title, artist, sp_id, duration} dicts from __NEXT_DATA__ JSON.

    Falls back to oEmbed (single tracks only) if embed data is missing or
    malformed. Returns [] when nothing could be extracted.
    """
    entity = _embed_entity(embed_data) if embed_data else None
    if entity is not None:
        if sp_type == "track":
            tracks = _single_track(entity, sp_id)
        elif sp_type in ("album", "playlist"):
            tracks = _collection_tracks(entity)
        else:
            tracks = []
        if tracks:
            return tracks

    # Fallback: oEmbed (single tracks only, limited data)
    if sp_type == "track":
        oembed_title = fetch_spotify_oembed(sp_type, sp_id)
        if oembed_title:
            print(f'[*] Using oEmbed fallback: "{oembed_title}"', file=sys.stderr)
            return [{"title": oembed_title, "artist": "", "sp_id": sp_id, "duration": None}]

    return []


# --- Fuzzy matching ---

# "(feat. X)", "(ft. X)", "(featuring X)"
_FEAT_TAG_RE = re.compile(r'\((?:feat|ft)\.?[^)]*\)')
# "(remaster)", "(remastered 2011)", "[2011 remaster]", ...
_REMASTER_TAG_RE = re.compile(r'[\(\[][^)\]]*remaster[^)\]]*[\)\]]')
# Trailing " - remastered 2011" / " - 2011 remaster" / " - remastered version"
_REMASTER_SUFFIX_RE = re.compile(
    r'\s-\s(?:\d{4}\s+)?remaster(?:ed)?(?:\s+\d{4})?(?:\s+version)?\s*$'
)
_PUNCTUATION_RE = re.compile(r'[^\w\s]')


def normalize(text):
    """Normalize text for comparison: lowercase, strip feat/remaster/punctuation.

    Remaster tags are removed whether they are parenthesised/bracketed (with
    or without a year) or appended as a " - Remastered 2011" style suffix, so
    that differently-tagged editions of the same title compare equal.
    None is treated as the empty string.
    """
    text = (text or "").lower()
    text = _FEAT_TAG_RE.sub('', text)
    text = _REMASTER_TAG_RE.sub('', text)
    text = _REMASTER_SUFFIX_RE.sub('', text)
    text = _PUNCTUATION_RE.sub(' ', text)
    return ' '.join(text.split())


def similarity(a, b):
    """Token overlap ratio (Jaccard index) of the normalized strings.

    Returns 0.0 if either side has no tokens after normalization.
    """
    tokens_a = set(normalize(a).split())
    tokens_b = set(normalize(b).split())
    if not tokens_a or not tokens_b:
        return 0.0
    return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)


def find_best_match(results, target_title, target_artist, threshold=0.4):
    """Find the best matching track from Monochrome search results.

    Each result is scored as 0.6 * title similarity + 0.4 * artist similarity
    (a neutral 0.5 artist score is used when no target artist is known).
    Returns (result, score) for the highest-scoring result if it reaches
    ``threshold``, otherwise (None, 0). Ties keep the earliest result.
    """
    best = None
    best_score = 0
    for r in results or []:
        if not isinstance(r, dict):
            continue
        r_title = r.get("title") or ""
        r_artist_obj = r.get("artist")
        if isinstance(r_artist_obj, dict):
            r_artist = r_artist_obj.get("name") or ""
        elif r_artist_obj is None:
            r_artist = ""
        else:
            r_artist = str(r_artist_obj)

        title_sim = similarity(target_title, r_title)
        artist_sim = similarity(target_artist, r_artist) if target_artist else 0.5
        score = 0.6 * title_sim + 0.4 * artist_sim
        if score > best_score:
            best_score = score
            best = r

    if best and best_score >= threshold:
        return best, best_score
    return None, 0


# --- Monochrome search ---

def _items_from_response(data):
    """Pull the result list out of a search response; None if shape is unknown."""
    # Versioned envelope: the real payload sits under "data".
    if isinstance(data, dict) and "data" in data and "version" in data:
        data = data["data"]
    if isinstance(data, dict) and "items" in data:
        return data["items"]
    if isinstance(data, list):
        return data
    if isinstance(data, dict) and "tracks" in data:
        return data["tracks"]
    return None


def search_monochrome(instances, query, log=None):
    """Search Monochrome instances for tracks matching a query string.

    Instances are tried in random order; the first one that returns a
    recognised response shape wins. Instances that fail or answer with an
    unrecognised shape are skipped. Returns [] if none succeeds.

    ``log`` is accepted for interface compatibility; this function does not
    emit any output itself.
    """
    shuffled = list(instances)
    random.shuffle(shuffled)
    encoded = urllib.parse.quote(query)
    for base in shuffled:
        url = f"{base}/search/?s={encoded}"
        try:
            items = _items_from_response(fetch_json(url, timeout=15))
        except Exception:
            # Deliberate best-effort: a broken instance must not stop the search.
            continue
        if items is not None:
            return items
    return []


# --- Main ---

def main():
    """CLI entry point: resolve each Spotify URL and print matched track IDs.

    Track IDs (plus match details with -v) go to stdout, one per line; all
    progress and error messages go to stderr so stdout stays pipeable.
    """
    parser = argparse.ArgumentParser(
        description="Convert Spotify URLs to Monochrome/Tidal track IDs"
    )
    parser.add_argument("urls", nargs="+", help="Spotify track/album/playlist URLs")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Show matched title/artist alongside IDs")
    parser.add_argument("--threshold", type=float, default=0.4,
                        help="Minimum match score 0-1 (default: 0.4)")
    args = parser.parse_args()

    instances = discover_instances()
    found = 0
    missed = 0

    for url in args.urls:
        sp_type, sp_id = parse_spotify_url(url)
        if not sp_type:
            print(f"[!] Invalid Spotify URL: {url}", file=sys.stderr)
            continue

        print(f"[*] Fetching Spotify {sp_type}: {sp_id}", file=sys.stderr)
        embed_data = fetch_spotify_embed(sp_type, sp_id)
        tracks = extract_tracks(embed_data, sp_type, sp_id)
        if not tracks:
            print(f"[!] Could not extract tracks from {url}", file=sys.stderr)
            continue

        print(f"[*] Found {len(tracks)} track(s) on Spotify", file=sys.stderr)
        for i, track in enumerate(tracks):
            query = f"{track['artist']} {track['title']}".strip()
            print(f"[*] Searching: {query}", file=sys.stderr)
            results = search_monochrome(instances, query)
            match, score = find_best_match(results, track["title"], track["artist"],
                                           args.threshold)
            tid = match.get("id") if match else None
            if match and tid is not None:
                found += 1
                if args.verbose:
                    m_title = match.get("title", "?")
                    m_artist_obj = match.get("artist", {})
                    if isinstance(m_artist_obj, dict):
                        m_artist = m_artist_obj.get("name", "?")
                    else:
                        m_artist = str(m_artist_obj)
                    print(f"{tid}\t{m_artist} - {m_title}\t(score: {score:.2f})")
                else:
                    print(tid)
            elif match:
                # Never emit a literal "None" on stdout: it would be consumed
                # as a track ID by whatever stdout is piped into.
                missed += 1
                print(f"[!] Match has no track ID: {track['artist']} - {track['title']}",
                      file=sys.stderr)
            else:
                missed += 1
                print(f"[!] No match: {track['artist']} - {track['title']}", file=sys.stderr)

            # Rate limit delay between searches (skip after last track)
            if i < len(tracks) - 1:
                time.sleep(0.5)

    print(f"\n[*] Done: {found} matched, {missed} missed", file=sys.stderr)


if __name__ == "__main__":
    # Allow running as standalone script
    # NOTE(review): this runs after the module-level `from monochrome import ...`
    # above, so it cannot influence that import; it only affects imports
    # performed later. Confirm against the project layout whether it should
    # be moved ahead of the import.
    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    main()