"""
Convert Spotify URLs to Monochrome/Tidal track IDs.

Usage:
    python spotify_to_ids.py <spotify_url> [<spotify_url>...] [-v] [--threshold N]

Supports track, album, and playlist URLs. Outputs one track ID per line (stdout).

Examples:
    python spotify_to_ids.py https://open.spotify.com/track/4PTG3Z6ehGkBFwjybzWkR8
    python spotify_to_ids.py -v https://open.spotify.com/album/4aawyAB9vmqN3uQ7FjRGTy
    python spotify_to_ids.py https://open.spotify.com/playlist/xxx | xargs -I{} python download.py {}
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import random
|
|
import re
|
|
import sys
|
|
import time
|
|
import urllib.error
|
|
import urllib.parse
|
|
import urllib.request
|
|
|
|
from monochrome import fetch, fetch_json, discover_instances
|
|
|
|
|
|
# --- Spotify URL parsing ---
|
|
|
|
def parse_spotify_url(url):
    """Parse a Spotify URL into (type, id). Returns (None, None) on failure."""
    pattern = (
        r'https?://open\.spotify\.com/(?:intl-\w+/)?'
        r'(track|album|playlist)/([a-zA-Z0-9]+)'
    )
    m = re.match(pattern, url.strip())
    # Anything after the ID (e.g. "?si=...") is ignored by re.match.
    return (m.group(1), m.group(2)) if m else (None, None)
|
|
|
|
|
|
# --- Spotify metadata extraction ---
|
|
|
|
def fetch_spotify_embed(sp_type, sp_id):
    """Fetch Spotify embed page and extract __NEXT_DATA__ JSON.

    Returns the parsed JSON dict, or None on any fetch/parse failure
    (errors are reported on stderr).
    """
    embed_url = f"https://open.spotify.com/embed/{sp_type}/{sp_id}"
    try:
        with fetch(embed_url, timeout=15, use_ssl_ctx=False) as resp:
            page = resp.read().decode()
    except Exception as exc:
        print(f"[!] Failed to fetch Spotify embed: {exc}", file=sys.stderr)
        return None

    # The embed page ships its data as an inline Next.js JSON payload.
    found = re.search(
        r'<script\s+id="__NEXT_DATA__"\s+type="application/json">\s*({.+?})\s*</script>',
        page, re.DOTALL
    )
    if found:
        try:
            return json.loads(found.group(1))
        except json.JSONDecodeError:
            pass

    print("[!] __NEXT_DATA__ not found in embed page", file=sys.stderr)
    return None
|
|
|
|
|
|
def fetch_spotify_oembed(sp_type, sp_id):
    """Fallback: use oEmbed API to get at least a title string.

    Returns the title (possibly ""), or None on any failure.
    """
    target = f"https://open.spotify.com/{sp_type}/{sp_id}"
    endpoint = f"https://open.spotify.com/oembed?url={urllib.parse.quote(target, safe='')}"
    try:
        payload = fetch_json(endpoint, timeout=15, use_ssl_ctx=False)
        return payload.get("title", "")
    except Exception:
        # Best-effort fallback: any failure just means "no title".
        return None
|
|
|
|
|
|
def extract_collection_name(embed_data, sp_type):
    """Extract album/playlist name from __NEXT_DATA__ JSON. Returns None for single tracks."""
    if sp_type == "track" or not embed_data:
        return None
    try:
        node = embed_data["props"]["pageProps"]["state"]["data"]["entity"]
    except (KeyError, TypeError, IndexError):
        return None
    # Albums use "name"; playlists sometimes use "title".
    return node.get("name") or node.get("title")
|
|
|
|
|
|
def extract_tracks(embed_data, sp_type, sp_id):
    """Extract list of {title, artist} dicts from __NEXT_DATA__ JSON.

    Falls back to oEmbed if embed data is missing or malformed."""
    if embed_data:
        try:
            entity = embed_data["props"]["pageProps"]["state"]["data"]["entity"]

            if sp_type == "track":
                name = entity.get("name") or entity.get("title", "")
                artist_list = entity.get("artists")
                # Prefer the structured artists array; fall back to the
                # display subtitle when it is absent.
                if artist_list and isinstance(artist_list, list):
                    performer = artist_list[0].get("name", "")
                else:
                    performer = entity.get("subtitle", "")
                if name:
                    return [{"title": name, "artist": performer}]

            elif sp_type in ("album", "playlist"):
                collected = [
                    {"title": item.get("title", ""), "artist": item.get("subtitle", "")}
                    for item in entity.get("trackList", [])
                    if item.get("title", "")
                ]
                if collected:
                    return collected
        except (KeyError, TypeError, IndexError):
            pass

    # Fallback: oEmbed (single tracks only, limited data)
    if sp_type == "track":
        fallback_title = fetch_spotify_oembed(sp_type, sp_id)
        if fallback_title:
            print(f'[*] Using oEmbed fallback: "{fallback_title}"', file=sys.stderr)
            return [{"title": fallback_title, "artist": ""}]

    return []
|
|
|
|
|
|
# --- Fuzzy matching ---
|
|
|
|
def normalize(text):
    """Normalize text for comparison: lowercase, strip feat/remaster/punctuation."""
    cleaned = text.lower()
    # Strip parenthesized credit/remaster annotations, in order.
    for pattern, pattern_flags in (
        (r'\(feat\.?[^)]*\)', 0),
        (r'\(ft\.?[^)]*\)', 0),
        (r'\(remaster(ed)?\)', re.IGNORECASE),
    ):
        cleaned = re.sub(pattern, '', cleaned, flags=pattern_flags)
    # Collapse punctuation to spaces, then squeeze runs of whitespace.
    cleaned = re.sub(r'[^\w\s]', ' ', cleaned)
    return ' '.join(cleaned.split())
|
|
|
|
|
|
def similarity(a, b):
    """Token overlap ratio (Jaccard index)."""
    set_a = set(normalize(a).split())
    set_b = set(normalize(b).split())
    # Either side empty means no basis for comparison.
    if set_a and set_b:
        return len(set_a & set_b) / len(set_a | set_b)
    return 0.0
|
|
|
|
|
|
def find_best_match(results, target_title, target_artist, threshold=0.4):
    """Find the best matching track from Monochrome search results.

    Returns (track, score) for the highest-scoring result at or above
    *threshold*, else (None, 0).
    """
    top = None
    top_score = 0

    for candidate in results:
        cand_title = candidate.get("title", "")
        artist_field = candidate.get("artist", {})
        cand_artist = (
            artist_field.get("name", "")
            if isinstance(artist_field, dict)
            else str(artist_field)
        )

        # Title dominates the score; a missing target artist
        # contributes a neutral 0.5 instead of penalizing.
        title_score = similarity(target_title, cand_title)
        artist_score = similarity(target_artist, cand_artist) if target_artist else 0.5
        combined = 0.6 * title_score + 0.4 * artist_score

        if combined > top_score:
            top_score, top = combined, candidate

    if top and top_score >= threshold:
        return top, top_score
    return None, 0
|
|
|
|
|
|
# --- Monochrome search ---
|
|
|
|
def search_monochrome(instances, query, log=None):
    """Search Monochrome instances for tracks matching a query string.

    Args:
        instances: iterable of Monochrome base URLs to try.
        query: free-text search string (typically "artist title").
        log: optional callable taking one message string for diagnostics.
            Defaults to printing to stderr, matching the rest of this
            script's convention (stdout is reserved for track IDs).

    Returns:
        A list of track dicts from the first instance that answers with a
        recognizable payload, or [] if every instance fails.
    """
    # Fix: `log` was previously assigned but never called, so instance
    # failures were swallowed silently. Route diagnostics through it.
    if log is None:
        log = lambda msg: print(msg, file=sys.stderr)

    shuffled = list(instances)
    random.shuffle(shuffled)  # spread load across instances
    encoded = urllib.parse.quote(query)

    for base in shuffled:
        url = f"{base}/search/?s={encoded}"
        try:
            data = fetch_json(url, timeout=15)
        except Exception as e:
            log(f"[!] Search failed on {base}: {e}")
            continue
        # Unwrap {"data": ..., "version": ...} response envelopes.
        if isinstance(data, dict) and "data" in data and "version" in data:
            data = data["data"]
        # Accept the three response shapes seen across instances.
        if isinstance(data, dict) and "items" in data:
            return data["items"]
        if isinstance(data, list):
            return data
        if isinstance(data, dict) and "tracks" in data:
            return data["tracks"]
    return []
|
|
|
|
|
|
# --- Main ---
|
|
|
|
def main():
    """CLI entry point: resolve each Spotify URL to Monochrome track IDs."""
    parser = argparse.ArgumentParser(
        description="Convert Spotify URLs to Monochrome/Tidal track IDs"
    )
    parser.add_argument("urls", nargs="+", help="Spotify track/album/playlist URLs")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Show matched title/artist alongside IDs")
    parser.add_argument("--threshold", type=float, default=0.4,
                        help="Minimum match score 0-1 (default: 0.4)")
    args = parser.parse_args()

    instances = discover_instances()
    matched_count = 0
    missed_count = 0

    for url in args.urls:
        sp_type, sp_id = parse_spotify_url(url)
        if sp_type is None:
            print(f"[!] Invalid Spotify URL: {url}", file=sys.stderr)
            continue

        # All diagnostics go to stderr; stdout carries only track IDs.
        print(f"[*] Fetching Spotify {sp_type}: {sp_id}", file=sys.stderr)
        embed_data = fetch_spotify_embed(sp_type, sp_id)
        tracks = extract_tracks(embed_data, sp_type, sp_id)

        if not tracks:
            print(f"[!] Could not extract tracks from {url}", file=sys.stderr)
            continue

        print(f"[*] Found {len(tracks)} track(s) on Spotify", file=sys.stderr)

        last_index = len(tracks) - 1
        for index, entry in enumerate(tracks):
            query = f"{entry['artist']} {entry['title']}".strip()
            print(f"[*] Searching: {query}", file=sys.stderr)

            hits = search_monochrome(instances, query)
            hit, score = find_best_match(hits, entry["title"], entry["artist"], args.threshold)

            if hit:
                matched_count += 1
                track_id = hit.get("id")
                if args.verbose:
                    shown_title = hit.get("title", "?")
                    artist_field = hit.get("artist", {})
                    shown_artist = artist_field.get("name", "?") if isinstance(artist_field, dict) else str(artist_field)
                    print(f"{track_id}\t{shown_artist} - {shown_title}\t(score: {score:.2f})")
                else:
                    print(track_id)
            else:
                missed_count += 1
                print(f"[!] No match: {entry['artist']} - {entry['title']}", file=sys.stderr)

            # Rate limit delay between searches (skip after last track)
            if index != last_index:
                time.sleep(0.5)

    print(f"\n[*] Done: {matched_count} matched, {missed_count} missed", file=sys.stderr)
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running as standalone script
    # NOTE(review): this sys.path tweak executes only AFTER the module-level
    # `from monochrome import ...` at the top of the file has already run,
    # so it cannot help that import resolve — confirm whether anything
    # imported later still needs it, or whether it should be hoisted above
    # the monochrome import.
    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    main()
|