Improve _is_classical(): genre keywords + composer list as primary signals

Previously any albumartist≠track_artist triggered classical naming, causing
false positives for jazz compilations, folk samplers, pop albums with
multiple featured artists. Now requires explicit confirmation:
- Genre contains a classical keyword (classical, baroque, opera, symphon …)
- OR track_artist name contains a known composer (Bach, Mozart, Beethoven …)
Pure name-inequality alone no longer triggers the Performer-Composer-Work schema.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-04-29 08:44:34 +02:00
commit ec8a37f313

View file

@ -42,21 +42,52 @@ def _safe_name(s: str) -> str:
return re.sub(r"\s+", "_", s).strip("._-")
_CLASSICAL_GENRE_KEYWORDS = {
"classical", "klassik", "baroque", "barock", "romantic", "romantik",
"opera", "oper", "operetta", "operette", "chamber", "kammermusik",
"symphon", "concerto", "oratorio", "sacred", "kirchenmusik",
"renaissance", "medieval", "contemporary classical",
}
_CLASSICAL_COMPOSER_KEYWORDS = {
# Bekannte Komponisten als Signal (Nachname reicht)
"bach", "beethoven", "mozart", "handel", "haydn", "schubert", "brahms",
"chopin", "liszt", "schumann", "wagner", "verdi", "puccini", "vivaldi",
"telemann", "buxtehude", "monteverdi", "palestrina", "purcell",
"mahler", "bruckner", "dvorak", "tchaikovsky", "tschaikowski",
"debussy", "ravel", "satie", "strauss", "sibelius", "grieg",
}
def _fold_diacritics(text: str) -> str:
    """Return *text* with combining accents removed ('dvořák' -> 'dvorak')."""
    # Imported locally because the module's import block is not part of the
    # code visible here.
    import unicodedata

    decomposed = unicodedata.normalize("NFKD", text)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def _is_classical(albumartist: str, track_artist: str, genre: str) -> bool:
    """
    Decide whether the classical naming schema (Performer - Composer - Work)
    should be used for a track.

    The schema applies only when albumartist and track_artist are both real,
    different names AND at least one explicit signal is present:

    1. the genre contains one of _CLASSICAL_GENRE_KEYWORDS (substring match,
       so stems such as "symphon" work), or
    2. one of the words in track_artist is a surname listed in
       _CLASSICAL_COMPOSER_KEYWORDS.

    A mere albumartist != track_artist mismatch is not enough; that heuristic
    produced too many false positives (samplers, jazz, folk music).

    Args:
        albumartist: Album-level artist tag (the performer); may be empty/None.
        track_artist: Track-level artist tag (the composer candidate); may be
            empty/None.
        genre: Genre tag; may be empty/None.

    Returns:
        True if the classical schema should be used, otherwise False.
    """
    aa = (albumartist or "").casefold().strip()
    ta = (track_artist or "").casefold().strip()
    g = (genre or "").casefold().strip()

    if not aa or aa in ("various artists", "unknown artist", "unknown"):
        return False
    if not ta or ta in ("unknown artist", "unknown"):
        return False  # placeholder, not a real composer
    if aa == ta:
        return False

    # Primary signal: genre keyword. Accents are folded first so that tags
    # like "Opéra" still hit the ASCII keyword list.
    folded_genre = _fold_diacritics(g)
    if any(kw in folded_genre for kw in _CLASSICAL_GENRE_KEYWORDS):
        return True

    # Secondary signal: a known composer surname appears as a whole word in
    # track_artist. Whole-word matching avoids substring false positives
    # ("Burt Bacharach" ~ "bach", "Traveling Wilburys" ~ "ravel"); folding
    # accents lets "Dvořák" / "Händel" match their ASCII list entries.
    # Underscores are treated as separators, not as word characters.
    name_words = re.findall(r"[^\W_]+", _fold_diacritics(ta))
    return any(word in _CLASSICAL_COMPOSER_KEYWORDS for word in name_words)
def _proposed_filename(