Improve _is_classical(): genre keywords + composer list as primary signals
Previously, any albumartist ≠ track_artist mismatch triggered classical naming, causing false positives for jazz compilations, folk samplers, and pop albums with multiple featured artists.
Classical naming now requires explicit confirmation: either the genre contains a classical keyword (classical, baroque, opera, symphon …), or the track_artist name contains a known composer (Bach, Mozart, Beethoven …).
Pure name inequality alone no longer triggers the Performer-Composer-Work schema.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
031e595ff7
commit
ec8a37f313
1 changed files with 36 additions and 5 deletions
41
executor.py
41
executor.py
|
|
@ -42,21 +42,52 @@ def _safe_name(s: str) -> str:
|
|||
return re.sub(r"\s+", "_", s).strip("._-")
|
||||
|
||||
|
||||
_CLASSICAL_GENRE_KEYWORDS = {
|
||||
"classical", "klassik", "baroque", "barock", "romantic", "romantik",
|
||||
"opera", "oper", "operetta", "operette", "chamber", "kammermusik",
|
||||
"symphon", "concerto", "oratorio", "sacred", "kirchenmusik",
|
||||
"renaissance", "medieval", "contemporary classical",
|
||||
}
|
||||
|
||||
_CLASSICAL_COMPOSER_KEYWORDS = {
|
||||
# Bekannte Komponisten als Signal (Nachname reicht)
|
||||
"bach", "beethoven", "mozart", "handel", "haydn", "schubert", "brahms",
|
||||
"chopin", "liszt", "schumann", "wagner", "verdi", "puccini", "vivaldi",
|
||||
"telemann", "buxtehude", "monteverdi", "palestrina", "purcell",
|
||||
"mahler", "bruckner", "dvorak", "tchaikovsky", "tschaikowski",
|
||||
"debussy", "ravel", "satie", "strauss", "sibelius", "grieg",
|
||||
}
|
||||
|
||||
|
||||
def _is_classical(albumartist: str, track_artist: str, genre: str) -> bool:
    """Return True when the classical naming schema should be used.

    The classical schema (Performer_-_Composer_-_Work) applies only when
    the performer (``albumartist``) differs from the composer
    (``track_artist``) AND at least one explicit signal confirms it:

    1. the genre contains an entry of ``_CLASSICAL_GENRE_KEYWORDS``
       (substring match, so the stem "symphon" covers "Symphony"), or
    2. the track artist contains an entry of
       ``_CLASSICAL_COMPOSER_KEYWORDS`` as a whole word.

    A bare performer != composer mismatch is not sufficient; on its own it
    produced too many false positives for samplers, jazz and folk albums.

    Args:
        albumartist: Album-level artist tag (the performer); may be empty
            or None.
        track_artist: Track-level artist tag (the composer for classical
            releases); may be empty or None.
        genre: Genre tag; may be empty or None.

    Returns:
        True if the track should be named with the classical schema,
        False otherwise.
    """
    import unicodedata  # local import: only needed for accent folding here

    def _fold(text: str) -> str:
        """Casefold *text* and drop diacritics ("Dvořák" -> "dvorak")."""
        # NFKD splits an accented letter into base letter + combining mark;
        # removing the marks lets accented tags match the ASCII keywords.
        decomposed = unicodedata.normalize("NFKD", text or "")
        without_marks = "".join(
            ch for ch in decomposed if not unicodedata.combining(ch)
        )
        return without_marks.casefold().strip()

    aa = (albumartist or "").casefold().strip()
    ta = (track_artist or "").casefold().strip()

    # Guard clauses: placeholders and identical names never qualify.
    if not aa or aa in ("various artists", "unknown artist", "unknown"):
        return False
    if not ta or ta in ("unknown artist", "unknown"):
        return False  # placeholder, not a real composer
    if aa == ta:
        return False  # performer and composer are the same name

    # Primary signal: genre keyword (intentional substring match).
    folded_genre = _fold(genre)
    if any(kw in folded_genre for kw in _CLASSICAL_GENRE_KEYWORDS):
        return True

    # Secondary signal: a known composer surname appears as a whole word.
    # Plain substring matching would accept "Burt Bacharach" ("bach") or
    # "Travelling Band" ("ravel"), so compare runs of letters instead.
    name_words = set(re.findall(r"[^\W\d_]+", _fold(track_artist)))
    return not name_words.isdisjoint(_CLASSICAL_COMPOSER_KEYWORDS)
|
||||
|
||||
|
||||
def _proposed_filename(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue