From 882e24bd12cde1c0b89b2848bf3da5932cb39693 Mon Sep 17 00:00:00 2001 From: OxygenCobalt Date: Sat, 4 Jun 2022 11:15:54 -0600 Subject: [PATCH] music: improve genre parsing Rework genre parsing to properly align with the ID3v2 standard. The ID3v2 genre specification is weirdly complex. Auxio ignored most of that and just parsed out the most common cases for genres (digits and (digits)). For the sake of completeness, rework Auxio's genre parser to handle the weirder cases like RX/CR, multiple values, and escaping, with an implementation roughly based on Mutagen's genre parser. --- .../java/org/oxycblt/auxio/music/Music.kt | 5 +- .../java/org/oxycblt/auxio/music/MusicUtil.kt | 72 ++++++++++++------- .../auxio/music/backend/ExoPlayerBackend.kt | 7 +- .../auxio/music/backend/MediaStoreBackend.kt | 10 +-- 4 files changed, 60 insertions(+), 34 deletions(-) diff --git a/app/src/main/java/org/oxycblt/auxio/music/Music.kt b/app/src/main/java/org/oxycblt/auxio/music/Music.kt index 964161d7d..40ee1aaa9 100644 --- a/app/src/main/java/org/oxycblt/auxio/music/Music.kt +++ b/app/src/main/java/org/oxycblt/auxio/music/Music.kt @@ -253,8 +253,7 @@ data class Genre(override val rawName: String?, override val songs: List) get() = (rawName ?: MediaStore.UNKNOWN_STRING).hashCode().toLong() override val sortName: String? - get() = rawName?.id3v2GenreName + get() = rawName - override fun resolveName(context: Context) = - rawName?.id3v2GenreName ?: context.getString(R.string.def_genre) + override fun resolveName(context: Context) = rawName ?: context.getString(R.string.def_genre) } diff --git a/app/src/main/java/org/oxycblt/auxio/music/MusicUtil.kt b/app/src/main/java/org/oxycblt/auxio/music/MusicUtil.kt index 500ed2709..2c9083929 100644 --- a/app/src/main/java/org/oxycblt/auxio/music/MusicUtil.kt +++ b/app/src/main/java/org/oxycblt/auxio/music/MusicUtil.kt @@ -93,38 +93,62 @@ val String.withoutArticle: String } /** - * Decodes the genre name from an ID3(v2) constant. See [genreConstantTable] for the genre constant - * map that Auxio uses. + * Decodes the genre name from an ID3(v2) constant. See [GENRE_TABLE] for the genre constant map + * that Auxio uses. */ -val String.id3v2GenreName: String - get() { - val newName = - when { - // ID3v1, should just be digits - isDigitsOnly() -> genreConstantTable.getOrNull(toInt()) - // ID3v2.3/ID3v2.4, parse out the parentheses and get the integer - // Any genres formatted as "(CHARS)" will be ignored. - startsWith('(') && endsWith(')') -> { +val String.id3GenreName: String + get() = parseId3v1Genre() ?: parseId3v2Genre() ?: this - // TODO: Technically, the spec for genres is far more complex here. Perhaps we - // should copy mutagen's implementation? - // https://github.com/quodlibet/mutagen/blob/master/mutagen/id3/_frames.py +private fun String.parseId3v1Genre(): String? = + when { + // ID3v1 genres are a plain integer value without formatting, so in that case + // try to index the genre table with such. + isDigitsOnly() -> GENRE_TABLE.getOrNull(toInt()) - substring(1 until lastIndex).toIntOrNull()?.let { - genreConstantTable.getOrNull(it) - } - } - // Current name is fine - else -> null - } - - return newName ?: this + // CR and RX are not technically ID3v1, but are formatted similarly to a plain number. + this == "CR" -> "Cover" + this == "RX" -> "Remix" + else -> null } +private fun String.parseId3v2Genre(): String? { + val groups = GENRE_RE.matchEntire(this)?.groups ?: return null + val genres = mutableListOf() + + // ID3v2 genres are far more complex and require string grokking to properly implement. + // You can read the spec for it here: https://id3.org/id3v2.3.0#TCON + // This implementation in particular is based off Mutagen's genre parser. + + // Case 1: Genre IDs in the format (DIGITS|RX|CR). If these exist, parse them as + // ID3v1 tags. + val genreIds = groups[1] + if (genreIds != null && genreIds.value.isNotEmpty()) { + val ids = genreIds.value.substring(1 until genreIds.value.lastIndex).split(")(") + for (id in ids) { + id.parseId3v1Genre()?.let(genres::add) + } + } + + // Case 2: Genre names as a normal string. The only case we have to look out for are + // escaped strings formatted as ((genre). + val genreName = groups[3] + if (genreName != null && genreName.value.isNotEmpty()) { + if (genreName.value.startsWith("((")) { + genres.add(genreName.value.substring(1)) + } else { + genres.add(genreName.value) + } + } + + return genres.distinctBy { it }.joinToString(separator = ", ").ifEmpty { null } +} + +private val GENRE_RE = Regex("((?:\\(([0-9]+|RX|CR)\\))*)(.+)?") + /** * A complete table of all the constant genre values for ID3(v2), including non-standard extensions. */ -private val genreConstantTable = +private val GENRE_TABLE = arrayOf( // ID3 Standard "Blues", diff --git a/app/src/main/java/org/oxycblt/auxio/music/backend/ExoPlayerBackend.kt b/app/src/main/java/org/oxycblt/auxio/music/backend/ExoPlayerBackend.kt index fc75301d3..08ca676af 100644 --- a/app/src/main/java/org/oxycblt/auxio/music/backend/ExoPlayerBackend.kt +++ b/app/src/main/java/org/oxycblt/auxio/music/backend/ExoPlayerBackend.kt @@ -34,6 +34,7 @@ import kotlinx.coroutines.asExecutor import org.oxycblt.auxio.music.Indexer import org.oxycblt.auxio.music.Song import org.oxycblt.auxio.music.audioUri +import org.oxycblt.auxio.music.id3GenreName import org.oxycblt.auxio.music.iso8601year import org.oxycblt.auxio.music.no import org.oxycblt.auxio.util.logW @@ -169,8 +170,8 @@ class ExoPlayerBackend(private val inner: MediaStoreBackend) : Indexer.Backend { "TPE1" -> audio.artist = value // Album artist "TPE2" -> audio.albumArtist = value - // Genre, with the weird ID3v2 rules - "TCON" -> audio.genre = value + // Genre, with the weird ID3 rules + "TCON" -> audio.genre = value.id3GenreName } } @@ -196,7 +197,7 @@ class ExoPlayerBackend(private val inner: MediaStoreBackend) : Indexer.Backend { "ARTIST" -> audio.artist = value // Album artist "ALBUMARTIST" -> audio.albumArtist = value - // Genre, assumed that ID3v2 rules will apply here too. + // Genre, assumed that ID3 rules do not apply here. "GENRE" -> audio.genre = value } } diff --git a/app/src/main/java/org/oxycblt/auxio/music/backend/MediaStoreBackend.kt b/app/src/main/java/org/oxycblt/auxio/music/backend/MediaStoreBackend.kt index 4d356f38b..f72584207 100644 --- a/app/src/main/java/org/oxycblt/auxio/music/backend/MediaStoreBackend.kt +++ b/app/src/main/java/org/oxycblt/auxio/music/backend/MediaStoreBackend.kt @@ -29,6 +29,7 @@ import org.oxycblt.auxio.music.Song import org.oxycblt.auxio.music.albumCoverUri import org.oxycblt.auxio.music.audioUri import org.oxycblt.auxio.music.excluded.ExcludedDatabase +import org.oxycblt.auxio.music.id3GenreName import org.oxycblt.auxio.music.no import org.oxycblt.auxio.music.queryCursor import org.oxycblt.auxio.music.useQuery @@ -156,11 +157,12 @@ abstract class MediaStoreBackend : Indexer.Backend { val nameIndex = genreCursor.getColumnIndexOrThrow(MediaStore.Audio.Genres.NAME) while (genreCursor.moveToNext()) { - // Genre names can be a normal name, an ID3v2 constant, or null. Normal names - // are resolved as usual, but null values don't make sense and are often junk - // anyway, so we skip genres that have them. + // Genre names could theoretically be anything, including null for some reason. + // Null values are junk and should be ignored, but since we cannot assume the + // format a genre was derived from, we have to treat them like they are ID3 + // genres, even when they might not be. val id = genreCursor.getLong(idIndex) - val name = genreCursor.getStringOrNull(nameIndex) ?: continue + val name = (genreCursor.getStringOrNull(nameIndex) ?: continue).id3GenreName context.contentResolverSafe.useQuery( MediaStore.Audio.Genres.Members.getContentUri(VOLUME_EXTERNAL, id),