music: improve genre parsing

Rework genre parsing to properly align with the ID3v2 standard.

The ID3v2 genre specification is surprisingly complex. Until now, Auxio
ignored most of it and only parsed the two most common forms: a bare
number ("digits") and a parenthesized number ("(digits)"). For the sake
of completeness, rework Auxio's genre parser to also handle the less
common cases, such as RX/CR, multiple values, and escaping, with an
implementation roughly based on Mutagen's genre parser.
This commit is contained in:
OxygenCobalt 2022-06-04 11:15:54 -06:00
parent 09392ef381
commit 882e24bd12
No known key found for this signature in database
GPG key ID: 37DBE3621FE9AD47
4 changed files with 60 additions and 34 deletions

View file

@ -253,8 +253,7 @@ data class Genre(override val rawName: String?, override val songs: List<Song>)
get() = (rawName ?: MediaStore.UNKNOWN_STRING).hashCode().toLong() get() = (rawName ?: MediaStore.UNKNOWN_STRING).hashCode().toLong()
override val sortName: String? override val sortName: String?
get() = rawName?.id3v2GenreName get() = rawName
override fun resolveName(context: Context) = override fun resolveName(context: Context) = rawName ?: context.getString(R.string.def_genre)
rawName?.id3v2GenreName ?: context.getString(R.string.def_genre)
} }

View file

@ -93,38 +93,62 @@ val String.withoutArticle: String
} }
/** /**
* Decodes the genre name from an ID3(v2) constant. See [genreConstantTable] for the genre constant * Decodes the genre name from an ID3(v2) constant. See [GENRE_TABLE] for the genre constant map
* map that Auxio uses. * that Auxio uses.
*/ */
val String.id3v2GenreName: String val String.id3GenreName: String
get() { get() = parseId3v1Genre() ?: parseId3v2Genre() ?: this
val newName =
when {
// ID3v1, should just be digits
isDigitsOnly() -> genreConstantTable.getOrNull(toInt())
// ID3v2.3/ID3v2.4, parse out the parentheses and get the integer
// Any genres formatted as "(CHARS)" will be ignored.
startsWith('(') && endsWith(')') -> {
// TODO: Technically, the spec for genres is far more complex here. Perhaps we private fun String.parseId3v1Genre(): String? =
// should copy mutagen's implementation? when {
// https://github.com/quodlibet/mutagen/blob/master/mutagen/id3/_frames.py // ID3v1 genres are a plain integer value without formatting, so in that case
// try to index the genre table with such.
isDigitsOnly() -> GENRE_TABLE.getOrNull(toInt())
substring(1 until lastIndex).toIntOrNull()?.let { // CR and RX are not technically ID3v1, but are formatted similarly to a plain number.
genreConstantTable.getOrNull(it) this == "CR" -> "Cover"
} this == "RX" -> "Remix"
} else -> null
// Current name is fine
else -> null
}
return newName ?: this
} }
/**
 * Attempts to decode this string as an ID3v2 (TCON-style) genre value.
 *
 * The whole string is matched against [GENRE_RE]. A leading run of parenthesized IDs such as
 * `(12)(RX)` is resolved one by one through [parseId3v1Genre], and any text following that run is
 * taken as a literal genre name.
 *
 * @return The decoded genre names, de-duplicated in order of first appearance and joined with
 * `", "`, or null if the string does not match [GENRE_RE] or nothing could be decoded from it.
 */
private fun String.parseId3v2Genre(): String? {
    val groups = GENRE_RE.matchEntire(this)?.groups ?: return null

    // ID3v2 genres are far more complex and require string grokking to properly implement.
    // You can read the spec for it here: https://id3.org/id3v2.3.0#TCON
    // This implementation in particular is based off Mutagen's genre parser.

    // Case 1: Genre IDs in the format (DIGITS|RX|CR). Group 1 holds the entire run, e.g.
    // "(1)(2)(RX)". Dropping the outermost parentheses and splitting on ")(" leaves the bare
    // IDs, which are then resolved like ID3v1 genres. IDs that cannot be resolved are dropped.
    // NOTE(review): the pattern accepts digit runs of any length; confirm parseId3v1Genre
    // copes with IDs that do not fit in an Int.
    val idGenres =
        groups[1]
            ?.value
            ?.takeIf { it.isNotEmpty() }
            ?.removeSurrounding("(", ")")
            ?.split(")(")
            ?.mapNotNull { it.parseId3v1Genre() }
            .orEmpty()

    // Case 2: Genre names as a normal string. The only case we have to look out for are
    // escaped strings formatted as ((genre), where the doubled opening parenthesis stands
    // for a single literal one.
    val nameGenre =
        groups[3]?.value?.takeIf { it.isNotEmpty() }?.let { name ->
            if (name.startsWith("((")) name.substring(1) else name
        }

    // An ID and a trailing name may decode to the same genre, so collapse duplicates.
    return (idGenres + listOfNotNull(nameGenre))
        .distinct()
        .joinToString(separator = ", ")
        .ifEmpty { null }
}
/**
 * Pattern for an ID3v2 genre value: zero or more parenthesized IDs followed by optional text.
 * - Group 1: the entire leading run of parenthesized IDs, e.g. `(1)(RX)` (empty if there is none).
 * - Group 2: the content of a single parenthesized ID (digits, `RX`, or `CR`).
 * - Group 3: any text remaining after the ID run, if present.
 */
private val GENRE_RE = Regex("((?:\\(([0-9]+|RX|CR)\\))*)(.+)?")
/** /**
* A complete table of all the constant genre values for ID3(v2), including non-standard extensions. * A complete table of all the constant genre values for ID3(v2), including non-standard extensions.
*/ */
private val genreConstantTable = private val GENRE_TABLE =
arrayOf( arrayOf(
// ID3 Standard // ID3 Standard
"Blues", "Blues",

View file

@ -34,6 +34,7 @@ import kotlinx.coroutines.asExecutor
import org.oxycblt.auxio.music.Indexer import org.oxycblt.auxio.music.Indexer
import org.oxycblt.auxio.music.Song import org.oxycblt.auxio.music.Song
import org.oxycblt.auxio.music.audioUri import org.oxycblt.auxio.music.audioUri
import org.oxycblt.auxio.music.id3GenreName
import org.oxycblt.auxio.music.iso8601year import org.oxycblt.auxio.music.iso8601year
import org.oxycblt.auxio.music.no import org.oxycblt.auxio.music.no
import org.oxycblt.auxio.util.logW import org.oxycblt.auxio.util.logW
@ -169,8 +170,8 @@ class ExoPlayerBackend(private val inner: MediaStoreBackend) : Indexer.Backend {
"TPE1" -> audio.artist = value "TPE1" -> audio.artist = value
// Album artist // Album artist
"TPE2" -> audio.albumArtist = value "TPE2" -> audio.albumArtist = value
// Genre, with the weird ID3v2 rules // Genre, with the weird ID3 rules
"TCON" -> audio.genre = value "TCON" -> audio.genre = value.id3GenreName
} }
} }
@ -196,7 +197,7 @@ class ExoPlayerBackend(private val inner: MediaStoreBackend) : Indexer.Backend {
"ARTIST" -> audio.artist = value "ARTIST" -> audio.artist = value
// Album artist // Album artist
"ALBUMARTIST" -> audio.albumArtist = value "ALBUMARTIST" -> audio.albumArtist = value
// Genre, assumed that ID3v2 rules will apply here too. // Genre, assumed that ID3 rules do not apply here.
"GENRE" -> audio.genre = value "GENRE" -> audio.genre = value
} }
} }

View file

@ -29,6 +29,7 @@ import org.oxycblt.auxio.music.Song
import org.oxycblt.auxio.music.albumCoverUri import org.oxycblt.auxio.music.albumCoverUri
import org.oxycblt.auxio.music.audioUri import org.oxycblt.auxio.music.audioUri
import org.oxycblt.auxio.music.excluded.ExcludedDatabase import org.oxycblt.auxio.music.excluded.ExcludedDatabase
import org.oxycblt.auxio.music.id3GenreName
import org.oxycblt.auxio.music.no import org.oxycblt.auxio.music.no
import org.oxycblt.auxio.music.queryCursor import org.oxycblt.auxio.music.queryCursor
import org.oxycblt.auxio.music.useQuery import org.oxycblt.auxio.music.useQuery
@ -156,11 +157,12 @@ abstract class MediaStoreBackend : Indexer.Backend {
val nameIndex = genreCursor.getColumnIndexOrThrow(MediaStore.Audio.Genres.NAME) val nameIndex = genreCursor.getColumnIndexOrThrow(MediaStore.Audio.Genres.NAME)
while (genreCursor.moveToNext()) { while (genreCursor.moveToNext()) {
// Genre names can be a normal name, an ID3v2 constant, or null. Normal names // Genre names could theoretically be anything, including null for some reason.
// are resolved as usual, but null values don't make sense and are often junk // Null values are junk and should be ignored, but since we cannot assume the
// anyway, so we skip genres that have them. // format a genre was derived from, we have to treat them like they are ID3
// genres, even when they might not be.
val id = genreCursor.getLong(idIndex) val id = genreCursor.getLong(idIndex)
val name = genreCursor.getStringOrNull(nameIndex) ?: continue val name = (genreCursor.getStringOrNull(nameIndex) ?: continue).id3GenreName
context.contentResolverSafe.useQuery( context.contentResolverSafe.useQuery(
MediaStore.Audio.Genres.Members.getContentUri(VOLUME_EXTERNAL, id), MediaStore.Audio.Genres.Members.getContentUri(VOLUME_EXTERNAL, id),