music: improve genre parsing

Rework genre parsing to properly align with the ID3v2 standard.

The ID3v2 genre specification is weirdly complex. Auxio ignored most
of that and just parsed out the most common cases for genres (digits
and (digits)). For the sake of completeness, rework Auxio's genre
parser to handle the weirder cases like RX/CR, multiple values,
and escaping, with an implementation roughly based on Mutagen's
genre parser.
This commit is contained in:
OxygenCobalt 2022-06-04 11:15:54 -06:00
parent 09392ef381
commit 882e24bd12
No known key found for this signature in database
GPG key ID: 37DBE3621FE9AD47
4 changed files with 60 additions and 34 deletions

View file

@ -253,8 +253,7 @@ data class Genre(override val rawName: String?, override val songs: List<Song>)
get() = (rawName ?: MediaStore.UNKNOWN_STRING).hashCode().toLong()
override val sortName: String?
get() = rawName?.id3v2GenreName
get() = rawName
override fun resolveName(context: Context) =
rawName?.id3v2GenreName ?: context.getString(R.string.def_genre)
override fun resolveName(context: Context) = rawName ?: context.getString(R.string.def_genre)
}

View file

@ -93,38 +93,62 @@ val String.withoutArticle: String
}
/**
* Decodes the genre name from an ID3(v2) constant. See [genreConstantTable] for the genre constant
* map that Auxio uses.
* Decodes the genre name from an ID3(v2) constant. See [GENRE_TABLE] for the genre constant map
* that Auxio uses.
*/
val String.id3v2GenreName: String
get() {
val newName =
when {
// ID3v1, should just be digits
isDigitsOnly() -> genreConstantTable.getOrNull(toInt())
// ID3v2.3/ID3v2.4, parse out the parentheses and get the integer
// Any genres formatted as "(CHARS)" will be ignored.
startsWith('(') && endsWith(')') -> {
val String.id3GenreName: String
get() = parseId3v1Genre() ?: parseId3v2Genre() ?: this
// TODO: Technically, the spec for genres is far more complex here. Perhaps we
// should copy mutagen's implementation?
// https://github.com/quodlibet/mutagen/blob/master/mutagen/id3/_frames.py
private fun String.parseId3v1Genre(): String? =
when {
// ID3v1 genres are a plain integer value without formatting, so in that case
// try to index the genre table with such.
isDigitsOnly() -> GENRE_TABLE.getOrNull(toInt())
substring(1 until lastIndex).toIntOrNull()?.let {
genreConstantTable.getOrNull(it)
}
}
// Current name is fine
else -> null
}
return newName ?: this
// CR and RX are not technically ID3v1, but are formatted similarly to a plain number.
this == "CR" -> "Cover"
this == "RX" -> "Remix"
else -> null
}
private fun String.parseId3v2Genre(): String? {
val groups = GENRE_RE.matchEntire(this)?.groups ?: return null
val genres = mutableListOf<String>()
// ID3v2 genres are far more complex and require string grokking to properly implement.
// You can read the spec for it here: https://id3.org/id3v2.3.0#TCON
// This implementation in particular is based off Mutagen's genre parser.
// Case 1: Genre IDs in the format (DIGITS|RX|CR). If these exist, parse them as
// ID3v1 tags.
val genreIds = groups[1]
if (genreIds != null && genreIds.value.isNotEmpty()) {
val ids = genreIds.value.substring(1 until genreIds.value.lastIndex).split(")(")
for (id in ids) {
id.parseId3v1Genre()?.let(genres::add)
}
}
// Case 2: Genre names as a normal string. The only case we have to look out for are
// escaped strings formatted as ((genre).
val genreName = groups[3]
if (genreName != null && genreName.value.isNotEmpty()) {
if (genreName.value.startsWith("((")) {
genres.add(genreName.value.substring(1))
} else {
genres.add(genreName.value)
}
}
return genres.distinctBy { it }.joinToString(separator = ", ").ifEmpty { null }
}
private val GENRE_RE = Regex("((?:\\(([0-9]+|RX|CR)\\))*)(.+)?")
/**
* A complete table of all the constant genre values for ID3(v2), including non-standard extensions.
*/
private val genreConstantTable =
private val GENRE_TABLE =
arrayOf(
// ID3 Standard
"Blues",

View file

@ -34,6 +34,7 @@ import kotlinx.coroutines.asExecutor
import org.oxycblt.auxio.music.Indexer
import org.oxycblt.auxio.music.Song
import org.oxycblt.auxio.music.audioUri
import org.oxycblt.auxio.music.id3GenreName
import org.oxycblt.auxio.music.iso8601year
import org.oxycblt.auxio.music.no
import org.oxycblt.auxio.util.logW
@ -169,8 +170,8 @@ class ExoPlayerBackend(private val inner: MediaStoreBackend) : Indexer.Backend {
"TPE1" -> audio.artist = value
// Album artist
"TPE2" -> audio.albumArtist = value
// Genre, with the weird ID3v2 rules
"TCON" -> audio.genre = value
// Genre, with the weird ID3 rules
"TCON" -> audio.genre = value.id3GenreName
}
}
@ -196,7 +197,7 @@ class ExoPlayerBackend(private val inner: MediaStoreBackend) : Indexer.Backend {
"ARTIST" -> audio.artist = value
// Album artist
"ALBUMARTIST" -> audio.albumArtist = value
// Genre, assumed that ID3v2 rules will apply here too.
// Genre, assumed that ID3 rules do not apply here.
"GENRE" -> audio.genre = value
}
}

View file

@ -29,6 +29,7 @@ import org.oxycblt.auxio.music.Song
import org.oxycblt.auxio.music.albumCoverUri
import org.oxycblt.auxio.music.audioUri
import org.oxycblt.auxio.music.excluded.ExcludedDatabase
import org.oxycblt.auxio.music.id3GenreName
import org.oxycblt.auxio.music.no
import org.oxycblt.auxio.music.queryCursor
import org.oxycblt.auxio.music.useQuery
@ -156,11 +157,12 @@ abstract class MediaStoreBackend : Indexer.Backend {
val nameIndex = genreCursor.getColumnIndexOrThrow(MediaStore.Audio.Genres.NAME)
while (genreCursor.moveToNext()) {
// Genre names can be a normal name, an ID3v2 constant, or null. Normal names
// are resolved as usual, but null values don't make sense and are often junk
// anyway, so we skip genres that have them.
// Genre names could theoretically be anything, including null for some reason.
// Null values are junk and should be ignored, but since we cannot assume the
// format a genre was derived from, we have to treat them like they are ID3
// genres, even when they might not be.
val id = genreCursor.getLong(idIndex)
val name = genreCursor.getStringOrNull(nameIndex) ?: continue
val name = (genreCursor.getStringOrNull(nameIndex) ?: continue).id3GenreName
context.contentResolverSafe.useQuery(
MediaStore.Audio.Genres.Members.getContentUri(VOLUME_EXTERNAL, id),