music: improve genre parsing
Rework genre parsing to align properly with the ID3v2 standard. The ID3v2 genre specification is surprisingly complex; Auxio previously ignored most of it and only parsed the most common genre formats (a bare number such as `17`, and a parenthesized number such as `(17)`). For the sake of completeness, rework Auxio's genre parser to handle the less common cases as well, such as RX/CR, multiple values, and escaping, with an implementation roughly based on Mutagen's genre parser.
This commit is contained in:
parent
09392ef381
commit
882e24bd12
4 changed files with 60 additions and 34 deletions
|
@ -253,8 +253,7 @@ data class Genre(override val rawName: String?, override val songs: List<Song>)
|
|||
get() = (rawName ?: MediaStore.UNKNOWN_STRING).hashCode().toLong()
|
||||
|
||||
override val sortName: String?
|
||||
get() = rawName?.id3v2GenreName
|
||||
get() = rawName
|
||||
|
||||
override fun resolveName(context: Context) =
|
||||
rawName?.id3v2GenreName ?: context.getString(R.string.def_genre)
|
||||
override fun resolveName(context: Context) = rawName ?: context.getString(R.string.def_genre)
|
||||
}
|
||||
|
|
|
@ -93,38 +93,62 @@ val String.withoutArticle: String
|
|||
}
|
||||
|
||||
/**
|
||||
* Decodes the genre name from an ID3(v2) constant. See [genreConstantTable] for the genre constant
|
||||
* map that Auxio uses.
|
||||
* Decodes the genre name from an ID3(v2) constant. See [GENRE_TABLE] for the genre constant map
|
||||
* that Auxio uses.
|
||||
*/
|
||||
val String.id3v2GenreName: String
|
||||
get() {
|
||||
val newName =
|
||||
when {
|
||||
// ID3v1, should just be digits
|
||||
isDigitsOnly() -> genreConstantTable.getOrNull(toInt())
|
||||
// ID3v2.3/ID3v2.4, parse out the parentheses and get the integer
|
||||
// Any genres formatted as "(CHARS)" will be ignored.
|
||||
startsWith('(') && endsWith(')') -> {
|
||||
val String.id3GenreName: String
|
||||
get() = parseId3v1Genre() ?: parseId3v2Genre() ?: this
|
||||
|
||||
// TODO: Technically, the spec for genres is far more complex here. Perhaps we
|
||||
// should copy mutagen's implementation?
|
||||
// https://github.com/quodlibet/mutagen/blob/master/mutagen/id3/_frames.py
|
||||
private fun String.parseId3v1Genre(): String? =
|
||||
when {
|
||||
// ID3v1 genres are a plain integer value without formatting, so in that case
|
||||
// try to index the genre table with such.
|
||||
isDigitsOnly() -> GENRE_TABLE.getOrNull(toInt())
|
||||
|
||||
substring(1 until lastIndex).toIntOrNull()?.let {
|
||||
genreConstantTable.getOrNull(it)
|
||||
}
|
||||
}
|
||||
// Current name is fine
|
||||
else -> null
|
||||
}
|
||||
|
||||
return newName ?: this
|
||||
// CR and RX are not technically ID3v1, but are formatted similarly to a plain number.
|
||||
this == "CR" -> "Cover"
|
||||
this == "RX" -> "Remix"
|
||||
else -> null
|
||||
}
|
||||
|
||||
/**
 * Parses this string as an ID3v2 genre field (TCON) and resolves it into a readable name.
 *
 * The field is matched against [GENRE_RE], which splits it into an optional run of
 * parenthesized genre IDs (e.g. `(51)(39)`, `(RX)`) followed by an optional free-form genre
 * name. Every ID is resolved through [parseId3v1Genre]; IDs that cannot be resolved are dropped.
 * A free-form name that starts with `((` has its first parenthesis removed, as `((` is the
 * escape for a literal `(`.
 *
 * You can read the spec for the format here: https://id3.org/id3v2.3.0#TCON
 * This implementation in particular is based off Mutagen's genre parser.
 *
 * @return The distinct resolved genre names joined with `", "`, or null if the string does not
 * match [GENRE_RE] or no genre name could be resolved from it.
 */
private fun String.parseId3v2Genre(): String? {
    val groups = GENRE_RE.matchEntire(this)?.groups ?: return null
    val genres = mutableListOf<String>()

    // Case 1: Genre IDs in the format (DIGITS|RX|CR). If these exist, parse them as
    // ID3v1 tags.
    val genreIds = groups[1]?.value.orEmpty()
    if (genreIds.isNotEmpty()) {
        genreIds
            // Strip the outermost parentheses so that splitting on ")(" yields the bare IDs.
            .substring(1, genreIds.lastIndex)
            .split(")(")
            // parseId3v1Genre converts digit-only strings with toInt(), which throws when the
            // number does not fit into an Int. Such an ID cannot index the genre table anyway,
            // so skip it here instead of letting a malformed tag crash the parser.
            .filter { id -> id == "RX" || id == "CR" || id.toIntOrNull() != null }
            .mapNotNullTo(genres) { id -> id.parseId3v1Genre() }
    }

    // Case 2: Genre names as a normal string. The only case we have to look out for are
    // escaped strings formatted as ((genre).
    val genreName = groups[3]?.value.orEmpty()
    if (genreName.isNotEmpty()) {
        genres.add(if (genreName.startsWith("((")) genreName.substring(1) else genreName)
    }

    // An ID and the free-form name may resolve to the same genre, so de-duplicate first.
    return genres.distinct().joinToString(separator = ", ").ifEmpty { null }
}
|
||||
|
||||
/**
 * Pattern for an ID3v2 genre field. The capture groups are:
 * 1. The whole (possibly empty) run of parenthesized IDs, e.g. `(51)(39)`. Each ID is either a
 *    sequence of digits, `RX`, or `CR`.
 * 2. The ID inside the last parenthesized entry of that run.
 * 3. Any text remaining after the run of IDs, absent if there is none.
 */
private val GENRE_RE = "((?:\\(([0-9]+|RX|CR)\\))*)(.+)?".toRegex()
|
||||
|
||||
/**
|
||||
* A complete table of all the constant genre values for ID3(v2), including non-standard extensions.
|
||||
*/
|
||||
private val genreConstantTable =
|
||||
private val GENRE_TABLE =
|
||||
arrayOf(
|
||||
// ID3 Standard
|
||||
"Blues",
|
||||
|
|
|
@ -34,6 +34,7 @@ import kotlinx.coroutines.asExecutor
|
|||
import org.oxycblt.auxio.music.Indexer
|
||||
import org.oxycblt.auxio.music.Song
|
||||
import org.oxycblt.auxio.music.audioUri
|
||||
import org.oxycblt.auxio.music.id3GenreName
|
||||
import org.oxycblt.auxio.music.iso8601year
|
||||
import org.oxycblt.auxio.music.no
|
||||
import org.oxycblt.auxio.util.logW
|
||||
|
@ -169,8 +170,8 @@ class ExoPlayerBackend(private val inner: MediaStoreBackend) : Indexer.Backend {
|
|||
"TPE1" -> audio.artist = value
|
||||
// Album artist
|
||||
"TPE2" -> audio.albumArtist = value
|
||||
// Genre, with the weird ID3v2 rules
|
||||
"TCON" -> audio.genre = value
|
||||
// Genre, with the weird ID3 rules
|
||||
"TCON" -> audio.genre = value.id3GenreName
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -196,7 +197,7 @@ class ExoPlayerBackend(private val inner: MediaStoreBackend) : Indexer.Backend {
|
|||
"ARTIST" -> audio.artist = value
|
||||
// Album artist
|
||||
"ALBUMARTIST" -> audio.albumArtist = value
|
||||
// Genre, assumed that ID3v2 rules will apply here too.
|
||||
// Genre, assumed that ID3 rules do not apply here.
|
||||
"GENRE" -> audio.genre = value
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.oxycblt.auxio.music.Song
|
|||
import org.oxycblt.auxio.music.albumCoverUri
|
||||
import org.oxycblt.auxio.music.audioUri
|
||||
import org.oxycblt.auxio.music.excluded.ExcludedDatabase
|
||||
import org.oxycblt.auxio.music.id3GenreName
|
||||
import org.oxycblt.auxio.music.no
|
||||
import org.oxycblt.auxio.music.queryCursor
|
||||
import org.oxycblt.auxio.music.useQuery
|
||||
|
@ -156,11 +157,12 @@ abstract class MediaStoreBackend : Indexer.Backend {
|
|||
val nameIndex = genreCursor.getColumnIndexOrThrow(MediaStore.Audio.Genres.NAME)
|
||||
|
||||
while (genreCursor.moveToNext()) {
|
||||
// Genre names can be a normal name, an ID3v2 constant, or null. Normal names
|
||||
// are resolved as usual, but null values don't make sense and are often junk
|
||||
// anyway, so we skip genres that have them.
|
||||
// Genre names could theoretically be anything, including null for some reason.
|
||||
// Null values are junk and should be ignored, but since we cannot assume the
|
||||
// format a genre was derived from, we have to treat them like they are ID3
|
||||
// genres, even when they might not be.
|
||||
val id = genreCursor.getLong(idIndex)
|
||||
val name = genreCursor.getStringOrNull(nameIndex) ?: continue
|
||||
val name = (genreCursor.getStringOrNull(nameIndex) ?: continue).id3GenreName
|
||||
|
||||
context.contentResolverSafe.useQuery(
|
||||
MediaStore.Audio.Genres.Members.getContentUri(VOLUME_EXTERNAL, id),
|
||||
|
|
Loading…
Reference in a new issue