catalog: safer MP4 XMP read

This commit is contained in:
Thibault Deckers 2022-08-26 18:18:42 +02:00
parent 726c8a76a5
commit 71eace6503
11 changed files with 113 additions and 49 deletions

View file

@ -15,7 +15,11 @@ import android.util.Log
import androidx.exifinterface.media.ExifInterface
import com.drew.metadata.file.FileTypeDirectory
import deckers.thibault.aves.channel.calls.Coresult.Companion.safe
import deckers.thibault.aves.metadata.*
import deckers.thibault.aves.metadata.ExifInterfaceHelper
import deckers.thibault.aves.metadata.MediaMetadataRetrieverHelper
import deckers.thibault.aves.metadata.Metadata
import deckers.thibault.aves.metadata.PixyMetaHelper
import deckers.thibault.aves.metadata.metadataextractor.Helper
import deckers.thibault.aves.model.FieldMap
import deckers.thibault.aves.utils.LogUtils
import deckers.thibault.aves.utils.MimeTypes.canReadWithExifInterface
@ -284,7 +288,7 @@ class DebugHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
metadataMap["mimeType"] = metadata.getDirectoriesOfType(FileTypeDirectory::class.java).joinToString { dir ->
if (dir.containsTag(FileTypeDirectory.TAG_DETECTED_FILE_MIME_TYPE)) {
dir.getString(FileTypeDirectory.TAG_DETECTED_FILE_MIME_TYPE)

View file

@ -12,10 +12,10 @@ import com.drew.metadata.xmp.XmpDirectory
import deckers.thibault.aves.channel.calls.Coresult.Companion.safe
import deckers.thibault.aves.channel.calls.Coresult.Companion.safeSuspend
import deckers.thibault.aves.metadata.Metadata
import deckers.thibault.aves.metadata.MetadataExtractorHelper
import deckers.thibault.aves.metadata.MultiPage
import deckers.thibault.aves.metadata.XMP.getSafeStructField
import deckers.thibault.aves.metadata.XMPPropName
import deckers.thibault.aves.metadata.metadataextractor.Helper
import deckers.thibault.aves.model.FieldMap
import deckers.thibault.aves.model.provider.ContentImageProvider
import deckers.thibault.aves.model.provider.ImageProvider
@ -118,7 +118,7 @@ class EmbeddedDataHandler(private val context: Context) : MethodCallHandler {
retriever.embeddedPicture?.let { bytes ->
var embedMimeType: String? = null
bytes.inputStream().use { input ->
MetadataExtractorHelper.readMimeType(input)?.let { embedMimeType = it }
Helper.readMimeType(input)?.let { embedMimeType = it }
}
embedMimeType?.let { mime ->
copyEmbeddedBytes(result, mime, displayName, bytes.inputStream())
@ -158,7 +158,7 @@ class EmbeddedDataHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
// data can be large and stored in "Extended XMP",
// which is returned as a second XMP directory
val xmpDirs = metadata.getDirectoriesOfType(XmpDirectory::class.java)

View file

@ -42,21 +42,6 @@ import deckers.thibault.aves.metadata.Metadata.DIR_EXIF_GEOTIFF
import deckers.thibault.aves.metadata.Metadata.DIR_PNG_TEXTUAL_DATA
import deckers.thibault.aves.metadata.Metadata.getRotationDegreesForExifCode
import deckers.thibault.aves.metadata.Metadata.isFlippedForExifCode
import deckers.thibault.aves.metadata.MetadataExtractorHelper.PNG_ITXT_DIR_NAME
import deckers.thibault.aves.metadata.MetadataExtractorHelper.PNG_LAST_MODIFICATION_TIME_FORMAT
import deckers.thibault.aves.metadata.MetadataExtractorHelper.PNG_TIME_DIR_NAME
import deckers.thibault.aves.metadata.MetadataExtractorHelper.containsGeoTiffTags
import deckers.thibault.aves.metadata.MetadataExtractorHelper.extractGeoKeys
import deckers.thibault.aves.metadata.MetadataExtractorHelper.extractPngProfile
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getDateDigitizedMillis
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getDateModifiedMillis
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getDateOriginalMillis
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getSafeBoolean
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getSafeDateMillis
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getSafeInt
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getSafeRational
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getSafeString
import deckers.thibault.aves.metadata.MetadataExtractorHelper.isPngTextDir
import deckers.thibault.aves.metadata.XMP.doesPropExist
import deckers.thibault.aves.metadata.XMP.getPropArrayItemValues
import deckers.thibault.aves.metadata.XMP.getSafeDateMillis
@ -65,6 +50,22 @@ import deckers.thibault.aves.metadata.XMP.getSafeLocalizedText
import deckers.thibault.aves.metadata.XMP.getSafeString
import deckers.thibault.aves.metadata.XMP.isMotionPhoto
import deckers.thibault.aves.metadata.XMP.isPanorama
import deckers.thibault.aves.metadata.metadataextractor.Helper
import deckers.thibault.aves.metadata.metadataextractor.Helper.PNG_ITXT_DIR_NAME
import deckers.thibault.aves.metadata.metadataextractor.Helper.PNG_LAST_MODIFICATION_TIME_FORMAT
import deckers.thibault.aves.metadata.metadataextractor.Helper.PNG_TIME_DIR_NAME
import deckers.thibault.aves.metadata.metadataextractor.Helper.containsGeoTiffTags
import deckers.thibault.aves.metadata.metadataextractor.Helper.extractGeoKeys
import deckers.thibault.aves.metadata.metadataextractor.Helper.extractPngProfile
import deckers.thibault.aves.metadata.metadataextractor.Helper.getDateDigitizedMillis
import deckers.thibault.aves.metadata.metadataextractor.Helper.getDateModifiedMillis
import deckers.thibault.aves.metadata.metadataextractor.Helper.getDateOriginalMillis
import deckers.thibault.aves.metadata.metadataextractor.Helper.getSafeBoolean
import deckers.thibault.aves.metadata.metadataextractor.Helper.getSafeDateMillis
import deckers.thibault.aves.metadata.metadataextractor.Helper.getSafeInt
import deckers.thibault.aves.metadata.metadataextractor.Helper.getSafeRational
import deckers.thibault.aves.metadata.metadataextractor.Helper.getSafeString
import deckers.thibault.aves.metadata.metadataextractor.Helper.isPngTextDir
import deckers.thibault.aves.model.FieldMap
import deckers.thibault.aves.utils.ContextUtils.queryContentResolverProp
import deckers.thibault.aves.utils.LogUtils
@ -150,7 +151,7 @@ class MetadataFetchHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
foundExif = metadata.directories.any { it is ExifDirectoryBase && it.tagCount > 0 }
foundXmp = metadata.directories.any { it is XmpDirectory && it.tagCount > 0 }
@ -505,7 +506,7 @@ class MetadataFetchHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
foundExif = metadata.directories.any { it is ExifDirectoryBase && it.tagCount > 0 }
foundXmp = metadata.directories.any { it is XmpDirectory && it.tagCount > 0 }
@ -741,7 +742,7 @@ class MetadataFetchHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
for (dir in metadata.getDirectoriesOfType(ExifSubIFDDirectory::class.java)) {
foundExif = true
dir.getSafeRational(ExifDirectoryBase.TAG_FNUMBER) { metadataMap[KEY_APERTURE] = it.numerator.toDouble() / it.denominator }
@ -791,7 +792,7 @@ class MetadataFetchHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
val fields = HashMap<Int, Any?>()
for (dir in metadata.getDirectoriesOfType(ExifIFD0Directory::class.java)) {
if (dir.containsGeoTiffTags()) {
@ -875,7 +876,7 @@ class MetadataFetchHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
foundXmp = metadata.directories.any { it is XmpDirectory && it.tagCount > 0 }
metadata.getDirectoriesOfType(XmpDirectory::class.java).map { it.xmpMeta }.forEach(::processXmp)
}
@ -945,7 +946,7 @@ class MetadataFetchHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
foundXmp = metadata.directories.any { it is XmpDirectory && it.tagCount > 0 }
metadata.getDirectoriesOfType(XmpDirectory::class.java).map { it.xmpMeta }.forEach(::processXmp)
}
@ -1019,7 +1020,7 @@ class MetadataFetchHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
val tag = when (field) {
ExifInterface.TAG_DATETIME -> ExifIFD0Directory.TAG_DATETIME
ExifInterface.TAG_DATETIME_DIGITIZED -> ExifSubIFDDirectory.TAG_DATETIME_DIGITIZED
@ -1088,7 +1089,7 @@ class MetadataFetchHandler(private val context: Context) : MethodCallHandler {
if (canReadWithMetadataExtractor(mimeType)) {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
for (dir in metadata.getDirectoriesOfType(XmpDirectory::class.java)) {
val xmpMeta = dir.xmpMeta

View file

@ -14,6 +14,7 @@ import deckers.thibault.aves.metadata.XMP.countPropArrayItems
import deckers.thibault.aves.metadata.XMP.doesPropExist
import deckers.thibault.aves.metadata.XMP.getSafeLong
import deckers.thibault.aves.metadata.XMP.getSafeStructField
import deckers.thibault.aves.metadata.metadataextractor.Helper
import deckers.thibault.aves.model.FieldMap
import deckers.thibault.aves.utils.LogUtils
import deckers.thibault.aves.utils.MimeTypes
@ -191,7 +192,7 @@ object MultiPage {
try {
Metadata.openSafeInputStream(context, uri, mimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
foundXmp = metadata.directories.any { it is XmpDirectory && it.tagCount > 0 }
metadata.getDirectoriesOfType(XmpDirectory::class.java).map { it.xmpMeta }.forEach(::processXmp)
}

View file

@ -10,6 +10,7 @@ import com.adobe.internal.xmp.XMPException
import com.adobe.internal.xmp.XMPMeta
import com.adobe.internal.xmp.XMPMetaFactory
import com.adobe.internal.xmp.properties.XMPProperty
import deckers.thibault.aves.metadata.metadataextractor.SafeXmpReader
import deckers.thibault.aves.utils.ContextUtils.queryContentResolverProp
import deckers.thibault.aves.utils.LogUtils
import deckers.thibault.aves.utils.MimeTypes
@ -98,7 +99,7 @@ object XMP {
try {
val xmpBytes = context.queryContentResolverProp(uri, mimeType, MediaStore.MediaColumns.XMP)
if (xmpBytes is ByteArray) {
val xmpMeta = XMPMetaFactory.parseFromBuffer(xmpBytes, MetadataExtractorSafeXmpReader.PARSE_OPTIONS)
val xmpMeta = XMPMetaFactory.parseFromBuffer(xmpBytes, SafeXmpReader.PARSE_OPTIONS)
processXmp(xmpMeta)
}
} catch (e: Exception) {

View file

@ -1,4 +1,4 @@
package deckers.thibault.aves.metadata
package deckers.thibault.aves.metadata.metadataextractor
import android.util.Log
import com.drew.imaging.FileType
@ -6,6 +6,7 @@ import com.drew.imaging.FileTypeDetector
import com.drew.imaging.ImageMetadataReader
import com.drew.imaging.jpeg.JpegMetadataReader
import com.drew.imaging.jpeg.JpegSegmentMetadataReader
import com.drew.imaging.mp4.Mp4Reader
import com.drew.lang.ByteArrayReader
import com.drew.lang.Rational
import com.drew.lang.SequentialByteArrayReader
@ -19,14 +20,17 @@ import com.drew.metadata.file.FileTypeDirectory
import com.drew.metadata.iptc.IptcReader
import com.drew.metadata.png.PngDirectory
import com.drew.metadata.xmp.XmpReader
import deckers.thibault.aves.metadata.ExifGeoTiffTags
import deckers.thibault.aves.metadata.GeoTiffKeys
import deckers.thibault.aves.metadata.Metadata
import deckers.thibault.aves.utils.LogUtils
import java.io.BufferedInputStream
import java.io.InputStream
import java.text.SimpleDateFormat
import java.util.*
object MetadataExtractorHelper {
private val LOG_TAG = LogUtils.createTag<MetadataExtractorHelper>()
object Helper {
private val LOG_TAG = LogUtils.createTag<Helper>()
const val PNG_ITXT_DIR_NAME = "PNG-iTXt"
private const val PNG_TEXT_DIR_NAME = "PNG-tEXt"
@ -52,9 +56,10 @@ object MetadataExtractorHelper {
val bufferedInputStream = if (input is BufferedInputStream) input else BufferedInputStream(input)
val fileType = FileTypeDetector.detectFileType(bufferedInputStream)
val metadata = if (fileType == FileType.Jpeg) {
safeReadJpeg(bufferedInputStream)
} else {
val metadata = when (fileType) {
FileType.Jpeg -> safeReadJpeg(bufferedInputStream)
FileType.Mp4 -> safeReadMp4(bufferedInputStream)
else ->
// providing the stream length is risky, as it may crash if it is incorrect
ImageMetadataReader.readMetadata(bufferedInputStream, -1L, fileType)
}
@ -63,13 +68,19 @@ object MetadataExtractorHelper {
return metadata
}
// Reads MP4 metadata from `input` into a fresh metadata container,
// routing every box through `SafeMp4BoxHandler` rather than the library's stock box handler.
private fun safeReadMp4(input: InputStream): com.drew.metadata.Metadata =
    com.drew.metadata.Metadata().also { collected ->
        Mp4Reader.extract(input, SafeMp4BoxHandler(collected))
    }
// Some JPEGs (and other types?) contain XMP with a preposterous number of `DocumentAncestors`.
// This bloated XMP is unsafely loaded in memory by Adobe's `XMPMetaParser.parseInputSource`,
// which easily yields OOM on Android, so we try to detect and strip extended XMP with a modified XMP reader.
private fun safeReadJpeg(input: InputStream): com.drew.metadata.Metadata {
val readers = ArrayList<JpegSegmentMetadataReader>().apply {
addAll(JpegMetadataReader.ALL_READERS.filter { it !is XmpReader })
add(MetadataExtractorSafeXmpReader())
add(SafeXmpReader())
}
val metadata = com.drew.metadata.Metadata()

View file

@ -0,0 +1,22 @@
package deckers.thibault.aves.metadata.metadataextractor
import com.drew.imaging.mp4.Mp4Handler
import com.drew.lang.annotations.NotNull
import com.drew.lang.annotations.Nullable
import com.drew.metadata.Metadata
import com.drew.metadata.mp4.Mp4BoxHandler
import com.drew.metadata.mp4.Mp4BoxTypes
import com.drew.metadata.mp4.Mp4Context
import java.io.IOException
/**
 * MP4 box handler that intercepts user-defined boxes and hands them to
 * [SafeMp4UuidBoxHandler]; every other box is processed by [Mp4BoxHandler].
 */
class SafeMp4BoxHandler(metadata: Metadata) : Mp4BoxHandler(metadata) {
    /**
     * Processes a single MP4 box.
     *
     * A box of type [Mp4BoxTypes.BOX_USER_DEFINED] that carries a payload is forwarded to a
     * new [SafeMp4UuidBoxHandler], and this handler is returned to keep processing later boxes.
     * Anything else (including a user-defined box without payload) goes to the parent implementation.
     */
    @Throws(IOException::class)
    override fun processBox(@NotNull type: String, @Nullable payload: ByteArray?, boxSize: Long, context: Mp4Context?): Mp4Handler<*>? {
        // guard: only user-defined boxes with an actual payload are intercepted
        if (payload == null || type != Mp4BoxTypes.BOX_USER_DEFINED) {
            return super.processBox(type, payload, boxSize, context)
        }

        SafeMp4UuidBoxHandler(metadata).processBox(type, payload, boxSize, context)
        return this
    }
}

View file

@ -0,0 +1,24 @@
package deckers.thibault.aves.metadata.metadataextractor
import com.drew.imaging.mp4.Mp4Handler
import com.drew.metadata.Metadata
import com.drew.metadata.mp4.Mp4Context
import com.drew.metadata.mp4.media.Mp4UuidBoxHandler
import com.drew.metadata.xmp.XmpReader
/**
 * UUID box handler that reads XMP payloads with [SafeXmpReader];
 * any other payload is left to [Mp4UuidBoxHandler].
 */
class SafeMp4UuidBoxHandler(metadata: Metadata) : Mp4UuidBoxHandler(metadata) {
    /**
     * Processes a user-defined box.
     *
     * When the first 16 bytes of [payload] equal [xmpUuid], the remaining bytes are extracted
     * with [SafeXmpReader] and this handler is returned. A missing payload, a payload shorter
     * than 16 bytes, or a different leading identifier is delegated to the parent implementation.
     */
    override fun processBox(type: String?, payload: ByteArray?, boxSize: Long, context: Mp4Context?): Mp4Handler<*> {
        val uuidLength = 16

        // too short to even hold the leading identifier: nothing to intercept
        if (payload == null || payload.size < uuidLength) {
            return super.processBox(type, payload, boxSize, context)
        }

        val leadingUuid = payload.copyOfRange(0, uuidLength)
        if (!leadingUuid.contentEquals(xmpUuid)) {
            return super.processBox(type, payload, boxSize, context)
        }

        // the XMP data starts right after the identifier
        SafeXmpReader().extract(payload, uuidLength, payload.size - uuidLength, metadata, directory)
        return this
    }

    companion object {
        // 16-byte identifier compared against the start of a box payload to recognize XMP
        val xmpUuid = byteArrayOf(
            0xbe.toByte(), 0x7a, 0xcf.toByte(), 0xcb.toByte(),
            0x97.toByte(), 0xa9.toByte(), 0x42, 0xe8.toByte(),
            0x9c.toByte(), 0x71, 0x99.toByte(), 0x94.toByte(),
            0x91.toByte(), 0xe3.toByte(), 0xaf.toByte(), 0xac.toByte(),
        )
    }
}

View file

@ -1,4 +1,4 @@
package deckers.thibault.aves.metadata
package deckers.thibault.aves.metadata.metadataextractor
import android.util.Log
import com.adobe.internal.xmp.XMPException
@ -19,7 +19,7 @@ import com.drew.metadata.xmp.XmpReader
import deckers.thibault.aves.utils.LogUtils
import java.io.IOException
class MetadataExtractorSafeXmpReader : XmpReader() {
class SafeXmpReader : XmpReader() {
// adapted from `XmpReader` to detect and skip large extended XMP
override fun readJpegSegments(segments: Iterable<ByteArray>, metadata: Metadata, segmentType: JpegSegmentType) {
val preambleLength = XMP_JPEG_PREAMBLE.length
@ -132,7 +132,7 @@ class MetadataExtractorSafeXmpReader : XmpReader() {
}
companion object {
private val LOG_TAG = LogUtils.createTag<MetadataExtractorSafeXmpReader>()
private val LOG_TAG = LogUtils.createTag<SafeXmpReader>()
// arbitrary size to detect extended XMP that may yield an OOM
private const val segmentTypeSizeDangerThreshold = 3 * (1 shl 20) // MB

View file

@ -22,10 +22,10 @@ import deckers.thibault.aves.metadata.MediaMetadataRetrieverHelper.getSafeLong
import deckers.thibault.aves.metadata.MediaMetadataRetrieverHelper.getSafeString
import deckers.thibault.aves.metadata.Metadata
import deckers.thibault.aves.metadata.Metadata.getRotationDegreesForExifCode
import deckers.thibault.aves.metadata.MetadataExtractorHelper
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getSafeDateMillis
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getSafeInt
import deckers.thibault.aves.metadata.MetadataExtractorHelper.getSafeLong
import deckers.thibault.aves.metadata.metadataextractor.Helper
import deckers.thibault.aves.metadata.metadataextractor.Helper.getSafeDateMillis
import deckers.thibault.aves.metadata.metadataextractor.Helper.getSafeInt
import deckers.thibault.aves.metadata.metadataextractor.Helper.getSafeLong
import deckers.thibault.aves.utils.MimeTypes
import deckers.thibault.aves.utils.StorageUtils
import deckers.thibault.aves.utils.UriUtils.tryParseId
@ -161,7 +161,7 @@ class SourceEntry {
try {
Metadata.openSafeInputStream(context, uri, sourceMimeType, sizeBytes)?.use { input ->
val metadata = MetadataExtractorHelper.safeRead(input)
val metadata = Helper.safeRead(input)
// do not switch on specific MIME types, as the reported MIME type could be wrong
// (e.g. PNG registered as JPG)

View file

@ -6,7 +6,7 @@ import android.provider.MediaStore
import android.provider.OpenableColumns
import android.util.Log
import deckers.thibault.aves.metadata.Metadata
import deckers.thibault.aves.metadata.MetadataExtractorHelper
import deckers.thibault.aves.metadata.metadataextractor.Helper
import deckers.thibault.aves.model.FieldMap
import deckers.thibault.aves.model.SourceEntry
import deckers.thibault.aves.utils.LogUtils
@ -22,7 +22,7 @@ internal class ContentImageProvider : ImageProvider() {
StorageUtils.openInputStream(context, safeUri)?.use { input ->
// `metadata-extractor` is the most reliable, except for `tiff` (false positives, false negatives)
// cf https://github.com/drewnoakes/metadata-extractor/issues/296
MetadataExtractorHelper.readMimeType(input)?.takeIf { it != MimeTypes.TIFF }?.let {
Helper.readMimeType(input)?.takeIf { it != MimeTypes.TIFF }?.let {
extractorMimeType = it
if (extractorMimeType != sourceMimeType) {
Log.d(LOG_TAG, "source MIME type is $sourceMimeType but extracted MIME type is $extractorMimeType for uri=$uri")