Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AV1 rebased with jmt #1996

Closed
wants to merge 13 commits into from
Closed
14 changes: 14 additions & 0 deletions jitsi-media-transform/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@
<artifactId>spotbugs-annotations</artifactId>
<version>${spotbugs.version}</version>
</dependency>
<!-- VOWEL starts -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.21</version>
</dependency>
<!-- VOWEL ends -->

<!-- test -->
<dependency>
Expand Down Expand Up @@ -126,6 +133,13 @@
<version>2.3.1</version>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jcodec/jcodec -->
<dependency>
<groupId>org.jcodec</groupId>
<artifactId>jcodec</artifactId>
<version>0.2.5</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ enum class PayloadTypeEncoding {
VP8,
VP9,
H264,
AV1,
RED,
RTX,
OPUS;
Expand Down Expand Up @@ -124,6 +125,12 @@ class H264PayloadType(
rtcpFeedbackSet: RtcpFeedbackSet = emptySet()
) : VideoPayloadType(pt, PayloadTypeEncoding.H264, parameters = parameters, rtcpFeedbackSet = rtcpFeedbackSet)

/**
 * Payload type description for AV1 video.
 *
 * Forwards [pt], [parameters] and [rtcpFeedbackSet] to [VideoPayloadType] with the encoding fixed to
 * [PayloadTypeEncoding.AV1].
 */
class Av1PayloadType(
    pt: Byte,
    parameters: PayloadTypeParams = ConcurrentHashMap(),
    rtcpFeedbackSet: RtcpFeedbackSet = emptySet()
) : VideoPayloadType(
    pt,
    PayloadTypeEncoding.AV1,
    parameters = parameters,
    rtcpFeedbackSet = rtcpFeedbackSet
)

class RtxPayloadType(
pt: Byte,
parameters: PayloadTypeParams = ConcurrentHashMap()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,14 @@ enum class RtpExtensionType(val uri: String) {
/**
* The URN which identifies the RTP Header Extension for Video Orientation.
*/
VIDEO_ORIENTATION("urn:3gpp:video-orientation");
VIDEO_ORIENTATION("urn:3gpp:video-orientation"),

/**
* [VOWEL] Dependency Descriptor RTP Header Extension
*/
AV1_DEPENDENCY_DESCRIPTOR(
"https://aomediacodec.github.io/av1-rtp-spec/#dependency-descriptor-rtp-header-extension"
);

companion object {
private val uriMap = RtpExtensionType.values().associateBy(RtpExtensionType::uri)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright @ 2023 - present 8x8, Inc.
* Copyright @ 2023 - Vowel, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.jitsi.nlj.rtp.codec.av1

import org.jitsi.nlj.rtp.RtpExtensionType
import org.jitsi.nlj.rtp.codec.av1.dd.BytesView
import org.jitsi.nlj.rtp.codec.av1.dd.DependencyDescriptorReader
import org.jitsi.nlj.rtp.codec.av1.dd.FrameDependencyStructure
import org.jitsi.nlj.rtp.codec.av1.dd.TwoBytesExtNormalizer
import org.jitsi.nlj.util.ReadOnlyStreamInformationStore
import org.jitsi.rtp.rtp.RtpPacket
import java.util.concurrent.ConcurrentHashMap

/**
 * Converts plain [RtpPacket]s into [Av1packet]s by reading the AV1 Dependency Descriptor RTP header extension.
 *
 * The extension id is taken from [streamInformationStore] via the mapping registered for
 * [RtpExtensionType.AV1_DEPENDENCY_DESCRIPTOR]. The [FrameDependencyStructure] returned by the reader for a packet
 * is remembered per SSRC and passed to the reader when the next packet of that SSRC is parsed.
 */
class Av1PacketConverter(val streamInformationStore: ReadOnlyStreamInformationStore) {
    /** Id reported for the dependency descriptor extension; `null` until a mapping has been reported. */
    private var ddExtId: Int? = null

    /** Structure returned by the reader for the most recently parsed packet of each SSRC. */
    private val structures = ConcurrentHashMap<Long, FrameDependencyStructure>()
    private val twoBytesExtNormalizer = TwoBytesExtNormalizer()

    init {
        streamInformationStore.onRtpExtensionMapping(RtpExtensionType.AV1_DEPENDENCY_DESCRIPTOR) {
            ddExtId = it
        }
    }

    /**
     * Parses the dependency descriptor carried by [rtpPacket] and wraps the packet's buffer in an [Av1packet].
     *
     * @param rtpPacket the packet to convert; its buffer, offset and length are reused as-is.
     * @return an [Av1packet] populated from the parsed descriptor.
     * @throws IllegalStateException if no extension id is known yet, if the packet carries no dependency
     * descriptor extension, or if the parsed descriptor has no frame dependencies.
     */
    fun parse(rtpPacket: RtpPacket): Av1packet {
        val ssrc = rtpPacket.ssrc
        val lastStructure = structures[ssrc]

        val extId = checkNotNull(ddExtId) { "missing dd ext id" }

        val twoBytesExtensions = twoBytesExtNormalizer.handle(rtpPacket)

        // Prefer the extension found by the two-byte normalizer, then fall back to the packet's own lookup.
        val ddView = checkNotNull(
            twoBytesExtensions.find { it.id == extId }?.let { BytesView(it) }
                ?: rtpPacket.getHeaderExtension(extId)?.let { BytesView(it) }
        ) {
            "missing dd ext($ddExtId) from $rtpPacket"
        }

        val (descriptor, structure) = DependencyDescriptorReader(ddView, lastStructure).parse()

        structures[ssrc] = structure

        // Bind once and fail with a descriptive message instead of dereferencing with `!!` twice.
        val frameDependencies = checkNotNull(descriptor.frameDependencies) {
            "missing frame dependencies in dd from $rtpPacket"
        }

        return Av1packet(
            rtpPacket.buffer,
            rtpPacket.offset,
            rtpPacket.length,
            isKeyframe = descriptor.isKeyFrame(),
            isStartOfFrame = descriptor.firstPacketInFrame,
            isEndOfFrame = descriptor.lastPacketInFrame,
            frameNumber = descriptor.frameNumber,
            temporalLayerIndex = frameDependencies.temporalId,
            spatialLayerIndex = frameDependencies.spatialId,
            structure = descriptor.attachedStructure
        )
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright @ 2023 - present 8x8, Inc.
* Copyright @ 2023 - Vowel, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.jitsi.nlj.rtp.codec.av1

import org.jitsi.nlj.MediaSourceDesc
import org.jitsi.nlj.PacketInfo
import org.jitsi.nlj.RtpEncodingDesc
import org.jitsi.nlj.RtpLayerDesc
import org.jitsi.nlj.rtp.codec.VideoCodecParser
import org.jitsi.nlj.rtp.codec.av1.dd.FrameDependencyStructure
import org.jitsi.utils.logging2.Logger
import org.jitsi.utils.logging2.createChildLogger

/**
 * [VideoCodecParser] for AV1 that derives the layer description of an encoding from the
 * [FrameDependencyStructure] attached to an [Av1packet].
 */
class Av1Parser(
    sources: Array<MediaSourceDesc>,
    parentLogger: Logger
) : VideoCodecParser(sources) {
    private val logger = createChildLogger(parentLogger)

    /** Encodings we've actually seen. Used to clear out inferred-from-signaling encoding information. */
    private val ssrcsSeen = HashSet<Long>()

    /** Number of resolutions in the last structure that declared any; -1 until one has been seen. */
    private var numSpatialLayers = -1

    /**
     * Records the packet's SSRC and, when the packet carries a dependency structure, updates the layers of the
     * matching encoding. Encodings of the same source whose primary SSRC has not been seen get their layers cleared.
     *
     * [PacketInfo.layeringChanged] is set when the number of resolutions differs from the previously seen one.
     *
     * @param packetInfo packet info whose packet is an [Av1packet].
     */
    override fun parse(packetInfo: PacketInfo) {
        val av1packet = packetInfo.packetAs<Av1packet>()

        ssrcsSeen.add(av1packet.ssrc)

        // Only packets that carry a dependency structure can update the layer description.
        val structure = av1packet.structure ?: return

        val packetSpatialLayers = structure.resolutions.size
        if (packetSpatialLayers > 0) {
            if (numSpatialLayers != -1 && numSpatialLayers != packetSpatialLayers) {
                packetInfo.layeringChanged = true
            }
            numSpatialLayers = packetSpatialLayers
        }

        findSourceDescAndRtpEncodingDesc(av1packet)?.let { (src, enc) ->
            val desc = getScalabilityStructure(
                eid = enc.eid,
                structure = structure,
                ssrc = av1packet.ssrc
            )
            src.setEncodingLayers(desc.layers, av1packet.ssrc)

            for (otherEnc in src.rtpEncodings) {
                if (!ssrcsSeen.contains(otherEnc.primarySSRC)) {
                    src.setEncodingLayers(emptyArray(), otherEnc.primarySSRC)
                }
            }
        }
    }

    /**
     * Builds an [RtpEncodingDesc] with one [RtpLayerDesc] per (spatial id, temporal id) combination found in the
     * templates of [structure].
     *
     * A layer depends on the layer with the same spatial id and the previous temporal id, and soft-depends on the
     * layer with the previous spatial id and the same temporal id. When [structure] has no resolution entry for a
     * spatial id, the layer height is reported as unknown rather than failing.
     *
     * @param eid encoding id assigned to every layer and to the returned encoding.
     * @param structure dependency structure whose templates and resolutions describe the layers.
     * @param ssrc SSRC of the returned encoding.
     * @return the encoding description holding the generated layers.
     */
    fun getScalabilityStructure(eid: Int, structure: FrameDependencyStructure, ssrc: Long): RtpEncodingDesc {
        val spatialIds = structure.templates.map { it.spatialId }.distinct().sorted()
        val temporalIds = structure.templates.map { it.temporalId }.distinct().sorted()
        val layers = ArrayList<RtpLayerDesc>()

        for (s in spatialIds) {
            for (t in temporalIds) {
                /* Because of K-SVC, spatial layer dependencies are soft */
                val softDependencies = if (s > 0) {
                    listOfNotNull(layers.find { it.sid == s - 1 && it.tid == t })
                } else {
                    emptyList()
                }
                val dependencies = if (t > 0) {
                    listOfNotNull(layers.find { it.sid == s && it.tid == t - 1 })
                } else {
                    emptyList()
                }
                // The structure may carry fewer resolutions than spatial ids (possibly none at all).
                // NOTE(review): assumes RtpLayerDesc.NO_HEIGHT exists next to NO_FRAME_RATE - confirm.
                val height = structure.resolutions.getOrNull(s)?.height ?: RtpLayerDesc.NO_HEIGHT

                layers.add(
                    RtpLayerDesc(
                        eid = eid,
                        tid = t,
                        sid = s,
                        height = height,
                        frameRate = RtpLayerDesc.NO_FRAME_RATE,
                        dependencyLayers = dependencies.toTypedArray(),
                        softDependencyLayers = softDependencies.toTypedArray()
                    )
                )
            }
        }

        return RtpEncodingDesc(ssrc, layers.toTypedArray(), eid)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright @ 2023 - present 8x8, Inc.
* Copyright @ 2023 - Vowel, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.jitsi.nlj.rtp.codec.av1

import org.jitsi.nlj.RtpLayerDesc
import org.jitsi.nlj.rtp.ParsedVideoPacket
import org.jitsi.nlj.rtp.codec.av1.dd.FrameDependencyStructure
import org.jitsi.rtp.extensions.bytearray.hashCodeOfSegment
import org.jitsi.utils.logging2.createLogger
import org.jitsi_modified.impl.neomedia.codec.video.vp9.DePacketizer

/**
 * A [ParsedVideoPacket] for AV1 whose frame and layer information was supplied by the caller
 * (see the constructor parameters) rather than parsed from the payload.
 *
 * @param buffer backing byte array of the packet.
 * @param offset offset of the packet within [buffer].
 * @param length length of the packet in bytes.
 * @param frameNumber frame number reported for this packet.
 * @param temporalLayerIndex temporal layer id of the frame.
 * @param spatialLayerIndex spatial layer id of the frame.
 * @param structure dependency structure attached to this packet, or `null` when none is attached.
 */
class Av1packet(
    buffer: ByteArray,
    offset: Int,
    length: Int,
    override val isKeyframe: Boolean,
    override val isStartOfFrame: Boolean,
    override val isEndOfFrame: Boolean,
    val frameNumber: Int,
    val temporalLayerIndex: Int,
    val spatialLayerIndex: Int,
    val structure: FrameDependencyStructure?
) : ParsedVideoPacket(buffer, offset, length, null) {

    override val layerId: Int
        get() = RtpLayerDesc.getIndex(0, spatialLayerIndex, temporalLayerIndex)

    /**
     * Length and hash of the complete RTP payload.
     *
     * The payload is hashed as a whole: an AV1 payload does not start with a VP9 payload descriptor, so
     * interpreting its first bytes as one would produce an arbitrary length and hash range.
     */
    override val payloadVerification: String
        get() {
            val rtpPayloadLength = payloadLength
            val rtpPayloadOffset = payloadOffset
            val hashCode = buffer.hashCodeOfSegment(rtpPayloadOffset, rtpPayloadOffset + rtpPayloadLength)
            return "type=AV1Packet len=$rtpPayloadLength hashCode=$hashCode"
        }

    override fun toString(): String {
        return super.toString() + ", SID=$spatialLayerIndex, TID=$temporalLayerIndex, FrameNumber=$frameNumber"
    }

    /**
     * Creates a copy backed by a cloned buffer that leaves [BYTES_TO_LEAVE_AT_START_OF_PACKET] bytes in front of
     * the packet. All parsed fields, including the [structure] reference, are carried over unchanged.
     */
    override fun clone(): Av1packet {
        return Av1packet(
            cloneBuffer(BYTES_TO_LEAVE_AT_START_OF_PACKET),
            BYTES_TO_LEAVE_AT_START_OF_PACKET,
            length,
            isKeyframe = isKeyframe,
            isStartOfFrame = isStartOfFrame,
            isEndOfFrame = isEndOfFrame,
            frameNumber = frameNumber,
            temporalLayerIndex = temporalLayerIndex,
            spatialLayerIndex = spatialLayerIndex,
            structure = structure
        )
    }

    companion object {
        private val logger = createLogger()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package org.jitsi.nlj.rtp.codec.av1.dd

import org.apache.commons.compress.utils.BitInputStream
import org.jitsi.rtp.rtp.RtpPacket
import java.io.ByteArrayInputStream
import java.nio.ByteOrder

/**
 * Sequential big-endian bit reader over the region `[offset, offset + length)` of [bytes].
 *
 * NOTE(review): `BitInputStream.readBits` is documented to return -1 once the underlying stream is exhausted;
 * that value is passed through by [readInt] unchanged - confirm callers cope with it.
 */
class BytesView(val bytes: ByteArray, val offset: Int, val length: Int) {
    private val bitStream = BitInputStream(ByteArrayInputStream(bytes, offset, length), ByteOrder.BIG_ENDIAN)

    /**
     * View over a two-byte header extension as produced by [TwoBytesExtNormalizer]: covers the first
     * `payloadLength` bytes of its payload array.
     */
    constructor(ext: TwoBytesExtNormalizer.TwoBytesExtension) : this(
        ext.payload,
        0,
        ext.payloadLength
    )

    /**
     * View over a header extension of an [RtpPacket], skipping its first byte
     * (presumably the one-byte extension header - TODO confirm).
     */
    constructor(ext: RtpPacket.HeaderExtension) : this(
        ext.currExtBuffer,
        ext.currExtOffset + 1,
        ext.currExtLength - 1
    )

    /** Reads a single bit and reports whether it is set. */
    fun readBoolean(): Boolean = readInt(1) == 1

    /** Reads the next [bitCount] bits and returns them as an [Int]. */
    fun readInt(bitCount: Int): Int = bitStream.readBits(bitCount).toInt()

    /**
     * Reads a non-symmetric unsigned value whose range has [n] distinct values.
     *
     * With `w` being the bit width of [n] and `m = 2^w - n`, the value occupies `w - 1` bits when it is
     * smaller than `m`, and one additional bit otherwise.
     */
    fun readNonSymmetric(n: Int): Int {
        // Count how many bits are needed to represent n.
        var width = 0
        var remaining = n
        while (remaining != 0) {
            remaining = remaining shr 1
            width += 1
        }

        val threshold = (1 shl width) - n
        val prefix = readInt(width - 1)
        return if (prefix < threshold) {
            prefix
        } else {
            val trailingBit = readInt(1)
            (prefix shl 1) - threshold + trailingBit
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.jitsi.nlj.rtp.codec.av1.dd

/**
 * Decode target indication values, each identified by its numeric [id].
 */
enum class DecodeTargetIndication(val id: Int) {
    /** DecodeTargetInfo symbol '-' */
    NOT_PRESENT(0),

    /** DecodeTargetInfo symbol 'D' */
    DISCARDABLE(1),

    /** DecodeTargetInfo symbol 'S' */
    SWITCH(2),

    /** DecodeTargetInfo symbol 'R' */
    REQUIRED(3);

    companion object {
        /** Lookup table from numeric id to enum constant. */
        private val byId: Map<Int, DecodeTargetIndication> = values().associateBy(DecodeTargetIndication::id)

        /**
         * Returns the constant whose [DecodeTargetIndication.id] equals [id].
         *
         * @throws NoSuchElementException if no constant has that id.
         */
        fun parse(id: Int): DecodeTargetIndication = byId.getValue(id)
    }
}
Loading