Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Post processing step #77

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,9 @@ private List<DataSource> buildAudioDataSources()
if (dataSource.getTrackFormat(TrackType.AUDIO) != null) {
result.add(dataSource);
} else {
result.add(new BlankAudioDataSource(dataSource.getDurationUs()));
DataSource blankDataSource = new BlankAudioDataSource(dataSource.getDurationUs());
blankDataSource.setPostProcessor(dataSource.getPostProcessor());
result.add(blankDataSource);
}
}
return result;
Expand Down
38 changes: 26 additions & 12 deletions lib/src/main/java/com/otaliastudios/transcoder/engine/Engine.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@
import com.otaliastudios.transcoder.TranscoderOptions;
import com.otaliastudios.transcoder.internal.TrackTypeMap;
import com.otaliastudios.transcoder.internal.ValidatorException;
import com.otaliastudios.transcoder.postprocessor.AudioPostProcessor;
import com.otaliastudios.transcoder.sink.DataSink;
import com.otaliastudios.transcoder.sink.InvalidOutputFormatException;
import com.otaliastudios.transcoder.source.DataSource;
import com.otaliastudios.transcoder.strategy.TrackStrategy;
import com.otaliastudios.transcoder.time.PresentationTime;
import com.otaliastudios.transcoder.time.TimeInterpolator;
import com.otaliastudios.transcoder.transcode.AudioTrackTranscoder;
import com.otaliastudios.transcoder.transcode.NoOpTrackTranscoder;
Expand Down Expand Up @@ -70,6 +72,7 @@ public interface ProgressCallback {
private final TrackTypeMap<MediaFormat> mOutputFormats = new TrackTypeMap<>();
private volatile double mProgress;
private final ProgressCallback mProgressCallback;
private final PresentationTime mAudioPresentationTime = new PresentationTime();

public Engine(@Nullable ProgressCallback progressCallback) {
mProgressCallback = progressCallback;
Expand Down Expand Up @@ -177,7 +180,9 @@ private void openCurrentStep(@NonNull TrackType type, @NonNull TranscoderOptions
transcoder = new AudioTrackTranscoder(dataSource, mDataSink,
interpolator,
options.getAudioStretcher(),
options.getAudioResampler());
options.getAudioResampler(),
(AudioPostProcessor)dataSource.getPostProcessor(),
mAudioPresentationTime);
break;
default:
throw new RuntimeException("Unknown type: " + type);
Expand Down Expand Up @@ -253,46 +258,55 @@ public long interpolate(@NonNull TrackType type, long time) {
};
}

private long getTrackDurationUs(@NonNull TrackType type) {
private long getTrackDurationUs(@NonNull TrackType type, boolean processedDuration) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain to me why we need two versions of getTrackDurationUs and getTotalDurationUs ? These are used to compute transcoding progress/end so it seems to me that we should not take post processing into account here. Even if the processor transforms 10 seconds into 20 seconds, when the source reaches 10, we'll have decoded 10 seconds and encoded 20 and so we're done.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

processedDuration=true: the duration after the data has gone through the processor. It is used to get the expected total duration, to know if we should force the end of the stream.

processedDuration=false: the duration of the data that has been read. It is used to calculate the progress, because the reading and extracting steps take most of the time: it depends, of course, on what the processors do, but we cannot know that.

In our case, we use this system to mix the sound of the audio track with the data sources (https://github.com/cbernier2/Transcoder/blob/release/lib/src/main/java/com/otaliastudios/transcoder/postprocessor/MixerSourceAudioPostProcessor.java and https://github.com/cbernier2/Transcoder/blob/release/lib/src/main/java/com/otaliastudios/transcoder/postprocessor/MixerTargetAudioPostProcessor.java) ... so one processor only accumulates data and skips it, so the processed duration doesn't increase while the transcoder is doing this step.

if (!mStatuses.require(type).isTranscoding()) return 0L;
int current = mCurrentStep.require(type);
long totalDurationUs = 0;
for (int i = 0; i < mDataSources.require(type).size(); i++) {
DataSource source = mDataSources.require(type).get(i);
long dataSourceDurationUs;
if (i < current) { // getReadUs() is a better approximation for sure.
totalDurationUs += source.getReadUs();
dataSourceDurationUs = source.getReadUs();
} else {
totalDurationUs += source.getDurationUs();
dataSourceDurationUs = source.getDurationUs();
}
if (processedDuration && source.getPostProcessor() != null) {
dataSourceDurationUs = source.getPostProcessor().calculateNewDurationUs(dataSourceDurationUs);
}
totalDurationUs += dataSourceDurationUs;
}
return totalDurationUs;
}

private long getTotalDurationUs() {
boolean hasVideo = hasVideoSources() && mStatuses.requireVideo().isTranscoding();
boolean hasAudio = hasAudioSources() && mStatuses.requireAudio().isTranscoding();
long video = hasVideo ? getTrackDurationUs(TrackType.VIDEO) : Long.MAX_VALUE;
long audio = hasAudio ? getTrackDurationUs(TrackType.AUDIO) : Long.MAX_VALUE;
long video = hasVideo ? getTrackDurationUs(TrackType.VIDEO, true) : Long.MAX_VALUE;
long audio = hasAudio ? getTrackDurationUs(TrackType.AUDIO, true) : Long.MAX_VALUE;
return Math.min(video, audio);
}

private long getTrackReadUs(@NonNull TrackType type) {
private long getTrackProgressUs(@NonNull TrackType type, boolean processedDuration) {
if (!mStatuses.require(type).isTranscoding()) return 0L;
int current = mCurrentStep.require(type);
long completedDurationUs = 0;
for (int i = 0; i < mDataSources.require(type).size(); i++) {
DataSource source = mDataSources.require(type).get(i);
if (i <= current) {
completedDurationUs += source.getReadUs();
long dataSourceReadUs = source.getReadUs();
if (processedDuration && source.getPostProcessor() != null) {
dataSourceReadUs = source.getPostProcessor().calculateNewDurationUs(dataSourceReadUs);
}
completedDurationUs += dataSourceReadUs;
}
}
return completedDurationUs;
}

private double getTrackProgress(@NonNull TrackType type) {
if (!mStatuses.require(type).isTranscoding()) return 0.0D;
long readUs = getTrackReadUs(type);
long totalUs = getTotalDurationUs();
long readUs = getTrackProgressUs(type, false);
long totalUs = getTrackDurationUs(type, false);
LOG.v("getTrackProgress - readUs:" + readUs + ", totalUs:" + totalUs);
if (totalUs == 0) totalUs = 1; // Avoid NaN
return (double) readUs / (double) totalUs;
Expand Down Expand Up @@ -361,8 +375,8 @@ public void transcode(@NonNull TranscoderOptions options) throws InterruptedExce
// This can happen, for example, if user adds 1 minute (video only) with 20 seconds
// of audio. The video track must be stopped once the audio stops.
long totalUs = getTotalDurationUs() + 100 /* tolerance */;
forceAudioEos = getTrackReadUs(TrackType.AUDIO) > totalUs;
forceVideoEos = getTrackReadUs(TrackType.VIDEO) > totalUs;
forceAudioEos = getTrackProgressUs(TrackType.AUDIO, true) > totalUs;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example, here you compare a processed progress (getTrackProgress(true)) with a non-processed duration (getTrackDuration(false)). I don't think this makes sense, we should rather use false for both. But maybe I'm missing something.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In our case we use this system to mix the custom audio track with the videos' audio track, so the initial audio track duration is 2x the duration of the videos. We use https://github.com/cbernier2/Transcoder/blob/release/lib/src/main/java/com/otaliastudios/transcoder/postprocessor/MixerSourceAudioPostProcessor.java to accumulate the audio track data and not write it until it can be mixed: with https://github.com/cbernier2/Transcoder/blob/release/lib/src/main/java/com/otaliastudios/transcoder/postprocessor/MixerTargetAudioPostProcessor.java.

Without processedDuration=true, the transcoder would end the audio track stream before the audio gets mixed... even if nothing has been written to the output file for the audio track.

forceVideoEos = getTrackProgressUs(TrackType.VIDEO, true) > totalUs;

// Now step for transcoders that are not completed.
audioCompleted = isCompleted(TrackType.AUDIO);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.otaliastudios.transcoder.postprocessor;

import androidx.annotation.NonNull;

import java.nio.ShortBuffer;

public interface AudioPostProcessor extends PostProcessor {
/**
* Manipulates the raw audio data inside inputBuffer and put the result in outputBuffer
* @param inputBuffer the input data (as raw audio data)
* @param outputBuffer the data after the manipulation
* @param bufferDurationUs the duration of the input data
* @return the duration of the output data
*/
long postProcess(@NonNull final ShortBuffer inputBuffer, @NonNull final ShortBuffer outputBuffer, long bufferDurationUs);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the number of channels and sample rate should also be here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the number of channels and sample rate should also be here.

Indeed! I will add them.

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.otaliastudios.transcoder.postprocessor;

import androidx.annotation.NonNull;
import java.nio.ShortBuffer;

/**
 * An {@link AudioPostProcessor} that leaves audio untouched: samples are
 * copied to the output unchanged and the duration is preserved.
 */
public class DefaultAudioPostProcessor implements AudioPostProcessor {

    @Override
    public long calculateNewDurationUs(long durationUs) {
        // Pass-through processing never alters the duration.
        return durationUs;
    }

    @Override
    public long postProcess(@NonNull ShortBuffer inputBuffer, @NonNull ShortBuffer outputBuffer, long bufferDurationUs) {
        // Copy every remaining sample as-is.
        while (inputBuffer.hasRemaining()) {
            outputBuffer.put(inputBuffer.get());
        }
        return bufferDurationUs;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.otaliastudios.transcoder.postprocessor;

/**
 * Base interface for processors that transform a data source's raw data
 * before it is sent to the encoder.
 */
public interface PostProcessor {
    /**
     * Returns the duration of the data source once it has been processed
     * (that is, after the postProcess() method has been applied).
     *
     * @param durationUs the original duration in microseconds
     * @return the new duration in microseconds
     */
    long calculateNewDurationUs(long durationUs);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.otaliastudios.transcoder.postprocessor;

import androidx.annotation.NonNull;

import java.nio.ShortBuffer;

/**
 * An {@link AudioPostProcessor} that scales the amplitude of every sample by a
 * fixed volume factor, clamping results to the 16-bit signed range to avoid
 * wrap-around distortion.
 */
public class VolumeAudioPostProcessor implements AudioPostProcessor {

    // Volume multiplier: 1.0 leaves samples unchanged, 0.0 silences them.
    // Final: the factor is fixed at construction time.
    private final float mVolume;

    public VolumeAudioPostProcessor(float volume) {
        mVolume = volume;
    }

    @Override
    public long calculateNewDurationUs(long durationUs) {
        // Changing the volume does not affect the duration.
        return durationUs;
    }

    /**
     * Applies the volume factor to a single sample, clamping into short range.
     */
    private short applyVolume(short sample) {
        float sampleAtVolume = sample * mVolume;
        // Clamp so that an out-of-range value saturates instead of overflowing.
        sampleAtVolume = Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, sampleAtVolume));
        return (short) sampleAtVolume;
    }

    @Override
    public long postProcess(@NonNull ShortBuffer inputBuffer, @NonNull ShortBuffer outputBuffer, long bufferDurationUs) {
        int inputRemaining = inputBuffer.remaining();
        for (int i = 0; i < inputRemaining; i++) {
            outputBuffer.put(applyVolume(inputBuffer.get()));
        }
        return bufferDurationUs;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import androidx.annotation.Nullable;

import com.otaliastudios.transcoder.engine.TrackType;
import com.otaliastudios.transcoder.postprocessor.DefaultAudioPostProcessor;
import com.otaliastudios.transcoder.postprocessor.PostProcessor;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
Expand Down Expand Up @@ -46,6 +48,18 @@ public BlankAudioDataSource(long durationUs) {
audioFormat.setInteger(MediaFormat.KEY_SAMPLE_RATE, SAMPLE_RATE);
}

private PostProcessor postProcessor = new DefaultAudioPostProcessor();

@Override
public void setPostProcessor(@NonNull PostProcessor postProcessor) {
this.postProcessor = postProcessor;
}

@Override
public PostProcessor getPostProcessor() {
return postProcessor;
}

@Override
public int getOrientation() {
return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import androidx.annotation.Nullable;

import com.otaliastudios.transcoder.engine.TrackType;
import com.otaliastudios.transcoder.postprocessor.PostProcessor;

import java.nio.ByteBuffer;

Expand All @@ -14,6 +15,22 @@
*/
public interface DataSource {

/**
* Returns a handler that needs to be applied to the raw data source data
* before it gets encoded.
*
* @return the PostProcessor object
*/
PostProcessor getPostProcessor();

/**
* Sets the handler that needs to be called before the raw data source data
* is sent to the encoder.
*
* @param postProcessor the PostProcessor object
*/
void setPostProcessor(@NonNull PostProcessor postProcessor);

Comment on lines 16 to +33
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this is practical but I think that a DataSource should not hold the processor. It's just a source.

Instead, we could create an internal map like so TranscoderOptions.addProcessor(Processor, DataSource...). So when you add the processor you specify all the sources it applies to

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do!

/**
* Metadata information. Returns the video orientation, or 0.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import androidx.annotation.Nullable;

import com.otaliastudios.transcoder.engine.TrackType;
import com.otaliastudios.transcoder.postprocessor.DefaultAudioPostProcessor;
import com.otaliastudios.transcoder.postprocessor.PostProcessor;

/**
* A {@link DataSource} wrapper that simply delegates all methods to the
Expand All @@ -26,6 +28,18 @@ protected DataSource getSource() {
return mSource;
}

private PostProcessor postProcessor = new DefaultAudioPostProcessor();

@Override
public void setPostProcessor(@NonNull PostProcessor postProcessor) {
this.postProcessor = postProcessor;
}

@Override
public PostProcessor getPostProcessor() {
return postProcessor;
}

@Override
public int getOrientation() {
return mSource.getOrientation();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import com.otaliastudios.transcoder.internal.ISO6709LocationParser;
import com.otaliastudios.transcoder.internal.Logger;
import com.otaliastudios.transcoder.internal.TrackTypeMap;
import com.otaliastudios.transcoder.postprocessor.DefaultAudioPostProcessor;
import com.otaliastudios.transcoder.postprocessor.PostProcessor;

import java.io.IOException;
import java.util.HashSet;
Expand Down Expand Up @@ -58,6 +60,18 @@ private void ensureExtractor() {

protected abstract void applyRetriever(@NonNull MediaMetadataRetriever retriever);

private PostProcessor postProcessor = new DefaultAudioPostProcessor();

@Override
public void setPostProcessor(@NonNull PostProcessor postProcessor) {
this.postProcessor = postProcessor;
}

@Override
public PostProcessor getPostProcessor() {
return postProcessor;
}

@Override
public void selectTrack(@NonNull TrackType type) {
mSelectedTracks.add(type);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.otaliastudios.transcoder.time;

/**
 * Accumulates the total duration of data handed to the encoder so far, which
 * serves as the presentation timestamp for the next encoded buffer.
 */
public class PresentationTime {

    // Running total of encoded duration, in microseconds.
    private long mTotalEncoderDurationUs = 0L;

    /** Adds the given duration (in microseconds) to the running total. */
    public void increaseEncoderDuration(long encoderDurationUs) {
        mTotalEncoderDurationUs = mTotalEncoderDurationUs + encoderDurationUs;
    }

    /** Returns the accumulated duration, i.e. the next presentation time in microseconds. */
    public long getEncoderPresentationTimeUs() {
        return mTotalEncoderDurationUs;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@

import com.otaliastudios.transcoder.engine.TrackType;
import com.otaliastudios.transcoder.internal.MediaCodecBuffers;
import com.otaliastudios.transcoder.postprocessor.AudioPostProcessor;
import com.otaliastudios.transcoder.resample.AudioResampler;
import com.otaliastudios.transcoder.sink.DataSink;
import com.otaliastudios.transcoder.source.DataSource;
import com.otaliastudios.transcoder.stretch.AudioStretcher;
import com.otaliastudios.transcoder.time.PresentationTime;
import com.otaliastudios.transcoder.time.TimeInterpolator;
import com.otaliastudios.transcoder.transcode.internal.AudioEngine;

Expand All @@ -20,8 +22,10 @@
public class AudioTrackTranscoder extends BaseTrackTranscoder {

private TimeInterpolator mTimeInterpolator;
private PresentationTime mPresentationTime;
private AudioStretcher mAudioStretcher;
private AudioResampler mAudioResampler;
private AudioPostProcessor mAudioPostProcessor;
private AudioEngine mAudioEngine;
private MediaCodec mEncoder; // to create the channel
private MediaFormat mEncoderOutputFormat; // to create the channel
Expand All @@ -30,11 +34,15 @@ public AudioTrackTranscoder(@NonNull DataSource dataSource,
@NonNull DataSink dataSink,
@NonNull TimeInterpolator timeInterpolator,
@NonNull AudioStretcher audioStretcher,
@NonNull AudioResampler audioResampler) {
@NonNull AudioResampler audioResampler,
@NonNull AudioPostProcessor audioPostProcessor,
@NonNull PresentationTime presentationTime) {
super(dataSource, dataSink, TrackType.AUDIO);
mTimeInterpolator = timeInterpolator;
mAudioStretcher = audioStretcher;
mAudioResampler = audioResampler;
mAudioPostProcessor = audioPostProcessor;
mPresentationTime = presentationTime;
}

@Override
Expand All @@ -57,7 +65,9 @@ protected void onDecoderOutputFormatChanged(@NonNull MediaCodec decoder, @NonNul
mEncoder, mEncoderOutputFormat,
mTimeInterpolator,
mAudioStretcher,
mAudioResampler);
mAudioResampler,
mAudioPostProcessor,
mPresentationTime);
mEncoder = null;
mEncoderOutputFormat = null;
mTimeInterpolator = null;
Expand Down
Loading