I have found the answer for Java: https://stackoverflow.com/a/36357819/202179 and tried to port it to Xamarin.
Here is the code that I've made:
    // MIME type used both to create the output audio format and to select the encoder.
    const string COMPRESSED_AUDIO_FILE_MIME_TYPE = "audio/mp4a-latm";
    // Target bit rate set on the encoder's output format.
    const int COMPRESSED_AUDIO_FILE_BIT_RATE = 64000; // 64kbps
    // Sample rate (Hz) given to the encoder and used to derive presentation timestamps.
    // It has to match the real sample rate of the input audio; a mismatch makes the
    // encoded file play at the wrong speed and pitch.
    const int SAMPLING_RATE = 48000;
    // Size in bytes of the scratch array that input data is read into before it is
    // copied to a codec input buffer.
    const int BUFFER_SIZE = 48000;
    // Timeout passed to the codec's DequeueInputBuffer / DequeueOutputBuffer calls.
    // NOTE(review): those MediaCodec calls document their timeout in microseconds, so
    // the "_IN_MS" suffix looks misleading — confirm the intended unit.
    const int CODEC_TIMEOUT_IN_MS = 5000;
    /// <summary>
    /// Encodes the audio stored at <c>tempFileWavPath</c> to AAC-LC and writes the result
    /// as an MPEG-4 file at <c>fileM4APath</c>, replacing any file already at that path.
    /// </summary>
    /// <remarks>
    /// The input bytes are handed to the encoder as-is and are treated as 16-bit mono PCM
    /// sampled at <c>SAMPLING_RATE</c> (timestamps are computed as two bytes per sample,
    /// one channel). If the real sample rate of the input differs from
    /// <c>SAMPLING_RATE</c>, the output plays at the wrong speed and pitch. No container
    /// header is parsed or skipped.
    /// The encoder, the muxer and the input stream are released even when encoding fails.
    /// </remarks>
    void Compress()
    {
        Java.IO.FileInputStream fis = null;
        MediaMuxer mux = null;
        MediaCodec codec = null;
        bool codecStarted = false;
        bool muxerStarted = false;

        try
        {
            var inputFile = new Java.IO.File(tempFileWavPath);
            fis = new Java.IO.FileInputStream(inputFile);

            var outputFile = new Java.IO.File(fileM4APath);
            if (outputFile.Exists())
            {
                outputFile.Delete();
            }

            mux = new MediaMuxer(outputFile.AbsolutePath, MuxerOutputType.Mpeg4);

            MediaFormat outputFormat = MediaFormat.CreateAudioFormat(COMPRESSED_AUDIO_FILE_MIME_TYPE, SAMPLING_RATE, 1);
            outputFormat.SetInteger(MediaFormat.KeyAacProfile, (int)MediaCodecProfileType.Aacobjectlc);
            outputFormat.SetInteger(MediaFormat.KeyBitRate, COMPRESSED_AUDIO_FILE_BIT_RATE);
            outputFormat.SetInteger(MediaFormat.KeyMaxInputSize, 16384);

            codec = MediaCodec.CreateEncoderByType(COMPRESSED_AUDIO_FILE_MIME_TYPE);
            codec.Configure(outputFormat, null, null, MediaCodecConfigFlags.Encode);
            codec.Start();
            codecStarted = true;

            var outBuffInfo = new MediaCodec.BufferInfo();
            byte[] tempBuffer = new byte[BUFFER_SIZE];
            bool hasMoreData = true;
            bool sawOutputEos = false;
            long presentationTimeUs = 0;
            long totalBytesRead = 0;
            int audioTrackIdx = 0;

            // NOTE: MediaCodec documents the dequeue timeouts in microseconds, so
            // CODEC_TIMEOUT_IN_MS is interpreted as microseconds despite its name.
            do
            {
                // Feed the encoder until it has no free input buffer or the file is exhausted.
                int inputBufIndex = 0;
                while (inputBufIndex != -1 && hasMoreData)
                {
                    inputBufIndex = codec.DequeueInputBuffer(CODEC_TIMEOUT_IN_MS);
                    if (inputBufIndex < 0)
                    {
                        continue;
                    }

                    var dstBuf = codec.GetInputBuffer(inputBufIndex);
                    dstBuf.Clear();

                    // Never ask for more bytes than the scratch array can hold.
                    int bytesToRead = Math.Min(tempBuffer.Length, dstBuf.Limit());
                    int bytesRead = fis.Read(tempBuffer, 0, bytesToRead);
                    if (bytesRead == -1)
                    {
                        // -1 means end of file: signal end-of-stream with an empty buffer.
                        hasMoreData = false;
                        codec.QueueInputBuffer(inputBufIndex, 0, 0, presentationTimeUs, MediaCodecBufferFlags.EndOfStream);
                    }
                    else
                    {
                        totalBytesRead += bytesRead;
                        dstBuf.Put(tempBuffer, 0, bytesRead);
                        codec.QueueInputBuffer(inputBufIndex, 0, bytesRead, presentationTimeUs, 0);
                        // Timestamp of the next chunk: 2 bytes per sample, one channel.
                        presentationTimeUs = 1000000L * (totalBytesRead / 2) / SAMPLING_RATE;
                    }
                }

                // Drain whatever the encoder has produced so far.
                int outputBufIndex = 0;
                while (!sawOutputEos && outputBufIndex != (int)MediaCodecInfoState.TryAgainLater)
                {
                    outputBufIndex = codec.DequeueOutputBuffer(outBuffInfo, CODEC_TIMEOUT_IN_MS);
                    if (outputBufIndex >= 0)
                    {
                        var encodedData = codec.GetOutputBuffer(outputBufIndex);
                        encodedData.Position(outBuffInfo.Offset);
                        encodedData.Limit(outBuffInfo.Offset + outBuffInfo.Size);

                        // Codec-config buffers are skipped rather than written as samples.
                        bool isCodecConfig =
                            (outBuffInfo.Flags & MediaCodecBufferFlags.CodecConfig) != 0 && outBuffInfo.Size != 0;
                        if (!isCodecConfig)
                        {
                            mux.WriteSampleData(audioTrackIdx, encodedData, outBuffInfo);
                        }

                        codec.ReleaseOutputBuffer(outputBufIndex, false);

                        // Test the flag bit (not equality) on the buffer that was actually
                        // dequeued, so extra flag bits cannot keep the loop running forever.
                        if ((outBuffInfo.Flags & MediaCodecBufferFlags.EndOfStream) != 0)
                        {
                            sawOutputEos = true;
                        }
                    }
                    else if (outputBufIndex == (int)MediaCodecInfoState.OutputFormatChanged)
                    {
                        // The encoder reports its final format here; register it as the
                        // muxer's track and start the muxer.
                        outputFormat = codec.OutputFormat;
                        audioTrackIdx = mux.AddTrack(outputFormat);
                        mux.Start();
                        muxerStarted = true;
                    }
                }
            } while (!sawOutputEos);
        }
        finally
        {
            // Nested so that a failure while releasing one resource does not leak the others.
            try
            {
                ReleaseEncoder(codec, codecStarted);
            }
            finally
            {
                try
                {
                    ReleaseMuxer(mux, muxerStarted);
                }
                finally
                {
                    if (fis != null)
                    {
                        fis.Close();
                    }
                }
            }
        }
    }

    // Stops the encoder if it was started, then always releases it.
    private static void ReleaseEncoder(MediaCodec codec, bool started)
    {
        if (codec == null)
        {
            return;
        }

        try
        {
            if (started)
            {
                codec.Stop();
            }
        }
        finally
        {
            codec.Release();
        }
    }

    // Stops the muxer only if it was started (which finalizes the file), then always releases it.
    private static void ReleaseMuxer(MediaMuxer mux, bool started)
    {
        if (mux == null)
        {
            return;
        }

        try
        {
            if (started)
            {
                mux.Stop();
            }
        }
        finally
        {
            mux.Release();
        }
    }
This almost works — the file is converted — but the result sounds as if it were encoded too fast: both the pitch and the speed are too high, and playback is shorter than expected.
It is likely that just some slight change is needed, but I am not sure what. Can anyone suggest?
I was able to reproduce the problem: the resulting file plays too fast whenever SAMPLING_RATE differs from the actual sample rate of the input WAV file.
For example, I downloaded a WAV file whose sample rate is 11025 Hz. With the original value of 48000 in the code, it plays too fast; with 11025, it plays correctly.

So we need to know the sampling rate of the WAV file and then set it in the code.
const int SAMPLING_RATE = 11025;//44100, 48000
                        If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With