[Android] RxJava + Audio Recording Utils を作ってみた

最近ChatBotを開発していて、文字入力を音声でやりたいという要件があった。つまりSpeech Dictation（音声による文字入力）である。
この部分をAndroidで実装するために音声の録音用Utilsを作成したので共有。

使い方

下記のようにAudioRecordUtilsのインスタンスを作成しAudioRecordUtils#startRecordingを呼びUtilsに開始を知らせる。

そしてAudioRecordUtils#doRecordAudioのObservableをSubscribeして録音を開始する。
ここのObservable内の処理で録音する処理が走るようになっている。

終了はAudioRecordUtils#stopRecordingを呼ぶ。それにより録音ループが終了し、onSuccessで終了後の処理を行うことができる。
ここでは、録音したFileをAudioRecordUtils#getRecordedFileで受け取り、ファイル送信用APIへ渡している。

また、onSuccessでLong[]を受け取っているが別になんでもよく、変更可能である。

// Held as a field so that onClickStopRecording() stops the very instance that is recording.
private AudioRecordUtils recordUtils;

/**
 * Starts a new recording on the I/O scheduler and, once the recording has been stopped,
 * hands the recorded file to {@code sendAudioFile} on the main thread.
 */
private void onClickStartRecording() {
    recordUtils = new AudioRecordUtils();
    // Capture this recording's instance so the callback never sees a later recording's file.
    final AudioRecordUtils recorder = recordUtils;
    recorder.startRecording();
    recorder.doRecordAudio()
            .subscribeOn(Schedulers.io())
            .compose(view.getRxLifecycle().forSingle())
            .observeOn(AndroidSchedulers.mainThread())
            .subscribe(new SingleSubscriber<Long[]>() {
                @Override
                public void onSuccess(Long[] longs) {
                    // longs is {file length, endTime - startTime} as emitted by doRecordAudio();
                    // only the recorded file itself is needed here.
                    sendAudioFile(recorder.getRecordedFile());
                }

                @Override
                public void onError(Throwable error) {
                    logAnalytics.e(error.getMessage());
                }
            });
}

/** Asks the current recording, if any, to stop; the subscriber above then receives the result. */
private void onClickStopRecording() {
    if (recordUtils != null) {
        recordUtils.stopRecording();
    }
}

private void onClickStartRecording() {

AudioRecordUtils recordUtils = new AudioRecordUtils();

recordUtils.startRecording();

recordUtils.doRecordAudio()

.subscribeOn(Schedulers.io())

.compose(view.getRxLifecycle().forSingle())

.observeOn(AndroidSchedulers.mainThread())

.subscribe(new SingleSubscriber<Long[]>() {

@Override

public void onSuccess(Long[] longs) {

sendAudioFile(recordUtils.getRecordedFile());

}

@Override

public void onError(Throwable error) {

logAnalytics.e(error.getMessage());

}

);

}

private void onClickStopRecording() {

recordUtils.stopRecording();

}

なお、録音したFileは取っておく必要がないので、送信直後に削除するようにしている。

file.delete();

1 2	file.delete();

AudioRecordUtils.java

package com.funckyhacker.util;

import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Environment;
import android.os.SystemClock;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

import rx.Single;
import rx.SingleSubscriber;

/**
 * Records microphone audio (16 kHz, mono, 16-bit PCM) into a WAV file and exposes the
 * recording as an RxJava {@link Single}.
 *
 * <p>Usage: call {@link #startRecording()}, subscribe to {@link #doRecordAudio()} on a
 * background scheduler (the recording loop blocks the subscribing thread), and call
 * {@link #stopRecording()} to end the loop and receive the result.
 */
public class AudioRecordUtils {

    private static final int AUDIO_SOURCE = MediaRecorder.AudioSource.MIC;
    private static final int SAMPLE_RATE = 16000; // Hz
    private static final int ENCODING = AudioFormat.ENCODING_PCM_16BIT;
    private static final int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO;
    // Negative when getMinBufferSize() reports an error; checked before recording starts.
    private static final int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING);

    // RIFF sizes are unsigned 32-bit values and ChunkSize covers 36 header bytes plus the
    // sample data (file length - 8, see FileUtils.updateWavHeader), which caps the data size.
    private static final long MAX_WAV_DATA_BYTES = 0xFFFFFFFFL - 36;

    private File mWavFile;
    // volatile: written by the caller of start/stopRecording() while the recording loop may be
    // polling it from the thread the Single was subscribed on.
    private volatile boolean isRecording;

    /** Arms the recorder; the loop in {@link #doRecordAudio()} only runs while this flag is set. */
    public void startRecording() {
        isRecording = true;
    }

    /** Clears the flag so the recording loop exits after its current read. */
    public void stopRecording() {
        isRecording = false;
    }

    /**
     * @return the file of the most recent {@link #doRecordAudio()} subscription, or
     *         {@code null} if no recording has been started yet
     */
    public File getRecordedFile() {
        return mWavFile;
    }

    /** @return {@code true} between {@link #startRecording()} and {@link #stopRecording()} */
    public boolean isRecording() {
        return isRecording;
    }

    /**
     * Creates a {@link Single} that records into a new WAV file when subscribed.
     *
     * <p>The recording runs until {@link #stopRecording()} is called, the subscriber
     * unsubscribes, or the WAV size limit is reached. Exactly one terminal event is emitted:
     * {@code onSuccess} with {@code {file length in bytes, elapsed time from
     * SystemClock.elapsedRealtime()}}, or {@code onError} with the first failure.
     *
     * @return a Single emitting the size and duration of the finished recording
     */
    public Single<Long[]> doRecordAudio() {
        return Single.create((SingleSubscriber<? super Long[]> singleSubscriber) -> {
            final File file = getNewRecordingFile();
            final long duration;
            try {
                duration = recordToFile(file, singleSubscriber);
                // Must run after the output stream has been closed by recordToFile().
                FileUtils.updateWavHeader(file);
            } catch (IOException | RuntimeException ex) {
                // Report the failure once and do NOT fall through to onSuccess.
                if (!singleSubscriber.isUnsubscribed()) {
                    singleSubscriber.onError(ex);
                }
                return;
            }
            if (!singleSubscriber.isUnsubscribed()) {
                singleSubscriber.onSuccess(new Long[]{file.length(), duration});
            }
        });
    }

    /**
     * Runs the blocking record loop, streaming the WAV header and PCM data into {@code file}.
     *
     * @param file       destination WAV file
     * @param subscriber consulted so the loop stops when nobody is listening any more
     * @return elapsed time between starting and stopping the AudioRecord
     * @throws IOException if the configuration is unsupported, reading audio fails, or the
     *                     file cannot be written
     */
    private long recordToFile(File file, SingleSubscriber<?> subscriber) throws IOException {
        if (BUFFER_SIZE <= 0) {
            throw new IOException("Unsupported recording configuration, getMinBufferSize() returned "
                    + BUFFER_SIZE / 2);
        }

        try (FileOutputStream wavOut = new FileOutputStream(file)) {
            final AudioRecord audioRecord =
                    new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_MASK, ENCODING, BUFFER_SIZE);
            try {
                // Write out the wav file header (sizes are patched in afterwards)
                FileUtils.writeWavHeader(wavOut, CHANNEL_MASK, SAMPLE_RATE, ENCODING);

                // Allocated once to avoid per-iteration allocations
                final byte[] buffer = new byte[BUFFER_SIZE];
                long total = 0;

                final long startTime = SystemClock.elapsedRealtime();
                audioRecord.startRecording();
                while (isRecording && total < MAX_WAV_DATA_BYTES && !subscriber.isUnsubscribed()) {
                    final int read = audioRecord.read(buffer, 0, buffer.length);
                    if (read < 0) {
                        // read() reports failures as negative codes, not as exceptions
                        throw new IOException("AudioRecord.read() failed with error code " + read);
                    }
                    // Never write past the WAV size limit; the loop condition ends the recording.
                    final int writable = (int) Math.min(read, MAX_WAV_DATA_BYTES - total);
                    wavOut.write(buffer, 0, writable);
                    total += writable;
                }
                audioRecord.stop();
                return SystemClock.elapsedRealtime() - startTime;
            } finally {
                stopAndRelease(audioRecord);
            }
        }
    }

    /** Best-effort cleanup: stops the AudioRecord if it is still recording, then releases it. */
    private static void stopAndRelease(AudioRecord audioRecord) {
        try {
            if (audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
                audioRecord.stop();
            }
        } catch (IllegalStateException ignored) {
            // Only reached while unwinding from an earlier failure, which is the one to report.
        } finally {
            if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) {
                audioRecord.release();
            }
        }
    }

    /** Creates (and remembers) the destination file, named after the current time in seconds. */
    private File getNewRecordingFile() {
        mWavFile = new File(Environment.getExternalStorageDirectory().getPath(),
                "recording_" + System.currentTimeMillis() / 1000 + ".wav");
        return mWavFile;
    }
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

package com.funckyhacker.util;

import android.media.AudioFormat;

import android.media.AudioRecord;

import android.media.MediaRecorder;

import android.os.Environment;

import android.os.SystemClock;

import java.io.File;

import java.io.FileOutputStream;

import java.io.IOException;

import rx.Single;

import rx.SingleSubscriber;

public class AudioRecordUtils {

private static final int AUDIO_SOURCE = MediaRecorder.AudioSource.MIC;

private static final int SAMPLE_RATE = 16000; // Hz

private static final int ENCODING = AudioFormat.ENCODING_PCM_16BIT;

private static final int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO;

private static final int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING);

private File mWavFile;

private boolean isRecording;

public void startRecording() {

isRecording = true;

}

public void stopRecording() {

isRecording = false;

}

public File getRecordedFile() {

return mWavFile;

}

public boolean isRecording() {

return isRecording;

}

public Single<Long[]> doRecordAudio() {

return Single.create((SingleSubscriber<? super Long[]> singleSubscriber) -> {

final File file = getNewRecordingFile();

AudioRecord audioRecord = null;

FileOutputStream wavOut = null;

long startTime = 0;

long endTime = 0;

try {

// Open our two resources

audioRecord = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_MASK, ENCODING, BUFFER_SIZE);

wavOut = new FileOutputStream(file);

// Write out the wav file header

FileUtils.writeWavHeader(wavOut, CHANNEL_MASK, SAMPLE_RATE, ENCODING);

// Avoiding loop allocations

byte[] buffer = new byte[BUFFER_SIZE];

boolean run = true;

int read;

long total = 0;

// Let's go

startTime = SystemClock.elapsedRealtime();

audioRecord.startRecording();

while (run && isRecording) {

read = audioRecord.read(buffer, 0, buffer.length);

// WAVs cannot be > 4 GB due to the use of 32 bit unsigned integers.

if (total + read > 4294967295L) {

// Write as many bytes as we can before hitting the max size

for (int i = 0; i < read && total <= 4294967295L; i++, total++) {

wavOut.write(buffer[i]);

}

run = false;

} else {

// Write out the entire read buffer

wavOut.write(buffer, 0, read);

total += read;

}

} catch (IOException ex) {

singleSubscriber.onError(ex);

} finally {

if (audioRecord != null) {

try {

if (audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {

audioRecord.stop();

endTime = SystemClock.elapsedRealtime();

}

} catch (IllegalStateException ex) {

singleSubscriber.onError(ex);

}

if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) {

audioRecord.release();

}

if (wavOut != null) {

try {

wavOut.close();

} catch (IOException ex) {

singleSubscriber.onError(ex);

}

try {

// This is not put in the try/catch/finally above since it needs to run

// after we close the FileOutputStream

FileUtils.updateWavHeader(file);

} catch (IOException ex) {

singleSubscriber.onError(ex);

}

singleSubscriber.onSuccess(new Long[]{file.length(), endTime - startTime});

});

}

private File getNewRecordingFile() {

mWavFile = new File(Environment.getExternalStorageDirectory().getPath(),

"recording_" + System.currentTimeMillis() / 1000 + ".wav");

return mWavFile;

}

FileUtils.java

package com.funckyhacker.util;

import android.media.AudioFormat;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * Helpers for writing the 44-byte RIFF/WAVE header of a WAV file whose final size is only
 * known after all sample data has been written.
 */
public class FileUtils {

  /** Size in bytes of the header produced by {@code writeWavHeader}. */
  private static final int WAV_HEADER_SIZE = 44;

  /** fmt-chunk format tag for integer PCM samples. */
  private static final short WAVE_FORMAT_PCM = 1;

  /** fmt-chunk format tag for IEEE floating-point samples. */
  private static final short WAVE_FORMAT_IEEE_FLOAT = 3;

  /**
   * Writes the proper 44-byte RIFF/WAVE header to/for the given stream.
   * Two size fields are left zero since the final stream size is not yet known; call
   * {@link #updateWavHeader(File)} once the file is complete.
   *
   * @param out The stream to write the header to
   * @param channelMask An AudioFormat.CHANNEL_IN_MONO or CHANNEL_IN_STEREO mask
   * @param sampleRate The sample rate in hertz
   * @param encoding An AudioFormat.ENCODING_PCM_8BIT, _16BIT or _FLOAT value
   * @throws IOException if writing to {@code out} fails
   * @throws IllegalArgumentException if the channel mask or encoding is not supported
   */
  public static void writeWavHeader(OutputStream out, int channelMask, int sampleRate, int encoding)
      throws IOException {
    final short channels;
    switch (channelMask) {
    case AudioFormat.CHANNEL_IN_MONO:
      channels = 1;
      break;
    case AudioFormat.CHANNEL_IN_STEREO:
      channels = 2;
      break;
    default:
      throw new IllegalArgumentException("Unacceptable channel mask");
    }

    final short bitDepth;
    short formatTag = WAVE_FORMAT_PCM;
    switch (encoding) {
    case AudioFormat.ENCODING_PCM_8BIT:
      bitDepth = 8;
      break;
    case AudioFormat.ENCODING_PCM_16BIT:
      bitDepth = 16;
      break;
    case AudioFormat.ENCODING_PCM_FLOAT:
      bitDepth = 32;
      // Float samples must not be tagged as integer PCM, or players decode them as 32-bit ints.
      formatTag = WAVE_FORMAT_IEEE_FLOAT;
      break;
    default:
      throw new IllegalArgumentException("Unacceptable encoding");
    }

    writeWavHeader(out, formatTag, channels, sampleRate, bitDepth);
  }

  /**
   * Writes the proper 44-byte RIFF/WAVE header to/for the given stream.
   * Two size fields are left zero since the final stream size is not yet known.
   *
   * @param out The stream to write the header to
   * @param formatTag The fmt-chunk AudioFormat tag (1 = PCM, 3 = IEEE float)
   * @param channels The number of channels
   * @param sampleRate The sample rate in hertz
   * @param bitDepth The bit depth
   * @throws IOException if writing to {@code out} fails
   */
  private static void writeWavHeader(OutputStream out, short formatTag, short channels,
      int sampleRate, short bitDepth) throws IOException {
    final int bytesPerFrame = channels * (bitDepth / 8);

    // All multi-byte integers are little endian as required by the spec
    final ByteBuffer header = ByteBuffer.allocate(WAV_HEADER_SIZE)
        .order(ByteOrder.LITTLE_ENDIAN)
        // RIFF header
        .put(new byte[] {'R', 'I', 'F', 'F'}) // ChunkID
        .putInt(0) // ChunkSize (must be updated later)
        .put(new byte[] {'W', 'A', 'V', 'E'}) // Format
        // fmt subchunk
        .put(new byte[] {'f', 'm', 't', ' '}) // Subchunk1ID
        .putInt(16) // Subchunk1Size
        .putShort(formatTag) // AudioFormat
        .putShort(channels) // NumChannels
        .putInt(sampleRate) // SampleRate
        .putInt(sampleRate * bytesPerFrame) // ByteRate
        .putShort((short) bytesPerFrame) // BlockAlign
        .putShort(bitDepth) // BitsPerSample
        // data subchunk
        .put(new byte[] {'d', 'a', 't', 'a'}) // Subchunk2ID
        .putInt(0); // Subchunk2Size (must be updated later)

    out.write(header.array());
  }

  /**
   * Updates the given wav file's header to include the final chunk sizes.
   *
   * @param wav The wav file to update; must already contain the 44-byte header
   * @throws IOException if the file is shorter than the header (including a missing file,
   *     whose length is 0) or cannot be written
   */
  public static void updateWavHeader(File wav) throws IOException {
    final long length = wav.length();
    if (length < WAV_HEADER_SIZE) {
      // Opening in "rw" mode would otherwise create/corrupt the file with bogus negative sizes.
      throw new IOException("No complete WAV header to update (" + length + " bytes): " + wav);
    }

    // The int casts keep the low 32 bits, which is the unsigned value as long as the file
    // stays within the 4 GB WAV limit.
    final byte[] sizes = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN)
        .putInt((int) (length - 8)) // ChunkSize
        .putInt((int) (length - WAV_HEADER_SIZE)) // Subchunk2Size
        .array();

    try (RandomAccessFile accessWave = new RandomAccessFile(wav, "rw")) {
      // ChunkSize
      accessWave.seek(4);
      accessWave.write(sizes, 0, 4);

      // Subchunk2Size
      accessWave.seek(40);
      accessWave.write(sizes, 4, 4);
    }
  }
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

package com.funckyhacker.util;

import android.media.AudioFormat;

import java.io.File;

import java.io.IOException;

import java.io.OutputStream;

import java.io.RandomAccessFile;

import java.nio.ByteBuffer;

import java.nio.ByteOrder;

public class FileUtils {

/**

* Writes the proper 44-byte RIFF/WAVE header to/for the given stream

* Two size fields are left empty/null since we do not yet know the final stream size

* @param out The stream to write the header to

* @param channelMask An AudioFormat.CHANNEL_* mask

* @param sampleRate The sample rate in hertz

* @param encoding An AudioFormat.ENCODING_PCM_* value

* @throws IOException

public static void writeWavHeader(OutputStream out, int channelMask, int sampleRate, int encoding)

throws IOException {

short channels;

switch (channelMask) {

case AudioFormat.CHANNEL_IN_MONO:

channels = 1;

break;

case AudioFormat.CHANNEL_IN_STEREO:

channels = 2;

break;

default:

throw new IllegalArgumentException("Unacceptable channel mask");

}

short bitDepth;

switch (encoding) {

case AudioFormat.ENCODING_PCM_8BIT:

bitDepth = 8;

break;

case AudioFormat.ENCODING_PCM_16BIT:

bitDepth = 16;

break;

case AudioFormat.ENCODING_PCM_FLOAT:

bitDepth = 32;

break;

default:

throw new IllegalArgumentException("Unacceptable encoding");

}

writeWavHeader(out, channels, sampleRate, bitDepth);

}

/**

* Writes the proper 44-byte RIFF/WAVE header to/for the given stream

* Two size fields are left empty/null since we do not yet know the final stream size

* @param out The stream to write the header to

* @param channels The number of channels

* @param sampleRate The sample rate in hertz

* @param bitDepth The bit depth

* @throws IOException

private static void writeWavHeader(OutputStream out, short channels, int sampleRate, short bitDepth)

throws IOException {

// Convert the multi-byte integers to raw bytes in little endian format as required by the spec

byte[] littleBytes = ByteBuffer.allocate(14)

.order(ByteOrder.LITTLE_ENDIAN)

.putShort(channels)

.putInt(sampleRate)

.putInt(sampleRate * channels * (bitDepth / 8))

.putShort((short) (channels * (bitDepth / 8)))

.putShort(bitDepth)

.array();

// Not necessarily the best, but it's very easy to visualize this way

out.write(new byte[] {

// RIFF header

'R', 'I', 'F', 'F', // ChunkID

0, 0, 0, 0, // ChunkSize (must be updated later)

'W', 'A', 'V', 'E', // Format

// fmt subchunk

'f', 'm', 't', ' ', // Subchunk1ID

16, 0, 0, 0, // Subchunk1Size

1, 0, // AudioFormat

littleBytes[0], littleBytes[1], // NumChannels

littleBytes[2], littleBytes[3], littleBytes[4], littleBytes[5], // SampleRate

littleBytes[6], littleBytes[7], littleBytes[8], littleBytes[9], // ByteRate

littleBytes[10], littleBytes[11], // BlockAlign

littleBytes[12], littleBytes[13], // BitsPerSample

// data subchunk

'd', 'a', 't', 'a', // Subchunk2ID

0, 0, 0, 0, // Subchunk2Size (must be updated later)

});

}

/**

* Updates the given wav file's header to include the final chunk sizes

* @param wav The wav file to update

* @throws IOException

public static void updateWavHeader(File wav) throws IOException {

byte[] sizes = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN)

// There are probably a bunch of different/better ways to calculate

// these two given your circumstances. Cast should be safe since if the WAV is

// > 4 GB we've already made a terrible mistake.

.putInt((int) (wav.length() - 8)) // ChunkSize

.putInt((int) (wav.length() - 44)) // Subchunk2Size

.array();

RandomAccessFile accessWave = null;

//noinspection CaughtExceptionImmediatelyRethrown

try {

accessWave = new RandomAccessFile(wav, "rw");

// ChunkSize

accessWave.seek(4);

accessWave.write(sizes, 0, 4);

// Subchunk2Size

accessWave.seek(40);

accessWave.write(sizes, 4, 4);

} catch (IOException ex) {

// Rethrow but we still close accessWave in our finally

throw ex;

} finally {

if (accessWave != null) {

try {

accessWave.close();

} catch (IOException ex) {

}