最近ChatBotを開発していて、文字入力を音声でやりたいという要件があった。つまりSpeech Dictationである。
この部分をAndroidで実装するために音声の録音用Utilsを作成したので共有。
使い方
下記のようにAudioRecordUtilsのインスタンスを作成しAudioRecordUtils#startRecording
を呼びUtilsに開始を知らせる。
そしてAudioRecordUtils#doRecordAudio
が返すSingleをSubscribeして録音を開始する。
このSingle内の処理で録音する処理が走るようになっている。
終了はAudioRecordUtils#stopRecording
を呼ぶ。それにより、onSuccess
で終了後の処理を行うことができる。
ここでは、ファイル送信用APIへ録音したFileをAudioRecordUtils#getRecordedFile
で受け取って渡している。
また、onSuccess
でLong[]
を受け取っているが別になんでもよく、変更可能である。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
private void onClickStartRecording() { AudioRecordUtils recordUtils = new AudioRecordUtils(); recordUtils.startRecording(); recordUtils.doRecordAudio() .subscribeOn(Schedulers.io()) .compose(view.getRxLifecycle().forSingle()) .observeOn(AndroidSchedulers.mainThread()) .subscribe(new SingleSubscriber<Long[]>() { @Override public void onSuccess(Long[] longs) { sendAudioFile(recordUtils.getRecordedFile()); } @Override public void onError(Throwable error) { logAnalytics.e(error.getMessage()); } } ); } private void onClickStopRecording() { recordUtils.stopRecording(); } |
なお、録音したFileは取っておく必要がないので、送信直後に削除するようにしている。
1 2 |
// Remove the recorded file once it has been sent; it is not needed afterwards.
// Note: the boolean result of delete() is not checked here, so a failed deletion goes unnoticed.
file.delete(); |
AudioRecordUtils.java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
package com.funckyhacker.util; import android.media.AudioFormat; import android.media.AudioRecord; import android.media.MediaRecorder; import android.os.Environment; import android.os.SystemClock; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import rx.Single; import rx.SingleSubscriber; public class AudioRecordUtils { private static final int AUDIO_SOURCE = MediaRecorder.AudioSource.MIC; private static final int SAMPLE_RATE = 16000; // Hz private static final int ENCODING = AudioFormat.ENCODING_PCM_16BIT; private static final int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; private static final int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); private File mWavFile; private boolean isRecording; public void startRecording() { isRecording = true; } public void stopRecording() { isRecording = false; } public File getRecordedFile() { return mWavFile; } public boolean isRecording() { return isRecording; } public Single<Long[]> doRecordAudio() { return Single.create((SingleSubscriber<? super Long[]> singleSubscriber) -> { final File file = getNewRecordingFile(); AudioRecord audioRecord = null; FileOutputStream wavOut = null; long startTime = 0; long endTime = 0; try { // Open our two resources audioRecord = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_MASK, ENCODING, BUFFER_SIZE); wavOut = new FileOutputStream(file); // Write out the wav file header FileUtils.writeWavHeader(wavOut, CHANNEL_MASK, SAMPLE_RATE, ENCODING); // Avoiding loop allocations byte[] buffer = new byte[BUFFER_SIZE]; boolean run = true; int read; long total = 0; // Let's go startTime = SystemClock.elapsedRealtime(); audioRecord.startRecording(); while (run && isRecording) { read = audioRecord.read(buffer, 0, buffer.length); // WAVs cannot be > 4 GB due to the use of 32 bit unsigned integers. 
if (total + read > 4294967295L) { // Write as many bytes as we can before hitting the max size for (int i = 0; i < read && total <= 4294967295L; i++, total++) { wavOut.write(buffer[i]); } run = false; } else { // Write out the entire read buffer wavOut.write(buffer, 0, read); total += read; } } } catch (IOException ex) { singleSubscriber.onError(ex); } finally { if (audioRecord != null) { try { if (audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) { audioRecord.stop(); endTime = SystemClock.elapsedRealtime(); } } catch (IllegalStateException ex) { singleSubscriber.onError(ex); } if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) { audioRecord.release(); } } if (wavOut != null) { try { wavOut.close(); } catch (IOException ex) { singleSubscriber.onError(ex); } } } try { // This is not put in the try/catch/finally above since it needs to run // after we close the FileOutputStream FileUtils.updateWavHeader(file); } catch (IOException ex) { singleSubscriber.onError(ex); } singleSubscriber.onSuccess(new Long[]{file.length(), endTime - startTime}); }); } private File getNewRecordingFile() { mWavFile = new File(Environment.getExternalStorageDirectory().getPath(), "recording_" + System.currentTimeMillis() / 1000 + ".wav"); return mWavFile; } } |
FileUtils.java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
package com.funckyhacker.util; import android.media.AudioFormat; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.ByteOrder; public class FileUtils { /** * Writes the proper 44-byte RIFF/WAVE header to/for the given stream * Two size fields are left empty/null since we do not yet know the final stream size * * @param out The stream to write the header to * @param channelMask An AudioFormat.CHANNEL_* mask * @param sampleRate The sample rate in hertz * @param encoding An AudioFormat.ENCODING_PCM_* value * @throws IOException */ public static void writeWavHeader(OutputStream out, int channelMask, int sampleRate, int encoding) throws IOException { short channels; switch (channelMask) { case AudioFormat.CHANNEL_IN_MONO: channels = 1; break; case AudioFormat.CHANNEL_IN_STEREO: channels = 2; break; default: throw new IllegalArgumentException("Unacceptable channel mask"); } short bitDepth; switch (encoding) { case AudioFormat.ENCODING_PCM_8BIT: bitDepth = 8; break; case AudioFormat.ENCODING_PCM_16BIT: bitDepth = 16; break; case AudioFormat.ENCODING_PCM_FLOAT: bitDepth = 32; break; default: throw new IllegalArgumentException("Unacceptable encoding"); } writeWavHeader(out, channels, sampleRate, bitDepth); } /** * Writes the proper 44-byte RIFF/WAVE header to/for the given stream * Two size fields are left empty/null since we do not yet know the final stream size * * @param out The stream to write the header to * @param channels The number of channels * @param sampleRate The sample rate in hertz * @param bitDepth The bit depth * @throws IOException */ private static void writeWavHeader(OutputStream out, short channels, int sampleRate, short bitDepth) throws IOException { // Convert the multi-byte integers to raw bytes in little endian format as required by the spec byte[] littleBytes = ByteBuffer.allocate(14) .order(ByteOrder.LITTLE_ENDIAN) .putShort(channels) 
.putInt(sampleRate) .putInt(sampleRate * channels * (bitDepth / 8)) .putShort((short) (channels * (bitDepth / 8))) .putShort(bitDepth) .array(); // Not necessarily the best, but it's very easy to visualize this way out.write(new byte[] { // RIFF header 'R', 'I', 'F', 'F', // ChunkID 0, 0, 0, 0, // ChunkSize (must be updated later) 'W', 'A', 'V', 'E', // Format // fmt subchunk 'f', 'm', 't', ' ', // Subchunk1ID 16, 0, 0, 0, // Subchunk1Size 1, 0, // AudioFormat littleBytes[0], littleBytes[1], // NumChannels littleBytes[2], littleBytes[3], littleBytes[4], littleBytes[5], // SampleRate littleBytes[6], littleBytes[7], littleBytes[8], littleBytes[9], // ByteRate littleBytes[10], littleBytes[11], // BlockAlign littleBytes[12], littleBytes[13], // BitsPerSample // data subchunk 'd', 'a', 't', 'a', // Subchunk2ID 0, 0, 0, 0, // Subchunk2Size (must be updated later) }); } /** * Updates the given wav file's header to include the final chunk sizes * * @param wav The wav file to update * @throws IOException */ public static void updateWavHeader(File wav) throws IOException { byte[] sizes = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN) // There are probably a bunch of different/better ways to calculate // these two given your circumstances. Cast should be safe since if the WAV is // > 4 GB we've already made a terrible mistake. .putInt((int) (wav.length() - 8)) // ChunkSize .putInt((int) (wav.length() - 44)) // Subchunk2Size .array(); RandomAccessFile accessWave = null; //noinspection CaughtExceptionImmediatelyRethrown try { accessWave = new RandomAccessFile(wav, "rw"); // ChunkSize accessWave.seek(4); accessWave.write(sizes, 0, 4); // Subchunk2Size accessWave.seek(40); accessWave.write(sizes, 4, 4); } catch (IOException ex) { // Rethrow but we still close accessWave in our finally throw ex; } finally { if (accessWave != null) { try { accessWave.close(); } catch (IOException ex) { // } } } } } |