- 03-5820-1777平日10:00〜18:00
- お問い合わせ
今回はChatGPTに音声で質問する簡単なアプリケーションをJava言語で作成してみたいと思います。
テキストから音声への変換にはGoogle Cloud Text-to-Speech、音声からテキストへの変換にはGoogle Cloud Speech-to-Textを使用し、ChatGPTへの質問はOpenAIのAPIを使用します。
環境変数 GOOGLE_APPLICATION_CREDENTIALS を設定。
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build for the voice-driven ChatGPT sample application (main class: com.example.App). -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>app</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- Import the Google Cloud libraries BOM so the Google Cloud dependencies below
     can omit <version> and stay mutually compatible. -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>libraries-bom</artifactId>
<version>26.19.0</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<!-- Sources are UTF-8 (they contain Japanese string literals); compile for Java 8. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<!-- Google Cloud Speech-to-Text client library -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-speech</artifactId>
</dependency>
<!-- Google Cloud Text-to-Speech client library (version also comes from the BOM) -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-texttospeech</artifactId>
</dependency>
<!-- Gson for building the OpenAI request body and parsing the JSON response -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.8</version>
</dependency>
<!-- Apache HttpComponents for HTTP requests -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Build a single runnable jar containing all dependencies during the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<!-- Drop the signature files of signed dependencies; they would no longer match
     the contents of the merged jar. -->
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<!-- Merge META-INF/services entries from all dependencies instead of letting one overwrite another. -->
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<!-- Set the jar manifest's Main-Class so the jar can be started with java -jar. -->
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.example.App</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
package com.example;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Scanner;
import com.google.cloud.speech.v1.*;
import com.google.protobuf.ByteString;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.util.*;
import com.google.cloud.texttospeech.v1.*;
import com.google.protobuf.ByteString;
import javax.sound.sampled.*;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
/**
 * Console voice assistant.
 *
 * <p>Each round records a spoken question from the microphone until Enter is pressed, transcribes
 * it with Google Cloud Speech-to-Text, sends the whole conversation so far to the OpenAI chat
 * completions API, prints the reply and plays it back through Google Cloud Text-to-Speech.
 * Typing {@code exit} (or closing standard input) ends the program.
 */
public class App {
    /** OpenAI chat completions endpoint. */
    private static final String API_URL = "https://api.openai.com/v1/chat/completions";
    /** Environment variable that, when set, supplies the OpenAI API key. */
    private static final String API_KEY_ENV = "OPENAI_API_KEY";
    /** Fallback key used when the environment variable is absent (edit to hard-code a key). */
    private static final String API_KEY_FALLBACK = "sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
    /** OpenAI API key actually sent in the Authorization header. */
    private static final String API_KEY = resolveApiKey();
    /** Chat model requested from OpenAI. */
    private static final String CHAT_MODEL = "gpt-3.5-turbo";

    // Capture format; SAMPLE_RATE is shared with the Speech-to-Text request so both always agree.
    private static final int SAMPLE_RATE = 16000;
    private static final int SAMPLE_SIZE_BITS = 16;
    private static final int CHANNELS = 1;
    private static final boolean SIGNED = true;
    private static final boolean BIG_ENDIAN = false;

    /** Size of the buffers used for audio capture and playback. */
    private static final int AUDIO_BUFFER_BYTES = 4096;
    /** Upper bound on how long stopping a recording waits for the capture thread. */
    private static final long RECORDER_JOIN_TIMEOUT_MS = 2000L;

    /**
     * Runs the record / transcribe / ask / speak loop until the user enters {@code exit}.
     *
     * @param args unused
     * @throws Exception if the audio devices or the Google Cloud clients fail
     */
    public static void main(String[] args) throws Exception {
        // Describe the microphone line we need and make sure the platform can provide it.
        AudioFormat format =
                new AudioFormat(SAMPLE_RATE, SAMPLE_SIZE_BITS, CHANNELS, SIGNED, BIG_ENDIAN);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
        if (!AudioSystem.isLineSupported(info)) {
            System.err.println("Line not supported");
            System.exit(1);
        }

        JsonArray messageHistoryArray = new JsonArray();
        // One Scanner for the whole session: a new Scanner per round could swallow buffered input.
        Scanner scanner = new Scanner(System.in);

        while (true) {
            // Start capturing from the microphone in the background.
            Recording recording = Recording.start(info, format);

            // When Enter is pressed, stop capturing; end of input is treated like "exit".
            System.out.println("質問をしてEnterボタンを押下して下さい。");
            String line = scanner.hasNextLine() ? scanner.nextLine() : "exit";
            byte[] recordedAudio = recording.stop();
            if ("exit".equals(line.trim())) {
                break;
            }

            // Ask Google Cloud Speech-to-Text to transcribe the recording.
            String messageStr = sendToGoogleSpeech(recordedAudio);
            System.out.println("-----質問-----");
            System.out.println(messageStr);
            System.out.println("");
            if (messageStr.trim().isEmpty()) {
                // Nothing was recognised; do not send an empty question.
                System.out.println("音声を認識できませんでした。もう一度お試し下さい。");
                continue;
            }
            System.out.println("質問中…");

            // Add the question to the history so the model sees the whole conversation.
            messageHistoryArray.add(chatMessage("user", messageStr));

            // Send the conversation to ChatGPT and fetch the reply.
            String response = requestCompletion(messageHistoryArray);
            if (response == null) {
                // Drop the unanswered question so the history keeps alternating user/assistant.
                messageHistoryArray.remove(messageHistoryArray.size() - 1);
                System.out.println("返信を取得できませんでした。");
                continue;
            }
            messageHistoryArray.add(chatMessage("assistant", response));

            System.out.println("-----返信-----");
            System.out.println(response);
            System.out.println("");

            // Convert the reply to speech and play it.
            playText(response);
        }
    }

    /** Returns the API key from the environment if present, otherwise the hard-coded fallback. */
    private static String resolveApiKey() {
        String fromEnvironment = System.getenv(API_KEY_ENV);
        if (fromEnvironment != null && !fromEnvironment.trim().isEmpty()) {
            return fromEnvironment.trim();
        }
        return API_KEY_FALLBACK;
    }

    /** Builds one chat message object with the given role and content. */
    private static JsonObject chatMessage(String role, String content) {
        JsonObject message = new JsonObject();
        message.addProperty("role", role);
        message.addProperty("content", content);
        return message;
    }

    /**
     * Transcribes raw LINEAR16 audio with the Speech-to-Text client library.
     *
     * @param audioData audio captured at {@link #SAMPLE_RATE} Hz
     * @return the concatenated top transcript of every result; empty if nothing was recorded or
     *     recognised
     * @throws Exception if the client cannot be created or the request fails
     */
    private static String sendToGoogleSpeech(byte[] audioData) throws Exception {
        if (audioData == null || audioData.length == 0) {
            return "";
        }
        try (SpeechClient speechClient = SpeechClient.create()) {
            RecognitionConfig config = RecognitionConfig.newBuilder()
                    .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
                    .setSampleRateHertz(SAMPLE_RATE)
                    .setLanguageCode("ja-JP")
                    .build();
            RecognitionAudio audio = RecognitionAudio.newBuilder()
                    .setContent(ByteString.copyFrom(audioData))
                    .build();
            RecognizeResponse response = speechClient.recognize(config, audio);

            StringBuilder transcript = new StringBuilder();
            for (SpeechRecognitionResult result : response.getResultsList()) {
                // Skip results without alternatives instead of failing on get(0).
                if (result.getAlternativesCount() > 0) {
                    transcript.append(result.getAlternatives(0).getTranscript());
                }
            }
            return transcript.toString();
        }
    }

    /**
     * Sends the conversation to ChatGPT through the OpenAI API and returns the reply.
     *
     * @param messages chat messages (objects with {@code role} and {@code content}) sent as-is
     * @return the content of the first choice, or {@code null} if the request failed or the
     *     response did not contain a usable reply (the cause is written to standard error)
     */
    public static String requestCompletion(JsonArray messages) {
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpPost postRequest = new HttpPost(API_URL);
            postRequest.setHeader(HttpHeaders.AUTHORIZATION, "Bearer " + API_KEY);
            postRequest.setHeader(HttpHeaders.CONTENT_TYPE, "application/json");

            JsonObject requestBody = new JsonObject();
            requestBody.addProperty("model", CHAT_MODEL);
            requestBody.add("messages", messages);
            postRequest.setEntity(new StringEntity(requestBody.toString(), "UTF-8"));

            try (CloseableHttpResponse response = httpClient.execute(postRequest)) {
                int status = response.getStatusLine().getStatusCode();
                String responseBody = response.getEntity() == null
                        ? ""
                        : EntityUtils.toString(response.getEntity(), "UTF-8");
                if (status < 200 || status >= 300) {
                    // Error responses carry no "choices"; report them instead of failing on parse.
                    System.err.println("OpenAI API request failed: HTTP " + status + " " + responseBody);
                    return null;
                }

                JsonObject jsonResponse = JsonParser.parseString(responseBody).getAsJsonObject();
                JsonArray choices = jsonResponse.getAsJsonArray("choices");
                if (choices == null || choices.size() == 0) {
                    System.err.println("OpenAI API response contained no choices: " + responseBody);
                    return null;
                }
                return choices.get(0).getAsJsonObject()
                        .getAsJsonObject("message")
                        .get("content")
                        .getAsString();
            }
        } catch (Exception e) {
            // Best effort by contract: callers receive null and decide how to continue.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Converts text to speech with Google Text-to-Speech and plays it.
     *
     * @param text text to speak; {@code null} or empty text is ignored
     * @throws Exception if synthesis or playback fails
     */
    public static void playText(String text) throws Exception {
        if (text == null || text.isEmpty()) {
            return;
        }
        ByteString audioData = synthesizeText(text);
        playAudio(audioData.toByteArray());
    }

    /**
     * Converts text to audio with the Google Text-to-Speech client library.
     *
     * @param text text to synthesize (Japanese voice)
     * @return the synthesized LINEAR16 audio content
     * @throws Exception if the client cannot be created or the request fails
     */
    public static ByteString synthesizeText(String text) throws Exception {
        try (TextToSpeechClient textToSpeechClient = TextToSpeechClient.create()) {
            SynthesisInput input = SynthesisInput.newBuilder().setText(text).build();
            VoiceSelectionParams voice =
                    VoiceSelectionParams.newBuilder().setLanguageCode("ja-JP").build();
            // LINEAR16 so javax.sound.sampled can decode and play the result.
            AudioConfig audioConfig =
                    AudioConfig.newBuilder().setAudioEncoding(AudioEncoding.LINEAR16).build();
            return textToSpeechClient.synthesizeSpeech(input, voice, audioConfig).getAudioContent();
        }
    }

    /**
     * Plays audio data on the default output line and blocks until playback has finished.
     *
     * @param audioData audio in a container format that {@link AudioSystem} can parse
     * @throws Exception if the data cannot be parsed or no output line is available
     */
    public static void playAudio(byte[] audioData) throws Exception {
        InputStream audioStream = new ByteArrayInputStream(audioData);
        try (AudioInputStream ais = AudioSystem.getAudioInputStream(audioStream)) {
            AudioFormat format = ais.getFormat();
            DataLine.Info info = new DataLine.Info(SourceDataLine.class, format);
            // try-with-resources releases the output line even if playback throws.
            try (SourceDataLine audioLine = (SourceDataLine) AudioSystem.getLine(info)) {
                audioLine.open(format);
                audioLine.start();
                byte[] buffer = new byte[AUDIO_BUFFER_BYTES];
                int bytesRead;
                while ((bytesRead = ais.read(buffer)) != -1) {
                    audioLine.write(buffer, 0, bytesRead);
                }
                // Wait for the queued audio to be played before the line is closed.
                audioLine.drain();
            }
        }
    }

    /**
     * One microphone capture session: a background thread copies audio from the line into memory
     * until {@link #stop()} is called.
     */
    private static final class Recording {
        private final TargetDataLine microphone;
        private final ByteArrayOutputStream captured = new ByteArrayOutputStream();
        private final Thread worker;
        /** Cleared by {@link #stop()} to make the capture loop exit cooperatively. */
        private volatile boolean active = true;

        private Recording(TargetDataLine microphone) {
            this.microphone = microphone;
            this.worker = new Thread(this::capture, "microphone-recorder");
            // Daemon so a stuck audio driver can never keep the JVM alive.
            this.worker.setDaemon(true);
        }

        /** Opens and starts the microphone line and begins capturing in the background. */
        static Recording start(DataLine.Info info, AudioFormat format)
                throws LineUnavailableException {
            TargetDataLine microphone = (TargetDataLine) AudioSystem.getLine(info);
            microphone.open(format);
            microphone.start();
            Recording recording = new Recording(microphone);
            recording.worker.start();
            return recording;
        }

        /** Capture loop: runs on the worker thread until the session is stopped. */
        private void capture() {
            byte[] buffer = new byte[AUDIO_BUFFER_BYTES];
            while (active) {
                int bytesRead = microphone.read(buffer, 0, buffer.length);
                // read() returns early (possibly with 0 bytes) once the line is stopped or closed.
                if (bytesRead > 0) {
                    captured.write(buffer, 0, bytesRead);
                }
            }
        }

        /**
         * Stops capturing, releases the microphone and returns everything recorded so far.
         *
         * @return the captured audio bytes
         * @throws InterruptedException if interrupted while waiting for the capture thread
         */
        byte[] stop() throws InterruptedException {
            // Clear the flag first so the loop exits as soon as the blocked read() returns.
            active = false;
            microphone.stop();
            microphone.close();
            worker.join(RECORDER_JOIN_TIMEOUT_MS);
            return captured.toByteArray();
        }
    }
}
動画では結構スムーズに実行出来ていますが、ChatGPTからの返信が長くなると結構待たされます。
業務システムで効果的な使用方法があるかは分かりませんが、面白そうだったのでやってみました。
今度は長い返信でも、ストレスないスピード感が出せるか調べてみようと思います。