Skip to content

Commit

Permalink
Merge pull request #236 from BinaryStudioAcademy/task/OV-230-replace-…
Browse files Browse the repository at this point in the history
…mocked-script-audio-with-tts

OV-230: Replace mocked script audio with text-to-speech
  • Loading branch information
nikita-remeslov authored Sep 11, 2024
2 parents ee0bce6 + 18fe37a commit c09285e
Show file tree
Hide file tree
Showing 19 changed files with 191 additions and 57 deletions.
4 changes: 4 additions & 0 deletions backend/src/bundles/speech/speech.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ class SpeechController extends BaseController {
* type: object
* required: [text, voiceName]
* properties:
* scriptId:
* type: string
* text:
* type: string
* voiceName:
Expand All @@ -117,6 +119,8 @@ class SpeechController extends BaseController {
* schema:
* type: object
* properties:
* scriptId:
* type: string
* audioUrl:
* type: string
*/
Expand Down
3 changes: 2 additions & 1 deletion backend/src/common/services/azure-ai/azure-ai.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class AzureAIService {
}

public async textToSpeech({
scriptId,
text,
voiceName,
}: GenerateSpeechRequestDto): Promise<GenerateSpeechResponseDto> {
Expand All @@ -106,7 +107,7 @@ class AzureAIService {
await this.fileService.uploadFile(audioBuffer, audioFileName);
const audioUrl = this.fileService.getCloudFrontFileUrl(audioFileName);

return { audioUrl };
return { scriptId, audioUrl };
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ import { AudioEvent } from './enums/enums.js';
type Properties = {
isPlaying: boolean;
audioUrl: string;
handleAudioEnd: () => void;
handleSetDuration: (duration: number) => void;
onAudioEnd: () => void;
onSetDuration: (duration: number) => void;
};

const AudioPlayer: React.FC<Properties> = ({
isPlaying,
audioUrl,
handleAudioEnd,
handleSetDuration,
onAudioEnd,
onSetDuration,
}) => {
const playerReference = useRef<PlayerRef>(null);

Expand All @@ -39,22 +39,22 @@ const AudioPlayer: React.FC<Properties> = ({
getAudioData(audioUrl)
.then(({ durationInSeconds }) => {
setDurationInFrames(Math.round(durationInSeconds * FPS));
handleSetDuration(durationInSeconds);
onSetDuration(durationInSeconds);
})
.catch(() => {
setDurationInFrames(1);
});
}, [audioUrl, handleSetDuration]);
}, [audioUrl, onSetDuration]);

useEffect(() => {
const player = playerReference.current;

player?.addEventListener(AudioEvent.ENDED, handleAudioEnd);
player?.addEventListener(AudioEvent.ENDED, onAudioEnd);

return () => {
player?.removeEventListener(AudioEvent.ENDED, handleAudioEnd);
player?.removeEventListener(AudioEvent.ENDED, onAudioEnd);
};
}, [handleAudioEnd, playerReference]);
}, [onAudioEnd, playerReference]);

return (
<LibraryPlayer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ import { AvatarCard } from './components/components.js';
const AvatarsContent: React.FC = () => {
const dispatch = useAppDispatch();

const { items: avatars, dataStatus } = useAppSelector(
({ studio }) => studio.avatars,
);
const { avatars, dataStatus } = useAppSelector(({ studio }) => ({
avatars: studio.avatars,
dataStatus: studio.dataStatus,
}));

useEffect(() => {
void dispatch(studioActions.loadAvatars());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,27 @@ import {
import {
useAppDispatch,
useCallback,
useEffect,
useMemo,
useState,
} from '~/bundles/common/hooks/hooks.js';
import { IconName } from '~/bundles/common/icons/icons.js';
import { AudioPlayer } from '~/bundles/studio/components/audio-player/audio-player.js';
import { PlayIconNames } from '~/bundles/studio/enums/play-icon-names.enum.js';
import { actions as studioActions } from '~/bundles/studio/store/studio.js';
import { type Script as ScriptT } from '~/bundles/studio/types/types.js';

// TODO: remove mocked url when script audioUrl will be taken from text-to-speech
const audioUrl = 'https://d2tm5q3cg1nlwf.cloudfront.net/tts_1725818217391.wav';

type Properties = ScriptT;

const Script: React.FC<Properties> = ({ id, text, url }) => {
const Script: React.FC<Properties> = ({
id,
text,
voiceName,
url,
iconName,
}) => {
const dispatch = useAppDispatch();

const [isPlaying, setIsPlaying] = useState(false);
const [isAudioLoading, setIsAudioLoading] = useState(false);

const handleDeleteScript = useCallback((): void => {
void dispatch(studioActions.deleteScript(id));
Expand Down Expand Up @@ -62,31 +64,26 @@ const Script: React.FC<Properties> = ({ id, text, url }) => {
return;
}

setIsAudioLoading(true);

//TODO: replace with fetching real script audioUrl
setTimeout(() => {
void dispatch(studioActions.editScript({ id, url: audioUrl }));
}, 1000);
}, [dispatch, id, url]);
void dispatch(
studioActions.generateScriptSpeech({
scriptId: id,
text,
voiceName,
}),
);
}, [dispatch, id, text, url, voiceName]);

const handleAudioEnd = useCallback((): void => {
setIsPlaying(false);
}, []);

useEffect(() => {
if (url) {
setIsAudioLoading(false);
}
}, [url]);

const iconComponent = useMemo(() => {
if (isAudioLoading) {
if (iconName === PlayIconNames.LOADING) {
return Spinner;
}

return isPlaying ? IconName.STOP : IconName.PLAY;
}, [isAudioLoading, isPlaying]);
}, [iconName, isPlaying]);

return (
<VStack w="full">
Expand Down Expand Up @@ -147,8 +144,8 @@ const Script: React.FC<Properties> = ({ id, text, url }) => {
<AudioPlayer
isPlaying={isPlaying}
audioUrl={url}
handleAudioEnd={handleAudioEnd}
handleSetDuration={handleSetScriptDuration}
onAudioEnd={handleAudioEnd}
onSetDuration={handleSetScriptDuration}
/>
)}
</VStack>
Expand Down
3 changes: 2 additions & 1 deletion frontend/src/bundles/studio/enums/enums.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export { PlayIconNames } from './play-icon-names.enum.js';
export { RowNames } from './row-names.enum.js';
export { AvatarsApiPath } from 'shared';
export { AvatarsApiPath, SpeechApiPath } from 'shared';
6 changes: 6 additions & 0 deletions frontend/src/bundles/studio/enums/play-icon-names.enum.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * Names of the states a script's play control can be in.
 *
 * The values are stored on each script as `iconName` and compared against
 * when choosing which icon to render.
 */
const PlayIconNames = {
    /** Speech audio for the script is being generated; a spinner is shown. */
    LOADING: 'loading',
    /** No generation in progress; the regular play/stop icon is shown. */
    READY: 'ready',
} as const;

export { PlayIconNames };
40 changes: 40 additions & 0 deletions frontend/src/bundles/studio/speech-api.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { ApiPath, ContentType } from '~/bundles/common/enums/enums.js';
import {
type GenerateSpeechRequestDto,
type GenerateSpeechResponseDto,
} from '~/bundles/studio/types/types.js';
import { type Http, HTTPMethod } from '~/framework/http/http.js';
import { BaseHttpApi } from '~/framework/http-api/http-api.js';
import { type Storage } from '~/framework/storage/storage.js';

import { SpeechApiPath } from './enums/enums.js';

// Dependencies accepted by the SpeechApi constructor. All three fields are
// passed through unchanged to the BaseHttpApi constructor.
type Constructor = {
baseUrl: string;
http: Http;
storage: Storage;
};

/**
 * HTTP API client for the speech endpoints, rooted at `ApiPath.SPEECH`.
 */
class SpeechApi extends BaseHttpApi {
    public constructor({ baseUrl, http, storage }: Constructor) {
        super({ path: ApiPath.SPEECH, baseUrl, http, storage });
    }

    /**
     * Requests speech generation for a script.
     *
     * Sends a POST request with a JSON body to `SpeechApiPath.GENERATE`,
     * with `hasAuth` enabled.
     *
     * @param payload Request DTO; serialized with `JSON.stringify` as the body.
     * @returns The response body parsed as `GenerateSpeechResponseDto`.
     */
    public async generateScriptSpeech(
        payload: GenerateSpeechRequestDto,
    ): Promise<GenerateSpeechResponseDto> {
        const endpoint = this.getFullEndpoint(SpeechApiPath.GENERATE, {});
        const requestBody = JSON.stringify(payload);

        const response = await this.load(endpoint, {
            method: HTTPMethod.POST,
            contentType: ContentType.JSON,
            payload: requestBody,
            hasAuth: true,
        });

        return await response.json<GenerateSpeechResponseDto>();
    }
}

export { SpeechApi };
18 changes: 16 additions & 2 deletions frontend/src/bundles/studio/store/actions.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import { createAsyncThunk } from '@reduxjs/toolkit';

import { type AsyncThunkConfig } from '~/bundles/common/types/types.js';
import { type AvatarGetAllResponseDto } from '~/bundles/studio/types/types.js';
import {
type AvatarGetAllResponseDto,
type GenerateSpeechRequestDto,
type GenerateSpeechResponseDto,
} from '~/bundles/studio/types/types.js';

import { name as sliceName } from './slice.js';

Expand All @@ -15,4 +19,14 @@ const loadAvatars = createAsyncThunk<
return avatarsApi.loadAvatars();
});

export { loadAvatars };
/**
 * Async thunk that asks the speech API to generate audio for a script.
 *
 * The request DTO is forwarded as-is to `speechApi.generateScriptSpeech`
 * (taken from the thunk's `extra` argument); the resolved response DTO
 * becomes the payload of the fulfilled action.
 */
const generateScriptSpeech = createAsyncThunk<
    GenerateSpeechResponseDto,
    GenerateSpeechRequestDto,
    AsyncThunkConfig
>(`${sliceName}/generate-script-speech`, (speechRequest, thunkApi) =>
    thunkApi.extra.speechApi.generateScriptSpeech(speechRequest),
);

export { generateScriptSpeech, loadAvatars };
68 changes: 53 additions & 15 deletions frontend/src/bundles/studio/store/slice.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
MIN_SCRIPT_DURATION,
} from '~/bundles/studio/constants/constants.js';

import { RowNames } from '../enums/enums.js';
import { PlayIconNames, RowNames } from '../enums/enums.js';
import {
getDestinationPointerValue,
getNewItemIndexBySpan,
Expand All @@ -29,7 +29,7 @@ import {
type Script,
type TimelineItemWithSpan,
} from '../types/types.js';
import { loadAvatars } from './actions.js';
import { generateScriptSpeech, loadAvatars } from './actions.js';

type SelectedItem = {
id: string;
Expand All @@ -45,15 +45,17 @@ type DestinationPointerActionPayload = ItemActionPayload & {
type: RowType;
};

// TODO: remove once voices are available in the store
const defaultVoiceName = 'en-US-BrianMultilingualNeural';

type State = {
dataStatus: ValueOf<typeof DataStatus>;
avatars: Array<AvatarGetResponseDto> | [];
player: {
isPlaying: boolean;
elapsedTime: number; // ms
};
avatars: {
dataStatus: ValueOf<typeof DataStatus>;
items: Array<AvatarGetResponseDto> | [];
};

scenes: Array<Scene>;
scripts: Array<Script>;
videoSize: VideoPreviewT;
Expand All @@ -64,14 +66,12 @@ type State = {
};

const initialState: State = {
dataStatus: DataStatus.IDLE,
avatars: [],
player: {
isPlaying: false,
elapsedTime: 0,
},
avatars: {
dataStatus: DataStatus.IDLE,
items: [],
},
scenes: [{ id: uuidv4(), duration: MIN_SCENE_DURATION }],
scripts: [],
videoSize: VideoPreview.LANDSCAPE,
Expand All @@ -90,6 +90,8 @@ const { reducer, actions, name } = createSlice({
id: uuidv4(),
duration: MIN_SCRIPT_DURATION,
text: action.payload,
voiceName: defaultVoiceName,
iconName: PlayIconNames.READY,
};

state.scripts.push(script);
Expand Down Expand Up @@ -245,15 +247,51 @@ const { reducer, actions, name } = createSlice({
},
extraReducers(builder) {
builder.addCase(loadAvatars.pending, (state) => {
state.avatars.dataStatus = DataStatus.PENDING;
state.dataStatus = DataStatus.PENDING;
});
builder.addCase(loadAvatars.fulfilled, (state, action) => {
state.avatars.items = action.payload.items;
state.avatars.dataStatus = DataStatus.FULFILLED;
state.avatars = action.payload.items;
state.dataStatus = DataStatus.FULFILLED;
});
builder.addCase(loadAvatars.rejected, (state) => {
state.avatars.items = [];
state.avatars.dataStatus = DataStatus.REJECTED;
state.avatars = [];
state.dataStatus = DataStatus.REJECTED;
});
builder.addCase(generateScriptSpeech.pending, (state, action) => {
const { scriptId } = action.meta.arg;

state.scripts = state.scripts.map((script) =>
script.id === scriptId
? { ...script, iconName: PlayIconNames.LOADING }
: script,
);
state.dataStatus = DataStatus.PENDING;
});
builder.addCase(generateScriptSpeech.fulfilled, (state, action) => {
const { scriptId, audioUrl } = action.payload;

state.scripts = state.scripts.map((script) => {
if (script.id !== scriptId) {
return script;
}

return {
...script,
url: audioUrl,
iconName: PlayIconNames.READY,
};
});
state.dataStatus = DataStatus.FULFILLED;
});
builder.addCase(generateScriptSpeech.rejected, (state, action) => {
const { scriptId } = action.meta.arg;

state.scripts = state.scripts.map((script) =>
script.id === scriptId
? { ...script, iconName: PlayIconNames.READY }
: script,
);
state.dataStatus = DataStatus.REJECTED;
});
},
});
Expand Down
Loading

0 comments on commit c09285e

Please sign in to comment.