Add latest updates.

cartesia-ai · Nov 27, 2024 · 22c9758
1 parent 7fc96a0
commit 22c9758
Show file tree

Hide file tree

Showing 29 changed files with 205 additions and 148 deletions.
diff --git a/.fernignore b/.fernignore
@@ -1 +1,5 @@
 # Specify files that shouldn't be modified by Fern
+
+src/wrapper
+src/core/websocket
+src/serialization/resources/tts/types/OutputFormat.ts
diff --git a/jest.config.js b/jest.config.js
@@ -1,5 +1,15 @@
 /** @type {import('jest').Config} */
 module.exports = {
-    preset: "ts-jest",
-    testEnvironment: "node",
-};
+    preset: 'ts-jest/presets/default-esm',
+    testEnvironment: 'node',
+    extensionsToTreatAsEsm: ['.ts'],
+    globals: {
+      'ts-jest': {
+        useESM: true,
+      },
+    },
+    transformIgnorePatterns: [
+      '/node_modules/(?!(emittery)/)',
+    ],
+  };
+
diff --git a/reference.md b/reference.md
@@ -58,17 +58,17 @@ await client.apiStatus.get();
 
 ```typescript
 await client.tts.bytes({
-    model_id: "sonic-english",
+    modelId: "sonic-english",
     transcript: "Hello, world!",
     voice: {
         mode: "id",
         id: "694f9389-aac1-45b6-b726-9d9369183238",
     },
     language: "en",
-    output_format: {
+    outputFormat: {
         container: "mp3",
-        sample_rate: 44100,
-        bit_rate: 128000,
+        sampleRate: 44100,
+        bitRate: 128000,
     },
 });
 ```
@@ -119,18 +119,18 @@ await client.tts.bytes({
 
 ```typescript
 const response = await client.tts.sse({
-    model_id: "string",
+    modelId: "string",
     transcript: "string",
     voice: {
         mode: "id",
         id: "string",
-        __experimental_controls: {
+        experimentalControls: {
             speed: 1.1,
             emotion: "anger:lowest",
         },
     },
     language: "en",
-    output_format: {
+    outputFormat: {
         container: "raw",
     },
     duration: 1.1,
@@ -205,10 +205,10 @@ This endpoint is priced at 15 characters per second of input audio.
 
 ```typescript
 await client.voiceChanger.bytes(fs.createReadStream("/path/to/your/file"), {
-    "voice[id]": "694f9389-aac1-45b6-b726-9d9369183238",
-    "output_format[container]": "mp3",
-    "output_format[sample_rate]": 44100,
-    "output_format[bit_rate]": 128000,
+    voiceId: "694f9389-aac1-45b6-b726-9d9369183238",
+    outputFormatContainer: "mp3",
+    outputFormatSampleRate: 44100,
+    outputFormatBitRate: 128000,
 });
 ```
 
@@ -266,10 +266,10 @@ await client.voiceChanger.bytes(fs.createReadStream("/path/to/your/file"), {
 
 ```typescript
 const response = await client.voiceChanger.sse(fs.createReadStream("/path/to/your/file"), {
-    "voice[id]": "694f9389-aac1-45b6-b726-9d9369183238",
-    "output_format[container]": "mp3",
-    "output_format[sample_rate]": 44100,
-    "output_format[bit_rate]": 128000,
+    voiceId: "694f9389-aac1-45b6-b726-9d9369183238",
+    outputFormatContainer: "mp3",
+    outputFormatSampleRate: 44100,
+    outputFormatBitRate: 128000,
 });
 for await (const item of response) {
     console.log(item);
@@ -383,7 +383,7 @@ await client.voices.create({
         1, 1, 1, 1, 1, 1, 1,
     ],
     language: "en",
-    base_voice_id: "string",
+    baseVoiceId: "string",
 });
 ```
 
@@ -597,7 +597,7 @@ await client.voices.localize({
         1, 1, 1, 1, 1, 1, 1,
     ],
     language: "en",
-    original_speaker_gender: "male",
+    originalSpeakerGender: "male",
     dialect: "au",
 });
 ```

diff --git a/src/api/resources/apiStatus/client/Client.ts b/src/api/resources/apiStatus/client/Client.ts
@@ -5,6 +5,7 @@
 import * as environments from "../../../../environments";
 import * as core from "../../../../core";
 import * as Cartesia from "../../../index";
+import * as serializers from "../../../../serialization/index";
 import * as errors from "../../../../errors/index";
 
 export declare namespace ApiStatus {
@@ -58,7 +59,13 @@ export class ApiStatus {
             abortSignal: requestOptions?.abortSignal,
         });
         if (_response.ok) {
-            return _response.body as Cartesia.ApiInfo;
+            return serializers.ApiInfo.parseOrThrow(_response.body, {
+                unrecognizedObjectKeys: "passthrough",
+                allowUnrecognizedUnionMembers: true,
+                allowUnrecognizedEnumValues: true,
+                skipValidation: true,
+                breadcrumbsPrefix: ["response"],
+            });
         }
 
         if (_response.error.reason === "status-code") {

diff --git a/src/api/resources/tts/client/Client.ts b/src/api/resources/tts/client/Client.ts
@@ -6,9 +6,9 @@ import * as environments from "../../../../environments";
 import * as core from "../../../../core";
 import * as Cartesia from "../../../index";
 import * as stream from "stream";
+import * as serializers from "../../../../serialization/index";
 import urlJoin from "url-join";
 import * as errors from "../../../../errors/index";
-import * as serializers from "../../../../serialization/index";
 
 export declare namespace Tts {
     interface Options {
@@ -53,7 +53,7 @@ export class Tts {
             },
             contentType: "application/json",
             requestType: "json",
-            body: request,
+            body: serializers.TtsRequest.jsonOrThrow(request, { unrecognizedObjectKeys: "strip" }),
             responseType: "streaming",
             timeoutMs: requestOptions?.timeoutInSeconds != null ? requestOptions.timeoutInSeconds * 1000 : 60000,
             maxRetries: requestOptions?.maxRetries,
@@ -107,7 +107,7 @@ export class Tts {
             },
             contentType: "application/json",
             requestType: "json",
-            body: request,
+            body: serializers.TtsRequest.jsonOrThrow(request, { unrecognizedObjectKeys: "strip" }),
             responseType: "sse",
             timeoutMs: requestOptions?.timeoutInSeconds != null ? requestOptions.timeoutInSeconds * 1000 : 60000,
             maxRetries: requestOptions?.maxRetries,

diff --git a/src/api/resources/tts/types/CancelContextRequest.ts b/src/api/resources/tts/types/CancelContextRequest.ts
@@ -6,7 +6,7 @@ import * as Cartesia from "../../../index";
 
 export interface CancelContextRequest {
     /** The ID of the context to cancel. */
-    context_id: Cartesia.ContextId;
+    contextId: Cartesia.ContextId;
     /** Whether to cancel the context, so that no more messages are generated for that context. */
     cancel: true;
 }
diff --git a/src/api/resources/tts/types/GenerationRequest.ts b/src/api/resources/tts/types/GenerationRequest.ts
@@ -6,22 +6,22 @@ import * as Cartesia from "../../../index";
 
 export interface GenerationRequest {
     /** The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models. */
-    model_id: string;
+    modelId: string;
     transcript: string;
     voice: Cartesia.TtsRequestVoiceSpecifier;
     language?: Cartesia.SupportedLanguage;
-    output_format: Cartesia.WebSocketRawOutputFormat;
+    outputFormat: Cartesia.WebSocketRawOutputFormat;
     /**
      * The maximum duration of the audio in seconds. You do not usually need to specify this.
      * If the duration is not appropriate for the length of the transcript, the output audio may be truncated.
      */
     duration?: number;
-    context_id: Cartesia.ContextId;
+    contextId: Cartesia.ContextId;
     /**
      * Whether this input may be followed by more inputs.
      * If not specified, this defaults to `false`.
      */
     continue?: boolean;
     /** Whether to return word-level timestamps. */
-    add_timestamps?: boolean;
+    addTimestamps?: boolean;
 }
diff --git a/src/api/resources/tts/types/Mp3OutputFormat.ts b/src/api/resources/tts/types/Mp3OutputFormat.ts
@@ -3,6 +3,6 @@
  */
 
 export interface Mp3OutputFormat {
-    sample_rate: number;
-    bit_rate: number;
+    sampleRate: number;
+    bitRate: number;
 }
diff --git a/src/api/resources/tts/types/RawOutputFormat.ts b/src/api/resources/tts/types/RawOutputFormat.ts
@@ -6,5 +6,5 @@ import * as Cartesia from "../../../index";
 
 export interface RawOutputFormat {
     encoding: Cartesia.RawEncoding;
-    sample_rate: number;
+    sampleRate: number;
 }
diff --git a/src/api/resources/tts/types/TtsRequest.ts b/src/api/resources/tts/types/TtsRequest.ts
@@ -6,11 +6,11 @@ import * as Cartesia from "../../../index";
 
 export interface TtsRequest {
     /** The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models. */
-    model_id: string;
+    modelId: string;
     transcript: string;
     voice: Cartesia.TtsRequestVoiceSpecifier;
     language?: Cartesia.SupportedLanguage;
-    output_format: Cartesia.OutputFormat;
+    outputFormat: Cartesia.OutputFormat;
     /**
      * The maximum duration of the audio in seconds. You do not usually need to specify this.
      * If the duration is not appropriate for the length of the transcript, the output audio may be truncated.

diff --git a/src/api/resources/tts/types/TtsRequestEmbeddingSpecifier.ts b/src/api/resources/tts/types/TtsRequestEmbeddingSpecifier.ts
@@ -7,5 +7,5 @@ import * as Cartesia from "../../../index";
 export interface TtsRequestEmbeddingSpecifier {
     mode: "embedding";
     embedding: Cartesia.Embedding;
-    __experimental_controls?: Cartesia.Controls;
+    experimentalControls?: Cartesia.Controls;
 }
diff --git a/src/api/resources/tts/types/TtsRequestIdSpecifier.ts b/src/api/resources/tts/types/TtsRequestIdSpecifier.ts
@@ -7,5 +7,5 @@ import * as Cartesia from "../../../index";
 export interface TtsRequestIdSpecifier {
     mode: "id";
     id: Cartesia.VoiceId;
-    __experimental_controls?: Cartesia.Controls;
+    experimentalControls?: Cartesia.Controls;
 }
diff --git a/src/api/resources/tts/types/WebSocketBaseResponse.ts b/src/api/resources/tts/types/WebSocketBaseResponse.ts
@@ -5,7 +5,7 @@
 import * as Cartesia from "../../../index";
 
 export interface WebSocketBaseResponse {
-    context_id?: Cartesia.ContextId;
-    status_code: number;
+    contextId?: Cartesia.ContextId;
+    statusCode: number;
     done: boolean;
 }
diff --git a/src/api/resources/tts/types/WebSocketChunkResponse.ts b/src/api/resources/tts/types/WebSocketChunkResponse.ts
@@ -6,5 +6,5 @@ import * as Cartesia from "../../../index";
 
 export interface WebSocketChunkResponse extends Cartesia.WebSocketBaseResponse {
     data: string;
-    step_time: number;
+    stepTime: number;
 }
diff --git a/src/api/resources/tts/types/WebSocketRawOutputFormat.ts b/src/api/resources/tts/types/WebSocketRawOutputFormat.ts
@@ -7,5 +7,5 @@ import * as Cartesia from "../../../index";
 export interface WebSocketRawOutputFormat {
     container: "raw";
     encoding: Cartesia.RawEncoding;
-    sample_rate: number;
+    sampleRate: number;
 }
diff --git a/src/api/resources/tts/types/WebSocketResponse.ts b/src/api/resources/tts/types/WebSocketResponse.ts
@@ -7,7 +7,7 @@ import * as Cartesia from "../../../index";
 export type WebSocketResponse =
     | Cartesia.WebSocketResponse.Chunk
     | Cartesia.WebSocketResponse.Done
-    | Cartesia.WebSocketResponse.Timestamp
+    | Cartesia.WebSocketResponse.Timestamps
     | Cartesia.WebSocketResponse.Error_;
 
 export declare namespace WebSocketResponse {
@@ -19,8 +19,8 @@ export declare namespace WebSocketResponse {
         type: "done";
     }
 
-    interface Timestamp extends Cartesia.WebSocketTimestampResponse {
-        type: "timestamp";
+    interface Timestamps extends Cartesia.WebSocketTimestampsResponse {
+        type: "timestamps";
     }
 
     interface Error_ extends Cartesia.WebSocketErrorResponse {

diff --git a/src/api/resources/tts/types/WebSocketTimestampResponse.ts b/src/api/resources/tts/types/WebSocketTimestampResponse.ts
diff --git a/src/api/resources/tts/types/WebSocketTtsOutput.ts b/src/api/resources/tts/types/WebSocketTtsOutput.ts
@@ -5,7 +5,7 @@
 import * as Cartesia from "../../../index";
 
 export interface WebSocketTtsOutput {
-    word_timestamps?: Cartesia.WordTimestamps;
+    wordTimestamps?: Cartesia.WordTimestamps;
     audio?: unknown;
-    context_id?: Cartesia.ContextId;
+    contextId?: Cartesia.ContextId;
 }
diff --git a/src/api/resources/tts/types/WebSocketTtsRequest.ts b/src/api/resources/tts/types/WebSocketTtsRequest.ts
@@ -6,12 +6,12 @@ import * as Cartesia from "../../../index";
 
 export interface WebSocketTtsRequest {
     /** The ID of the model to use for the generation. See [Models](/build-with-sonic/models) for available models. */
-    model_id: string;
-    output_format: Cartesia.OutputFormat;
+    modelId: string;
+    outputFormat?: Cartesia.OutputFormat;
     transcript?: string;
     voice: Cartesia.TtsRequestVoiceSpecifier;
     duration?: number;
     language?: string;
-    add_timestamps: boolean;
-    context_id?: string;
+    addTimestamps?: boolean;
+    contextId?: string;
 }
diff --git a/src/api/resources/tts/types/index.ts b/src/api/resources/tts/types/index.ts
@@ -3,8 +3,9 @@ export * from "./WebSocketBaseResponse";
 export * from "./WebSocketResponse";
 export * from "./WebSocketErrorResponse";
 export * from "./WebSocketChunkResponse";
-export * from "./WebSocketTimestampResponse";
+export * from "./WebSocketTimestampsResponse";
 export * from "./WebSocketTtsOutput";
+export * from "./WebSocketStreamOptions";
 export * from "./WordTimestamps";
 export * from "./WebSocketDoneResponse";
 export * from "./CancelContextRequest";

diff --git a/src/api/resources/voiceChanger/client/Client.ts b/src/api/resources/voiceChanger/client/Client.ts
@@ -48,15 +48,15 @@ export class VoiceChanger {
     ): Promise<stream.Readable> {
         const _request = await core.newFormData();
         await _request.appendFile("clip", clip);
-        await _request.append("voice[id]", request.voice[id]);
-        await _request.append("output_format[container]", request.output_format[container]);
-        await _request.append("output_format[sample_rate]", request.output_format[sample_rate].toString());
-        if (request.output_format[encoding] != null) {
-            await _request.append("output_format[encoding]", request.output_format[encoding]);
+        await _request.append("voice[id]", request.voiceId);
+        await _request.append("output_format[container]", request.outputFormatContainer);
+        await _request.append("output_format[sample_rate]", request.outputFormatSampleRate.toString());
+        if (request.outputFormatEncoding != null) {
+            await _request.append("output_format[encoding]", request.outputFormatEncoding);
         }
 
-        if (request.output_format[bit_rate] != null) {
-            await _request.append("output_format[bit_rate]", request.output_format[bit_rate].toString());
+        if (request.outputFormatBitRate != null) {
+            await _request.append("output_format[bit_rate]", request.outputFormatBitRate.toString());
         }
 
         const _maybeEncodedRequest = await _request.getRequest();
@@ -118,15 +118,15 @@ export class VoiceChanger {
     ): Promise<core.Stream<Cartesia.StreamingResponse>> {
         const _request = await core.newFormData();
         await _request.appendFile("clip", clip);
-        await _request.append("voice[id]", request.voice[id]);
-        await _request.append("output_format[container]", request.output_format[container]);
-        await _request.append("output_format[sample_rate]", request.output_format[sample_rate].toString());
-        if (request.output_format[encoding] != null) {
-            await _request.append("output_format[encoding]", request.output_format[encoding]);
+        await _request.append("voice[id]", request.voiceId);
+        await _request.append("output_format[container]", request.outputFormatContainer);
+        await _request.append("output_format[sample_rate]", request.outputFormatSampleRate.toString());
+        if (request.outputFormatEncoding != null) {
+            await _request.append("output_format[encoding]", request.outputFormatEncoding);
         }
 
-        if (request.output_format[bit_rate] != null) {
-            await _request.append("output_format[bit_rate]", request.output_format[bit_rate].toString());
+        if (request.outputFormatBitRate != null) {
+            await _request.append("output_format[bit_rate]", request.outputFormatBitRate.toString());
         }
 
         const _maybeEncodedRequest = await _request.getRequest();