NDLANO · rauboti · Nov 27, 2024 · Nov 27, 2024 · Nov 29, 2024 · Nov 29, 2024
diff --git a/package.json b/package.json
@@ -77,6 +77,7 @@
   "dependencies": {
     "@ark-ui/react": "^4.1.2",
     "@aws-sdk/client-bedrock-runtime": "^3.670.0",
+    "@aws-sdk/client-transcribe": "^3.699.0",
     "@dnd-kit/core": "^6.0.8",
     "@dnd-kit/modifiers": "^6.0.1",
     "@dnd-kit/sortable": "^7.0.2",
@@ -100,6 +101,7 @@
     "@ndla/video-search": "^8.0.70-alpha.0",
     "@tanstack/react-query": "5.62.3",
     "auth0-js": "^9.22.1",
+    "buffer": "^6.0.3",
     "compression": "^1.7.4",
     "cross-fetch": "^3.1.5",
     "date-fns": "2.30.0",

diff --git a/src/components/LLM/helpers.ts b/src/components/LLM/helpers.ts
@@ -6,28 +6,36 @@
  *
  */
 
+import { Buffer } from "buffer";
+
 export const claudeHaikuDefaults = { top_p: 0.7, top_k: 100, temperature: 0.9 };
 
 interface modelProps {
   prompt: string;
+  image?: {
+    base64: string;
+    fileType: string;
+  };
   max_tokens?: number;
 }
 
-export const invokeModel = async ({ prompt, max_tokens = 2000, ...rest }: modelProps) => {
+export const invokeModel = async ({ prompt, image, max_tokens = 2000, ...rest }: modelProps) => {
   if (!prompt) {
     // console.error("No prompt provided to invokeModel");
     return null;
   }
+
+  const payload: any = { prompt, max_tokens, ...rest };
+  if (image) {
+    payload.image = image;
+  }
+
   const response = await fetch("/invoke-model", {
     method: "POST",
     headers: {
       "Content-Type": "application/json",
     },
-    body: JSON.stringify({
-      prompt: prompt,
-      max_tokens: max_tokens,
-      ...rest,
-    }),
+    body: JSON.stringify(payload),
   });
 
   if (!response.ok) {
@@ -48,3 +56,7 @@ export const getTextFromHTML = (html: string) => {
 const parseResponse = (response: string) => {
   return response.split("<answer>")[1].split("</answer>")[0].trim();
 };
+
+// Encodes the bytes of the given ArrayBuffer as a base64 string.
+export const convertBufferToBase64 = (buffer: ArrayBuffer) =>
+  Buffer.from(buffer).toString("base64");
diff --git a/src/components/SlateEditor/RichTextEditor.tsx b/src/components/SlateEditor/RichTextEditor.tsx
@@ -140,7 +140,11 @@ const RichTextEditor = ({
 
   useEffect(() => {
     // When form is submitted or form content has been revert to a previous version, the editor has to be reinitialized.
-    if ((!submitted && prevSubmitted.current) || status === "revertVersion") {
+    if (
+      (!submitted && prevSubmitted.current) ||
+      status.status === "revertVersion" ||
+      status.status === "acceptGenerated"
+    ) {
       if (isFirstNormalize) {
         return;
       }

diff --git a/src/components/SlateEditor/plugins/image/ImageEmbedForm.tsx b/src/components/SlateEditor/plugins/image/ImageEmbedForm.tsx
@@ -7,10 +7,10 @@
  */
 
 import { Formik, useFormikContext } from "formik";
-import { useMemo } from "react";
+import { useMemo, useState } from "react";
 import { useTranslation } from "react-i18next";
 import { Descendant } from "slate";
-import { CheckLine } from "@ndla/icons";
+import { FileListLine, CheckLine } from "@ndla/icons";
 import {
   Button,
   CheckboxControl,
@@ -22,10 +22,12 @@ import {
   FieldRoot,
   FieldErrorMessage,
   FieldTextArea,
+  Spinner,
 } from "@ndla/primitives";
 import { styled } from "@ndla/styled-system/jsx";
 import { IImageMetaInformationV3DTO } from "@ndla/types-backend/image-api";
 import { ImageEmbedData } from "@ndla/types-embed";
+import { convertBufferToBase64, claudeHaikuDefaults, invokeModel } from "../../../../components/LLM/helpers";
 import { InlineField } from "../../../../containers/FormikForm/InlineField";
 import ImageEditor from "../../../../containers/ImageEditor/ImageEditor";
 import { inlineContentToEditorValue, inlineContentToHTML } from "../../../../util/articleContentConverter";
@@ -146,6 +148,12 @@ const InputWrapper = styled("div", {
   },
 });
 
+const StyledButton = styled(Button, {
+  base: {
+    alignSelf: "flex-start", // align the button to the start of its parent's cross axis
+  },
+});
+
 const EmbedForm = ({
   onClose,
   language,
@@ -156,6 +164,31 @@ const EmbedForm = ({
   const inGrid = useInGrid();
   const { values, initialValues, isValid, setFieldValue, dirty, isSubmitting } =
     useFormikContext<ImageEmbedFormValues>();
+  const [isLoading, setIsLoading] = useState<boolean>(false);
+
+  // Requests an alt-text suggestion for the current image; resolves to null when there is nothing to send.
+  const generateAltText = async () => {
+    const imageUrl = image?.image.imageUrl;
+    if (!imageUrl) {
+      return null;
+    }
+    setIsLoading(true);
+    try {
+      const response = await fetch(imageUrl);
+      if (!response.ok) {
+        return null;
+      }
+      const base64 = convertBufferToBase64(await response.arrayBuffer());
+      return await invokeModel({
+        prompt: t("textGeneration.altText.prompt", { language: t(`languages.${language}`) }),
+        image: { base64, fileType: response.headers.get("Content-Type") ?? "" },
+        max_tokens: 2000,
+        ...claudeHaikuDefaults,
+      });
+    } finally {
+      setIsLoading(false); // runs on every exit path, so the spinner never sticks after a failure
+    }
+  };
 
   const formIsDirty = isFormikFormDirty({
     values,
@@ -182,13 +215,25 @@ const EmbedForm = ({
             </FieldRoot>
           )}
         </FormField>
-
         {!values.isDecorative && (
           <FormField name="alt">
-            {({ field, meta }) => (
+            {({ field, meta, helpers }) => (
               <FieldRoot invalid={!!meta.error}>
                 <FieldLabel>{t("form.image.alt.label")}</FieldLabel>
                 <FieldTextArea {...field} placeholder={t("form.image.alt.placeholder")} />
+                <StyledButton
+                  onClick={async () => {
+                    const text = await generateAltText();
+                    if (text && text.length > 0) {
+                      helpers.setValue(text);
+                    }
+                  }}
+                  size="small"
+                  title={t("textGeneration.altText.title")}
+                >
+                  {t("textGeneration.altText.button")}
+                  {isLoading ? <Spinner size="small" /> : <FileListLine />}
+                </StyledButton>
                 <FieldErrorMessage>{meta.error}</FieldErrorMessage>
               </FieldRoot>
             )}

diff --git a/src/components/Transcribe/helpers.ts b/src/components/Transcribe/helpers.ts
@@ -0,0 +1,48 @@
+/**
+ * Copyright (c) 2024-present, NDLA.
+ *
+ * This source code is licensed under the GPLv3 license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+interface props { // arguments accepted by transcribe()
+  fileUrl: string; // sent in the request body as mediaFileUri
+  languageCode: string;
+  mediaFormat: string;
+  maxSpeakers?: number; // only included in the request body when truthy
+  outputFileName: string;
+}
+
+// POSTs a transcription request as JSON to /transcribe and resolves with the parsed JSON response body.
+export const transcribe = async ({ fileUrl, maxSpeakers, mediaFormat, languageCode, outputFileName }: props) => {
+  // maxSpeakers is optional: the key is left out of the request unless a truthy value was given.
+  const payload = {
+    mediaFileUri: fileUrl,
+    languageCode,
+    mediaFormat,
+    outputFileName,
+    ...(maxSpeakers ? { maxSpeakers } : {}),
+  };
+
+  // NOTE(review): response.ok is not checked, so an error body is parsed and returned like a success.
+  const response = await fetch("/transcribe", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify(payload),
+  });
+
+  return response.json();
+};
+
+// GETs /transcribe/<jobName> and resolves with the parsed JSON response body.
+export const getTranscription = async (jobName: string) => {
+  // Encode the name so characters such as "/", "?" or "#" cannot alter the request path.
+  // NOTE(review): response.ok is not checked, so an error body is parsed and returned like a success.
+  const response = await fetch(`/transcribe/${encodeURIComponent(jobName)}`, {
+    method: "GET",
+    headers: { "Content-Type": "application/json" },
+  });
+  return response.json();
+};
diff --git a/src/config.ts b/src/config.ts
@@ -124,6 +124,15 @@ const usernamePasswordEnabled = (ndlaEnvironment: string) => {
   }
 };
 
+/**
+ * Maps an NDLA environment name to the S3 root string used for the `s3AudioRoot` config value.
+ * Only "prod" selects the prod bucket; any other environment falls back to the test bucket.
+ */
+const getAudioS3Root = (ndlaEnvironment: string) => {
+  const isProd = ndlaEnvironment === "prod";
+  return isProd ? "s3://prod.audio.2.ndla/" : "s3://test.audio.2.ndla/";
+};
+
 export type ConfigType = {
   brightcoveAccountId: string | undefined;
   logEnvironment: string | undefined;
@@ -159,6 +168,7 @@ export type ConfigType = {
   defaultLanguage: LocaleType;
   runtimeType: RuntimeType;
   enableH5pCopy: boolean;
+  s3AudioRoot: string;
 };
 
 const getServerSideConfig = (): ConfigType => {
@@ -204,6 +214,7 @@ const getServerSideConfig = (): ConfigType => {
     isVercel: getEnvironmentVariabel("IS_VERCEL", "false") === "true",
     runtimeType: getEnvironmentVariabel("NODE_ENV", "development") as "test" | "development" | "production",
     enableH5pCopy: getEnvironmentVariabel("ENABLE_H5P_COPY", "true") === "true",
+    s3AudioRoot: getAudioS3Root(ndlaEnvironment),
   };
 };
 

diff --git a/src/containers/AudioUploader/components/AudioForm.tsx b/src/containers/AudioUploader/components/AudioForm.tsx
@@ -222,7 +222,13 @@ const AudioForm = ({
                 title={t("podcastForm.fields.manuscript")}
                 hasError={[].some((field) => field in errors)}
               >
-                <AudioManuscript />
+                <AudioManuscript
+                  audioName={audio?.title.title}
+                  audioId={audio?.id}
+                  audioLanguage={audioLanguage}
+                  audioUrl={audio?.audioFile.url}
+                  audioType={audio?.audioFile.url.split(".").pop()}
+                />
               </FormAccordion>
               <FormAccordion
                 id="audio-upload-copyright"