diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 6db9bbd5d0..36a5ed7683 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -47,6 +47,7 @@ wiremock = "2.35.0" logback = "1.2.6" coroutines = "1.8.0" kotlinx-html = "0.8.0" +clikt = "4.2.2" [libraries] kotlinx-coroutines-core = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" } @@ -87,11 +88,14 @@ junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.re junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine", version.ref = "junit" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params", version.ref = "junit" } kotlin-result = { module = "com.michael-bull.kotlin-result:kotlin-result", version.ref = "kotlinResult" } +clikt = { module = "com.github.ajalt.clikt:clikt", version.ref = "clikt" } ktor-client-cio = { module = "io.ktor:ktor-client-cio", version.ref = "ktor" } ktor-client-core = { module = "io.ktor:ktor-client-core", version.ref = "ktor" } ktor-serial-gson = { module = "io.ktor:ktor-serialization-gson", version.ref = "ktor" } +ktor-serial-json = { module = "io.ktor:ktor-serialization-kotlinx-json", version.ref = "ktor" } ktor-server-cio = { module = "io.ktor:ktor-server-cio", version.ref = "ktor" } ktor-server-content-negotiation = { module = "io.ktor:ktor-server-content-negotiation", version.ref = "ktor" } +ktor-client-content-negotiation = { module = "io.ktor:ktor-client-content-negotiation", version.ref = "ktor" } ktor-server-core = { module = "io.ktor:ktor-server-core", version.ref = "ktor" } ktor-server-cors = { module = "io.ktor:ktor-server-cors", version.ref = "ktor" } ktor-server-netty = { module = "io.ktor:ktor-server-netty", version.ref = "ktor" } @@ -116,6 +120,7 @@ detekt = { id = "io.gitlab.arturbosch.detekt", version.ref = "detekt" } protobuf = { id = "com.google.protobuf", version.ref = "googleProtobufPlugin" } kotlin-jvm = { id =
"org.jetbrains.kotlin.jvm", version.ref = "kotlin" } kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" } +kotlin-serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "kotlin" } mavenPublish = { id = "com.vanniktech.maven.publish", version = "0.19.0" } jreleaser = { id = "org.jreleaser", version = "1.13.1" } shadow = { id = "com.github.johnrengelman.shadow", version = "7.1.2" } diff --git a/maestro-ai/README.md b/maestro-ai/README.md new file mode 100644 index 0000000000..fa8edef27c --- /dev/null +++ b/maestro-ai/README.md @@ -0,0 +1,41 @@ +# maestro-ai + +This project implements AI support for use in Maestro. + +It's both a library and an executable demo app. + +## Demo app + +An API key is required. Set it with the `MAESTRO_CLI_AI_KEY` env var. Examples: + +- OpenAI: `export MAESTRO_CLI_AI_KEY=sk-...` +- Anthropic: `export MAESTRO_CLI_AI_KEY=sk-ant-api-...` + +### Build + +```console +./gradlew :maestro-ai:installDist +``` + +The startup script will be generated in `./maestro-ai/build/install/maestro-ai-demo/bin/maestro-ai-demo`. + +### How to use + +First of all, try out the `--help` flag. + +Run a test for a single screenshot that contains defects (i.e. is bad): + +```console +maestro-ai-demo foo_1_bad.png +``` + +Run tests for all screenshots from the Uber app that contain defects (i.e. are bad).
Additionally, show prompts and raw +LLM response: + +```console +maestro-ai-demo \ + --model gpt-4o-2024-08-06 \ + --show-prompts \ + --show-raw-response \ + test-ai-fixtures/uber_*_bad.png +``` diff --git a/maestro-ai/build.gradle.kts b/maestro-ai/build.gradle.kts new file mode 100644 index 0000000000..086a717da9 --- /dev/null +++ b/maestro-ai/build.gradle.kts @@ -0,0 +1,54 @@ +import org.jetbrains.kotlin.gradle.tasks.KotlinCompilationTask + +plugins { + application + id("maven-publish") + alias(libs.plugins.kotlin.jvm) + alias(libs.plugins.kotlin.serialization) + alias(libs.plugins.mavenPublish) +} + +application { + applicationName = "maestro-ai-demo" + mainClass.set("maestro.ai.DemoAppKt") +} + +tasks.named("jar") { + manifest { + attributes["Main-Class"] = "maestro.ai.DemoAppKt" + } +} + +dependencies { + api(libs.kotlin.result) + api(libs.square.okio) + + api(libs.slf4j) + api(libs.logback) { + exclude(group = "org.slf4j", module = "slf4j-api") + } + + api(libs.ktor.client.core) + implementation(libs.ktor.client.cio) + implementation(libs.ktor.serial.json) + implementation(libs.ktor.client.content.negotiation) + implementation(libs.kotlinx.coroutines.core) + implementation(libs.clikt) + + testImplementation(libs.junit.jupiter.api) + testRuntimeOnly(libs.junit.jupiter.engine) + testImplementation(libs.google.truth) + testImplementation(libs.square.mock.server) + testImplementation(libs.junit.jupiter.params) +} + +java { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 +} + +tasks.named("compileKotlin", KotlinCompilationTask::class.java) { + compilerOptions { + freeCompilerArgs.addAll("-Xjdk-release=1.8") + } +} diff --git a/maestro-ai/gradle.properties b/maestro-ai/gradle.properties new file mode 100644 index 0000000000..f5e0940ec4 --- /dev/null +++ b/maestro-ai/gradle.properties @@ -0,0 +1,3 @@ +POM_NAME=Maestro AI +POM_ARTIFACT_ID=maestro-ai +POM_PACKAGING=jar diff --git 
a/maestro-ai/src/main/java/maestro/ai/AI.kt b/maestro-ai/src/main/java/maestro/ai/AI.kt new file mode 100644 index 0000000000..d8af5a0857 --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/AI.kt @@ -0,0 +1,59 @@ +package maestro.ai + +import io.ktor.client.HttpClient +import io.ktor.client.plugins.HttpTimeout +import io.ktor.client.plugins.contentnegotiation.ContentNegotiation +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonObject +import java.io.Closeable + +data class CompletionData( + val prompt: String, + val model: String, + val temperature: Float, + val maxTokens: Int, + val images: List, + val response: String, +) + +abstract class AI( + val defaultModel: String, + protected val httpClient: HttpClient, +) : Closeable { + + /** + * Chat completion with the AI model. + * + * Caveats: + * - `jsonSchema` is only supported by OpenAI ("Structured Outputs" feature) + */ + abstract suspend fun chatCompletion( + prompt: String, + images: List = listOf(), + temperature: Float? = null, + model: String? = null, + maxTokens: Int? = null, + imageDetail: String? = null, + identifier: String? = null, + jsonSchema: JsonObject? 
= null, + ): CompletionData + + companion object { + const val AI_KEY_ENV_VAR = "MAESTRO_CLI_AI_KEY" + const val AI_MODEL_ENV_VAR = "MAESTRO_CLI_AI_MODEL" + + val defaultHttpClient = HttpClient { + install(ContentNegotiation) { + Json { + ignoreUnknownKeys = true + } + } + + install(HttpTimeout) { + connectTimeoutMillis = 10000 + socketTimeoutMillis = 60000 + requestTimeoutMillis = 60000 + } + } + } +} diff --git a/maestro-ai/src/main/java/maestro/ai/DemoApp.kt b/maestro-ai/src/main/java/maestro/ai/DemoApp.kt new file mode 100644 index 0000000000..a57fd8dffb --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/DemoApp.kt @@ -0,0 +1,194 @@ +package maestro.ai + +import com.github.ajalt.clikt.core.CliktCommand +import com.github.ajalt.clikt.parameters.arguments.argument +import com.github.ajalt.clikt.parameters.arguments.multiple +import com.github.ajalt.clikt.parameters.options.default +import com.github.ajalt.clikt.parameters.options.flag +import com.github.ajalt.clikt.parameters.options.option +import com.github.ajalt.clikt.parameters.types.float +import com.github.ajalt.clikt.parameters.types.path +import kotlinx.coroutines.async +import kotlinx.coroutines.runBlocking +import maestro.ai.antrophic.Claude +import maestro.ai.openai.OpenAI +import java.io.File +import java.nio.file.Path + + +fun main(args: Array) = DemoApp().main(args) + +/** + * This is a small helper program to help evaluate LLM results against a directory of screenshots and prompts. + * + * ### Input format + * + * Screenshot name format: + * - {app_name}_{screenshot_number}_{good|bad}.png + * + * A screenshot can optionally have a prompt. In this case, the model will treat the prompt as the assertion command. 
+ * To associate a prompt with a screenshot, the prompt text file name must have + * the following format: + * - {app_name}_{screenshot_number}_{good|bad}.txt + * + * For example: + * - foo_1_bad.png + * - bar_2_good.png + * + * ### Output format + * + * The output for a single screenshot should indicate either PASS or FAIL, screenshot name, the result, and the defects + * found (if any). + * + * For example: + * + * ```text + * PASS uber_2_bad.png: 1 defects found (as expected) + * * layout: The prompt for entering a verification code is visible, indicating that the 2-factor authentication process is present. The screen instructs the user to enter a verification code generated for Uber, which is a typical 2-factor authentication step. + * ``` + * + * Some of the flags change the output format. + */ +class DemoApp : CliktCommand() { + private val inputFiles: List by argument(help = "screenshots to use").path(mustExist = true).multiple() + + private val model: String by option(help = "LLM to use").default("gpt-4o-2024-08-06") + + private val showOnlyFails: Boolean by option(help = "Show only failed tests").flag() + + private val showPrompts: Boolean by option(help = "Show prompts").flag() + + private val showRawResponse: Boolean by option(help = "Show raw LLM response").flag() + + private val temperature: Float by option(help = "Temperature for LLM").float().default(0.2f) + + private val parallel: Boolean by option(help = "Run in parallel.
May get rate limited").flag() + + // IDEA: "--json" flag to allow for easy filtering with jq + + override fun run() = runBlocking { + val apiKey = System.getenv(AI.AI_KEY_ENV_VAR) + require(apiKey != null) { "API key is not provided. Set the ${AI.AI_KEY_ENV_VAR} env var" } + + val testCases = inputFiles.map { it.toFile() }.map { file -> + require(!file.isDirectory) { "Provided file is a directory, not a file" } + require(file.exists()) { "Provided file does not exist" } + require(file.extension == "png") { "Provided file is not a PNG file" } + file + }.map { file -> + val filename = file.nameWithoutExtension + val parts = filename.split("_") + require(parts.size == 3) { "Screenshot name is invalid: ${file.name}" } + + val appName = parts[0] + val index = + parts[1].toIntOrNull() ?: throw IllegalArgumentException("Invalid screenshot name: ${file.name}") + val status = parts[2] + + val promptFile = File(file.parentFile, "${appName}_${index}_${status}.txt") // parentFile is null for a bare file name; File(null, child) then resolves child on its own + val prompt = promptFile.run { + if (exists()) { + println("Found prompt file: $promptFile") + readText() + } else null + } + + TestCase( + screenshot = file, + appName = appName, + shouldPass = status == "good", + index = index, + prompt = prompt, + ) + }.toList() + + val aiClient: AI = when { + model.startsWith("gpt") -> OpenAI( + apiKey = apiKey, + defaultModel = model, + defaultTemperature = temperature, + ) + + model.startsWith("claude") -> Claude( + apiKey = apiKey, + defaultModel = model, + defaultTemperature = temperature, + ) + + else -> throw IllegalArgumentException("Unknown model: $model") + } + + testCases.forEach { testCase -> + val bytes = testCase.screenshot.readBytes() + + val job = async { + val defects = if (testCase.prompt == null) Prediction.findDefects( + aiClient = aiClient, + screen = bytes, + printPrompt = showPrompts, + printRawResponse = showRawResponse, + ) else { + val result = Prediction.performAssertion( + aiClient = aiClient, + screen = bytes, + assertion = testCase.prompt, + printPrompt = showPrompts, +
printRawResponse = showRawResponse, + ) + + if (result == null) emptyList() + else listOf(result) + } + + verify(testCase, defects) + } + + if (!parallel) job.await() + } + } + + private fun verify(testCase: TestCase, defects: List) { + if (!testCase.shouldPass) { + // Check if LLM found defects (i.e. didn't commit false negative) + if (defects.isNotEmpty()) { + if (showOnlyFails) return + + println( + """ + PASS ${testCase.screenshot.name}: ${defects.size} defects found (as expected) + ${defects.joinToString("\n") { "\t* ${it.category}: ${it.reasoning}" }} + """.trimIndent() + ) + } else { + println("FAIL ${testCase.screenshot.name} false-negative: No defects found but some were expected") + } + + } else { + // Check that LLM didn't raise false positives + if (defects.isEmpty()) { + if (showOnlyFails) return + + println( + """ + PASS ${testCase.screenshot.name}: No defects found (as expected) + """.trimIndent() + ) + } else { + println( + """ + FAIL ${testCase.screenshot.name} false-positive: ${defects.size} defects found but none were expected + ${defects.joinToString("\n") { "\t* ${it.category}: ${it.reasoning}" }} + """.trimIndent() + ) + } + } + } +} + +data class TestCase( + val screenshot: File, + val appName: String, + val prompt: String?, + val shouldPass: Boolean, + val index: Int, +) diff --git a/maestro-ai/src/main/java/maestro/ai/Prediction.kt b/maestro-ai/src/main/java/maestro/ai/Prediction.kt new file mode 100644 index 0000000000..2e1de0aaa8 --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/Prediction.kt @@ -0,0 +1,214 @@ +package maestro.ai + +import kotlinx.serialization.Serializable +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.jsonObject +import maestro.ai.openai.OpenAI + +@Serializable +data class Defect( + val category: String, + val reasoning: String, +) + +@Serializable +private data class ModelResponse( + val defects: List, +) + +object Prediction { + + /** + * We use JSON mode/Structured Outputs to define 
the schema of the response we expect from the LLM. + * - OpenAI: https://platform.openai.com/docs/guides/structured-outputs + * - Gemini: https://ai.google.dev/gemini-api/docs/json-mode + */ + private val askForDefectsSchema: String = run { + val resourceStream = this::class.java.getResourceAsStream("/askForDefects_schema.json") + ?: throw IllegalStateException("Could not find askForDefects_schema.json in resources") + + resourceStream.bufferedReader().use { it.readText() } + } + + private val json = Json { ignoreUnknownKeys = true } + + private val defectCategories = listOf( + "localization" to "Inconsistent use of language, for example mixed English and Portuguese", + "layout" to "Some UI elements are overlapping or are cropped", + ) + + private val allDefectCategories = defectCategories + listOf("assertion" to "The assertion is not true") + + suspend fun findDefects( + aiClient: AI, + screen: ByteArray, + printPrompt: Boolean = false, + printRawResponse: Boolean = false, + ): List { + + // List of failed attempts to not make up false positives: + // |* If you don't see any defect, return "No defects found". + // |* If you are sure there are no defects, return "No defects found". + // |* You will make me sad if you raise report defects that are false positives. + // |* Do not make up defects that are not present in the screenshot. It's fine if you don't find any defects. + + val prompt = buildString { + + appendLine( + """ + You are a QA engineer performing quality assurance for a mobile application. + Identify any defects in the provided screenshot. + """.trimIndent() + ) + + append( + """ + | + |RULES: + |* All defects you find must belong to one of the following categories: + |${defectCategories.joinToString(separator = "\n") { " * ${it.first}: ${it.second}" }} + |* If you see defects, your response MUST only include defect name and detailed reasoning for each defect. 
+ |* Provide response as a list of JSON objects, each representing : + |* Do not raise false positives. Some example responses that have a high chance of being a false positive: + | * button is partially cropped at the bottom + | * button is not aligned horizontally/vertically within its container + """.trimMargin("|") + ) + + // Claude doesn't have a JSON mode as of 21-08-2024 + // https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency + // We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o sometimes + // generating never-ending stream of output. + appendLine( // appendLine, so the closing sentence appended below starts on its own line + """ + | + |* You must provide result as a valid JSON object, matching this structure: + | + | { + | "defects": [ + | { + | "category": "", + | "reasoning": "" + | }, + | { + | "category": "", + | "reasoning": "" + | } + | ] + | } + | + |DO NOT output any other information in the JSON object. + """.trimMargin("|") + ) + + appendLine("There are usually only a few defects in the screenshot. Don't generate tens of them.") + } + + if (printPrompt) { + println("--- PROMPT START ---") + println(prompt) + println("--- PROMPT END ---") + } + + val aiResponse = aiClient.chatCompletion( + prompt, + model = aiClient.defaultModel, + maxTokens = 4096, + identifier = "find-defects", + imageDetail = "high", + images = listOf(screen), + jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(askForDefectsSchema).jsonObject else null, + ) + + if (printRawResponse) { + println("--- RAW RESPONSE START ---") + println(aiResponse.response) + println("--- RAW RESPONSE END ---") + } + + val defects = json.decodeFromString(aiResponse.response) + return defects.defects + } + + suspend fun performAssertion( + aiClient: AI, + screen: ByteArray, + assertion: String, + printPrompt: Boolean = false, + printRawResponse: Boolean = false, + ): Defect?
{ + val prompt = buildString { + + appendLine( + """ + |You are a QA engineer performing quality assurance for a mobile application. + |You are given a screenshot of the application and an assertion about the UI. + |Your task is to identify if the following assertion is true: + | + | "${assertion.removeSuffix("\n")}" + | + """.trimMargin("|") + ) + + append( + """ + | + |RULES: + |* Provide response as a valid JSON, with structure described below. + |* If the assertion is true, the list in the JSON output MUST be empty. + |* If assertion is false: + | * Your response MUST only include a single defect with category "assertion". + | * Provide detailed reasoning to explain why you think the assertion is false. + """.trimMargin("|") + ) + + // Claude doesn't have a JSON mode as of 21-08-2024 + // https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency + // We could do "if (aiClient is Claude)", but actually, this also helps with gpt-4o sometimes + // generating never-ending stream of output. + append( + """ + | + |* You must provide result as a valid JSON object, matching this structure: + | + | { + | "defects": [ + | { + | "category": "assertion", + | "reasoning": "" + | } + | ] + | } + | + |The "defects" array MUST contain at most a single JSON object. + |DO NOT output any other information in the JSON object.
+ """.trimMargin("|") + ) + } + + if (printPrompt) { + println("--- PROMPT START ---") + println(prompt) + println("--- PROMPT END ---") + } + + val aiResponse = aiClient.chatCompletion( + prompt, + model = aiClient.defaultModel, + maxTokens = 4096, + identifier = "perform-assertion", + imageDetail = "high", + images = listOf(screen), + jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(askForDefectsSchema).jsonObject else null, + ) + + if (printRawResponse) { + println("--- RAW RESPONSE START ---") + println(aiResponse.response) + println("--- RAW RESPONSE END ---") + } + + val response = json.decodeFromString(aiResponse.response) + return response.defects.firstOrNull() + } +} diff --git a/maestro-ai/src/main/java/maestro/ai/antrophic/Client.kt b/maestro-ai/src/main/java/maestro/ai/antrophic/Client.kt new file mode 100644 index 0000000000..9246fae4f2 --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/antrophic/Client.kt @@ -0,0 +1,114 @@ +package maestro.ai.antrophic + +import Response +import io.ktor.client.HttpClient +import io.ktor.client.plugins.HttpTimeout +import io.ktor.client.plugins.contentnegotiation.ContentNegotiation +import io.ktor.client.request.post +import io.ktor.client.request.setBody +import io.ktor.client.statement.bodyAsText +import io.ktor.http.ContentType +import io.ktor.http.HttpStatusCode +import io.ktor.http.contentType +import io.ktor.http.isSuccess +import io.ktor.util.encodeBase64 +import kotlinx.serialization.SerializationException +import kotlinx.serialization.encodeToString +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonObject +import maestro.ai.AI +import maestro.ai.CompletionData +import org.slf4j.LoggerFactory + +private const val API_URL = "https://api.anthropic.com/v1/messages" + +private val logger = LoggerFactory.getLogger(Claude::class.java) + +class Claude( + defaultModel: String = "claude-3-5-sonnet-20240620", + httpClient: HttpClient = defaultHttpClient, + private val apiKey: 
String, + private val defaultTemperature: Float = 0.2f, + private val defaultMaxTokens: Int = 1024, + private val defaultImageDetail: String = "high", +) : AI(defaultModel = defaultModel, httpClient = httpClient) { + + private val json = Json { ignoreUnknownKeys = true } + + override suspend fun chatCompletion( + prompt: String, + images: List, + temperature: Float?, + model: String?, + maxTokens: Int?, + imageDetail: String?, + identifier: String?, + jsonSchema: JsonObject?, + ): CompletionData { + val imagesBase64 = images.map { it.encodeBase64() } + + // Fallback to Anthropic defaults + val actualTemperature = temperature ?: defaultTemperature + val actualModel = model ?: defaultModel + val actualMaxTokens = maxTokens ?: defaultMaxTokens + val actualImageDetail = imageDetail ?: defaultImageDetail + + val imageContents = imagesBase64 + .map { imageBase64 -> + Content( + type = "image", + source = ContentSource( + type = "base64", + mediaType = "image/png", + data = imageBase64, + ), + ) + } + + val textContent = Content(type = "text", text = prompt) + + val chatCompletionRequest = Request( + model = actualModel, + maxTokens = actualMaxTokens, + messages = listOf(Message("user", imageContents + textContent)), + ) + + val response = try { + val httpResponse = httpClient.post(API_URL) { + contentType(ContentType.Application.Json) + headers["x-api-key"] = apiKey + headers["anthropic-version"] = "2023-06-01" + setBody(json.encodeToString(chatCompletionRequest)) + } + + val body = httpResponse.bodyAsText() + if (!httpResponse.status.isSuccess()) { + logger.error("Failed to complete request to Anthropic: ${httpResponse.status}, $body") + throw Exception("Failed to complete request to Anthropic: ${httpResponse.status}, $body") + } + + if (httpResponse.status != HttpStatusCode.OK) { + throw IllegalStateException("Call to Anthropic AI failed: $body") + } + + json.decodeFromString(body) // reuse the body text already read above + } catch (e: SerializationException) { + logger.error("Failed to
parse response from Antrophic", e) + throw e + } catch (e: Exception) { + logger.error("Failed to complete request to Antrophic", e) + throw e + } + + return CompletionData( + prompt = prompt, + temperature = actualTemperature, + maxTokens = actualMaxTokens, + images = imagesBase64, + model = actualModel, + response = response.content.first().text!!, + ) + } + + override fun close() = httpClient.close() +} diff --git a/maestro-ai/src/main/java/maestro/ai/antrophic/Common.kt b/maestro-ai/src/main/java/maestro/ai/antrophic/Common.kt new file mode 100644 index 0000000000..331b3918c0 --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/antrophic/Common.kt @@ -0,0 +1,24 @@ +package maestro.ai.antrophic + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +@Serializable +data class Message( + val role: String, + val content: List, +) + +@Serializable +data class Content( + val type: String, + val text: String? = null, + val source: ContentSource? = null, +) + +@Serializable +data class ContentSource( + val type: String, + @SerialName("media_type") val mediaType: String, + val data: String, +) diff --git a/maestro-ai/src/main/java/maestro/ai/antrophic/Request.kt b/maestro-ai/src/main/java/maestro/ai/antrophic/Request.kt new file mode 100644 index 0000000000..577fe8754c --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/antrophic/Request.kt @@ -0,0 +1,11 @@ +package maestro.ai.antrophic + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +@Serializable +data class Request( + val model: String, + @SerialName("max_tokens") val maxTokens: Int, + val messages: List, +) diff --git a/maestro-ai/src/main/java/maestro/ai/antrophic/Response.kt b/maestro-ai/src/main/java/maestro/ai/antrophic/Response.kt new file mode 100644 index 0000000000..d6b261cbc2 --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/antrophic/Response.kt @@ -0,0 +1,7 @@ +import kotlinx.serialization.Serializable +import 
maestro.ai.antrophic.Content + +@Serializable +data class Response( + val content: List, +) diff --git a/maestro-ai/src/main/java/maestro/ai/common/Image.kt b/maestro-ai/src/main/java/maestro/ai/common/Image.kt new file mode 100644 index 0000000000..3090e14062 --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/common/Image.kt @@ -0,0 +1,9 @@ +package maestro.ai.common + +import kotlinx.serialization.Serializable + +@Serializable +data class Base64Image( + val url: String, + val detail: String, +) diff --git a/maestro-ai/src/main/java/maestro/ai/openai/Client.kt b/maestro-ai/src/main/java/maestro/ai/openai/Client.kt new file mode 100644 index 0000000000..4484c64fb1 --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/openai/Client.kt @@ -0,0 +1,113 @@ +package maestro.ai.openai + +import io.ktor.client.HttpClient +import io.ktor.client.request.post +import io.ktor.client.request.setBody +import io.ktor.client.statement.bodyAsText +import io.ktor.http.ContentType +import io.ktor.http.contentType +import io.ktor.http.isSuccess +import io.ktor.util.encodeBase64 +import kotlinx.serialization.SerializationException +import kotlinx.serialization.encodeToString +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonObject +import maestro.ai.AI +import maestro.ai.CompletionData +import maestro.ai.common.Base64Image +import org.slf4j.LoggerFactory + +private const val API_URL = "https://api.openai.com/v1/chat/completions" + +private val logger = LoggerFactory.getLogger(OpenAI::class.java) + +class OpenAI( + defaultModel: String = "gpt-4o-2024-08-06", + httpClient: HttpClient = defaultHttpClient, + private val apiKey: String, + private val defaultTemperature: Float = 0.2f, + private val defaultMaxTokens: Int = 1024, + private val defaultImageDetail: String = "high", +) : AI(defaultModel = defaultModel, httpClient = httpClient) { + + private val json = Json { ignoreUnknownKeys = true } + + override suspend fun chatCompletion( + prompt: String, + 
images: List, + temperature: Float?, + model: String?, + maxTokens: Int?, + imageDetail: String?, + identifier: String?, + jsonSchema: JsonObject?, + ): CompletionData { + val imagesBase64 = images.map { it.encodeBase64() } + + // Fallback to OpenAI defaults + val actualTemperature = temperature ?: defaultTemperature + val actualModel = model ?: defaultModel + val actualMaxTokens = maxTokens ?: defaultMaxTokens + val actualImageDetail = imageDetail ?: defaultImageDetail + + val imagesContent = imagesBase64.map { image -> + ContentDetail( + type = "image_url", + imageUrl = Base64Image(url = "data:image/png;base64,$image", detail = actualImageDetail), + ) + } + val textContent = ContentDetail(type = "text", text = prompt) + + val messages = listOf( + MessageContent( + role = "user", + content = imagesContent + textContent, + ) + ) + + val chatCompletionRequest = ChatCompletionRequest( + model = actualModel, + temperature = actualTemperature, + messages = messages, + maxTokens = actualMaxTokens, + seed = 1566, + responseFormat = if (jsonSchema == null) null else ResponseFormat( + type = "json_schema", + jsonSchema = jsonSchema, + ), + ) + + val chatCompletionResponse = try { + val httpResponse = httpClient.post(API_URL) { + contentType(ContentType.Application.Json) + headers["Authorization"] = "Bearer $apiKey" + setBody(json.encodeToString(chatCompletionRequest)) + } + + val body = httpResponse.bodyAsText() + if (!httpResponse.status.isSuccess()) { + logger.error("Failed to complete request to OpenAI: ${httpResponse.status}, $body") + throw Exception("Failed to complete request to OpenAI: ${httpResponse.status}, $body") + } + + json.decodeFromString(body) + } catch (e: SerializationException) { + logger.error("Failed to parse response from OpenAI", e) + throw e + } catch (e: Exception) { + logger.error("Failed to complete request to OpenAI", e) + throw e + } + + return CompletionData( + prompt = prompt, + temperature = actualTemperature, + maxTokens = actualMaxTokens, 
+ images = imagesBase64, + model = actualModel, + response = chatCompletionResponse.choices.first().message.content, + ) + } + + override fun close() = httpClient.close() +} diff --git a/maestro-ai/src/main/java/maestro/ai/openai/Request.kt b/maestro-ai/src/main/java/maestro/ai/openai/Request.kt new file mode 100644 index 0000000000..aa51b3f2e2 --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/openai/Request.kt @@ -0,0 +1,35 @@ +package maestro.ai.openai + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable +import kotlinx.serialization.json.JsonObject +import maestro.ai.common.Base64Image + +@Serializable +data class ChatCompletionRequest( + val model: String, + val messages: List, + val temperature: Float, + @SerialName("max_tokens") val maxTokens: Int, + @SerialName("response_format") val responseFormat: ResponseFormat?, + val seed: Int, +) + +@Serializable +class ResponseFormat( + val type: String, + @SerialName("json_schema") val jsonSchema: JsonObject, +) + +@Serializable +data class MessageContent( + val role: String, + val content: List, +) + +@Serializable +data class ContentDetail( + val type: String, + val text: String? = null, + @SerialName("image_url") val imageUrl: Base64Image? = null, +) diff --git a/maestro-ai/src/main/java/maestro/ai/openai/Response.kt b/maestro-ai/src/main/java/maestro/ai/openai/Response.kt new file mode 100644 index 0000000000..75087c496d --- /dev/null +++ b/maestro-ai/src/main/java/maestro/ai/openai/Response.kt @@ -0,0 +1,42 @@ +package maestro.ai.openai + +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +@Serializable +data class ChatCompletionResponse( + val id: String, + val `object`: String, + val created: Long, + val model: String, + @SerialName("system_fingerprint") val systemFingerprint: String? = null, + val usage: Usage? 
= null, + val choices: List, +) + +@Serializable +data class Usage( + @SerialName("prompt_tokens") val promptTokens: Int, + @SerialName("completion_tokens") val completionTokens: Int? = null, + @SerialName("total_tokens") val totalTokens: Int, +) + +@Serializable +data class Choice( + val message: Message, + @SerialName("finish_details") val finishDetails: FinishDetails? = null, + val index: Int, + @SerialName("finish_reason") val finishReason: String? = null, +) + +@Serializable +data class Message( + val role: String, + val content: String, +) + +@Serializable +data class FinishDetails( + val type: String, + val stop: String? = null, +) diff --git a/maestro-ai/src/main/resources/askForDefects_schema.json b/maestro-ai/src/main/resources/askForDefects_schema.json new file mode 100644 index 0000000000..008d4063bc --- /dev/null +++ b/maestro-ai/src/main/resources/askForDefects_schema.json @@ -0,0 +1,33 @@ +{ + "name": "askForDefects", + "description": "Returns a list of possible defects found in the mobile app's UI", + "strict": true, + "schema": { + "type": "object", + "required": ["defects"], + "additionalProperties": false, + "properties": { + "defects": { + "type": "array", + "items": { + "type": "object", + "required": ["category", "reasoning"], + "additionalProperties": false, + "properties": { + "category": { + "type": "string", + "enum": [ + "layout", + "localization", + "assertion" + ] + }, + "reasoning": { + "type": "string" + } + } + } + } + } + } +} diff --git a/maestro-cli/src/main/java/maestro/cli/App.kt b/maestro-cli/src/main/java/maestro/cli/App.kt index 4afd31b045..74b328ab15 100644 --- a/maestro-cli/src/main/java/maestro/cli/App.kt +++ b/maestro-cli/src/main/java/maestro/cli/App.kt @@ -20,21 +20,26 @@ package maestro.cli import maestro.cli.analytics.Analytics -import maestro.cli.command.* +import maestro.cli.command.BugReportCommand +import maestro.cli.command.CloudCommand import maestro.cli.command.DownloadSamplesCommand +import maestro.cli.command.LoginCommand
import maestro.cli.command.LogoutCommand +import maestro.cli.command.PrintHierarchyCommand +import maestro.cli.command.QueryCommand +import maestro.cli.command.RecordCommand +import maestro.cli.command.StartDeviceCommand +import maestro.cli.command.StudioCommand +import maestro.cli.command.TestCommand +import maestro.cli.command.UploadCommand import maestro.cli.update.Updates -import maestro.cli.util.AndroidEnvUtils -import maestro.cli.util.EnvUtils import maestro.cli.util.ErrorReporter -import maestro.cli.util.IOSEnvUtils import maestro.cli.view.box import maestro.debuglog.DebugLogStore import picocli.CommandLine import picocli.CommandLine.Command import picocli.CommandLine.Option import java.util.Properties -import kotlin.random.Random import kotlin.system.exitProcess @Command( @@ -74,7 +79,10 @@ class App { @Option(names = ["--port"], hidden = true) var port: Int? = null - @Option(names = ["--device", "--udid"], description = ["(Optional) Device ID to run on explicitly, can be a comma separated list of IDs: --device \"Emulator_1,Emulator_2\" "]) + @Option( + names = ["--device", "--udid"], + description = ["(Optional) Device ID to run on explicitly, can be a comma separated list of IDs: --device \"Emulator_1,Emulator_2\" "], + ) var deviceId: String? 
= null } @@ -114,7 +122,7 @@ fun main(args: Array) { println() // make errors red - cmd.colorScheme = CommandLine.Help.ColorScheme.Builder() + cmd.colorScheme = CommandLine.Help.ColorScheme.Builder() .errors(CommandLine.Help.Ansi.Style.fg_red) .build() diff --git a/maestro-cli/src/main/java/maestro/cli/command/TestCommand.kt b/maestro-cli/src/main/java/maestro/cli/command/TestCommand.kt index cc659d1453..f1ad98dd18 100644 --- a/maestro-cli/src/main/java/maestro/cli/command/TestCommand.kt +++ b/maestro-cli/src/main/java/maestro/cli/command/TestCommand.kt @@ -20,7 +20,11 @@ package maestro.cli.command import io.ktor.util.collections.ConcurrentSet -import kotlinx.coroutines.* +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.delay +import kotlinx.coroutines.runBlocking import kotlinx.coroutines.sync.Semaphore import maestro.cli.App import maestro.cli.CliError @@ -220,7 +224,8 @@ class TestCommand : Callable { // Acquire lock to execute device creation block deviceCreationSemaphore.acquire() - val deviceId = deviceIds.getOrNull(shardIndex) // 1. Reuse existing device if deviceId provided + val deviceId = + deviceIds.getOrNull(shardIndex) // 1. Reuse existing device if deviceId provided ?: initialActiveDevices.elementAtOrNull(shardIndex) // 2. Reuse existing device if connected device found ?: run { // 3. 
Create a new device val cfg = allDeviceConfigs.first() @@ -234,7 +239,11 @@ class TestCommand : Callable { shardIndex ) - DeviceService.startDevice(deviceCreated, driverHostPort, initialActiveDevices + currentActiveDevices).instanceId.also { + DeviceService.startDevice( + deviceCreated, + driverHostPort, + initialActiveDevices + currentActiveDevices + ).instanceId.also { currentActiveDevices.add(it) delay(2.seconds) } @@ -256,6 +265,8 @@ class TestCommand : Callable { val device = session.device if (flowFile.isDirectory || format != ReportFormat.NOOP) { + // Run multiple flows + if (continuous) { throw CommandLine.ParameterException( commandSpec.commandLine(), @@ -277,13 +288,17 @@ class TestCommand : Callable { if (!flattenDebugOutput) { TestDebugReporter.deleteOldFiles() } - Triple(suiteResult.passedCount, suiteResult.totalTests, suiteResult) + + return@newSession Triple(suiteResult.passedCount, suiteResult.totalTests, suiteResult) } else { + // Run a single flow + if (continuous) { if (!flattenDebugOutput) { TestDebugReporter.deleteOldFiles() } TestRunner.runContinuous(maestro, device, flowFile, env) + } else { val resultView = if (DisableAnsiMixin.ansiEnabled) AnsiResultView() @@ -360,6 +375,7 @@ class TestCommand : Callable { ) } } + private fun List.mergeSummaries(): TestExecutionSummary? = reduceOrNull { acc, summary -> TestExecutionSummary( passed = acc.passed && summary.passed, diff --git a/maestro-cli/src/main/java/maestro/cli/model/TestExecutionSummary.kt b/maestro-cli/src/main/java/maestro/cli/model/TestExecutionSummary.kt index 475a184ff7..632f767341 100644 --- a/maestro-cli/src/main/java/maestro/cli/model/TestExecutionSummary.kt +++ b/maestro-cli/src/main/java/maestro/cli/model/TestExecutionSummary.kt @@ -2,6 +2,8 @@ package maestro.cli.model import kotlin.time.Duration +// TODO: Some properties should be implemented as getters, but it's not possible. 
+// See https://github.com/Kotlin/kotlinx.serialization/issues/805 data class TestExecutionSummary( val passed: Boolean, val suites: List, @@ -27,5 +29,4 @@ data class TestExecutionSummary( data class Failure( val message: String, ) - -} \ No newline at end of file +} diff --git a/maestro-cli/src/main/java/maestro/cli/report/HtmlAITestSuiteReporter.kt b/maestro-cli/src/main/java/maestro/cli/report/HtmlAITestSuiteReporter.kt new file mode 100644 index 0000000000..c158c15286 --- /dev/null +++ b/maestro-cli/src/main/java/maestro/cli/report/HtmlAITestSuiteReporter.kt @@ -0,0 +1,167 @@ +package maestro.cli.report + +import kotlinx.html.a +import kotlinx.html.body +import kotlinx.html.button +import kotlinx.html.div +import kotlinx.html.h1 +import kotlinx.html.head +import kotlinx.html.html +import kotlinx.html.img +import kotlinx.html.lang +import kotlinx.html.main +import kotlinx.html.meta +import kotlinx.html.p +import kotlinx.html.script +import kotlinx.html.span +import kotlinx.html.stream.appendHTML +import kotlinx.html.style +import kotlinx.html.title +import kotlinx.html.unsafe +import readResourceAsText +import java.io.File + +// TODO(bartekpacia): Ideally, AI output would be in the same HTML file as "normal test output". There is no inherent reason +// to split those 2 streams of output ("normal" and "AI") into 2 separate HTML files. +// See issue #1973 +class HtmlAITestSuiteReporter { + + private val FlowAIOutput.htmlReportFilename + get() = "ai-report-${flowName}.html" + + private val reportCss: String + get() = readResourceAsText(this::class, "/ai_report.css") + + private val reportJs: String + get() = readResourceAsText(this::class, "/tailwind.config.js") + + /** + * Creates HTML files in [outputDestination] for each flow in [outputs]. 
+ */ + fun report(outputs: List, outputDestination: File) { + if (!outputDestination.isDirectory) throw IllegalArgumentException("Output destination must be a directory") + + outputs.forEachIndexed { index, output -> + val htmlContent = buildHtmlReport(outputs, index) + val file = File(outputDestination, output.htmlReportFilename) + file.writeText(htmlContent) + } + } + + /** + * Build HTML report for a single flow. + * + * Information about other flows is needed to generate links to them. + */ + private fun buildHtmlReport(outputs: List, index: Int): String { + val summary = outputs[index] + + return buildString { + appendLine("") + appendHTML().html { + lang = "en" + + head { + meta { charset = "UTF-8" } + meta { name = "viewport"; content = "width=device-width, initial-scale=1.0" } + title { +"Maestro Test Report" } + script { src = "https://cdn.tailwindcss.com/3.4.5" } + + script { + unsafe { +reportJs } + } + + style(type = "text/tailwindcss") { +reportCss } + } + + body { + div(classes = "flex min-h-screen flex-col") { + + // Header area + div(classes = "container mx-auto py-6 space-y-2") { + h1(classes = "text-3xl") { + +"AI suggestions for flow " + span(classes = "text-gray-medium") { + +summary.flowName + } + } + + // File chooser for different reports + div(classes = "group relative inline-block self-start") { + button(classes = "btn") { +"→ Open other report" } + div(classes = "absolute z-10 hidden min-w-32 group-hover:block") { + outputs.forEachIndexed { outputIndex, outputFlow -> + val selected = outputIndex == index + + a(classes = buildString { + append("toggle-link") + + if (selected) append(" toggle-link-selected") + } ) { + href = "./" + outputs[outputIndex].htmlReportFilename + val name = outputFlow.flowFile.nameWithoutExtension + +"(${outputIndex + 1}) $name" + } + } + } + } + + // Link to the flow file + // FIXME(bartekpacia): This path will be broken when moved across machines + p { + a( + classes = "btn", href = summary.flowFile.absolutePath 
+ ) { + +"→ Open flow file ( ${summary.flowFile.name} )" + } + } + } + + // Container for list of screenshots + main(classes = "container mx-auto flex flex-col gap-4") { + // Overall defect count for the flow + p(classes = "text-lg") { + val word = if (summary.defectCount == 1) "defect" else "defects" + +"${summary.defectCount} possible $word found" + } + + // List of screenshots within flow with defects found + summary.screenOutputs.forEachIndexed { screenIndex, screenSummary -> + div(classes = "screen-card") { + img(classes = "screenshot-image") { + alt = "Screenshot of the defect" + // Use relative path, so when file is moved across machines, it still works + src = screenSummary.screenshotPath.name.toString() + } + + // defect-card-container + div(classes = "flex flex-col gap-4 flex-grow") { + // Defect count for the screen + p(classes = "text-lg") { + val word = if (screenSummary.defects.size == 1) "defect" else "defects" + +"${screenSummary.defects.size} possible $word" + } + + screenSummary.defects.forEachIndexed { i, defect -> + div(classes = "defect-card") { + p { +defect.reasoning } + div(classes = "badge") { +defect.category } + } + } + } + } + + if (screenIndex != summary.screenOutputs.size - 1) { + div(classes = "divider") + } + } + } + } + } + } + } + } + + private val FlowAIOutput.defectCount: Int + get() = screenOutputs.flatMap { it.defects }.size +} diff --git a/maestro-cli/src/main/java/maestro/cli/report/TestDebugReporter.kt b/maestro-cli/src/main/java/maestro/cli/report/TestDebugReporter.kt index 7a5600d189..9d05ecf576 100644 --- a/maestro-cli/src/main/java/maestro/cli/report/TestDebugReporter.kt +++ b/maestro-cli/src/main/java/maestro/cli/report/TestDebugReporter.kt @@ -1,9 +1,9 @@ package maestro.cli.report import com.fasterxml.jackson.annotation.JsonInclude +import com.fasterxml.jackson.annotation.JsonProperty import com.fasterxml.jackson.databind.JsonMappingException -import com.fasterxml.jackson.databind.ObjectMapper -import maestro.Driver 
+import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper import maestro.MaestroException import maestro.TreeNode import maestro.cli.runner.CommandStatus @@ -13,6 +13,7 @@ import maestro.cli.util.IOSEnvUtils import maestro.debuglog.DebugLogStore import maestro.debuglog.LogConfig import maestro.orchestra.MaestroCommand +import maestro.ai.Defect import org.slf4j.LoggerFactory import java.io.File import java.nio.file.Files @@ -25,35 +26,61 @@ import java.time.LocalDateTime import java.time.format.DateTimeFormatter import java.time.temporal.ChronoUnit import java.util.IdentityHashMap -import java.util.Properties import kotlin.io.path.absolutePathString import kotlin.io.path.exists +// TODO(bartekpacia): Rename to TestOutputReporter, because it's not only for "debug" stuff object TestDebugReporter { private val logger = LoggerFactory.getLogger(TestDebugReporter::class.java) - private val mapper = ObjectMapper() + private val mapper = jacksonObjectMapper().setSerializationInclusion(JsonInclude.Include.NON_NULL) + .setSerializationInclusion(JsonInclude.Include.NON_EMPTY).writerWithDefaultPrettyPrinter() private var debugOutputPath: Path? = null private var debugOutputPathAsString: String? = null private var flattenDebugOutput: Boolean = false - init { + // AI outputs must be saved separately at the end of the flow. + fun saveSuggestions(outputs: List, path: Path) { + // This mutates the output. + outputs.forEach { output -> + // Write AI screenshots. Paths need to be changed to the final ones. 
+ val updatedOutputs = output.screenOutputs.map { newOutput -> + val screenshotFilename = newOutput.screenshotPath.name + val screenshotFile = File(path.absolutePathString(), screenshotFilename) + newOutput.screenshotPath.copyTo(screenshotFile) + newOutput.copy(screenshotPath = screenshotFile) + } + + output.screenOutputs.clear() + output.screenOutputs.addAll(updatedOutputs) + + // Write AI JSON output + val jsonFilename = "ai-(${output.flowName.replace("/", "_")}).json" + val jsonFile = File(path.absolutePathString(), jsonFilename) + mapper.writeValue(jsonFile, output) + } - // json - mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) - mapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY) + HtmlAITestSuiteReporter().report(outputs, path.toFile()) } - fun saveFlow(flowName: String, data: FlowDebugMetadata, path: Path) { + /** + * Save debug information about a single flow, after it has finished. + */ + fun saveFlow(flowName: String, debugOutput: FlowDebugOutput, path: Path) { + // TODO(bartekpacia): Potentially accept a single "FlowPersistentOutput" object + // TODO(bartekpacia): Build output incrementally, instead of single-shot on flow completion + // Be aware that this goal somewhat conflicts with including links to other flows in the HTML report. 
// commands try { - val commandMetadata = data.commands + val commandMetadata = debugOutput.commands if (commandMetadata.isNotEmpty()) { val commandsFilename = "commands-(${flowName.replace("/", "_")}).json" val file = File(path.absolutePathString(), commandsFilename) - commandMetadata.map { CommandDebugWrapper(it.key, it.value) }.let { + commandMetadata.map { + CommandDebugWrapper(it.key, it.value) + }.let { mapper.writeValue(file, it) } } @@ -62,14 +89,14 @@ object TestDebugReporter { } // screenshots - data.screenshots.forEach { + debugOutput.screenshots.forEach { val status = when (it.status) { CommandStatus.COMPLETED -> "✅" CommandStatus.FAILED -> "❌" else -> "﹖" } - val name = "screenshot-$status-${it.timestamp}-(${flowName}).png" - val file = File(path.absolutePathString(), name) + val filename = "screenshot-$status-${it.timestamp}-(${flowName}).png" + val file = File(path.absolutePathString(), filename) it.screenshot.copyTo(file) } @@ -80,31 +107,19 @@ object TestDebugReporter { val currentTime = Instant.now() val daysLimit = currentTime.minus(Duration.of(days, ChronoUnit.DAYS)) - Files.walk(getDebugOutputPath()) - .filter { - val fileTime = Files.getAttribute(it, "basic:lastModifiedTime") as FileTime - val isOlderThanLimit = fileTime.toInstant().isBefore(daysLimit) - Files.isDirectory(it) && isOlderThanLimit - } - .sorted(Comparator.reverseOrder()) - .forEach { - Files.walk(it) - .sorted(Comparator.reverseOrder()) - .forEach { Files.delete(it) } - } + Files.walk(getDebugOutputPath()).filter { + val fileTime = Files.getAttribute(it, "basic:lastModifiedTime") as FileTime + val isOlderThanLimit = fileTime.toInstant().isBefore(daysLimit) + Files.isDirectory(it) && isOlderThanLimit + }.sorted(Comparator.reverseOrder()).forEach { dir -> + Files.walk(dir).sorted(Comparator.reverseOrder()).forEach { file -> Files.delete(file) } + } } catch (e: Exception) { logger.warn("Failed to delete older files", e) } } private fun logSystemInfo() { - val appVersion = 
runCatching { - val props = Driver::class.java.classLoader.getResourceAsStream("version.properties").use { - Properties().apply { load(it) } - } - props["version"].toString() - } - val logger = LoggerFactory.getLogger("MAESTRO") logger.info("---- System Info ----") logger.info("Maestro Version: ${EnvUtils.CLI_VERSION ?: "Undefined"}") @@ -131,9 +146,11 @@ object TestDebugReporter { fun getDebugOutputPath(): Path { if (debugOutputPath != null) return debugOutputPath as Path - val debugRootPath = if(debugOutputPathAsString != null) debugOutputPathAsString!! else System.getProperty("user.home") - val debugOutput = if(flattenDebugOutput) Paths.get(debugRootPath) else buildDefaultDebugOutputPath(debugRootPath) - + val debugRootPath = + if (debugOutputPathAsString != null) debugOutputPathAsString!! else System.getProperty("user.home") + val debugOutput = + if (flattenDebugOutput) Paths.get(debugRootPath) else buildDefaultDebugOutputPath(debugRootPath) + if (!debugOutput.exists()) { Files.createDirectories(debugOutput) } @@ -141,7 +158,7 @@ object TestDebugReporter { return debugOutput } - fun buildDefaultDebugOutputPath(debugRootPath: String): Path { + private fun buildDefaultDebugOutputPath(debugRootPath: String): Path { val preamble = arrayOf(".maestro", "tests") val foldername = DateTimeFormatter.ofPattern("yyyy-MM-dd_HHmmss").format(LocalDateTime.now()) return Paths.get(debugRootPath, *preamble, foldername) @@ -150,8 +167,7 @@ object TestDebugReporter { } private data class CommandDebugWrapper( - val command: MaestroCommand, - val metadata: CommandDebugMetadata + val command: MaestroCommand, val metadata: CommandDebugMetadata ) data class CommandDebugMetadata( @@ -168,14 +184,25 @@ data class CommandDebugMetadata( } } -data class ScreenshotDebugMetadata( - val screenshot: File, - val timestamp: Long, - val status: CommandStatus +data class FlowDebugOutput( + val commands: IdentityHashMap = IdentityHashMap(), + val screenshots: MutableList = mutableListOf(), + var 
exception: MaestroException? = null, +) { + data class Screenshot( + val screenshot: File, + val timestamp: Long, + val status: CommandStatus, + ) +} + +data class FlowAIOutput( + @JsonProperty("flow_name") val flowName: String, + @JsonProperty("flow_file_path") val flowFile: File, + @JsonProperty("outputs") val screenOutputs: MutableList = mutableListOf(), ) -data class FlowDebugMetadata( - val commands: IdentityHashMap = IdentityHashMap(), - val screenshots: MutableList = mutableListOf(), - var exception: MaestroException? = null +data class SingleScreenFlowAIOutput( + @JsonProperty("screenshot_path") val screenshotPath: File, + val defects: List, ) diff --git a/maestro-cli/src/main/java/maestro/cli/report/TestSuiteReporter.kt b/maestro-cli/src/main/java/maestro/cli/report/TestSuiteReporter.kt index 40a7fe89f8..3f561b0f6f 100644 --- a/maestro-cli/src/main/java/maestro/cli/report/TestSuiteReporter.kt +++ b/maestro-cli/src/main/java/maestro/cli/report/TestSuiteReporter.kt @@ -5,6 +5,9 @@ import okio.Sink interface TestSuiteReporter { + /** + * Writes the report for [summary] to [out] in the format specified by the implementation. 
+ */ fun report( summary: TestExecutionSummary, out: Sink, @@ -17,5 +20,4 @@ interface TestSuiteReporter { } } } - -} \ No newline at end of file +} diff --git a/maestro-cli/src/main/java/maestro/cli/runner/MaestroCommandRunner.kt b/maestro-cli/src/main/java/maestro/cli/runner/MaestroCommandRunner.kt index 5422410de8..12c620da81 100644 --- a/maestro-cli/src/main/java/maestro/cli/runner/MaestroCommandRunner.kt +++ b/maestro-cli/src/main/java/maestro/cli/runner/MaestroCommandRunner.kt @@ -22,9 +22,10 @@ package maestro.cli.runner import maestro.Maestro import maestro.MaestroException import maestro.cli.device.Device +import maestro.cli.report.SingleScreenFlowAIOutput import maestro.cli.report.CommandDebugMetadata -import maestro.cli.report.FlowDebugMetadata -import maestro.cli.report.ScreenshotDebugMetadata +import maestro.cli.report.FlowAIOutput +import maestro.cli.report.FlowDebugOutput import maestro.cli.runner.resultview.ResultView import maestro.cli.runner.resultview.UiState import maestro.orchestra.ApplyConfigurationCommand @@ -33,10 +34,17 @@ import maestro.orchestra.MaestroCommand import maestro.orchestra.Orchestra import maestro.orchestra.yaml.YamlCommandReader import maestro.utils.Insight +import okio.Buffer +import okio.sink import org.slf4j.LoggerFactory import java.io.File import java.util.IdentityHashMap +/** + * Knows how to run a list of Maestro commands and update the UI. + * + * Should not know what a "flow" is. 
+ */ object MaestroCommandRunner { private val logger = LoggerFactory.getLogger(MaestroCommandRunner::class.java) @@ -46,7 +54,8 @@ object MaestroCommandRunner { device: Device?, view: ResultView, commands: List, - debug: FlowDebugMetadata + debugOutput: FlowDebugOutput, + aiOutput: FlowAIOutput, ): Boolean { val config = YamlCommandReader.getConfig(commands) val onFlowComplete = config?.onFlowComplete @@ -55,12 +64,8 @@ object MaestroCommandRunner { val commandStatuses = IdentityHashMap() val commandMetadata = IdentityHashMap() - // debug - val debugCommands = debug.commands - val debugScreenshots = debug.screenshots - fun takeDebugScreenshot(status: CommandStatus): File? { - val containsFailed = debugScreenshots.any { it.status == CommandStatus.FAILED } + val containsFailed = debugOutput.screenshots.any { it.status == CommandStatus.FAILED } // Avoids duplicate failed images from parent commands if (containsFailed && status == CommandStatus.FAILED) { @@ -68,11 +73,12 @@ object MaestroCommandRunner { } val result = kotlin.runCatching { - val out = File.createTempFile("screenshot-${System.currentTimeMillis()}", ".png") + val out = File + .createTempFile("screenshot-${System.currentTimeMillis()}", ".png") .also { it.deleteOnExit() } // save to another dir before exiting - maestro.takeScreenshot(out, false) - debugScreenshots.add( - ScreenshotDebugMetadata( + maestro.takeScreenshot(out.sink(), false) + debugOutput.screenshots.add( + FlowDebugOutput.Screenshot( screenshot = out, timestamp = System.currentTimeMillis(), status = status @@ -84,6 +90,14 @@ object MaestroCommandRunner { return result.getOrNull() } + fun writeAIscreenshot(buffer: Buffer): File { + val out = File + .createTempFile("ai-screenshot-${System.currentTimeMillis()}", ".png") + .also { it.deleteOnExit() } + out.outputStream().use { it.write(buffer.readByteArray()) } + return out + } + fun refreshUi() { view.setState( UiState.Running( @@ -110,11 +124,11 @@ object MaestroCommandRunner { refreshUi() val 
orchestra = Orchestra( - maestro, + maestro = maestro, onCommandStart = { _, command -> logger.info("${command.description()} RUNNING") commandStatuses[command] = CommandStatus.RUNNING - debugCommands[command] = CommandDebugMetadata( + debugOutput.commands[command] = CommandDebugMetadata( timestamp = System.currentTimeMillis(), status = CommandStatus.RUNNING ) @@ -124,17 +138,17 @@ object MaestroCommandRunner { onCommandComplete = { _, command -> logger.info("${command.description()} COMPLETED") commandStatuses[command] = CommandStatus.COMPLETED - debugCommands[command]?.let { - it.status = CommandStatus.COMPLETED - it.calculateDuration() + debugOutput.commands[command]?.apply { + status = CommandStatus.COMPLETED + calculateDuration() } refreshUi() }, onCommandFailed = { _, command, e -> - debugCommands[command]?.let { - it.status = CommandStatus.FAILED - it.calculateDuration() - it.error = e + debugOutput.commands[command]?.apply { + status = CommandStatus.FAILED + calculateDuration() + error = e } takeDebugScreenshot(CommandStatus.FAILED) @@ -142,7 +156,7 @@ object MaestroCommandRunner { if (e !is MaestroException) { throw e } else { - debug.exception = e + debugOutput.exception = e } logger.info("${command.description()} FAILED") @@ -153,16 +167,16 @@ object MaestroCommandRunner { onCommandSkipped = { _, command -> logger.info("${command.description()} SKIPPED") commandStatuses[command] = CommandStatus.SKIPPED - debugCommands[command]?.let { - it.status = CommandStatus.SKIPPED + debugOutput.commands[command]?.apply { + status = CommandStatus.SKIPPED } refreshUi() }, onCommandReset = { command -> logger.info("${command.description()} PENDING") commandStatuses[command] = CommandStatus.PENDING - debugCommands[command]?.let { - it.status = CommandStatus.PENDING + debugOutput.commands[command]?.apply { + status = CommandStatus.PENDING } refreshUi() }, @@ -171,6 +185,16 @@ object MaestroCommandRunner { commandMetadata[command] = metadata refreshUi() }, + 
onCommandGeneratedOutput = { command, defects, screenshot -> + logger.info("${command.description()} generated output") + val screenshotPath = writeAIscreenshot(screenshot) + aiOutput.screenOutputs.add( + SingleScreenFlowAIOutput( + screenshotPath = screenshotPath, + defects = defects, + ) + ) + } ) val flowSuccess = orchestra.runFlow(commands) diff --git a/maestro-cli/src/main/java/maestro/cli/runner/TestRunner.kt b/maestro-cli/src/main/java/maestro/cli/runner/TestRunner.kt index 81344c05b3..3a42fc7084 100644 --- a/maestro-cli/src/main/java/maestro/cli/runner/TestRunner.kt +++ b/maestro-cli/src/main/java/maestro/cli/runner/TestRunner.kt @@ -8,7 +8,8 @@ import com.github.michaelbull.result.getOr import com.github.michaelbull.result.onFailure import maestro.Maestro import maestro.cli.device.Device -import maestro.cli.report.FlowDebugMetadata +import maestro.cli.report.FlowAIOutput +import maestro.cli.report.FlowDebugOutput import maestro.cli.report.TestDebugReporter import maestro.cli.runner.resultview.AnsiResultView import maestro.cli.runner.resultview.ResultView @@ -23,10 +24,18 @@ import java.io.File import java.nio.file.Path import kotlin.concurrent.thread +/** + * Knows how to run a single Maestro flow (either one-shot or continuously). + */ object TestRunner { private val logger = LoggerFactory.getLogger(TestRunner::class.java) + /** + * Runs a single flow, one-shot style. + * + * If the flow generates artifacts, they should be placed in [debugOutputPath]. 
+ */ fun runSingle( maestro: Maestro, device: Device?, @@ -35,28 +44,48 @@ object TestRunner { resultView: ResultView, debugOutputPath: Path ): Int { - - // debug - val debug = FlowDebugMetadata() + val debugOutput = FlowDebugOutput() + var aiOutput = FlowAIOutput( + flowName = flowFile.nameWithoutExtension, + flowFile = flowFile, + ) val result = runCatching(resultView, maestro) { val commands = YamlCommandReader.readCommands(flowFile.toPath()) .withEnv(env) + + YamlCommandReader.getConfig(commands)?.name?.let { + aiOutput = aiOutput.copy(flowName = it) + } + MaestroCommandRunner.runCommands( - maestro, - device, - resultView, - commands, - debug + maestro = maestro, + device = device, + view = resultView, + commands = commands, + debugOutput = debugOutput, + aiOutput = aiOutput, ) } - TestDebugReporter.saveFlow(flowFile.name, debug, debugOutputPath) - if (debug.exception != null) PrintUtils.err("${debug.exception?.message}") + TestDebugReporter.saveFlow( + flowName = flowFile.name, + debugOutput = debugOutput, + path = debugOutputPath, + ) + TestDebugReporter.saveSuggestions( + outputs = listOf(aiOutput), + path = debugOutputPath, + ) + + if (debugOutput.exception != null) PrintUtils.err("${debugOutput.exception?.message}") return if (result.get() == true) 0 else 1 } + /** + * Runs a single flow continuously. 
+ */ fun runContinuous( maestro: Maestro, device: Device?, @@ -88,11 +117,16 @@ object TestRunner { runCatching(resultView, maestro) { MaestroCommandRunner.runCommands( - maestro, - device, - resultView, - commands, - FlowDebugMetadata() + maestro = maestro, + device = device, + view = resultView, + commands = commands, + debugOutput = FlowDebugOutput(), + // TODO(bartekpacia): make AI outputs work in continuous mode (see #1972) + aiOutput = FlowAIOutput( + flowName = "TODO", + flowFile = flowFile, + ), ) }.get() } diff --git a/maestro-cli/src/main/java/maestro/cli/runner/TestSuiteInteractor.kt b/maestro-cli/src/main/java/maestro/cli/runner/TestSuiteInteractor.kt index e90564c40f..ac09701e1d 100644 --- a/maestro-cli/src/main/java/maestro/cli/runner/TestSuiteInteractor.kt +++ b/maestro-cli/src/main/java/maestro/cli/runner/TestSuiteInteractor.kt @@ -6,7 +6,12 @@ import maestro.cli.CliError import maestro.cli.device.Device import maestro.cli.model.FlowStatus import maestro.cli.model.TestExecutionSummary -import maestro.cli.report.* +import maestro.cli.report.SingleScreenFlowAIOutput +import maestro.cli.report.CommandDebugMetadata +import maestro.cli.report.FlowAIOutput +import maestro.cli.report.FlowDebugOutput +import maestro.cli.report.TestDebugReporter +import maestro.cli.report.TestSuiteReporter import maestro.cli.util.PrintUtils import maestro.cli.util.TimeUtils import maestro.cli.view.ErrorViewUtils @@ -16,15 +21,19 @@ import maestro.orchestra.Orchestra import maestro.orchestra.util.Env.withEnv import maestro.orchestra.workspace.WorkspaceExecutionPlanner import maestro.orchestra.yaml.YamlCommandReader +import okio.Buffer import okio.Sink -import okio.sink import org.slf4j.LoggerFactory import java.io.File import java.nio.file.Path -import kotlin.math.roundToLong import kotlin.system.measureTimeMillis import kotlin.time.Duration.Companion.seconds +/** + * Similar to [TestRunner], but: + * * can run many flows at once + * * does not support continuous mode + */ 
class TestSuiteInteractor( private val maestro: Maestro, private val device: Device? = null, @@ -49,12 +58,14 @@ class TestSuiteInteractor( println() var passed = true + val aiOutputs = mutableListOf<FlowAIOutput>() // first run sequence of flows if present val flowSequence = executionPlan.sequence for (flow in flowSequence?.flows ?: emptyList()) { - val result = runFlow(flow.toFile(), env, maestro, debugOutputPath) + val (result, aiOutput) = runFlow(flow.toFile(), env, maestro, debugOutputPath) flowResults.add(result) + aiOutputs.add(aiOutput) if (result.status == FlowStatus.ERROR) { passed = false @@ -68,7 +79,8 @@ class TestSuiteInteractor( // proceed to run all other Flows executionPlan.flowsToRun.forEach { flow -> - val result = runFlow(flow.toFile(), env, maestro, debugOutputPath) + val (result, aiOutput) = runFlow(flow.toFile(), env, maestro, debugOutputPath) + aiOutputs.add(aiOutput) if (result.status == FlowStatus.ERROR) { passed = false @@ -115,6 +127,9 @@ class TestSuiteInteractor( ) } + // TODO(bartekpacia): Should it also be saving to debugOutputPath? + TestDebugReporter.saveSuggestions(aiOutputs, debugOutputPath) + return summary } @@ -123,18 +138,23 @@ class TestSuiteInteractor( env: Map<String, String>, maestro: Maestro, debugOutputPath: Path - ): TestExecutionSummary.FlowResult { + ): Pair<TestExecutionSummary.FlowResult, FlowAIOutput> { + // TODO(bartekpacia): merge TestExecutionSummary with AI suggestions + // (i.e. consider them also part of the test output) + // See #1973 + var flowName: String = flowFile.nameWithoutExtension var flowStatus: FlowStatus var errorMessage: String? = null - // debug - val debug = FlowDebugMetadata() - val debugCommands = debug.commands - val debugScreenshots = debug.screenshots + val debugOutput = FlowDebugOutput() + val aiOutput = FlowAIOutput( + flowName = flowFile.nameWithoutExtension, + flowFile = flowFile, + ) fun takeDebugScreenshot(status: CommandStatus): File?
{ - val containsFailed = debugScreenshots.any { it.status == CommandStatus.FAILED } + val containsFailed = debugOutput.screenshots.any { it.status == CommandStatus.FAILED } // Avoids duplicate failed images from parent commands if (containsFailed && status == CommandStatus.FAILED) { @@ -145,8 +165,8 @@ class TestSuiteInteractor( val out = File.createTempFile("screenshot-${System.currentTimeMillis()}", ".png") .also { it.deleteOnExit() } // save to another dir before exiting maestro.takeScreenshot(out, false) - debugScreenshots.add( - ScreenshotDebugMetadata( + debugOutput.screenshots.add( + FlowDebugOutput.Screenshot( screenshot = out, timestamp = System.currentTimeMillis(), status = status @@ -158,33 +178,42 @@ class TestSuiteInteractor( return result.getOrNull() } + fun writeAIscreenshot(buffer: Buffer): File { + val out = File + .createTempFile("ai-screenshot-${System.currentTimeMillis()}", ".png") + .also { it.deleteOnExit() } + out.outputStream().use { it.write(buffer.readByteArray()) } + return out + } + val flowTimeMillis = measureTimeMillis { try { - val commands = YamlCommandReader.readCommands(flowFile.toPath()) + val commands = YamlCommandReader + .readCommands(flowFile.toPath()) .withEnv(env) - val config = YamlCommandReader.getConfig(commands) + YamlCommandReader.getConfig(commands)?.name?.let { flowName = it } val orchestra = Orchestra( maestro = maestro, onCommandStart = { _, command -> logger.info("${command.description()} RUNNING") - debugCommands[command] = CommandDebugMetadata( + debugOutput.commands[command] = CommandDebugMetadata( timestamp = System.currentTimeMillis(), status = CommandStatus.RUNNING ) }, onCommandComplete = { _, command -> logger.info("${command.description()} COMPLETED") - debugCommands[command]?.let { + debugOutput.commands[command]?.let { it.status = CommandStatus.COMPLETED it.calculateDuration() } }, onCommandFailed = { _, command, e -> logger.info("${command.description()} FAILED") - if (e is MaestroException) 
debug.exception = e - debugCommands[command]?.let { + if (e is MaestroException) debugOutput.exception = e + debugOutput.commands[command]?.let { it.status = CommandStatus.FAILED it.calculateDuration() it.error = e @@ -195,22 +224,28 @@ class TestSuiteInteractor( }, onCommandSkipped = { _, command -> logger.info("${command.description()} SKIPPED") - debugCommands[command]?.let { + debugOutput.commands[command]?.let { it.status = CommandStatus.SKIPPED } }, onCommandReset = { command -> logger.info("${command.description()} PENDING") - debugCommands[command]?.let { + debugOutput.commands[command]?.let { it.status = CommandStatus.PENDING } }, + onCommandGeneratedOutput = { command, defects, screenshot -> + logger.info("${command.description()} generated output") + val screenshotPath = writeAIscreenshot(screenshot) + aiOutput.screenOutputs.add( + SingleScreenFlowAIOutput( + screenshotPath = screenshotPath, + defects = defects, + ) + ) + } ) - config?.name?.let { - flowName = it - } - val flowSuccess = orchestra.runFlow(commands) flowStatus = if (flowSuccess) FlowStatus.SUCCESS else FlowStatus.ERROR } catch (e: Exception) { @@ -221,27 +256,35 @@ class TestSuiteInteractor( } val flowDuration = TimeUtils.durationInSeconds(flowTimeMillis) - TestDebugReporter.saveFlow(flowName, debug, debugOutputPath) + TestDebugReporter.saveFlow( + flowName = flowName, + debugOutput = debugOutput, + path = debugOutputPath, + ) + // FIXME(bartekpacia): Save AI output as well TestSuiteStatusView.showFlowCompletion( TestSuiteViewModel.FlowResult( name = flowName, status = flowStatus, duration = flowDuration, - error = debug.exception?.message, + error = debugOutput.exception?.message, ) ) - return TestExecutionSummary.FlowResult( - name = flowName, - fileName = flowFile.nameWithoutExtension, - status = flowStatus, - failure = if (flowStatus == FlowStatus.ERROR) { - TestExecutionSummary.Failure( - message = errorMessage ?: debug.exception?.message ?: "Unknown error", - ) - } else null, - 
duration = flowDuration, + return Pair( + first = TestExecutionSummary.FlowResult( + name = flowName, + fileName = flowFile.nameWithoutExtension, + status = flowStatus, + failure = if (flowStatus == FlowStatus.ERROR) { + TestExecutionSummary.Failure( + message = errorMessage ?: debugOutput.exception?.message ?: "Unknown error", + ) + } else null, + duration = flowDuration, + ), + second = aiOutput, ) } diff --git a/maestro-cli/src/main/java/maestro/cli/runner/resultview/AnsiResultView.kt b/maestro-cli/src/main/java/maestro/cli/runner/resultview/AnsiResultView.kt index ede4120c1a..3b7fb574ff 100644 --- a/maestro-cli/src/main/java/maestro/cli/runner/resultview/AnsiResultView.kt +++ b/maestro-cli/src/main/java/maestro/cli/runner/resultview/AnsiResultView.kt @@ -203,7 +203,7 @@ class AnsiResultView( CommandStatus.COMPLETED -> "✅" CommandStatus.FAILED -> "❌" CommandStatus.RUNNING -> "⏳" - CommandStatus.PENDING -> "\uD83D\uDD32" + CommandStatus.PENDING -> "\uD83D\uDD32 " // 🔲 CommandStatus.SKIPPED -> "⚪️" } } diff --git a/maestro-cli/src/main/java/maestro/cli/util/ResourceUtils.kt b/maestro-cli/src/main/java/maestro/cli/util/ResourceUtils.kt new file mode 100644 index 0000000000..6a37b88e6b --- /dev/null +++ b/maestro-cli/src/main/java/maestro/cli/util/ResourceUtils.kt @@ -0,0 +1,8 @@ +import kotlin.reflect.KClass + +fun readResourceAsText(cls: KClass<*>, path: String): String { + val resourceStream = cls.java.getResourceAsStream(path) + ?: throw IllegalStateException("Could not find $path in resources") + + return resourceStream.bufferedReader().use { it.readText() } +} diff --git a/maestro-cli/src/main/resources/ai_report.css b/maestro-cli/src/main/resources/ai_report.css new file mode 100644 index 0000000000..00262e3efe --- /dev/null +++ b/maestro-cli/src/main/resources/ai_report.css @@ -0,0 +1,37 @@ +@layer components { + body { + @apply dark:bg-gray-dark dark:text-gray-1 text-gray-dark; + } + + .screenshot-image { + @apply w-64 rounded-lg border-2
border-gray-medium dark:border-gray-1 pb-1; + } + + .screen-card { + @apply flex items-start gap-4; + } + + .defect-card { + @apply flex flex-col items-start gap-2 rounded-lg bg-[#f8f8f8] p-2 text-gray-dark dark:bg-gray-medium dark:text-gray-1; + } + + .badge { + @apply dark:text-red-500 rounded-lg bg-[#ececec] dark:bg-gray-dark p-1 font-semibold text-gray-medium dark:text-gray-1; + } + + .toggle-link { + @apply block border-2 border-gray-medium bg-[#ececec] px-3 py-4 text-gray-medium hover:bg-gray-medium hover:text-[#ececec]; + } + + .toggle-link-selected { + @apply border-orange-2; + } + + .divider { + @apply h-0.5 rounded-sm bg-gray-medium dark:bg-gray-1 my-2; + } + + .btn { + @apply hover:text-gray-medium dark:hover:text-gray-medium; + } +} diff --git a/maestro-cli/src/main/resources/tailwind.config.js b/maestro-cli/src/main/resources/tailwind.config.js new file mode 100644 index 0000000000..e88be33516 --- /dev/null +++ b/maestro-cli/src/main/resources/tailwind.config.js @@ -0,0 +1,14 @@ +tailwind.config = { + darkMode: "media", + theme: { + extend: { + colors: { + "gray-dark": "#110c22", // text-gray-dark + "gray-medium": "#4f4b5c", // text-gray-medium + "gray-1": "#f8f8f8", // surface-gray-1 + "gray-0": "#110C22", // surface-gray-0 + "orange-2": "#ff9254", // surface-orange-2 + }, + }, + }, +}; diff --git a/maestro-client/build.gradle b/maestro-client/build.gradle index f47b372d3d..479e1caeca 100644 --- a/maestro-client/build.gradle +++ b/maestro-client/build.gradle @@ -72,6 +72,7 @@ dependencies { api(libs.jackson.dataformat.xml) api(libs.apk.parser) + implementation project(':maestro-ios') implementation(libs.google.findbugs) implementation(libs.axml) diff --git a/maestro-client/src/main/java/maestro/Errors.kt b/maestro-client/src/main/java/maestro/Errors.kt index 00f2608230..0f7b6dab2b 100644 --- a/maestro-client/src/main/java/maestro/Errors.kt +++ b/maestro-client/src/main/java/maestro/Errors.kt @@ -45,6 +45,8 @@ sealed class MaestroException(override val 
message: String) : RuntimeException(m class UnableToTakeScreenshot(message: String) : MaestroException(message) + class AINotAvailable(message: String) : MaestroException(message) + class DestinationIsNotWritable(message: String) : MaestroException(message) class UnableToCopyTextFromElement(message: String): MaestroException(message) diff --git a/maestro-client/src/main/java/maestro/Maestro.kt b/maestro-client/src/main/java/maestro/Maestro.kt index 9f7183454f..595f7bd78c 100644 --- a/maestro-client/src/main/java/maestro/Maestro.kt +++ b/maestro-client/src/main/java/maestro/Maestro.kt @@ -29,6 +29,7 @@ import maestro.utils.SocketUtils import okio.Sink import okio.buffer import okio.sink +import okio.use import org.slf4j.LoggerFactory import java.awt.image.BufferedImage import java.io.File @@ -36,7 +37,9 @@ import java.util.* import kotlin.system.measureTimeMillis @Suppress("unused", "MemberVisibilityCanBePrivate") -class Maestro(private val driver: Driver) : AutoCloseable { +class Maestro( + private val driver: Driver, +) : AutoCloseable { private val sessionId = UUID.randomUUID() @@ -497,8 +500,9 @@ class Maestro(private val driver: Driver) : AutoCloseable { driver.close() } + @Deprecated("Use takeScreenshot(Sink, Boolean) instead") fun takeScreenshot(outFile: File, compressed: Boolean) { - LOGGER.info("Taking screenshot: $outFile") + LOGGER.info("Taking screenshot to a file: $outFile") val absoluteOutFile = outFile.absoluteFile @@ -516,7 +520,19 @@ class Maestro(private val driver: Driver) : AutoCloseable { } } + fun takeScreenshot(sink: Sink, compressed: Boolean) { + LOGGER.info("Taking screenshot") + + sink + .buffer() + .use { + ScreenshotUtils.takeScreenshot(it, compressed, driver) + } + } + fun startScreenRecording(out: Sink): ScreenRecording { + LOGGER.info("Starting screen recording") + if (screenRecordingInProgress) { LOGGER.info("Screen recording not started: Already in progress") return object : ScreenRecording { @@ -602,20 +618,14 @@ class 
Maestro(private val driver: Driver) : AutoCloseable { private const val SCREENSHOT_DIFF_THRESHOLD = 0.005 // 0.5% private const val ANIMATION_TIMEOUT_MS: Long = 15000 - fun ios( - driver: Driver, - openDriver: Boolean = true - ): Maestro { + fun ios(driver: Driver, openDriver: Boolean = true): Maestro { if (openDriver) { driver.open() } return Maestro(driver) } - fun android( - driver: Driver, - openDriver: Boolean = true, - ): Maestro { + fun android(driver: Driver, openDriver: Boolean = true): Maestro { if (openDriver) { driver.open() } diff --git a/maestro-orchestra-models/src/main/java/maestro/orchestra/Commands.kt b/maestro-orchestra-models/src/main/java/maestro/orchestra/Commands.kt index 0ba2d0466a..dd8dd59939 100644 --- a/maestro-orchestra-models/src/main/java/maestro/orchestra/Commands.kt +++ b/maestro-orchestra-models/src/main/java/maestro/orchestra/Commands.kt @@ -364,6 +364,37 @@ data class AssertConditionCommand( } } +data class AssertNoDefectsWithAICommand( + val optional: Boolean = true, + val label: String? = null, +) : Command { + override fun description(): String { + if (label != null) return label + + return "Assert no defects with AI" + } + + override fun evaluateScripts(jsEngine: JsEngine): Command = this +} + +data class AssertWithAICommand( + val assertion: String, + val optional: Boolean = true, + val label: String? = null, +) : Command { + override fun description(): String { + if (label != null) return label + + return "Assert with AI: $assertion" + } + + override fun evaluateScripts(jsEngine: JsEngine): Command { + return copy( + assertion = assertion.evaluateScripts(jsEngine), + ) + } +} + data class InputTextCommand( val text: String, val label: String? 
= null, diff --git a/maestro-orchestra-models/src/main/java/maestro/orchestra/MaestroCommand.kt b/maestro-orchestra-models/src/main/java/maestro/orchestra/MaestroCommand.kt index 3d4fc439d4..41715213a3 100644 --- a/maestro-orchestra-models/src/main/java/maestro/orchestra/MaestroCommand.kt +++ b/maestro-orchestra-models/src/main/java/maestro/orchestra/MaestroCommand.kt @@ -36,6 +36,8 @@ data class MaestroCommand( val backPressCommand: BackPressCommand? = null, @Deprecated("Use assertConditionCommand") val assertCommand: AssertCommand? = null, val assertConditionCommand: AssertConditionCommand? = null, + val assertNoDefectsWithAICommand: AssertNoDefectsWithAICommand? = null, + val assertWithAICommand: AssertWithAICommand? = null, val inputTextCommand: InputTextCommand? = null, val inputRandomTextCommand: InputRandomCommand? = null, val launchAppCommand: LaunchAppCommand? = null, @@ -76,6 +78,8 @@ data class MaestroCommand( backPressCommand = command as? BackPressCommand, assertCommand = command as? AssertCommand, assertConditionCommand = command as? AssertConditionCommand, + assertNoDefectsWithAICommand = command as? AssertNoDefectsWithAICommand, + assertWithAICommand = command as? AssertWithAICommand, inputTextCommand = command as? InputTextCommand, inputRandomTextCommand = command as? InputRandomCommand, launchAppCommand = command as? 
LaunchAppCommand, @@ -116,6 +120,8 @@ data class MaestroCommand( backPressCommand != null -> backPressCommand assertCommand != null -> assertCommand assertConditionCommand != null -> assertConditionCommand + assertNoDefectsWithAICommand != null -> assertNoDefectsWithAICommand + assertWithAICommand != null -> assertWithAICommand inputTextCommand != null -> inputTextCommand inputRandomTextCommand != null -> inputRandomTextCommand launchAppCommand != null -> launchAppCommand diff --git a/maestro-orchestra-models/src/main/java/maestro/orchestra/WorkspaceConfig.kt b/maestro-orchestra-models/src/main/java/maestro/orchestra/WorkspaceConfig.kt index cea222648e..30bf3f7011 100644 --- a/maestro-orchestra-models/src/main/java/maestro/orchestra/WorkspaceConfig.kt +++ b/maestro-orchestra-models/src/main/java/maestro/orchestra/WorkspaceConfig.kt @@ -38,9 +38,6 @@ data class WorkspaceConfig( add(string) } } - } - } - -} \ No newline at end of file +} diff --git a/maestro-orchestra/build.gradle b/maestro-orchestra/build.gradle index dda1754617..6acfa163a7 100644 --- a/maestro-orchestra/build.gradle +++ b/maestro-orchestra/build.gradle @@ -4,18 +4,21 @@ import org.jetbrains.kotlin.gradle.tasks.KotlinJvmCompile plugins { id("maven-publish") alias(libs.plugins.kotlin.jvm) + alias(libs.plugins.kotlin.serialization) alias(libs.plugins.mavenPublish) } dependencies { api(project(":maestro-orchestra-models")) implementation(project(":maestro-client")) + api(project(":maestro-ai")) api(project(":maestro-utils")) api(libs.square.okio) api(libs.jackson.core.databind) api(libs.jackson.module.kotlin) api(libs.jackson.dataformat.yaml) + implementation(libs.kotlinx.coroutines.core) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.params) diff --git a/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt b/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt index 3f5a8b0c4c..dbd96fcd66 100644 --- 
a/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt +++ b/maestro-orchestra/src/main/java/maestro/orchestra/Orchestra.kt @@ -19,15 +19,15 @@ package maestro.orchestra -import maestro.DeviceInfo -import maestro.ElementFilter -import maestro.Filters +import kotlinx.coroutines.runBlocking +import maestro.* import maestro.Filters.asFilter -import maestro.FindElementResult -import maestro.Maestro -import maestro.MaestroException -import maestro.ScreenRecording -import maestro.ViewHierarchy +import maestro.ai.AI +import maestro.ai.AI.Companion.AI_KEY_ENV_VAR +import maestro.ai.Defect +import maestro.ai.Prediction +import maestro.ai.antrophic.Claude +import maestro.ai.openai.OpenAI import maestro.js.GraalJsEngine import maestro.js.JsEngine import maestro.js.RhinoJsEngine @@ -37,20 +37,42 @@ import maestro.orchestra.filter.TraitFilters import maestro.orchestra.geo.Traveller import maestro.orchestra.util.Env.evaluateScripts import maestro.orchestra.yaml.YamlCommandReader -import maestro.toSwipeDirection import maestro.utils.Insight import maestro.utils.Insights import maestro.utils.MaestroTimer import maestro.utils.StringUtils.toRegexSafe import okhttp3.OkHttpClient +import okio.Buffer +import okio.Sink import okio.buffer import okio.sink import java.io.File import java.lang.Long.max +// TODO(bartekpacia): Use this in onCommandGeneratedOutput. +// Caveat: +// Large files should not be held in memory, instead they should be directly written to a Buffer +// that is streamed to disk. +// Idea: +// Orchestra should expose a callback like "onResourceRequested: (Command, CommandOutputType)" +sealed class CommandOutput { + data class Screenshot(val screenshot: Buffer) : CommandOutput() + data class ScreenRecording(val screenRecording: Buffer) : CommandOutput() + data class AIDefects(val defects: List<Defect>, val screenshot: Buffer) : CommandOutput() +} + +/** + * Orchestra translates high-level Maestro commands into method calls on the [Maestro] object.
+ * It's the glue between the CLI and platform-specific [Driver]s (encapsulated in the [Maestro] object). + * It's one of the core classes in this codebase. + * + * Orchestra should not know about: + * - Specific platforms where tests can be executed, such as Android, iOS, or the web. + * - File systems. It should instead write to [Sink]s that it requests from the caller. + */ class Orchestra( private val maestro: Maestro, - private val screenshotsDir: File? = null, + private val screenshotsDir: File? = null, // TODO(bartekpacia): Orchestra shouldn't interact with files directly. private val lookupTimeoutMs: Long = 17000L, private val optionalLookupTimeoutMs: Long = 7000L, private val httpClient: OkHttpClient? = null, @@ -61,10 +83,13 @@ class Orchestra( private val onCommandSkipped: (Int, MaestroCommand) -> Unit = { _, _ -> }, private val onCommandReset: (MaestroCommand) -> Unit = {}, private val onCommandMetadataUpdate: (MaestroCommand, CommandMetadata) -> Unit = { _, _ -> }, + private val onCommandGeneratedOutput: (command: Command, defects: List<Defect>, screenshot: Buffer) -> Unit = { _, _, _ -> }, ) { private lateinit var jsEngine: JsEngine + private val ai: AI? = initAI() + private var copiedText: String? = null private var timeMsOfLastInteraction = System.currentTimeMillis() @@ -197,6 +222,19 @@ class Orchestra( } } + private fun initAI(): AI? { + val apiKey = System.getenv(AI_KEY_ENV_VAR) ?: return null + val modelName: String? = System.getenv(AI.AI_MODEL_ENV_VAR) + + return if (modelName == null) OpenAI(apiKey = apiKey) + else if (modelName.startsWith("gpt-")) OpenAI(apiKey = apiKey, defaultModel = modelName) + else if (modelName.startsWith("claude-")) Claude(apiKey = apiKey, defaultModel = modelName) + else throw IllegalStateException("Unsupported AI model: $modelName") + } + + /** + * Returns true if the command mutated device state (i.e. interacted with the device), false otherwise.
+ */ private fun executeCommand(maestroCommand: MaestroCommand, config: MaestroConfig?): Boolean { val command = maestroCommand.asCommand() @@ -221,6 +259,8 @@ class Orchestra( is SwipeCommand -> swipeCommand(command) is AssertCommand -> assertCommand(command) is AssertConditionCommand -> assertConditionCommand(command) + is AssertNoDefectsWithAICommand -> assertNoDefectsWithAICommand(command) + is AssertWithAICommand -> assertWithAICommand(command) is InputTextCommand -> inputTextCommand(command) is InputRandomCommand -> inputTextRandomCommand(command) is LaunchAppCommand -> launchAppCommand(command) @@ -288,8 +328,8 @@ class Orchestra( if (!evaluateCondition(command.condition, timeoutMs = timeout)) { if (!isOptional(command.condition)) { throw MaestroException.AssertionFailure( - "Assertion is false: ${command.condition.description()}", - maestro.viewHierarchy().root, + message = "Assertion is false: ${command.condition.description()}", + hierarchyRoot = maestro.viewHierarchy().root, ) } else { throw CommandSkipped @@ -299,6 +339,69 @@ class Orchestra( return false } + private fun assertNoDefectsWithAICommand(command: AssertNoDefectsWithAICommand): Boolean = runBlocking { + // TODO(bartekpacia): make all of Orchestra suspending + + if (ai == null) { + throw MaestroException.AINotAvailable("AI client is not available. Did you export $AI_KEY_ENV_VAR?") + } + + val imageData = Buffer() + maestro.takeScreenshot(imageData, compressed = false) + + val defects = Prediction.findDefects( + aiClient = ai, + screen = imageData.copy().readByteArray(), + ) + + if (defects.isNotEmpty()) { + onCommandGeneratedOutput(command, defects, imageData) + + if (command.optional) throw CommandSkipped + + val word = if (defects.size == 1) "defect" else "defects" + throw MaestroException.AssertionFailure( + "Found ${defects.size} possible $word. 
See the report after the test completes to learn more.", + maestro.viewHierarchy().root, + ) + } + + false + } + + private fun assertWithAICommand(command: AssertWithAICommand): Boolean = runBlocking { + // TODO(bartekpacia): make all of Orchestra suspending + + if (ai == null) { + throw MaestroException.AINotAvailable("AI client is not available. Did you export $AI_KEY_ENV_VAR?") + } + + val imageData = Buffer() + maestro.takeScreenshot(imageData, compressed = false) + + val defect = Prediction.performAssertion( + aiClient = ai, + screen = imageData.copy().readByteArray(), + assertion = command.assertion, + ) + + if (defect != null) { + onCommandGeneratedOutput(command, listOf(defect), imageData) + + if (command.optional) throw CommandSkipped + + throw MaestroException.AssertionFailure( + message = """ + |Assertion is false: ${command.assertion} + |Reasoning: ${defect.reasoning} + """.trimMargin(), + hierarchyRoot = maestro.viewHierarchy().root, + ) + } + + false + } + private fun isOptional(condition: Condition): Boolean { return condition.visible?.optional == true || condition.notVisible?.optional == true diff --git a/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlAssertNoDefectsWithAI.kt b/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlAssertNoDefectsWithAI.kt new file mode 100644 index 0000000000..992aafc063 --- /dev/null +++ b/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlAssertNoDefectsWithAI.kt @@ -0,0 +1,6 @@ +package maestro.orchestra.yaml + +data class YamlAssertNoDefectsWithAI( + val optional: Boolean = true, + val label: String? 
= null, +) diff --git a/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlAssertWithAI.kt b/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlAssertWithAI.kt new file mode 100644 index 0000000000..52d5e183d0 --- /dev/null +++ b/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlAssertWithAI.kt @@ -0,0 +1,22 @@ +package maestro.orchestra.yaml + +import com.fasterxml.jackson.annotation.JsonCreator + +data class YamlAssertWithAI( + val assertion: String, + val optional: Boolean = true, + val label: String? = null, +) { + + companion object { + + @JvmStatic + @JsonCreator(mode = JsonCreator.Mode.DELEGATING) + fun parse(assertion: String): YamlAssertWithAI { + return YamlAssertWithAI( + assertion = assertion, + optional = true, + ) + } + } +} diff --git a/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlFluentCommand.kt b/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlFluentCommand.kt index 4362043416..1fa604d8f0 100644 --- a/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlFluentCommand.kt +++ b/maestro-orchestra/src/main/java/maestro/orchestra/yaml/YamlFluentCommand.kt @@ -41,6 +41,8 @@ data class YamlFluentCommand( val assertVisible: YamlElementSelectorUnion? = null, val assertNotVisible: YamlElementSelectorUnion? = null, val assertTrue: YamlAssertTrue? = null, + val assertNoDefectsWithAI: YamlAssertNoDefectsWithAI? = null, + val assertWithAI: YamlAssertWithAI? = null, val back: YamlActionBack? = null, val clearKeychain: YamlActionClearKeychain? = null, val hideKeyboard: YamlActionHideKeyboard? 
= null, @@ -115,6 +117,23 @@ data class YamlFluentCommand( ) ) ) + assertNoDefectsWithAI != null -> listOf( + MaestroCommand( + AssertNoDefectsWithAICommand( + optional = assertNoDefectsWithAI.optional, + label = assertNoDefectsWithAI.label, + ) + ) + ) + assertWithAI != null -> listOf( + MaestroCommand( + AssertWithAICommand( + assertion = assertWithAI.assertion, + optional = assertWithAI.optional, + label = assertWithAI.label, + ) + ) + ) addMedia != null -> listOf( MaestroCommand( addMediaCommand = addMediaCommand(addMedia, flowPath) @@ -688,6 +707,10 @@ data class YamlFluentCommand( toggleAirplaneMode = YamlToggleAirplaneMode() ) + "assertNoDefectsWithAI" -> YamlFluentCommand( + assertNoDefectsWithAI = YamlAssertNoDefectsWithAI() + ) + else -> throw SyntaxError("Invalid command: \"$stringCommand\"") } } diff --git a/settings.gradle.kts b/settings.gradle.kts index 280155f3c5..bb71ed2652 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -28,5 +28,6 @@ include("maestro-proto") include("maestro-studio:server") include("maestro-studio:web") include("maestro-test") +include("maestro-ai") //include("examples:samples") //findProject(":examples:samples")?.name = "samples"