feat(analyze): Adds initial Analyze option to test command locally

mobile-dev-inc · luistak · Jan 24, 2025 · Jan 13, 2025 · Jan 14, 2025 · Jan 15, 2025
commit adb27689f37dc5b3ef21a2a6fcc489d4f72d3300
diff --git a/maestro-ai/src/main/java/maestro/ai/Prediction.kt b/maestro-ai/src/main/java/maestro/ai/Prediction.kt
@@ -4,6 +4,13 @@ import kotlinx.serialization.Serializable
 import kotlinx.serialization.json.Json
 import kotlinx.serialization.json.jsonObject
 import maestro.ai.openai.OpenAI
+import java.nio.file.Path
+
+data class FlowFiles( // Artifacts of one flow grouped by kind; each entry is (file bytes, Path) — presumably the file's location, confirm with producer.
+    val jsonFiles: List<Pair<ByteArray, Path>>, // bytes are decoded to text and embedded in the generateInsights prompt
+    val imageFiles: List<Pair<ByteArray, Path>>, // bytes are passed as images to the AI client by generateInsights
+    val textFiles: List<Pair<ByteArray, Path>> // bytes are decoded to text and embedded in the generateInsights prompt
+)
 
 @Serializable
 data class Defect(
@@ -21,12 +28,27 @@ private data class ExtractTextResponse(
     val text: String?
 )
 
+@Serializable
+data class Insight( // One element of the "insights" array decoded from the AI response by generateInsights.
+    val category: String, // category string as returned by the model
+    val reasoning: String, // the model's explanation for this insight
+)
+
+@Serializable
+private data class AskForInsightsResponse( // Top-level JSON object that generateInsights decodes the AI response into.
+    val insights: List<Insight>,
+)
+
 object Prediction {
 
     private val askForDefectsSchema by lazy {
         readSchema("askForDefects")
     }
 
+    private val askForInsightsSchema by lazy { // read on first use; parsed and passed as jsonSchema for OpenAI clients in generateInsights
+        readSchema("askForInsights") // presumably resolves the askForInsights_schema.json resource — confirm in readSchema
+    }
+
     private val extractTextSchema by lazy {
         readSchema("extractText")
     }
@@ -51,6 +73,12 @@ object Prediction {
         "layout" to "Some UI elements are overlapping or are cropped",
     )
 
+    private val insightsCategories = listOf( // category name to description; rendered as "  * name: description" bullets in the generateInsights prompt
+        "visual" to "Insights related to UI elements that are overlapping or cropped",
+        "text" to " insights on Grammar and spelling, like suggestions and optimizations on the text on the page, or Inconsistent use of language, for example, mixed English and Portuguese",
+        "maestro" to "Insights on the maestro testing tool usage, best practices, tips on debugging, optimizing the workspace, or how to make the best usage of maestro commands, APIs, or extra features like maestro studio, or Robin cloud",
+    )
+
     private val allDefectCategories = defectCategories + listOf("assertion" to "The assertion is not true")
 
     suspend fun findDefects(
@@ -144,6 +172,101 @@ object Prediction {
         return defects.defects
     }
 
+    /**
+     * Asks [aiClient] for insights about the given flows: their JSON and text artifacts are embedded in the
+     * prompt and their screenshots are sent as images.
+     * @param aiClient client used for the chat completion; a JSON schema is only passed when it is an [OpenAI] client.
+     * @param flowFiles artifacts of each flow to analyze.
+     * @param printRawResponse when true, prints the raw model response to stdout before decoding it.
+     * @return the insights decoded from the model's JSON response.
+     */
+    suspend fun generateInsights(
+        aiClient: AI,
+        flowFiles: List<FlowFiles>,
+        printRawResponse: Boolean = false,
+    ): List<Insight> {
+        val prompt = buildString {
+            appendLine(
+                """
+                You are a QA engineer performing quality assurance for a mobile application.
+                Identify any defects in the provided screenshots.
+
+                You are using Maestro for e2e mobile testing, understand the tool API and best practices on how to use it based on its Docs
+                You are given screenshots of the application and the JSON and text files artifacts from the debug artifacts of maestro e2e testing tool.
+
+                Given the following maestro flows
+                """.trimIndent()
+            )
+
+            // Append file contents verbatim; interpolating them into a trimIndent() template let their indentation alter the result.
+            flowFiles.forEach { flow ->
+                appendLine("You are going to transcribe the screenshots and analyze every file below:")
+                if (flow.jsonFiles.isNotEmpty()) {
+                    appendLine("Based on these JSON files: ${flow.jsonFiles.joinToString("\n") { (content) -> String(content) }}\n")
+                }
+                if (flow.textFiles.isNotEmpty()) {
+                    appendLine("Based on these files: ${flow.textFiles.joinToString("\n") { (content) -> String(content) }}\n")
+                }
+            }
+
+            append(
+                """
+                |
+                |RULES:
+                |
+                |Your task is to generate Insights following the RULES:
+                |* You must understand each context based on the provided data, analyzing each flow.
+                |* All Insights you find must belong to one of the following categories:
+                |${insightsCategories.joinToString(separator = "\n") { "  * ${it.first}: ${it.second}" }}
+                |* If you see Insights, your response MUST only include the category and detailed reasoning for each insight.
+                |* Provide response as a list of JSON objects, each representing <category>:<reasoning>
+                |* Do not repeat the context text into the insights, make it useful for the QA developer reading the insights.
+                |* Do not generate duplicated or similar insights just changing the category.
+                |* Do not generate spam insights that are too obvious based on the screenshot.
+                |* Do not raise false positives. Some example responses that have a high chance of being a false positive:
+                |  * button is partially cropped at the bottom
+                |  * button is not aligned horizontally/vertically within its container
+                |  * element not found because it does not exist on the current screen
+                |  * ensure that the app is in the correct state before looking for the text
+                |* You must provide result as a valid JSON object, matching this structure:
+                |
+                |  {
+                |      "insights": [
+                |          {
+                |              "category": "<insight category, string>",
+                |              "reasoning": "<reasoning, string>"
+                |          },
+                |          {
+                |              "category": "<insight category, string>",
+                |              "reasoning": "<reasoning, string>"
+                |          }
+                |       ]
+                |  }
+                |
+                |DO NOT output any other information in the JSON object.
+                """.trimMargin("|")
+            )
+        }
+
+        val aiResponse = aiClient.chatCompletion(
+            prompt,
+            model = aiClient.defaultModel,
+            maxTokens = 4096,
+            identifier = "find-defects", // NOTE(review): same identifier as findDefects — confirm whether insights need their own
+            imageDetail = "high",
+            images = flowFiles.flatMap { it.imageFiles.map { (content) -> content } },
+            jsonSchema = if (aiClient is OpenAI) json.parseToJsonElement(askForInsightsSchema).jsonObject else null,
+        )
+
+        if (printRawResponse) {
+            println("--- RAW RESPONSE START ---")
+            println(aiResponse.response)
+            println("--- RAW RESPONSE END ---")
+        }
+
+        return json.decodeFromString<AskForInsightsResponse>(aiResponse.response).insights
+    }
+
     suspend fun performAssertion(
         aiClient: AI,
         screen: ByteArray,
@@ -270,4 +393,4 @@ object Prediction {
         return response.text ?: ""
     }
 
-}
+}
diff --git a/maestro-ai/src/main/resources/askForInsights_schema.json b/maestro-ai/src/main/resources/askForInsights_schema.json
@@ -0,0 +1,33 @@
+{
+  "name": "askForInsights",
+  "description": "Returns a list of possible insights found in the mobile app's UI",
+  "strict": true,
+  "schema": {
+    "type": "object",
+    "required": ["insights"],
+    "additionalProperties": false,
+    "properties": {
+      "insights": {
+        "type": "array",
+        "items": {
+          "type": "object",
+          "required": ["category", "reasoning"],
+          "additionalProperties": false,
+          "properties": {
+            "category": {
+              "type": "string",
+              "enum": [
+                "visual",
+                "text",
+                "maestro"
+              ]
+            },
+            "reasoning": {
+              "type": "string"
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/maestro-cli/src/main/java/maestro/cli/command/TestCommand.kt b/maestro-cli/src/main/java/maestro/cli/command/TestCommand.kt
@@ -29,6 +29,8 @@ import maestro.cli.App
 import maestro.cli.CliError
 import maestro.cli.DisableAnsiMixin
 import maestro.cli.ShowHelpMixin
+import maestro.cli.api.ApiClient
+import maestro.cli.auth.Auth
 import maestro.cli.device.Device
 import maestro.cli.device.DeviceService
 import maestro.cli.model.TestExecutionSummary
@@ -43,6 +45,7 @@ import maestro.cli.session.MaestroSessionManager
 import maestro.cli.util.EnvUtils
 import maestro.cli.util.FileUtils.isWebFlow
 import maestro.cli.util.PrintUtils
+import maestro.cli.util.TestAnalysisReporter
 import maestro.cli.view.box
 import maestro.orchestra.error.ValidationError
 import maestro.orchestra.util.Env.withDefaultEnvVars
@@ -56,6 +59,7 @@ import org.slf4j.LoggerFactory
 import picocli.CommandLine
 import picocli.CommandLine.Option
 import java.io.File
+import java.nio.file.Files
 import java.nio.file.Path
 import java.util.concurrent.Callable
 import java.util.concurrent.ConcurrentHashMap
@@ -158,11 +162,20 @@ class TestCommand : Callable<Int> {
     )
     private var headless: Boolean = false
 
+    @Option(
+        names = ["--analyze"],
+        description = ["[Beta] Enhance the test output analysis with AI Insights"],
+    )
+    private var analyze: Boolean = false // when set, call() runs TestAnalysisReporter on the debug output and the flag is forwarded to the flow runs
+
     @CommandLine.Spec
     lateinit var commandSpec: CommandLine.Model.CommandSpec
 
     private val usedPorts = ConcurrentHashMap<Int, Boolean>()
     private val logger = LoggerFactory.getLogger(TestCommand::class.java)
+    private val auth by lazy { // NOTE(review): in the visible hunks this is only referenced by the commented-out login check in call()
+        Auth(ApiClient("https://api.copilot.mobile.dev/v2"))
+    }
 
     private fun isWebFlow(): Boolean {
         if (flowFiles.isSingleFile) {
@@ -172,6 +185,7 @@ class TestCommand : Callable<Int> {
         return false
     }
 
+
     override fun call(): Int {
         TestDebugReporter.install(
             debugOutputPathAsString = debugOutput,
@@ -193,6 +207,19 @@ class TestCommand : Callable<Int> {
             throw CliError("The config file ${configFile?.absolutePath} does not exist.")
         }
 
+        // TODO: Integrate with `maestro login`
+        //        if (analyze) {
+        //            if (auth.getCachedAuthToken() == null) {
+        //                throw CliError(listOf(
+        //                    "❌ Login Required\n",
+        //                    "You need to sign in before using the --analyze option.",
+        //                    "Please run:",
+        //                    "`maestro login`\n",
+        //                    "After signing in, try running your command again."
+        //                ).joinToString("\n").box())
+        //            }
+        //        }
+
         val executionPlan = try {
             WorkspaceExecutionPlanner.plan(
                 input = flowFiles.map { it.toPath().toAbsolutePath() }.toSet(),
@@ -290,6 +317,7 @@ class TestCommand : Callable<Int> {
         suites.mergeSummaries()?.saveReport()
 
         if (effectiveShards > 1) printShardsMessage(passed, total, suites)
+        if (analyze) TestAnalysisReporter().runAnalysis(debugOutputPath)
         if (passed == total) 0 else 1
     }
 
@@ -333,7 +361,7 @@ class TestCommand : Callable<Int> {
                     if (!flattenDebugOutput) {
                         TestDebugReporter.deleteOldFiles()
                     }
-                    TestRunner.runContinuous(maestro, device, flowFile, env)
+                    TestRunner.runContinuous(maestro, device, flowFile, env, analyze)
                 } else {
                     runSingleFlow(maestro, device, flowFile, debugOutputPath)
                 }
@@ -371,6 +399,7 @@ class TestCommand : Callable<Int> {
             env = env,
             resultView = resultView,
             debugOutputPath = debugOutputPath,
+            analyze = analyze
         )
 
         if (resultSingle == 1) {