Selfie Quality Check With CoreML Model #206

Merged: 27 commits, Aug 26, 2024

Commits
4080479
implement design for selfie capture camera frame.
tobitech Jul 19, 2024
d823d16
add lottie animation, product cells for selfie enrollment and selfie …
tobitech Jul 22, 2024
edfcba5
update new selfie view model with existing implementation
tobitech Jul 24, 2024
b9c89cd
add some image utility functions, import selfie quality model and add…
tobitech Jul 31, 2024
aa8519b
Merge branch 'new-smart-selfie-capture' into selfie-quality-model
tobitech Jul 31, 2024
1374bfc
some code formatting
tobitech Jul 31, 2024
8a3159d
more code formatting
tobitech Jul 31, 2024
f5a508a
compile coreml model to generate .mlmodelc file.
tobitech Jul 31, 2024
a1e727b
automatically generate the model class file.
tobitech Jul 31, 2024
50b6c70
remove uncompiled model file.
tobitech Jul 31, 2024
dd6ca7e
disable lint from generated file.
tobitech Jul 31, 2024
7a2f325
make a separate class that will handle all the image processing and c…
tobitech Aug 6, 2024
53f5e67
add some documentation and code formatting
tobitech Aug 6, 2024
ed33072
Merge branch 'main' into selfie-quality-model
tobitech Aug 7, 2024
7b8e90f
fix issue with crop to face function.
tobitech Aug 13, 2024
5d0b1d4
Merge branch 'main' into selfie-quality-model
tobitech Aug 19, 2024
3fa357b
Replace the selfie quality model with a new version that takes an ima…
tobitech Aug 19, 2024
ee8e047
remove call to check selfie quality in the v2 view model.
tobitech Aug 19, 2024
d8b7a39
Merge branch 'main' into selfie-quality-model
tobitech Aug 19, 2024
39ec595
Merge branch 'new-smart-selfie-capture' into selfie-quality-model
tobitech Aug 19, 2024
91319fc
disable linting for model generated file.
tobitech Aug 19, 2024
4bfd006
code formatting.
tobitech Aug 19, 2024
fdd888a
remove resources folder and the lottie file.
tobitech Aug 19, 2024
98d96d9
code formatting.
tobitech Aug 19, 2024
f222c52
Merge branch 'main' into selfie-quality-model
tobitech Aug 19, 2024
7cce0e2
Merge branch 'new-smart-selfie-capture' into selfie-quality-model
tobitech Aug 19, 2024
9091050
move model image cropping size info to a constant.
tobitech Aug 23, 2024
2 changes: 1 addition & 1 deletion Example/Podfile.lock
@@ -32,7 +32,7 @@ SPEC CHECKSUMS:
lottie-ios: fcb5e73e17ba4c983140b7d21095c834b3087418
netfox: 9d5cc727fe7576c4c7688a2504618a156b7d44b7
Sentry: 96ae1dcdf01a644bc3a3b1dc279cecaf48a833fb
SmileID: 500429946fbb916221450c9f792fa94ee1060955
SmileID: a76ed6f7b5a5a157e8c8d14e779f1b7c6322f112
SwiftLint: 3fe909719babe5537c552ee8181c0031392be933
ZIPFoundation: b8c29ea7ae353b309bc810586181fd073cb3312c

17 changes: 17 additions & 0 deletions ML/CGImage+CVPixelBuffer.swift
@@ -0,0 +1,17 @@
import CoreGraphics
import CoreImage
import VideoToolbox

extension CGImage {
    /**
     Creates a new CGImage from a CVPixelBuffer.

     - Note: Not all CVPixelBuffer pixel formats support conversion into a
     CGImage-compatible pixel format.
     */
    public static func create(pixelBuffer: CVPixelBuffer) -> CGImage? {
        var cgImage: CGImage?
        VTCreateCGImageFromCVPixelBuffer(pixelBuffer, options: nil, imageOut: &cgImage)
        return cgImage
    }
}
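For context, here is a minimal usage sketch for this helper: converting a camera frame into a `CGImage` inside a capture delegate callback. The delegate setup is assumed, not part of this diff; any `CVPixelBuffer` source works the same way.

```swift
import AVFoundation
import CoreGraphics

// Sketch: a typical AVCaptureVideoDataOutputSampleBufferDelegate callback.
// The capture session wiring is assumed and omitted here.
func captureOutput(
    _ output: AVCaptureOutput,
    didOutput sampleBuffer: CMSampleBuffer,
    from connection: AVCaptureConnection
) {
    guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer),
          let cgImage = CGImage.create(pixelBuffer: pixelBuffer) else {
        // Not every pixel format converts; see the note above.
        return
    }
    print("Converted frame: \(cgImage.width) × \(cgImage.height)")
}
```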
117 changes: 117 additions & 0 deletions ML/ImageClassifier.swift
@@ -0,0 +1,117 @@
import CoreML
import UIKit
import Vision

/// An enum representing possible errors during image classification
enum ImageClassifierError: Error {
    case preprocessingFailed
    case classificationFailed
    case invalidOutputFormat
    case imageConversionFailed
    case faceCroppingFailed
}

/// A structure representing the image quality check result
struct ImageQualityResult {
    let passed: Bool
    let confidence: Float

    var description: String {
        return passed ? "Passed" : "Failed"
    }
}

/// A class that performs image classification to determine selfie quality using a Core ML model
class ModelImageClassifier {
    let cropSize = (width: 120, height: 120)

    init() {}

    /// Classifies an image using the Core ML model.
    /// - Parameter imageBuffer: The input image as a CVPixelBuffer.
    /// - Returns: A result containing the classification confidence.
    func classify(imageBuffer: CVPixelBuffer) async throws -> ImageQualityResult {
        guard let image = UIImage(pixelBuffer: imageBuffer) else {
            throw ImageClassifierError.preprocessingFailed
        }
        let croppedImage = try await cropToFace(image: image)
        guard let convertedImage = croppedImage.pixelBuffer(width: cropSize.width, height: cropSize.height) else {
            throw ImageClassifierError.preprocessingFailed
        }
        return try performClassification(imageBuffer: convertedImage)
    }

    /// Crops the input image to the region of the first face in the image.
    /// - Parameter image: The original input image, which should contain a face.
    /// - Returns: A cropped image of the detected face.
    private func cropToFace(image: UIImage) async throws -> UIImage {
        guard let cgImage = image.cgImage else {
            throw ImageClassifierError.faceCroppingFailed
        }

        let request = VNDetectFaceRectanglesRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])

        try handler.perform([request])

        guard let results = request.results,
              let face = results.first else {
            throw ImageClassifierError.faceCroppingFailed
        }

        // Vision returns a normalized bounding box with a bottom-left origin;
        // convert it to image coordinates with a top-left origin.
        let boundingBox = face.boundingBox

        let size = CGSize(
            width: boundingBox.width * image.size.width,
            height: boundingBox.height * image.size.height
        )
        let origin = CGPoint(
            x: boundingBox.minX * image.size.width,
            y: (1 - boundingBox.minY) * image.size.height - size.height
        )

        let faceRect = CGRect(origin: origin, size: size)

        guard let croppedImage = cgImage.cropping(to: faceRect) else {
            throw ImageClassifierError.faceCroppingFailed
        }

        return UIImage(cgImage: croppedImage)
    }

    /// Performs the actual classification using the `SelfieQualityDetector` model.
    /// - Parameter imageBuffer: The preprocessed 120 × 120 face crop as a CVPixelBuffer.
    /// - Returns: The ImageQualityResult
    private func performClassification(imageBuffer: CVPixelBuffer) throws -> ImageQualityResult {
        let modelConfiguration = MLModelConfiguration()
        let model = try SelfieQualityDetector(configuration: modelConfiguration)

        let input = SelfieQualityDetectorInput(conv2d_193_input: imageBuffer)

        let prediction = try model.prediction(input: input)
        let output = prediction.Identity
        return try processModelOutput(output)
    }

    /// Processes the model's output to determine the final classification.
    /// - Parameter output: The MLMultiArray output from the model
    /// - Returns: The ImageQualityResult
    private func processModelOutput(_ output: MLMultiArray) throws -> ImageQualityResult {
        guard output.shape.count == 2,
              output.shape[0] == 1,
              output.shape[1] == 2 else {
            throw ImageClassifierError.invalidOutputFormat
        }

        // The model emits a 1 × 2 array: [fail score, pass score].
        let failScore = output[0].floatValue
        let passScore = output[1].floatValue

        let passed = passScore > failScore
        let confidence = passed ? passScore : failScore

        return ImageQualityResult(passed: passed, confidence: confidence)
    }
}
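For reference, a minimal calling sketch for the new classifier, assuming a `CVPixelBuffer` frame from the capture pipeline; the wrapper function and its error handling are illustrative, not part of this diff:

```swift
let classifier = ModelImageClassifier()

// Sketch: evaluate one frame and log the outcome.
func evaluateFrame(_ frame: CVPixelBuffer) async {
    do {
        let result = try await classifier.classify(imageBuffer: frame)
        // `confidence` is the winning class's score from the model's 1 × 2 output.
        print("Selfie quality: \(result.description) (confidence: \(result.confidence))")
    } catch {
        print("Selfie quality check failed: \(error)")
    }
}
```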
Binary file added ML/SelfieQualityDetector.mlmodelc/coremldata.bin
Binary file not shown.
64 changes: 64 additions & 0 deletions ML/SelfieQualityDetector.mlmodelc/metadata.json
@@ -0,0 +1,64 @@
[
  {
    "metadataOutputVersion" : "3.0",
    "storagePrecision" : "Float32",
    "outputSchema" : [
      {
        "hasShapeFlexibility" : "0",
        "isOptional" : "0",
        "dataType" : "Float32",
        "formattedType" : "MultiArray (Float32)",
        "shortDescription" : "",
        "shape" : "[]",
        "name" : "Identity",
        "type" : "MultiArray"
      }
    ],
    "modelParameters" : [

    ],
    "specificationVersion" : 4,
    "computePrecision" : "Float16",
    "isUpdatable" : "0",
    "availability" : {
      "macOS" : "10.15",
      "tvOS" : "13.0",
      "visionOS" : "1.0",
      "watchOS" : "6.0",
      "iOS" : "13.0",
      "macCatalyst" : "13.0"
    },
    "neuralNetworkLayerTypeHistogram" : {
      "ActivationReLU" : 5,
      "ReshapeStatic" : 1,
      "Transpose" : 1,
      "SoftmaxND" : 1,
      "Convolution" : 5,
      "InnerProduct" : 2,
      "PoolingMax" : 3
    },
    "modelType" : {
      "name" : "MLModelType_neuralNetwork"
    },
    "userDefinedMetadata" : {
      "com.github.apple.coremltools.version" : "7.2",
      "com.github.apple.coremltools.source" : "tensorflow==2.15.0"
    },
    "generatedClassName" : "SelfieQualityDetector",
    "inputSchema" : [
      {
        "height" : "120",
        "colorspace" : "RGB",
        "isOptional" : "0",
        "width" : "120",
        "isColor" : "1",
        "formattedType" : "Image (Color 120 × 120)",
        "hasSizeFlexibility" : "0",
        "type" : "Image",
        "shortDescription" : "",
        "name" : "conv2d_193_input"
      }
    ],
    "method" : "predict"
  }
]
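The metadata above pins down the model's contract: one 120 × 120 RGB image input named `conv2d_193_input` and one `MultiArray` output named `Identity`. Below is a hedged sketch for verifying that contract at runtime against the compiled `.mlmodelc`; the bundle lookup is an assumption, since the actual resource location depends on how the SDK packages the model.

```swift
import CoreML

// Assumption: the compiled model ships in the app/SDK bundle under this name.
guard let modelURL = Bundle.main.url(
    forResource: "SelfieQualityDetector", withExtension: "mlmodelc"
) else {
    fatalError("SelfieQualityDetector.mlmodelc not found in bundle")
}

let model = try MLModel(contentsOf: modelURL)
let description = model.modelDescription

// Expect: input "conv2d_193_input" (Image, 120 × 120, RGB)
// and output "Identity" (MultiArray), per metadata.json above.
for (name, feature) in description.inputDescriptionsByName {
    print("input: \(name) -> \(feature)")
}
for (name, feature) in description.outputDescriptionsByName {
    print("output: \(name) -> \(feature)")
}
```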