From 43c8f8bee6611a90bbe466a803e81de1af62ebb0 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Tue, 21 May 2024 11:55:29 +0900 Subject: [PATCH 01/16] Add human feature. --- YOLO.xcodeproj/project.pbxproj | 4 + YOLO/Main.storyboard | 17 +- YOLO/Utilities/BoundingBoxView.swift | 36 ++- YOLO/Utilities/PostProcessing.swift | 137 ++++++++ YOLO/ViewController.swift | 464 +++++++++++++++++---------- 5 files changed, 474 insertions(+), 184 deletions(-) create mode 100644 YOLO/Utilities/PostProcessing.swift diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index ddcf965..19a6758 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -21,6 +21,7 @@ 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; + 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ @@ -41,6 +42,7 @@ 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; + 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; 8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -60,6 +62,7 @@ 636166E72514438D0054FA7E /* Utilities */ = { isa = PBXGroup; children = ( + 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */, 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, ); @@ -212,6 +215,7 @@ files = ( 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */, + 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */, diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index 048e9f6..c5daed8 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -1,9 +1,9 
@@ - + - + @@ -182,18 +182,30 @@ + + + + + + + + + + + + @@ -223,6 +235,7 @@ + diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index b506545..bfbe2fb 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -20,6 +20,9 @@ class BoundingBoxView { /// The layer that displays the label and confidence score for the detected object. let textLayer: CATextLayer + /// The layer that displays the inner text within the bounding box. + let innerTextLayer: CATextLayer + /// Initializes a new BoundingBoxView with configured shape and text layers. init() { shapeLayer = CAShapeLayer() @@ -33,22 +36,32 @@ class BoundingBoxView { textLayer.fontSize = 14 // Set font size for the label text textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels textLayer.alignmentMode = .center // Center-align the text within the layer + + innerTextLayer = CATextLayer() + innerTextLayer.isHidden = true // Initially hidden; shown with label when a detection occurs + innerTextLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays + innerTextLayer.fontSize = 12 // Set font size for the inner text + innerTextLayer.font = UIFont(name: "Avenir", size: innerTextLayer.fontSize) // Use Avenir font for inner text + innerTextLayer.alignmentMode = .left // Left-align the text within the layer + innerTextLayer.isWrapped = true // Wrap the text to fit within the layer } - /// Adds the bounding box and text layers to a specified parent layer. - /// - Parameter parent: The CALayer to which the bounding box and text layers will be added. + /// Adds the bounding box, text, and inner text layers to a specified parent layer. + /// - Parameter parent: The CALayer to which the bounding box, text, and inner text layers will be added. func addToLayer(_ parent: CALayer) { parent.addSublayer(shapeLayer) parent.addSublayer(textLayer) + parent.addSublayer(innerTextLayer) } - /// Updates the bounding box and label to be visible with specified properties. + /// Updates the bounding box, label, and inner text to be visible with specified properties. /// - Parameters: /// - frame: The CGRect frame defining the bounding box's size and position. /// - label: The text label to display (e.g., object class and confidence). /// - color: The color of the bounding box stroke and label background. /// - alpha: The opacity level for the bounding box stroke and label background. - func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat) { + /// - innerTexts: The text to display inside the bounding box. 
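+  ///     Note: an empty string hides the inner text layer; it is only rendered when `innerTexts` is non-empty (see the handling further down in this method).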
+ func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat, innerTexts: String) { CATransaction.setDisableActions(true) // Disable implicit animations let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box @@ -69,11 +82,24 @@ class BoundingBoxView { let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame + + if !innerTexts.isEmpty { + innerTextLayer.string = innerTexts // Set the inner text + innerTextLayer.backgroundColor = UIColor.red.withAlphaComponent(0.5).cgColor // No background color + innerTextLayer.isHidden = false // Make the inner text layer visible + innerTextLayer.foregroundColor = UIColor.white.cgColor // Set text color + innerTextLayer.frame = CGRect(x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, height: frame.height - 8) + // Set the inner text layer frame + } else { + innerTextLayer.isHidden = true // Hide the inner text layer if innerTexts is empty + } + } - /// Hides the bounding box and text layers. + /// Hides the bounding box, text, and inner text layers. func hide() { shapeLayer.isHidden = true textLayer.isHidden = true + innerTextLayer.isHidden = true } } diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift new file mode 100644 index 0000000..b29d2d1 --- /dev/null +++ b/YOLO/Utilities/PostProcessing.swift @@ -0,0 +1,137 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// PostProcessing for Ultralytics YOLO App +// This feature is designed to post-process the output of a YOLOv8 model within the Ultralytics YOLO app to extract high-confidence objects. +// Output high confidence boxes and their corresponding feature values using Non max suppression. +// Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license +// Access the source code: https://github.com/ultralytics/yolo-ios-app + + +import Foundation +import CoreML +import Vision + +func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] { + let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } + var selectedIndices = [Int]() + var activeIndices = [Bool](repeating: true, count: boxes.count) + + for i in 0.. 
CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { + activeIndices[otherIdx] = false + } + } + } + } + } + return selectedIndices +} + +// Human model's output [1,95,8400] to [(Box, Confidence, HumanFeatures)] + +func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Float, [Float])] { + let numAnchors = prediction.shape[2].intValue + var boxes = [CGRect]() + var scores = [Float]() + var features = [[Float]]() + let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer)) + let lock = DispatchQueue(label: "com.example.lock") + + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let confIndex = 4 * numAnchors + j + let confidence = featurePointer[confIndex] + if confidence > confidenceThreshold { + let x = featurePointer[j] + let y = featurePointer[numAnchors + j] + let width = featurePointer[2 * numAnchors + j] + let height = featurePointer[3 * numAnchors + j] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var boxFeatures = [Float](repeating: 0, count: 11) + for k in 0..<11 { + let key = (84 + k) * numAnchors + j + boxFeatures[k] = featurePointer[key] + } + + lock.sync { + boxes.append(boundingBox) + scores.append(confidence) + features.append(boxFeatures) + } + } + } + + let selectedIndices = nonMaxSuppression(boxes: boxes, scores: scores, threshold: iouThreshold) + var selectedBoxesAndFeatures = [(CGRect, Float, [Float])]() + + for idx in selectedIndices { + selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) + } + + return selectedBoxesAndFeatures +} + +let genders = ["female", "male"] +let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] + +struct HumanFeatures { + let weight: Float + let height: Float + let age: Int + let gender: String + let genderConfidence: Float + let race: String + let raceConfidence: Float + + init(features:[Float]) { + self.weight = features[0] + self.height = features[1] + self.age = Int(round(features[2])) + let genderCandidates = Array(features[3..<5]) + var genderMaxIndex = 0 + var genderMaxValue = genderCandidates[0] + + for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { + if genderValue > genderMaxValue { + genderMaxValue = genderValue + genderMaxIndex = genderIndex + 1 + } + } + + self.gender = genders[genderMaxIndex] + self.genderConfidence = genderMaxValue + + let raceCandidates = Array(features[5...]) + var raceMaxIndex = 0 + var raceMaxValue = raceCandidates[0] + + for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { + if raceValue > raceMaxValue { + raceMaxValue = raceValue + raceMaxIndex = raceIndex + 1 + } + } + self.race = races[raceMaxIndex] + self.raceConfidence = raceMaxValue + } +} + +extension CGRect { + var area: CGFloat { + return width * height + } +} + diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 025a3de..e11dfd9 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -23,6 +23,7 @@ class ViewController: UIViewController { @IBOutlet var videoPreview: UIView! @IBOutlet var View0: UIView! @IBOutlet var segmentedControl: UISegmentedControl! + @IBOutlet weak var taskSegmentControl: UISegmentedControl! @IBOutlet var playButtonOutlet: UIBarButtonItem! @IBOutlet var pauseButtonOutlet: UIBarButtonItem! 
@IBOutlet var slider: UISlider! @@ -36,7 +37,7 @@ class ViewController: UIViewController { @IBOutlet weak var labelSliderConf: UILabel! @IBOutlet weak var labelSliderIoU: UILabel! @IBOutlet weak var activityIndicator: UIActivityIndicatorView! - + let selection = UISelectionFeedbackGenerator() var detector = try! VNCoreMLModel(for: mlModel) var session: AVCaptureSession! @@ -49,12 +50,12 @@ class ViewController: UIViewController { var t3 = CACurrentMediaTime() // FPS start var t4 = 0.0 // FPS dt smoothed // var cameraOutput: AVCapturePhotoOutput! - + // Developer mode let developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings let save_detections = false // write every detection to detections.txt let save_frames = false // write every frame to frames.txt - + lazy var visionRequest: VNCoreMLRequest = { let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in @@ -64,54 +65,110 @@ class ViewController: UIViewController { request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop return request }() + + enum Task { + case detect + case human + } + + var task: Task = .detect + var confidenceThreshold:Float = 0.25 + var iouThreshold:Float = 0.4 override func viewDidLoad() { super.viewDidLoad() slider.value = 30 + taskSegmentControl.selectedSegmentIndex = 0 setLabels() setUpBoundingBoxViews() startVideo() // setModel() } - + @IBAction func vibrate(_ sender: Any) { selection.selectionChanged() } - + @IBAction func indexChanged(_ sender: Any) { selection.selectionChanged() activityIndicator.startAnimating() - - /// Switch model - switch segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - mlModel = try! yolov8n(configuration: .init()).model - case 1: - self.labelName.text = "YOLOv8s" - mlModel = try! yolov8s(configuration: .init()).model - case 2: - self.labelName.text = "YOLOv8m" - mlModel = try! yolov8m(configuration: .init()).model - case 3: - self.labelName.text = "YOLOv8l" - mlModel = try! yolov8l(configuration: .init()).model - case 4: - self.labelName.text = "YOLOv8x" - mlModel = try! yolov8x(configuration: .init()).model - default: - break - } setModel() setUpBoundingBoxViews() activityIndicator.stopAnimating() } - + func setModel() { + + /// Switch model + switch task { + case .detect: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + mlModel = try! yolov8n(configuration: .init()).model + case 1: + self.labelName.text = "YOLOv8s" + mlModel = try! yolov8s(configuration: .init()).model + case 2: + self.labelName.text = "YOLOv8m" + mlModel = try! yolov8m(configuration: .init()).model + case 3: + self.labelName.text = "YOLOv8l" + mlModel = try! yolov8l(configuration: .init()).model + case 4: + self.labelName.text = "YOLOv8x" + mlModel = try! yolov8x(configuration: .init()).model + default: + break + } + case .human: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + if #available(iOS 15.0, *) { + mlModel = try! yolov8n_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 1: + self.labelName.text = "YOLOv8s" + if #available(iOS 15.0, *) { + mlModel = try! yolov8s_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 2: + self.labelName.text = "YOLOv8m" + if #available(iOS 15.0, *) { + mlModel = try! 
yolov8m_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 3: + self.labelName.text = "YOLOv8l" + if #available(iOS 15.0, *) { + mlModel = try! yolov8l_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 4: + self.labelName.text = "YOLOv8x" + if #available(iOS 15.0, *) { + mlModel = try! yolov8x_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + default: + break + } + + } + DispatchQueue.global(qos: .userInitiated).async { [self] in + /// VNCoreMLModel detector = try! VNCoreMLModel(for: mlModel) detector.featureProvider = ThresholdProvider() - + /// VNCoreMLRequest let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in self?.processObservations(for: request, error: error) @@ -121,20 +178,43 @@ class ViewController: UIViewController { t2 = 0.0 // inference dt smoothed t3 = CACurrentMediaTime() // FPS start t4 = 0.0 // FPS dt smoothed + } } - + /// Update thresholds from slider values @IBAction func sliderChanged(_ sender: Any) { + self.confidenceThreshold = sliderConf.value + self.iouThreshold = sliderIoU.value let conf = Double(round(100 * sliderConf.value)) / 100 let iou = Double(round(100 * sliderIoU.value)) / 100 self.labelSliderConf.text = String(conf) + " Confidence Threshold" self.labelSliderIoU.text = String(iou) + " IoU Threshold" detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) } - + + @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { + switch sender.selectedSegmentIndex { + case 0: + if self.task != .detect { + self.task = .detect + self.setModel() + } + case 1: + if self.task != .human { + self.task = .human + for i in 0.. Double { let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String) @@ -376,7 +484,7 @@ class ViewController: UIViewController { } return 0 } - + // Return RAM usage (GB) func memoryUsage() -> Double { var taskInfo = mach_task_basic_info() @@ -392,138 +500,140 @@ class ViewController: UIViewController { return 0 } } - - func show(predictions: [VNRecognizedObjectObservation]) { - let width = videoPreview.bounds.width // 375 pix - let height = videoPreview.bounds.height // 812 pix + + func show(predictions: [VNRecognizedObjectObservation], boxesAndValues: [(CGRect, Float, [Float])]) { + let width = videoPreview.bounds.width + let height = videoPreview.bounds.height var str = "" - - // ratio = videoPreview AR divided by sessionPreset AR + var ratio: CGFloat = 1.0 if videoCapture.captureSession.sessionPreset == .photo { - ratio = (height / width) / (4.0 / 3.0) // .photo + ratio = (height / width) / (4.0 / 3.0) } else { - ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc. 
+ ratio = (height / width) / (16.0 / 9.0) } - - // date + let date = Date() let calendar = Calendar.current let hour = calendar.component(.hour, from: date) let minutes = calendar.component(.minute, from: date) let seconds = calendar.component(.second, from: date) let nanoseconds = calendar.component(.nanosecond, from: date) - let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day - - self.labelSlider.text = String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" + let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 + + var resultCount = 0 + + switch task { + case .detect: + resultCount = predictions.count + case .human: + resultCount = boxesAndValues.count + } + self.labelSlider.text = String(resultCount) + " items (max " + String(Int(slider.value)) + ")" for i in 0..= 1 { // iPhone ratio = 1.218 - let offset = (1 - ratio) * (0.5 - rect.minX) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) - rect = rect.applying(transform) - rect.size.width *= ratio - } else { // iPad ratio = 0.75 - let offset = (ratio - 1) * (0.5 - rect.maxY) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) - rect = rect.applying(transform) - rect.size.height /= ratio - } - - // Scale normalized to pixels [375, 812] [width, height] - rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height)) - - // The labels array is a list of VNClassificationObservation objects, - // with the highest scoring class first in the list. - let bestClass = prediction.labels[0].identifier - let confidence = prediction.labels[0].confidence - // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels) - - // Show the bounding box. - boundingBoxViews[i].show(frame: rect, - label: String(format: "%@ %.1f", bestClass, confidence * 100), - color: colors[bestClass] ?? 
UIColor.white, - alpha: CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) - + if ratio >= 1 { + let offset = (1 - ratio) * (0.5 - displayRect.minX) + if task == .detect { + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) + displayRect = displayRect.applying(transform) + } else { + let transform = CGAffineTransform(translationX: offset, y: 0) + displayRect = displayRect.applying(transform) + } + displayRect.size.width *= ratio + } else { + let offset = (ratio - 1) * (0.5 - displayRect.maxY) + if task == .detect { + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) + displayRect = displayRect.applying(transform) + } else { + let transform = CGAffineTransform(translationX: 0, y: 1-offset) + displayRect = displayRect.applying(transform) + } + displayRect.size.height /= ratio + } + displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) + + boundingBoxViews[i].show(frame: displayRect, label: label, color: boxColor, alpha: alpha, innerTexts: innerTexts) + if developerMode { - // Write if save_detections { str += String(format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) } - - // Action trigger upon detection - // if false { - // if (bestClass == "car") { // "cell phone", "car", "person" - // self.takePhoto(nil) - // // self.pauseButton(nil) - // sleep(2) - // } - // } } + } else { boundingBoxViews[i].hide() } } - - // Write - if developerMode { - if save_detections { - saveText(text: str, file: "detections.txt") // Write stats for each detection - } - if save_frames { - str = String(format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n", - sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, - self.t1 * 1000, self.t2 * 1000, 1 / self.t4) - saveText(text: str, file: "frames.txt") // Write stats for each image - } - } - - // Debug - // print(str) - // print(UIDevice.current.identifierForVendor!) - // saveImage() } - + + // Pinch to Zoom Start --------------------------------------------------------------------------------------------- let minimumZoom: CGFloat = 1.0 let maximumZoom: CGFloat = 10.0 var lastZoomFactor: CGFloat = 1.0 - + @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { let device = videoCapture.captureDevice - + // Return zoom value between the minimum and maximum zoom values func minMaxZoom(_ factor: CGFloat) -> CGFloat { return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) } - + func update(scale factor: CGFloat) { do { try device.lockForConfiguration() @@ -535,7 +645,7 @@ class ViewController: UIViewController { print("\(error.localizedDescription)") } } - + let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) switch pinch.state { case .began: fallthrough @@ -569,7 +679,7 @@ extension ViewController: AVCapturePhotoCaptureDelegate { let dataProvider = CGDataProvider(data: dataImage as CFData) let cgImageRef: CGImage! 
= CGImage(jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) let image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: UIImage.Orientation.right) - + // Save to camera roll UIImageWriteToSavedPhotosAlbum(image, nil, nil, nil); } else { From 1c72a5285760bbba6f9c088784bc64003a1326ce Mon Sep 17 00:00:00 2001 From: john-rocky Date: Tue, 21 May 2024 12:17:38 +0900 Subject: [PATCH 02/16] Add human feature. --- YOLO.xcodeproj/project.pbxproj | 24 ++---------------------- YOLO/Info.plist | 2 +- YOLO/Utilities/BoundingBoxView.swift | 4 ++-- 3 files changed, 5 insertions(+), 25 deletions(-) diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index 19a6758..7dac285 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -13,11 +13,6 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA221E62DD300DE43BC /* VideoCapture.swift */; }; 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA721E62DD300DE43BC /* AppDelegate.swift */; }; 636EFCB921E62E3900DE43BC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 636EFCB821E62E3900DE43BC /* Assets.xcassets */; }; - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */; }; - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */; }; - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */; }; - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */; }; - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */; }; 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; @@ -36,11 +31,6 @@ 636EFCA221E62DD300DE43BC /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 636EFCA721E62DD300DE43BC /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; - 6381D2162B7817C200ABA4E8 /* 
yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -90,11 +80,6 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */, - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */, - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */, - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */, - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */, ); path = Models; sourceTree = ""; @@ -213,14 +198,9 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */, 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */, - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */, - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, @@ -355,7 +335,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -383,7 +363,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; diff --git a/YOLO/Info.plist b/YOLO/Info.plist index c36dbc0..0917317 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 24 + 29 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index bfbe2fb..dfb758e 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -85,9 +85,9 @@ class BoundingBoxView { if !innerTexts.isEmpty { innerTextLayer.string = innerTexts // Set the inner text - innerTextLayer.backgroundColor = UIColor.red.withAlphaComponent(0.5).cgColor // No background color + innerTextLayer.backgroundColor = UIColor.clear.cgColor // No background color innerTextLayer.isHidden = false // Make the inner text 
layer visible - innerTextLayer.foregroundColor = UIColor.white.cgColor // Set text color + innerTextLayer.foregroundColor = UIColor.red.cgColor // Set text color innerTextLayer.frame = CGRect(x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, height: frame.height - 8) // Set the inner text layer frame } else { From 0cef67b72cdbf5c355e21486451720dc78d134a9 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Wed, 22 May 2024 21:33:37 +0900 Subject: [PATCH 03/16] Edit porstprocessing. --- YOLO/Utilities/PostProcessing.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift index b29d2d1..8ecdd42 100644 --- a/YOLO/Utilities/PostProcessing.swift +++ b/YOLO/Utilities/PostProcessing.swift @@ -34,7 +34,7 @@ func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [I return selectedIndices } -// Human model's output [1,95,8400] to [(Box, Confidence, HumanFeatures)] +// Human model's output [1,15,8400] to [(Box, Confidence, HumanFeatures)] func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) -> [(CGRect, Float, [Float])] { let numAnchors = prediction.shape[2].intValue @@ -62,7 +62,7 @@ func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouT var boxFeatures = [Float](repeating: 0, count: 11) for k in 0..<11 { - let key = (84 + k) * numAnchors + j + let key = (5 + k) * numAnchors + j boxFeatures[k] = featurePointer[key] } @@ -80,7 +80,7 @@ func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouT for idx in selectedIndices { selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) } - + print(selectedBoxesAndFeatures) return selectedBoxesAndFeatures } From 09c644710e13a3b22ada2c3a6a4803a6b97fa5d5 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Tue, 4 Jun 2024 11:27:56 +0900 Subject: [PATCH 04/16] simple swift tracking --- YOLO.xcodeproj/project.pbxproj | 8 ++ YOLO/Info.plist | 2 +- YOLO/Main.storyboard | 18 +++- YOLO/Utilities/HumanModel.swift | 155 ++++++++++++++++++++++++++++ YOLO/Utilities/PostProcessing.swift | 50 ++------- YOLO/Utilities/TrackingModel.swift | 126 ++++++++++++++++++++++ YOLO/ViewController.swift | 49 ++++++--- 7 files changed, 350 insertions(+), 58 deletions(-) create mode 100644 YOLO/Utilities/HumanModel.swift create mode 100644 YOLO/Utilities/TrackingModel.swift diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index 7dac285..bd01f8c 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -17,6 +17,8 @@ 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */; }; + 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7742C0EA36D00218E8F /* HumanModel.swift */; }; + 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7762C0EA37300218E8F /* TrackingModel.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* 
BoundingBoxView.swift */; }; /* End PBXBuildFile section */ @@ -33,6 +35,8 @@ 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; + 73A4E7742C0EA36D00218E8F /* HumanModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = HumanModel.swift; sourceTree = ""; }; + 73A4E7762C0EA37300218E8F /* TrackingModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TrackingModel.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; 8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -52,6 +56,8 @@ 636166E72514438D0054FA7E /* Utilities */ = { isa = PBXGroup; children = ( + 73A4E7762C0EA37300218E8F /* TrackingModel.swift */, + 73A4E7742C0EA36D00218E8F /* HumanModel.swift */, 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */, 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, @@ -202,8 +208,10 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, + 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, + 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/YOLO/Info.plist b/YOLO/Info.plist index 0917317..8372868 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 29 + 34 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index c5daed8..e1ae774 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -192,13 +192,27 @@ + + + + + + + + + @@ -209,10 +223,12 @@ + + @@ -257,7 +273,7 @@ - + diff --git a/YOLO/Utilities/HumanModel.swift b/YOLO/Utilities/HumanModel.swift new file mode 100644 index 0000000..0a167e7 --- /dev/null +++ b/YOLO/Utilities/HumanModel.swift @@ -0,0 +1,155 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// HumanModel for Ultralytics YOLO App +// This struct is designed to turn the inference results of the YOLOv8-Human model into a manageable DataModel of human feature values ​​in the Ultralytics YOLO app. When in tracking mode, this struct averages the feature values ​​of a given individual across frames to a stable value. 
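+// The update function expects the 11-element feature vector produced by PostProcessHuman: weight, height, age, two gender scores, and six race scores (asian, white, middle eastern, indian, latino, black).
+// Averages are recomputed on the first update and then once every `updateFrequency` (120) updates, after which the raw sample buffers are cleared.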
+// This struct automatically analyzes the boxes, scores, and feature values ​​provided to the update function to create a human model.// Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license +// Access the source code: https://github.com/ultralytics/yolo-ios-app + + +import Foundation +import UIKit + +let updateFrequency: Int = 120 + +struct Person { + var index: Int + var box: CGRect = .zero + + var score: Float = 0 + var weight: Float = 0 + var height: Float = 0 + + var age: Int = 0 + + var gender: String = "female" + var genderConfidence: Float = 0 + var race: String = "asian" + var raceConfidence: Float = 0 + + var listCount: Int = 0 + var scoreRawList: [Float] = [] + var weightRawList: [Float] = [] + var heightRawList: [Float] = [] + var ageRawList: [Float] = [] + var maleRawList: [Float] = [] + var femaleRawList: [Float] = [] + var asianRawList: [Float] = [] + var whiteRawList: [Float] = [] + var middleEasternRawList: [Float] = [] + var indianRawList: [Float] = [] + var latinoRawList: [Float] = [] + var blackRawList: [Float] = [] + + var trackedBox: CGRect? + var color:UIColor + + var unDetectedCounter: Int = 0 + var stable = false + + init(index: Int) { + self.index = index + self.color = UIColor(red: CGFloat.random(in: 0...1), + green: CGFloat.random(in: 0...1), + blue: CGFloat.random(in: 0...1), + alpha: 0.6) + } + + mutating func update(box:CGRect, score:Float, features:[Float]) { + self.box = box + + self.scoreRawList.append(score) + self.weightRawList.append(features[0]) + self.heightRawList.append(features[1]) + self.ageRawList.append(features[2]) + self.maleRawList.append(features[3]) + self.femaleRawList.append(features[4]) + self.asianRawList.append(features[5]) + self.whiteRawList.append(features[6]) + self.middleEasternRawList.append(features[7]) + self.indianRawList.append(features[8]) + self.latinoRawList.append(features[9]) + self.blackRawList.append(features[10]) + + if !stable || scoreRawList.count >= updateFrequency { + stable = true + calcurateFeatures() + } + + if scoreRawList.count >= updateFrequency { + scoreRawList.removeAll() + weightRawList.removeAll() + heightRawList.removeAll() + ageRawList.removeAll() + maleRawList.removeAll() + femaleRawList.removeAll() + asianRawList.removeAll() + whiteRawList.removeAll() + middleEasternRawList.removeAll() + indianRawList.removeAll() + latinoRawList.removeAll() + blackRawList.removeAll() + + } + + self.unDetectedCounter = 0 + } + + private mutating func calcurateFeatures() { + + self.score = average(of: scoreRawList) + self.weight = average(of: weightRawList) + self.height = average(of: heightRawList) + self.age = Int(round(average(of: ageRawList))) + let femaleAverage = average(of: femaleRawList) + let maleAverage = average(of: maleRawList) + let genderCandidates = [femaleAverage,maleAverage] + var genderMaxIndex = 0 + var genderMaxValue = genderCandidates[0] + + for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { + if genderValue > genderMaxValue { + genderMaxValue = genderValue + genderMaxIndex = genderIndex + 1 + } + } + + self.gender = genders[genderMaxIndex] + self.genderConfidence = genderMaxValue + + let asianAverage = average(of: asianRawList) + let whiteAverage = average(of: whiteRawList) + let middleEasternAverage = average(of: middleEasternRawList) + let indianAverage = average(of: indianRawList) + let latinoAverage = average(of: latinoRawList) + let blackAverage = average(of: blackRawList) + + let raceCandidates = 
[asianAverage,whiteAverage,middleEasternAverage,indianAverage,latinoAverage,blackAverage] + var raceMaxIndex = 0 + var raceMaxValue = raceCandidates[0] + + for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { + if raceValue > raceMaxValue { + raceMaxValue = raceValue + raceMaxIndex = raceIndex + 1 + } + } + self.race = races[raceMaxIndex] + self.raceConfidence = raceMaxValue + } + + func average(of numbers: [Float]) -> Float { + guard !numbers.isEmpty else { + return 0 + } + var sum: Float = 0 + for number in numbers { + sum += number + } + return sum / Float(numbers.count) + } + +} + +let genders = ["female", "male"] +let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] + diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift index 8ecdd42..db25d12 100644 --- a/YOLO/Utilities/PostProcessing.swift +++ b/YOLO/Utilities/PostProcessing.swift @@ -84,49 +84,15 @@ func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouT return selectedBoxesAndFeatures } -let genders = ["female", "male"] -let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] - -struct HumanFeatures { - let weight: Float - let height: Float - let age: Int - let gender: String - let genderConfidence: Float - let race: String - let raceConfidence: Float - - init(features:[Float]) { - self.weight = features[0] - self.height = features[1] - self.age = Int(round(features[2])) - let genderCandidates = Array(features[3..<5]) - var genderMaxIndex = 0 - var genderMaxValue = genderCandidates[0] - - for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { - if genderValue > genderMaxValue { - genderMaxValue = genderValue - genderMaxIndex = genderIndex + 1 - } - } - - self.gender = genders[genderMaxIndex] - self.genderConfidence = genderMaxValue - - let raceCandidates = Array(features[5...]) - var raceMaxIndex = 0 - var raceMaxValue = raceCandidates[0] - - for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { - if raceValue > raceMaxValue { - raceMaxValue = raceValue - raceMaxIndex = raceIndex + 1 - } - } - self.race = races[raceMaxIndex] - self.raceConfidence = raceMaxValue +func toPerson(boxesAndScoresAndFeatures:[(CGRect, Float, [Float])]) -> [Person] { + var persons = [Person]() + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: -1) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + person.color = .red + persons.append(person) } + return persons } extension CGRect { diff --git a/YOLO/Utilities/TrackingModel.swift b/YOLO/Utilities/TrackingModel.swift new file mode 100644 index 0000000..a4f5dc5 --- /dev/null +++ b/YOLO/Utilities/TrackingModel.swift @@ -0,0 +1,126 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// HumanModel for Ultralytics YOLO App + +// This class is designed to track and identify the same person across frames using the inference results of the YOLOv8-Human model in the Ultralytics YOLO app. +// The tack function is a simple tracking algorithm that tracks boxes of the same person based on box overlap across frames. 
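+// Each tracked person is matched to the detection whose box overlaps it most; a match is accepted when the intersection covers at least 50% of the previous box's area.
+// Unmatched persons are dropped after 15 consecutive missed frames, and near-duplicate tracks (over 90% mutual overlap) are pruned.
+// Typical usage, as a sketch mirroring the call in ViewController (`output` here stands for the model's MLMultiArray prediction):
+//   let tracker = TrackingModel()
+//   let pred = PostProcessHuman(prediction: output, confidenceThreshold: 0.25, iouThreshold: 0.4)
+//   let persons = tracker.track(boxesAndScoresAndFeatures: pred)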
+// Access the source code: https://github.com/ultralytics/yolo-ios-app + +import Foundation +import Vision +import Accelerate + +class TrackingModel { + var persons = [Person]() + var personIndex:Int = 0 + var recent:[(CGRect, Float, [Float])] = [] + + func track(boxesAndScoresAndFeatures:[(CGRect, Float, [Float])]) -> [Person] { + + if persons.isEmpty { + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: personIndex) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + personIndex += 1 + persons.append(person) + + } + return persons + } + + var unDetectedPersonIndexes:[Int] = [] + var usedDetectedIndex:Set = Set() + + for (pi, person) in persons.enumerated() { + var bestIOU:CGFloat = 0 + var bestIndex = 0 + + for (i, detected) in boxesAndScoresAndFeatures.enumerated() { + let IoU = overlapPercentage(rect1: person.box, rect2: detected.0) + if IoU > bestIOU { + bestIOU = IoU + bestIndex = i + } + } + if bestIOU >= 50 { + let detectedPerson = boxesAndScoresAndFeatures[bestIndex] + persons[pi].update(box: detectedPerson.0, score: detectedPerson.1, features: detectedPerson.2) + usedDetectedIndex.insert(bestIndex) + } else { + unDetectedPersonIndexes.append(pi) + } + } + + let sortedIndices = unDetectedPersonIndexes.sorted(by: >) + for index in sortedIndices { + persons[index].unDetectedCounter += 1 + } + + for (index, det) in boxesAndScoresAndFeatures.enumerated() { + if !usedDetectedIndex.contains(index) { + var person = Person(index: personIndex) + person.update(box: det.0, score: det.1, features: det.2) + personIndex += 1 + persons.append(person) + } + } + + persons = removeOverlappingRects(persons: persons) + + var personsToShow: [Person] = [] + var removePersonIndexes: [Int] = [] + for (pindex, person) in persons.enumerated() { + if person.unDetectedCounter == 0 { + personsToShow.append(person) + } else if person.unDetectedCounter >= 15 { + removePersonIndexes.append(pindex) + } + } + let sortedRemoveIndices = removePersonIndexes.sorted(by: >) + for index in sortedRemoveIndices { + persons.remove(at: index) + } + + return personsToShow + + } +} + +func overlapPercentage(rect1: CGRect, rect2: CGRect) -> CGFloat { + let intersection = rect1.intersection(rect2) + + if intersection.isNull { + return 0.0 + } + + let intersectionArea = intersection.width * intersection.height + + let rect1Area = rect1.width * rect1.height + + let overlapPercentage = (intersectionArea / rect1Area) * 100 + + return overlapPercentage +} + +func removeOverlappingRects(persons: [Person], threshold: CGFloat = 90.0) -> [Person] { + var filteredPersons = persons + var index = 0 + + while index < filteredPersons.count { + var shouldRemove = false + for j in (index + 1)..= threshold { + shouldRemove = true + break + } + } + if shouldRemove { + filteredPersons.remove(at: index) + } else { + index += 1 + } + } + + return filteredPersons +} diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 3307627..c848de5 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -74,6 +74,8 @@ class ViewController: UIViewController { var task: Task = .detect var confidenceThreshold:Float = 0.25 var iouThreshold:Float = 0.4 + var tracking = false + var tracker = TrackingModel() override func viewDidLoad() { super.viewDidLoad() @@ -212,6 +214,16 @@ class ViewController: UIViewController { } } + @IBAction func TrackingSwitch(_ sender: UISwitch) { + tracking.toggle() + if tracking { + sender.isOn = true + } else { + sender.isOn = false + } 
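+        // The branch above mirrors the toggled `tracking` flag back to the switch so the UI stays in sync with the internal state.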
+ } + + @IBAction func takePhoto(_ sender: Any?) { let t0 = DispatchTime.now().uptimeNanoseconds @@ -393,9 +405,9 @@ class ViewController: UIViewController { case .detect: DispatchQueue.main.async { if let results = request.results as? [VNRecognizedObjectObservation] { - self.show(predictions: results, boxesAndValues: []) + self.show(predictions: results, persons: []) } else { - self.show(predictions: [], boxesAndValues: []) + self.show(predictions: [], persons: []) } // Measure FPS @@ -413,9 +425,15 @@ class ViewController: UIViewController { if let prediction = results.first?.featureValue.multiArrayValue { let pred = PostProcessHuman(prediction:prediction, confidenceThreshold: self.confidenceThreshold, iouThreshold: self.iouThreshold) - self.show(predictions: [], boxesAndValues: pred) + var persons:[Person] = [] + if !self.tracking { + persons = toPerson(boxesAndScoresAndFeatures: pred) + } else { + persons = self.tracker.track(boxesAndScoresAndFeatures: pred) + } + self.show(predictions: [], persons: persons) } else { - self.show(predictions: [], boxesAndValues: []) + self.show(predictions: [], persons: []) } if self.t1 < 10.0 { // valid dt self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time @@ -492,7 +510,7 @@ class ViewController: UIViewController { } } - func show(predictions: [VNRecognizedObjectObservation], boxesAndValues: [(CGRect, Float, [Float])]) { + func show(predictions: [VNRecognizedObjectObservation], persons: [Person]) { let width = videoPreview.bounds.width let height = videoPreview.bounds.height var str = "" @@ -518,7 +536,7 @@ class ViewController: UIViewController { case .detect: resultCount = predictions.count case .human: - resultCount = boxesAndValues.count + resultCount = persons.count } self.labelSlider.text = String(resultCount) + " items (max " + String(Int(slider.value)) + ")" for i in 0.. Date: Tue, 4 Jun 2024 15:16:31 +0900 Subject: [PATCH 05/16] hide a tracking toggle in detect mode --- YOLO/Info.plist | 2 +- YOLO/Main.storyboard | 6 ++++-- YOLO/ViewController.swift | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/YOLO/Info.plist b/YOLO/Info.plist index 8372868..f671e09 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 34 + 40 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index e1ae774..2672b4e 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -192,13 +192,13 @@ -