diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index f988ad3..5b39b9b 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -13,7 +13,7 @@ on: jobs: format: - runs-on: ubuntu-latest + runs-on: macos-latest steps: - name: Run Ultralytics Formatting uses: ultralytics/actions@main @@ -23,6 +23,7 @@ jobs: python: true # format Python code and docstrings markdown: true # format Markdown prettier: true # format YAML + swift: true # format Swift spelling: false # check spelling links: false # check broken links summary: true # print PR summary with GPT4 (requires 'openai_api_key' or 'openai_azure_api_key' and 'openai_azure_endpoint') diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index ddcf965..33dfe0e 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 54; + objectVersion = 63; objects = { /* Begin PBXBuildFile section */ @@ -13,14 +13,13 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA221E62DD300DE43BC /* VideoCapture.swift */; }; 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA721E62DD300DE43BC /* AppDelegate.swift */; }; 636EFCB921E62E3900DE43BC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 636EFCB821E62E3900DE43BC /* Assets.xcassets */; }; - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */; }; - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */; }; - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */; }; - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */; }; - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */; }; 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; + 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */; }; + 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7742C0EA36D00218E8F /* HumanModel.swift */; }; + 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73A4E7762C0EA37300218E8F /* TrackingModel.swift */; }; + 73FE95772C2B5A2D00C6C806 /* SaveResults.swift in Sources */ = {isa = PBXBuildFile; fileRef = 73FE95762C2B5A2D00C6C806 /* SaveResults.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ @@ -35,12 +34,11 @@ 636EFCA221E62DD300DE43BC /* VideoCapture.swift */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 636EFCA721E62DD300DE43BC /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; + 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessing.swift; sourceTree = ""; }; + 73A4E7742C0EA36D00218E8F /* HumanModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = HumanModel.swift; sourceTree = ""; }; + 73A4E7762C0EA37300218E8F /* TrackingModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TrackingModel.swift; sourceTree = ""; }; + 73FE95762C2B5A2D00C6C806 /* SaveResults.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SaveResults.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; 8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -60,8 +58,12 @@ 636166E72514438D0054FA7E /* Utilities */ = { isa = PBXGroup; children = ( + 73A4E7762C0EA37300218E8F /* TrackingModel.swift */, + 73A4E7742C0EA36D00218E8F /* HumanModel.swift */, + 730E72CC2BFC43BF000E1F45 /* PostProcessing.swift */, 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, + 73FE95762C2B5A2D00C6C806 /* SaveResults.swift */, ); path = Utilities; sourceTree = ""; @@ -87,11 +89,6 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */, - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */, - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */, - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */, - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */, ); path = Models; sourceTree = ""; @@ -160,6 +157,7 @@ Base, ); mainGroup = 
7BCB410E21C3096100BFC4D0; + minimizedProjectReferenceProxies = 1; productRefGroup = 7BCB411821C3096100BFC4D0 /* Products */; projectDirPath = ""; projectRoot = ""; @@ -210,16 +208,15 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */, + 730E72CD2BFC43BF000E1F45 /* PostProcessing.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */, - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */, - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, + 73A4E7772C0EA37300218E8F /* TrackingModel.swift in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, + 73A4E7752C0EA36D00218E8F /* HumanModel.swift in Sources */, + 73FE95772C2B5A2D00C6C806 /* SaveResults.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -350,8 +347,8 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + CURRENT_PROJECT_VERSION = 2.2; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -378,8 +375,8 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + CURRENT_PROJECT_VERSION = 2.2; + DEVELOPMENT_TEAM = MFN25KNUGJ; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -389,7 +386,7 @@ "@executable_path/Frameworks", ); MARKETING_VERSION = 8.2.0; - PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection; + PRODUCT_BUNDLE_IDENTIFIER = com.YoloiOSApp; PRODUCT_NAME = "$(TARGET_NAME)"; SUPPORTED_PLATFORMS = "iphoneos iphonesimulator"; SUPPORTS_MACCATALYST = NO; diff --git a/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme b/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme new file mode 100644 index 0000000..3bb677d --- /dev/null +++ b/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/YOLO/AppDelegate.swift b/YOLO/AppDelegate.swift index fe2f900..dfd8998 100644 --- a/YOLO/AppDelegate.swift +++ b/YOLO/AppDelegate.swift @@ -17,49 +17,61 @@ import UIKit /// The main application delegate, handling global app behavior and configuration. @UIApplicationMain class AppDelegate: UIResponder, UIApplicationDelegate { - var window: UIWindow? + var window: UIWindow? - /// Called when the app finishes launching, used here to set global app settings. - func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { - // Disable screen dimming and auto-lock to keep the app active during long operations. - UIApplication.shared.isIdleTimerDisabled = true + /// Called when the app finishes launching, used here to set global app settings. 
+ func application( + _ application: UIApplication, + didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]? + ) -> Bool { + // Disable screen dimming and auto-lock to keep the app active during long operations. + UIApplication.shared.isIdleTimerDisabled = true - // Enable battery monitoring to allow the app to adapt its behavior based on battery level. - UIDevice.current.isBatteryMonitoringEnabled = true + // Enable battery monitoring to allow the app to adapt its behavior based on battery level. + UIDevice.current.isBatteryMonitoringEnabled = true - // Store the app version and build version in UserDefaults for easy access elsewhere in the app. - if let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String, - let buildVersion = Bundle.main.infoDictionary?["CFBundleVersion"] as? String { - UserDefaults.standard.set("\(appVersion) (\(buildVersion))", forKey: "app_version") - } + // Store the app version and build version in UserDefaults for easy access elsewhere in the app. + if let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String, + let buildVersion = Bundle.main.infoDictionary?["CFBundleVersion"] as? String + { + UserDefaults.standard.set("\(appVersion) (\(buildVersion))", forKey: "app_version") + } - // Store the device's UUID in UserDefaults for identification purposes. - if let uuid = UIDevice.current.identifierForVendor?.uuidString { - UserDefaults.standard.set(uuid, forKey: "uuid") - } + // Store the device's UUID in UserDefaults for identification purposes. + if let uuid = UIDevice.current.identifierForVendor?.uuidString { + UserDefaults.standard.set(uuid, forKey: "uuid") + } - // Ensure UserDefaults changes are immediately saved. - UserDefaults.standard.synchronize() + // Ensure UserDefaults changes are immediately saved. + UserDefaults.standard.synchronize() - return true - } + return true + } + + func applicationDidBecomeActive(_ application: UIApplication) { + NotificationCenter.default.post(name: .settingsChanged, object: nil) + } +} + +extension Notification.Name { + static let settingsChanged = Notification.Name("settingsChanged") } /// Extension to CALayer to add functionality for generating screenshots of any layer. extension CALayer { - var screenShot: UIImage? { - // Begin a new image context, using the device's screen scale to ensure high-resolution output. - UIGraphicsBeginImageContextWithOptions(frame.size, false, UIScreen.main.scale) - defer { - UIGraphicsEndImageContext() - } // Ensure the image context is cleaned up correctly. + var screenShot: UIImage? { + // Begin a new image context, using the device's screen scale to ensure high-resolution output. + UIGraphicsBeginImageContextWithOptions(frame.size, false, UIScreen.main.scale) + defer { + UIGraphicsEndImageContext() + } // Ensure the image context is cleaned up correctly. - if let context = UIGraphicsGetCurrentContext() { - // Render the layer into the current context. - render(in: context) - // Attempt to generate an image from the current context. - return UIGraphicsGetImageFromCurrentImageContext() - } - return nil // Return nil if the operation fails. + if let context = UIGraphicsGetCurrentContext() { + // Render the layer into the current context. + render(in: context) + // Attempt to generate an image from the current context. + return UIGraphicsGetImageFromCurrentImageContext() } + return nil // Return nil if the operation fails. 
+ } } diff --git a/YOLO/Assets.xcassets/Focus.imageset/Contents.json b/YOLO/Assets.xcassets/Focus.imageset/Contents.json index dae9ce7..5db4a9b 100644 --- a/YOLO/Assets.xcassets/Focus.imageset/Contents.json +++ b/YOLO/Assets.xcassets/Focus.imageset/Contents.json @@ -1,21 +1,23 @@ { "images": [ { + "filename": "ultralytics_square_focus_image.png", "idiom": "universal", "scale": "1x" }, { + "filename": "ultralytics_square_focus_image 1.png", "idiom": "universal", "scale": "2x" }, { + "filename": "ultralytics_square_focus_image 2.png", "idiom": "universal", - "filename": "ultralytics_square_focus_image.png", "scale": "3x" } ], "info": { - "version": 1, - "author": "xcode" + "author": "xcode", + "version": 1 } } diff --git a/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png new file mode 100644 index 0000000..d250520 Binary files /dev/null and b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 1.png differ diff --git a/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png new file mode 100644 index 0000000..d250520 Binary files /dev/null and b/YOLO/Assets.xcassets/Focus.imageset/ultralytics_square_focus_image 2.png differ diff --git a/YOLO/Info.plist b/YOLO/Info.plist index c36dbc0..599e123 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 24 + 386 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS @@ -52,6 +52,8 @@ UIStatusBarStyleDefault UISupportedInterfaceOrientations + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight UIInterfaceOrientationPortrait UISupportedInterfaceOrientations~ipad diff --git a/YOLO/LaunchScreen.storyboard b/YOLO/LaunchScreen.storyboard index 5311997..c4280f4 100755 --- a/YOLO/LaunchScreen.storyboard +++ b/YOLO/LaunchScreen.storyboard @@ -1,9 +1,9 @@ - + - + diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index 048e9f6..1db1d54 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -1,9 +1,9 @@ - + - + @@ -17,16 +17,14 @@ - - - + + - + + + + + + + + + + + + - + + + + + + + + + + - + + + + - + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + @@ -210,19 +335,29 @@ + + + + + + + + + + @@ -233,7 +368,7 @@ - + @@ -241,7 +376,7 @@ - + diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index b506545..c743e44 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -14,66 +14,95 @@ import UIKit /// Manages the visualization of bounding boxes and associated labels for object detection results. class BoundingBoxView { - /// The layer that draws the bounding box around a detected object. - let shapeLayer: CAShapeLayer + /// The layer that draws the bounding box around a detected object. + let shapeLayer: CAShapeLayer - /// The layer that displays the label and confidence score for the detected object. - let textLayer: CATextLayer + /// The layer that displays the label and confidence score for the detected object. + let textLayer: CATextLayer - /// Initializes a new BoundingBoxView with configured shape and text layers. 
- init() { - shapeLayer = CAShapeLayer() - shapeLayer.fillColor = UIColor.clear.cgColor // No fill to only show the bounding outline - shapeLayer.lineWidth = 4 // Set the stroke line width - shapeLayer.isHidden = true // Initially hidden; shown when a detection occurs + /// The layer that displays the inner text within the bounding box. + let innerTextLayer: CATextLayer - textLayer = CATextLayer() - textLayer.isHidden = true // Initially hidden; shown with label when a detection occurs - textLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays - textLayer.fontSize = 14 // Set font size for the label text - textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels - textLayer.alignmentMode = .center // Center-align the text within the layer - } + /// Initializes a new BoundingBoxView with configured shape and text layers. + init() { + shapeLayer = CAShapeLayer() + shapeLayer.fillColor = UIColor.clear.cgColor // No fill to only show the bounding outline + shapeLayer.lineWidth = 4 // Set the stroke line width + shapeLayer.isHidden = true // Initially hidden; shown when a detection occurs - /// Adds the bounding box and text layers to a specified parent layer. - /// - Parameter parent: The CALayer to which the bounding box and text layers will be added. - func addToLayer(_ parent: CALayer) { - parent.addSublayer(shapeLayer) - parent.addSublayer(textLayer) - } + textLayer = CATextLayer() + textLayer.isHidden = true // Initially hidden; shown with label when a detection occurs + textLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays + textLayer.fontSize = 14 // Set font size for the label text + textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels + textLayer.alignmentMode = .center // Center-align the text within the layer - /// Updates the bounding box and label to be visible with specified properties. - /// - Parameters: - /// - frame: The CGRect frame defining the bounding box's size and position. - /// - label: The text label to display (e.g., object class and confidence). - /// - color: The color of the bounding box stroke and label background. - /// - alpha: The opacity level for the bounding box stroke and label background. - func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat) { - CATransaction.setDisableActions(true) // Disable implicit animations + innerTextLayer = CATextLayer() + innerTextLayer.isHidden = true // Initially hidden; shown with label when a detection occurs + innerTextLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays + innerTextLayer.fontSize = 12 // Set font size for the inner text + innerTextLayer.font = UIFont(name: "Avenir", size: innerTextLayer.fontSize) // Use Avenir font for inner text + innerTextLayer.alignmentMode = .left // Left-align the text within the layer + innerTextLayer.isWrapped = true // Wrap the text to fit within the layer + } - let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box - shapeLayer.path = path.cgPath - shapeLayer.strokeColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the stroke - shapeLayer.isHidden = false // Make the shape layer visible + /// Adds the bounding box, text, and inner text layers to a specified parent layer. + /// - Parameter parent: The CALayer to which the bounding box, text, and inner text layers will be added. 
+ func addToLayer(_ parent: CALayer) { + parent.addSublayer(shapeLayer) + parent.addSublayer(textLayer) + parent.addSublayer(innerTextLayer) + } - textLayer.string = label // Set the label text - textLayer.backgroundColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the background - textLayer.isHidden = false // Make the text layer visible - textLayer.foregroundColor = UIColor.white.withAlphaComponent(alpha).cgColor // Set text color + /// Updates the bounding box, label, and inner text to be visible with specified properties. + /// - Parameters: + /// - frame: The CGRect frame defining the bounding box's size and position. + /// - label: The text label to display (e.g., object class and confidence). + /// - color: The color of the bounding box stroke and label background. + /// - alpha: The opacity level for the bounding box stroke and label background. + /// - innerTexts: The text to display inside the bounding box. + func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat, innerTexts: String) { + CATransaction.setDisableActions(true) // Disable implicit animations - // Calculate the text size and position based on the label content - let attributes = [NSAttributedString.Key.font: textLayer.font as Any] - let textRect = label.boundingRect(with: CGSize(width: 400, height: 100), - options: .truncatesLastVisibleLine, - attributes: attributes, context: nil) - let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size - let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box - textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame - } + let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box + shapeLayer.path = path.cgPath + shapeLayer.strokeColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the stroke + shapeLayer.isHidden = false // Make the shape layer visible + + textLayer.string = label // Set the label text + textLayer.backgroundColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the background + textLayer.isHidden = false // Make the text layer visible + textLayer.foregroundColor = UIColor.white.withAlphaComponent(alpha).cgColor // Set text color - /// Hides the bounding box and text layers. 
- func hide() { - shapeLayer.isHidden = true - textLayer.isHidden = true + // Calculate the text size and position based on the label content + let attributes = [NSAttributedString.Key.font: textLayer.font as Any] + let textRect = label.boundingRect( + with: CGSize(width: 400, height: 100), + options: .truncatesLastVisibleLine, + attributes: attributes, context: nil) + let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size + let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box + textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame + + if !innerTexts.isEmpty { + innerTextLayer.string = innerTexts // Set the inner text + innerTextLayer.backgroundColor = UIColor.clear.cgColor // No background color + innerTextLayer.isHidden = false // Make the inner text layer visible + innerTextLayer.foregroundColor = UIColor.red.cgColor // Set text color + innerTextLayer.frame = CGRect( + x: frame.origin.x + 4, y: frame.origin.y + 4, width: frame.width / 2 - 8, + height: frame.height - 8) + // Set the inner text layer frame + } else { + innerTextLayer.isHidden = true // Hide the inner text layer if innerTexts is empty } + + } + + /// Hides the bounding box, text, and inner text layers. + func hide() { + shapeLayer.isHidden = true + textLayer.isHidden = true + innerTextLayer.isHidden = true + } } diff --git a/YOLO/Utilities/HumanModel.swift b/YOLO/Utilities/HumanModel.swift new file mode 100644 index 0000000..13778be --- /dev/null +++ b/YOLO/Utilities/HumanModel.swift @@ -0,0 +1,150 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// HumanModel for Ultralytics YOLO App +// This struct is designed to turn the inference results of the YOLOv8-Human model into a manageable DataModel of human feature values in the Ultralytics YOLO app. When in tracking mode, this struct averages the feature values of a given individual across frames to a stable value. +// This struct automatically analyzes the boxes, scores, and feature values provided to the update function to create a human model. // Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license +// Access the source code: https://github.com/ultralytics/yolo-ios-app + +import Foundation +import UIKit + +let updateFrequency: Int = 120 + +struct Person { + var index: Int + var box: CGRect = .zero + + var score: Float = 0 + var weight: Float = 0 + var height: Float = 0 + + var age: Int = 0 + + var gender: String = "female" + var genderConfidence: Float = 0 + var race: String = "asian" + var raceConfidence: Float = 0 + + var listCount: Int = 0 + var scoreRawList: [Float] = [] + var weightRawList: [Float] = [] + var heightRawList: [Float] = [] + var ageRawList: [Float] = [] + var maleRawList: [Float] = [] + var femaleRawList: [Float] = [] + var asianRawList: [Float] = [] + var whiteRawList: [Float] = [] + var middleEasternRawList: [Float] = [] + var indianRawList: [Float] = [] + var latinoRawList: [Float] = [] + var blackRawList: [Float] = [] + + var trackedBox: CGRect?
+ var color: UIColor + + var unDetectedCounter: Int = 0 + var stable = false + + init(index: Int) { + self.index = index + self.color = UIColor( + red: CGFloat.random(in: 0...1), + green: CGFloat.random(in: 0...1), + blue: CGFloat.random(in: 0...1), + alpha: 0.6) + } + + mutating func update(box: CGRect, score: Float, features: [Float]) { + self.box = box + if scoreRawList.count >= updateFrequency { + scoreRawList.removeFirst() + weightRawList.removeFirst() + heightRawList.removeFirst() + ageRawList.removeFirst() + maleRawList.removeFirst() + femaleRawList.removeFirst() + asianRawList.removeFirst() + whiteRawList.removeFirst() + middleEasternRawList.removeFirst() + indianRawList.removeFirst() + latinoRawList.removeFirst() + blackRawList.removeFirst() + } + + self.scoreRawList.append(score) + self.weightRawList.append(features[0]) + self.heightRawList.append(features[1]) + self.ageRawList.append(features[2]) + self.femaleRawList.append(features[3]) + self.maleRawList.append(features[4]) + self.asianRawList.append(features[5]) + self.whiteRawList.append(features[6]) + self.middleEasternRawList.append(features[7]) + self.indianRawList.append(features[8]) + self.latinoRawList.append(features[9]) + self.blackRawList.append(features[10]) + calcurateFeatures() + + self.unDetectedCounter = 0 + } + + private mutating func calcurateFeatures() { + + self.score = average(of: scoreRawList) + self.weight = average(of: weightRawList) + self.height = average(of: heightRawList) + self.age = Int(round(average(of: ageRawList))) + let femaleAverage = average(of: femaleRawList) + let maleAverage = average(of: maleRawList) + let genderCandidates = [femaleAverage, maleAverage] + var genderMaxIndex = 0 + var genderMaxValue = genderCandidates[0] + + for (genderIndex, genderValue) in genderCandidates.dropFirst().enumerated() { + if genderValue > genderMaxValue { + genderMaxValue = genderValue + genderMaxIndex = genderIndex + 1 + } + } + + self.gender = genders[genderMaxIndex] + self.genderConfidence = genderMaxValue + + let asianAverage = average(of: asianRawList) + let whiteAverage = average(of: whiteRawList) + let middleEasternAverage = average(of: middleEasternRawList) + let indianAverage = average(of: indianRawList) + let latinoAverage = average(of: latinoRawList) + let blackAverage = average(of: blackRawList) + + let raceCandidates = [ + asianAverage, whiteAverage, middleEasternAverage, indianAverage, latinoAverage, blackAverage, + ] + var raceMaxIndex = 0 + var raceMaxValue = raceCandidates[0] + + for (raceIndex, raceValue) in raceCandidates.dropFirst().enumerated() { + if raceValue > raceMaxValue { + raceMaxValue = raceValue + raceMaxIndex = raceIndex + 1 + } + } + self.race = races[raceMaxIndex] + self.raceConfidence = raceMaxValue + } + + func average(of numbers: [Float]) -> Float { + guard !numbers.isEmpty else { + return 0 + } + var sum: Float = 0 + for number in numbers { + sum += number + } + return sum / Float(numbers.count) + } + +} + +let genders = ["female", "male"] +let races = ["asian", "white", "middle eastern", "indian", "latino", "black"] diff --git a/YOLO/Utilities/PostProcessing.swift b/YOLO/Utilities/PostProcessing.swift new file mode 100644 index 0000000..d84e14a --- /dev/null +++ b/YOLO/Utilities/PostProcessing.swift @@ -0,0 +1,103 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// PostProcessing for Ultralytics YOLO App +// This feature is designed to post-process the output of a YOLOv8 model within the Ultralytics YOLO app to extract high-confidence objects. 
+// Output high-confidence boxes and their corresponding feature values using non-maximum suppression. +// Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license +// Access the source code: https://github.com/ultralytics/yolo-ios-app + +import CoreML +import Foundation +import Vision + +func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] { + let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset } + var selectedIndices = [Int]() + var activeIndices = [Bool](repeating: true, count: boxes.count) + + for i in 0..<sortedIndices.count { + let idx = sortedIndices[i] + if activeIndices[idx] { + selectedIndices.append(idx) + for j in i + 1..<sortedIndices.count { + let otherIdx = sortedIndices[j] + if activeIndices[otherIdx] { + let intersection = boxes[idx].intersection(boxes[otherIdx]) + if intersection.area > CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) { + activeIndices[otherIdx] = false + } + } + } + } + } + return selectedIndices +} + +// Human model's output [1,15,8400] to [(Box, Confidence, HumanFeatures)] + +func PostProcessHuman(prediction: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float) + -> [(CGRect, Float, [Float])] +{ + let numAnchors = prediction.shape[2].intValue + var boxes = [CGRect]() + var scores = [Float]() + var features = [[Float]]() + let featurePointer = UnsafeMutablePointer<Float>(OpaquePointer(prediction.dataPointer)) + let lock = DispatchQueue(label: "com.example.lock") + + DispatchQueue.concurrentPerform(iterations: numAnchors) { j in + let confIndex = 4 * numAnchors + j + let confidence = featurePointer[confIndex] + if confidence > confidenceThreshold { + let x = featurePointer[j] + let y = featurePointer[numAnchors + j] + let width = featurePointer[2 * numAnchors + j] + let height = featurePointer[3 * numAnchors + j] + + let boxWidth = CGFloat(width) + let boxHeight = CGFloat(height) + let boxX = CGFloat(x - width / 2) + let boxY = CGFloat(y - height / 2) + + let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight) + + var boxFeatures = [Float](repeating: 0, count: 11) + for k in 0..<11 { + let key = (5 + k) * numAnchors + j + boxFeatures[k] = featurePointer[key] + } + + lock.sync { + boxes.append(boundingBox) + scores.append(confidence) + features.append(boxFeatures) + } + } + } + + let selectedIndices = nonMaxSuppression(boxes: boxes, scores: scores, threshold: iouThreshold) + var selectedBoxesAndFeatures = [(CGRect, Float, [Float])]() + + for idx in selectedIndices { + selectedBoxesAndFeatures.append((boxes[idx], scores[idx], features[idx])) + } + print(selectedBoxesAndFeatures) + return selectedBoxesAndFeatures +} + +func toPerson(boxesAndScoresAndFeatures: [(CGRect, Float, [Float])]) -> [Person] { + var persons = [Person]() + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: -1) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + person.color = .red + persons.append(person) + } + return persons +} + +extension CGRect { + var area: CGFloat { + return width * height + } +} diff --git a/YOLO/Utilities/SaveResults.swift b/YOLO/Utilities/SaveResults.swift new file mode 100644 index 0000000..ced631d --- /dev/null +++ b/YOLO/Utilities/SaveResults.swift @@ -0,0 +1,50 @@ +// +// SaveResults.swift +// YOLO +// +// Created by 間嶋大輔 on 2024/06/26. +// Copyright © 2024 Ultralytics. All rights reserved.
+// + +import Foundation + +let detectionHeader = + "sec_day, free_space, batteryLevel ,class,confidence,box_x, box_y, box_w, box_h\n" +let humanHeader = + "sec_day, free_space, battery_level ,id, confidence, box_x, box_y, box_w, box_h, weight, height, age, gender, gender_confidence, race, race_confidence \n" + +func saveDetectionResultsToCSV(detectionResults: [String], task: Task) -> URL? { + var header = "" + var taskName = "" + switch task { + case .detect: + header = detectionHeader + taskName = "detection" + + case .human: + header = humanHeader + taskName = "human" + } + let formatter = DateFormatter() + formatter.dateFormat = "yyyyMMdd_HH:mm:ss" + let dateString = formatter.string(from: Date()) + let fileName = taskName + "_results_\(dateString).csv" + + let path = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] + .appendingPathComponent(fileName) + + var csvText = header + + for result in detectionResults { + csvText.append(contentsOf: result) + } + + do { + try csvText.write(to: path, atomically: true, encoding: .utf8) + print("CSV file saved at: \(path)") + return path + } catch { + print("Failed to save CSV file: \(error)") + return nil + } +} diff --git a/YOLO/Utilities/ThresholdProvider.swift b/YOLO/Utilities/ThresholdProvider.swift index 53702d1..22c8d6a 100644 --- a/YOLO/Utilities/ThresholdProvider.swift +++ b/YOLO/Utilities/ThresholdProvider.swift @@ -14,29 +14,29 @@ import CoreML /// Provides custom IoU and confidence thresholds for adjusting model predictions. class ThresholdProvider: MLFeatureProvider { - /// Stores IoU and confidence thresholds as MLFeatureValue objects. - var values: [String: MLFeatureValue] + /// Stores IoU and confidence thresholds as MLFeatureValue objects. + var values: [String: MLFeatureValue] - /// The set of feature names provided by this provider. - var featureNames: Set { - return Set(values.keys) - } + /// The set of feature names provided by this provider. + var featureNames: Set { + return Set(values.keys) + } - /// Initializes the provider with specified IoU and confidence thresholds. - /// - Parameters: - /// - iouThreshold: The IoU threshold for determining object overlap. - /// - confidenceThreshold: The minimum confidence for considering a detection valid. - init(iouThreshold: Double = 0.45, confidenceThreshold: Double = 0.25) { - values = [ - "iouThreshold": MLFeatureValue(double: iouThreshold), - "confidenceThreshold": MLFeatureValue(double: confidenceThreshold) - ] - } + /// Initializes the provider with specified IoU and confidence thresholds. + /// - Parameters: + /// - iouThreshold: The IoU threshold for determining object overlap. + /// - confidenceThreshold: The minimum confidence for considering a detection valid. + init(iouThreshold: Double = 0.45, confidenceThreshold: Double = 0.25) { + values = [ + "iouThreshold": MLFeatureValue(double: iouThreshold), + "confidenceThreshold": MLFeatureValue(double: confidenceThreshold), + ] + } - /// Returns the feature value for the given feature name. - /// - Parameter featureName: The name of the feature. - /// - Returns: The MLFeatureValue object corresponding to the feature name. - func featureValue(for featureName: String) -> MLFeatureValue? { - return values[featureName] - } + /// Returns the feature value for the given feature name. + /// - Parameter featureName: The name of the feature. + /// - Returns: The MLFeatureValue object corresponding to the feature name. + func featureValue(for featureName: String) -> MLFeatureValue? 
{ + return values[featureName] + } } diff --git a/YOLO/Utilities/TrackingModel.swift b/YOLO/Utilities/TrackingModel.swift new file mode 100644 index 0000000..1beedfa --- /dev/null +++ b/YOLO/Utilities/TrackingModel.swift @@ -0,0 +1,128 @@ +// Ultralytics YOLO 🚀 - AGPL-3.0 License +// +// TrackingModel for Ultralytics YOLO App + +// This class is designed to track and identify the same person across frames using the inference results of the YOLOv8-Human model in the Ultralytics YOLO app. +// The track function is a simple tracking algorithm that tracks boxes of the same person based on box overlap across frames. +// Access the source code: https://github.com/ultralytics/yolo-ios-app + +import Accelerate +import Foundation +import Vision + +class TrackingModel { + var persons = [Person]() + var personIndex: Int = 0 + var recent: [(CGRect, Float, [Float])] = [] + + func track(boxesAndScoresAndFeatures: [(CGRect, Float, [Float])]) -> [Person] { + + if persons.isEmpty { + for detectedHuman in boxesAndScoresAndFeatures { + var person = Person(index: personIndex) + person.update(box: detectedHuman.0, score: detectedHuman.1, features: detectedHuman.2) + personIndex += 1 + persons.append(person) + + } + return persons + } + + var unDetectedPersonIndexes: [Int] = [] + var usedDetectedIndex: Set<Int> = Set() + + for (pi, person) in persons.enumerated() { + var bestIOU: CGFloat = 0 + var bestIndex = 0 + + for (i, detected) in boxesAndScoresAndFeatures.enumerated() { + let IoU = overlapPercentage(rect1: person.box, rect2: detected.0) + if IoU > bestIOU { + bestIOU = IoU + bestIndex = i + } + } + if bestIOU >= 50 { + let detectedPerson = boxesAndScoresAndFeatures[bestIndex] + persons[pi].update( + box: detectedPerson.0, score: detectedPerson.1, features: detectedPerson.2) + usedDetectedIndex.insert(bestIndex) + } else { + unDetectedPersonIndexes.append(pi) + } + } + + let sortedIndices = unDetectedPersonIndexes.sorted(by: >) + for index in sortedIndices { + persons[index].unDetectedCounter += 1 + } + + for (index, det) in boxesAndScoresAndFeatures.enumerated() { + if !usedDetectedIndex.contains(index) { + var person = Person(index: personIndex) + person.update(box: det.0, score: det.1, features: det.2) + personIndex += 1 + persons.append(person) + } + } + + persons = removeOverlappingRects(persons: persons) + + var personsToShow: [Person] = [] + var removePersonIndexes: [Int] = [] + for (pindex, person) in persons.enumerated() { + if person.unDetectedCounter == 0 { + personsToShow.append(person) + } else if person.unDetectedCounter >= 15 { + removePersonIndexes.append(pindex) + } + } + let sortedRemoveIndices = removePersonIndexes.sorted(by: >) + for index in sortedRemoveIndices { + persons.remove(at: index) + } + + return personsToShow + + } +} + +func overlapPercentage(rect1: CGRect, rect2: CGRect) -> CGFloat { + let intersection = rect1.intersection(rect2) + + if intersection.isNull { + return 0.0 + } + + let intersectionArea = intersection.width * intersection.height + + let rect1Area = rect1.width * rect1.height + + let overlapPercentage = (intersectionArea / rect1Area) * 100 + + return overlapPercentage +} + +func removeOverlappingRects(persons: [Person], threshold: CGFloat = 90.0) -> [Person] { + var filteredPersons = persons + var index = 0 + + while index < filteredPersons.count { + var shouldRemove = false + for j in (index + 1)..<filteredPersons.count { + if overlapPercentage(rect1: filteredPersons[index].box, rect2: filteredPersons[j].box) >= threshold { + shouldRemove = true + break + } + } + if shouldRemove { + filteredPersons.remove(at: index) + } else { + index += 1 + } + } + + return filteredPersons +} diff
--git a/YOLO/VideoCapture.swift b/YOLO/VideoCapture.swift index 79aaf99..7faf8f9 100644 --- a/YOLO/VideoCapture.swift +++ b/YOLO/VideoCapture.swift @@ -11,123 +11,165 @@ // the capture session. It also provides methods to start and stop video capture and delivers captured frames // to a delegate implementing the VideoCaptureDelegate protocol. - import AVFoundation import CoreVideo import UIKit // Defines the protocol for handling video frame capture events. public protocol VideoCaptureDelegate: AnyObject { - func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CMSampleBuffer) + func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CMSampleBuffer) } // Identifies the best available camera device based on user preferences and device capabilities. func bestCaptureDevice() -> AVCaptureDevice { - if UserDefaults.standard.bool(forKey: "use_telephoto"), let device = AVCaptureDevice.default(.builtInTelephotoCamera, for: .video, position: .back) { - return device - } else if let device = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .back) { - return device - } else if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back) { - return device - } else { - fatalError("Expected back camera device is not available.") - } + if UserDefaults.standard.bool(forKey: "use_telephoto"), + let device = AVCaptureDevice.default(.builtInTelephotoCamera, for: .video, position: .back) + { + return device + } else if let device = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .back) { + return device + } else if let device = AVCaptureDevice.default( + .builtInWideAngleCamera, for: .video, position: .back) + { + return device + } else { + fatalError("Expected back camera device is not available.") + } } public class VideoCapture: NSObject { - public var previewLayer: AVCaptureVideoPreviewLayer? - public weak var delegate: VideoCaptureDelegate? - - let captureDevice = bestCaptureDevice() - let captureSession = AVCaptureSession() - let videoOutput = AVCaptureVideoDataOutput() - var cameraOutput = AVCapturePhotoOutput() - let queue = DispatchQueue(label: "camera-queue") - - // Configures the camera and capture session with optional session presets. - public func setUp(sessionPreset: AVCaptureSession.Preset = .hd1280x720, completion: @escaping (Bool) -> Void) { - queue.async { - let success = self.setUpCamera(sessionPreset: sessionPreset) - DispatchQueue.main.async { - completion(success) - } - } + public var previewLayer: AVCaptureVideoPreviewLayer? + public weak var delegate: VideoCaptureDelegate? + + let captureDevice = bestCaptureDevice() + let captureSession = AVCaptureSession() + let videoOutput = AVCaptureVideoDataOutput() + var cameraOutput = AVCapturePhotoOutput() + let queue = DispatchQueue(label: "camera-queue") + + // Configures the camera and capture session with optional session presets. + public func setUp( + sessionPreset: AVCaptureSession.Preset = .hd1280x720, completion: @escaping (Bool) -> Void + ) { + queue.async { + let success = self.setUpCamera(sessionPreset: sessionPreset) + DispatchQueue.main.async { + completion(success) + } } + } + + // Internal method to configure camera inputs, outputs, and session properties. + private func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { + captureSession.beginConfiguration() + captureSession.sessionPreset = sessionPreset - // Internal method to configure camera inputs, outputs, and session properties. 
- private func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { - captureSession.beginConfiguration() - captureSession.sessionPreset = sessionPreset - - guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { - return false - } - - if captureSession.canAddInput(videoInput) { - captureSession.addInput(videoInput) - } - - let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) - previewLayer.videoGravity = .resizeAspectFill - previewLayer.connection?.videoOrientation = .portrait - self.previewLayer = previewLayer - - let settings: [String: Any] = [ - kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) - ] - - videoOutput.videoSettings = settings - videoOutput.alwaysDiscardsLateVideoFrames = true - videoOutput.setSampleBufferDelegate(self, queue: queue) - if captureSession.canAddOutput(videoOutput) { - captureSession.addOutput(videoOutput) - } - - if captureSession.canAddOutput(cameraOutput) { - captureSession.addOutput(cameraOutput) - } - - videoOutput.connection(with: .video)?.videoOrientation = .portrait - - do { - try captureDevice.lockForConfiguration() - captureDevice.focusMode = .continuousAutoFocus - captureDevice.focusPointOfInterest = CGPoint(x: 0.5, y: 0.5) - captureDevice.exposureMode = .continuousAutoExposure - captureDevice.unlockForConfiguration() - } catch { - print("Unable to configure the capture device.") - return false - } - - captureSession.commitConfiguration() - return true + guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { + return false } - // Starts the video capture session. - public func start() { - if !captureSession.isRunning { - DispatchQueue.global(qos: .userInitiated).async { [weak self] in - self?.captureSession.startRunning() - } - } + if captureSession.canAddInput(videoInput) { + captureSession.addInput(videoInput) } - // Stops the video capture session. - public func stop() { - if captureSession.isRunning { - captureSession.stopRunning() - } + let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) + previewLayer.videoGravity = .resizeAspectFill + previewLayer.connection?.videoOrientation = .portrait + self.previewLayer = previewLayer + + let settings: [String: Any] = [ + kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) + ] + + videoOutput.videoSettings = settings + videoOutput.alwaysDiscardsLateVideoFrames = true + videoOutput.setSampleBufferDelegate(self, queue: queue) + if captureSession.canAddOutput(videoOutput) { + captureSession.addOutput(videoOutput) } -} -// Extension to handle AVCaptureVideoDataOutputSampleBufferDelegate events. 
-extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { - public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - delegate?.videoCapture(self, didCaptureVideoFrame: sampleBuffer) + if captureSession.canAddOutput(cameraOutput) { + captureSession.addOutput(cameraOutput) + } + switch UIDevice.current.orientation { + case .portrait: + videoOutput.connection(with: .video)?.videoOrientation = .portrait + case .portraitUpsideDown: + videoOutput.connection(with: .video)?.videoOrientation = .portraitUpsideDown + case .landscapeRight: + videoOutput.connection(with: .video)?.videoOrientation = .landscapeLeft + case .landscapeLeft: + videoOutput.connection(with: .video)?.videoOrientation = .landscapeRight + default: + videoOutput.connection(with: .video)?.videoOrientation = .portrait } - public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - // Optionally handle dropped frames, e.g., due to full buffer. + if let connection = videoOutput.connection(with: .video) { + self.previewLayer?.connection?.videoOrientation = connection.videoOrientation } + do { + try captureDevice.lockForConfiguration() + captureDevice.focusMode = .continuousAutoFocus + captureDevice.focusPointOfInterest = CGPoint(x: 0.5, y: 0.5) + captureDevice.exposureMode = .continuousAutoExposure + captureDevice.unlockForConfiguration() + } catch { + print("Unable to configure the capture device.") + return false + } + + captureSession.commitConfiguration() + return true + } + + // Starts the video capture session. + public func start() { + if !captureSession.isRunning { + DispatchQueue.global(qos: .userInitiated).async { [weak self] in + self?.captureSession.startRunning() + } + } + } + + // Stops the video capture session. + public func stop() { + if captureSession.isRunning { + captureSession.stopRunning() + } + } + + func updateVideoOrientation() { + guard let connection = videoOutput.connection(with: .video) else { return } + switch UIDevice.current.orientation { + case .portrait: + connection.videoOrientation = .portrait + case .portraitUpsideDown: + connection.videoOrientation = .portraitUpsideDown + case .landscapeRight: + connection.videoOrientation = .landscapeLeft + case .landscapeLeft: + connection.videoOrientation = .landscapeRight + default: + return + } + self.previewLayer?.connection?.videoOrientation = connection.videoOrientation + } + +} + +// Extension to handle AVCaptureVideoDataOutputSampleBufferDelegate events. +extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { + public func captureOutput( + _ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, + from connection: AVCaptureConnection + ) { + delegate?.videoCapture(self, didCaptureVideoFrame: sampleBuffer) + } + + public func captureOutput( + _ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, + from connection: AVCaptureConnection + ) { + // Optionally handle dropped frames, e.g., due to full buffer. + } } diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 9ab212e..10886d4 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -12,578 +12,915 @@ // the device's camera. import AVFoundation -import CoreMedia import CoreML +import CoreMedia import UIKit import Vision var mlModel = try! 
yolov8m(configuration: .init()).model +enum Task { + case detect + case human +} class ViewController: UIViewController { - @IBOutlet var videoPreview: UIView! - @IBOutlet var View0: UIView! - @IBOutlet var segmentedControl: UISegmentedControl! - @IBOutlet var playButtonOutlet: UIBarButtonItem! - @IBOutlet var pauseButtonOutlet: UIBarButtonItem! - @IBOutlet var slider: UISlider! - @IBOutlet var sliderConf: UISlider! - @IBOutlet var sliderIoU: UISlider! - @IBOutlet weak var labelName: UILabel! - @IBOutlet weak var labelFPS: UILabel! - @IBOutlet weak var labelZoom: UILabel! - @IBOutlet weak var labelVersion: UILabel! - @IBOutlet weak var labelSlider: UILabel! - @IBOutlet weak var labelSliderConf: UILabel! - @IBOutlet weak var labelSliderIoU: UILabel! - @IBOutlet weak var activityIndicator: UIActivityIndicatorView! - - let selection = UISelectionFeedbackGenerator() - var detector = try! VNCoreMLModel(for: mlModel) - var session: AVCaptureSession! - var videoCapture: VideoCapture! - var currentBuffer: CVPixelBuffer? - var framesDone = 0 - var t0 = 0.0 // inference start - var t1 = 0.0 // inference dt - var t2 = 0.0 // inference dt smoothed - var t3 = CACurrentMediaTime() // FPS start - var t4 = 0.0 // FPS dt smoothed - // var cameraOutput: AVCapturePhotoOutput! - - // Developer mode - let developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings - let save_detections = false // write every detection to detections.txt - let save_frames = false // write every frame to frames.txt - - lazy var visionRequest: VNCoreMLRequest = { - let request = VNCoreMLRequest(model: detector, completionHandler: { - [weak self] request, error in - self?.processObservations(for: request, error: error) - }) - // NOTE: BoundingBoxView object scaling depends on request.imageCropAndScaleOption https://developer.apple.com/documentation/vision/vnimagecropandscaleoption - request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop - return request - }() - - override func viewDidLoad() { - super.viewDidLoad() - slider.value = 30 - setLabels() - setUpBoundingBoxViews() - startVideo() - // setModel() + @IBOutlet var videoPreview: UIView! + @IBOutlet var View0: UIView! + @IBOutlet var segmentedControl: UISegmentedControl! + @IBOutlet weak var taskSegmentControl: UISegmentedControl! + @IBOutlet weak var trackingLabel: UILabel! + @IBOutlet weak var trackingSwitch: UISwitch! + @IBOutlet var playButtonOutlet: UIBarButtonItem! + @IBOutlet var pauseButtonOutlet: UIBarButtonItem! + @IBOutlet var slider: UISlider! + @IBOutlet var sliderConf: UISlider! + @IBOutlet weak var sliderConfLandScape: UISlider! + @IBOutlet var sliderIoU: UISlider! + @IBOutlet weak var sliderIoULandScape: UISlider! + @IBOutlet weak var labelName: UILabel! + @IBOutlet weak var labelFPS: UILabel! + @IBOutlet weak var labelZoom: UILabel! + @IBOutlet weak var labelVersion: UILabel! + @IBOutlet weak var labelSlider: UILabel! + @IBOutlet weak var labelSliderConf: UILabel! + @IBOutlet weak var labelSliderConfLandScape: UILabel! + @IBOutlet weak var labelSliderIoU: UILabel! + @IBOutlet weak var labelSliderIoULandScape: UILabel! + @IBOutlet weak var activityIndicator: UIActivityIndicatorView! + + @IBOutlet weak var forcus: UIImageView! + + @IBOutlet weak var toolBar: UIToolbar! + + @IBOutlet weak var saveDataButton: UIBarButtonItem! + + let selection = UISelectionFeedbackGenerator() + var detector = try! VNCoreMLModel(for: mlModel) + var session: AVCaptureSession! + var videoCapture: VideoCapture! 
+ var currentBuffer: CVPixelBuffer? + var framesDone = 0 + var t0 = 0.0 // inference start + var t1 = 0.0 // inference dt + var t2 = 0.0 // inference dt smoothed + var t3 = CACurrentMediaTime() // FPS start + var t4 = 0.0 // FPS dt smoothed + // var cameraOutput: AVCapturePhotoOutput! + + // Developer mode + var developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings + var save_detections = false // write every detection to detections.txt + let save_frames = false // write every frame to frames.txt + var save_strings: [String] = [] + let saveQueue = DispatchQueue(label: "com.ultralytics.saveQueue") + + lazy var visionRequest: VNCoreMLRequest = { + let request = VNCoreMLRequest( + model: detector, + completionHandler: { + [weak self] request, error in + self?.processObservations(for: request, error: error) + }) + // NOTE: BoundingBoxView object scaling depends on request.imageCropAndScaleOption https://developer.apple.com/documentation/vision/vnimagecropandscaleoption + request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop + return request + }() + + var task: Task = .detect + var confidenceThreshold: Float = 0.25 + var iouThreshold: Float = 0.4 + var tracking = false + var tracker = TrackingModel() + + override func viewDidLoad() { + super.viewDidLoad() + NotificationCenter.default.addObserver( + self, selector: #selector(updateDeveloperMode), name: .settingsChanged, object: nil) + slider.value = 30 + taskSegmentControl.selectedSegmentIndex = 0 + setLabels() + setUpBoundingBoxViews() + setUpOrientationChangeNotification() + startVideo() + // setModel() + } + + @objc func updateDeveloperMode() { + let userDefaults = UserDefaults.standard + developerMode = userDefaults.bool(forKey: "developer_mode") + if !developerMode { + save_detections = false + saveDataButton.isEnabled = false + saveDataButton.tintColor = UIColor.clear + } else { + saveDataButton.isEnabled = true + saveDataButton.tintColor = nil } - - @IBAction func vibrate(_ sender: Any) { - selection.selectionChanged() + } + + override func viewWillDisappear(_ animated: Bool) { + super.viewWillDisappear(animated) + if developerMode { + save_strings = [] + save_detections = false + saveDataButton.isEnabled = true + saveDataButton.tintColor = nil } - - @IBAction func indexChanged(_ sender: Any) { - selection.selectionChanged() - activityIndicator.startAnimating() - - /// Switch model - switch segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - mlModel = try! yolov8n(configuration: .init()).model - case 1: - self.labelName.text = "YOLOv8s" - mlModel = try! yolov8s(configuration: .init()).model - case 2: - self.labelName.text = "YOLOv8m" - mlModel = try! yolov8m(configuration: .init()).model - case 3: - self.labelName.text = "YOLOv8l" - mlModel = try! yolov8l(configuration: .init()).model - case 4: - self.labelName.text = "YOLOv8x" - mlModel = try! 
yolov8x(configuration: .init()).model - default: - break - } - setModel() - setUpBoundingBoxViews() - activityIndicator.stopAnimating() + } + + deinit { + NotificationCenter.default.removeObserver(self, name: .settingsChanged, object: nil) + } + + override func viewWillTransition( + to size: CGSize, with coordinator: any UIViewControllerTransitionCoordinator + ) { + super.viewWillTransition(to: size, with: coordinator) + + if size.width > size.height { + labelSliderConf.isHidden = true + sliderConf.isHidden = true + labelSliderIoU.isHidden = true + sliderIoU.isHidden = true + toolBar.setBackgroundImage(UIImage(), forToolbarPosition: .any, barMetrics: .default) + toolBar.setShadowImage(UIImage(), forToolbarPosition: .any) + + labelSliderConfLandScape.isHidden = false + sliderConfLandScape.isHidden = false + labelSliderIoULandScape.isHidden = false + sliderIoULandScape.isHidden = false + + } else { + labelSliderConf.isHidden = false + sliderConf.isHidden = false + labelSliderIoU.isHidden = false + sliderIoU.isHidden = false + toolBar.setBackgroundImage(nil, forToolbarPosition: .any, barMetrics: .default) + toolBar.setShadowImage(nil, forToolbarPosition: .any) + + labelSliderConfLandScape.isHidden = true + sliderConfLandScape.isHidden = true + labelSliderIoULandScape.isHidden = true + sliderIoULandScape.isHidden = true } + self.videoCapture.previewLayer?.frame = CGRect( + x: 0, y: 0, width: size.width, height: size.height) + + } + + private func setUpOrientationChangeNotification() { + NotificationCenter.default.addObserver( + self, selector: #selector(orientationDidChange), + name: UIDevice.orientationDidChangeNotification, object: nil) + } + + @objc func orientationDidChange() { + videoCapture.updateVideoOrientation() + + } + + @IBAction func vibrate(_ sender: Any) { + selection.selectionChanged() + } + + @IBAction func indexChanged(_ sender: Any) { + selection.selectionChanged() + activityIndicator.startAnimating() + setModel() + setUpBoundingBoxViews() + activityIndicator.stopAnimating() + } + + func setModel() { + + /// Switch model + switch task { + case .detect: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + mlModel = try! yolov8n(configuration: .init()).model + case 1: + self.labelName.text = "YOLOv8s" + mlModel = try! yolov8s(configuration: .init()).model + case 2: + self.labelName.text = "YOLOv8m" + mlModel = try! yolov8m(configuration: .init()).model + case 3: + self.labelName.text = "YOLOv8l" + mlModel = try! yolov8l(configuration: .init()).model + case 4: + self.labelName.text = "YOLOv8x" + mlModel = try! yolov8x(configuration: .init()).model + default: + break + } + case .human: + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + if #available(iOS 15.0, *) { + mlModel = try! yolov8n_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 1: + self.labelName.text = "YOLOv8s" + if #available(iOS 15.0, *) { + mlModel = try! yolov8s_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 2: + self.labelName.text = "YOLOv8m" + if #available(iOS 15.0, *) { + mlModel = try! yolov8m_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 3: + self.labelName.text = "YOLOv8l" + if #available(iOS 15.0, *) { + mlModel = try! 
yolov8l_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + case 4: + self.labelName.text = "YOLOv8x" + if #available(iOS 15.0, *) { + mlModel = try! yolov8x_human(configuration: .init()).model + } else { + // Fallback on earlier versions + } + default: + break + } - func setModel() { - /// VNCoreMLModel - detector = try! VNCoreMLModel(for: mlModel) - detector.featureProvider = ThresholdProvider() - - /// VNCoreMLRequest - let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in - self?.processObservations(for: request, error: error) - }) - request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop - visionRequest = request - t2 = 0.0 // inference dt smoothed - t3 = CACurrentMediaTime() // FPS start - t4 = 0.0 // FPS dt smoothed } + DispatchQueue.global(qos: .userInitiated).async { [self] in - /// Update thresholds from slider values - @IBAction func sliderChanged(_ sender: Any) { - let conf = Double(round(100 * sliderConf.value)) / 100 - let iou = Double(round(100 * sliderIoU.value)) / 100 - self.labelSliderConf.text = String(conf) + " Confidence Threshold" - self.labelSliderIoU.text = String(iou) + " IoU Threshold" - detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) - } + /// VNCoreMLModel + detector = try! VNCoreMLModel(for: mlModel) + detector.featureProvider = ThresholdProvider() - @IBAction func takePhoto(_ sender: Any?) { - let t0 = DispatchTime.now().uptimeNanoseconds - - // 1. captureSession and cameraOutput - // session = videoCapture.captureSession // session = AVCaptureSession() - // session.sessionPreset = AVCaptureSession.Preset.photo - // cameraOutput = AVCapturePhotoOutput() - // cameraOutput.isHighResolutionCaptureEnabled = true - // cameraOutput.isDualCameraDualPhotoDeliveryEnabled = true - // print("1 Done: ", Double(DispatchTime.now().uptimeNanoseconds - t0) / 1E9) - - // 2. Settings - let settings = AVCapturePhotoSettings() - // settings.flashMode = .off - // settings.isHighResolutionPhotoEnabled = cameraOutput.isHighResolutionCaptureEnabled - // settings.isDualCameraDualPhotoDeliveryEnabled = self.videoCapture.cameraOutput.isDualCameraDualPhotoDeliveryEnabled - - // 3. 
Capture Photo - usleep(20_000) // short 10 ms delay to allow camera to focus - self.videoCapture.cameraOutput.capturePhoto(with: settings, delegate: self as AVCapturePhotoCaptureDelegate) - print("3 Done: ", Double(DispatchTime.now().uptimeNanoseconds - t0) / 1E9) + /// VNCoreMLRequest + let request = VNCoreMLRequest( + model: detector, + completionHandler: { [weak self] request, error in + self?.processObservations(for: request, error: error) + }) + request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop + visionRequest = request + t2 = 0.0 // inference dt smoothed + t3 = CACurrentMediaTime() // FPS start + t4 = 0.0 // FPS dt smoothed } - - @IBAction func logoButton(_ sender: Any) { - selection.selectionChanged() - if let link = URL(string: "https://www.ultralytics.com") { - UIApplication.shared.open(link) + } + + /// Update thresholds from slider values + @IBAction func sliderChanged(_ sender: Any) { + self.confidenceThreshold = sliderConf.value + self.iouThreshold = sliderIoU.value + let conf = Double(round(100 * sliderConf.value)) / 100 + let iou = Double(round(100 * sliderIoU.value)) / 100 + self.labelSliderConf.text = String(conf) + " Confidence Threshold" + self.labelSliderIoU.text = String(iou) + " IoU Threshold" + detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) + } + + @IBAction func taskSegmentControlChanged(_ sender: UISegmentedControl) { + save_strings.removeAll() + saveDataButton.tintColor = nil + save_detections = false + + switch sender.selectedSegmentIndex { + case 0: + if self.task != .detect { + self.trackingLabel.isHidden = true + self.trackingSwitch.isHidden = true + self.task = .detect + self.setModel() + } + case 1: + if self.task != .human { + self.task = .human + for i in 0.. Double { + let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String) + do { + let values = try fileURL.resourceValues(forKeys: [ + .volumeAvailableCapacityForImportantUsageKey + ]) + return Double(values.volumeAvailableCapacityForImportantUsage!) / 1E9 // Bytes to GB + } catch { + print("Error retrieving storage capacity: \(error.localizedDescription)") + } + return 0 + } + + // Return RAM usage (GB) + func memoryUsage() -> Double { + var taskInfo = mach_task_basic_info() + var count = mach_msg_type_number_t(MemoryLayout.size) / 4 + let kerr: kern_return_t = withUnsafeMutablePointer(to: &taskInfo) { + $0.withMemoryRebound(to: integer_t.self, capacity: 1) { + task_info(mach_task_self_, task_flavor_t(MACH_TASK_BASIC_INFO), $0, &count) + } } + if kerr == KERN_SUCCESS { + return Double(taskInfo.resident_size) / 1E9 // Bytes to GB + } else { + return 0 + } + } - // Save text file - func saveText(text: String, file: String = "saved.txt") { - if let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first { - let fileURL = dir.appendingPathComponent(file) - - // Writing - do { // Append to file if it exists - let fileHandle = try FileHandle(forWritingTo: fileURL) - fileHandle.seekToEndOfFile() - fileHandle.write(text.data(using: .utf8)!) 
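// A minimal sketch (not part of the patch) of the append-to-file pattern behind the
// removed saveText helper and the save_strings/saveQueue buffering added elsewhere in
// this diff; the function name appendLine(_:to:) is illustrative.
import Foundation

func appendLine(_ text: String, to fileName: String = "detections.txt") {
  guard
    let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first,
    let data = text.data(using: .utf8)
  else { return }
  let fileURL = dir.appendingPathComponent(fileName)
  if let handle = try? FileHandle(forWritingTo: fileURL) {
    handle.seekToEndOfFile()  // file already exists: append to the end
    handle.write(data)
    handle.closeFile()
  } else {
    try? text.write(to: fileURL, atomically: false, encoding: .utf8)  // first write: create the file
  }
}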
- fileHandle.closeFile() - } catch { // Create new file and write - do { - try text.write(to: fileURL, atomically: false, encoding: .utf8) - } catch { - print("no file written") - } - } + func show(predictions: [VNRecognizedObjectObservation], persons: [Person]) { + let width = videoPreview.bounds.width + let height = videoPreview.bounds.height + var str = "" - // Reading - // do {let text2 = try String(contentsOf: fileURL, encoding: .utf8)} catch {/* error handling here */} - } - } + var ratio: CGFloat = 1.0 - // Save image file - func saveImage() { - let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first - let fileURL = dir!.appendingPathComponent("saved.jpg") - let image = UIImage(named: "ultralytics_yolo_logotype.png") - FileManager.default.createFile(atPath: fileURL.path, contents: image!.jpegData(compressionQuality: 0.5), attributes: nil) + if videoCapture.captureSession.sessionPreset == .photo { + ratio = (height / width) / (4.0 / 3.0) + } else { + ratio = (height / width) / (16.0 / 9.0) } - // Return hard drive space (GB) - func freeSpace() -> Double { - let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String) - do { - let values = try fileURL.resourceValues(forKeys: [.volumeAvailableCapacityForImportantUsageKey]) - return Double(values.volumeAvailableCapacityForImportantUsage!) / 1E9 // Bytes to GB - } catch { - print("Error retrieving storage capacity: \(error.localizedDescription)") - } - return 0 + let date = Date() + let calendar = Calendar.current + let hour = calendar.component(.hour, from: date) + let minutes = calendar.component(.minute, from: date) + let seconds = calendar.component(.second, from: date) + let nanoseconds = calendar.component(.nanosecond, from: date) + let sec_day = + Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 + + var resultCount = 0 + + switch task { + case .detect: + resultCount = predictions.count + case .human: + resultCount = persons.count } + self.labelSlider.text = String(resultCount) + " items (max " + String(Int(slider.value)) + ")" + for i in 0.. 
Double { - var taskInfo = mach_task_basic_info() - var count = mach_msg_type_number_t(MemoryLayout.size) / 4 - let kerr: kern_return_t = withUnsafeMutablePointer(to: &taskInfo) { - $0.withMemoryRebound(to: integer_t.self, capacity: 1) { - task_info(mach_task_self_, task_flavor_t(MACH_TASK_BASIC_INFO), $0, &count) - } } - if kerr == KERN_SUCCESS { - return Double(taskInfo.resident_size) / 1E9 // Bytes to GB - } else { - return 0 + var displayRect = rect + switch UIDevice.current.orientation { + case .portraitUpsideDown: + displayRect = CGRect( + x: 1.0 - rect.origin.x - rect.width, + y: 1.0 - rect.origin.y - rect.height, + width: rect.width, + height: rect.height) + case .landscapeLeft: + displayRect = CGRect( + x: rect.origin.x, + y: rect.origin.y, + width: rect.width, + height: rect.height) + case .landscapeRight: + displayRect = CGRect( + x: rect.origin.x, + y: rect.origin.y, + width: rect.width, + height: rect.height) + case .unknown: + print("The device orientation is unknown, the predictions may be affected") + fallthrough + default: break } - } - - func show(predictions: [VNRecognizedObjectObservation]) { - let width = videoPreview.bounds.width // 375 pix - let height = videoPreview.bounds.height // 812 pix - var str = "" - - // ratio = videoPreview AR divided by sessionPreset AR - var ratio: CGFloat = 1.0 - if videoCapture.captureSession.sessionPreset == .photo { - ratio = (height / width) / (4.0 / 3.0) // .photo + if ratio >= 1 { + let offset = (1 - ratio) * (0.5 - displayRect.minX) + if task == .detect { + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) + displayRect = displayRect.applying(transform) + } else { + let transform = CGAffineTransform(translationX: offset, y: 0) + displayRect = displayRect.applying(transform) + } + displayRect.size.width *= ratio } else { - ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc. 
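// A minimal sketch (not part of the patch) of the ratio correction used when mapping a
// normalized Vision rect onto the preview view; viewWidth, viewHeight and isPhotoPreset
// are illustrative parameters standing in for videoPreview.bounds and the session preset.
import CoreGraphics
import Vision

func previewRect(
  for normalizedRect: CGRect, viewWidth: CGFloat, viewHeight: CGFloat, isPhotoPreset: Bool
) -> CGRect {
  // Preview aspect ratio divided by the capture preset aspect ratio (4:3 for .photo, else 16:9).
  let ratio = (viewHeight / viewWidth) / (isPhotoPreset ? 4.0 / 3.0 : 16.0 / 9.0)
  var rect = normalizedRect
  if ratio >= 1 {
    // Taller preview (e.g. iPhone): flip Vision's bottom-left origin to UIKit's top-left,
    // shift horizontally, then scale the width.
    let offset = (1 - ratio) * (0.5 - rect.minX)
    rect = rect.applying(CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1))
    rect.size.width *= ratio
  } else {
    // Wider preview (e.g. iPad): flip the y-axis, shift vertically, then scale the height.
    let offset = (ratio - 1) * (0.5 - rect.maxY)
    rect = rect.applying(CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1))
    rect.size.height /= ratio
  }
  // Convert from normalized [0, 1] coordinates to pixel coordinates in the preview view.
  return VNImageRectForNormalizedRect(rect, Int(viewWidth), Int(viewHeight))
}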
+ if task == .detect { + let offset = (ratio - 1) * (0.5 - displayRect.maxY) + + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) + displayRect = displayRect.applying(transform) + } else { + let offset = (ratio - 1) * (0.5 - displayRect.minY) + let transform = CGAffineTransform(translationX: 0, y: offset) + displayRect = displayRect.applying(transform) + } + ratio = (height / width) / (3.0 / 4.0) + displayRect.size.height /= ratio } + displayRect = VNImageRectForNormalizedRect(displayRect, Int(width), Int(height)) - // date - let date = Date() - let calendar = Calendar.current - let hour = calendar.component(.hour, from: date) - let minutes = calendar.component(.minute, from: date) - let seconds = calendar.component(.second, from: date) - let nanoseconds = calendar.component(.nanosecond, from: date) - let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day - - self.labelSlider.text = String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" - for i in 0..= 1 { // iPhone ratio = 1.218 - let offset = (1 - ratio) * (0.5 - rect.minX) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) - rect = rect.applying(transform) - rect.size.width *= ratio - } else { // iPad ratio = 0.75 - let offset = (ratio - 1) * (0.5 - rect.maxY) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) - rect = rect.applying(transform) - rect.size.height /= ratio - } - - // Scale normalized to pixels [375, 812] [width, height] - rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height)) - - // The labels array is a list of VNClassificationObservation objects, - // with the highest scoring class first in the list. - let bestClass = prediction.labels[0].identifier - let confidence = prediction.labels[0].confidence - // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels) - - // Show the bounding box. - boundingBoxViews[i].show(frame: rect, - label: String(format: "%@ %.1f", bestClass, confidence * 100), - color: colors[bestClass] ?? 
UIColor.white, - alpha: CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) - - if developerMode { - // Write - if save_detections { - str += String(format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) - } - - // Action trigger upon detection - // if false { - // if (bestClass == "car") { // "cell phone", "car", "person" - // self.takePhoto(nil) - // // self.pauseButton(nil) - // sleep(2) - // } - // } - } - } else { - boundingBoxViews[i].hide() - } - } - - // Write if developerMode { - if save_detections { - saveText(text: str, file: "detections.txt") // Write stats for each detection - } - if save_frames { - str = String(format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n", - sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, - self.t1 * 1000, self.t2 * 1000, 1 / self.t4) - saveText(text: str, file: "frames.txt") // Write stats for each image + if save_detections { + saveQueue.async { [self] in + guard save_detections else { return } + var str = "" + switch task { + case .detect: + str += String( + format: "%.3f,%.3f,%.3f,%@, %.2f,%.3f,%.3f,%.3f,%.3f\n", + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + case .human: + let person = persons[i] + var id = "" + if person.index == -1 { + id = "-" + } else { + id = String(person.index) + } + str += String( + format: + "%.3f,%.3f,%.3f,%@, %.2f,%.3f,%.3f,%.3f,%.3f,%.2f,%.2f,%d,%@,%.2f,%@,%.2f\n", + sec_day, freeSpace(), UIDevice.current.batteryLevel, id, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height, person.weight, + person.height, person.age, person.gender, person.genderConfidence, person.race, + person.raceConfidence) + } + + save_strings.append(str) } + } } - // Debug - // print(str) - // print(UIDevice.current.identifierForVendor!) 
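// A minimal sketch (not part of the patch) of the developer-mode logging added in this
// diff: detection rows are formatted on a serial queue and buffered until they are
// written out. DetectionLogger and its members are illustrative names; the column
// layout follows the detect-task format string used above.
import CoreGraphics
import Foundation

final class DetectionLogger {
  private let queue = DispatchQueue(label: "com.example.detectionLogger")  // serial by default
  private var rows: [String] = []

  func log(
    secondOfDay: Double, freeGB: Double, battery: Float,
    label: String, confidence: Float, box: CGRect
  ) {
    queue.async {
      let row = String(
        format: "%.3f,%.3f,%.3f,%@, %.2f,%.3f,%.3f,%.3f,%.3f\n",
        secondOfDay, freeGB, battery, label, confidence,
        box.origin.x, box.origin.y, box.size.width, box.size.height)
      self.rows.append(row)
    }
  }

  // Hand the buffered rows to a writer (e.g. one that appends them to detections.txt).
  func drain(_ handler: @escaping ([String]) -> Void) {
    queue.async {
      handler(self.rows)
      self.rows.removeAll()
    }
  }
}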
- // saveImage() + } else { + boundingBoxViews[i].hide() + } } + } - // Pinch to Zoom Start --------------------------------------------------------------------------------------------- - let minimumZoom: CGFloat = 1.0 - let maximumZoom: CGFloat = 10.0 - var lastZoomFactor: CGFloat = 1.0 + // Pinch to Zoom Start --------------------------------------------------------------------------------------------- + let minimumZoom: CGFloat = 1.0 + let maximumZoom: CGFloat = 10.0 + var lastZoomFactor: CGFloat = 1.0 - @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { - let device = videoCapture.captureDevice + @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { + let device = videoCapture.captureDevice - // Return zoom value between the minimum and maximum zoom values - func minMaxZoom(_ factor: CGFloat) -> CGFloat { - return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) - } + // Return zoom value between the minimum and maximum zoom values + func minMaxZoom(_ factor: CGFloat) -> CGFloat { + return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) + } - func update(scale factor: CGFloat) { - do { - try device.lockForConfiguration() - defer { - device.unlockForConfiguration() - } - device.videoZoomFactor = factor - } catch { - print("\(error.localizedDescription)") - } + func update(scale factor: CGFloat) { + do { + try device.lockForConfiguration() + defer { + device.unlockForConfiguration() } + device.videoZoomFactor = factor + } catch { + print("\(error.localizedDescription)") + } + } - let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) - switch pinch.state { - case .began: fallthrough - case .changed: - update(scale: newScaleFactor) - self.labelZoom.text = String(format: "%.2fx", newScaleFactor) - self.labelZoom.font = UIFont.preferredFont(forTextStyle: .title2) - case .ended: - lastZoomFactor = minMaxZoom(newScaleFactor) - update(scale: lastZoomFactor) - self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) - default: break - } - } // Pinch to Zoom Start ------------------------------------------------------------------------------------------ + let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) + switch pinch.state { + case .began, .changed: + update(scale: newScaleFactor) + self.labelZoom.text = String(format: "%.2fx", newScaleFactor) + self.labelZoom.font = UIFont.preferredFont(forTextStyle: .title2) + case .ended: + lastZoomFactor = minMaxZoom(newScaleFactor) + update(scale: lastZoomFactor) + self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) + default: break + } + } // Pinch to Zoom Start + + // ------------------------------------------------------------------------------------------ } // ViewController class End extension ViewController: VideoCaptureDelegate { - func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) { - predict(sampleBuffer: sampleBuffer) - } + func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) { + predict(sampleBuffer: sampleBuffer) + } } // Programmatically save image extension ViewController: AVCapturePhotoCaptureDelegate { - func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) 
{ - if let error = error { - print("error occurred : \(error.localizedDescription)") - } - if let dataImage = photo.fileDataRepresentation() { - let dataProvider = CGDataProvider(data: dataImage as CFData) - let cgImageRef: CGImage! = CGImage(jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) - let image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: UIImage.Orientation.right) - let imageView = UIImageView(image: image) - imageView.contentMode = .scaleAspectFill - imageView.frame = videoPreview.frame - let imageLayer = imageView.layer - var sublayers = videoPreview.layer.sublayers ?? [] - let insertIndex = max(sublayers.count - 1, 0) - videoPreview.layer.insertSublayer(imageLayer, above: videoCapture.previewLayer) - - let bounds = UIScreen.main.bounds - UIGraphicsBeginImageContextWithOptions(bounds.size, true, 0.0) - self.View0.drawHierarchy(in: bounds, afterScreenUpdates: true) - let img = UIGraphicsGetImageFromCurrentImageContext() - UIGraphicsEndImageContext() - imageLayer.removeFromSuperlayer() - let activityViewController = UIActivityViewController(activityItems: [img!], applicationActivities: nil) - activityViewController.popoverPresentationController?.sourceView = self.View0 - self.present(activityViewController, animated: true, completion: nil) -// -// // Save to camera roll -// UIImageWriteToSavedPhotosAlbum(img!, nil, nil, nil); - } else { - print("AVCapturePhotoCaptureDelegate Error") - } + func photoOutput( + _ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error? + ) { + if let error = error { + print("error occurred : \(error.localizedDescription)") + } + if let dataImage = photo.fileDataRepresentation() { + let dataProvider = CGDataProvider(data: dataImage as CFData) + let cgImageRef: CGImage! = CGImage( + jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, + intent: .defaultIntent) + var orientation = CGImagePropertyOrientation.right + switch UIDevice.current.orientation { + case .landscapeLeft: + orientation = .up + case .landscapeRight: + orientation = .down + default: + break + } + var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: .right) + if let orientedCIImage = CIImage(image: image)?.oriented(orientation), + let cgImage = CIContext().createCGImage(orientedCIImage, from: orientedCIImage.extent) + { + image = UIImage(cgImage: cgImage) + + } + let imageView = UIImageView(image: image) + imageView.contentMode = .scaleAspectFill + imageView.frame = videoPreview.frame + let imageLayer = imageView.layer + var sublayers = videoPreview.layer.sublayers ?? [] + let insertIndex = max(sublayers.count - 1, 0) + videoPreview.layer.insertSublayer(imageLayer, above: videoCapture.previewLayer) + + let bounds = UIScreen.main.bounds + UIGraphicsBeginImageContextWithOptions(bounds.size, true, 0.0) + self.View0.drawHierarchy(in: bounds, afterScreenUpdates: true) + let img = UIGraphicsGetImageFromCurrentImageContext() + UIGraphicsEndImageContext() + imageLayer.removeFromSuperlayer() + let activityViewController = UIActivityViewController( + activityItems: [img!], applicationActivities: nil) + activityViewController.popoverPresentationController?.sourceView = self.View0 + self.present(activityViewController, animated: true, completion: nil) + // + // // Save to camera roll + // UIImageWriteToSavedPhotosAlbum(img!, nil, nil, nil); + } else { + print("AVCapturePhotoCaptureDelegate Error") } + } } - - -
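// A minimal sketch (not part of the patch) of the Core Image orientation fix applied to
// captured photos above; reoriented(_:for:) is an illustrative helper using the same
// device-orientation mapping (.landscapeLeft -> .up, .landscapeRight -> .down, else .right).
import CoreImage
import ImageIO
import UIKit

func reoriented(_ image: UIImage, for deviceOrientation: UIDeviceOrientation) -> UIImage {
  // Map the physical device orientation to an EXIF-style orientation tag.
  let orientation: CGImagePropertyOrientation
  switch deviceOrientation {
  case .landscapeLeft: orientation = .up
  case .landscapeRight: orientation = .down
  default: orientation = .right  // portrait captures from the back camera
  }
  // Rotate the pixel data via Core Image so the shared/rendered image appears upright.
  guard
    let ciImage = CIImage(image: image)?.oriented(orientation),
    let cgImage = CIContext().createCGImage(ciImage, from: ciImage.extent)
  else { return image }
  return UIImage(cgImage: cgImage)
}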