First commit (migration from internal repo)

SalesforceLabs · Apr 20, 2020 · 9ff302a · 9ff302a
1 parent 8ae4958
commit 9ff302a
Show file tree

Hide file tree

Showing 338 changed files with 67,167 additions and 1 deletion.
diff --git a/.forceignore b/.forceignore
@@ -0,0 +1,21 @@
+# List files or directories below to ignore them when running force:source:push, force:source:pull, and force:source:status
+# More information: https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_exclude_source.htm
+#
+
+package.xml
+
+# LWC configuration files
+**/jsconfig.json
+**/.eslintrc.json
+
+# LWC Jest
+**/__tests__/**
+
+# Profile changes
+force-app/main/default/profiles/Admin.profile-meta.xml
+force-app/main/default/profiles/Custom%3A Marketing Profile.profile-meta.xml
+force-app/main/default/profiles/Custom%3A Sales Profile.profile-meta.xml
+force-app/main/default/profiles/Custom%3A Support Profile.profile-meta.xml
+
+# Workaround for "Load of metadata from db failed for metadata of type:AppMenu and file name:AppSwitcher"
+AppSwitcher.appMenu
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+# Ignore SFDX object definitions and other settings
+.sfdx
+.DS_Store
+.project
+.salesforce
+.settings
+node_modules
+.idea
+mdapi-source
diff --git a/LICENSE.txt b/LICENSE.txt
@@ -0,0 +1,12 @@
+Copyright (c) 2020, Salesforce.com, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+* Neither the name of Salesforce.com nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
@@ -1 +1,97 @@
-# ClusterAnalysis
+# SFDC Cluster Analysis Package
+Performs [cluster analysis](https://en.wikipedia.org/wiki/Cluster_analysis) on Salesforce standard and custom objects, breaks records into groups (clusters) using K-Means and K-Medoids (CLARA) algorithms.<br/>
+Supports clustering objects with mixed data types (numeric, category/picklist, text) using Gower distance function.<br/>
+Visualizes the clustering result using t-SNE dimensionality reduction technique.<br/>
+Click [here](../../wiki/Cluster-Analysis-in-Salesforce) to get more information about the methodology and algorithms used in this app.
+
+## Installation
+Install the application from Salesforce AppExchange
+
+## Dev, Build and Test
+
+### Create a scratch org
+```
+sfdx force:org:create -f ./config/project-scratch-def.json -a ScratchOrgAlias --durationdays 30
+sfdx force:config:set defaultusername=<user name returned from the previous command>
+```
+
+### Push the source code to the scratch org
+```
+sfdx force:source:push
+```
+
+### Create sample lead records
+```
+sfdx force:data:bulk:upsert -f force-app/main/default/staticresources/ClustanLeadsMock.csv -s Lead -i Email
+```
+
+### Run Apex tests
+```
+sfdx force:apex:test:run
+```
+
+### Authorise an org
+```
+sfdx force:auth:web:login --setalias OrgAlias
+```
+
+### Deploy to an org
+The app also works if deployed to an org without a namespace. However, I recommend using a managed package installation.
+```
+sfdx force:source:deploy --checkonly --sourcepath force-app --targetusername OrgAlias --testlevel RunLocalTests
+sfdx force:source:deploy --sourcepath force-app --targetusername OrgAlias --testlevel NoTestRun
+```
+
+### Create a managed package
+```
+sfdx force:package:create --name "Cluster Analysis" --path force-app --packagetype Managed -d "Group records from any object into clusters and visualize the result using machine learning algorithms"
+```
+
+### Create and promote a package version
+```
+sfdx force:package:version:create --package "Cluster Analysis" --wait 10 --installationkeybypass --codecoverage
+sfdx force:package:version:promote --package "Cluster Analysis@<version number created by the previous command>"
+```
+
+
+## Description of Files and Directories
+* **sfdx-project.json**: Required by Salesforce DX. Configures your project.  Use this file to specify the parameters that affect your Salesforce development project.
+* **config/project-scratch-def.json**: Sample file that shows how to define the shape of a scratch org.  You reference this file when you create your scratch org with the force:org:create command.   
+* **force-app**: Directory that contains the source for the Cluster Analysis package and tests.
+* **force-app/main/default**: Directory that contains the app source and shared classes.
+* **force-app/main/algorithms**: Directory that contains algorithm classes.
+* **force-app/main/utils**: Directory that contains utility classes.
+* **force-app/main/test**: Directory that contains Apex test classes.
+* **.project**:  Required by the Eclipse IDE.  Describes the Eclipse project. 
+* **.gitignore**:  Optional Git file. Specifies intentionally untracked files that you want Git (or in this case GitHub) to ignore.
+
+## Resources
+Clustering Large Data Sets (By Leonard Kaufman, Peter J.Rousseeuw, 1986)
+
+Clustering with optimised weights for Gower’s metric (By Jeroen van den Hoven)
+https://beta.vu.nl/nl/Images/stageverslag-hoven_tcm235-777817.pdf
+
+Clustering on mixed type data (by Thomas Filaire)
+https://towardsdatascience.com/clustering-on-mixed-type-data-8bbd0a2569c3
+
+Visualizing Data using t-SNE (by Laurens van der Maaten)
+https://lvdmaaten.github.io/tsne/
+
+tSNEJS (Copyright Andrej Karpathy)
+https://github.com/karpathy/tsnejs
+
+Javascript SOQL parser (Copyright 2019 Austin Turner)
+https://github.com/paustint/soql-parser-js
+
+JavaScript Algorithms and Data Structures (Copyright (c) 2018 Oleksii Trekhleb)
+https://github.com/trekhleb/javascript-algorithms
+
+Data-Driven Documents (D3.js, Copyright 2010-2017 Mike Bostock)
+https://d3js.org/
+
+Building Machine Learning Systems with Apex (Presented on DF14 by Jen Wyher and Paul Battisson)
+https://www.slideshare.net/pbattisson/df14-building-machine-learning-systems-with-apex
+
+
+## Issues
+To report a bug or suggest an enhancement, create an issue on the "Issues" tab.
diff --git a/config/project-scratch-def.json b/config/project-scratch-def.json
@@ -0,0 +1,10 @@
+{
+  "orgName": "imukhamedgaliyev Company",
+  "edition": "Developer",
+  "features": ["DebugApex"],
+  "settings": {
+    "lightningExperienceSettings": {
+      "enableS1DesktopEnabled": true
+    }
+  }
+}
diff --git a/force-app/main/algorithms/classes/ClusterAlgorithmFactory.cls b/force-app/main/algorithms/classes/ClusterAlgorithmFactory.cls
@@ -0,0 +1,30 @@
+/*
+ * Algorithm factory - creates algorithm runners
+ *
+ * @author: Iskander Mukhamedgaliyev
+ */
+public with sharing class ClusterAlgorithmFactory {
+    /**
+     * Builds the list of algorithms known to this factory: K-Medoids first, then K-Means.
+     * Each option is created with the same algorithm constant passed for both constructor
+     * arguments (presumably value and label - confirm against UiSelectOption).
+     *
+     * @return a new list containing one UiSelectOption per algorithm
+     */
+    public static List<UiSelectOption> getAlgorithmList() {
+        //TODO: move this to some settings class, possibly create custom settings
+        return new List<UiSelectOption>{
+            new UiSelectOption(ClusterConstants.ALGORITHM_KMEDOIDS, ClusterConstants.ALGORITHM_KMEDOIDS ),
+            new UiSelectOption(ClusterConstants.ALGORITHM_KMEANS, ClusterConstants.ALGORITHM_KMEANS )
+        };
+    }
+
+    /**
+     * Convenience overload: picks the runner from the algorithm stored on the model.
+     *
+     * @param model model whose algorithm field selects the runner
+     * @return a new runner instance for model.algorithm
+     */
+    public static ClusterAlgorithmRunner getRunner(ClusterModelWrapper model) {
+        return getRunner(model.algorithm);
+    }
+
+    /**
+     * Creates a new runner for the given algorithm name.
+     *
+     * @param algorithm one of ClusterConstants.ALGORITHM_KMEDOIDS or ClusterConstants.ALGORITHM_KMEANS
+     * @return a new ClusterKMedoidsPAMRunner or ClusterKMeansRunner
+     * @throws ClusterException when the algorithm name matches neither constant
+     */
+    public static ClusterAlgorithmRunner getRunner(String algorithm) {
+        if (algorithm == ClusterConstants.ALGORITHM_KMEDOIDS) {
+            return new ClusterKMedoidsPAMRunner();
+        }
+        if (algorithm == ClusterConstants.ALGORITHM_KMEANS) {
+            return new ClusterKMeansRunner();
+        }
+        // Neither known algorithm matched
+        throw new ClusterException(algorithm + ' algorithm is not supported');
+    }
+}
diff --git a/force-app/main/algorithms/classes/ClusterAlgorithmFactory.cls-meta.xml b/force-app/main/algorithms/classes/ClusterAlgorithmFactory.cls-meta.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ApexClass xmlns="http://soap.sforce.com/2006/04/metadata">
+    <apiVersion>47.0</apiVersion>
+    <status>Active</status>
+</ApexClass>
diff --git a/force-app/main/algorithms/classes/ClusterAlgorithmParameter.cls b/force-app/main/algorithms/classes/ClusterAlgorithmParameter.cls
@@ -0,0 +1,29 @@
+/*
+ * Algorithm parameters
+ *
+ * @author: Iskander Mukhamedgaliyev
+ */
+public with sharing class ClusterAlgorithmParameter {
+    /** Kinds of parameter values a parameter can be declared as. */
+    public enum ParameterType { STRING_PARAMETER, INTEGER_PARAMETER, DECIMAL_PARAMETER, BOOLEAN_PARAMETER, SELECT_PARAMETER }
+
+    // Fields exposed through @AuraEnabled
+    @AuraEnabled public String name;
+    @AuraEnabled public String description;
+    /** Name of the ParameterType enum value; the full constructor fills it from typeEnum. */
+    @AuraEnabled public String type;
+    @AuraEnabled public Object value;
+    @AuraEnabled public String[] options;
+
+    /** Parameter type as an enum; this field is not @AuraEnabled. */
+    public ParameterType typeEnum;
+
+    /** Creates an empty parameter; every field is left null. */
+    public ClusterAlgorithmParameter() { }
+
+    /**
+     * Creates a fully populated parameter.
+     *
+     * @param paramName        stored in name
+     * @param paramDescription stored in description
+     * @param paramType        stored in typeEnum; its enum name is stored in type
+     * @param paramValue       stored in value
+     * @param paramOptions     stored in options
+     */
+    public ClusterAlgorithmParameter(String paramName, String paramDescription, ParameterType paramType, Object paramValue, String[] paramOptions) {
+        this.typeEnum = paramType;
+        this.type = paramType.name();
+        this.name = paramName;
+        this.description = paramDescription;
+        this.value = paramValue;
+        this.options = paramOptions;
+    }
+}
diff --git a/force-app/main/algorithms/classes/ClusterAlgorithmParameter.cls-meta.xml b/force-app/main/algorithms/classes/ClusterAlgorithmParameter.cls-meta.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ApexClass xmlns="http://soap.sforce.com/2006/04/metadata">
+    <apiVersion>47.0</apiVersion>
+    <status>Active</status>
+</ApexClass>
diff --git a/force-app/main/algorithms/classes/ClusterAlgorithmRunner.cls b/force-app/main/algorithms/classes/ClusterAlgorithmRunner.cls
@@ -0,0 +1,158 @@
+/*
+ * Base class for algorithms logic
+ *
+ * @author: Iskander Mukhamedgaliyev
+ */
+public abstract with sharing class ClusterAlgorithmRunner {
+    // Status message saved with the job when processNextSteps exceeds ClusterConstants.getMaxIterations()
+    private static final String MSG_ERR_MAX_ITERATIONS = 'Maximum algorithm iterations reached, job was interrupted';
+    // Status message saved with the job when it was found marked for cancel
+    private static final String MSG_JOB_CANCELLED = 'Job was cancelled by request';
+    // Validation message added by validateModel when an output field fails its name/object checks
+    @TestVisible
+    private static final String MSG_ERR_OUTPUTFIELDVALIDATION = 'Cluster Result Output field name must start with \'ClusterNumber\' and should belong to the model object. Please create a new custom field which starts with \'ClusterNumber\'';
+    /**
+     * Outcome of validateModel: an overall flag plus the messages explaining each failure.
+     */
+    public class ModelValidationResult {
+        public List<String> messages;
+        public Boolean isValid;        
+    }
+    public static Logger log = LogFactory.getLogger();
+    // Static copy of the job state; assigned in processNextSteps after prepareNextStep().
+    // NOTE(review): presumably exposed so tests can inspect the state - confirm.
+    @TestVisible
+    public static ClusterJobState jobState;
+    // Ordered algorithm steps; indexed by ClusterJobState.currentAlgorithmStep
+    public List<ClusterAlgorithmStep> steps;
+    /**
+     * Creates a runner with an empty step list.
+     */
+    public ClusterAlgorithmRunner() {
+        this.steps = new List<ClusterAlgorithmStep>();
+    }
+
+    /**
+     * Validates a model before it is run. The model is invalid when:
+     *  - an output field's name does not start with 'clusternumber' (case-insensitive)
+     *    or its objectName differs (case-insensitive) from the model's objectName;
+     *  - no field has distance type NONE with isIdField set;
+     *  - no field has distance type CATEGORY, NUMERIC or TEXT;
+     *  - ClusterAccessCheck.checkModelObjectAccessPermissions returns any errors.
+     *
+     * @param model model to validate
+     * @return result with isValid and one message per failed check
+     */
+    public virtual ClusterAlgorithmRunner.ModelValidationResult validateModel(ClusterModelWrapper model) {
+        ModelValidationResult result = new ModelValidationResult();
+        result.isValid = true;
+        result.messages = new List<String>();
+        Boolean hasIdField = false;
+        Boolean hasValueFields = false;
+        for (Integer i=0; i<model.fields.size(); i++) {
+            if (model.fields[i].distanceType == ClusterConstants.FIELDTYPE_OUTPUT) {
+                result.isValid &= model.fields[i].name.toLowerCase().startsWith('clusternumber');
+                result.isValid &= model.objectName.toLowerCase() == model.fields[i].objectName.toLowerCase();
+            }
+            else if (model.fields[i].distanceType == ClusterConstants.FIELDTYPE_NONE && model.fields[i].isIdField) {
+                hasIdField = true;
+            }
+            else if ((model.fields[i].distanceType == ClusterConstants.FIELDTYPE_CATEGORY) || 
+                (model.fields[i].distanceType == ClusterConstants.FIELDTYPE_NUMERIC) || 
+                (model.fields[i].distanceType == ClusterConstants.FIELDTYPE_TEXT)) 
+            {
+                hasValueFields = true;
+            }
+        }
+        // At this point isValid can only have been cleared by the output field checks in the loop
+        if (!result.isValid) {
+            result.messages.add(MSG_ERR_OUTPUTFIELDVALIDATION);
+        }
+        if (!hasIdField) {
+            result.isValid = false;
+            result.messages.add('Model must have object id field');
+        }
+        if (!hasValueFields) {
+            result.isValid = false;
+            result.messages.add('Model must have at least one Numeric, Category or Text field');
+        }
+        List<String> accessErrors = ClusterAccessCheck.checkModelObjectAccessPermissions(model);
+        if (accessErrors.size() > 0) {
+            result.isValid = false;
+            result.messages.addAll(accessErrors);
+        }
+        return result;
+    }
+
+    /**
+     * Appends a ClusterJobOutputStep to the step list when the model has at least one
+     * field with the OUTPUT distance type; otherwise leaves the step list unchanged.
+     *
+     * @param model model whose fields are inspected
+     */
+    public virtual void init(ClusterModelWrapper model) {
+        Boolean hasJobOutput = false;
+        for (Integer i=0; i<model.fields.size(); i++) {
+            if (model.fields[i].distanceType == ClusterConstants.FIELDTYPE_OUTPUT) {
+                hasJobOutput = true;
+                break;
+            }
+        }
+        if (hasJobOutput) {
+            ClusterJobOutputStep jobOutputStep = new ClusterJobOutputStep();
+            this.steps.add(jobOutputStep);
+        }
+    }
+    /** Returns the job state this runner operates on. */
+    public abstract ClusterJobState getJobState();
+    /** Replaces the job state this runner operates on. */
+    public abstract void setJobState(ClusterJobState state);
+
+    /**
+     * Starts the algorithm: checks CRUD permission on ClusterJobResult__c, creates the
+     * cluster job and its job clusters, resets the iteration counter and runs the current
+     * step. When that step is not async, the remaining steps are processed immediately.
+     * Any exception is logged and rethrown.
+     */
+    public virtual void start() {
+        try {
+            ClusterJobState state = this.getJobState();
+            log.debug('Starting algorithm: ' + state.model.algorithm);
+            //We will check CRUD permissions on ClusterJobResult__c once here and will not check them in every step
+            ClusterAccessCheck.checkCRUDPermission(Schema.SObjectType.ClusterJobResult__c);
+            state.clusterJob = ClusterJobManager.createJob(state.model.name, state.model);
+            state.jobClusters = ClusterJobManager.createJobClusters(state.clusterJob);
+            state.totalIterationsCount = 0;
+            log.debug('Created cluster job: ' + state.clusterJob);
+            log.debug('Created ' + state.jobClusters.size() + ' job clusters');
+            log.debug('Executing algorithm steps');
+            this.runCurrentStep();
+            if (!this.steps[state.currentAlgorithmStep].isAsync()) {
+                this.processNextSteps();
+            }
+        }
+        catch (Exception ex) {
+            log.error('Exception caught in start method', ex);
+            throw ex;
+        }
+    }
+
+    /**
+     * Initializes and runs the step at state.currentAlgorithmStep.
+     * done() is called here only for steps that are not async.
+     */
+    public virtual void runCurrentStep(){
+        ClusterJobState state = this.getJobState();
+        log.debug('Running algorithm step ' + state.currentAlgorithmStep);
+        ClusterAlgorithmStep currentStep = this.steps[state.currentAlgorithmStep];
+        currentStep.init(this);
+        currentStep.run();
+        if (!currentStep.isAsync()) {
+            currentStep.done();
+        }
+    }
+
+    /**
+     * Advances the algorithm after a step has finished. Each loop pass:
+     *  1. increments the iteration counter and stops with status CANCELLED once it
+     *     exceeds ClusterConstants.getMaxIterations();
+     *  2. reloads the job and stops when it is marked for cancel (saving CANCELLED)
+     *     or already FAILED;
+     *  3. calls prepareNextStep(); when that reports the last step, or the step index
+     *     is past the end of the list, saves COMPLETED and stops;
+     *  4. otherwise saves IN_PROGRESS and runs the next step, leaving the loop when
+     *     running in a test or when the step is async.
+     */
+    public virtual void processNextSteps() {
+        ClusterJobState state = this.getJobState();
+        log.debug('Finished algorithm step ' + state.currentAlgorithmStep);
+        while (state.currentAlgorithmStep < this.steps.size()) {
+            state.totalIterationsCount++;
+            if (state.totalIterationsCount > ClusterConstants.getMaxIterations()) {
+                //Break if exceeded max iterations
+                log.error(MSG_ERR_MAX_ITERATIONS);
+                ClusterJobManager.saveJobState(state, ClusterConstants.JOBSTATUS_CANCELLED, MSG_ERR_MAX_ITERATIONS);
+                break;
+            }
+            // Re-read the job on every pass so status changes made elsewhere are seen
+            ClusterJob__c currentJob = ClusterJobManager.getJob(state.clusterJob.Id);
+            if (currentJob.JobStatus__c == ClusterConstants.JOBSTATUS_MARKED_FOR_CANCEL) {
+                //Break if job was marked for cancel, set status to cancelled
+                log.debug(MSG_JOB_CANCELLED);
+                ClusterJobManager.saveJobState(state, ClusterConstants.JOBSTATUS_CANCELLED, MSG_JOB_CANCELLED);
+                break;
+            }
+            if (currentJob.JobStatus__c == ClusterConstants.JOBSTATUS_FAILED) {
+                //Break if job status is failed
+                log.debug('Job status was set to FAILED on the previous step. Finishing algorithm execution');
+                break;
+            }
+            Boolean lastStep = this.prepareNextStep();
+            jobState = state;
+            if (lastStep || state.currentAlgorithmStep > this.steps.size() - 1) {
+                log.debug('Finished algorithm ' + state.model.algorithm);
+                ClusterJobManager.saveJobState(state, ClusterConstants.JOBSTATUS_COMPLETED, 'Cluster job finished');
+                break;
+            }
+            else {
+                ClusterJobManager.saveJobState(state, ClusterConstants.JOBSTATUS_IN_PROGRESS, 'Executing step ' + (state.currentAlgorithmStep + 1) + ' of ' + this.steps.size());
+                if (Test.isRunningTest()){
+                    break; //Can't chain batch apex if running test
+                }
+                this.runCurrentStep();
+                if (this.steps[state.currentAlgorithmStep].isAsync()) {
+                    break; //If step is async this method will be called again
+                }
+            }
+        }
+    }
+    /**
+     * Prepares the state for the next step.
+     * @return true when the algorithm has finished; processNextSteps then marks the job COMPLETED
+     */
+    public abstract Boolean prepareNextStep();
+    /**
+     * Calculates the distance between two value arrays.
+     * NOTE(review): presumably both arrays hold field values in model field order - confirm in subclasses.
+     */
+    public abstract Double calculateDistance(Object[] currentObject, Object[] centroid);
+    /** Returns the parameters of this algorithm for the given model. */
+    public abstract ClusterAlgorithmParameter[] getParameters(ClusterModelWrapper model);
+}
diff --git a/force-app/main/algorithms/classes/ClusterAlgorithmRunner.cls-meta.xml b/force-app/main/algorithms/classes/ClusterAlgorithmRunner.cls-meta.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ApexClass xmlns="urn:metadata.tooling.soap.sforce.com" fqn="ClusterAlgorithmRunner">
+    <apiVersion>46.0</apiVersion>
+    <status>Active</status>
+</ApexClass>