Skip to content
This repository has been archived by the owner on Jan 21, 2024. It is now read-only.

Commit

Permalink
First commit (migration from internal repo)
Browse files Browse the repository at this point in the history
  • Loading branch information
iskander-m authored and iskander-sfdc committed Apr 20, 2020
1 parent 8ae4958 commit 9ff302a
Show file tree
Hide file tree
Showing 338 changed files with 67,167 additions and 1 deletion.
21 changes: 21 additions & 0 deletions .forceignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# List files or directories below to ignore them when running force:source:push, force:source:pull, and force:source:status
# More information: https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_exclude_source.htm
#

package.xml

# LWC configuration files
**/jsconfig.json
**/.eslintrc.json

# LWC Jest
**/__tests__/**

# Profile changes
force-app/main/default/profiles/Admin.profile-meta.xml
force-app/main/default/profiles/Custom%3A Marketing Profile.profile-meta.xml
force-app/main/default/profiles/Custom%3A Sales Profile.profile-meta.xml
force-app/main/default/profiles/Custom%3A Support Profile.profile-meta.xml

# Workaround for "Load of metadata from db failed for metadata of type:AppMenu and file name:AppSwitcher"
AppSwitcher.appMenu
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Ignore SFDX object definitions and other settings
.sfdx
.DS_Store
.project
.salesforce
.settings
node_modules
.idea
mdapi-source
12 changes: 12 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Copyright (c) 2020, Salesforce.com, Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

* Neither the name of Salesforce.com nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
98 changes: 97 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,97 @@
# ClusterAnalysis
# SFDC Cluster Analysis Package
Performs [cluster analysis](https://en.wikipedia.org/wiki/Cluster_analysis) on Salesforce standard and custom objects, breaks records into groups (clusters) using K-Means and K-Medoids (CLARA) algorithms.<br/>
Supports clustering objects with mixed data types (numeric, category/picklist, text) using Gower distance function.<br/>
Visualizes the clustering result using t-SNE dimensionality reduction technique.<br/>
Click [here](../../wiki/Cluster-Analysis-in-Salesforce) to get more information about the methodology and algorithms used in this app.

## Installation
Install the application from Salesforce AppExchange

## Dev, Build and Test

### Create a scratch org
```
sfdx force:org:create -f ./config/project-scratch-def.json -a ScratchOrgAlias --durationdays 30
sfdx force:config:set defaultusername=<user name returned from the previous command>
```

### Push the source code to the scratch org
```
sfdx force:source:push
```

### Create sample lead records
```
sfdx force:data:bulk:upsert -f force-app/main/default/staticresources/ClustanLeadsMock.csv -s Lead -i Email
```

### Run Apex tests
```
sfdx force:apex:test:run
```

### Authorise an org
```
sfdx force:auth:web:login --setalias OrgAlias
```

### Deploy to an org
The app also works if deployed to an org without a namespace. However, I recommend using a managed package installation.
```
sfdx force:source:deploy --checkonly --sourcepath force-app --targetusername OrgAlias --testlevel RunLocalTests
sfdx force:source:deploy --sourcepath force-app --targetusername OrgAlias --testlevel NoTestRun
```

### Create a managed package
```
sfdx force:package:create --name "Cluster Analysis" --path force-app --packagetype Managed -d "Group records from any object into clusters and visualize the result using machine learning algorithms"
```

### Create and promote a package version
```
sfdx force:package:version:create --package "Cluster Analysis" --wait 10 --installationkeybypass --codecoverage
sfdx force:package:version:promote --package "Cluster Analysis@<version number returned from the previous command>"
```


## Description of Files and Directories
* **sfdx-project.json**: Required by Salesforce DX. Configures your project. Use this file to specify the parameters that affect your Salesforce development project.
* **config/project-scratch-def.json**: Sample file that shows how to define the shape of a scratch org. You reference this file when you create your scratch org with the force:org:create command.
* **force-app**: Directory that contains the source for the Cluster Analysis package and tests.
* **force-app/main/default**: Directory that contains the app source and shared classes.
* **force-app/main/algorithms**: Directory that contains algorithm classes.
* **force-app/main/utils**: Directory that contains utility classes.
* **force-app/main/test**: Directory that contains Apex test classes.
* **.project**: Required by the Eclipse IDE. Describes the Eclipse project.
* **.gitignore**: Optional Git file. Specifies intentionally untracked files that you want Git (or in this case GitHub) to ignore.

## Resources
Clustering Large Data Sets (By Leonard Kaufman, Peter J.Rousseeuw, 1986)

Clustering with optimised weights for Gower’s metric (By Jeroen van den Hoven)
https://beta.vu.nl/nl/Images/stageverslag-hoven_tcm235-777817.pdf

Clustering on mixed type data (by Thomas Filaire)
https://towardsdatascience.com/clustering-on-mixed-type-data-8bbd0a2569c3

Visualizing Data using t-SNE (by Laurens van der Maaten)
https://lvdmaaten.github.io/tsne/

tSNEJS (Copyright Andrej Karpathy)
https://github.com/karpathy/tsnejs

Javascript SOQL parser (Copyright 2019 Austin Turner)
https://github.com/paustint/soql-parser-js

JavaScript Algorithms and Data Structures (Copyright (c) 2018 Oleksii Trekhleb)
https://github.com/trekhleb/javascript-algorithms

Data-Driven Documents (D3.js, Copyright 2010-2017 Mike Bostock)
https://d3js.org/

Building Machine Learning Systems with Apex (Presented on DF14 by Jen Wyher and Paul Battisson)
https://www.slideshare.net/pbattisson/df14-building-machine-learning-systems-with-apex


## Issues
To report a bug or suggest an enhancement, create an issue on the "Issues" tab.
10 changes: 10 additions & 0 deletions config/project-scratch-def.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"orgName": "imukhamedgaliyev Company",
"edition": "Developer",
"features": ["DebugApex"],
"settings": {
"lightningExperienceSettings": {
"enableS1DesktopEnabled": true
}
}
}
30 changes: 30 additions & 0 deletions force-app/main/algorithms/classes/ClusterAlgorithmFactory.cls
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
 * Algorithm factory - lists the available clustering algorithms and
 * creates the matching algorithm runner for a model or an algorithm name
 *
 * @author: Iskander Mukhamedgaliyev
 */
public with sharing class ClusterAlgorithmFactory {
    /*
     * Returns one select option per available algorithm,
     * K-Medoids first and K-Means second
     */
    public static List<UiSelectOption> getAlgorithmList() {
        //TODO: move this to some settings class, possibly create custom settings
        UiSelectOption kMedoidsOption = new UiSelectOption(ClusterConstants.ALGORITHM_KMEDOIDS, ClusterConstants.ALGORITHM_KMEDOIDS);
        UiSelectOption kMeansOption = new UiSelectOption(ClusterConstants.ALGORITHM_KMEANS, ClusterConstants.ALGORITHM_KMEANS);
        return new List<UiSelectOption>{ kMedoidsOption, kMeansOption };
    }

    /*
     * Creates a runner for the algorithm configured on the given model
     */
    public static ClusterAlgorithmRunner getRunner(ClusterModelWrapper model) {
        String algorithmName = model.algorithm;
        return getRunner(algorithmName);
    }

    /*
     * Creates a runner for the given algorithm name.
     * Throws ClusterException when the name matches none of the known algorithms
     */
    public static ClusterAlgorithmRunner getRunner(String algorithm) {
        if (algorithm == ClusterConstants.ALGORITHM_KMEDOIDS) {
            return new ClusterKMedoidsPAMRunner();
        }
        if (algorithm == ClusterConstants.ALGORITHM_KMEANS) {
            return new ClusterKMeansRunner();
        }
        //Nothing matched - report the unsupported algorithm name to the caller
        throw new ClusterException(algorithm + ' algorithm is not supported');
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<ApexClass xmlns="http://soap.sforce.com/2006/04/metadata">
<apiVersion>47.0</apiVersion>
<status>Active</status>
</ApexClass>
29 changes: 29 additions & 0 deletions force-app/main/algorithms/classes/ClusterAlgorithmParameter.cls
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
 * Algorithm parameter - describes a single configurable parameter of a
 * clustering algorithm: its name, description, type, value and options
 *
 * @author: Iskander Mukhamedgaliyev
 */
public with sharing class ClusterAlgorithmParameter {
    //Supported kinds of parameter values
    public enum ParameterType { STRING_PARAMETER, INTEGER_PARAMETER, DECIMAL_PARAMETER, BOOLEAN_PARAMETER, SELECT_PARAMETER }

    @AuraEnabled public String name;
    @AuraEnabled public String description;
    //Name of the ParameterType enum value, kept as a String alongside typeEnum
    @AuraEnabled public String type;
    //Same parameter type as an enum value, not marked @AuraEnabled
    public ParameterType typeEnum;
    @AuraEnabled public Object value;
    @AuraEnabled public String[] options;

    //Creates an empty parameter, all fields are left unset
    public ClusterAlgorithmParameter() { }

    /*
     * Creates a fully populated parameter.
     * The String type field is derived from paramType, so paramType must not be null
     */
    public ClusterAlgorithmParameter(String paramName, String paramDescription, ParameterType paramType, Object paramValue, String[] paramOptions) {
        typeEnum = paramType;
        type = typeEnum.name();
        name = paramName;
        description = paramDescription;
        value = paramValue;
        options = paramOptions;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<ApexClass xmlns="http://soap.sforce.com/2006/04/metadata">
<apiVersion>47.0</apiVersion>
<status>Active</status>
</ApexClass>
158 changes: 158 additions & 0 deletions force-app/main/algorithms/classes/ClusterAlgorithmRunner.cls
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*
* Base class for algorithms logic.
* Keeps an ordered list of algorithm steps and drives their execution:
* start() creates the cluster job and runs the current step, processNextSteps()
* keeps advancing until the algorithm finishes, is cancelled, fails,
* exceeds the maximum number of iterations or reaches an async step.
* Subclasses provide the job state, the step transition logic (prepareNextStep),
* the distance function and the list of algorithm parameters.
*
* @author: Iskander Mukhamedgaliyev
*/
public abstract with sharing class ClusterAlgorithmRunner {
private static final String MSG_ERR_MAX_ITERATIONS = 'Maximum algorithm iterations reached, job was interrupted';
private static final String MSG_JOB_CANCELLED = 'Job was cancelled by request';
@TestVisible
private static final String MSG_ERR_OUTPUTFIELDVALIDATION = 'Cluster Result Output field name must start with \'ClusterNumber\' and should belong to the model object. Please create a new custom field which starts with \'ClusterNumber\'';
//Result of validateModel: overall validity flag plus the list of validation messages
public class ModelValidationResult {
public List<String> messages;
public Boolean isValid;
}
public static Logger log = LogFactory.getLogger();
//Static copy of the job state, assigned in processNextSteps after prepareNextStep returns
@TestVisible
public static ClusterJobState jobState;
//Algorithm steps, indexed by state.currentAlgorithmStep
public List<ClusterAlgorithmStep> steps;
public ClusterAlgorithmRunner() {
this.steps = new List<ClusterAlgorithmStep>();
}

/*
* Validates the model before it is used by the algorithm.
* The model is valid when:
* - every output field has a name starting with 'clusternumber' (compared in lower case)
*   and belongs to the model object
* - it has an id field (distance type NONE with isIdField set)
* - it has at least one Numeric, Category or Text field
* - ClusterAccessCheck.checkModelObjectAccessPermissions returns no errors
* Returns the validity flag together with all collected messages
*/
public virtual ClusterAlgorithmRunner.ModelValidationResult validateModel(ClusterModelWrapper model) {
ModelValidationResult result = new ModelValidationResult();
result.isValid = true;
result.messages = new List<String>();
Boolean hasIdField = false;
Boolean hasValueFields = false;
for (Integer i=0; i<model.fields.size(); i++) {
if (model.fields[i].distanceType == ClusterConstants.FIELDTYPE_OUTPUT) {
//Output field checks are accumulated in isValid, the message is added once after the loop
result.isValid &= model.fields[i].name.toLowerCase().startsWith('clusternumber');
result.isValid &= model.objectName.toLowerCase() == model.fields[i].objectName.toLowerCase();
}
else if (model.fields[i].distanceType == ClusterConstants.FIELDTYPE_NONE && model.fields[i].isIdField) {
hasIdField = true;
}
else if ((model.fields[i].distanceType == ClusterConstants.FIELDTYPE_CATEGORY) ||
(model.fields[i].distanceType == ClusterConstants.FIELDTYPE_NUMERIC) ||
(model.fields[i].distanceType == ClusterConstants.FIELDTYPE_TEXT))
{
hasValueFields = true;
}
}
//At this point isValid can only be false because of an output field check
if (!result.isValid) {
result.messages.add(MSG_ERR_OUTPUTFIELDVALIDATION);
}
if (!hasIdField) {
result.isValid = false;
result.messages.add('Model must have object id field');
}
if (!hasValueFields) {
result.isValid = false;
result.messages.add('Model must have at least one Numeric, Category or Text field');
}
List<String> accessErrors = ClusterAccessCheck.checkModelObjectAccessPermissions(model);
if (accessErrors.size() > 0) {
result.isValid = false;
result.messages.addAll(accessErrors);
}
return result;
}

/*
* Initializes the runner for the model.
* Adds a ClusterJobOutputStep to the steps when the model has at least one output field
*/
public virtual void init(ClusterModelWrapper model) {
Boolean hasJobOutput = false;
for (Integer i=0; i<model.fields.size(); i++) {
if (model.fields[i].distanceType == ClusterConstants.FIELDTYPE_OUTPUT) {
hasJobOutput = true;
break;
}
}
if (hasJobOutput) {
ClusterJobOutputStep jobOutputStep = new ClusterJobOutputStep();
this.steps.add(jobOutputStep);
}
}
//Job state accessors, implemented by the concrete algorithm runner
public abstract ClusterJobState getJobState();
public abstract void setJobState(ClusterJobState state);

/*
* Starts the algorithm: creates the cluster job and its job clusters, resets the
* iteration counter and runs the current step. If that step is not async the
* following steps are processed right away.
* Any exception is logged and rethrown to the caller
*/
public virtual void start() {
try {
ClusterJobState state = this.getJobState();
log.debug('Starting algorithm: ' + state.model.algorithm);
//We check CRUD permissions on ClusterJobResult__c once here and do not check them again in every step
//NOTE(review): presumably checkCRUDPermission throws when access is missing - confirm in ClusterAccessCheck
ClusterAccessCheck.checkCRUDPermission(Schema.SObjectType.ClusterJobResult__c);
state.clusterJob = ClusterJobManager.createJob(state.model.name, state.model);
state.jobClusters = ClusterJobManager.createJobClusters(state.clusterJob);
state.totalIterationsCount = 0;
log.debug('Created cluster job: ' + state.clusterJob);
log.debug('Created ' + state.jobClusters.size() + ' job clusters');
log.debug('Executing algorithm steps');
this.runCurrentStep();
if (!this.steps[state.currentAlgorithmStep].isAsync()) {
this.processNextSteps();
}
}
catch (Exception ex) {
log.error('Exception caught in start method', ex);
throw ex;
}
}

/*
* Runs the step at state.currentAlgorithmStep: init, then run.
* done() is called here only for steps that are not async
*/
public virtual void runCurrentStep(){
ClusterJobState state = this.getJobState();
log.debug('Running algorithm step ' + state.currentAlgorithmStep);
ClusterAlgorithmStep currentStep = this.steps[state.currentAlgorithmStep];
currentStep.init(this);
currentStep.run();
if (!currentStep.isAsync()) {
currentStep.done();
}
}

/*
* Called after a step has finished. Repeatedly prepares and runs the next step until
* one of the following happens:
* - the maximum number of iterations is exceeded (job is saved as cancelled)
* - the job was marked for cancel (job is saved as cancelled)
* - the job status is failed
* - the algorithm reports the last step or there are no more steps (job is saved as completed)
* - a test is running, or the step that was just started is async
*/
public virtual void processNextSteps() {
ClusterJobState state = this.getJobState();
log.debug('Finished algorithm step ' + state.currentAlgorithmStep);
//This class never changes state.currentAlgorithmStep itself
//NOTE(review): presumably prepareNextStep advances it in the subclass - confirm in implementations
while (state.currentAlgorithmStep < this.steps.size()) {
state.totalIterationsCount++;
if (state.totalIterationsCount > ClusterConstants.getMaxIterations()) {
//Break if exceeded max iterations
log.error(MSG_ERR_MAX_ITERATIONS);
ClusterJobManager.saveJobState(state, ClusterConstants.JOBSTATUS_CANCELLED, MSG_ERR_MAX_ITERATIONS);
break;
}
//Job is re-read on every iteration so that status changes made elsewhere are seen
ClusterJob__c currentJob = ClusterJobManager.getJob(state.clusterJob.Id);
if (currentJob.JobStatus__c == ClusterConstants.JOBSTATUS_MARKED_FOR_CANCEL) {
//Break if job was marked for cancel, set status to cancelled
log.debug(MSG_JOB_CANCELLED);
ClusterJobManager.saveJobState(state, ClusterConstants.JOBSTATUS_CANCELLED, MSG_JOB_CANCELLED);
break;
}
if (currentJob.JobStatus__c == ClusterConstants.JOBSTATUS_FAILED) {
//Break if job status is failed
log.debug('Job status was set to FAILED on the previous step. Finishing algorithm execution');
break;
}
Boolean lastStep = this.prepareNextStep();
jobState = state;
if (lastStep || state.currentAlgorithmStep > this.steps.size() - 1) {
log.debug('Finished algorithm ' + state.model.algorithm);
ClusterJobManager.saveJobState(state, ClusterConstants.JOBSTATUS_COMPLETED, 'Cluster job finished');
break;
}
else {
ClusterJobManager.saveJobState(state, ClusterConstants.JOBSTATUS_IN_PROGRESS, 'Executing step ' + (state.currentAlgorithmStep + 1) + ' of ' + this.steps.size());
if (Test.isRunningTest()){
break; //Can't chain batch apex if running test
}
this.runCurrentStep();
if (this.steps[state.currentAlgorithmStep].isAsync()) {
break; //If step is async this method will be called again
}
}
}
}
//Prepares the next step, the returned value is treated by processNextSteps as "this was the last step"
public abstract Boolean prepareNextStep();
//Calculates the distance between an object and a centroid, both passed as value arrays
public abstract Double calculateDistance(Object[] currentObject, Object[] centroid);
//Returns the algorithm parameters for the given model
public abstract ClusterAlgorithmParameter[] getParameters(ClusterModelWrapper model);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<ApexClass xmlns="urn:metadata.tooling.soap.sforce.com" fqn="ClusterAlgorithmRunner">
<apiVersion>46.0</apiVersion>
<status>Active</status>
</ApexClass>
Loading

0 comments on commit 9ff302a

Please sign in to comment.