Skip to content

Commit

Permalink
Merge pull request #359 from ironmussa/develop
Browse files Browse the repository at this point in the history
Preparing new release 2.1.1
  • Loading branch information
FavioVazquez authored Oct 17, 2018
2 parents 824f768 + 0a0c45b commit cebda2e
Show file tree
Hide file tree
Showing 42 changed files with 14,366 additions and 27,913 deletions.
32 changes: 16 additions & 16 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@

language: python
python:
- "3.6"
- "3.6-dev" # 3.6 development branch
- "3.6"
- "3.6-dev" # 3.6 development branch

env:
global:
- SPARK_HOME=/tmp/spark-2.3.1-bin-hadoop2.7
global:
- SPARK_HOME=/tmp/spark-2.3.1-bin-hadoop2.7

before_install:
- chmod +x install-spark-2-3-1.sh
- pip install pytest pytest-cov
- pip install coveralls
- chmod +x install-spark-2-3-1.sh
- pip install --upgrade pip
- pip install pytest pytest-cov
- pip install coveralls

install:
- ./install-spark-2-3-1.sh
- "pip install -r requirements-test.txt"
- ./install-spark-2-3-1.sh
- "pip install -r requirements-test.txt --progress-bar=off"

jdk:
- oraclejdk8
- oraclejdk8

script:
- py.test -v --ignore=optimus/dl/ --ignore=tests/test_dl.py
- py.test -v --ignore=optimus/dl/ --ignore=tests/test_dl.py --ignore=examples/

deploy:
provider: pypi
Expand All @@ -33,11 +33,11 @@ deploy:
on:
tags: true
branch: master

notifications:
email: false
slack:
secure: "LsrLufpz5yLaouTM0V5uDy8u3QhBeN42PaT2pbkfUNQysSyF6/9LNMbwlhkUjf4IAEpKkFvRgGg5IlEF8XtXf5xaAJQXTG9Y92EUU62Nv5kj47iXHd7O75gvwgSjqs6a60F1vG0nWkgCJZjmr02eeAnwM25b6bxSM3GgnDyq3e+NJAza5vkkvCgUcwxL5ZtoY/za9xdE16yfgTkAhqRwR2ajzxFBsNOgSN04pgF9K69WM2vopY65N/vBDCQAnQPwlglXxCsf6caKqzXL1ohpQCeqOD5nMbyN1LlF51/UPSrQfMYiKt8ogI61+WYFB0jiueX/trCbd5lBvf8dkd9v6mH8o33sPo6f5ddsrwH9IkRrNsKQtxw/AkekteVmXZ5ym61yISfuRuuqVcy6yYqLrSh+oR9/+NWhZhBfxL03Lb6uotfa/v5s36INW+puyX+Ef22voSjeA9HgUhUMUoKiQkkSxMO1Pr2boOPDDsnyzgiUnKPt/S/PnGv03DYQxdLqdtf6b1jmFNaVSZEwINntmZxtxTArsCaehNKMCA9Amz1MHderkPDHLPGHDmzKq2pv5HCW3a2cH/ic4CudHTtC79JR0Ag85lrEAKXVaijV7FDQIcrFO3Tyug6yMd3CGAhmUIBUZHkcUZ1xNiBkv80HwTIrG3RWqklbdoEJmDYSnFQ="

after_success:
- coveralls
- coveralls
35 changes: 25 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
[![Logo Optimus](https://github.com/ironmussa/Optimus/blob/master/images/logoOptimus.png)](https://hioptimus.com)

[![PyPI version](https://badge.fury.io/py/optimuspyspark.svg)](https://badge.fury.io/py/optimuspyspark) [![Build Status](https://travis-ci.org/ironmussa/Optimus.svg?branch=master)](https://travis-ci.org/ironmussa/Optimus) [![Documentation Status](https://readthedocs.org/projects/optimus-ironmussa/badge/?version=latest)](http://optimus-ironmussa.readthedocs.io/en/latest/?badge=latest)
[![built_by iron](https://img.shields.io/badge/built_by-iron-FF69A4.svg)](http://ironmussa.com) [![Updates](https://pyup.io/repos/github/ironmussa/Optimus/shield.svg)](https://pyup.io/repos/github/ironmussa/Optimus/)
[![GitHub release](https://img.shields.io/github/release/ironmussa/optimus.svg)](https://github.com/ironmussa/Optimus/) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/e01572e2af5640fcbcdd58e7408f3ea0)](https://www.codacy.com/app/favio.vazquezp/Optimus?utm_source=github.com&utm_medium=referral&utm_content=ironmussa/Optimus&utm_campaign=badger) [![StackShare](https://img.shields.io/badge/tech-stack-0690fa.svg?style=flat)](https://stackshare.io/iron-mussa/devops)
[![Platforms](https://img.shields.io/badge/platform-Linux%20%7C%20Mac%20OS%20%7C%20Windows-blue.svg)](https://spark.apache.org/docs/2.2.0/#downloading) [![Code Health](https://landscape.io/github/ironmussa/Optimus/develop/landscape.svg?style=flat)](https://landscape.io/github/ironmussa/Optimus/develop) [![Coverage Status](https://coveralls.io/repos/github/ironmussa/Optimus/badge.svg?branch=master)](https://coveralls.io/github/ironmussa/Optimus?branch=master) [![Mentioned in Awesome Data Science](https://awesome.re/mentioned-badge.svg)](https://github.com/bulutyazilim/awesome-datascience)
[![Join the chat at https://gitter.im/optimuspyspark/Lobby](https://badges.gitter.im/optimuspyspark/Lobby.svg)](https://gitter.im/optimuspyspark/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/ironmussa/Optimus/master)
[![Logo Optimus](https://github.com/ironmussa/Optimus/blob/master/images/logoOptimus.png)](https://hioptimus.com) [![PyPI version](https://badge.fury.io/py/optimuspyspark.svg)](https://badge.fury.io/py/optimuspyspark) [![Build Status](https://travis-ci.org/ironmussa/Optimus.svg?branch=master)](https://travis-ci.org/ironmussa/Optimus) [![Documentation Status](https://readthedocs.org/projects/optimus-ironmussa/badge/?version=latest)](http://optimus-ironmussa.readthedocs.io/en/latest/?badge=latest) [![built_by iron](https://img.shields.io/badge/built_by-iron-FF69A4.svg)](http://ironmussa.com) [![Updates](https://pyup.io/repos/github/ironmussa/Optimus/shield.svg)](https://pyup.io/repos/github/ironmussa/Optimus/) [![GitHub release](https://img.shields.io/github/release/ironmussa/optimus.svg)](https://github.com/ironmussa/Optimus/) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/e01572e2af5640fcbcdd58e7408f3ea0)](https://www.codacy.com/app/favio.vazquezp/Optimus?utm_source=github.com&utm_medium=referral&utm_content=ironmussa/Optimus&utm_campaign=badger) [![StackShare](https://img.shields.io/badge/tech-stack-0690fa.svg?style=flat)](https://stackshare.io/iron-mussa/devops) [![Platforms](https://img.shields.io/badge/platform-Linux%20%7C%20Mac%20OS%20%7C%20Windows-blue.svg)](https://spark.apache.org/docs/2.2.0/#downloading) [![Code Health](https://landscape.io/github/ironmussa/Optimus/develop/landscape.svg?style=flat)](https://landscape.io/github/ironmussa/Optimus/develop) [![Coverage Status](https://coveralls.io/repos/github/ironmussa/Optimus/badge.svg?branch=master)](https://coveralls.io/github/ironmussa/Optimus?branch=master) [![Mentioned in Awesome Data Science](https://awesome.re/mentioned-badge.svg)](https://github.com/bulutyazilim/awesome-datascience) [![Join the chat at 
https://gitter.im/optimuspyspark/Lobby](https://badges.gitter.im/optimuspyspark/Lobby.svg)](https://gitter.im/optimuspyspark/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/ironmussa/Optimus/master)

Optimus is the missing framework to profile, clean, process and do ML in a distributed fashion using Apache Spark(PySpark).

Expand All @@ -17,9 +10,14 @@ In your terminal just type `pip install optimuspyspark`
* Apache Spark>= 2.3.0
* Python>=3.6

## Examples
## Examples

You can go to the 10 minutes to Optimus notebook, where you can find the basics to start working.

You can also go to the [examples](examples/) folder to find specific notebooks about data cleaning, data munging, profiling, data enrichment and how to create ML and DL models.

Also check the [Cheat Sheet](https://htmlpreview.github.io/?https://github.com/ironmussa/Optimus/blob/master/docs/cheatsheet/optimus_cheat_sheet.html).

After installation go to the [examples](examples/) folder to find notebooks about data cleaning, data munging, and how to create ML and DL models.

## Documentation

Expand All @@ -33,6 +31,13 @@ https://optimus.featureupvote.com/

And if you want to see some cool information and tutorials about Optimus check out our blog https://medium.com/hi-optimus

## Start Optimus

```python
from optimus import Optimus
op= Optimus()
```

## Loading data
Now Optimus can load data in csv, json, parquet and avro from a local file or a URL.

Expand Down Expand Up @@ -178,6 +183,16 @@ Optimus comes with a powerful and unique data profiler. Besides basic and advanc
it also lets you know what type of data every column has. For example, if a string column has string, integer, float, bool and date values, Optimus can give you a unique overview of your data.
Just run `df.profile("*")` to profile all the columns. For more info about the profiler please go to this [notebook](examples/new-api-profiler.ipynb)

![Profiler](images/profiler.png)

## Data enrichment

You can connect to external APIs to enrich your data using Optimus.

```python
sdfsadfasdas
```

## Machine Learning

Machine Learning is one of the last steps, and the goal for most Data Science WorkFlows.
Expand Down
200 changes: 198 additions & 2 deletions docs/cheatsheet/optimus_cheat_sheet.html
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
}

.gray-light {
background-color: #e7e6e6;
background-color: #ccc;
}

.green {
Expand Down Expand Up @@ -615,7 +615,203 @@ <h3>Reshaping Data</h3>
("name","names"),
("function","task")])</code></pre>
<span class="commentary">Drop columns from a DataFrame.</span>

<h3>Nest</h3>

<pre><code class="python">df = df\
.cols.nest(["height", "function", "rank"])</code></pre>
<span class="commentary">Merge multiple columns as string</span>
<div class="flex-row d-flex">
<div>
<table class="table-operations-with-data">
<thead>
<tr>
<td class="gray">names</td>
<td class="green">height</td>
<td class="green">function</td>
<td class="green">rank</td>
</tr>
</thead>
<tbody>
<tr>
<td class="gray-light">optimus</td>
<td class="green-light">28.0</td>
<td class="green-light">leader</td>
<td class="green-light">10</td>
</tr>
<tr>
<td class="gray-light">ironhide</td>
<td class="green-light">26.0</td>
<td class="green-light">security</td>
<td class="green-light">7</td>
</tr>
<tr>
<td class="gray-light">bumblebee</td>
<td class="green-light">17.5</td>
<td class="green-light">espionage</td>
<td class="green-light">7</td>
</tr>
</tbody>
</table>
</div>
<div class="operation-symbol">
🡆
</div>
<div>
<table class="table-operations-with-data">
<thead>
<tr>
<td class="gray">names</td>
<td class="green">_0</td>
</tr>
</thead>
<tbody>
<tr>
<td class="gray-light">optimus</td>
<td class="green-light">28.0 leader 10</td>
</tr>
<tr>
<td class="gray-light">ironhide</td>
<td class="green-light">26.0 security 7</td>
</tr>
<tr>
<td class="gray-light">bumblebee</td>
<td class="green-light">17.5 espionage 7</td>
</tr>
</tbody>
</table>
</div>
</div>
<pre><code class="python">df = df\
.cols.nest(["height", "function", "rank"],
"array")</code></pre>
<span class="commentary">Merge multiple columns as array</span>
<div class="flex-row d-flex">
<div>
<table class="table-operations-with-data">
<thead>
<tr>
<td class="gray">names</td>
<td class="green">height</td>
<td class="green">function</td>
<td class="green">rank</td>
</tr>
</thead>
<tbody>
<tr>
<td class="gray-light">optimus</td>
<td class="green-light">28.0</td>
<td class="green-light">leader</td>
<td class="green-light">10</td>
</tr>
<tr>
<td class="gray-light">ironhide</td>
<td class="green-light">26.0</td>
<td class="green-light">security</td>
<td class="green-light">7</td>
</tr>
<tr>
<td class="gray-light">bumblebee</td>
<td class="green-light">17.5</td>
<td class="green-light">espionage</td>
<td class="green-light">7</td>
</tr>
</tbody>
</table>
</div>
<div class="operation-symbol">
🡆
</div>
<div>
<table class="table-operations-with-data">
<thead>
<tr>
<td class="gray">names</td>
<td class="green">_0</td>
</tr>
</thead>
<tbody>
<tr>
<td class="gray-light">optimus</td>
<td class="green-light">[28.0, leader, 10]</td>
</tr>
<tr>
<td class="gray-light">ironhide</td>
<td class="green-light">[26.0, security, 7]</td>
</tr>
<tr>
<td class="gray-light">bumblebee</td>
<td class="green-light">[17.5, espionage, 7]</td>
</tr>
</tbody>
</table>
</div>
</div>
<pre><code class="python">df = df\
.cols.nest(["height", "function", "rank"],
"vector")</code></pre>
<span class="commentary">Merge multiple columns as vector</span>
<div class="flex-row d-flex">
<div>
<table class="table-operations-with-data">
<thead>
<tr>
<td class="gray">names</td>
<td class="green">height</td>
<td class="green">rank</td>
</tr>
</thead>
<tbody>
<tr>
<td class="gray-light">optimus</td>
<td class="green-light">28.0</td>
<td class="green-light">10</td>
</tr>
<tr>
<td class="gray-light">ironhide</td>
<td class="green-light">26.0</td>
<td class="green-light">7</td>
</tr>
<tr>
<td class="gray-light">bumblebee</td>
<td class="green-light">17.5</td>
<td class="green-light">7</td>
</tr>
</tbody>
</table>
</div>
<div class="operation-symbol">
🡆
</div>
<div>
<table class="table-operations-with-data">
<thead>
<tr>
<td class="gray">names</td>
<td class="green">_0</td>
</tr>
</thead>
<tbody>
<tr>
<td class="gray-light">optimus</td>
<td class="green-light">[28.0, 10]</td>
</tr>
<tr>
<td class="gray-light">ironhide</td>
<td class="green-light">[26.0, 7]</td>
</tr>
<tr>
<td class="gray-light">bumblebee</td>
<td class="green-light">[17.5, 7]</td>
</tr>
</tbody>
</table>
</div>
</div>

</div>


<div class="col-sm-4">
<h3>Select Rows</h3>
<div class="flex-row d-flex">
Expand Down Expand Up @@ -1201,4 +1397,4 @@ <h4>Distance Cluster</h4>

</div>
</body>
</html>
</html>
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
# The short X.Y version.
version = '2.1'
# The full version, including alpha/beta/rc tags.
release = '2.1.0'
release = '2.1.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
Loading

0 comments on commit cebda2e

Please sign in to comment.