Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Python 3.10 Support and Fix Compilation Issues #194

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,9 @@ target/
examples/australian.txt
examples/diabetes.txt
examples/heart.txt

libact/query_strategies/_hintsvm.c
.devcontainer/
.github/dependabot.yml

temp/
17 changes: 9 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ Comments and questions on the package is welcomed at `libact-users@googlegroups.

# Basic Dependencies

* Python 2.7, 3.3, 3.4, 3.5, 3.6
* Python 2.7, 3.3, 3.4, 3.5, 3.6, 3.10

* Python dependencies
* Debian (>= 7) / Ubuntu (>= 14.04)
```
pip install -r requirements.txt
sudo apt-get install build-essential gfortran libatlas-base-dev liblapacke-dev python3-dev
```

* Debian (>= 7) / Ubuntu (>= 14.04)
* Python dependencies
```
sudo apt-get install build-essential gfortran libatlas-base-dev liblapacke-dev python3-dev
pip install -r requirements.txt
```

* Arch
Expand Down Expand Up @@ -66,9 +66,10 @@ python setup.py install --user
```

To build and install from source for all users on Unix/Linux:

**(This is the recommended method for Python 3.10 users)**
```
python setup.py build
sudo python setup.py install
pip install -e .
```

## Installation Options
Expand Down Expand Up @@ -154,4 +155,4 @@ If you find this package useful, please cite the original works (see Reference o

# Acknowledgments

The authors thank Chih-Wei Chang and other members of the [Computational Learning Lab](https://learner.csie.ntu.edu.tw/) at National Taiwan University for valuable discussions and various contributions to making this package better.
The authors thank Chih-Wei Chang and other members of the [Computational Learning Lab](https://learner.csie.ntu.edu.tw/) at National Taiwan University for valuable discussions and various contributions to making this package better.
32 changes: 22 additions & 10 deletions examples/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,12 @@

import numpy as np
import matplotlib.pyplot as plt
try:
from sklearn.model_selection import train_test_split
except ImportError:
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split

# libact classes
from libact.base.dataset import Dataset, import_libsvm_sparse
from libact.models import LogisticRegression
from libact.query_strategies import RandomSampling, UncertaintySampling
from libact.query_strategies import RandomSampling, UncertaintySampling, VarianceReduction, HintSVM
from libact.labelers import IdealLabeler


Expand Down Expand Up @@ -64,6 +61,8 @@ def main():
trn_ds, tst_ds, y_train, fully_labeled_trn_ds = \
split_train_test(dataset_filepath, test_size, n_labeled)
trn_ds2 = copy.deepcopy(trn_ds)
trn_ds3 = copy.deepcopy(trn_ds)
trn_ds4 = copy.deepcopy(trn_ds)
lbr = IdealLabeler(fully_labeled_trn_ds)

quota = len(y_train) - n_labeled # number of samples to query
Expand All @@ -78,20 +77,33 @@ def main():
model = LogisticRegression()
E_in_2, E_out_2 = run(trn_ds2, tst_ds, lbr, model, qs2, quota)

# qs3 = VarianceReduction(trn_ds3, model=LogisticRegression())
# E_in_3, E_out_3 = run(trn_ds3, tst_ds, lbr, model, qs3, quota)

qs4 = HintSVM(trn_ds4)
E_in_4, E_out_4 = run(trn_ds4, tst_ds, lbr, model, qs4, quota)

# Plot the learning curve of UncertaintySampling to RandomSampling
# The x-axis is the number of queries, and the y-axis is the corresponding
# error rate.
query_num = np.arange(1, quota + 1)
plt.plot(query_num, E_in_1, 'b', label='qs Ein')
plt.plot(query_num, E_in_2, 'r', label='random Ein')
plt.plot(query_num, E_out_1, 'g', label='qs Eout')
plt.plot(query_num, E_out_2, 'k', label='random Eout')
plt.plot(query_num, E_in_1, 'b', label='qs Ein',
linestyle='dashed')
plt.plot(query_num, E_out_1, 'b', label='qs Eout')
plt.plot(query_num, E_in_2, 'r', label='random Ein',
linestyle='dashed')
plt.plot(query_num, E_out_2, 'r', label='random Eout')
# plt.plot(query_num, E_in_3, 'g', label='vr Ein', linestyle='dashed')
# plt.plot(query_num, E_out_3, 'g', label='vr Eout')
plt.plot(query_num, E_in_4, 'k', label='SVM Ein',
linestyle='dashed')
plt.plot(query_num, E_out_4, 'k', label='SVM Eout')
plt.xlabel('Number of Queries')
plt.ylabel('Error')
plt.title('Experiment Result')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
fancybox=True, shadow=True, ncol=5)
plt.show()
plt.savefig("test.png")


if __name__ == '__main__':
Expand Down
12 changes: 7 additions & 5 deletions libact/base/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ class Dataset(object):
"""

def __init__(self, X=None, y=None):
if X is None: X = np.array([])
if X is None:
X = np.array([])
elif not isinstance(X, sp.csr_matrix):
X = np.array(X)

if y is None: y = []
y = np.array(y)
if y is None:
y = []
y = np.array(y, dtype=object)

self._X = X
self._y = y
Expand Down Expand Up @@ -123,7 +125,7 @@ def append(self, feature, label=None):
"""
if isinstance(self._X, np.ndarray):
self._X = np.vstack([self._X, feature])
else: # sp.csr_matrix
else: # sp.csr_matrix
self._X = sp.vstack([self._X, feature])
self._y = np.append(self._y, label)

Expand Down Expand Up @@ -218,7 +220,7 @@ def labeled_uniform_sample(self, sample_size, replace=True):
sample_size
"""
idx = np.random.choice(np.where(self.get_labeled_mask())[0],
size=sample_size, replace=replace )
size=sample_size, replace=replace)
return Dataset(self._X[idx], self._y[idx])


Expand Down
4 changes: 2 additions & 2 deletions libact/query_strategies/_hintsvm.pyx
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION

import numpy as np
import numpy as np
cimport numpy as np
from libc.stdlib cimport free
cimport _hintsvm
from . cimport _hintsvm

cdef extern from *:
ctypedef struct svm_parameter:
Expand Down
10 changes: 5 additions & 5 deletions libact/query_strategies/density_weighted_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,12 @@ def __init__(self, dataset, base_query_strategy, similarity_metric=None,
else:
self.clustering_method = KMeans(
n_clusters=5, random_state=self.random_state_)

if similarity_metric is not None:
self.similarity_metric = similarity_metric
else:
self.similarity_metric = cosine_similarity


@inherit_docstring_from(QueryStrategy)
def update(self, entry_id, label):
pass
Expand All @@ -104,7 +103,7 @@ def _get_scores(self):
scores = self.base_query_strategy._get_scores()
_, X_pool = dataset.get_unlabeled_entries()
unlabeled_entry_ids, base_scores = zip(*scores)

self.clustering_method.fit(X)
pool_cluster = self.clustering_method.predict(X_pool)
cluster_center = self.clustering_method.cluster_centers_
Expand All @@ -126,6 +125,7 @@ def make_query(self):
dataset = self.dataset

unlabeled_entry_ids, scores = zip(*self._get_scores())
ask_id = self.random_state_.choice(np.where(scores == np.max(scores))[0])
ask_id = self.random_state_.choice(
np.where(scores == np.max(scores))[0])

return unlabeled_entry_ids[ask_id]
return unlabeled_entry_ids[ask_id]
22 changes: 12 additions & 10 deletions libact/query_strategies/density_weighted_uncertainty_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def __init__(self, *args, **kwargs):
dis = np.zeros((len(all_x), self.n_clusts))
for i in range(self.n_clusts):
dis[:, i] = np.exp(-np.einsum('ij,ji->i', (all_x - centers[i]),
(all_x - centers[i]).T) / 2 / self.sigma)
(all_x - centers[i]).T) / 2 / self.sigma)

# EM procedure to estimate the prior
for _ in range(self.max_iter):
Expand Down Expand Up @@ -153,6 +153,7 @@ def make_query(self):

return unlabeled_entry_ids[ask_id]


class DensityWeightedLogisticRegression(object):
"""Density Weighted Logistic Regression

Expand Down Expand Up @@ -203,15 +204,16 @@ def __init__(self, density_estimate, centers, C):

def _likelihood(self, w, X, y):
w = w.reshape(-1, 1)
sigmoid = lambda t: 1. / (1. + np.exp(-t))
def sigmoid(t): return 1. / (1. + np.exp(-t))
# w --> shape = (d+1, 1)
L = lambda w: (self.C/2. * np.dot(w[:-1].T, w[:-1]) - \
np.sum(np.log(
np.sum(self.density *
sigmoid(np.dot(y,
(np.dot(self.centers, w[:-1]) + w[-1]).T)
), axis=1)
), axis=0))[0][0]

def L(w): return (self.C/2. * np.dot(w[:-1].T, w[:-1]) -
np.sum(np.log(
np.sum(self.density *
sigmoid(np.dot(y,
(np.dot(self.centers, w[:-1]) + w[-1]).T)
), axis=1)
), axis=0))[0][0]

return L(w)

Expand All @@ -235,7 +237,7 @@ def predict(self):

"""
if self.w_ is not None:
sigmoid = lambda t: 1. / (1. + np.exp(-t))
def sigmoid(t): return 1. / (1. + np.exp(-t))
return sigmoid(np.dot(self.centers, self.w_[:-1]) + self.w_[-1])
else:
# TODO the model is not trained
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ class CostSensitiveReferencePairEncoding(QueryStrategy):

def __init__(self, dataset, scoring_fn, model, base_model, n_models=100,
n_jobs=1, random_state=None):
super(CostSensitiveReferencePairEncoding, self).__init__(dataset=dataset)
super(CostSensitiveReferencePairEncoding,
self).__init__(dataset=dataset)

self.model_ = model
self.csrpe_ = CSRPE(scoring_fn=scoring_fn, base_clf=base_model,
Expand All @@ -94,12 +95,12 @@ def make_query(self):
Z = self.csrpe_.predicted_code(X_pool)
predZ = self.csrpe_.encode(predY)

dist = paired_distances(Z, predZ, metric=hamming) # z1 z2
dist2 = self.csrpe_.predict_dist(X_pool) # z1 zt
#dist3 = self.csrpe.distance(predZ) # z2 zt
dist = paired_distances(Z, predZ, metric=hamming) # z1 z2
dist2 = self.csrpe_.predict_dist(X_pool) # z1 zt
# dist3 = self.csrpe.distance(predZ) # z2 zt

dist = dist + dist2
#dist = dist + dist3
# dist = dist + dist3

ask_id = self.random_state_.choice(
np.where(np.isclose(dist, np.max(dist)))[0])
Expand Down Expand Up @@ -127,8 +128,10 @@ def train(self, X, y):
self.n_samples = np.shape(X)[0]
self.n_labels = np.shape(y)[1]

score0 = self.scoring_fn(y, np.tile(self.rep_label[0], (self.n_samples, 1)))
score1 = self.scoring_fn(y, np.tile(self.rep_label[1], (self.n_samples, 1)))
score0 = self.scoring_fn(y, np.tile(
self.rep_label[0], (self.n_samples, 1)))
score1 = self.scoring_fn(y, np.tile(
self.rep_label[1], (self.n_samples, 1)))
lbl = (((score1 - score0) > 0) + 0.0)

weight = np.abs(score1 - score0)
Expand All @@ -153,8 +156,8 @@ def __init__(self, scoring_fn, base_clf, n_clfs, n_jobs,
metric='euclidean', random_state=None):
self.scoring_fn = scoring_fn
self.base_clf = base_clf
self.nn_ = NearestNeighbors(1, algorithm='ball_tree',
metric=metric, n_jobs=n_jobs)
self.nn_ = NearestNeighbors(n_neighbors=1, algorithm='ball_tree',
metric=metric, n_jobs=n_jobs)
self.n_clfs = n_clfs
self.random_state_ = seed_random_state(random_state)

Expand Down Expand Up @@ -224,4 +227,4 @@ def predict_dist(self, X):
encoded = self.predicted_code(X)
dist, _ = self.nn_.kneighbors(encoded, 1, return_distance=True)
dist = dist.reshape(-1)
return dist
return dist
6 changes: 4 additions & 2 deletions libact/query_strategies/tests/test_density_weighted_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def setUp(self):
self.quota = 10

def test_density_weighted_meta_uncertainty_lc(self):
trn_ds = Dataset(self.X[:20], np.concatenate([self.y[:6], [None] * 14]))
trn_ds = Dataset(self.X[:20], np.concatenate(
[self.y[:6], [None] * 14]))
base_qs = UncertaintySampling(
trn_ds, method='lc',
model=LogisticRegression(solver='liblinear', multi_class="ovr"))
Expand All @@ -37,7 +38,8 @@ def test_density_weighted_meta_uncertainty_lc(self):
beta=1.0, random_state=1126)
model = LogisticRegression(solver='liblinear', multi_class="ovr")
qseq = run_qs(trn_ds, qs, self.y, self.quota)
assert_array_equal(qseq, np.array([13, 18, 9, 12, 8, 16, 10, 19, 15, 17]))
assert_array_equal(qseq, np.array(
[18, 13, 9, 12, 8, 16, 10, 19, 15, 7]))


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion libact/query_strategies/tests/test_realdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def test_DensityWeightedUncertaintySampling(self):
qs = DWUS(trn_ds, random_state=1126)
qseq = run_qs(trn_ds, qs, self.y, self.quota)
assert_array_equal(
qseq, np.array([30, 179, 104, 186, 28, 65, 142, 62, 257, 221]))
qseq, np.array([257, 220, 179, 84, 208, 70, 245, 62, 50, 69]))


if __name__ == '__main__':
Expand Down
14 changes: 7 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
setuptools
numpy
scipy
scikit-learn>=0.24
matplotlib
Cython
joblib
setuptools==75.8.0
numpy==2.2.2
scipy==1.15.1
scikit-learn==1.6.1
matplotlib==3.10.0
Cython==3.0.11
joblib==1.4.2
10 changes: 6 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#!/usr/bin/env python

from io import open # python 2 compatibility
from io import open # python 2 compatibility
import os
from setuptools import setup, Extension
import sys

BUILD_HINTSVM = int(os.environ.get("LIBACT_BUILD_HINTSVM", 1))
BUILD_VARIANCE_REDUCTION = int(os.environ.get("LIBACT_BUILD_VARIANCE_REDUCTION", 1))
BUILD_VARIANCE_REDUCTION = int(os.environ.get(
"LIBACT_BUILD_VARIANCE_REDUCTION", 1))


on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
Expand Down Expand Up @@ -85,11 +86,12 @@
name='libact',
version='0.1.6',
description='Pool-based active learning in Python',
long_description=open('README.md', 'r', newline='', encoding='utf-8').read(),
long_description=open('README.md', 'r', newline='',
encoding='utf-8').read(),
long_description_content_type="text/markdown",
author='Y.-Y. Yang, S.-C. Lee, Y.-A. Chung, T.-E. Wu, H.-T. Lin',
author_email='[email protected], [email protected], '
'[email protected], [email protected], [email protected]',
'[email protected], [email protected], [email protected]',
url='https://github.com/ntucllab/libact',
cmdclass=cmdclasses,
setup_requires=setup_requires,
Expand Down