Skip to content

Commit

Permalink
Merge pull request #59 from Roche/dev
Browse files Browse the repository at this point in the history
v0.3.3
  • Loading branch information
ofajardo authored Apr 24, 2020
2 parents 8ac732a + 78034c9 commit 6c3f1d0
Show file tree
Hide file tree
Showing 16 changed files with 1,213 additions and 1,128 deletions.
3 changes: 3 additions & 0 deletions change_log.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# 0.3.3 (github, pypi and conda 20200424)
* added capability for set_value_labels to handle dataframes that contain only a subset of the columns described in the metadata, solves #58

# 0.3.2 (github, pypi and conda 20200422)
* fixed bug when writing string columns with missing values (#54 and #55)

Expand Down
Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/_build/html/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 3c3bb2a4bec2d6479578309a09db4796
config: e5aad791b428aec4c4ae13c2e09050db
tags: 645f666f9bcd5a90fca523b33c5a78b7
2 changes: 1 addition & 1 deletion docs/_build/html/_static/documentation_options.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
VERSION: '0.3.2',
VERSION: '0.3.3',
LANGUAGE: 'None',
COLLAPSE_INDEX: false,
BUILDER: 'html',
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/genindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Index &mdash; pyreadstat 0.3.2 documentation</title>
<title>Index &mdash; pyreadstat 0.3.3 documentation</title>



Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Welcome to pyreadstat’s documentation! &mdash; pyreadstat 0.3.2 documentation</title>
<title>Welcome to pyreadstat’s documentation! &mdash; pyreadstat 0.3.3 documentation</title>



Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/py-modindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Python Module Index &mdash; pyreadstat 0.3.2 documentation</title>
<title>Python Module Index &mdash; pyreadstat 0.3.3 documentation</title>



Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Search &mdash; pyreadstat 0.3.2 documentation</title>
<title>Search &mdash; pyreadstat 0.3.3 documentation</title>



Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '0.3.2'
release = '0.3.3'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pyreadstat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
from .pyreadstat import read_file_in_chunks
from ._readstat_parser import ReadstatError, metadata_container

__version__ = "0.3.2"
__version__ = "0.3.3"
1,567 changes: 800 additions & 767 deletions pyreadstat/_readstat_writer.c

Large diffs are not rendered by default.

12 changes: 8 additions & 4 deletions pyreadstat/_readstat_writer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,16 @@ cdef int run_write(df, str filename_path, dst_file_format file_format, str file_
cdef char *file_labl

cdef list col_names = df.columns.values.tolist()

for variable_name in col_names:
if type(variable_name) != str:
raise PyreadstatError("variable name %s is of type %s and it must be str (not starting with numbers!)" % (variable_name, str(type(variable_name))))
if not variable_name[0].isalpha():
raise PyreadstatError("variable name %s starts with an illegal (non-alphabetic) character" % variable_name)

if file_format == FILE_FORMAT_POR:
col_names = [x.upper() for x in col_names]

cdef list col_types = get_pandas_column_types(df, missing_user_values)
cdef int row_count = len(df)
cdef int col_count = len(col_names)
Expand Down Expand Up @@ -618,10 +626,6 @@ cdef int run_write(df, str filename_path, dst_file_format file_format, str file_
#if file_format == FILE_FORMAT_XPORT and curtype == PYWRITER_DOUBLE:
# max_length = 8
variable_name = col_names[col_indx]
if type(variable_name) != str:
raise PyreadstatError("variable name %s is of type %s and it must be str (not starting with numbers!)" % (variable_name, str(type(variable_name))))
if not variable_name[0].isalpha():
raise PyreadstatError("variable name %s starts with an illegal (non-alphabetic) character" % variable_name)
variable = readstat_add_variable(writer, variable_name.encode("utf-8"), pandas_to_readstat_types[curtype], max_length)
if curtype in pyrwriter_datetimelike_types:
curformat = get_datetimelike_format_for_readstat(file_format, curtype)
Expand Down
722 changes: 377 additions & 345 deletions pyreadstat/pyreadstat.c

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions pyreadstat/pyreadstat.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -474,9 +474,10 @@ def set_value_labels(dataframe, metadata, formats_as_category=True):
for var_name, label_name in metadata.variable_to_label.items():
labels = metadata.value_labels.get(label_name)
if labels:
df_copy[var_name] = df_copy[var_name].apply(lambda x: labels.get(x, x))
if formats_as_category:
df_copy[var_name] = df_copy[var_name].astype("category")
if var_name in df_copy.columns:
df_copy[var_name] = df_copy[var_name].apply(lambda x: labels.get(x, x))
if formats_as_category:
df_copy[var_name] = df_copy[var_name].astype("category")

return df_copy

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@

setup(
name='pyreadstat',
version='0.3.2',
version='0.3.3',
description=short_description,
author="Otto Fajardo",
author_email="[email protected]",
Expand Down
12 changes: 12 additions & 0 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,18 @@ def test_dta_write_charnan(self):
df2['object'] = df2['object'].astype(str)
self.assertTrue(df2.equals(df))

def test_set_value_labels(self):

df, meta = pyreadstat.read_sav(os.path.join(self.basic_data_folder, "sample.sav"))
df_formatted = pyreadstat.set_value_labels(df, meta, formats_as_category=True)
#df.columns = self.df_pandas_formatted.columns
self.assertTrue(df_formatted.equals(self.df_pandas_formatted))
# partial
sub1_raw = df[['myord']]
sub1 = pyreadstat.set_value_labels(sub1_raw, meta, formats_as_category=True)
sub2 = self.df_pandas_formatted[['myord']]
self.assertTrue(sub1.equals(sub2))

if __name__ == '__main__':

import sys
Expand Down

0 comments on commit 6c3f1d0

Please sign in to comment.