Commit
Merge pull request #5 from Bugazelle/dev
[Feat] Release to 0.1.10
Bugazelle authored Jul 14, 2019
2 parents ef3f689 + 998b325 commit 43de1cf
Showing 7 changed files with 248 additions and 194 deletions.
2 changes: 2 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,2 @@
include README.md
include 中文说明.md
40 changes: 36 additions & 4 deletions README.md
@@ -48,10 +48,10 @@ You could use `export_csv_to_influx -h` to see the help guide.
-ls, --limit_length, Limit length. Default: 20.
-dd, --drop_database, Drop database before inserting data. Default: False.
-dm, --drop_measurement, Drop measurement before inserting data. Default: False.
-mc, --match_columns, Match the data you want to get for certain columns, separated by comma. Default: None.
-mc, --match_columns, Match the data you want to keep for certain columns, separated by comma. Match rule: a row counts as matched only when all given columns match. Default: None.
-mbs, --match_by_string, Match by string, separated by comma. Default: None.
-mbr, --match_by_regex, Match by regex, separated by comma. Default: None.
-fic, --filter_columns, Filter the data you want to filter for certain columns, separated by comma. Default: None.
-fic, --filter_columns, Filter out the data for certain columns, separated by comma. Filter rule: a row is filtered out as soon as any one column filter succeeds. Default: None.
-fibs, --filter_by_string, Filter by string, separated by comma. Default: None.
-fibr, --filter_by_regex, Filter by regex, separated by comma. Default: None.
-ecm, --enable_count_measurement, Enable count measurement. Default: False.
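The match and filter rules above can be sketched as two plain predicates. This is a hypothetical illustration of the documented semantics (all columns must match for a row to count; any successful filter drops a row), not the exporter's actual code; `row_matches` and `row_filtered` are names invented here.

```python
import re

# Hypothetical illustration of the documented semantics; these helpers
# are invented for this sketch, not the exporter's actual functions.
def row_matches(row, rules):
    """Match rule: ALL (column, regex) pairs must match for the row to count."""
    return all(re.search(pattern, str(row.get(col, ''))) for col, pattern in rules)

def row_filtered(row, rules):
    """Filter rule: the row is dropped as soon as ANY (column, regex) pair matches."""
    return any(re.search(pattern, str(row.get(col, ''))) for col, pattern in rules)

row = {'timestamp': '2019-07-12 02:51:51', 'url': 'sample-1'}
print(row_matches(row, [('timestamp', r'2019-07-12'), ('url', r'sample-\d+')]))  # True
print(row_filtered(row, [('url', 'sample')]))  # True
```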
@@ -97,6 +97,7 @@ timestamp,url,response_time
--field_columns response_time \
--user admin \
--password admin \
--force_insert_even_csv_no_update True \
--server 127.0.0.1:8086
```
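Conceptually, the command above turns each CSV row into one InfluxDB point with `url` as a tag and `response_time` as a field. Below is a minimal sketch of that transformation with made-up demo rows; the dict shape is the one accepted by the influxdb client's `write_points`, but this is an illustration, not the tool's implementation.

```python
import csv
import io
from datetime import datetime

# Hypothetical sketch: how one CSV row could become an InfluxDB point dict.
# The demo rows below are made up; this is not the exporter's actual code.
demo_csv = (
    "timestamp,url,response_time\n"
    "2019-07-11 02:04:05,https://jmeter.apache.org/,1.434\n"
    "2019-07-12 08:12:23,https://jmeter.apache.org/,2.672\n"
)

points = []
for row in csv.DictReader(io.StringIO(demo_csv)):
    ts = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
    points.append({
        'measurement': 'demo',                                     # --measurement demo
        'tags': {'url': row['url']},                               # --tag_columns url
        'fields': {'response_time': float(row['response_time'])},  # --field_columns response_time
        'time': ts.isoformat(),
    })

print(len(points))  # 2
```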
@@ -112,6 +113,7 @@ timestamp,url,response_time
--user admin \
--password admin \
--server 127.0.0.1:8086 \
--force_insert_even_csv_no_update True \
--drop_database=True
```
@@ -128,11 +130,30 @@ timestamp,url,response_time
--password test-automation-monitoring-2019 \
--server 127.0.0.1:8086 \
--drop_database=True \
--force_insert_even_csv_no_update True \
--match_columns=timestamp,url \
--match_by_regex='2019-07-12,sample-\d+'
```
4. Filter part of the data, then export into influx: **url filter sample**
```
export_csv_to_influx \
--csv demo.csv \
--dbname demo \
--measurement demo \
--tag_columns url \
--field_columns response_time \
--user admin \
--password test-automation-monitoring-2019 \
--server 127.0.0.1:8086 \
--drop_database True \
--force_insert_even_csv_no_update True \
--filter_columns timestamp,url \
--filter_by_regex 'sample'
```
4. Enable count measurement. A new measurement named: **demo.count** generated
5. Enable count measurement. A new measurement named **demo.count** is generated, counting the rows that match: **timestamp matches 2019-07-12 and url matches sample-\d+**
```
export_csv_to_influx \
@@ -145,11 +166,22 @@
--password admin \
--server 127.0.0.1:8086 \
--drop_database True \
--force_insert_even_csv_no_update True \
--match_columns timestamp,url \
--match_by_regex '2019-07-12,sample-\d+' \
--enable_count_measurement True
```
The count measurement is:
```text
select * from "demo.count"
name: demo.count
time match_timestamp match_url total
---- --------------- --------- -----
1562957134000000000 3 2 9
```
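The `demo.count` fields can be read as per-column match tallies plus a row total. Here is a hedged sketch under that assumption, with made-up rows; it is not the exporter's actual counting code.

```python
import re
from collections import Counter

# Hypothetical tally of demo.count-style fields: one match_<column>
# counter per rule, plus a total row count. Sample rows are made up.
rows = [
    {'timestamp': '2019-07-12 01:00:00', 'url': 'sample-1'},
    {'timestamp': '2019-07-11 01:00:00', 'url': 'sample-2'},
    {'timestamp': '2019-07-12 01:00:00', 'url': 'other'},
]
rules = {'timestamp': r'2019-07-12', 'url': r'sample-\d+'}

counts = Counter()
for row in rows:
    counts['total'] += 1
    for col, pattern in rules.items():
        if re.search(pattern, row[col]):
            counts['match_{0}'.format(col)] += 1

print(dict(counts))  # {'total': 3, 'match_timestamp': 2, 'match_url': 2}
```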
## Special Thanks
113 changes: 18 additions & 95 deletions setup.py
@@ -9,102 +9,22 @@
VERSION = re.search("__version__ = '(.*)'", f.read()).group(1)
download_url = '{0}/archive/v{1}.tar.gz'.format(url, VERSION)
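The version extraction above can be reproduced standalone. The sample string and repo URL here are stand-ins for the real `__version__.py` contents and the `url` variable defined elsewhere in setup.py.

```python
import re

# Standalone reproduction of the version-extraction pattern; the sample
# string stands in for the real __version__.py contents.
sample = "__version__ = '0.1.10'\n"
version = re.search("__version__ = '(.*)'", sample).group(1)
url = 'https://github.com/Bugazelle/export-csv-to-influx'  # assumed value of `url`
download_url = '{0}/archive/v{1}.tar.gz'.format(url, version)
print(version)       # 0.1.10
print(download_url)  # ends with /archive/v0.1.10.tar.gz
```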

long_description = '''
Export CSV To Influx
====================

**Export CSV To Influx**: Process CSV data, and export the data to influx db
def readme():
with open('README.md') as f:
long_description = f.read()
index = long_description.find('```\n\n> **Note 1:**')
long_description = long_description[:index]
long_description = long_description.replace('## Install', '**Install**')
long_description = long_description.replace('## Features', '**Features**')
long_description = long_description.replace('## Command Arguments', '**Command Arguments**')
long_description = long_description.replace('```bash', '')
long_description = long_description.replace('\n-', '\n\n-')
long_description = long_description.replace('\n-c', '-c')
long_description += '\n\nFor more info, please refer to the {0}'.format(url)

**Install**
return long_description

Use the pip to install the library. Then the binary **export_csv_to_influx** is ready.
```
pip install ExportCsvToInflux
```
**Features**
1. Allow to use binary **export_csv_to_influx** to run exporter
2. Allow to check dozens of csv files in a folder
3. Auto convert csv data to int/float/string in Influx
4. Allow to limit string length in Influx
5. Allow to judge the csv has new data or not
6. Allow to use the latest file modify time as time column
7. Auto Create database if not exist
8. Allow to drop database before inserting data
9. Allow to drop measurements before inserting data
10. Allow to match or filter the data by using string or regex.
11. Allow to count, and generate count measurement
**Command Arguments**
You could use `export_csv_to_influx -h` to see the help guide.
-c, --csv, Input CSV file path, or the folder path. **Mandatory**
-d, --delimiter, CSV delimiter. Default: ','.
-lt, --lineterminator, CSV lineterminator. Default: '\n'.
-s, --server, InfluxDB Server address. Default: localhost:8086.
-u, --user, InfluxDB User name. Default: admin
-p, --password, InfluxDB Password. Default: admin
-db, --dbname, InfluxDB Database name. **Mandatory**
-m, --measurement, Measurement name. **Mandatory**
-t, --time_column, Timestamp column name. Default: timestamp. If no timestamp column, the timestamp is set to the last file modify time for whole csv rows.
-tf, --time_format, Timestamp format. Default: '%Y-%m-%d %H:%M:%S' e.g.: 1970-01-01 00:00:00.
-tz, --time_zone, Timezone of supplied data. Default: UTC.
-fc, --field_columns, List of csv columns to use as fields, separated by comma. **Mandatory**
-tc, --tag_columns, List of csv columns to use as tags, separated by comma. **Mandatory**
-b, --batch_size, Batch size when inserting data to influx. Default: 500.
-lslc, --limit_string_length_columns, Limit string length column, separated by comma. Default: None.
-ls, --limit_length, Limit length. Default: 20.
-dd, --drop_database, Drop database before inserting data. Default: False.
-dm, --drop_measurement, Drop measurement before inserting data. Default: False.
-mc, --match_columns, Match the data you want to get for certain columns, separated by comma. Default: None.
-mbs, --match_by_string, Match by string, separated by comma. Default: None.
-mbr, --match_by_regex, Match by regex, separated by comma. Default: None.
-fic, --filter_columns, Filter the data you want to filter for certain columns, separated by comma. Default: None.
-fibs, --filter_by_string, Filter by string, separated by comma. Default: None.
-fibr, --filter_by_regex, Filter by regex, separated by comma. Default: None.
-ecm, --enable_count_measurement, Enable count measurement. Default: False.
-fi, --force_insert_even_csv_no_update, Force insert data to influx, even csv no update. Default: False.
For more info, please refer to the https://github.com/Bugazelle/export-csv-to-influx
'''

setup(
name='ExportCsvToInflux',
@@ -115,12 +35,15 @@
description='Export',
zip_safe=False,
include_package_data=True,
long_description=long_description,
long_description=readme(),
long_description_content_type='text/markdown',
author='Bugazelle',
author_email='[email protected]',
keywords=['python', 'csv', 'influx'],
install_requires=['influxdb', ],
install_requires=[
'influxdb>=5.2.2',
'python-dateutil>=2.8.0'
],
download_url=download_url,
url=url,
classifiers=(
2 changes: 1 addition & 1 deletion src/ExportCsvToInflux/__version__.py
@@ -1 +1 @@
__version__ = '0.1.8'
__version__ = '0.1.10'
19 changes: 19 additions & 0 deletions src/ExportCsvToInflux/csv_object.py
@@ -15,6 +15,25 @@ def __init__(self, delimiter=',', lineterminator='\n'):
self.delimiter = delimiter
self.lineterminator = lineterminator

def get_csv_header(self, file_name):
"""Function: get_csv_header.
:param file_name: the file name
:return: the csv header as a list
"""

self.valid_file_exit(file_name)

with open(file_name) as f:
sniffer = csv.Sniffer()
has_header = sniffer.has_header(f.read(40960))
f.seek(0)
csv_reader = csv.DictReader(f, delimiter=self.delimiter, lineterminator=self.lineterminator)
headers = csv_reader.fieldnames if has_header else []

return headers
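The header detection used in `get_csv_header` can be demonstrated standalone with an in-memory sample: `csv.Sniffer` guesses whether the first row is a header, and `DictReader.fieldnames` returns it. The sample data below is made up.

```python
import csv
import io

# Standalone demonstration of the approach in get_csv_header:
# sniff for a header row, then read the field names.
data = (
    "timestamp,url,response_time\n"
    "2019-07-11 02:04:05,https://jmeter.apache.org/,1.434\n"
    "2019-07-12 08:12:23,https://jmeter.apache.org/,2.672\n"
)
f = io.StringIO(data)
has_header = csv.Sniffer().has_header(f.read(40960))  # same 40960-byte sample size
f.seek(0)
headers = csv.DictReader(f, delimiter=',').fieldnames if has_header else []
print(headers)  # ['timestamp', 'url', 'response_time']
```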

@staticmethod
def search_files_in_dir(directory, match_suffix='.csv', filter_pattern='influx.csv'):
"""Function: search_files_in_dir