From 9fd4fd44a38c2346d0d79e9070bf09fa6495e618 Mon Sep 17 00:00:00 2001
From: ethanchewy <17chiue@gmail.com>
Date: Mon, 23 Oct 2017 12:14:36 -0700
Subject: [PATCH 1/8] Add time stamp functionality.

---
 GoogleScraper/database.py | 4 +++-
 GoogleScraper/parsing.py  | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/GoogleScraper/database.py b/GoogleScraper/database.py
index f3122397..e3538ebf 100644
--- a/GoogleScraper/database.py
+++ b/GoogleScraper/database.py
@@ -121,11 +121,12 @@ def set_values_from_parser(self, parser):
                     parsed = urlparse(link['link'])
 
                     # fill with nones to prevent key errors
-                    [link.update({key: None}) for key in ('snippet', 'title', 'visible_link') if key not in link]
+                    [link.update({key: None}) for key in ('snippet', 'time_stamp','title', 'visible_link') if key not in link]
 
                     Link(
                         link=link['link'],
                         snippet=link['snippet'],
+                        time_stamp=link['time_stamp'],
                         title=link['title'],
                         visible_link=link['visible_link'],
                         domain=parsed.netloc,
@@ -169,6 +170,7 @@ class Link(Base):
     id = Column(Integer, primary_key=True)
     title = Column(String)
     snippet = Column(String)
+    time_stamp=Column(String)
     link = Column(String)
     domain = Column(String)
     visible_link = Column(String)
diff --git a/GoogleScraper/parsing.py b/GoogleScraper/parsing.py
index 09fd4b41..04db9c9b 100644
--- a/GoogleScraper/parsing.py
+++ b/GoogleScraper/parsing.py
@@ -359,6 +359,7 @@ class GoogleParser(Parser):
                 'result_container': 'div.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
+                'time_stamp' : 'div.slp',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -367,6 +368,7 @@ class GoogleParser(Parser):
                 'result_container': 'li.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
+                'time_stamp' : 'div.slp',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -374,6 +376,7 @@ class GoogleParser(Parser):
                 'container': 'li.card-section',
                 'link': 'a._Dk::attr(href)',
                 'snippet': 'span._dwd::text',
+                'time_stamp' : 'div.slp',
                 'title': 'a._Dk::text',
                 'visible_link': 'cite::text'
             },

From 0dee8db326f05e6d1d5e0ccd5e4b1231fecc3d7f Mon Sep 17 00:00:00 2001
From: ethanchewy <17chiue@gmail.com>
Date: Mon, 23 Oct 2017 21:45:58 -0700
Subject: [PATCH 2/8] Select text of time stamp element with ::text

---
 GoogleScraper/parsing.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/GoogleScraper/parsing.py b/GoogleScraper/parsing.py
index 04db9c9b..8962bb8a 100644
--- a/GoogleScraper/parsing.py
+++ b/GoogleScraper/parsing.py
@@ -359,7 +359,7 @@ class GoogleParser(Parser):
                 'result_container': 'div.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.slp',
+                'time_stamp' : 'div.slp::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -368,7 +368,7 @@ class GoogleParser(Parser):
                 'result_container': 'li.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.slp',
+                'time_stamp' : 'div.slp::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -376,7 +376,7 @@ class GoogleParser(Parser):
                 'container': 'li.card-section',
                 'link': 'a._Dk::attr(href)',
                 'snippet': 'span._dwd::text',
-                'time_stamp' : 'div.slp',
+                'time_stamp' : 'div.slp::text',
                 'title': 'a._Dk::text',
                 'visible_link': 'cite::text'
             },

From 129883b89071970d76215b487a87b93528445161 Mon Sep 17 00:00:00 2001
From: ethanchewy <17chiue@gmail.com>
Date: Tue, 24 Oct 2017 03:32:10 -0700
Subject: [PATCH 3/8] Broaden time stamp selector from div.slp to div.f

---
 GoogleScraper/parsing.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/GoogleScraper/parsing.py b/GoogleScraper/parsing.py
index 8962bb8a..ebd015f9 100644
--- a/GoogleScraper/parsing.py
+++ b/GoogleScraper/parsing.py
@@ -359,7 +359,7 @@ class GoogleParser(Parser):
                 'result_container': 'div.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.slp::text',
+                'time_stamp' : 'div.f::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -368,7 +368,7 @@ class GoogleParser(Parser):
                 'result_container': 'li.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.slp::text',
+                'time_stamp' : 'div.f::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -376,7 +376,7 @@ class GoogleParser(Parser):
                 'container': 'li.card-section',
                 'link': 'a._Dk::attr(href)',
                 'snippet': 'span._dwd::text',
-                'time_stamp' : 'div.slp::text',
+                'time_stamp' : 'div.f::text',
                 'title': 'a._Dk::text',
                 'visible_link': 'cite::text'
             },

From ef2891ed088fa521bfa1f881c9d6714bfcaf561c Mon Sep 17 00:00:00 2001
From: ethanchewy <17chiue@gmail.com>
Date: Tue, 24 Oct 2017 04:12:05 -0700
Subject: [PATCH 4/8] Add recent_time field (buggy, kept for reference)

---
 GoogleScraper/database.py | 4 +++-
 GoogleScraper/parsing.py  | 9 ++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/GoogleScraper/database.py b/GoogleScraper/database.py
index e3538ebf..038b05c4 100644
--- a/GoogleScraper/database.py
+++ b/GoogleScraper/database.py
@@ -121,12 +121,13 @@ def set_values_from_parser(self, parser):
                     parsed = urlparse(link['link'])
 
                     # fill with nones to prevent key errors
-                    [link.update({key: None}) for key in ('snippet', 'time_stamp','title', 'visible_link') if key not in link]
+                    [link.update({key: None}) for key in ('snippet', 'time_stamp', 'recent_time', 'title', 'visible_link') if key not in link]
 
                     Link(
                         link=link['link'],
                         snippet=link['snippet'],
                         time_stamp=link['time_stamp'],
+                        recent_time=link['recent_time'],
                         title=link['title'],
                         visible_link=link['visible_link'],
                         domain=parsed.netloc,
@@ -171,6 +172,7 @@ class Link(Base):
     title = Column(String)
     snippet = Column(String)
     time_stamp=Column(String)
+    recent_time=Column(String)
     link = Column(String)
     domain = Column(String)
     visible_link = Column(String)
diff --git a/GoogleScraper/parsing.py b/GoogleScraper/parsing.py
index ebd015f9..0f55337c 100644
--- a/GoogleScraper/parsing.py
+++ b/GoogleScraper/parsing.py
@@ -359,7 +359,8 @@ class GoogleParser(Parser):
                 'result_container': 'div.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.f::text',
+                'time_stamp' : 'div.slp.f::text',
+                'recent_time' : 'div.slp.f::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -368,7 +369,8 @@ class GoogleParser(Parser):
                 'result_container': 'li.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.f::text',
+                'time_stamp' : 'div.slp.f::text',
+                'recent_time' : 'div.slp.f::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -376,7 +378,8 @@ class GoogleParser(Parser):
                 'container': 'li.card-section',
                 'link': 'a._Dk::attr(href)',
                 'snippet': 'span._dwd::text',
-                'time_stamp' : 'div.f::text',
+                'time_stamp' : 'div.slp.f::text',
+                'recent_time' : 'div.slp.f::text',
                 'title': 'a._Dk::text',
                 'visible_link': 'cite::text'
             },

From cb2064d84fac31ef42c2cefeb726a4f4ac0f04d1 Mon Sep 17 00:00:00 2001
From: ethanchewy <17chiue@gmail.com>
Date: Tue, 24 Oct 2017 04:30:37 -0700
Subject: [PATCH 5/8] Remove recent_time and scope time stamp selector to div.s

---
 GoogleScraper/database.py | 4 +---
 GoogleScraper/parsing.py  | 9 +++------
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/GoogleScraper/database.py b/GoogleScraper/database.py
index 038b05c4..51925bcc 100644
--- a/GoogleScraper/database.py
+++ b/GoogleScraper/database.py
@@ -121,13 +121,12 @@ def set_values_from_parser(self, parser):
                     parsed = urlparse(link['link'])
 
                     # fill with nones to prevent key errors
-                    [link.update({key: None}) for key in ('snippet', 'time_stamp', 'recent_time', 'title', 'visible_link') if key not in link]
+                    [link.update({key: None}) for key in ('snippet', 'time_stamp', 'title', 'visible_link') if key not in link]
 
                     Link(
                         link=link['link'],
                         snippet=link['snippet'],
                         time_stamp=link['time_stamp'],
-                        recent_time=link['recent_time'],
                         title=link['title'],
                         visible_link=link['visible_link'],
                         domain=parsed.netloc,
@@ -172,7 +171,6 @@ class Link(Base):
     title = Column(String)
     snippet = Column(String)
     time_stamp=Column(String)
-    recent_time=Column(String)
     link = Column(String)
     domain = Column(String)
     visible_link = Column(String)
diff --git a/GoogleScraper/parsing.py b/GoogleScraper/parsing.py
index 0f55337c..924b752d 100644
--- a/GoogleScraper/parsing.py
+++ b/GoogleScraper/parsing.py
@@ -359,8 +359,7 @@ class GoogleParser(Parser):
                 'result_container': 'div.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.slp.f::text',
-                'recent_time' : 'div.slp.f::text',
+                'time_stamp' : 'div.s div.slp:text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -369,8 +368,7 @@ class GoogleParser(Parser):
                 'result_container': 'li.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.slp.f::text',
-                'recent_time' : 'div.slp.f::text',
+                'time_stamp' : 'div.s div.slp:text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -378,8 +376,7 @@ class GoogleParser(Parser):
                 'container': 'li.card-section',
                 'link': 'a._Dk::attr(href)',
                 'snippet': 'span._dwd::text',
-                'time_stamp' : 'div.slp.f::text',
-                'recent_time' : 'div.slp.f::text',
+                'time_stamp' : 'div.s div.slp:text',
                 'title': 'a._Dk::text',
                 'visible_link': 'cite::text'
             },

From 998ef3eb4d61f136d217fc7637c4ec1bd7742faa Mon Sep 17 00:00:00 2001
From: ethanchewy <17chiue@gmail.com>
Date: Wed, 25 Oct 2017 19:07:09 -0700
Subject: [PATCH 6/8] Revert buggy time stamp selector

---
 GoogleScraper/database.py | 4 ++--
 GoogleScraper/parsing.py  | 8 ++++----
 output.json               | 1 +
 3 files changed, 7 insertions(+), 6 deletions(-)
 create mode 100644 output.json

diff --git a/GoogleScraper/database.py b/GoogleScraper/database.py
index 51925bcc..bc64c478 100644
--- a/GoogleScraper/database.py
+++ b/GoogleScraper/database.py
@@ -121,7 +121,7 @@ def set_values_from_parser(self, parser):
                     parsed = urlparse(link['link'])
 
                     # fill with nones to prevent key errors
-                    [link.update({key: None}) for key in ('snippet', 'time_stamp', 'title', 'visible_link') if key not in link]
+                    [link.update({key: None}) for key in ('snippet', 'time_stamp','title', 'visible_link') if key not in link]
 
                     Link(
                         link=link['link'],
@@ -290,4 +290,4 @@ def fixtures(config, session):
             if not search_engine:
                 session.add(SearchEngine(name=se))
 
-    session.commit()
+    session.commit()
\ No newline at end of file
diff --git a/GoogleScraper/parsing.py b/GoogleScraper/parsing.py
index 924b752d..6e2b1608 100644
--- a/GoogleScraper/parsing.py
+++ b/GoogleScraper/parsing.py
@@ -359,7 +359,7 @@ class GoogleParser(Parser):
                 'result_container': 'div.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.s div.slp:text',
+                'time_stamp' : 'div.slp::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -368,7 +368,7 @@ class GoogleParser(Parser):
                 'result_container': 'li.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.s div.slp:text',
+                'time_stamp' : 'div.slp::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -376,7 +376,7 @@ class GoogleParser(Parser):
                 'container': 'li.card-section',
                 'link': 'a._Dk::attr(href)',
                 'snippet': 'span._dwd::text',
-                'time_stamp' : 'div.s div.slp:text',
+                'time_stamp' : 'div.slp::text',
                 'title': 'a._Dk::text',
                 'visible_link': 'cite::text'
             },
@@ -1079,4 +1079,4 @@ def parse_serp(config, html=None, parser=None, scraper=None, search_engine=None,
     print(parser)
 
     with open('/tmp/testhtml.html', 'w') as of:
-        of.write(raw_html)
+        of.write(raw_html)
\ No newline at end of file
diff --git a/output.json b/output.json
new file mode 100644
index 00000000..0637a088
--- /dev/null
+++ b/output.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file

From 9bd8c5d642fe432ae0a4404154db6ab61c03fc8b Mon Sep 17 00:00:00 2001
From: ethanchewy <17chiue@gmail.com>
Date: Mon, 20 Nov 2017 21:29:11 -0800
Subject: [PATCH 7/8] Add search-by-time functionality

---
 GoogleScraper/http_mode.py | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/GoogleScraper/http_mode.py b/GoogleScraper/http_mode.py
index fab1ec3d..69480897 100644
--- a/GoogleScraper/http_mode.py
+++ b/GoogleScraper/http_mode.py
@@ -44,6 +44,12 @@ def get_GET_params_for_search_engine(query, search_engine, page_number=1, num_re
         # state by some hard coded needles.
         search_params['hl'] = 'en'
         search_params['q'] = query
+        #+ "&tbs=cdr%3A1%2Ccd_min%3A2015%2Ccd_max%3A2016&tbm="
+        '''
+        search_params['tbs'] ='cdr:1'
+        search_params['cd_min'] = '2015'
+        search_params['cd_max'] = '2016'
+        '''
         # only set when other num results than 10.
         if num_results_per_page != 10:
             search_params['num'] = str(num_results_per_page)
@@ -75,6 +81,17 @@ def get_GET_params_for_search_engine(query, search_engine, page_number=1, num_re
                 'source': 'lnms',
                 'sa': 'X'
             })
+        #TEST
+
+
+        '''
+        search_params.update({
+            'tbs' : 'cdr:1',
+            'cd_min': '3/2/2015',
+            'cd_max': '3/2/2016'
+        })
+        '''
+
 
     elif search_engine == 'yandex':
         search_params['text'] = query
@@ -157,7 +174,8 @@ def __init__(self, config, *args, time_offset=0.0, **kwargs):
         self.scrape_method = 'http'
 
         # get the base search url based on the search engine.
-        self.base_search_url = get_base_search_url_by_search_engine(self.config, self.search_engine_name, self.scrape_method)
+        #+ "&source=lnt&tbs=cdr%3A1%2Ccd_min%3A2015%2Ccd_max%3A2016&tbm=
+        self.base_search_url = get_base_search_url_by_search_engine(self.config, self.search_engine_name, self.scrape_method) + "&source=lnt&tbs=cdr%3A1%2Ccd_min%3A2015%2Ccd_max%3A2016&tbm="
 
         super().instance_creation_info(self.__class__.__name__)
 
@@ -256,7 +274,16 @@ def search(self, rand=True, timeout=15):
         success = True
 
         self.build_search()
-
+        #"tbs=cdr%3A1%2Ccd_min%3A2016%2Ccd_max%3A2015&ei=kdsMWtn0NumD0gLdvYWQDA&"
+        '''
+        self.headers['User-Agent'] = random_user_agent(only_desktop=True)
+        super().detection_prevention_sleep()
+        super().keyword_info()
+        print self.base_search_url + urlencode(self.search_params)
+        sys.exit()
+        '''
+
+        
         if rand:
             self.headers['User-Agent'] = random_user_agent(only_desktop=True)
 
@@ -291,6 +318,7 @@ def search(self, rand=True, timeout=15):
                 success = False
 
         super().after_search()
+        
 
         return success
 

From 4356901540ea0f3ad4a639e8b5d084656895098f Mon Sep 17 00:00:00 2001
From: ethanchewy <17chiue@gmail.com>
Date: Mon, 20 Nov 2017 22:35:01 -0800
Subject: [PATCH 8/8] Fix time search that only worked in http mode, not in
 async mode

---
 GoogleScraper/http_mode.py | 2 +-
 GoogleScraper/parsing.py   | 6 +++---
 GoogleScraper/scraping.py  | 7 +++----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/GoogleScraper/http_mode.py b/GoogleScraper/http_mode.py
index 69480897..1723c5e0 100644
--- a/GoogleScraper/http_mode.py
+++ b/GoogleScraper/http_mode.py
@@ -175,7 +175,7 @@ def __init__(self, config, *args, time_offset=0.0, **kwargs):
 
         # get the base search url based on the search engine.
         #+ "&source=lnt&tbs=cdr%3A1%2Ccd_min%3A2015%2Ccd_max%3A2016&tbm=
-        self.base_search_url = get_base_search_url_by_search_engine(self.config, self.search_engine_name, self.scrape_method) + "&source=lnt&tbs=cdr%3A1%2Ccd_min%3A2015%2Ccd_max%3A2016&tbm="
+        self.base_search_url = get_base_search_url_by_search_engine(self.config, self.search_engine_name, self.scrape_method)
 
         super().instance_creation_info(self.__class__.__name__)
 
diff --git a/GoogleScraper/parsing.py b/GoogleScraper/parsing.py
index 6e2b1608..7c452131 100644
--- a/GoogleScraper/parsing.py
+++ b/GoogleScraper/parsing.py
@@ -359,7 +359,7 @@ class GoogleParser(Parser):
                 'result_container': 'div.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.slp::text',
+                'time_stamp' : 'div.s div.slp::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -368,7 +368,7 @@ class GoogleParser(Parser):
                 'result_container': 'li.g ',
                 'link': 'h3.r > a:first-child::attr(href)',
                 'snippet': 'div.s span.st::text',
-                'time_stamp' : 'div.slp::text',
+                'time_stamp' : 'div.s div.slp::text',
                 'title': 'h3.r > a:first-child::text',
                 'visible_link': 'cite::text'
             },
@@ -376,7 +376,7 @@ class GoogleParser(Parser):
                 'container': 'li.card-section',
                 'link': 'a._Dk::attr(href)',
                 'snippet': 'span._dwd::text',
-                'time_stamp' : 'div.slp::text',
+                'time_stamp' : 'div.s div.slp::text',
                 'title': 'a._Dk::text',
                 'visible_link': 'cite::text'
             },
diff --git a/GoogleScraper/scraping.py b/GoogleScraper/scraping.py
index 8d691f4b..d768eaae 100644
--- a/GoogleScraper/scraping.py
+++ b/GoogleScraper/scraping.py
@@ -78,7 +78,7 @@ def get_base_search_url_by_search_engine(config, search_engine_name, search_mode
     """
     assert search_mode in SEARCH_MODES, 'search mode "{}" is not available'.format(search_mode)
 
-    specific_base_url = config.get('{}_{}_search_url'.format(search_mode, search_engine_name), None)
+    specific_base_url = config.get('{}_{}_search_url'.format(search_mode, search_engine_name), None) 
 
     if not specific_base_url:
         specific_base_url = config.get('{}_search_url'.format(search_engine_name), None)
@@ -90,9 +90,8 @@ def get_base_search_url_by_search_engine(config, search_engine_name, search_mode
             ips = file.read().split('\n')
             random_ip = random.choice(ips)
             return random_ip
-
-    return specific_base_url
-
+    specific_base_url += "&source=lnt&tbs=cdr%3A1%2Ccd_min%3A3%2F1%2F2015%2Ccd_max%3A11%2F1%2F2015&tbm=&"
+    return specific_base_url 
 
 class SearchEngineScrape(metaclass=abc.ABCMeta):
     """Abstract base class that represents a search engine scrape.