Skip to content

Commit

Permalink
Merge pull request #382 from k4cg/master_issue_381
Browse files (browse the repository at this point in the history)
Master issue 381
  • Loading branch information
jkowalleck authored Sep 10, 2020
2 parents 132d82e + 3108160 commit 1de8060
Show file tree
Hide file tree
Showing 18 changed files with 386 additions and 549 deletions.
7 changes: 6 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

## Unreleased

add upcoming unreleased modiications here
add upcoming unreleased modifications here

## 2.4.3

* Fixed
  * issue #381 - Instagram Profile Crawler

## 2.4.2

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from glob import glob
from os.path import basename, dirname, join as path_join, realpath

import pytest # type: ignore
import pytest
from ddt import ddt, idata, unpack # type: ignore

from nichtparasoup.testing.config import ConfigFileTest
Expand Down
4 changes: 4 additions & 0 deletions nichtparasoup/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class Commands(object):
@staticmethod
def run(config_file: Optional[str] = None) -> int:
from os.path import abspath

from nichtparasoup._internals import _logging_init
from nichtparasoup.config import get_config, get_imagecrawler
from nichtparasoup.core import NPCore
Expand Down Expand Up @@ -44,6 +45,7 @@ def config(cls, **actions: Any) -> int:
@staticmethod
def config_dump_file(config_file: str) -> int:
from os.path import abspath, isfile

from nichtparasoup._internals import _confirm
from nichtparasoup.config import dump_defaults
config_file = abspath(config_file)
Expand All @@ -62,6 +64,7 @@ def config_dump_file(config_file: str) -> int:
@staticmethod
def config_check_file(config_file: str) -> int:
from os.path import abspath

from nichtparasoup.testing.config import ConfigFileTest
config_file = abspath(config_file)
config_test = ConfigFileTest()
Expand Down Expand Up @@ -136,6 +139,7 @@ def info_imagecrawler_desc(imagecrawler: str) -> int:
@staticmethod
def completion(shell: str) -> int:
from sys import stdout

from argcomplete import shellcode # type: ignore
stdout.write(shellcode(
['nichtparasoup'], shell=shell,
Expand Down
4 changes: 2 additions & 2 deletions nichtparasoup/imagecrawler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ class KnownImageCrawlers(object):
@staticmethod
def _builtins() -> Dict[_ImagecrawlerName, _ImagecrawlerClass]:
from .echo import Echo
from .picsum import Picsum
from .reddit import Reddit
from .instagram import InstagramHashtag, InstagramProfile
from .picsum import Picsum
from .pr0gramm import Pr0gramm
from .reddit import Reddit
return dict(
Echo=Echo,
Picsum=Picsum,
Expand Down
2 changes: 1 addition & 1 deletion nichtparasoup/imagecrawler/instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
class InstagramQueryHashFinder(object):
__CONTAINER_PATH_RE = {
'tag': r'/static/bundles/metro/TagPageContainer\.js/.+?\.js',
'profile': r'/static/bundles/metro/ProfilePageContainer\.js/.+?\.js',
'profile': r'/static/bundles/metro/Consumer\.js/.+?\.js',
}

__QUERY_HASH_RE = r'queryId:"(.+?)"'
Expand Down
1 change: 1 addition & 0 deletions nichtparasoup/webserver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def on_sourceicons(self, _: Request) -> Response:

def run(self) -> None:
from werkzeug.serving import run_simple

from nichtparasoup._internals import _log
self.imageserver.start()
try:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import pytest # type: ignore
import pytest

from nichtparasoup.config import _DEFAULTS_FILE
from nichtparasoup.testing.config import ConfigFileTest
Expand Down
4 changes: 2 additions & 2 deletions tests/test_10_nichtparasoup/test_core/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_not_running(self) -> None:
))

def test_running_under_timeout(self) -> None:
from time import time
from time import time # isort:skip
# arrange
self.server.is_alive = lambda: True # type: ignore
self.server._stats.time_started = int(time())
Expand All @@ -92,7 +92,7 @@ def test_running_under_timeout(self) -> None:
))

def test_running_over_timeout(self) -> None:
from time import time
from time import time # isort:skip
# arrange
self.server.is_alive = lambda: True # type: ignore
self.server._stats.time_started = int(time()) - 2 * self.server.reset_timeout
Expand Down
22 changes: 10 additions & 12 deletions tests/test_10_nichtparasoup/test_imagecrawler/test_instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def _uri_sort_query(cls, uri: str) -> str:

_FILE_FETCHER = _InstagramFileFetcher({ # relative to './testdata_instagram'
'/': 'index.html',
'/static/bundles/metro/ProfilePageContainer.js/e243abb1f92c.js': 'e243abb1f92c.js',
'/static/bundles/metro/TagPageContainer.js/1bad9348735e.js': '1bad9348735e.js',
'/static/bundles/metro/Consumer.js/ebbdfced63f8.js': 'ebbdfced63f8.js',
'/graphql/query/?query_hash=f0986789a5c5d17c2400faebf16efd0d&'
'variables=%7B%22first%22%3A+1%2C+%22after%22%3A+%22%22%2C+%22tag_name%22%3A+%22foo%22%7D':
'query_hash=f0986789a5c5d17c2400faebf16efd0d&variables={first-1,after,tag_name-foo}',
Expand All @@ -50,31 +50,29 @@ def _uri_sort_query(cls, uri: str) -> str:
'variables=%7B%22first%22%3A+5%2C+%22after%22%3A+%22%22%2C+%22tag_name%22%3A+%22foo%22%7D':
'query_hash=174a5243287c5f3a7de741089750ab3b&variables={first-5,after,tag_name-foo}',
'/natgeo/?__a=1': 'natgeo.__a=1',
'/graphql/query/?query_hash=2c5d4d8b70cad329c4a6ebe3abb6eedd&'
'/graphql/query/?query_hash=51fdd02b67508306ad4484ff574a0b62&'
'variables=%7B%22first%22%3A+1%2C+%22after%22%3A+%22%22%2C+%22id%22%3A+%22787132%22%7D':
'query_hash=2c5d4d8b70cad329c4a6ebe3abb6eedd&variables={first-1,after,id-787132}',
'query_hash=51fdd02b67508306ad4484ff574a0b62&variables={first-1,after,id-787132}',
'/graphql/query/?query_hash=ff260833edf142911047af6024eb634a&'
'variables=%7B%22first%22%3A+1%2C+%22after%22%3A+%22%22%2C+%22id%22%3A+%22787132%22%7D':
'query_hash=ff260833edf142911047af6024eb634a&variables={first-1,after,id-787132}',
'/graphql/query/?query_hash=f0986789a5c5d17c2400faebf16efd0d&'
'variables=%7B%22first%22%3A+1%2C+%22after%22%3A+%22%22%2C+%22id%22%3A+%22787132%22%7D':
'query_hash=f0986789a5c5d17c2400faebf16efd0d&variables={first-1,after,id-787132}',
'/graphql/query/?query_hash=8c86fed24fa03a8a2eea2a70a80c7b6b&'
'/graphql/query/?query_hash=97b41c52301f77ce508f55e66d17620e&'
'variables=%7B%22first%22%3A+1%2C+%22after%22%3A+%22%22%2C+%22id%22%3A+%22787132%22%7D':
'query_hash=8c86fed24fa03a8a2eea2a70a80c7b6b&variables={first-1,after,id-787132}',
'/graphql/query/?query_hash=2c5d4d8b70cad329c4a6ebe3abb6eedd&'
'query_hash=97b41c52301f77ce508f55e66d17620e&variables={first-1,after,id-787132}',
'/graphql/query/?query_hash=51fdd02b67508306ad4484ff574a0b62&'
'variables=%7B%22first%22%3A+5%2C+%22after%22%3A+%22%22%2C+%22id%22%3A+%22787132%22%7D':
'query_hash=2c5d4d8b70cad329c4a6ebe3abb6eedd&variables={first-5,after,id-787132}',
'query_hash=51fdd02b67508306ad4484ff574a0b62&variables={first-5,after,id-787132}',
}, base_dir=path_join(dirname(__file__), 'testdata_instagram'))

_QUERYHASHES_EXPECTED_TAG = {'f0986789a5c5d17c2400faebf16efd0d',
'ff260833edf142911047af6024eb634a',
'174a5243287c5f3a7de741089750ab3b'}

_QUERYHASHES_EXPECTED_PROFILE = {'f0986789a5c5d17c2400faebf16efd0d',
'ff260833edf142911047af6024eb634a',
'2c5d4d8b70cad329c4a6ebe3abb6eedd',
'8c86fed24fa03a8a2eea2a70a80c7b6b'}
_QUERYHASHES_EXPECTED_PROFILE = {'97b41c52301f77ce508f55e66d17620e',
'51fdd02b67508306ad4484ff574a0b62'}


class InstagramQueryHashFinderTest(unittest.TestCase):
Expand Down Expand Up @@ -333,7 +331,7 @@ def test_loader(self) -> None:
class InstagramProfileTest(unittest.TestCase):

_PROFILE_ID = '787132'
_QUERY_HASH = '2c5d4d8b70cad329c4a6ebe3abb6eedd'
_QUERY_HASH = '51fdd02b67508306ad4484ff574a0b62'

def setUp(self) -> None:
InstagramProfile._query_hash = None
Expand Down

This file was deleted.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_propagate_errors(self) -> None:
pass

def test_detect_duplicates(self) -> None:
import re
import re # isort:skip
# arrange
tester = ConfigFileTest()
test_file = path.join(self._TESTDATA_DIR, 'duplicates.yaml')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test__get_file_uri__unknown_basedir(self) -> None:
filefetcher._get_file_uri('unknown_file')

def test_get_stream(self) -> None:
from urllib.response import addinfourl
from urllib.response import addinfourl # isort:skip
# arrange
filefetcher = FileFetcher(
dict(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_10_nichtparasoup/test_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class VersionTest(unittest.TestCase):

def test_version_known(self) -> None:
# arrange
from nichtparasoup import VERSION
from nichtparasoup import VERSION # isort:skip
# assert
self.assertFalse(
VERSION.startswith('UNKNOWN'),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from glob import glob
from os.path import basename, dirname, join as path_join, realpath

import pytest # type: ignore
import pytest
from ddt import ddt, idata, unpack # type: ignore

from nichtparasoup.testing.config import ConfigFileTest
Expand Down

0 comments on commit 1de8060

Please sign in to comment.