"""Minimal command-line front end for the ptth HTTP client.

Invoke with exactly one argument, an ``http://`` URL.  The URL is fetched
with a GET request and the raw response body is printed.
"""
import sys

# Explicit names instead of a wildcard import, so it is clear what this
# script relies on from the client module.
from client import BadUrlError, HTTPResponse, request

# Browser-like User-Agent sent with every request issued by the CLI.
USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)


def print_status(response: HTTPResponse):
    """Print the short textual form of *response* (its ``__str__``)."""
    print(response)


def print_headers(response: HTTPResponse):
    """Print every response header as a quoted ``"name":"value"`` line."""
    for key, value in response.headers.items():
        print(f"\"{key}\":\"{value}\"")


def print_body(response: HTTPResponse):
    """Print the raw response body as returned by ``get_body_raw()``."""
    print(response.get_body_raw())


def main(argv=None):
    """Run the CLI and return the process exit status.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        ``0`` on success, ``1`` on a usage error or an unparsable URL.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print("Expected : 'app '")
        return 1

    try:
        res = request("GET", argv[1], headers={"User-Agent": USER_AGENT})
    except BadUrlError as e:
        print(e)
        return 1

    print_body(res)
    return 0


if __name__ == "__main__":
    sys.exit(main())
def readline(connection, index=0):
    """Read one line from *connection*, one byte at a time.

    Bytes are pulled with ``connection.receive(1)`` until a line feed is
    seen.  The line feed is consumed but not returned; a preceding carriage
    return is kept in the result.  Reading also stops when ``receive``
    returns an empty bytes object, so a stream that ends without a line
    feed yields the partial line instead of looping forever.

    Args:
        connection: Object exposing ``receive(count) -> bytes``.
        index: Running byte counter to continue from.

    Returns:
        ``(line, index)`` where *index* has been advanced by the number of
        bytes stored in *line*.
    """
    line = bytearray()
    while True:
        byte = connection.receive(1)
        # An empty read means no more data will arrive; treat it like
        # end-of-line so callers can detect truncation themselves.
        if not byte or byte == b'\n':
            break
        line += byte
    return bytes(line), index + len(line)


def parse_response(method, conn):
    """Read and parse a single HTTP response from *conn*.

    The status line and header block are read line by line, then up to
    ``Content-Length`` bytes of body are read.  Only ``Content-Length``
    framing is handled here; without that header the body is left empty.

    Args:
        method: Request method that produced the response.  For ``HEAD``
            (any letter case) no body is read, because the advertised
            ``Content-Length`` describes a body that is never sent.
        conn: Object exposing ``receive(count) -> bytes``.

    Returns:
        An ``HTTPResponse`` whose ``version``, ``code`` and ``reason`` are
        strings, ``headers`` a ``CaseInsensitiveDict`` and ``body`` bytes.
        The body may be shorter than ``Content-Length`` if the stream ends
        early.

    Raises:
        ConnectionError: The stream ended before any status line arrived.
        ValueError: The status line, a header line or ``Content-Length``
            is malformed.
    """
    status_line, _ = readline(conn)
    if not status_line.strip():
        raise ConnectionError(
            "connection closed before a status line was received"
        )

    # The reason phrase is optional, so accept two or three fields.
    parts = status_line.decode().strip().split(None, 2)
    if len(parts) < 2:
        raise ValueError(f"malformed status line: {status_line!r}")
    version, code = parts[0], parts[1]
    reason = parts[2].strip() if len(parts) == 3 else ""

    headers = CaseInsensitiveDict()
    while True:
        raw, _ = readline(conn)
        # A blank line (CRLF or bare LF) ends the header block; an empty
        # read at end of stream looks the same and also stops the loop.
        if not raw.strip():
            break
        name, colon, value = raw.decode().partition(":")
        if not colon:
            raise ValueError(f"malformed header line: {raw!r}")
        headers[name.strip()] = value.strip()

    remaining = int(headers.get("Content-Length", 0))
    if method.upper() == "HEAD":
        remaining = 0

    chunks = []
    while remaining > 0:
        chunk = conn.receive(remaining)
        if not chunk:
            # Stream ended before the advertised length was delivered;
            # return what arrived rather than spinning on empty reads.
            break
        chunks.append(chunk)
        remaining -= len(chunk)

    return HTTPResponse(
        version=version,
        code=code,
        reason=reason,
        headers=headers,
        body=b"".join(chunks),
    )
def request(self, method, url, body="", headers=None):
    """Serialize an HTTP/1.1 request and send it with ``self.send``.

    Args:
        method: HTTP method name; upper-cased before sending.
        url: Request target, written verbatim on the request line.
        body: Payload as ``str`` (encoded with the default UTF-8 codec)
            or ``bytes``.
        headers: Optional mapping of extra header fields.  It is copied,
            never modified.

    Header handling:
        * ``Host`` defaults to ``self.host``, with ``:port`` appended when
          ``self.port`` differs from ``self.default_port``.
        * ``Content-Length`` is always set to the encoded payload size,
          replacing any caller-supplied value.
        * ``Accept-Encoding`` defaults to ``identity`` (no content coding)
          unless the caller supplied one.
    """
    if isinstance(body, str):
        payload = body.encode()
    else:
        payload = bytes(body or b"")

    # Work on a copy so the caller's dict is left untouched, and drop any
    # caller-supplied Content-Length: it is recomputed from the payload.
    fields = {
        name: value
        for name, value in (headers or {}).items()
        if name.lower() != "content-length"
    }
    # Header names are case-insensitive, so compare in lower case.
    supplied = {name.lower() for name in fields}

    # https://httpwg.org/specs/rfc9110.html#field.host
    if "host" not in supplied:
        if self.port == self.default_port:
            fields["Host"] = self.host
        else:
            fields["Host"] = f"{self.host}:{self.port}"
    # https://httpwg.org/specs/rfc9110.html#field.content-length
    # Counted in bytes, not characters.
    fields["Content-Length"] = str(len(payload))
    # https://httpwg.org/specs/rfc9110.html#field.accept-encoding
    if "accept-encoding" not in supplied:
        fields["Accept-Encoding"] = "identity"

    head = f"{method.upper()} {url} {self._version_str}\r\n"
    head += "".join(f"{name}: {value}\r\n" for name, value in fields.items())
    # One empty line ends the header block; the payload follows with no
    # extra bytes so it matches Content-Length exactly.
    head += "\r\n"

    self.send(head.encode() + payload)
class BadUrlError(Exception):
    """Raised when a string cannot be parsed as an ``http://`` URL."""


def parse_http_url(url: str):
    """Split an ``http://`` URL into ``(host, port, abs_path, query)``.

    The scheme is matched case-insensitively.  The host runs up to the
    first ``:``, ``/`` or ``?``; an optional ``:port`` follows.  The path
    runs up to the first ``?`` and defaults to ``"/"``; the query is the
    rest of the URL and keeps its leading ``?`` (``""`` when absent).

    Args:
        url: The URL text to parse.

    Returns:
        A tuple ``(host, port, abs_path, query)`` with *port* as ``int``
        (``80`` when the URL does not give one).

    Raises:
        BadUrlError: The scheme is not ``http://``, the host is empty, or
            the port is not a decimal number in the range 0-65535.
    """
    scheme = "http://"
    prefix = url[:len(scheme)]
    if prefix.lower() != scheme:
        raise BadUrlError(f"Not an http url scheme: `{prefix}`")

    remainder = url[len(scheme):]
    # The authority (host[:port]) ends at the first '/' or '?', so a URL
    # with a query but no path still yields a clean host and port.
    cuts = [pos for pos in (remainder.find("/"), remainder.find("?")) if pos != -1]
    boundary = min(cuts, default=len(remainder))
    authority, target = remainder[:boundary], remainder[boundary:]

    host, _, port_text = authority.partition(":")
    if not host:
        raise BadUrlError(f"Missing host in url: `{url}`")

    if port_text == "":
        port = 80
    elif port_text.isascii() and port_text.isdigit():
        # ASCII digits only: str.isnumeric()/isdigit() alone also accept
        # characters such as superscripts that int() cannot convert.
        port = int(port_text)
        if port > 65535:
            raise BadUrlError(f"Port out of range: `{port_text}`")
    else:
        raise BadUrlError(f"Port is not a number: `{port_text}`")

    abs_path, mark, query_rest = target.partition("?")
    return (host, port, abs_path or "/", mark + query_rest)
== "__main__": + test()