Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sanitize input #173

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
10 changes: 7 additions & 3 deletions benchmarks/000.microbenchmarks/010.sleep/python/function.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@

from time import sleep

def handler(event):

# start timing
sleep_time = event.get('sleep')
sleep_time = event.get('sleep', None)
if sleep_time is None:
return { "status": "failure", "result": "Error: Key 'sleep' not found on input data." }
elif not isinstance(sleep_time, (int, float)):
return { "status": "failure", "result": "Error: Unexpected type for 'sleep' (expected int or float)"}

sleep(sleep_time)
return { 'result': sleep_time }
return { "status": "success", "result": "Returned with no error", "measurement": sleep_time }
Original file line number Diff line number Diff line change
@@ -1,28 +1,44 @@
import csv
import json
import socket
from datetime import datetime
from time import sleep
from jsonschema import validate
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you try running any of the benchmarks, e.g., on AWS? I think that jsonschema should be added to the requirements.txt of each benchmark.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't test-run the benchmarks on AWS. I really need assistance with this.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@niranjank2022 If the issue is that you cannot use the free tier of AWS, then please let me know - I'm happy to do the testing on this PR. Otherwise, if you found deploying and running benchmarks too complicated or not documented well enough, then I'm happy to help with that as well - just let me know.


from . import storage

def handler(event):

schema = {
"type": "object",
"required": [ "request_id", "server-address", "server-port", "repetitions", "output-bucket" ],
"properties": {
"request-id": {"type": "integer"},
"server-address": {"type": "integer"},
"server-port": {"type": "integer"},
"repetitions": {"type": "integer"},
"output-bucket": {"type": "object"}
}
}
try:
validate(event, schema=schema)
except:
return { 'status': 'failure', 'result': 'Some value(s) is/are not found in JSON data or of incorrect type' }

request_id = event['request-id']
address = event['server-address']
port = event['server-port']
repetitions = event['repetitions']
output_bucket = event.get('output-bucket')
times = []
output_bucket = event['output-bucket']

i = 0
times = []
socket.setdefaulttimeout(3)
server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server_socket.bind(('', 0))
message = request_id.encode('utf-8')
adr = (address, port)
consecutive_failures = 0
while i < repetitions + 1:
while i <= repetitions:
try:
send_begin = datetime.now().timestamp()
server_socket.sendto(message, adr)
Expand All @@ -32,8 +48,8 @@ def handler(event):
i += 1
consecutive_failures += 1
if consecutive_failures == 5:
print("Can't setup the connection")
break
server_socket.close()
return { 'status': 'failure', 'result': 'Unable to setup connection' }
continue
if i > 0:
times.append([i, send_begin, recv_end])
Expand All @@ -42,14 +58,12 @@ def handler(event):
server_socket.settimeout(2)
server_socket.close()

if consecutive_failures != 5:
with open('/tmp/data.csv', 'w', newline='') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
writer.writerow(["id", "client_send", "client_rcv"])
for row in times:
writer.writerow(row)

client = storage.storage.get_instance()
key = client.upload(output_bucket, 'results-{}.csv'.format(request_id), '/tmp/data.csv')

return { 'result': key }
with open('/tmp/data.csv', 'w', newline='') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
writer.writerow(["id", "client_send", "client_rcv"])
for row in times:
writer.writerow(row)

client = storage.storage.get_instance()
key = client.upload(output_bucket, f'results-{request_id}.csv', '/tmp/data.csv')
return { 'status': 'success', 'result': 'Returned with no error', 'measurement': key }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is correct, but we also need to update the experiment in sebs/experiments/... to make sure it now checks for the 'measurement' key when downloading results.

Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,39 @@
import socket
from datetime import datetime
from time import sleep
from jsonschema import validate

from . import storage

def handler(event):

schema = {
"type": "object",
"required": [ "request_id", "server-address", "server-port", "repetitions", "output-bucket", "income-timestamp" ],
"properties": {
"request-id": {"type": "integer"},
"server-address": {"type": "integer"},
"server-port": {"type": "integer"},
"repetitions": {"type": "integer"},
"output-bucket": {"type": "object"},
"income-timestamp": {"type": "number"}
}
}
try:
validate(event, schema=schema)
except:
# !? To return 'measurement': {'bucket-key': None, 'timestamp': event['income-timestamp']}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leftover comments?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had a question I wanted to clarify, which is why I left that comment in. When there is an exception, is it enough to return only {'result', 'status'}? I noticed that in some cases certain values do exist and could still be returned. For example, here 'timestamp' could be returned in 'measurement'.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@niranjank2022 Yes, in that scenario we should return the exception contents (as a string) and a failure status.

return { 'status': 'failure', 'result': 'Some value(s) is/are not found in JSON data or of incorrect type' }

request_id = event['request-id']
address = event['server-address']
port = event['server-port']
repetitions = event['repetitions']
output_bucket = event.get('output-bucket')
times = []
print("Starting communication with {}:{}".format(address, port))
output_bucket = event['output-bucket']

i = 0
times = []
print(f"Starting communication with {address}:{port}")
socket.setdefaulttimeout(4)
server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
Expand All @@ -35,14 +55,15 @@ def handler(event):
i += 1
consecutive_failures += 1
if consecutive_failures == 7:
print("Can't setup the connection")
break
server_socket.close()
# !? To return 'measurement': {'bucket-key': None, 'timestamp': event['income-timestamp']}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leftover comments?

return { 'status': 'failure', 'result': 'Unable to setup connection' }
continue
if i > 0:
times.append([i, send_begin, recv_end])
cur_time = recv_end - send_begin
print("Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller))
if cur_time > cur_min and cur_min > 0:
print(f"Time {cur_time} Min Time {cur_min} NotSmaller {measurements_not_smaller}")
if cur_time > cur_min > 0:
measurements_not_smaller += 1
if measurements_not_smaller == repetitions:
message = "stop".encode('utf-8')
Expand All @@ -56,16 +77,13 @@ def handler(event):
server_socket.settimeout(4)
server_socket.close()

if consecutive_failures != 5:
with open('/tmp/data.csv', 'w', newline='') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
writer.writerow(["id", "client_send", "client_rcv"])
for row in times:
writer.writerow(row)

client = storage.storage.get_instance()
key = client.upload(output_bucket, 'results-{}.csv'.format(request_id), '/tmp/data.csv')
else:
key = None
with open('/tmp/data.csv', 'w', newline='') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
writer.writerow(["id", "client_send", "client_rcv"])
for row in times:
writer.writerow(row)

client = storage.storage.get_instance()
key = client.upload(output_bucket, f'results-{request_id}.csv', '/tmp/data.csv')

return { 'result': {'bucket-key': key, 'timestamp': event['income-timestamp']} }
return { 'status': 'success', 'result': 'Returned with no error', 'measurement': {'bucket-key': key, 'timestamp': event['income-timestamp']} }
20 changes: 17 additions & 3 deletions benchmarks/000.microbenchmarks/040.server-reply/python/function.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,27 @@

import socket
from time import sleep
from jsonschema import validate

def handler(event):

scheme = {
"type": "object",
"required": ["ip-address", "port"],
"properties": {
"ip-address": {"type": "number"},
"port": {"type": "integer"}
}
}
try:
validate(event, schema=scheme)
except:
return { 'status': 'failure', 'result': 'Some value(s) is/are not found in JSON data or of incorrect type' }

# start timing
addr = (event.get('ip-address'), event.get('port'))
addr = (event['ip-address'], event['port'])

socket.setdefaulttimeout(20)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(addr)
msg = s.recv(1024).decode()
return {"result": msg}
return { 'status': 'success', 'result': 'Returned with no error', "measurement": msg }
25 changes: 20 additions & 5 deletions benchmarks/100.webapps/110.dynamic-html/python/function.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from datetime import datetime
from random import sample
from os import path
from time import time
from time import time_ns
from jsonschema import validate

import os

from jinja2 import Template
Expand All @@ -10,13 +12,26 @@

def handler(event):

scheme = {
"type": "object",
"required": ["username", "random_len"],
"properties": {
"username": {"type": "string"},
"random_len": {"type": "integer"}
}
}
try:
validate(event, schema=scheme)
except:
return { 'status': 'failure', 'result': 'Some value(s) is/are not found in JSON data or of incorrect type' }
# start timing
name = event.get('username')
size = event.get('random_len')
name = event['username']
size = event['random_len']

cur_time = datetime.now()
random_numbers = sample(range(0, 1000000), size)
template = Template( open(path.join(SCRIPT_DIR, 'templates', 'template.html'), 'r').read())
template = Template(open(path.join(SCRIPT_DIR, 'templates', 'template.html'), 'r').read())
html = template.render(username = name, cur_time = cur_time, random_numbers = random_numbers)
# end timing
# dump stats
return {'result': html}
return { 'status': 'success', 'result': 'Returned with no error', 'measurement': html }
38 changes: 28 additions & 10 deletions benchmarks/100.webapps/120.uploader/python/function.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,38 @@

import datetime
import os
import uuid

import urllib.request
from jsonschema import validate

from . import storage
client = storage.storage.get_instance()


def handler(event):

output_bucket = event.get('bucket').get('output')
url = event.get('object').get('url')
scheme = {
"type": "object",
"required": ["bucket", "object"],
"properties": {
"bucket": {
"type": "object",
"required": ["output"]
},
"object": {
"type": "object",
"required": ["url"]
}
}
}

try:
validate(event, schema=scheme)
except:
return { 'status': 'failure', 'result': 'Some value(s) is/are not found in JSON data or of incorrect type' }

output_bucket = event['bucket']['output']
url = event['object']['url']
name = os.path.basename(url)
download_path = '/tmp/{}'.format(name)
download_path = f'/tmp/{name}'

process_begin = datetime.datetime.now()
urllib.request.urlretrieve(url, filename=download_path)
Expand All @@ -28,12 +46,12 @@ def handler(event):
process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1)
return {
'result': {
'status': 'success',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to update the results parsing in sebs/faas/function.py and sebs/{platform}/triggers.py - make sure that we check for status, and then retrieve results from measurement.

'result': 'Returned with no error',
'measurement': {
'bucket': output_bucket,
'url': url,
'key': key_name
},
'measurement': {
'key': key_name,
'download_time': 0,
'download_size': 0,
'upload_time': upload_time,
Expand Down
39 changes: 30 additions & 9 deletions benchmarks/200.multimedia/210.thumbnailer/python/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import uuid
from urllib.parse import unquote_plus
from PIL import Image
from jsonschema import validate

from . import storage
client = storage.storage.get_instance()
Expand All @@ -27,11 +28,31 @@ def resize_image(image_bytes, w, h):

def handler(event):

input_bucket = event.get('bucket').get('input')
output_bucket = event.get('bucket').get('output')
key = unquote_plus(event.get('object').get('key'))
width = event.get('object').get('width')
height = event.get('object').get('height')
scheme = {
"type": "object",
"required": ["bucket", "object"],
"properties": {
"bucket": {
"type": "object",
"required": ["output", "input"]
},
"object": {
"type": "object",
"required": ["key", "width", "height"]
}
}
}

try:
validate(event, schema=scheme)
except:
return { 'status': 'failure', 'result': 'Some value(s) is/are not found in JSON data or of incorrect type' }

input_bucket = event['bucket']['input']
output_bucket = event['bucket']['output']
key = unquote_plus(event['object']['key'])
width = event['object']['width']
height = event['object']['height']
# UUID to handle multiple calls
#download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key)
#upload_path = '/tmp/resized-{}'.format(key)
Expand All @@ -55,11 +76,11 @@ def handler(event):
upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1)
process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
return {
'result': {
'bucket': output_bucket,
'key': key_name
},
'status': 'success',
'result': 'Returned with no error',
'measurement': {
'bucket': output_bucket,
'key': key_name,
'download_time': download_time,
'download_size': len(img),
'upload_time': upload_time,
Expand Down
Loading