-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathp1804247Helper.py
executable file
·355 lines (352 loc) · 21.5 KB
/
p1804247Helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#!/usr/bin/env python3
# ==============================================================
# SINGAPORE POLYTECHNIC |
# SCHOOL OF COMPUTING |
# ST2411 |
# PROGRAMMING IN PYTHON AND C |
# NAME: ALEK KWEK |
# CLASS: DISM/FT/1A/23 |
# ADMIN NO: 1804247 |
# YEAR: 2019 |
# ==============================================================
# source file: p1804247Helper.py
# Define all the helper functions for the p1804247Server.py
# IMPORT
import socket
import os
import re as r
import operator
import time as t
import datetime as d
# ===================================================================================
# handler() maintains the client session, sending a reply back to the client
# for each message, until the client sends a 'q' or an 'x'.
# The client request handling is refactored into an independent
# function, handler.
# After this refactoring, the handler function can easily be used with a
# threading model to handle multiple requests from more than one client.
# In this version, however, it still handles one client at a time.
# Module-level state. GenerateReport rebinds the first four names through
# `global` every time it builds a report, so after a call they describe the
# most recently requested city.
totalItems = 0 # number of distinct item categories found in the city's file
totalSales = 0 # running sum of every sales amount read from the city's file
item_dict = {} # maps item category name -> accumulated sales amount (float)
sorted_list = [] # (item, amount) tuples from item_dict, highest amount first
output = "" # text destined for the client; no function visible here rebinds it via `global`
# Directory that is searched (recursively) for "<City>.txt" report files when
# the caller does not supply one.
_DEFAULT_REPORTS_DIR = "/home/st2411/Misc/reports/"

# Horizontal rule printed above and below each ranking table.
_RULE = "======================================================"

# Width budget for the item column of a ranking table.
_ITEM_COLUMN_WIDTH = 44

# The patterns below are compiled once at import time instead of being
# re-parsed for every sales line.  They are unchanged from the original
# expressions; only the string literals were made raw so that "\D", "\W" and
# "\d" no longer rely on Python passing unknown escapes through.
#
# First run of non-digit characters that ends in a non-word character.  For a
# line such as "2012-01-01<TAB>09:00<TAB>Fort Worth<TAB>Women's Clothing<TAB>153.57<TAB>Visa"
# this captures "<TAB>Fort Worth<TAB>Women's Clothing<TAB>".
# NOTE(review): assumes the fields are tab separated - confirm against the
# C program that writes the report files.
_TEXT_FIELDS_RE = r.compile(r"[\D]+[\W]+")

# Tab-delimited city field in its various spellings: one word ("Madison"),
# two words ("New York"), abbreviated ("St. Petersburg"), dashed
# ("Winston–Salem") and up to three words.  The "A-z" range in the second
# alternative is kept exactly as it was written.
_CITY_FIELD_RE = r.compile(r"(\t[a-zA-Z]\s*[a-zA-Z]*\t|\t[a-zA-Z]+\s*[a-zA-z]*\t|\t\w+\.+\-*\s*\w*\t|\t\w*\–*\w+\s?\w?\t|\t\w+\s*\w*\s*\w*\t)")

# Sales amount: a decimal number, or an integer standing alone between tabs.
_SALES_RE = r.compile(r"(\d+\.\d+|\t\d+\t)")


def _column_padding(label):
    """Return the blanks that separate an item name from its amount.

    One blank is always produced; shorter names get extra blanks so that
    name plus padding is 45 characters wide for names of up to 44 characters.
    """
    return " " * (1 + max(0, _ITEM_COLUMN_WIDTH - len(label)))


def _extract_item(sales_line):
    """Return the item category named on one sales line.

    Raises:
        ValueError: if the line has no city/item text section.
    """
    found = _TEXT_FIELDS_RE.search(sales_line)
    if found is None:
        raise ValueError("no item field in sales line: {!r}".format(sales_line))
    # Collapse the city field into a single tab, then drop every tab so that
    # only the item name is left.
    without_city = _CITY_FIELD_RE.sub("\t", found.group(0))
    return without_city.replace("\t", "")


def _extract_sales(sales_line):
    """Return the sales amount on one sales line as a float.

    Raises:
        ValueError: if the line contains no sales amount.
    """
    found = _SALES_RE.search(sales_line)
    if found is None:
        raise ValueError("no sales amount in sales line: {!r}".format(sales_line))
    # float() ignores the surrounding tabs of the integer alternative.
    return float(found.group(1))


def _find_report_file(reports_dir, file_name):
    """Return the path of the first file called exactly file_name, or None.

    The directory tree below reports_dir is searched recursively.  The name
    must match exactly: a suffix test would let "York.txt" select
    "New York.txt" and ".txt" select any report.
    """
    for root, _dirs, files in os.walk(reports_dir):
        if file_name in files:
            return os.path.join(root, file_name)
    return None


def _ranking_rows(ranked_items):
    """Format (item, amount) pairs as aligned table rows."""
    return [
        "{}".format(item) + _column_padding(item) + "{:.2f}".format(amount)
        for item, amount in ranked_items
    ]


def _format_report(city, total_sales, ranked_items):
    """Build the report text for one city.

    ranked_items is a list of (item, amount) tuples ordered from the highest
    amount to the lowest.
    """
    category_count = len(ranked_items)
    # An empty report file has no categories; report an average of 0.00
    # instead of dividing by zero.
    average = total_sales / category_count if category_count else 0.0

    if category_count >= 3:
        top_heading = "Top Three Item Categories"
    else:
        top_heading = "Top " + str(category_count) + " Item Categories"

    lines = [
        "Total sales from {} is {:.2f}".format(city, total_sales),
        "The Average Sales From {} Item Categories:".format(category_count),
        " {:.2f}".format(average),
        "",
        top_heading,
        _RULE,
    ]
    lines.extend(_ranking_rows(ranked_items[:3]))
    lines.append(_RULE)

    # The bottom table is only shown when it cannot overlap the top table.
    if category_count >= 6:
        lines.append("Bottom Three Item Categories")
        lines.append(_RULE)
        lines.extend(_ranking_rows(ranked_items[-3:]))
        lines.append(_RULE)
    return "\n".join(lines)


def GenerateReport(cityName, reports_dir=_DEFAULT_REPORTS_DIR):
    """Build the sales report for one city.

    The city name is title-cased ("new york" -> "New York") and the file
    "<City>.txt" is looked up below reports_dir.  Every non-blank line of
    that file contributes its sales amount to its item category; the report
    lists the total, the average per category, the top three categories and,
    when there are at least six categories, the bottom three.

    Args:
        cityName: city name as typed by the client, in any letter case.
        reports_dir: root directory holding the per-city report files.

    Returns:
        The report text, or "\\nInvalid City Name. Please Try Again" when no
        file is named exactly "<City>.txt".

    Raises:
        ValueError: if a non-blank line has no item field or no sales amount.

    Side effects:
        Rebinds the module-level names totalItems, totalSales, item_dict and
        sorted_list to the figures of this request.
    """
    global totalItems, totalSales, item_dict, sorted_list

    city = cityName.title()

    # Start every request from a clean slate so that figures of a previous
    # city never leak into this one.
    totalItems = 0
    totalSales = 0
    item_dict = {}
    sorted_list = []

    report_path = _find_report_file(reports_dir, city + ".txt")
    if report_path is None:
        return "\nInvalid City Name. Please Try Again"

    sales_by_item = {}
    running_total = 0
    # The with-statement closes the file even when a line fails to parse.
    with open(report_path, "r") as datafile:
        for line in datafile:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            item = _extract_item(line)
            amount = _extract_sales(line)
            sales_by_item[item] = sales_by_item.get(item, 0) + amount
            running_total += amount

    # Highest amount first; sorted() is stable, so ties keep file order.
    ranked = sorted(sales_by_item.items(), key=operator.itemgetter(1), reverse=True)

    totalItems = len(sales_by_item)
    totalSales = running_total
    item_dict = sales_by_item
    sorted_list = ranked

    return _format_report(city, running_total, ranked)
def handler(con):
    """Serve one client connection until it quits or disconnects.

    Every message received is treated as a city name: the reply is the
    report from GenerateReport framed by a timestamp taken before and one
    taken after the report was generated, followed by "See You Again".

    Args:
        con: connected socket of the client.

    Returns:
        "q" or "x" when the client sent that quit command, or "" when the
        client dropped the connection.

    The connection is closed on every way out of this function, including
    when the client drops or report generation raises.
    """
    stamp_format = "%a %b %d %H:%M:%S"
    try:
        while True:
            buf = con.recv(5000)  # bytes; an empty result means the peer is gone
            if not buf:
                return ""
            # Decode once; undecodable bytes become U+FFFD instead of raising,
            # so a malformed request ends in an "invalid city" reply.
            text = buf.decode(errors="replace")
            print("USER INPUT:", text)
            if buf in (b"q", b"x"):
                return text
            started = d.datetime.now()
            body = GenerateReport(text)
            finished = d.datetime.now()
            report = "".join([
                "\n",
                started.strftime(stamp_format),
                "\n",
                body,
                "\n\n",
                finished.strftime(stamp_format),
                "\nSee You Again",
            ])
            con.sendall(report.encode())
    finally:
        con.close()
# MAIN PROGRAM STARTS HERE
serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
serversocket.bind(('0.0.0.0', 8089))
serversocket.listen(5) # become a server socket, maximum 5 connections