-
Notifications
You must be signed in to change notification settings - Fork 2
/
Text_Gen.py
103 lines (78 loc) · 2.69 KB
/
Text_Gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
from IPython.display import display, Javascript
from base64 import b64decode
import cv2
import requests
from PIL import Image
from io import BytesIO
import numpy as np
import time
import os
# Open the default camera (device 0) and create the preview window.
cam = cv2.VideoCapture(0)
cv2.namedWindow('Photo')

# Azure Cognitive Services key.
# NOTE(security): a literal key was hard-coded and committed here — it should
# be rotated. Prefer supplying it via the environment; the literal is kept
# only as a backward-compatible fallback.
subscription_key = os.environ.get(
    "AZURE_SUBSCRIPTION_KEY", "1403fda12aed4df293653a40299e476d")
if not subscription_key:
    # `assert` is stripped under `python -O`, so raise explicitly instead.
    raise RuntimeError("Azure subscription key is missing; "
                       "set AZURE_SUBSCRIPTION_KEY")
def analyzeImg(image_data):
    """Return the most relevant caption for an image.

    Posts the raw encoded image bytes to the Azure Computer Vision v2.0
    "analyze" endpoint and returns the top caption, capitalized.

    Parameters:
        image_data: bytes of an encoded image (e.g. PNG/JPEG).

    Returns:
        str: the best caption, or "" when the service finds none.

    Raises:
        requests.HTTPError: if the service responds with an error status.
    """
    vision_base_url = "https://westcentralus.api.cognitive.microsoft.com/vision/v2.0/"
    analyze_url = vision_base_url + "analyze"
    headers = {'Ocp-Apim-Subscription-Key': subscription_key,
               'Content-Type': 'application/octet-stream'}
    params = {'visualFeatures': 'Categories,Description,Color'}
    # timeout= keeps a network stall from hanging the whole script.
    response = requests.post(
        analyze_url, headers=headers, params=params, data=image_data,
        timeout=30)
    response.raise_for_status()
    analysis = response.json()
    # The service may legitimately return zero captions — guard the [0]
    # access instead of letting an IndexError escape.
    captions = analysis.get("description", {}).get("captions", [])
    if not captions:
        return ""
    return captions[0]["text"].capitalize()
def OCRImg(image_data):
    """Run OCR on an image and return the recognized words.

    Posts the raw encoded image bytes to the Azure Computer Vision v2.0
    "ocr" endpoint (language auto-detect, orientation detection on) and
    flattens the regions -> lines -> words hierarchy into a word list.

    Parameters:
        image_data: bytes of an encoded image (e.g. PNG/JPEG).

    Returns:
        list[str]: recognized words in service order; empty when no text
        regions were detected.

    Raises:
        requests.HTTPError: if the service responds with an error status.
    """
    vision_base_url = "https://westcentralus.api.cognitive.microsoft.com/vision/v2.0/"
    ocr_url = vision_base_url + "ocr"
    headers = {'Ocp-Apim-Subscription-Key': subscription_key,
               'Content-Type': 'application/octet-stream'}
    params = {'language': 'unk', 'detectOrientation': 'true'}
    # timeout= keeps a network stall from hanging the whole script.
    response = requests.post(ocr_url, headers=headers, params=params,
                             data=image_data, timeout=30)
    response.raise_for_status()
    analysis = response.json()
    # .get(..., []) tolerates a response without "regions" instead of
    # raising KeyError; the triple loop is flattened into one comprehension.
    return [word["text"]
            for region in analysis.get("regions", [])
            for line in region["lines"]
            for word in line["words"]]
img_counter = 0  # kept for compatibility; the original never incremented it
while True:
    ret, frame = cam.read()
    # BUG FIX: check the capture result BEFORE displaying. When read()
    # fails, `frame` is None and cv2.imshow(None) crashes; the original
    # called imshow first and only then tested `ret`.
    if not ret:
        break
    cv2.imshow("feelYourWay", frame)
    k = cv2.waitKey(30)
    if k % 256 == 27:
        # ESC pressed: quit without analyzing anything.
        break
    elif k % 256 == 32:
        # SPACE pressed: snapshot the current frame, encode as PNG, OCR it.
        # BUG FIX: OpenCV frames are BGR but PIL interprets arrays as RGB,
        # so the original uploaded a channel-swapped image; convert first.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        im = Image.fromarray(rgb_frame)
        buffer = BytesIO()
        im.save(buffer, format='PNG')
        png_bytes = buffer.getvalue()
        # analyzeImg(png_bytes) would caption the scene; only OCR is used.
        ocr = OCRImg(png_bytes)
        print(" ".join(ocr))
        time.sleep(5)  # leave the result on screen briefly before exiting
        break
cam.release()
cv2.destroyAllWindows()
# In[ ]: