diff --git a/Basic NLP for Resume Documents.ipynb b/Basic NLP for Resume Documents.ipynb deleted file mode 100644 index d66f451..0000000 --- a/Basic NLP for Resume Documents.ipynb +++ /dev/null @@ -1,980 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# NLP Understanding of Resumes\n", - "- Code basicially first converts the PDF to a Image\n", - "- Then using google cloud vision API, it converts that image to text\n", - " - Using my personal JSON Code\n", - "- " - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "invalid syntax (__init__.py, line 3)", - "output_type": "error", - "traceback": [ - "Traceback \u001b[1;36m(most recent call last)\u001b[0m:\n", - " File \u001b[0;32m\"C:\\Users\\kunal\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py\"\u001b[0m, line \u001b[0;32m3326\u001b[0m, in \u001b[0;35mrun_code\u001b[0m\n exec(code_obj, self.user_global_ns, self.user_ns)\n", - "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m1\u001b[1;36m, in \u001b[1;35m\u001b[1;36m\u001b[0m\n\u001b[1;33m from firebase import firebase\u001b[0m\n", - "\u001b[1;36m File \u001b[1;32m\"C:\\Users\\kunal\\Anaconda3\\lib\\site-packages\\firebase\\__init__.py\"\u001b[1;36m, line \u001b[1;32m3\u001b[0m\n\u001b[1;33m from .async import process_pool\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" - ] - } - ], - "source": [ - "from firebase import firebase" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "def path_leaf(path):\n", - " head, tail = ntpath.split(path)\n", - " return tail or ntpath.basename(head)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - 
"data": { - "text/plain": [ - "'Document_402.jpg'" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "path_leaf(pathString)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "pathString = '/Users/kunal/Documents/ResumeNLPVdart/Testing_Delete/Document_402.jpg'" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/\n", - "U\n", - "s\n", - "e\n", - "r\n", - "s\n", - "/\n", - "k\n", - "u\n", - "n\n", - "a\n", - "l\n", - "/\n", - "D\n", - "o\n", - "c\n", - "u\n", - "m\n", - "e\n", - "n\n", - "t\n", - "s\n", - "/\n", - "R\n", - "e\n", - "s\n", - "u\n", - "m\n", - "e\n", - "N\n", - "L\n", - "P\n", - "V\n", - "d\n", - "a\n", - "r\n", - "t\n", - "/\n", - "T\n", - "e\n", - "s\n", - "t\n", - "i\n", - "n\n", - "g\n", - "_\n", - "D\n", - "e\n", - "l\n", - "e\n", - "t\n", - "e\n", - "/\n", - "D\n", - "o\n", - "c\n", - "u\n", - "m\n", - "e\n", - "n\n", - "t\n", - "_\n", - "4\n", - "0\n", - "2\n", - ".\n", - "j\n", - "p\n", - "g\n" - ] - } - ], - "source": [ - "for i in pathString:\n", - " print(i)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import libaries" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Convert PDF to Image" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "def convert_pdf_2_image(uploaded_image_path, uploaded_image):\n", - " project_dir = os.getcwd()\n", - " 
os.chdir(uploaded_image_path)\n", - " file_name = str(uploaded_image).replace('.pdf','')\n", - " output_file = file_name+'.jpg'\n", - " pages = convert_from_path(uploaded_image, 200,poppler_path='/Users/kunal/Documents/VdartWorking/Poppler/poppler-0.68.0_x86/poppler-0.68.0/bin/')\n", - " for page in pages:\n", - " page.save(output_file, 'JPEG')\n", - " break\n", - " #os.chdir(project_dir)\n", - " #img = Image.open(output_file)\n", - " #img = img.resize(img_size, PIL.Image.ANTIALIAS)\n", - " #img.save(output_file)\n", - " return output_file" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'Document_402.jpg'" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "convert_pdf_2_image('/Users/kunal/Documents/ResumeNLPVdart/Testing_Delete/', \"Document_402.pdf\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Convert Image to Text" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "keyDIR = \"/Users/kunal/Documents/VdartWorking/GOOGLEAPI/vdartrealfakevision-0f30bdc03946.json\"\n", - "credentials = service_account.Credentials.from_service_account_file(keyDIR)\n", - "client = vision.ImageAnnotatorClient(credentials=credentials)\n", - "with io.open('/Users/kunal/Documents/ResumeNLPVdart/Testing_Delete/Document_402.jpg', 'rb') as image_file:\n", - " content = image_file.read()\n", - "image = vision.types.Image(content=content)\n", - "response = client.text_detection(image=image)\n", - "texts = response.text_annotations\n", - "totalString = ''\n", - "for text in texts:\n", - " totalString+=text.description\n", - "totalString = totalString.rsplit(' ', 1)[0]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"NISARGA HASSAN SREEDHAR\\nSan 
Jose, California |+1 (925) 789-8911| nisarga.nishu20@gmail.com | www.linkedin.com/in/nisarga-sreedhar-39938516b\\nEDUCATION:\\nMaster's in Electrical Engineering (Computer Networking), San Jose State University, California, USA.\\nCoursework: Internetworking, Broadband communications, Network Security, Internet of Things (IoT), Voice over IP\\nBachelor of Engineering in Telecommunication Engineering, Dayananda Sagar College of Engineering, Visvesvaraya\\nTechnological University, India\\nMay 2020\\nJune 2017\\nTECHNICAL SKILLS:\\nNetwork technologies: HTTP, DNS, DHCP, HTTPS, TLS-SSL, TCP/IP, UDP, IPV4, IPV6, ICMP, OSPF, BGP, ARP, VLAN, STP,\\nSIP, IPS, IDS, NAT, IS-IS, 802.11, MPLS, WPA2, WPA3, Packet level troubleshooting\\nProgramming: Python\\nOS Platform: Linux (Ubuntu, CentOS), Kali Linux, Cisco IOS\\nTools and IDE: Advanced Design System (ADS), Wireshark, VMware Workstation, VirtualBox, GNS3, Cisco Packet Tracer, PUTTY\\nCERTIFICATION:\\nCisco Certified Network Associate (CCNA) 200-301\\nAWS Certified Cloud Practitioner (CLF-C01)\\n(In Progress)\\n(In progress)\\nEXPERIENCE:\\nJune 2019 - July 2019\\nMarmon Food & Beverages Technologies, Cornelius, India\\nNetwork Engineer Intern\\nPython based Serial Communication (IoT)\\nUsed an Iot Dongle to read a file, convert it into a packet by adding header and footer and transmit serially.\\nPython code was written to send the file from dongle to Food Holding Bin.\\nACADEMIC PROJECTS:\\nSecure routing in IoT networks\\nAug 2019 - current\\nDesign and configure an IoT based network using Cisco Packet Tracer.\\nPerform a Man in the Middle attack to one of the devices using Kali Linux.\\nDetection of the attack and solution to the problem faced.\\nIllumino: IoT Smart Light\\nAug 2019 - Dec 2019\\nCreate a hardware of an IoT smart light using Arduino ESP8266 and Cayenne IoT Platform.\\nDesigned to operate in three modes: Auto mode, Lamp mode, Security mode.\\nUse of Cayenne web application to detect temperature and 
provides a siren at thresholds.\\nVoice over IP for Wireless Ad Hoc Networks (WANET)\\nAug 2019 - Dec 2019\\nSimple Call Establishment between two clients in a WANET that have registered with the Asterisk server.\\nCall on Hold with one user client to attend another client.\\nCall Conferencing between all three clients, all performed using X-Lite softphone software.\\nExperiencing Virtualization using Virtual Box\\nJan 2019 - April 2019\\nWorked on Open vSwitch in Virtual Box on an Ubuntu machine to run ovs and its versions successfully.\\nDemonstrated how the VLANS are implemented, three VMs and one virtual switch is created.\\nAttempted to communicate between the VMs and observed the PING result.\\nCorporate Company Network Design\\nAug 2018 - Dec 2018\\n• Designed and implemented a basic corporate network topology for the interconnection between offices with\\nswitches, routers, and hosts.\\n• Implemented the design using routing protocols such as OSPF, BGP, DNS, VLAN, STP, IP, DHCP and HSRP.\\nTested and troubleshot configurations in the console to check the communication between the networks.\\nDesign of X-Band 8PSK Modulator using ADS\\nJan 2017 - April 2017\\nDesigned various components of the modulator used in a satellite at ISRO (Indian Space Research Organization), Bangalore.\\nPerformed optimization of the components at 8.75GHZ frequency using the tools available in ADS to obtain the desired results of\\nInsertion loss, Return loss and Isolation\"" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "totalString" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def convert_image_to_text():\n", - " keyDIR = \"/Users/kunal/Documents/VdartWorking/GOOGLEAPI/vdartrealfakevision-0f30bdc03946.json\"\n", - " credentials = service_account.Credentials.from_service_account_file(keyDIR)\n", - " client = 
vision.ImageAnnotatorClient(credentials=credentials)\n", - "\n", - " with io.open(MAINIMAGEFILEPNG, 'rb') as image_file:\n", - " content = image_file.read()\n", - "\n", - " image = vision.types.Image(content=content)\n", - " response = client.document_text_detection(image=image)\n", - " textDocument = []\n", - " blockConfid = []\n", - " paraConfid = []\n", - " wordConfid = []\n", - " for page in response.full_text_annotation.pages:\n", - " for block in page.blocks:\n", - " for paragraph in block.paragraphs:\n", - " for word in paragraph.words:\n", - " word_text = ''.join([symbol.text for symbol in word.symbols])\n", - " textDocument.append(word_text)\n", - " blockConfid.append(block.confidence)\n", - " paraConfid.append(paragraph.confidence)\n", - " wordConfid.append(word.confidence)\n", - "\n", - " if response.error.message:\n", - " raise Exception(\n", - " '{}\\nFor more info on error messages, check: '\n", - " 'https://cloud.google.com/apis/design/errors'.format(\n", - " response.error.message))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Basic NLP Testing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "text = \"\"\"NISARGA HASSAN SREEDHAR\n", - "San Jose, California | +1 (925) 789-8911| nisarga.nishu20@gmail.com | www.linkedin.com/in/nisarga-sreedhar-39938516b\n", - "EDUCATION:\n", - "Master’s in Electrical Engineering (Computer Networking), San Jose State University, California, USA. 
May 2020\n", - "Coursework: Internetworking, Broadband communications, Network Security, Internet of Things (IoT), Voice over IP\n", - "Bachelor of Engineering in Telecommunication Engineering, Dayananda Sagar College of Engineering, Visvesvaraya\n", - "Technological University, India June 2017\n", - "TECHNICAL SKILLS:\n", - "Network technologies: HTTP, DNS, DHCP, HTTPS, TLS-SSL, TCP/IP, UDP, IPv4, IPv6, ICMP, OSPF, BGP, ARP, VLAN, STP,\n", - "SIP, IPS, IDS, NAT, IS-IS, 802.11, MPLS, WPA2, WPA3, Packet level troubleshooting\n", - "Programming: Python\n", - "OS Platform: Linux (Ubuntu, CentOS), Kali Linux, Cisco IOS\n", - "Tools and IDE: Advanced Design System (ADS), Wireshark, VMware Workstation, VirtualBox, GNS3, Cisco Packet Tracer, PuTTY\n", - "CERTIFICATION:\n", - "• Cisco Certified Network Associate (CCNA) 200-301 (In Progress)\n", - "• AWS Certified Cloud Practitioner (CLF-C01) (In progress)\n", - "EXPERIENCE:\n", - "Marmon Food & Beverages Technologies, Cornelius, India June 2019 - July 2019\n", - "Network Engineer Intern\n", - "• Python based Serial Communication (IoT)\n", - "• Used an Iot Dongle to read a file, convert it into a packet by adding header and footer and transmit serially.\n", - "• Python code was written to send the file from dongle to Food Holding Bin.\n", - "ACADEMIC PROJECTS:\n", - "Secure routing in IoT networks Aug 2019 - current\n", - "• Design and configure an IoT based network using Cisco Packet Tracer.\n", - "• Perform a Man in the Middle attack to one of the devices using Kali Linux.\n", - "• Detection of the attack and solution to the problem faced.\n", - "Illumino: IoT Smart Light Aug 2019 - Dec 2019\n", - "• Create a hardware of an IoT smart light using Arduino ESP8266 and Cayenne IoT Platform.\n", - "• Designed to operate in three modes: Auto mode, Lamp mode, Security mode.\n", - "• Use of Cayenne web application to detect temperature and provides a siren at thresholds.\n", - "Voice over IP for Wireless Ad Hoc Networks (WANET) 
Aug 2019 - Dec 2019\n", - "• Simple Call Establishment between two clients in a WANET that have registered with the Asterisk server.\n", - "• Call on Hold with one user client to attend another client.\n", - "• Call Conferencing between all three clients, all performed using X-Lite softphone software.\n", - "Experiencing Virtualization using Virtual Box Jan 2019 - April 2019\n", - "• Worked on Open vSwitch in Virtual Box on an Ubuntu machine to run ovs and its versions successfully.\n", - "• Demonstrated how the VLANs are implemented, three VMs and one virtual switch is created.\n", - "• Attempted to communicate between the VMs and observed the PING result.\n", - "Corporate Company Network Design Aug 2018 - Dec 2018\n", - "• Designed and implemented a basic corporate network topology for the interconnection between offices with\n", - "switches, routers, and hosts.\n", - "• Implemented the design using routing protocols such as OSPF, BGP, DNS, VLAN, STP, IP, DHCP and HSRP.\n", - "• Tested and troubleshot configurations in the console to check the communication between the networks.\n", - "Design of X-Band 8PSK Modulator using ADS Jan 2017 - April 2017\n", - "• Designed various components of the modulator used in a satellite at ISRO (Indian Space Research Organization), Bangalore.\n", - "• Performed optimization of the components at 8.75GHz frequency using the tools available in ADS to obtain the desired results of\n", - "Insertion loss, Return loss and Isolation loss.\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tokens = [t for t in text.split()]\n", - "\n", - "freq = nltk.FreqDist(tokens)\n", - "\n", - "for key,val in freq.items():\n", - "\n", - " print (str(key) + ':' + str(val))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "freq.plot(20, cumulative=False)" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from nltk.corpus import stopwords" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "clean_tokens = tokens[:]\n", - "sr = stopwords.words('english')\n", - "for token in tokens:\n", - " if token in stopwords.words('english'):\n", - " clean_tokens.remove(token)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "freq.plot(20,cumulative=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from nltk.tokenize import sent_tokenize" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(sent_tokenize(text))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from nltk.tokenize import word_tokenize" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(word_tokenize(text))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from nltk.stem import PorterStemmer\n", - "from nltk.stem import WordNetLemmatizer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "stemmer = PorterStemmer()\n", - "print(stemmer.stem('working'))\n", - "lemmatizer = WordNetLemmatizer()\n", - "print(lemmatizer.lemmatize('increases'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gension nltk spacy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mongodb" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "preshed.maps does not export expected C function map_clear", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mspacy\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mnlp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mspacy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"en_core_web_sm\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mdoc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Apple is looking at buying U.K. 
startup for $1 billion\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\spacy\\__init__.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mthinc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mneural\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutil\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mprefer_gpu\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrequire_gpu\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 13\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mcli\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0minfo\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mcli_info\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mglossary\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mexplain\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\spacy\\pipeline\\__init__.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0municode_literals\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mpipes\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mTagger\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mDependencyParser\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mEntityRecognizer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mEntityLinker\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mpipes\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mTextCategorizer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mTensorizer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mPipe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSentencizer\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mmorphologizer\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mMorphologizer\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mpipes.pyx\u001b[0m in \u001b[0;36minit spacy.pipeline.pipes\u001b[1;34m()\u001b[0m\n", - "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\spacy\\pipeline\\functions.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlanguage\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mcomponent\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmatcher\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mMatcher\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mutil\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mfilter_spans\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\spacy\\matcher\\__init__.py\u001b[0m in 
\u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mmatcher\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mMatcher\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mphrasematcher\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mPhraseMatcher\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mdependencymatcher\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mDependencyMatcher\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mphrasematcher.pyx\u001b[0m in \u001b[0;36minit spacy.matcher.phrasematcher\u001b[1;34m()\u001b[0m\n", - "\u001b[1;31mImportError\u001b[0m: preshed.maps does not export expected C function map_clear" - ] - } - ], - "source": [ - "import spacy\n", - "\n", - "nlp = spacy.load(\"en_core_web_sm\")\n", - "doc = nlp(\"Apple is looking at buying U.K. 
startup for $1 billion\")\n", - "\n", - "for token in doc:\n", - " print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,\n", - " token.shape_, token.is_alpha, token.is_stop)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "import nltk\n", - "from nltk.corpus import stopwords\n", - "stop = stopwords.words('english')" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'document' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - 
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdocument\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m' '\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdocument\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mstop\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0msentences\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnltk\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msent_tokenize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdocument\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mNameError\u001b[0m: name 'document' is not defined" - ] - } - ], - "source": [ - "document = ' '.join([i for i in document.split() if i not in stop])\n", - "sentences = nltk.sent_tokenize(document)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sentences = [nltk.word_tokenize(sent) for sent in sentences]" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package averaged_perceptron_tagger to\n", - "[nltk_data] C:\\Users\\kunal\\AppData\\Roaming\\nltk_data...\n", - "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n", - "[nltk_data] date!\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nltk.download('averaged_perceptron_tagger')" - ] - }, - { - "cell_type": "code", - 
"execution_count": 36, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'sentences' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msentences\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mnltk\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpos_tag\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msent\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0msent\u001b[0m \u001b[1;32min\u001b[0m \u001b[0msentences\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mNameError\u001b[0m: name 'sentences' is not defined" - ] - } - ], - "source": [ - "sentences = [nltk.pos_tag(sent) for sent in sentences]" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "import re\n", - "import nltk\n", - "from nltk.corpus import stopwords\n", - "stop = stopwords.words('english')\n", - "\n", - "string = \"\"\"\n", - "NISARGA HASSAN SREEDHAR\n", - "San Jose, California | +1 (925) 789-8911| nisarga.nishu20@gmail.com | www.linkedin.com/in/nisarga-sreedhar-39938516b\n", - "\"\"\"\n", - "\n", - "def extract_phone_numbers(string):\n", - " r = re.compile(r'(\\d{3}[-\\.\\s]??\\d{3}[-\\.\\s]??\\d{4}|\\(\\d{3}\\)\\s*\\d{3}[-\\.\\s]??\\d{4}|\\d{3}[-\\.\\s]??\\d{4})')\n", - " phone_numbers = r.findall(string)\n", - " return [re.sub(r'\\D', '', number) for number in phone_numbers]\n", - "\n", - "def extract_email_addresses(string):\n", - " r = re.compile(r'[\\w\\.-]+@[\\w\\.-]+')\n", - " return r.findall(string)\n", - "\n", - "def ie_preprocess(document):\n", - " document = ' '.join([i for i in document.split() if i not in stop])\n", - " sentences = 
nltk.sent_tokenize(document)\n", - " sentences = [nltk.word_tokenize(sent) for sent in sentences]\n", - " sentences = [nltk.pos_tag(sent) for sent in sentences]\n", - " return sentences\n", - "\n", - "def extract_names(document):\n", - " names = []\n", - " sentences = ie_preprocess(document)\n", - " for tagged_sentence in sentences:\n", - " for chunk in nltk.ne_chunk(tagged_sentence):\n", - " if type(chunk) == nltk.tree.Tree:\n", - " if chunk.label() == 'PERSON':\n", - " names.append(' '.join([c[0] for c in chunk]))\n", - " return names\n" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package maxent_ne_chunker to\n", - "[nltk_data] C:\\Users\\kunal\\AppData\\Roaming\\nltk_data...\n", - "[nltk_data] Package maxent_ne_chunker is already up-to-date!\n", - "[nltk_data] Downloading package words to\n", - "[nltk_data] C:\\Users\\kunal\\AppData\\Roaming\\nltk_data...\n", - "[nltk_data] Package words is already up-to-date!\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nltk.download('maxent_ne_chunker')\n", - "nltk.download('words')" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "numbers = extract_phone_numbers(string)\n", - "emails = extract_email_addresses(string)\n", - "names = extract_names(string)" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['9257898911', '3993851']" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "numbers" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['nisarga.nishu20@gmail.com']" - ] - }, - 
"execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "emails" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['San Jose']" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "349.091px" - }, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Naive Bayes Classifier.ipynb b/Naive Bayes Classifier.ipynb deleted file mode 100644 index acb6103..0000000 --- a/Naive Bayes 
Classifier.ipynb +++ /dev/null @@ -1,356 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Image" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Naive Bayes Classifier\n", - "By Kunal Aneja" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What is it \n", - "**A theorem that uses probability to calculate a specific probability**\n", - "\n", - "- P(A|B) --> conditional probability of A with respect to B" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvQAAAGtCAIAAAD213fjAAAQ3klEQVR42uzdLXNTWQPA8YSpRa2pWNmuwuFC+gFazCocgyuSmg5mJeaZNSCJY+pQGOADlI3DMY/YVq6oWdUPkB0SWiBNbnPfz8vvp3Z2WUjuPfecf869DVuz2WwAAJCKOw4BACBuAADEDQCAuAEAEDcAgLgBABA3AADiBgBA3AAAiBsAQNwAAIgbAABxAwAgbgAAxA0AIG4AAMQNAIC4AQAQNwAA4gYAEDcAAOIGAEDcAACIGwAAcQMAiBsAAHEDACBuAADEDQCAuAEAxA0AgLgBABA3AADiBgBA3AAA4gYAQNwAAIgbAABxAwCIGwAAcQMAIG4AAMQNAIC4AQDEDQCAuAEAEDcAAOIGAEDcAADiBgBA3AAAiBsAAHEDACBuAABxAwAgbgAAxA0AgLgBABA3AIC4AQAQNwAA4gYAoAFbsb+B4XDoLAJAj2azWVCvx84NAJAUcQMAiBsAAHEDANCBrcTez+npqZMKAK3a29sL+eXZuQEAkiJuAABxAwAgbgAAxA0AgLgBAMQNAIC4AQAQNwAA4gYAQNwAAOIGAEDcAACIGwAAcQMAIG4AAHEDACBuAADEDQCAuAEAEDcAgLgBABA3AADiBgBA3AAAiBsAQNwAAIgbAABxAwAgbgAAxA0AIG4AAMQNAIC4AQAQNwAA4gYAEDcAAOIGAEDcAACIGwBA3AAApGPLIQDgpu3Lk92DyfK/HY1G9/54c7x/cfHJISJYdm5oxfjy5PnewsnleOyAQHz+/rLiX06n08nB7u6Dz9uua8QNmfn4bjL99o+Tdx8dD4jW6OXZ7LuzD4ejeeMcPXm17XML4oZ8jMeX7yaDweDwcD4NTl6YBCERO/uv37xc5M3b9+eOB+KGfHxctM3vr48fjUyCkFjfPHy0qJv/nzkYiBuya5v9q1lQ3QAgbojWePvzi6u2GfxQN+5MQRrO37+dP1G3uMZB3JDPxHc1713VzdGfHiuGFK7wV0+OpvPnjI/vfvLT4IgbMjAeb//cNt/vz/uhKYjS9OjJgyvD4XD3a9qMXp797/6FY4O4IZNPdctt82Pd+MIbiDJvrn0Pnt3nJ5fuNSNuyMLHP4+mN+7F27uBiP38PTezs7MP8x8Fn04Odp+6pBE3pO7q621uPGeobiAZOzv7z/46W3zVzeTg5NLmDSHyd0vRnMWPgA8GkxcPvrz4+T9Nv/2Hz8d/3fdX0kDshfPw0ehoOh0Mvvx9PrjveCBuSL9t5rfoV/+S6dv35/dNhRB
93fx27+ozC4gbkjUeXz7d+/oPhx9mj++u2Ju5PNk7mMy/8ObZsws/QQpJfJS599vOYODHpgiOZ25oxvmrFyuft7m2//vhwJcVQ/zX+vmrpw8O5pf76OWx7/EjSHZuaMB4vP3q+bfvLF37vV5f62YymX+d3+njuw4aadvb2+vmDzo9PW33D5ge7Q6PVvz70eGHN3ZhETek/Fnux+9jXz3Zffp0d1E3g8m7y9evfbcpkqXLl1SlgdY8VzMaHT764/jh/o6yQdyQedsMvu/d+AEL5Etg72tl+lzcfXx6+njN/3Fx8cmjNoRrOJvN4n4Dw2HdTyeAiGHj6IGbV1BoLWHnBpAybHrc5A5REDeAlGlYl59il3av5Q6IG0DKhJgsbbyqljJI7iBuIKO10BQf40mM/THEam+t2e758ey4ChA3EEGyNP6SzP69nOKEI6bBo1E/epbOmtGOuIEICqbVN5X5StDg6ZYyTR23mrljUwdxg1XNcdjLMHrqn30pE0XuXJ9olYO4wWJGgo9w1hwDaiac3KnQOrZzEDdImVq6nDq7fKeR5k7lQ6RmUm0d2zk0wjcUk8gaH/Wp7+AQhXN8BE2+603VG1gm9vBnrdAuT3FDTKt1buc3pcNY7b0IGqFjhhc34sbQT2QZdh6TOdoVXqqgETqmCHEjbsRN9KussxZO7jR4Lsq+JE1D2coxdYgbcUMo66hzFEXrVD5Npf5oQUMjoWNWETfihh7WS+cl+R7VNKgccSNuxE3ii6ITkc9p3fx30zR0UznmH3Ejbmhs8XPwszrXmgaVY+oQN+ImzXXOARc6mobAK8c0lWHc+IZiNA3Lp9VfhkAUrkdaceUsxrNZKyviRtMIGorOtZ/oJpbK2SRxTGXiBlljIjBUSm/hLBYYiUOYiWMjR9ygaTBaKrpeYFQOvSSOe1XiBguVpqHcgPn3153BYPDLP+cbVo7Eoa/KuTVxTHrihmSzxuVNqaxZ+udbK0fiEGzi2MIRN2gaZE3RfyquHPeq6DdxCipH4ogbZA2ypuhX2sgh5MqxiyNuSC1rXLS0lDUr/69NNnIkDt33jRtV4gZZQ47DplrWrPxNJA6RJo5pU9wga5A1EoekEscWjrghuCXKBUnZYdN41qz8zQsqR+IgcRA31idZQwRZs/LPKk4cfUOAiWNqFTf0kzXKhgrDpsuy2TBxbOHQY+LYwhE3BLREueQoO2x6yRqJQ/h9YwtH3CBrUDZ1E8ddKiJKHH0jbpA1BDd4wsmapZdkC4cAE0ffiBu6KxvXFWlkTanE0Tf00jcrt3A8giNukDUomwYSxxYOPSaOLZyI3HEIlA3KJtjEWangW2Wh1b4pO2MjbrjlOjmdc3zIoWyuX/a6V65v6KtvViaOvhE3FK1MBWXj+FChbAr6IKLEWdc3Eoe+EkffBM4zN6F/5pY11CmbNN5d8VM4HsGhl77xiHHI7NwoG5RNTImzsm+MAXrpm1LzOeLGyqRsUDbl+kbioG8QN0GvTJ4dRtkU9I0tHPQN4ibcZWld2Tg4KJtbE0ffEE7feMRY3DBwKwplo29IL3H0jbixLCkblE0DfbPyXesbwukbxE2+y5KHbFA2dRJH3xBs39i8ETf5lo0jg7LRN+gbxI2ywRBSNvqGiC9exE2alA1trOiOBvTLw8XiJt9sVzbUHELW8k36xuYN4fQN4kbZgLLRNyTVNzZvxE3KyxIYQh3TN+gbcUO7bNtQk22bCsdH34C4oa3P3MqGmkNI2eg/ImLzRtwoG7Bmt3isbN4QSN8gbtKhbKjfx+gbXNqIG2OXlJdqIAo2b8RNmmVj2wZ9HEgR2rzBBS5ugEAXaSAiNm/ETWo9btsGn+qC6kKbN7jMxQ0Q3PKMA0h0bN6Im3RK3LYNBMjmDYgbKlI21E9kuw6NcBgJwdLmjTtT4gagSTZvQNxQ+gM3EBSbNyBuqMs9KSQysMSdKXEDubPZACBuQvnAbdsGwo9Fj92AuAEAEDcAwHoeuxE3kNE
c54EbSJKvKhY3AE3y2A2IG6p84PY0MQCIGwAAcQMAIG4AAHEDACBuAADEDQCAuAEAEDcAgLgBWrX09Y+//HPumACImyD4W9AAWGfpL/3wpfbiJo4P3ECwlvbD/BWGIG4AAMQNAIC4gTx5phjS5oEbcRPxmuSZYgiQB25A3AAAiBuu2LyhGnemIFVL96QQN/GtSUBQZCJWDXEDWJVT5oEbOmbbRtwkwp0pfJ4TiOAyFzepDVZ9g7U5QLZt6JhtG3ED+FQnDXGBI24CHrI2b7BCB3XcbNvQMds24gbw2Q5c2oib4AeuzRsaYfOm/hGzbUPHbNuIG+CWUNY3EHXZ2LYRN0mtSTZvoGO2bUDcoG+IYyDZvFE2RMG2jbjJhb5B3/RSNqBsxA1trUn6But3L0fGtg39lg3iJv2+gUYGkr7ZkLKh97KxFoib9JclmzcI5ZZ41AYXL+JG3xD3QLJ5o2wIikdtxE3u9A36ptWyAWUjbuh6TdI3WNdbPQK2bei3bBA3+gZqDaSc+0bZEGbZ2LYRN/oG9I2yQdkgbtLqG4mDvlE2KBvETVLLki0cmuqbHBJn3dtUNnSZNcpG3KBv6G4gpd03696dsqHLsil1SSJu9I1bVOgbZYOyQdxEuyzZwkHfbJ41625FKRuUDd/PUewzwtIgi3dsrUsZVwuNDKR/f91JoGxW/ntZg6zpfaoJ7TK0cxP6J2+3qGhkIMW+haNsUDaIm1iXJbeo0Dc3X7ayQdkgbtJcmWzh0EjfRJQ4xVmjbOgsa5SNuKHFlckWDqVGUdRbOAUvUtbQZdmUvb4I4sR5oDhkBSnjuqL+KArzKePi9lI29Js1pt+VE4sHiin34btgYNnFoeYoCu0uVfHrcSuKzrJG2UR/Eu3cxP7h25VG/VEUwhaO+1AEuKaYbDecT+zc0PCHb1s41B9F/W7h3Lph49zRTdYom3TOpp2bZD58u/xoZBR1uYvj8RoCXEfMqxWmkdCuVnEjcTA39ZA4t24UKRtkjbipzG2pKBVfcm5UUX8UtXej6tbf2YPDKBvqnlw7Nwl/+HZ9Un8UNbiFY7cGWZPqvOG2lLjpIXFcrvSYOJvsAMkaZI24ETfiRuIQQeLIGmLJGtOjuBE3Eofch9CtlSNriKVpTIniRtwktT45XDSeOBs+gyxrkDXiRtyIm3YTx3GjZuJs/nNVsgZZI27EjbhROcQ0fjQNgTeNWU7ciJuslyhHkkYqR9Yga8RN97acoXxcX9ibLFGLX2MuoDJZg6ZB3NB15WyeOCYIn8/qLDwqh16axqwlbsg3cTZfvYSOplE5RNE05ijEDaUr58dfaQbJPGhKDR6Vg6aho2HjgWKa+tTu4DvXZX83lUMjTWP+6X3q8NNS4ib9lc+JcFr9XBWaRtyIG3GT7HLovGR++lQOjQeNKUXciBuEjtMUxGmq9nqEjqYxe4gbcWPox7R8Omt5novKL1XoZFgzpghxI25cAyksrs5jPke7zrsQOskHjXlA3IgbF0Piq2+25zefw1jznWqdNGrGHC5uxI24yX2FTum8O0TNHgqtE1HNmLrFjbgRN1onsjm0xzebwLXQ4NGTO0GljOla3IgbcSN3sE40PEi0TvcpY4oWN13y1y8Q4sIsd6wQxW+55ggpWLPz7J7GI0bNIG5g0wkxt+ixMGxyZJodFeuW+TSip72IMW4RNyB6LAZtHb2WBkOpLOi4hLpJFqMXcQOBTq9dZpBJP5DB0H379lsbBjaIG2QQcgeXDOIGINEFO8/uUTCIGwDdo2BA3ACk2z09ZpBkAXEDIDggcXccAgBA3AAAiBsAAHEDACBuAABxAwAgbgAAxA0AgLgBABA3AIC4AQAQNwAA4gYAQNwAAIgbAEDcAACIGwAAcQMAIG4AAMQNACBuAADEDQCAuAEAEDcAAOIGABA3AADiBgBA3AAAiBsAAHEDAIgbAABxAwAgbgAAxA0AgLgBAMQNAIC4AQAQNwAA4gYAEDcAAOIGAEDcAAC
IGwAAcQMAiBsAAHEDACBuAADEDQCAuAEAxA0AgLgBABA3AADiBgBA3AAA4gYAIEbD2WwW9xsYDp1FAOhRaC1h5wYASIq4AQDEDQCAuAEA6MBW7G8g9geiAYBm2bkBAMQNAIC4AQAQNwAA4gYAEDcAAOIGAEDcAACIGwAAcQMAiBsAAHEDACBuAADEDQCAuAEAxA0AgLgBABA3AADiBgBA3AAA4gYAQNwAAIgbAABxAwAgbgAAcQMAIG4AAMQNAIC4AQAQNwCAuAEAEDcAAOIGAEDcAACIGwBA3AAAiBsAAHEDACBuAABxAwAgbgAAxA0AgLgBABA3AIC4AQAQNwAA4gYAQNwAAIgbAEDcAACIGwAAcQMAIG4AAMQNACBuAADEDQCAuAEAEDcAAOIGABA3AADiBgBA3AAAiBsAAHEDAIgbAABxAwAgbgAAxA0AgLgBAMQNAIC4AQAQNwAA4gYAQNwAAOIGAEDcAACIGwAAcQMA5Oy/AAAA//8cV7gDBIHJngAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Image(filename=\"/Users/kunal/Desktop/img2.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAASUAAABRCAIAAAAbwSdDAAAOEElEQVR42uyd3WscVR/HT8XbNsyuVxKKZCIoDSzE6a6kqdCA2ai9UEiZUL0IVKyzimCgabq5dG27sSgUze5KcyNqdn1BCGabSaAFdxrsZi0bVBTaHUSKV7tO0/4BeXjy4zmc58zMmdmXJJvs73OVncxOZifz3d/7mcc3NzcJgiA7wmN4CRAE9YYgqDcEQVBvCIJ6QxAE9YYgqDcEQb0hCIJ6QxDUG4IgqDcEQb0hCOoNQRDUG4Kg3pAmWF9fHxsbm56ebuC9hmEcOHDAMIx635jJZEZGRhp447ZimmY+n6/r0rXbR/DD43jTt5CZmRluy+HDh8PhcE9Pj+POly9fnp2dHRsbEx+EEBKNRkOhUGNnlclkNjY2Jicn4eXZs2ePHDkyPj4+PDw8Ozu7Hcr57rvvuI19fX2RSCQYDLpdt8uXL09NTb388suOO9Rqtbm5ucOHD9Nr9ejRo0QiQQhJJpMNX5ldYBNpEZVKRVVVuKrJLeLxuCRJhJD5+XluZ/hVuVz2PEgymVQUhRCiKEq1WqV7FgoFQkihUBCfFexm/0dXq1VFUTRNa/l1KBQKcMKyLMP5a5pGCHH8vHAa3Eezk0wm4UuH255Opx0P27ag3lp8q4EwWP3ArWbfzS5Ct4Nsbm6CCOPxeL16g1vf8Yu1XC4TQtLpdMuvA8iDPdv5+XlCiKqq3J6qqsqyLBZbpVKBry273uBvSZJUqVT2xB2C8VvrGR4epj/39PQoimJZFhtsJBIJWZY5N5Lj6NGj7Mt3332XEPLLL7/UdSbZbBZuU3DJuN+GQiFN0y5cuGD/VUs4duwY/fnFF18khORyOS4EzeVyiUTCzc8Erly5AhbS8TwnJycDgUAsFsN8ScexurrK3WeEEDBxTz75JA30dV0/d+6c20F+//13EAO78eHDh6Be/ydTq9Visdgnn3wyNDRECPnjjz/s+5w8edKyLHu41SQ3btwghEQiEbrl/v374GGyu/n53jEMI5vNTkxMRKPRUqnkuM+5c+d0Xd8T6RPUWyu5c+cOd58ZhmFZlizLVCrffPMNIeTIkSNuB7l586Z9hx9//JEQ8sILL/g/mbm5ueHh4cHBQcE+kJ/4
4Ycf/OQDHRM5jhSLRUVRWKul6zpn+U3T1HWdBqtuJBKJS5cuiQ0gGM+vv/4a8yWdhSRJXPAG4dPi4iLdCN6d4CBgBNgtEPxwuQ1x/AZGFaIaiKbc9oQzdDuZarUKbywUChCF0i1uQFjIhZrSFmyUlU6nBUEs/eCyLPu5bpIk0T3bGawHtLKYZlkWTejfuXNneXkZxMamuXVdh+jfLZleqVRkWabG5Ntvv7UsK51Onz171v/JxGKxZDLJ+p/g0dkB02GapqOzev/+/UQiMTQ0NDAwEA6H8/n8e++9pyiKwGz+/PPPIHX4CDdu3NB1XVGUa9eusX9ifX2dENLd3S32hxcWFrjr43ie4XAYTCj6k50C3Gc0z9Hf3//ll1/++++/9ppSOBwWeGKEkN7eXni5sbFRqVR6e3uff/55/2diGEaxWDxz5gy78e+//xa85Z9//nHcHgqFlpaWRkdHr1+/ruv6G2+8cfXqVUjDiP3h/v5+eDk0NFQul9fW1riI1DRNQsizzz4r8IfD4TAnbLfzpB+8zW8StG+ttG9QTW6m/Prrr78SQsbHx2kWYWJi4umnnz5x4sTdu3fFYQxlfHxcluW5uTk2e9EwtVotHo8HAgFJkjRNe+aZZ8T7l0olSZJoeV2M2ycyTfP8+fOqqlI7f+/evX1wk6B9axngPTbZ6wAHYQ1gMBgMh8OWZTkmGO1kMhlCyKlTp+iWurKa9i8RUPvp06fD4fCZM2dUVRUk38EfFhhwn8Tj8Wg0So0ka/P3NGjfWgPcZxDTeyKINEqlEpvMbMAWXbhwgesRMwwjlUr99ddfgjfScgVHd3f3wsLC4OCgYRi6rvf09KytrQncNvCHoQLh87rZPyzU5SqVCvcrXdfdolC0b53Fn3/+SQh57rnnPPcUaBLuY9oRQu9I0Kcg1KF8/PHHgUDAsaIF8ZKjRAU2MBgMQgR18OBByHMSQgTJEvCH+/r6PE8V/qJjPPb+++9rmuZ4SuIoVFz8QL3tH6A+1tXV5bknaNLRREC5/MSJE6wYoEKVTCY9g7f19fWLFy/aK1pQK3cTW6lU8mOWQ6GQn5AMOkgOHTrk54COWdNMJlMqlU6ePMlt39jYEBytWCxyxfQ2BYtmzQOlJOifYEttgvZItjxFK11wx2iaRtt8oWzl2OLI1d9orS8ajXJtzfRGtPf1Li4utrCFkko9Go169hBDmc5eVIRiCXd9oC8ZrrC92dLxUFh/qw/DMI4fP14oFNrfSQD7Az94frUPDg7KspzL5T788EN2+8rKyltvvcVueeqppyB28nMCxWLx1BbQusW+ix5W13UunfPFF19IkjQ6Otr8FajVav1bwMuDBw962jdFUbLZLDsTtLq6OjU1ZQ/turq66PaVlRXOYQZ/+/Tp02jf/vvdo6qq/evcZ6+9Z/+7o7WJRqMNvHGHxwiatCoNXx/uCMlkcreuA1hXcYuJJ+AXOI4O7Pl5HLuLLEmSqqpuzgPMSogvKFgG+3/d8X6CNiU7qqqyjhw4Ue3sYLjNv+2Y3uA2tQ/I7DCqqkqSJJ7H2e4r2b56q1arXCQArX2Ol8zPhahWq+CX+9Qb/VJkJ8oWFxfhIKzktm+esoVDYm6x2XbrDcIkTdOaudFbAp03bWyADa6hZ8y8V/UGDx/mpichSuaMmHikkqJpGthM/3qD7Zz/4DjOuH3zlC10LBtztveZvw1fzXV5toVCIbrFHhrurltv4M5xt3U8HrcLJhqNevZrl8tlMICOk79i+8b9OUcRgp6bdFeQnaFSqdRlpsrlcjuH6G481kBVly0Q0RakgYEBthAkHqkEzp8/PzU1BRmzBw8e+DyHW7du2SuqMKNpb2vYpnlKpOX09PS4LRbklt7cE4nrpurdcK+zveqGYUALEvvhPUcqCSH5fP7evXs+u1pZYE0Btmu2Vqt99NFHjnlt//OUCNJ29QBFUWjwVqlUoAppT4p4jlRCcoz6D46uoJs/CUVP1r1UFEWWZTc/XjxP
ya5ghb0BSBvFbzQ5SZFlOR6P26Mje07FnlZiBeZ4NzvqzVEb4jwbiH+vrN+EoN7+7173k0RyW7qMGkZJklgh+dcbV6yrVquwRZD3B71ta2yNXhJ2KbY+XwLdtH5av8VcuXLFsqzjx48f+B/+3wsL8tDcTDAYnJycVBQllUrBuCf+d5F2/gasQ28wJuw53uvZFZlKpTjvzufYGM2FcokpaJx/9OhRw6d0wB9oN5Cd0xssdONzFNJtpDKRSLiNNnkuPmGapmVZdnHCpL24QdZtnhLUi34j0l56A2/N55y8m73KZrO6rte1zhQLzA5zM52ZTAZGUdwWMhDPUyJIO+rt0qVLcO/6WfvacaQSljezL4FmmiYIyXNU/qeffmJXLzZNc2Zm5u2335Zl+dq1a25i8zlP2ck08GynfD7vNjCOCPA1/zY9PQ1zu6VSaWRkZG1tTbz/Sy+9dPHixevXr9NAyzTN4eFhWJ7x9ddfX1paYosK8EMsFuvu7nZrGshkMqlUihDyyiuvsIY0nU6Pjo66zT7fvn2bEPLaa691yL/TMAxIa7EMDAy4XdVarfbOO++USqWrV6+yboh92YK+vj62/+Phw4fDW3zwwQc+Vw1DtjG3Jm/RZC9v83n85sc99hbz8/P0UTIwJA623fGBT9C/ai+lQEMsO2kOLelcY0O1WtU0zfNRUkjj/cr1qqWZ3vx9ME+5K4DA2N5feLgM978Qz7+B3lghwZoR3FOy4M/ZNyI7rbfmBwH3xzzlbumNlQpMVHAdCNAn4NZ2wzXNCdoSYGSk077U2lFvTY5U7o95yl0p9XIGB0wT6zfCmK9bU47jhBR08znGCJ3mtO9Qf0kDTE5OLiws3Lx5s4FHwkNZrIGZi0wmk0gkFhYWZmdnOy2Uh7IN+9gnOh7BPstqZWXFsiz7mnMAPAiBXdsY3mI/MvDqq69algU7ILuTL0F2BTBlbPDmGHdB/sPNmYR4j/UsILPilhrZQ8vR7Tq4nvm+Ap5Nc+vWrd9++21jY2N5eblUKqmq+tlnn7G7wYNC3XoAoGludQv6YC1N0yYmJhz9Beg0wHJcy+pvyF5heXlZlmVY5rmrq+vNN9/M5XJ2XdHFYe3QB9Cx4gwEAseOHRM753vi8WvoTyItw3F1GbecitvAFCQzOefQc6YJ76W2yJcgOwm0xXF5jnqBJTO4B4XDwjD2zhWkXlBv+wfoL2UXbhLg9vhCWB6m+Qe4Iai3fQ70tfp5bJUsy24rVTtOXX3++eeeo8bYFI566yBM04Sso5+SIyRL7BlFmBLgimyxWAweJem2Xh0U/XDiCfXWQWKjz4KKxWKeM1OwgijEeyyffvoprAU6s8X09HRvb28qlYrH41999ZXb0aA+zoV8iCMHcGx5HwAL7NKXnqt61mq1J554QlXVbDbLGjd7RqSvry8SiYht5sjISLFY9P84/44GU7SdCQzdNL9MIDSXYL+yT9C+dSi1Wi0SifT29rKzvw1w9OhRQsjS0hIaN4zfEFeCweD3339fLBb9xHtuih0bG7MsK5fLodhQb4gHoVDo7t27pmlGIpF8Pl/Xe7PZbCQSCQQCt2/fxsykf7B/stOt3NLS0vr6er2rdx46dGh5eRmVVi8YvyEI+pMIgnpDEAT1hiCoNwRBUG8IgnpDENQbgiCoNwRBvSEIgnpDENQbgqDeEATZLv4TAAD//6wgLUoZoGMrAAAAAElFTkSuQmCC\n", - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Image(filename=\"/Users/kunal/Desktop/img.png\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 
Example: Emails " - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Image(filename=\"/Users/kunal/Desktop/img3.png\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- predict whether or not it’s spam\n", - "- new case: **JOB** \n", - "- is it spam or not?\n", - "- compute some basic probabilities" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAMEAAADxCAIAAAD9WkRmAAAUW0lEQVR42uyd32sUV//Hz355Lh8rZ9MrKVIyESoqAZ2IiCkkkMwqXrS0YbalF0KlMmkpVGuMG28KxjqL2CvNplTolZuoRQjs1t2Cge5aNK6yi4qCzhKCeDXrmL8gX8inDOc5szvZzY/NbPJ+XRQ7e+bMZOY957w/n3Nmzn8WFhYYACvg/3AJADQEoCEADQFoCABoCEBDABoC0BAA0BCAhgA0BKAhAKAhAA0BaAhAQwBAQwAaAtAQgIYAgIYANASgIQANAQANAWgIQEMAGgIAGgLQEFgX8otAQ2A5VCqVkZGR7u7uf/75Z3k1/AcXcTOTTqe/+uorx3FWUgk0tHmJRqPv3r2bmpq6fv362NgYNAQa5rvvvjt06BBjbNm9GPzQZocEhLgMILYH0BAA0BCAhgA0BKAhAA2BTc/8/PxKdkeeevNSLpdfvHhx7949Gui4ePEiY+zgwYPbtm1rb2+vv57QRl03KJ/Pd3d353K51crGbshLVHWU4+DBg41dtIUmUiwWdV2PxWIrrCeXy7nnb1mWT5lcLrfQCiQSCU3TWuVsJZavIUVRJDlyznVdLxaLVcubpsk5TyaTq3LelmUxxhRF8ddZC92VXC6nKIphGJtIQ7Ztk3Rc0SSTSVKSbdtS4VgsxjmvKi9Sg6qqDZ86Y7qubxgN0SVVVbXlZLSivowUI27RdZ0xJjU2dDtrtUCkvEY7OKozkUhsJA1Rd+//dwWQ/1uJq2eM9ff3ixupg5ubmxM3nj9/XlGUaDRatZ5oNLqwsDA6OtrQ0ckM7tq1a4P53M7OTsMwzp49W6lUNn5+6MWLF4yxnp4ecWM2myVj724plUqZTOb06dOre96PHz9exRkwgeLo0aOO49y6dWvja+jevXuMsQMHDoixYqFQUBRFvLU3btyo1WCMj4+HFunq6vI2coODg+FwOBQKhcPhkZERqUA2m9U0jTEWj8c7OjpCodDg4GA9pz0xMdHV1UXHDYfDkUiE3mdw6xkfHyfpRyKRUCjU0dFRKpWkStLptFtJNBqdmJgIh8PST9TuViqVwcFBOtbExMSSNTPGjhw5whi7fft2y6h+2b2gqqquGbIsK5FI8EUk40x32iem9Zohcki6rlPc7i1DNtw0zVgsRo4nFotJNqKqH6KaqZht24ZhUJlEImGaJgWbmqYlk0nDMGzbJnciRX+0VywWo9CBLCDFBJS8cGsuFouaplmL0NVOpVJizZqm1bq2/reGruqS0B8VUE/tBmWiE3Ivq7/v9t5U0W5XDdMURRErcQNAV6+kG
PGWVNWQV9CMMfGcFUVRVVXUK+3ilkmlUpKg6UDSY2AYBuec5CLKQgwkfTREB62V+tognvr58+eSzF+9ejU6OtrW1uYtvH///lr1PHnyhDG2c+dOd8ulS5cYYz/99JOUeRLfX6G9hoeHOzs7GzptSuFTn+I2w+45VyoVy7Icxzl58qS04+vXr+kf33//PedcLEA/7dmzR+pqHcf58ssv3coLhQLn/MKFC65NdM+nFm/evFmjzidUB2vel5mmSS1zPfF/radN6hB92i3pVOmZFtsPah7EzErVdsiyLM652FGKUCVSXE3lax3F7drE2qiRFptS6rnEHamDrpXvoHaoVRITy2yH7t69yxj76KOPVvI0VCqVQqEgtlJkb6V2izaS7XD30jRNbPPI4H/88cdLtkMvX740DGNyclJVVbFBYow9ffpUihLK5bLjOCRZt4DU+GWzWUVRxBblwYMHjLGBgQF3y/379yngcrdMT0/7t9D+kCtfkng8Hty4LJPJcM7rHN3NZDI+HWJvb6+0Xdry559/MsY++eQTn70mJyc55319fUueTFtb29WrV6mVkkI5ejBEiVCAferUKbGYGGOOj49bliUlyUjQYoKDFCM+cl7ledm2bVutn+7cuVNPCzE0NBRQDVFfXucz5BNBUJ5w9+7d0vbZ2VmxJRgbGxNTlN6hZrqRw8PDVd2Y23qJffyhQ4f2798vvSMsab1cLl+8eNEnO1oqlUgc3pZJyl2RGXIVUyqVHMeRlCed7ZJuqbVjezea9UZhXijqrtq1SyGP6JCofC6Xo/8V8wW0F4XQtm2TsfCOMUl+yI2e6HC5XI5zLho1sizu8F/VQ7tJByqgaRo5pGQySWE8VS6ZIYo0xYjMPRnLsryj1GSnfExky4+XkSaIegZKq4a+rlP2DrzTZXXzBWJ47IqM8jfkdul/lxwvy+Vyuq67cw28NbtZKCojRebSn885JzlalkXlXSl4/15SnpSqIY+lqqr36apq7Zsw3Gua5vJmVTRj/pCySNV77DPwvipzjOoPbSi8CsJjret61bkPawo9G8tr/Joxn/r333+3LIvGEFyePXsmOuV1J5vNuvHX+s4tnJyc9Pd2a3FQN3HV1LGORmUueQtKPa/d09ZQO0QWZO0axfo7FEVRmnwadNCgt0OMsdHR0eHh4Z6eHmqNRkZGLMu6evVqM582Hyij452Z2eQWaMeOHf39/VeuXGnmcS9fvtzR0XH48OGgt0Nu2/Df//6XvGQ9Oe6mQWl3zvl6Tf5ar/nUqVSKc25ZlnfAMVieGgQT27Y55xQwrkRDeEdx8/Ltt98qirLyXDbeUdykpNPpyclJN7MqObOGXlPcsO8oAv/R7h07diiK4g4Mz87O0pjSN998c/HixampqfrnGaMd2ozMz88PDw/X+nV4eNhnuBftEKiZWeju7tY07c6dO43uC08NVgo0BKAhsBoW+/r164yxmZkZevW0IeCHgPwRmUYzRtAQQF8GoCEADQFoCABoCEBDABoC0BAA0BCAhgA0BKAhAKAhAA0BaAhAQwBAQwAaAtAQgIYAgIYANASgIQANAQANAWgIQEMAGgItTqlUisfjtOIbNASWw/Hjx8+cOeNdngsaAnURj8cLhQL6MrD8XuzMmTPiymBNA9+F3Ti9GK0YgXYILIeRkZFlfL8M7RD4F1p9jBbcQDsEGqZSqRw7diwWi9X/WXtoCPwPly9fZoydPHnS+5O0ev/agW96tnYv1t3drev63r17acvdu3czmYymafv27btw4UJzbi78UGtTKxbbunVr08I0aKiFObSItDGTyfT29jYzRoMfAtAQgIbAalEul2/evEnOulKpNO24iMs2DhMTE3Nzc/Tv7du3R6PRJh24JVZALhaLuq7HYrGAn+d6LTG+vqyDhgzD8EpZ07RaK96bpsk5TyaTLXFBc7mcoiiGYUBDa4uqqoyxRCIhXnfGmPcJjsVinPNisdha68arqrp5ZLQ+GtI0jTEmK
iORSDDGpOtO44it0gJJna/4kEBDq4+iKJxzcUsqlaIeTZKaoigtemUNw+Cc27a94TW0DrF9pVKxLKu/v1/ceO/ePcZYb2+vu6VUKmUymdOnT7dolHT06FHHcW7duoX80Orz4MEDxlhPT4+oqrGxMcbY559/7m68ceMGY2zXrl214tiurq7QIuFwOBKJ0MsM8Xi8o6MjFAqNj4+TECORSCgU6ujoKJVKUiVu4XA4PDg4OD4+HolEpJ9q1ZNOp6lAJBKpmow5cuQIY+z27dsbP6nQ/KaP5vySGbJtO5VKkaGWfA95pqo1JJNJ123Ytk2BXi6XSyQSpmlSX6lpWjKZNAzDtm1yJ2K3SLaXc+5WQudA6YNa9dBxNU0zF3FtHP27VuiwpC+sZ2AVfqjKlXXhnBuG4Y286CefSy8VFp2Hoiiqqor5JNrFLUM6FrMJNMot5RekemzbprNybyq5/lr3mA5qWRY89Wof0uOdGy1GDU+teI3utKIooqrESNCyLG/lVKd4v6mY6ItJMaqqSuFkrTOhg65dyjEg/Uyz/RC5ln379q2kkh9//JFz/sUXX0SjUe967OS3Tp8+3dbW5m6cmZlhjHV2djLGrl27xhg7duyYuFc2m1UUpb293d3y4sULSje49Tx79owxdurUKbfM9PQ0Y2z//v2B9SEb0FPTK5h79uxZSSXt7e0vX740DGNyclJVVWnS59OnTxljBw4cEAcjHcdx+9BHjx4xxnbu3CkWqBUqHj58WFJMX1+fj/Iagnz6ksTjcXhqv+yi/wO05MACX8TfLZHXcXscbwFd171dktcRc85FY05W3ScfDT+0JnDO6xRu1biMvM6S/lrcYlmWdO+lXZLJpFfZdCDRM5E90nW9TjNUT1yGHGPDjI+PO45DfceShckzSZ+weP78Ob2SRymZfD4/MzMjRshuEoj+kc/nqY35448/xK6QTob++/fff1Nuc8uWLel0mqZM0IFE30aOSkxrzc/PM8bee++9fD4fjUalLFGlUikUCnVG78gP1QWNZrgsOQhAQZA03yOXy+m6TrkcCr4ocyO1DbFYjMpQ4kA6lm3bdGsVRaH8UCqVoj7RLewN9d0slNTCUePk7bDo723OkBml2ShrlUwmmzzAEuiWVlmk0VGqgHQfuq43Z7wsmUxyzlVVNU2THp4mT5UJtIaoKWroUaasYEDOvAn5ZWrtRJfm5tybNhUu6I6voflDZITFC7ou0F1szmlUTWNSR9y0EZKgz8kfHR0dHh7u6ekhC1zPaK7rltaFfD6/Y8eO/v7+K1euNOFwmUxGynXhvY4qDA0NTU1NTU9P0xdSfKDs4tjYWD2CW6PA8/z581NTU1evXhWz5GsHtUMURUps3boVc/JBXYa6qh9q5vQ3vCvd2kSj0bm5uTNnzliWNTAwMD8/Pzk52dHR0bSGEO8obgR2796tKIqrmLdv37569YoGjNGXgXr7MjEEa35sj3aotTl37hxj7Ouvv3a3tLW10ST069evoy8DS0MjwZL1oUno9QxKQkOA0YCdJBeaK7fsWU3Q0OaCxgcvXbrkbqlUKr/99htj7MSJE805B8T2rc3o6Chj7MKFCw8fPhwYGGCM/frrr2/fvk2lUjTxtwng2zEbgXK5/Ndff9F8pt27d9OrbU0DGgLwQwAaAtAQgIYAgIYANASgIQANAQANAWgIQEMAGgIAGgLQEICGADQEADQEoCEADQFoCABoCEBDABoC0BAA0BCAhgA0BKAhABoH345pDfL5PC0fuHXr1r6+vqZ9nwrt0EZgcHAwHA7/8MMPtNrV2bNnFUWRlo5cZ/Bp1aCvUvi/nzB3F7yqcw0TfBcWMNM0z5496/5ve3s7LUF8//59+CFQF0NDQ1W308dfgwC+g9ZiVCqV999/X1GUV69ewVODhimXy5FIRFEUcXlaaAjURTweJ/W0tbVls9mmffMVGtpQ9Pb26rqeyWRUVQ1UbA8/1GKUSqWenh7HcYrFYkBaI7RDL
UZnZyd9G//GjRvoy8AyoUU2Hz16BA2BugKxeDxedQWg3t7eoJwlBhOCv8K/YRje5VotywrISSJPHWi2bNmiKMrY2Fi5XO7t7aXlWhlj09PTwRm6R1zWGrHY/fv312tJl83SlxWLRV3XY7HYCusxTZMui6Io3l8TiYSmaU1bJ7VlZhYE7YRocUkJRVEMw6i1YLtpmpzzZDK5iqvsihZEMih0MpBOcDW0sLCQSqVolUnXRcZiMcaYpmnewrFYjHNedTINqaHRxolsbCKRqFXAtm1VVSGjQGuI7qI48WphYYEmXknBCJWs1QJRLi6VSi2jO/Of4VUsFv11Bg0FYs126Q6pqupds13TtKrGZSVomlaPTTQMg3Neq3vdVAQxx/jkyRPG2IEDB9wtlUqlUCgwxg4dOiRGK5lMhtZyX0VmZmZIRv4cPXrUcZxbt24hbAyihrLZLOdcHFC8du2au4ayCw0YVZ3OF41GQ4uMjIxIP+XzeffXjo4OaQC8VCo5jtPb21upVGgyfDgcHh8f9x6CAuzbt29DQ4Hry2zbFs1QsVgkQ62qqtRx+Hc6uq57zRCp0DRNexGvYaJuNJVKGYZhWZZt23SUqvaIutd6MgX+VI0V4IdWGpSJqKpa1b2KsVstQy16cG+YRnkE0bzTXqJeSQemadZyTsEZc8BYx788ffqUvLNofWpBbzhU5eHDh5xzcUDg3LlzjLGTJ09KA+Dv3r0T92KM/fLLL21tbXWe8Js3b9Zo2CEUCtXTjcAPydy9e5cxtnPnzpVUQh68v79ftEGWZem6Lorj+fPn3r1UVRXlSyMM27dvD6bNgKeuQiaToVnDK6nkwYMHjLG9e/e6W+hFY3GLu/HTTz8V9xoYGJAMvn+D50M8Hg/VQSQSQVy2auTzebIj9QvOp0M8ePCgtF3acvPmTcZYX19frb3K5XKhUNA0zae32rZtW62fhoaG6mlL7ty5Aw2tGs+ePfO2Fj7hTKMd4uvXr91/p9PpQqFgGIarD9pLhEJCMlJVe0x68RSxfYCiekVRfMY7vSNl3sy16xJUVZWGcjnnqqpSGJVMJul/xXwBXRAKwSzLohit1kAK5SBaPSzfaLG92IXVM1BKg2Xekt65f+529xCqqkrhumVZqqomk0lKLFHM7zPNg3IQTRsyKxaLiUTCXCQ4X2sI7nhZ/SiLVM3srfXd1XW9OeNllOfknBuGYZomSVzTtOAM1bW2hqrO06CrvKapPzpu1cTjGjXPYtuTSCQC1Y22/DxGaf4QpZ7X9PqSb5OmpqypWL1/TqC87EaYC0vzGBOJhNvsr51jyOVy1K00pyuhftnr+aCh1YfediAjvHYCav58ap92yGesEBoCctxQ1Q81x43VA94NaoEXg44fP25ZVjQa/fDDDx8/fpzNZn/++ecTJ04gTw3q4oMPPujq6qLOi2YZOI5TKpUoS448NVg6BlRVVUpEIbYHDVDr9aaqb7msF+jLAs3c3Jw7V847+e7NmzfwQ2AJSD2zs7PS9pmZGf9pJ9AQ+Je+vj7O+cTERKlUcjdOTEw4jqPrekCmnSC2b4HY/rPPPnv79i3F9rOzs2NjY7quX7lyZYWzPaGhzUU6naZplgFcNwgaAvBDABoC0BCAhgCAhgA0BKAhAA0BAA0BaAhAQ2Az8/8BAAD//yCRK274D0tBAAAAAElFTkSuQmCC\n", - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Image(filename=\"/Users/kunal/Desktop/img4.png\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Basic multiplication" - ] - }, - { - 
"cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdsAAABjCAIAAABc9j3PAAAVRklEQVR42uydX2gU19vHxx+91TDK78ZfCZKJoMQSaadGYiwkJe42elMa2ZV6IVQqu4g3bWOd2CujddKWgrT5U+j1boogBHbrJmDA3UrVVXapoqCzlLR4tds07UUvfXnz5Xfe856ZnWz+bHaz+/1cFDuZv+fsfOc5z/Oc57zy8uVLjRBCSB3wLzYBIYRQkQkhhFCRCSGEikwIIYSKTAghVGRCCCFUZEIIoSITQgihIhNCCBWZEEIIFZkQQqjIhBBCqMiEEEJFJoQQQkUmhBAqMiGEECoyIYRQkQkhhFCRCSGEUJEJIYSKTAghhIpMCCFUZEIIIVRksq7k8/lwODw8PLz2U43+l0wm47lDJpPZsmVLub/WG5OTk8FgsK7udh07q9marla8wiZoZjKZzJ07d5SN3d3dPT095TT06tWrY2Nj4XB47VdfXFwcHx9fWFhIp9MN0Jhnzpzp6Og4derUkSNHxsbGGqyzNnXTVdK2mqaVa8kN5SVpYmKxmK7rmqYFAgF7iUAgoGmaaZrFYlHZ2bIsXddzuVy5U9m27T7KH1yu3F+h1Ol0ehM1abFYNE0zEonUc2c1W9P5X9SyLE3TbNuuh0agIjc7eKsTiYTYEolENE2bmJhwi2MsFiv7S9I0XddXenXDMEzTbCRFfvnyZS6XczdgXXVWEzadJ4lEAh+5+lFk+pHJ/9LV1SX+fezYMU3Tbty4Ie8wMjJiGEa58W+hULBte6XjzUKh4DjOkSNHGqwxOzs7I5HIhQsXSqVSHXZWMzedTDgcvnbt2vT0NL5qjOyRuiCVSpmmuWPHDrHlt99+0zStra1NDhClUqlPPvmk3Ena2tqGhoZWKgFPnz7VNO21115rvFY9duzYwsLC9evX67CzmrPp3Jw9e/bmzZs9PT27du1iZI/UBfl8XtM0xUqFwfXWW2+JLT/88IOmaR0dHe4zlEql77//Hv/+4IMPZLHAX2dnZ+fn5+H97OzslP/6008/aZp24MABTdOSyeSjR4984lTuO0+lUiK6tXfvXlxaOU+hULh+/XpLS8vg4KByb/JJ9u3bNzAwkEwm9+zZA3XDn1pbW/GZKZVK169fX1xcHBwcxA7+Zx4YGEBLnjlzxv9BksnkX3/9VcnHbO2dJa746NEjTdNaWlo6OjrQUKKnxAMikChaoNxJ0K2Tk5PiSZftAv8zV950a6Qu4niM7BGZiYkJxS+JLYpv1yf+5jhOKBTydCLbtq3rOsJQOIPi2QwEAoZhFIvFUCgk9pF9iOX8yIFAQNd1y7Js2zYMQ9O0SCTiOI5pmpZlGYaBm7FtOxKJIG6jPBEuigNt2zZNE+fJ5XLFYjESiYRCIdxPLpdLJBKhUAixMl3XHceZmJjAFveZBaZp+rxfuVzOcRzcZCgUQkv6B+LWpbOMJWzbxuOgU9LpNDZqmhYKhdAClmWhiSzLUu7cNE3R/rquiwYv1wVwCyAC6XPmCptu3cGDM7JHao94MfCK4k3AO7miqB1sJSXWr8grTi6fGW9pJBLBxmKxqGmaYRj+ihyLxWRhKhaLhmFMTEwUi0UomnjBxAcAiiCfB7IltjiOIz8jtuPq0BT5PKFQSJzZR/7wJ8iuZ0zJNM1EImH/F13X/SNaa+8s3L/YH2G03BLYGFgiFAqJbwM+VPKXDBIsngvdgY+KTxeID6c4s/s3U2HTNbYi02vR1MzMzBiG0dLSgjHs6dOnp6amZKekAL4FT5DL2dfXJ4fsrly5EggE5IHn8ePHs9nskydPMFrEUY7jTE1NYTCL/0IcfYAPRLBjx46RkZEDBw7sWELTtFu3bsH6E4NiOAofP36MS8fj8VQqZdu2GLfikc
UzYjuSfx8+fBiPx7H9jz/+EEEh4Zbxv9sXL154tufAEplM5uHDh1NTU6Zpzs3NKV6dde+sQqGghNFs25YvmkqldF2fnp4WG9vb2x3Hyefz2PLZZ58tLCxMT0+L67766quapr3++uvoi3JdgKbrXKKSdvNpugaHdmLTAu2DdbPMSKq8OeM5moYtpvgoYOIJsxRHKYaJ8pv0tJGFPStfUQZGnNtgF/vDXpNNS2EOuy01+eqKmQ+j3t/Q80ndw/A/EokIG7/anQV71jRNT/cITGYlHVh+ZNyD4iSBgak8JjpIaSj5wEQi4WOWLtt0DWwjM9eiebl3756wbtbC3NyckpI1MzOjaVp/f7+8Wzab1TRt586d8lGDg4NK5Apvow9tbW14n48ePRoMBhW7L5/PLywsQP0FDx480DTtP//5D3ZwHCcQCMjhOJjD3d3dSvsYhiHs6FKplM1mQ6GQOPDu3bvK4KBy4vF4b2/v119/vWvXrg8//LCjo6Orq2tycrKqnRUOh23bzmaz+/fvHx4eVgzVn3/+WaTTyb2m6zoeeXZ2FmMdeYdbt24pUTL0YzgcVhrq9OnTSlBXaXDC7Lem5vbt25W/FSKxodxoWhY4RJDkLUg9Nk1TjENxlDwshSK8++67y97MwMDAs2fPLMtCNpisLE+ePNE07dChQ3K+RyqVMgwD4+W///5b07Q33nhDPuF3333nlpWFhQU5sQGyImtiJbIivkAKe/fuzWazuOKtW7d6enru3r178ODBanfW0NAQPkhXrlx5//333bkc8pc1mUzKnofFxUXkpcjdmkqllI8o+lFO/0BDyekf+Gb7ZzuUazoqMmlM7t+/D2lYdk8fu7VQKCwsLGBsK9Pe3i7/L9JLP/roI/koZZ8vvvhC13XZavY0LWEU79ix4/Lly5ZlLSwsQIVl5dq2bZvYguS8kZER+TzwxoLR0VGIlL+sINlL1kR/WcF3opwntLOzE3/at2/fqVOn8EQ+fuS1d9bo6KgYZNy8edM0TUW48Tjyd/TatWuapn388cfybnLbRqNR9ygBox/Zl40xijLa8PlR+TcdFZk0IIVCAW4Edy6tG1iUnqW5MJru7e2VNxqGkUqlhOmaz+fPnz9vmqawtnCUTDQadRxnbGzM/36i0aj72K1btyrKJdt9yqXBr7/+KiQeTk88Yz6fx21DVmQFxPBc3iJkpVQqwcCUNcVfdGR7f9lk5LV3FtpB8fCICcS4YSWmOjk5mUqlLMtSlPH3338XfYEBDaxmccWZmRld1+WjMJRRxjHQcdHgq2i69QK2f73AAFdzxvTEGyKSz3xA4Msze9SdWIb9EV5D6pg7SUvk9op0YF3X3XUYlMiekAwchTPLkShEkHRdN02z3KVFtMqyLOTGIhJlGAZSyhD1Ehm1Sq6evAX74CilBeDpXpf6DOvSWYjpiSRid3o4bljXdTk3XAl2OY6DXkMONRKZYZWHligWi+4IpPt+RIQwEokEAgEly20dm27Zhk0kEiIvG8+VTqc3LOvOky0iwE1kMpnM4cOH0+l0nU7sWRvyhDf4Fpc9BB6G58+fu7fjF+w262ZnZxcXF1taWg4ePKiMx5PJJPyVmCfW2tra39/vNgDdvVAoFJ4+fSqmnClnxv54peUZZe5BMSbgib9iFhlm7okxvjypDBPPlGlmYl6f++bD4fDMzMyzZ88qsWo3prPy+fyTJ0+QO9ja2nrgwAHZjB0dHT1//rzjOPfu3Zufn29paenv73f7DcQEPPHXeDw+Pz8vWhI7yHMv0bbKbEzM65MbvBpNt+w77q5u6l/gtNFs5Fwuh5lO9W9FrnvVMWTYKDMg5ASyQCBQz0XO0CCK5eLOaqptL6CRa16C0jOXruad5Q+mQdbJz6xOEtFqwuoVGUmdiltKnu3jfls8R6ZNoshi5Fiu/Ctmsm5wcdgV4S65iyeq3gBzpb3gX215Y8AcwkoShze4s9aSct5UTbdZFRn2kWySCF9VA9TP9tcCOC
jLzVBYteVSk4rdK7X05cm+cKSutEp99RQZHuTa/mzgHK9em6y6s/wHrz5VJpqw6TalInvOoEdmvmIIb8b62f5a4J70tV5j6g2u2L26lnn77bf7+voQjqvqV3ZFioymQyWgmrRMHbqe0ul0JX5CzJ8MBAK1UsP699ptGKuP7BUKBQwxxKx/TdOGh4evXLli27YcfwgGg8+XaPLIXjAYTKVSyzZ4NBqNx+MbENlYNQjmlAvH1TCfTxTVrST8RdwxrnIVMskmiOx5JqkgTUf+1tW/0bdhfmSUplxdwxJCmoHVzxDBzEh53mcmk8lms3IpgGWLnY+Ojra3t29Zor29PRwOFwqFUqkUjUa3L4G082Qyid2CwaC7apQ4yfbt26PRKFYaV/6EigH5fD4YDOJaSOn3P7Ob7du3424xwVQxNMLhsHgWeeggZuX29fXJT+dZx0BU7Ka5QAht5EqBGxH/Rg1vZI8rjkWf8HckEtF1HXaoSIPH9kQigTRy5PnDYIS3S5kRgNvADojVihjFxMQE0mgMwwgEArFYDHEDRCDFgr6edcjK2cjiEorHDbE+LMaMstxK6E9U9UVt9WKxKAqie7atf9dUOKOpmbOISAPTwL//VSqySLSQJ85aluWODPjUz1YSbtLptBwohyJDSeVD5H2UKosieqZkQWDBYxHfEDO7RIe5syB9vBZYhUHeArV1z0qS83jkZRTkW/X80WxwxW5CyOZW5MoTuX3yHJE7VS5YD3eqkpwo67uQbLetKmuZKKcrpBA3Lys7bGRZ+n1sZPddua1md+Vct4d9WUWuXuiZQ0PS8IP4Nf7+a6XIq1xDBJFZuS7fKrh27drRo0f3799v27Z70Uz4qS9cuCC7YuWaUqjphbpZAneNRyx4HIlExPkfP34s1yHzrFZVDpRklAvrZDIZrDUn379cjUwUTzFNU/awo75Ja2trTX7lfNVJM6t83d7bKiN7qIO1Z8+etVx7YGAgl8sFAoHz5893dXUp1bMePHig67pclAAyJyrbosSfXIsLRXiVxXqh7O+8846iv3I9dbeO+8cz5UAlPk5KKXFsFKV+oeNKqW8UP6zkM+AG8cllEQUYCWkkGvn3v2qzv8JZ8JVcBU4Dt4NC8Ugo3lV3zNBzfoo7Sqas+uO5mI3PKsjK2TxXtcFFxa269/F0udCPTAiz31aM4j3wxzMqmkwmRYKapmlYH/PPP/+UXQHuQ1KpVCQSKWfJxuNxnEG2mt21Vt0V1t21yX1QKr0KRNFY3Go2m5VvFUMKGcQkL1686HmVZq7YTUgzsxpF/vzzz6EalSTwetbPfvToUSqVEhm7SMuVVyKAq/f58+e4RDweP3nypGmaly5dEvtAsHDs5OTk7du3cYatW7cmk0lMPVIcHZ4V1uHP3bZtGxKKfR4KT6F4RQYHB3Vd/+qrr1AO3PNWUU0RroxCoRCNRqempmKxmOeEwI2v2E0qsULC4fDw8PDaTxWPx0eXUDLWxbsQDAY9FwcgzeLkXhEw7kAlVV3K1c9G9T9xHiXlIBQKobo29kHtc3faA2QLS/kiPUPUQUfmgzsZzl1hHXW44TYRjgJPrwXO5llYXRjO7mdBqnUsFhPLcYZCIZ88Cs7ZqzaO49guEolEuaoO61u2MBaLITmnXKpS/VcBbBiKxWIikcAPIBaL1UORo41I8nAn8FZySG1rA3oqss+cjnUEX6Mmr4BV7c7FFxRferHaiGcupn/ZwnQ6bdv2Sn8SnrEHRSnqvApgA3yVYSFhzRSfH0ADKvJK62cj6lXz2oDud0YJCVbvupxrV22gifJvDNN8lPDysmULV/eRxvvv/9HdpAVhNtFXWWlesa5V4yvySusj18Ow3a3IeGOrqpWs2L3Biiy7s8Q0VEVw/b/BYiL+SoeAlXj8UGaAo6Uq2cioefD/1HCJ2prJ2ka+AxXWz3bPja65IudyObxFVa3OzordGwZsW7mpYZO60yLX3TKA9FfikWBEYaPVcA0JwevFKx
sWQhwaGuru7v7mm2/m5+cvX77ssyemfpw7d27btm21WoKwp6cHPRSPx0+cOKHrejgcvnTpUpXKAU9OTt64cWN6erohF1qtQ+7du2eaptybyIeRE2l8yhaiy5Cl47m4Klb2xLxWZXFPTBdCtqVYO9WzKrGoAoj0UFJVUM0RDqXNN0OEkM2LexEjDFCU9Uf8V+0TSUfKIDeRSCCULeJFijkMh4njONYS2Kecq2rZKoBkHUeoNVxFpQZeC0LqBDFHFF5gKK9pmoq2LhvncUu2uzyW2wWHoyzLEpdTZni6L8HZm9XzJluWhfavk8yWf3G0QpoNFDYRpUj6+vpyudz9+/fdzgf/iampVEqZyHPu3Dld17/99luxBQVV4MGQ3SOHDh0Sl4Pz5MWLF+Uu5PMnskZaWlqOHz9umub4+LhYyKKGvMIuIc1GNpvVdX2Na/Hh1ZWng8bjccdx5CqDYvapclQgEFCcy6QmtLW14WcwNDSEiNF777139+7dGq4eSRuZNBcoELi6knsyKIdy6NAhseX27duaph07dswtwaLmKo5SSshiZj+DurUlHA6bpuk4DkKvVGRCNgLIn1xExd8v4e/6kAvSorCJUqIWNVdF6Vd3JW4swChm2Huyc+dOdtwGANNYdjFRkQmpLr/88kuFiy34F3uC68Ndn0/egtUMZD9GNptV9kGprLNnz3pehVUAq0QmkylXPbm7u7uWd8Z4K2kqUOWnkhWzkCbhuad7HUUxN1qkVWAGppxRh6Pklw4TQctF+d1rg5H1wl01DDmRlcylZK4FIevmK4QsjoyMLBtVR5rEjz/+WM71oSwcc+nSJcMwTp48ObzE7t27dV2fm5sTFi4WGDMMIxgMjo6OhsPhEydOWJY1NjbmeQNwaIqVaMg60traquv6iRMnotHo6OhoNBrt7e0NBAI3b96s7Y1t4ZJrpEkolUpYmxEMDg4u6w1ob29HnW5lezQaHR8fT6fTSjiuVCrNzs7Oz89j8Kv8Fd+Azs5OzOhraWnp7+/3uYdwODwzM/Ps2bMahv4b3neBquXl5l5SkQmprzf28OHDExMTyjzmN998M5vNFovF6mklLm3b9hqz9MjmgopMiB/Dw8Pj4+Nzc3PCgCoUCijR57kIyHqZ811dXaZpVu8SpD6hH5kQPy5fvvzpp5/29vYiKULTtC+//NInO2JdrOPdu3cfOXJEnvtHaCMTQv5PJa9evfrPP//8+9//xhqJntXa1g6qAF68eJETRqjIhBA/Ub5z586y4ThCqMiEENII0I9MCCFUZEIIIVRkQgihIhNCCKEiE0IIFZkQQggVmRBCqMiEEEKoyIQQQkUmhBBCRSaEECoyIYQQKjIhhFCRCSGEUJEJIYSKTAghhIpMCCFUZEIIIVRkQgghVGRCCKEiE0IIcfM/AQAA///RIVPmazAPlgAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Image(filename=\"/Users/kunal/Desktop/img5.png\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Summary**\n", - "- Probability for email being spam with the word JOB is 33%\n", - "- Single vector/variable calculation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Different types of Naïve Bayes classifiers" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "In Gaussian Naive Bayes we assume that continuous value associated with each feature follow Gaussian distribution(Also known as normal distribution).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Gaussian Naive Bayes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Everything follows a normal distribution\n", - "- Uses log probability --> to deal with edge lower probailites\n", - "- only the **mean** and **standard deviation** of the data needs to be estimated." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "image/gif": "R0lGODlhpgEsAYcAAAAAAP///wgCH5BAEAAAAALAAAAACmASwBgAAAAAAAAAL/8DF1uf1hlJNWe3HWm3f/wVAcydI80VRd2dZ94Vie6dq+8Vzf+d7/gUHhkFg0HpFJ5ZLZdD6hUWkjUK0CApGsYYvpcq1cxRdbZmzJU/WaTT1Y0emreQ63Z+Hm9x5/x4bD6ADBwObaDhGZutDoEvDKGBkb+RwpvyQfIcUiN8USP0GJFjcDLRsxPQcRRuMyOTVhXSdDaWt1RmHHCF9RX/c6KU+BZXltjY9ncFsrU1FPVd8Cl68i6wBdnZG1t29/V5kzqLnHycvNz2HqwNHZ220Hs93l59sy/77p8/Wf7A319gEGFKXHXxqBBxHikEUwYUOHChcYfDiRogoyEitm1LiR/2NHjx9BhhQ5kmRJkydRplS5kuU5jC1hIvQXk2bCUjVxBryZk2e+lz2BmvsZlOi2oUWRvku6lNxRpk/rQZWqdGrVT06tZlWCVWvXIly9hgUCVmzZHWTN/kubbm0FtG0/vC0rFy4HumHv1g2nVwtfF3m7AvZLQXDWwoP7Ij6j2CLjiI5RHK4qGbKuyt4ui6AsdXNmtZA7Zw7NdHTl0klPO05ddLXi1kFfD47dczbf2jlv181dc3fb3jF/pw3ecvhcz5+P2z1eXCxzlc69QkcpXSt1k9atYiepfSp3kd6hggcpnvTy5CHIe0yP2vz5uO3dd1jPcT5r+PE31NeoH/Z9/P97PeMPKAEpIpAnAx9CECcFG2KQJgdl+k+5ACXMz78K3boQwwkgPKhD4jTcMDHRRLzgQ51KtOBEgFZ8LsQU3aAQRg5fnNEyEm0c8bIWU+LRpxx1NA1ICHykp8jrahwSOcaOLKlJd54cKUp2pgypSpeUdOBKobKMEccub9wRzMdkHBOfL830BM00tyynzY7eHCfOjeY0Ks0zxbxTTdNmGrNOWu4I1E/X/Ai0Txv/RCSPdfwYMlE2Fg2zkENLfFSNndZZJVIYLZUC00wd+RTDTqGgFNRQTf2PVCdSxfMMUVVdq1VXX501uVWXsBUzLWF1D9ckdN2V12CFbI4srjaNFS//tMBKNr5fv3rrWGKZjI5aYSW4VjZrATQRWmO+DUJbbLMdV7fq7pKr12InM3dPb/EL1wd334XX1+wAy2td19r1QF96jetOsHzlTaTgHAAmV8WEA+PssIEZrs5hEB4+eA2LbYh4SS8wnqJjGfbNMC6Nu3uK5I29/bhUkylrWWVWSdvM5TVh60z
mlxVZ6uR65duZPaR8RjmcoO0DOrSjcQbWaBOQBm3pEkYjekDWSos66SOuPkHqqrMeqObItO56CLFHkFronvkdMLXTQsZL7cbCJvsHuUeGOzK6ecDbrtZWM/tB2vhewW+Y9NZgcJ5JOJylwodm4TXFXVwwtscZzxi3/8lbgHw6yf/KvHIaPidsttFDj6F0LWoj/dwHU39BcydZZ+uv0zsH7rbbaXfcdtPTyV3w3WV33fcUhg8VZNOL15q41xUmPnmol08mmefLXvy33pj/6Pnsm7eIevRW4r575wPuMbjzv3/PfMtBT9/f54aL3/0JTxKf1t7dnq64/ec3XH+IMta/bjmJOQUUoInq5xwDNuw6CkTYAUW2HegoEIKEaWA3HmgYAp7lFhXM1gYxmMHJSDBvZ/EgkUjIQROO8DvWoY79bCIlF/YAhh5q4bxoeEIvqaeGi8khZ6ykHSHqUFJ04s4QwzOeI46FiPerSA+1xESTac87VWwi4iYCxf8oSlFnPBQCeLQoD7yFcYfzuuIYxZPGJtKNjGU0IxHl1kY35hCORvxKtIr2xPWkR45u2s8ejdBHOWVEkD4UxQnFVkgy4XFqBarPfNq2uSzqB5KKPEbWLLnIQELwapnUJCNxkyBPFvGQBxJlrnIlQJyN8pObDGWDCBTL/r2Mla3c5PxUVktbRguXNkHQL933MV3u8pbA8RCDgJm+jg2TmFj7HsaY2cxiLk4nEEJmNPlRTU95anss6pA1sZkzfYRTms5MnrwieUePFQ+dJ3Ln8MJFTkNy03ffkuc8o5DOJ85Dn4GsR+6gdU98rhNJUPIRj/rZIIMa7BAJjSGVjlQkgY7/BaIRNdhEcYgOjM5xnRtVoVCe1CSHsgikURLpSMfpJo9u8SooNZJKqQIKZ+1TTleq0kprUCeXbgVcOD2eUXzKUkDtVKNA1caWZqpQZAQVhZckqh+dyo03JdWG72BqkKx6VeJltSlSfaqdhpoosWoVejL9ajabctaYKoqsNFKpWkMhUbiitaTLZCuuSGUHFEFqrvk0aFvpR097ipGqWBJsPI1U2K6WSrFc8kljj8oPyDp2nJNdK7AsS9l9NAqqSOCsQIT52cjesq+AgqVoL/vGzPJzkqiV6dwEtR9CGiqkD6SteuBEW4SCTLeA5Z0Se+ug3RhKkgSMreNaJQkiBZe5Xc117nOhG13pTpe61bXudbGbXe1ul7vY5ZWe9LIv34JXo1ckb1nNe171rpe97XXve+EbX/nOl771tS8Qz3bfkuVXv0nkb39jdg8Auy29A26qgRGcYAUvmMENFksBAAA7\n", - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Image(filename=\"/Users/kunal/Desktop/img6.gif\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Bernoulli" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Classify between **True** and **False**\n", - "- Very simple code\n", - "- Single Vector\n", - "- Used in email example" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Multinomial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Used in Natrual Language Processing 
(NLP)\n", - "- The term Multinomial Naive Bayes simply lets us know that each p(fi|c) is a multinomial distribution, rather than some other distribution. This works well for data which can easily be turned into counts, such as word counts in text.\n", - "- Use multiple vectors/tokens (words) to calculate total sentence\n", - "- Works with extract meaning of multiline words using a spider connection between each" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Image(filename=\"/Users/kunal/Desktop/img7.png\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## How I am going to be using it" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Extracting info from a resume\n", - "- Unstructed data\n", - "- Extract names \n", - "- Understand what document means\n", - "- Multiple **Different** types of resumes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Example " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def extract_email_addresses(string):\n", - " r = re.compile(r'[\\w\\.-]+@[\\w\\.-]+')\n", - " return r.findall(string)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - 
"number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/README.md b/README.md index 819ec48..a1f7bde 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,5 @@ # ClientResumeMatching A client company is looking for employees. Employees are looking for jobs. Employees send in a resume and the company sends in a job description. Extract exact which resume matches the job description the best. -## Main topics for the resume -- Skillset -- Qualifications -- Work Experience -- Career objectives -## Main topics for the Jary (free flow text from company) -- Skill set -- Experience -- Location -- Responsibilities -- Role - -## Flow plan -1. Create a dataset of skill sets and similar skills to each using dice - 2. [https://www.dice.com/skills](https://www.dice.com/skills) - 3. Use firebase database - 4. Use web scraping → robotic process automation web scraping -2. Connect the 50 resumes to the firebase database - 3. Be able to make database dynamic with easier to -3. Using NLP analysis resume getting basic understand - 4. Create new database where we can save this preliminary data -4. Build on the main topics of the resume and connect them to the generated topic - 5. This will be the basics of understanding -5. Connect the website to the python code - 6. With this we must also connect the database -6. Develop website to upload documents to database -7. Connect main topics of the Jary -8. Generate a SVD for understanding the topics (unstructured data) -9. Use and Bayes Theorem and Naive Bayes to generate a basic outline of both documents -10. U sing transfer learning build a running base model (Spacy) -11. 
Optimize model for speed and accuracy +# Branch for Unnaty +**Input your own thoughts** diff --git a/Untitled.ipynb b/Untitled.ipynb deleted file mode 100644 index fcf67b5..0000000 --- a/Untitled.ipynb +++ /dev/null @@ -1,646 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import spacy\n", - "#it means that the Python environment the model was installed in is not the same as your Jupyter environment." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\kunal\\Anaconda3\\lib\\site-packages\\spacy\\util.py:275: UserWarning: [W031] Model 'en_core_web_sm' (2.2.0) requires spaCy v2.2 and is incompatible with the current spaCy version (2.3.1). This may lead to unexpected results or runtime errors. To resolve this, download a newer compatible model or retrain your custom model with the current spaCy version. For more details and available updates, run: python -m spacy validate\n", - " warnings.warn(warn_msg)\n" - ] - } - ], - "source": [ - "import en_core_web_sm\n", - "nlp = en_core_web_sm.load()\n", - "#nlp = spacy.load(\"en_core_web_sm\") \n", - "#nlp = en_core_web_sm.load()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "doc = nlp(\"Use of Cayenne web application to detect temperature and provides a siren at thresholds.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "text = \"\"\"NISARGA HASSAN SREEDHAR\n", - "San Jose, California | +1 (925) 789-8911| nisarga.nishu20@gmail.com | www.linkedin.com/in/nisarga-sreedhar-39938516b\n", - "EDUCATION:\n", - "Master’s in Electrical Engineering (Computer Networking), San Jose State University, California, USA. 
May 2020\n", - "Coursework: Internetworking, Broadband communications, Network Security, Internet of Things (IoT), Voice over IP\n", - "Bachelor of Engineering in Telecommunication Engineering, Dayananda Sagar College of Engineering, Visvesvaraya\n", - "Technological University, India June 2017\n", - "TECHNICAL SKILLS:\n", - "Network technologies: HTTP, DNS, DHCP, HTTPS, TLS-SSL, TCP/IP, UDP, IPv4, IPv6, ICMP, OSPF, BGP, ARP, VLAN, STP,\n", - "SIP, IPS, IDS, NAT, IS-IS, 802.11, MPLS, WPA2, WPA3, Packet level troubleshooting\n", - "Programming: Python\n", - "OS Platform: Linux (Ubuntu, CentOS), Kali Linux, Cisco IOS\n", - "Tools and IDE: Advanced Design System (ADS), Wireshark, VMware Workstation, VirtualBox, GNS3, Cisco Packet Tracer, PuTTY\n", - "CERTIFICATION:\n", - "• Cisco Certified Network Associate (CCNA) 200-301 (In Progress)\n", - "• AWS Certified Cloud Practitioner (CLF-C01) (In progress)\n", - "EXPERIENCE:\n", - "Marmon Food & Beverages Technologies, Cornelius, India June 2019 - July 2019\n", - "Network Engineer Intern\n", - "• Python based Serial Communication (IoT)\n", - "• Used an Iot Dongle to read a file, convert it into a packet by adding header and footer and transmit serially.\n", - "• Python code was written to send the file from dongle to Food Holding Bin.\n", - "ACADEMIC PROJECTS:\n", - "Secure routing in IoT networks Aug 2019 - current\n", - "• Design and configure an IoT based network using Cisco Packet Tracer.\n", - "• Perform a Man in the Middle attack to one of the devices using Kali Linux.\n", - "• Detection of the attack and solution to the problem faced.\n", - "Illumino: IoT Smart Light Aug 2019 - Dec 2019\n", - "• Create a hardware of an IoT smart light using Arduino ESP8266 and Cayenne IoT Platform.\n", - "• Designed to operate in three modes: Auto mode, Lamp mode, Security mode.\n", - "• Use of Cayenne web application to detect temperature and provides a siren at thresholds.\n", - "Voice over IP for Wireless Ad Hoc Networks (WANET) 
Aug 2019 - Dec 2019\n", - "• Simple Call Establishment between two clients in a WANET that have registered with the Asterisk server.\n", - "• Call on Hold with one user client to attend another client.\n", - "• Call Conferencing between all three clients, all performed using X-Lite softphone software.\n", - "Experiencing Virtualization using Virtual Box Jan 2019 - April 2019\n", - "• Worked on Open vSwitch in Virtual Box on an Ubuntu machine to run ovs and its versions successfully.\n", - "• Demonstrated how the VLANs are implemented, three VMs and one virtual switch is created.\n", - "• Attempted to communicate between the VMs and observed the PING result.\n", - "Corporate Company Network Design Aug 2018 - Dec 2018\n", - "• Designed and implemented a basic corporate network topology for the interconnection between offices with\n", - "switches, routers, and hosts.\n", - "• Implemented the design using routing protocols such as OSPF, BGP, DNS, VLAN, STP, IP, DHCP and HSRP.\n", - "• Tested and troubleshot configurations in the console to check the communication between the networks.\n", - "Design of X-Band 8PSK Modulator using ADS Jan 2017 - April 2017\n", - "• Designed various components of the modulator used in a satellite at ISRO (Indian Space Research Organization), Bangalore.\n", - "• Performed optimization of the components at 8.75GHz frequency using the tools available in ADS to obtain the desired results of\n", - "Insertion loss, Return loss and Isolation loss.\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import Counter\n", - "from string import punctuation" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "import en_core_web_lg\n", - "nlp = en_core_web_lg.load()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def get_hotwords(text):\n", 
- " result = []\n", - " pos_tag = ['PROPN', 'ADJ', 'NOUN'] # 1\n", - " doc = nlp(text.lower()) # 2\n", - " for token in doc:\n", - " # 3\n", - " if(token.text in nlp.Defaults.stop_words or token.text in punctuation):\n", - " continue\n", - " # 4\n", - " if(token.pos_ in pos_tag):\n", - " result.append(token.text)\n", - " \n", - " return result # 5" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "output = set(get_hotwords(text))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'sreedhar', 'design', 'centos', 'result', 'vswitch', 'switch', 'icmp', 'vmware', 'corporate', 'asterisk', 'indian', 'x', 'nisarga', 'advanced', 'footer', 'simple', 'company', 'virtual', 'certification', 'internet', 'june', 'code', 'programming', 'jose', 'tcp', 'india', 'aws', 'lamp', 'web', 'wireless', 'modulator', 'vlans', 'hardware', 'optimization', 'dhcp', 'technological', 'c01', 'insertion', 'ips', 'technical', 'hassan', 'practitioner', 'networks', 'ssl', 'secure', 'computer', 'https', 'wireshark', 'bin', 'detection', 'technologies', 'configurations', 'bgp', 'experience', 'components', 'bachelor', 'tls', 'beverages', 'research', 'hosts', 'application', 'internetworking', 'dec', 'available', 'university', 'machine', 'nat', 'ads', 'bangalore', 'linux', 'california', 'electrical', 'progress', 'man', 'www.linkedin.com/in/nisarga-sreedhar-39938516b', 'networking', 'voice', 'os', 'ios', 'workstation', 'tracer', 'isro', 'frequency', 'gns3', 'satellite', 'hold', 'file', 'console', 'smart', 'ide', 'communication', 'academic', 'april', 'engineering', 'ip', 'header', 'devices', 'softphone', 'level', 'network', 'college', 'siren', 'dns', 'cisco', 'use', 'cayenne', 'isolation', 'iot', 'mpls', 'food', 'packet', 'tools', 'master', 'system', 'box', 'thresholds', 'offices', 'vms', 'arduino', 'usa', 'ospf', 'middle', 'establishment', 
'virtualbox', '+1', 'loss', 'current', 'hsrp', 'wanet', 'results', 'intern', 'switches', 'space', 'serial', 'education', 'wpa2', 'solution', 'cloud', 'telecommunication', 'protocols', 'stp', 'modes', 'engineer', 'dayananda', 'ping', 'light', 'topology', 'state', 'temperature', 'server', 'lite', 'conferencing', 'user', 'marmon', 'interconnection', 'organization', 'july', 'dongle', 'clf', 'clients', 'skills', 'http', 'vlan', 'udp', 'problem', 'broadband', 'versions', 'python', 'aug', 'platform', 'virtualization', 'associate', 'projects', 'san', 'client', '802.11', 'ovs', 'ids', 'coursework', 'band', 'visvesvaraya', 'ubuntu', 'ccna', 'software', 'attack', 'routers', 'mode', 'sagar', 'basic', 'arp', 'auto', 'routing', '•', 'cornelius', 'communications', 'esp8266', 'putty', 'kali', 'open', 'things', 'security', 'jan'}\n" - ] - } - ], - "source": [ - "print(output)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#sreedhar #design #centos #result #vswitch #switch #icmp #vmware #corporate #asterisk\n" - ] - } - ], - "source": [ - "hashtags = [('#' + x[0]) for x in Counter(output).most_common(10)]\n", - "print(' '.join(hashtags))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded language vocabulary\n", - "['aug current • design', 'sreedhar', 'design', 'centos', 'broadband communications', 'result', 'vswitch', 'switch', 'icmp', 'python os platform', 'transmit', 'vmware', 'asterisk', 'corporate', 'indian', 'illumino', 'x', 'footer', 'nisarga', 'advanced', 'simple', 'company', 'virtual', 'certification', 'internet', 'lamp mode', 'troubleshot', 'june', 'code', 'nisarga hassan sreedhar san jose', 'telecommunication engineering', 'technical 
skills', 'programming', 'modulator', 'optimization', 'jose', 'india', 'tcp', 'vlans', 'hardware', 'cayenne web application', 'aws', 'dhcp', 'various components', 'technological', 'c01', 'x band modulator', 'virtual box jan', 'wireless networks', 'open vswitch', 'ipv6', 'insertion', 'ips', 'technical', 'hassan', 'practitioner', 'networks', 'ssl', 'secure', 'https', 'wireshark', 'computer', 'bin', 'network technologies', 'detection', 'indian space research organization', 'technologies', 'configurations', 'bgp', 'experience', 'components', 'bachelor', 'virtual box', 'tls', 'beverages', 'wireless', 'research', 'sip', 'hosts', 'security mode', 'application', 'dec', 'available', 'electrical engineering', 'university', 'machine', 'nat', 'ads', 'bangalore', 'linux', 'california', 'electrical', 'progress', 'man', 'www.linkedin.com/in/nisarga-sreedhar-39938516b', 'voice', '• use', 'networking', 'os', 'marmon food beverages technologies', 'workstation', 'tracer', 'iot dongle', 'isro', 'frequency', 'gns3', 'satellite', 'hold', 'file', 'clf c01', 'console', 'may', 'ide', 'april', 'communication', 'academic', 'smart', 'engineering', 'ip', 'nisarga.nishu20@gmail.com', 'header', 'devices', 'insertion loss', 'softphone', 'level', 'network', 'college', 'iot smart light', 'siren', 'dns', 'cisco', 'lamp', 'dec • simple call establishment', 'cayenne', 'use', 'isolation', 'iot', 'mpls', 'food', 'ubuntu machine', 'packet', 'tools', 'master', 'cayenne iot platform', '8911|', 'thresholds', 'offices', 'system', 'box', 'vms', 'virtual switch', 'india june', 'isolation loss', 'arduino', 'ospf', 'usa', 'middle', 'corporate company network design', 'establishment', 'iot network', 'virtualbox', '+1', 'loss', 'current', 'python code', 'hsrp', 'packet level', 'nisarga.nishu20@gmail.com |', 'web', 'wanet', 'results', 'ubuntu centos', 'switches', 'intern', 'network security', 'serial', 'education', 'space', 'wpa2', 'solution', 'advanced design system', 'cisco packet tracer', 'auto mode', 'user 
client', 'vlan stp', 'protocols', 'telecommunication', 'cloud', 'stp', '• call', 'modes', 'engineer', 'dayananda', 'ping', 'troubleshot configurations', 'july network engineer intern • python serial communication', 'light', 'topology', 'state', 'putty certification', 'dayananda sagar college', 'temperature', 'server', 'academic projects', 'lite', 'interconnection', 'user', 'marmon', 'conferencing', 'organization', 'dongle', 'july', 'asterisk server', 'vmware workstation', 'clf', 'clients', 'skills', 'http', 'kali linux', 'san jose state university', 'udp', 'vlan', 'problem', 'broadband', 'visvesvaraya technological university', 'versions', 'lite softphone software', 'aug', 'python', 'computer networking', 'www.linkedin.com/in/nisarga-sreedhar-39938516b education', 'ipv4', 'platform', 'associate', 'projects', 'san', 'wpa3', 'client', 'ids', 'middle attack', 'coursework', 'california |', 'iot networks', 'ccna', 'visvesvaraya', 'ubuntu', 'software', 'band', 'attack', 'master ’s', 'tcp ip', 'routers', 'mode', 'basic corporate network topology', 'sagar', 'basic', 'arp', '• detection', 'routing', 'auto', 'cornelius', '•', 'esp8266', 'communications', 'putty', 'kali', 'open', 'things', 'ping result', 'security', 'jan']\n" - ] - } - ], - "source": [ - "import spacy\n", - "import subprocess\n", - "from string import punctuation\n", - "\n", - "\n", - "def extract_keywords(nlp, sequence, special_tags: list = None):\n", - " \"\"\" Takes a Spacy core language model,\n", - " string sequence of text and optional\n", - " list of special tags as arguments.\n", - " \n", - " If any of the words in the string are \n", - " in the list of special tags they are immediately \n", - " added to the result. 
\n", - " \n", - " Arguments:\n", - " sequence {str} -- string sequence to have keywords extracted from\n", - " \n", - " Keyword Arguments:\n", - " tags {list} -- list of tags to be automatically added (default: {None})\n", - " \n", - " Returns:\n", - " {list} -- list of the unique keywords extracted from a string\n", - " \"\"\"\n", - " result = []\n", - "\n", - " # custom list of part of speech tags we are interested in\n", - " # we are interested in proper nouns, nouns, and adjectives\n", - " # edit this list of POS tags according to your needs.\n", - " pos_tag = ['PROPN', 'NOUN', 'ADJ']\n", - "\n", - " # create a spacy doc object by calling the nlp object on the input sequence\n", - " doc = nlp(sequence.lower())\n", - "\n", - " # if special tags are given and exist in the input sequence\n", - " # add them to results by default\n", - " if special_tags:\n", - " tags = [tag.lower() for tag in special_tags]\n", - " for token in doc:\n", - " if token.text in tags:\n", - " result.append(token.text)\n", - "\n", - " for chunk in doc.noun_chunks:\n", - " final_chunk = \"\"\n", - " for token in chunk:\n", - " if (token.pos_ in pos_tag):\n", - " final_chunk = final_chunk + token.text + \" \"\n", - " if final_chunk:\n", - " result.append(final_chunk.strip())\n", - "\n", - " for token in doc:\n", - " if (token.text in nlp.Defaults.stop_words\n", - " or token.text in punctuation):\n", - " continue\n", - " if (token.pos_ in pos_tag):\n", - " result.append(token.text)\n", - " return list(set(result))\n", - "\n", - "\n", - "if __name__ == \"__main__\":\n", - " \"\"\"\n", - " install the langauge model using the subprocess package\n", - " useful when hosting the service in the cloud as it prevents against\n", - " us forgetting to do this via the CLI\n", - " \"\"\"\n", - " #subprocess.call(\"python -m spacy download en_core_web_sm\",shell=True)\n", - "\n", - " # load the small english language model,\n", - " nlp = spacy.load(\"en_core_web_sm\")\n", - "\n", - " print(\"Loaded 
language vocabulary\")\n", - " print(\n", - " extract_keywords(\n", - " nlp, text\n", - " ))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "import spacy\n", - "from spacy import displacy\n", - "from collections import Counter\n", - "import en_core_web_sm\n", - "nlp = en_core_web_sm.load()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[('San Jose', 'GPE'), ('California', 'GPE'), ('925', 'CARDINAL'), ('789', 'CARDINAL'), ('San Jose State University', 'GPE'), ('California', 'GPE'), ('USA', 'GPE'), ('May 2020', 'DATE'), ('Internetworking', 'GPE'), ('Broadband', 'GPE'), ('Network Security', 'ORG'), ('IoT', 'ORG'), ('Voice', 'ORG'), ('IP\\nBachelor of Engineering', 'ORG'), ('Telecommunication Engineering', 'ORG'), ('Dayananda Sagar College of Engineering,', 'ORG'), ('Visvesvaraya\\nTechnological University', 'ORG'), ('India', 'GPE'), ('June 2017', 'DATE'), ('HTTP', 'ORG'), ('HTTPS', 'ORG'), ('TLS-SSL', 'ORG'), ('TCP/IP', 'ORG'), ('UDP', 
'ORG'), ('ICMP', 'ORG'), ('OSPF', 'ORG'), ('BGP', 'ORG'), ('ARP', 'ORG'), ('VLAN', 'ORG'), ('STP', 'ORG'), ('IPS', 'ORG'), ('IDS', 'ORG'), ('NAT', 'ORG'), ('IS-IS', 'ORG'), ('802.11', 'CARDINAL'), ('MPLS', 'ORG'), ('WPA2', 'ORG'), ('WPA3', 'ORG'), ('Packet', 'PERSON'), ('Linux', 'PERSON'), ('Kali Linux', 'PERSON'), ('Cisco IOS\\nTools', 'ORG'), ('IDE', 'ORG'), ('Advanced Design System', 'ORG'), ('ADS', 'ORG'), ('Wireshark', 'ORG'), ('VMware Workstation', 'ORG'), ('VirtualBox', 'ORG'), ('GNS3', 'ORG'), ('Cisco Packet Tracer', 'PERSON'), ('Cisco Certified Network Associate', 'ORG'), ('200-301', 'CARDINAL'), ('AWS', 'ORG'), ('Cloud Practitioner', 'PERSON'), ('CLF-C01', 'ORG'), ('Cornelius', 'GPE'), ('India', 'GPE'), ('June 2019 - July 2019', 'DATE'), ('Python', 'ORG'), ('Serial Communication', 'ORG'), ('IoT', 'ORG'), ('Iot Dongle', 'PERSON'), ('Python', 'ORG'), ('IoT', 'ORG'), ('IoT', 'ORG'), ('Cisco Packet Tracer', 'PERSON'), ('Perform a Man in the Middle attack', 'WORK_OF_ART'), ('Kali Linux', 'PERSON'), ('IoT', 'ORG'), ('Light Aug 2019', 'PERSON'), ('IoT', 'ORG'), ('Arduino ESP8266', 'PERSON'), ('Cayenne IoT Platform', 'ORG'), ('three', 'CARDINAL'), ('IP', 'GPE'), ('WANET', 'ORG'), ('two', 'CARDINAL'), ('WANET', 'ORG'), ('Asterisk', 'PERSON'), ('one', 'CARDINAL'), ('three', 'CARDINAL'), ('X-Lite', 'ORG'), ('Virtual Box Jan 2019', 'ORG'), ('Worked on Open vSwitch in Virtual Box', 'WORK_OF_ART'), ('three', 'CARDINAL'), ('one', 'CARDINAL'), ('PING', 'ORG'), ('OSPF', 'ORG'), ('BGP', 'ORG'), ('DNS', 'ORG'), ('VLAN', 'ORG'), ('STP', 'ORG'), ('IP', 'ORG'), ('HSRP', 'ORG'), ('ADS', 'ORG'), ('2017', 'DATE'), ('Indian Space Research Organization', 'ORG'), ('Bangalore', 'GPE'), ('ADS', 'ORG'), ('Return', 'ORG'), ('Isolation', 'ORG')]\n" - ] - } - ], - "source": [ - "doc = nlp(text)\n", - "print([(X.text, X.label_) for X in doc.ents])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "Pretty printing has been turned OFF\n" - ] - } - ], - "source": [ - "pprint([(X, X.ent_iob_, X.ent_type_) for X in doc])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "from bs4 import BeautifulSoup\n", - "import requests\n", - "import re" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GPE | San Jose\n", - "GPE | California\n", - "CARDINAL | 925\n", - "CARDINAL | 789\n", - "GPE | San Jose State University\n", - "GPE | California\n", - "GPE | USA\n", - "DATE | May 2020\n", - "GPE | Internetworking\n", - "GPE | Broadband\n", - "ORG | Network Security\n", - "ORG | IoT\n", - "ORG | Voice\n", - "ORG | IP\n", - "Bachelor of Engineering\n", - "ORG | Telecommunication Engineering\n", - "ORG | Dayananda Sagar College of Engineering,\n", - "ORG | Visvesvaraya\n", - "Technological University\n", - "GPE | India\n", - "DATE | June 2017\n", - "ORG | HTTP\n", - "ORG | HTTPS\n", - "ORG | TLS-SSL\n", - "ORG | TCP/IP\n", - "ORG | UDP\n", - "ORG | ICMP\n", - "ORG | OSPF\n", - "ORG | BGP\n", - "ORG | ARP\n", - "ORG | VLAN\n", - "ORG | STP\n", - "ORG | IPS\n", - "ORG | IDS\n", - "ORG | NAT\n", - "ORG | IS-IS\n", - "CARDINAL | 802.11\n", - "ORG | MPLS\n", - "ORG | WPA2\n", - "ORG | WPA3\n", - "PERSON | Packet\n", - "PERSON | Linux\n", - "PERSON | Kali Linux\n", - "ORG | Cisco IOS\n", - "Tools\n", - "ORG | IDE\n", - "ORG | Advanced Design System\n", - "ORG | ADS\n", - "ORG | Wireshark\n", - "ORG | VMware Workstation\n", - "ORG | VirtualBox\n", - "ORG | GNS3\n", - "PERSON | Cisco Packet Tracer\n", - "ORG | Cisco Certified Network Associate\n", - "CARDINAL | 200-301\n", - "ORG | AWS\n", - "PERSON | Cloud Practitioner\n", - "ORG | CLF-C01\n", - "GPE | Cornelius\n", - "GPE | India\n", - "DATE | June 2019 - July 2019\n", - "ORG | Python\n", - "ORG | Serial Communication\n", - "ORG | IoT\n", - 
"PERSON | Iot Dongle\n", - "ORG | Python\n", - "ORG | IoT\n", - "ORG | IoT\n", - "PERSON | Cisco Packet Tracer\n", - "WORK_OF_ART | Perform a Man in the Middle attack\n", - "PERSON | Kali Linux\n", - "ORG | IoT\n", - "PERSON | Light Aug 2019\n", - "ORG | IoT\n", - "PERSON | Arduino ESP8266\n", - "ORG | Cayenne IoT Platform\n", - "CARDINAL | three\n", - "GPE | IP\n", - "ORG | WANET\n", - "CARDINAL | two\n", - "ORG | WANET\n", - "PERSON | Asterisk\n", - "CARDINAL | one\n", - "CARDINAL | three\n", - "ORG | X-Lite\n", - "ORG | Virtual Box Jan 2019\n", - "WORK_OF_ART | Worked on Open vSwitch in Virtual Box\n", - "CARDINAL | three\n", - "CARDINAL | one\n", - "ORG | PING\n", - "ORG | OSPF\n", - "ORG | BGP\n", - "ORG | DNS\n", - "ORG | VLAN\n", - "ORG | STP\n", - "ORG | IP\n", - "ORG | HSRP\n", - "ORG | ADS\n", - "DATE | 2017\n", - "ORG | Indian Space Research Organization\n", - "GPE | Bangalore\n", - "ORG | ADS\n", - "ORG | Return\n", - "ORG | Isolation\n" - ] - } - ], - "source": [ - "for entity in doc.ents:\n", - " print(entity.label_, ' | ', entity.text)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nlp = spacy.blank('en') # new, empty model. 
Let’s say it’s for the English language\n", - "nlp.vocab.vectors.name = 'example_model_training' # give a name to our list of vectors\n", - "# add NER pipeline\n", - "ner = nlp.create_pipe('ner') # our pipeline would just do NER\n", - "nlp.add_pipe(ner, last=True) # we add the pipeline to the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "DATA = [\n", - " (u\"Search Analytics: Business Value & BigData NoSQL Backend, Otis Gospodnetic \", {'entities': [ (58,75,'PERSON') ] }),\n", - " (u\"Introduction to Elasticsearch by Radu \", {'entities': [ (16,29,'TECH'), (32, 36, 'PERSON') ] }),\n", - " # …\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nlp.entity.add_label('PERSON')\n", - "nlp.entity.add_label('TECH')\n", - "# ..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = nlp.begin_training()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nlp.update([text], [annotations], sgd=optimizer)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for i in range(20):\n", - " random.shuffle(DATA)\n", - " for text, annotations in DATA:\n", - " nlp.update([text], [annotations], sgd=optimizer)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "doc = nlp(u\"#bbuzz 2016: Rafał Kuć - Running High Performance And Fault Tolerant Elasticsearch\")\n", - "for entity in doc.ents:\n", - " print(entity.label_, ' | ', entity.text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - 
"language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Untitled1.ipynb b/Untitled1.ipynb deleted file mode 100644 index 5360f39..0000000 --- a/Untitled1.ipynb +++ /dev/null @@ -1,658 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Basic Starting Import" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import spacy" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "nlp = spacy.load(\"en_core_web_sm\")\n", - "#Different languages to load in including small, medium and large packages" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Basic Understanding of one-word matching and attributes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define Text" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "doc = nlp(\"i am an iOS dev and I like to code in objective-c\")\n", - "# this is the start defining of the text (Always needed) Convert it into nlp detected work\n", - "#doc2 = nlp(text)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[i, am, an, iOS, dev, and, I, like, to, code, in, objective, -, c]" - ] - }, - "execution_count": 4, - "metadata": {}, - 
"output_type": "execute_result" - } - ], - "source": [ - "[t for t in doc]\n", - "#In the doc var, it splits every single words into different value in a list" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Display understanding of how sentence is shaped" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - " i\n", - " PRON\n", - "\n", - "\n", - "\n", - " am\n", - " AUX\n", - "\n", - "\n", - "\n", - " an\n", - " DET\n", - "\n", - "\n", - "\n", - " iOS\n", - " ADJ\n", - "\n", - "\n", - "\n", - " dev\n", - " NOUN\n", - "\n", - "\n", - "\n", - " and\n", - " CCONJ\n", - "\n", - "\n", - "\n", - " I\n", - " PRON\n", - "\n", - "\n", - "\n", - " like\n", - " VERB\n", - "\n", - "\n", - "\n", - " to\n", - " PART\n", - "\n", - "\n", - "\n", - " code\n", - " VERB\n", - "\n", - "\n", - "\n", - " in\n", - " ADP\n", - "\n", - "\n", - "\n", - " objective-\n", - " NOUN\n", - "\n", - "\n", - "\n", - " c\n", - " NOUN\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " nsubj\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " det\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " amod\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " attr\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " cc\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " nsubj\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " aux\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " xcomp\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " prep\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " amod\n", - " \n", - " \n", - "\n", - "\n", - "\n", - " \n", - " \n", - " pobj\n", - " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from spacy import displacy\n", - 
def has_go_token(doc, keywords=('go', 'golang', 'python', 'ruby', 'objective-c')):
    """Report whether ``doc`` contains one of the target skill keywords.

    Every token is compared by its lowercased text (``lower_``). A match only
    counts when the token's part-of-speech tag is not ``'VERB'`` (presumably so
    that the verb "go" is not mistaken for the Go language -- TODO confirm).

    Only single tokens are compared, so a keyword that the tokenizer splits
    into several tokens is never found here; the notebook's own notes observe
    this for "objective-c". Use a multi-token matcher for such skills.

    Args:
        doc: Iterable of tokens exposing ``lower_`` and ``pos_`` attributes.
        keywords: Lowercase keywords to search for. Defaults to the list that
            was previously hard-coded in the function body.

    Returns:
        True if at least one non-verb token equals a keyword, else False.
    """
    # A bare string would otherwise be exploded into single characters.
    if isinstance(keywords, str):
        keywords = (keywords,)
    wanted = frozenset(keywords)
    return any(tok.lower_ in wanted and tok.pos_ != 'VERB' for tok in doc)
the time but doesn't work with multiple words that connetec together\n", - "\n", - "# For example \"objective-c\" is split into 3 words " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Understanding Matcher attribute and multiple words" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Scan for like connected Words using matcher" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "from spacy.matcher import Matcher" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "pattern = [{'LOWER': 'objective'},\n", - " {'IS_PUNCT': True},\n", - " {'LOWER': 'c'}]\n", - "#get from website for build your own" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "matcher = Matcher(nlp.vocab, validate=True)\n", - "#define matcher var - must have" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "matcher.add(\"OBJ_C\", None, pattern)\n", - "#Could add multiple values to check for multpiple things\n", - "#Name, None, Name of pattern" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(14600203822370300727, 11, 14)]" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "matcher(doc)\n", - "#ID, start, end" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "objective-c" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc[11:14]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Major Links that help you with the code\n", - "Extract named enties like major things including names\n", - 
"https://explosion.ai/demos/displacy-ent\n", - "\n", - "\n", - "Runs the graphic of matching every single word to another word and sees how it connects to each other (connecting of the sentence)\n", - "https://explosion.ai/demos/displacy\n", - "\n", - "\n", - "Builds a automatic pattern that checks for connections with the matcher libary\n", - "https://explosion.ai/demos/matcher\n", - "\n", - "\n", - "https://spacy.io/usage/rule-based-matching" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**What is nlp.pipe()**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**SUMMARY**\n", - "Need to build a code that will be able to build this pattern (for the matcher libary) automatically for every single skill. \n", - "\n", - "And run a classification report (check video 3 at the end)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Learning to build own Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#!python -m spacy download en_core_web_sm\n", - "#!python -m spacy validate" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "text = \"\"\"NISARGA HASSAN SREEDHAR\n", - "San Jose, California | +1 (925) 789-8911| 
nisarga.nishu20@gmail.com | www.linkedin.com/in/nisarga-sreedhar-39938516b\n", - "EDUCATION:\n", - "Master’s in Electrical Engineering (Computer Networking), San Jose State University, California, USA. May 2020\n", - "Coursework: Internetworking, Broadband communications, Network Security, Internet of Things (IoT), Voice over IP\n", - "Bachelor of Engineering in Telecommunication Engineering, Dayananda Sagar College of Engineering, Visvesvaraya\n", - "Technological University, India June 2017\n", - "TECHNICAL SKILLS:\n", - "Network technologies: HTTP, DNS, DHCP, HTTPS, TLS-SSL, TCP/IP, UDP, IPv4, IPv6, ICMP, OSPF, BGP, ARP, VLAN, STP,\n", - "SIP, IPS, IDS, NAT, IS-IS, 802.11, MPLS, WPA2, WPA3, Packet level troubleshooting\n", - "Programming: Python\n", - "OS Platform: Linux (Ubuntu, CentOS), Kali Linux, Cisco IOS\n", - "Tools and IDE: Advanced Design System (ADS), Wireshark, VMware Workstation, VirtualBox, GNS3, Cisco Packet Tracer, PuTTY\n", - "CERTIFICATION:\n", - "• Cisco Certified Network Associate (CCNA) 200-301 (In Progress)\n", - "• AWS Certified Cloud Practitioner (CLF-C01) (In progress)\n", - "EXPERIENCE:\n", - "Marmon Food & Beverages Technologies, Cornelius, India June 2019 - July 2019\n", - "Network Engineer Intern\n", - "• Python based Serial Communication (IoT)\n", - "• Used an Iot Dongle to read a file, convert it into a packet by adding header and footer and transmit serially.\n", - "• Python code was written to send the file from dongle to Food Holding Bin.\n", - "ACADEMIC PROJECTS:\n", - "Secure routing in IoT networks Aug 2019 - current\n", - "• Design and configure an IoT based network using Cisco Packet Tracer.\n", - "• Perform a Man in the Middle attack to one of the devices using Kali Linux.\n", - "• Detection of the attack and solution to the problem faced.\n", - "Illumino: IoT Smart Light Aug 2019 - Dec 2019\n", - "• Create a hardware of an IoT smart light using Arduino ESP8266 and Cayenne IoT Platform.\n", - "• Designed to operate in three 
modes: Auto mode, Lamp mode, Security mode.\n", - "• Use of Cayenne web application to detect temperature and provides a siren at thresholds.\n", - "Voice over IP for Wireless Ad Hoc Networks (WANET) Aug 2019 - Dec 2019\n", - "• Simple Call Establishment between two clients in a WANET that have registered with the Asterisk server.\n", - "• Call on Hold with one user client to attend another client.\n", - "• Call Conferencing between all three clients, all performed using X-Lite softphone software.\n", - "Experiencing Virtualization using Virtual Box Jan 2019 - April 2019\n", - "• Worked on Open vSwitch in Virtual Box on an Ubuntu machine to run ovs and its versions successfully.\n", - "• Demonstrated how the VLANs are implemented, three VMs and one virtual switch is created.\n", - "• Attempted to communicate between the VMs and observed the PING result.\n", - "Corporate Company Network Design Aug 2018 - Dec 2018\n", - "• Designed and implemented a basic corporate network topology for the interconnection between offices with\n", - "switches, routers, and hosts.\n", - "• Implemented the design using routing protocols such as OSPF, BGP, DNS, VLAN, STP, IP, DHCP and HSRP.\n", - "• Tested and troubleshot configurations in the console to check the communication between the networks.\n", - "Design of X-Band 8PSK Modulator using ADS Jan 2017 - April 2017\n", - "• Designed various components of the modulator used in a satellite at ISRO (Indian Space Research Organization), Bangalore.\n", - "• Performed optimization of the components at 8.75GHz frequency using the tools available in ADS to obtain the desired results of\n", - "Insertion loss, Return loss and Isolation loss.\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/WebScrappingData.ipynb b/WebScrappingData.ipynb deleted file mode 100644 index 11ada74..0000000 --- a/WebScrappingData.ipynb +++ /dev/null @@ -1,1692 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Generate List of Skills with href" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Webscrapping Soup and Selenium" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "import requests # for making standard html requests\n", - "from bs4 import BeautifulSoup # magical tool for parsing html data\n", - "import json # for parsing data\n", - "from pandas import DataFrame as df # premier library for data organization\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "from selenium import webdriver\n", - "from webdriver_manager.chrome import ChromeDriverManager" - ] - 
def listofSkills(link):
    """Scrape one skills index page and return the skill links found on it.

    The page at ``link`` is downloaded with ``requests.get`` and parsed with
    BeautifulSoup's ``html.parser``. Every element with CSS class
    ``col-md-3`` except the last one is inspected, and the ``href`` of its
    second child node is collected.

    Args:
        link: URL of the index page to scrape.

    Returns:
        A list of ``[href, skill_name]`` pairs, where ``skill_name`` is
        ``clearName(href)``. Entries whose second child is missing or has no
        ``href`` are reported on stdout and skipped.
    """
    page = requests.get(link)
    # The original assigned 'ISO-885901', which is not a registered codec
    # name; the intended Latin-1 spelling is used instead.
    page.encoding = 'ISO-8859-1'
    soup = BeautifulSoup(page.text, 'html.parser')
    # The final 'col-md-3' match is discarded, as the original did
    # (presumably a non-skill column -- TODO confirm). Slicing, unlike
    # ``del lst[-1]``, does not raise when nothing matched.
    skill_cells = soup.find_all(class_='col-md-3')[:-1]
    collected = []
    for cell in skill_cells:
        try:
            href = cell.contents[1]['href']
        except (IndexError, KeyError, TypeError) as err:
            # Previously the loop carried on with a stale (or unbound) href
            # and appended a bogus row; skip the entry instead.
            print("Skipping entry without a link:", err)
            continue
        collected.append([href, clearName(href)])
    return collected


def clearName(link):
    """Turn a skill href such as ``/skills/A%26%2347B+testing`` into its name.

    The ``/skills/`` prefix is removed and the escape sequences seen in the
    scraped hrefs are decoded, in this order: ``+`` to a space, ``%26%2347``
    to ``/``, ``%27`` to ``'`` and ``%26%2345`` to ``-``.

    Args:
        link: The href string to clean.

    Returns:
        The cleaned skill name.
    """
    substitutions = (
        ("/skills/", ""),
        ("+", " "),
        ("%26%2347", "/"),
        ("%27", "'"),
        ("%26%2345", "-"),
    )
    text = link
    for encoded, plain in substitutions:
        text = text.replace(encoded, plain)
    return text
def GetName(text):
    """Return the part of ``text`` that follows its first ``>`` character.

    Used as a fallback to pull a skill name out of an element's outer HTML
    when the element reports no visible text. If ``text`` contains no ``>``,
    it is returned unchanged.

    NOTE(review): the original also called ``.replace("", "")`` on the result,
    which is a no-op as written. If a closing-tag literal was lost from that
    call, restore it here.

    Args:
        text: Markup string to extract the name from.

    Returns:
        Everything after the first ``>``, or ``text`` itself if none exists.
    """
    _, marker, remainder = text.partition(">")
    return remainder if marker else text


def MainClosestSkill(virus):
    """Tell whether the markup contains the marker ``'(204, 204, 204)'``.

    The caller treats True as "close skill" and False as "far skill"
    (presumably the marker is an RGB colour in the element's style --
    TODO confirm).

    Args:
        virus: Outer HTML of a skill element.

    Returns:
        True if the marker substring is present, else False.
    """
    return '(204, 204, 204)' in virus


def numberOfSkills(driver, max_skills=100):
    """List the consecutive numeric element ids present on the current page.

    Ids ``"1"``, ``"2"``, ... are probed in order until a lookup fails or
    ``max_skills`` ids have been found.

    The original reset its counter on every iteration, so it returned ``[1]``
    when all 100 ids existed; the count is now tracked across iterations.

    Args:
        driver: Selenium WebDriver positioned on a skill page.
        max_skills: Upper bound on the ids to probe (default 100, as before).

    Returns:
        ``[1, 2, ..., k]`` where ``k`` is the highest consecutive id found;
        an empty list if id ``"1"`` is absent.
    """
    found = 0
    for element_id in range(1, max_skills + 1):
        try:
            # find_element("id", ...) is accepted by Selenium 3 and 4; the
            # find_element_by_id shortcut was removed in Selenium 4.3.
            driver.find_element("id", str(element_id))
        except Exception:
            # A failed lookup marks the end of the consecutive ids. Unlike a
            # bare ``except:``, this lets KeyboardInterrupt through.
            break
        found = element_id
    return list(range(1, found + 1))


def CloseSkillsTotal(link, driver):
    """Open a skill page and collect its related skills with a closeness flag.

    Args:
        link: URL of the skill page to load in ``driver``.
        driver: Selenium WebDriver used to load and query the page.

    Returns:
        A list of ``[skill_name, is_close]`` pairs, one per numbered element
        on the page, in id order. ``skill_name`` is the element's text, or
        ``GetName(outerHTML)`` when the text is empty. ``is_close`` is the
        result of ``MainClosestSkill(outerHTML)``.
    """
    driver.get(link)
    related = []
    for element_id in numberOfSkills(driver):
        element = driver.find_element("id", str(element_id))
        name = element.text
        markup = element.get_attribute("outerHTML")
        if name == '':
            # Some elements expose no text; fall back to parsing the markup.
            name = GetName(markup)
        related.append([name, MainClosestSkill(markup)])
    return related
MongoClient(\"mongodb+srv://KunalResumeSkill:VDartDigital123!@cluster0.tuvxg.mongodb.net/skills?retryWrites=true&w=majority\")" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "db = cluster[\"skills\"]\n", - "collection = db[\"skillsCollection\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "jsonFileStr = \"\"\"\n", - "\n", - "{\n", - " \"skillName\": \"sNCode\",\n", - " \"url\": \"UCode\",\n", - " \"relation\" : [\n", - " {\n", - " \"skill_a\" : {\n", - " \"relatedSkill_a1\" : \"rS_a1Code\", \n", - " \"relatedSkill_a2\" : \"rS_a2Code\",\n", - " \"relatedSkill_a3\" : \"rS_a3Code\",\n", - " \"relatedSkill_a4\" : \"rS_a4Code\",\n", - " \"relatedSkill_a5\" : \"rS_a5Code\"\n", - " }, \n", - " \"skill_b\" : {\n", - " \"relatedSkill_b1\" : \"rS_b1Code\", \n", - " \"relatedSkill_b2\" : \"rS_b2Code\",\n", - " \"relatedSkill_b3\" : \"rS_b3Code\",\n", - " \"relatedSkill_b4\" : \"rS_b4Code\",\n", - " \"relatedSkill_b5\" : \"rS_b5Code\"\n", - " },\n", - " \"skill_c\" : {\n", - " \"relatedSkill_c1\" : \"rS_c1Code\", \n", - " \"relatedSkill_c2\" : \"rS_c2Code\",\n", - " \"relatedSkill_c3\" : \"rS_c3Code\",\n", - " \"relatedSkill_c4\" : \"rS_c4Code\",\n", - " \"relatedSkill_c5\" : \"rS_c5Code\"\n", - " },\n", - " \"skill_d\" : {\n", - " \"relatedSkill_d1\" : \"rS_d1Code\", \n", - " \"relatedSkill_d2\" : \"rS_d2Code\",\n", - " \"relatedSkill_d3\" : \"rS_d3Code\",\n", - " \"relatedSkill_d4\" : \"rS_d4Code\",\n", - " \"relatedSkill_d5\" : \"rS_d5Code\"\n", - " },\n", - " \"skill_e\" : {\n", - " \"relatedSkill_e1\" : \"rS_e1Code\", \n", - " \"relatedSkill_e2\" : \"rS_e2Code\",\n", - " \"relatedSkill_e3\" : \"rS_e3Code\",\n", - " \"relatedSkill_e4\" : \"rS_e4Code\",\n", - " \"relatedSkill_e5\" : \"rS_e5Code\"\n", - " }\n", - " }\n", - " ]\n", - "}\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - 
"data": { - "text/plain": [ - "{'skillName': '3G',\n", - " 'url': '/skills/3G',\n", - " 'relation': [{'2G': {'relatedSkill_a1': '4G',\n", - " 'relatedSkill_a2': 'LTE',\n", - " 'relatedSkill_a3': 'GPRS',\n", - " 'relatedSkill_a4': 'UMTS',\n", - " 'relatedSkill_a5': 'W-CDMA'},\n", - " '4G': {'relatedSkill_b1': 'LTE',\n", - " 'relatedSkill_b2': '2G',\n", - " 'relatedSkill_b3': '3GPP',\n", - " 'relatedSkill_b4': 'W-CDMA',\n", - " 'relatedSkill_b5': 'GPRS'},\n", - " 'GPRS': {'relatedSkill_c1': 'UMTS',\n", - " 'relatedSkill_c2': '2G',\n", - " 'relatedSkill_c3': 'GSM',\n", - " 'relatedSkill_c4': 'EVDO',\n", - " 'relatedSkill_c5': 'W-CDMA'},\n", - " 'LTE': {'relatedSkill_d1': '3GPP',\n", - " 'relatedSkill_d2': 'W-CDMA',\n", - " 'relatedSkill_d3': 'CDMA',\n", - " 'relatedSkill_d4': '4G',\n", - " 'relatedSkill_d5': 'UMTS'},\n", - " 'UMTS': {'relatedSkill_e1': 'W-CDMA',\n", - " 'relatedSkill_e2': 'CDMA',\n", - " 'relatedSkill_e3': 'GSM',\n", - " 'relatedSkill_e4': 'LTE',\n", - " 'relatedSkill_e5': 'GPRS'}}],\n", - " '_id': ObjectId('5f0cf307fa540465a60e95ed')}" - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "jsonFormatUpload" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t 10BASE-T\n", - " \n" - ] - }, - 
{ - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 2.5D\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 21 CFR Part 11\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t 2D animation\n", - " \n", - "Running\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t 2D computer graphics\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] 
- Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 2G\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 3D CAD\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t 3D animation\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 3D computer graphics\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", 
- "text": [ - "Running\n", - "Uploaded: \t\t\t 3D modeling\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 3D printing\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 3D rendering\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 3G\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ERROR WATCH OUT SKIPPING /skills/3G+Bridge\n", - "Uploaded: \t\t\t 3G\n", - " \n" - ] - }, - { - "name": 
"stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ERROR WATCH OUT SKIPPING /skills/3G+MIMO\n", - "Uploaded: \t\t\t 3G\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ERROR WATCH OUT SKIPPING /skills/3G+Studios\n", - "Uploaded: \t\t\t 3G\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ERROR WATCH OUT SKIPPING /skills/3G%26%2345324M\n", - "Uploaded: \t\t\t 3G\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 3GPP\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - 
"[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 3PAR\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 3PL\n", - " \n", - "Running\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t 4G\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 4GL\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t 5ESS switch\n", - " \n" - ] - }, - { - "name": "stderr", - 
"output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t 5S\n", - " \n", - "ERROR WATCH OUT SKIPPING /skills/Aplus\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t 5S\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t A/B testing\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ACAPS\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", 
- "Uploaded: \t\t\t ACD\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ACF2\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ACH\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ACL\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ACMT\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version 
is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ACORD\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ACS\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t \n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t ACSLS\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ACT\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver 
[C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n", - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ADA\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t ADABAS\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ADDIE\n", - " \n", - "Running\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t ADDM\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current 
google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ADF\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ADFS\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded: \t\t\t ADINA\n", - " \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[WDM] - Current google-chrome version is 83.0.4103\n", - "[WDM] - Get LATEST driver version for 83.0.4103\n", - "[WDM] - Driver [C:\\Users\\kunal\\.wdm\\drivers\\chromedriver\\win32\\83.0.4103.39\\chromedriver.exe] found in cache\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running\n", - "Uploaded: \t\t\t ADMT\n", - " \n", - "Running\n", - "ERROR WITH THE UPLOAD TO MONGODB CHECK\n", - "Wall time: 8min 3s\n" - ] - } - ], - "source": [ - "%%time\n", - "similarSkillsTotalList = []\n", - "for i in ALLSKILLSLIST[:50]:\n", - " jsonFileRunning = jsonFileStr\n", - " #print( str(i[0]))\n", - " link = \"https://www.dice.com\" + str(i[0])\n", - " driver = 
webdriver.Chrome(ChromeDriverManager().install())\n", - " DATA = CloseSkillsTotal(link, driver)\n", - " driver.close()\n", - " running = True\n", - " if len(DATA) == 0:\n", - " jsonFileRunning = jsonFileStr\n", - " print(\"ERROR WATCH OUT SKIPPING \", i[0])\n", - " jsonFileRunning = jsonFileRunning.replace(\"sNCode\", i[1]).replace(\"UCode\", i[0])\n", - " letterList = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n", - " for i in letterList:\n", - " for j in range(5):\n", - " numb = j+1\n", - " text = \"rS_\"+ i + str(numb) + \"Code\"\n", - " jsonFileRunning = jsonFileRunning.replace(text, \"n/a\")\n", - " running = False \n", - " if running == True: \n", - " skillnameCode = DATA[0][0]\n", - " jsonFileRunning = jsonFileRunning.replace(\"sNCode\", skillnameCode).replace(\"UCode\", str(i[0]))\n", - " if len(DATA) > 1: \n", - " print(\"Running\")\n", - " DATANEW = DATA[1:]\n", - " count = 0\n", - " SkillcountLetter = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n", - " Skillcount = 0\n", - " for i in DATANEW:\n", - " if i[1] == True:\n", - " textString = \"skill_\" + SkillcountLetter[Skillcount]\n", - " #print(textString + i[0])\n", - " relatedSkillcount = 1\n", - " jsonFileRunning = jsonFileRunning.replace(textString, i[0])\n", - " for j in range(5):\n", - " z = j+count+1\n", - " textString2 = \"rS_\" + SkillcountLetter[Skillcount] + str(relatedSkillcount) + \"Code\"\n", - " #print(textString2 + str(DATANEW[z][0]))\n", - " jsonFileRunning = jsonFileRunning.replace(textString2, str(DATANEW[z][0]))\n", - " relatedSkillcount+=1\n", - " Skillcount+=1\n", - " count+=1\n", - " else:\n", - " letterList = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n", - " for i in letterList:\n", - " for j in range(5):\n", - " numb = j+1\n", - " text = \"rS_\"+ i + str(numb) + \"Code\"\n", - " jsonFileRunning = jsonFileRunning.replace(text, \"n/a\")\n", - " \n", - " #print(jsonFileRunning)\n", - " jsonFormatUpload = ast.literal_eval(jsonFileRunning)\n", - " \n", - " try:\n", - " 
collection.insert_one(jsonFormatUpload)\n", - " print(\"Uploaded: \\t\\t\\t\", skillnameCode)\n", - " except Exception as e:\n", - " print(\"ERROR WITH THE UPLOAD TO MONGODB CHECK\")\n", - " break\n", - " similarSkillsTotalList.append(DATA)\n", - " \n", - " #print(similarSkillsTotalList)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Other base Testing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import requests # for making standard html requests\n", - "from bs4 import BeautifulSoup # magical tool for parsing html data\n", - "import json # for parsing data\n", - "from pandas import DataFrame as df # premier library for data organization\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "URL = \"https://www.dice.com/skills/browse/1\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "page = requests.get(URL)\n", - "page.encoding = 'ISO-885901'\n", - "soup = BeautifulSoup(page.text, 'html.parser')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "soup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dollar_tree_list = soup.find_all(class_ = 'col-md-3')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "del dollar_tree_list[-1]\n", - "len(dollar_tree_list)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "example = dollar_tree_list[0] # a 
def clearName(link):
    """Turn a Dice skill href into a human-readable skill name.

    Removes every "/skills/" occurrence, turns "+" into a space, and decodes the three
    escape sequences seen in the scraped links ("%26%2347" -> "/", "%27" -> "'",
    "%26%2345" -> "-"). Any other percent-escape is left untouched.

    Example: "/skills/A%26%2347B+testing" -> "A/B testing".
    """
    # Applied in this order, one after another, exactly like the original chained calls.
    substitutions = (
        ("/skills/", ""),
        ("+", " "),
        ("%26%2347", "/"),
        ("%27", "'"),
        ("%26%2345", "-"),
    )
    text = link
    for encoded, plain in substitutions:
        text = text.replace(encoded, plain)
    return text


# Build [href, skill name] rows, one per scraped element.
# NOTE(review): assumes the second child of each element is the <a> tag -- confirm
# against the page markup.
DataCollected = []
for element in dollar_tree_list:
    anchor = element.contents[1]
    try:
        HREF = anchor['href']
    except KeyError as e:
        # No link on this element: report it and skip the row. The original fell through
        # and reused the previous iteration's HREF (or raised NameError on the first).
        print(e)
        continue
    nameOfSkill = clearName(HREF)
    # Fix: store this element's own href. The original appended `example_href`, a
    # leftover from the exploratory cell, so every row carried the same link.
    DataCollected.append([HREF, nameOfSkill])
"code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Other testing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "skillnameCode = DATA[0][0]\n", - "print(skillnameCode)\n", - "DATANEW = DATA[1:]\n", - "count = 0\n", - "for i in DATANEW:\n", - " if i[1] == True:\n", - " print(\"RS_A: \" + i[0])\n", - " for j in range(5):\n", - " z = j+count+1\n", - " print(\"RS 2: \" + str(DATANEW[z][0]))\n", - " count+=1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "jsonFileRunning = jsonFileStr\n", - "\n", - "skillnameCode = DATA[0][0]\n", - "print(skillnameCode)\n", - "\n", - "jsonFileRunning = jsonFileRunning.replace(\"sNCode\", skillnameCode)\n", - "\n", - "DATANEW = DATA[1:]\n", - "count = 0\n", - "SkillcountLetter = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n", - "Skillcount = 0\n", - "for i in DATANEW:\n", - " if i[1] == True:\n", - " textString = \"skill_\" + SkillcountLetter[Skillcount]\n", - " #print(textString + i[0])\n", - " relatedSkillcount = 1\n", - " jsonFileRunning = jsonFileRunning.replace(textString, i[0])\n", - " for j in range(5):\n", - " z = j+count+1\n", - " textString2 = \"rS_\" + SkillcountLetter[Skillcount] + str(relatedSkillcount) + \"Code\"\n", - " #print(textString2 + str(DATANEW[z][0]))\n", - " jsonFileRunning = jsonFileRunning.replace(textString2, str(DATANEW[z][0]))\n", - " relatedSkillcount+=1\n", - " Skillcount+=1\n", - " count+=1" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "similarSkillsTotalList = []\n", - "for i in ALLSKILLSLIST[:2]:\n", - " print( str(i[0]))\n", - " link = \"https://www.dice.com\" + str(i[0])\n", - " driver = webdriver.Chrome(ChromeDriverManager().install()) \n", - " DATA = CloseSkillsTotal(link, driver)\n", - " driver.close()\n", - " similarSkillsTotalList.append(DATA)\n", - " #print(similarSkillsTotalList)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "jsonFileRunning = jsonFileStr\n", - "\n", - "skillnameCode = DATA[0][0]\n", - "print(skillnameCode)\n", - "\n", - "jsonFileRunning = jsonFileRunning.replace(\"sNCode\", skillnameCode)\n", - "\n", - "DATANEW = DATA[1:]\n", - "count = 0\n", - "SkillcountLetter = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n", - "Skillcount = 0\n", - "for i in DATANEW:\n", - " if i[1] == True:\n", - " textString = \"skill_\" + SkillcountLetter[Skillcount]\n", - " #print(textString + i[0])\n", - " relatedSkillcount = 1\n", - " jsonFileRunning = jsonFileRunning.replace(textString, i[0])\n", - " for j in range(5):\n", - " z = j+count+1\n", - " textString2 = \"rS_\" + SkillcountLetter[Skillcount] + str(relatedSkillcount) + \"Code\"\n", - " #print(textString2 + str(DATANEW[z][0]))\n", - " jsonFileRunning = jsonFileRunning.replace(textString2, str(DATANEW[z][0]))\n", - " relatedSkillcount+=1\n", - " Skillcount+=1\n", - " count+=1" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": 
"Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "219.261px" - }, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/output.xlsx b/output.xlsx deleted file mode 100644 index d2cb3bd..0000000 Binary files a/output.xlsx and /dev/null differ