-
Notifications
You must be signed in to change notification settings - Fork 0
/
p2.hpp
159 lines (143 loc) · 3.69 KB
/
p2.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
//File name: A3_Sheet2b2_20220835
//Purpose : Document Similarity
//Author : Safa Tawfik Al-sharabi
//ID : 20220835
//Section : S1
// Date : 5/12/2023
#include <algorithm>
#include <cctype>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>
using namespace std;
// A set of normalised words (lowercase, letters only) that remembers the
// order in which words were first inserted.
//
// Two containers are kept in sync:
//   - strings : hash set used for O(1) membership tests
//   - stringV : the same words in first-insertion order, used for output
// Every word passes through normalize() before it is stored, so "Hello!",
// "hello" and "HELLO" are all the same element.
class StringSet
{
private:
    std::unordered_set<std::string> strings;
    std::vector<std::string> stringV;

public:
    // Default constructor: creates an empty set.
    StringSet() {}

    // Constructor to load strings from a file.
    // Marked explicit so a std::string is never silently converted to a set.
    explicit StringSet(const std::string& filename)
    {
        loadFromFile(filename);
    }

    // Constructor to load strings from either a string (fromString == true)
    // or a file (fromString == false).
    // NOTE: fromString deliberately has no default value. With a default,
    // every one-argument call was ambiguous with the constructor above and
    // failed to compile.
    StringSet(const std::string& input, bool fromString)
    {
        if (fromString)
        {
            loadFromString(input);
        }
        else
        {
            loadFromFile(input);
        }
    }

    // Load whitespace-separated words from a file and add them to the set.
    // If the file cannot be opened, the extraction loop never runs and the
    // set is left unchanged.
    void loadFromFile(const std::string& filename)
    {
        std::ifstream file(filename);
        std::string word;
        while (file >> word)
        {
            processWord(word);
        }
    }

    // Load whitespace-separated words from a string and add them to the set.
    void loadFromString(const std::string& input)
    {
        std::istringstream iss(input);
        std::string word;
        while (iss >> word)
        {
            processWord(word);
        }
    }

    // Add a single word (normalised first; ignored if empty or already present).
    void addString(const std::string& str)
    {
        processWord(str);
    }

    // Remove a word. The argument is normalised the same way addString
    // normalises it, so removeString("Hello!") removes the stored "hello".
    void removeString(const std::string& str)
    {
        const std::string key = normalize(str);
        if (strings.erase(key) > 0)
        {
            stringV.erase(std::remove(stringV.begin(), stringV.end(), key), stringV.end());
        }
    }

    // Clear all strings from the set.
    void clearSet()
    {
        strings.clear();
        stringV.clear();
    }

    // Number of distinct words in the set.
    int size() const
    {
        return static_cast<int>(strings.size());
    }

    // Print the words in first-insertion order, each followed by a space,
    // then a newline.
    void outputStrings() const
    {
        for (const auto& str : stringV)
        {
            std::cout << str << " ";
        }
        std::cout << std::endl;
    }

    // Overload "+" operator for set union.
    // The result holds this set's words in their original order, followed by
    // the words of `other` that were not already present, in `other`'s order.
    StringSet operator+(const StringSet& other) const
    {
        StringSet result = *this;
        for (const auto& str : other.stringV)
        {
            if (result.strings.insert(str).second)
            {
                result.stringV.push_back(str);
            }
        }
        return result;
    }

    // Overload "*" operator for set intersection.
    // The result keeps the order of this set's words.
    StringSet operator*(const StringSet& other) const
    {
        StringSet result;
        for (const auto& str : stringV)
        {
            if (other.strings.count(str) > 0)
            {
                result.strings.insert(str);
                result.stringV.push_back(str);
            }
        }
        return result;
    }

    // Compute similarity using the equation:
    //   |Q intersect D| / (sqrt(|Q|) * sqrt(|D|))
    // Returns 0.0 when either set is empty; the formula would otherwise
    // divide by zero and produce NaN.
    double computeSimilarity(const StringSet& other) const
    {
        if (strings.empty() || other.strings.empty())
        {
            return 0.0;
        }
        const StringSet common = *this * other;
        const double denominator =
            std::sqrt(static_cast<double>(size()) * static_cast<double>(other.size()));
        return static_cast<double>(common.size()) / denominator;
    }

private:
    // Return `word` with every non-alphabetic character dropped and the
    // remaining letters converted to lowercase.
    static std::string normalize(const std::string& word)
    {
        std::string normalized;
        normalized.reserve(word.size());
        for (const char c : word)
        {
            // <cctype> functions require a value representable as unsigned
            // char; passing a negative char is undefined behaviour.
            const unsigned char uc = static_cast<unsigned char>(c);
            if (std::isalpha(uc))
            {
                normalized += static_cast<char>(std::tolower(uc));
            }
        }
        return normalized;
    }

    // Normalise the word and add it to the set if it is non-empty and not
    // already present.
    void processWord(const std::string& word)
    {
        const std::string normalized = normalize(word);
        if (!normalized.empty() && strings.insert(normalized).second)
        {
            stringV.push_back(normalized);
        }
    }
};