Skip to content

Commit

Permalink
added compare regexes qualitatitive comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
william-eiers committed Jul 7, 2024
1 parent c5a1420 commit 8d81611
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 12 deletions.
37 changes: 37 additions & 0 deletions src/interface/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ void Driver::InitializeSolver() {
constraint_sorter.start();
}

// Solver::NormalizationRenamer renamer(script_, symbol_table_);
// renamer.start();

}

void Driver::Solve() {
Expand Down Expand Up @@ -663,6 +666,40 @@ std::vector<Theory::BigInteger> Driver::MeasureDistance(std::string var_name, st
return results;
}

// 'qualitative' comparison results
// ONLY FOR STRING AUTOMATON: ENFORCES ONLY PRINTABLE ASCII CHARACTERS
std::vector<std::string> Driver::CompareRegexes(std::string var_name, std::string var_regex) {
auto var = symbol_table_->get_variable(var_name);
auto var_value = symbol_table_->get_value_at_scope(script_,var);
auto var_value_auto = var_value->getStringAutomaton();
auto projected_var_value_auto = var_value_auto->GetAutomatonForVariable(var_name);
auto printable_ascii_auto = Theory::StringAutomaton::MakeRegexAuto("[ -~]*");
auto tmp = projected_var_value_auto->Intersect(printable_ascii_auto);

delete projected_var_value_auto; projected_var_value_auto = nullptr;
delete printable_ascii_auto; printable_ascii_auto = nullptr;
var_value_auto = tmp;

auto regex_from_dfa = var_value_auto->DFAToRE();
auto regex_from_llm = new Util::RegularExpression(var_regex);

std::vector<std::string> results;
results.push_back("report regex_from_dfa: " + regex_from_dfa->str() + '\n');
results.push_back("report regex_from_llm: " + regex_from_llm->str() + '\n');
results.push_back("report ops_regex_from_dfa: " + std::to_string(regex_from_dfa->ops()) + '\n');
results.push_back("report ops_regex_from_llm: " + std::to_string(regex_from_llm->ops()) + '\n');
results.push_back("report length_regex_from_dfa: " + std::to_string(regex_from_dfa->str().length()) + '\n');
results.push_back("report length_regex_from_llm: " + std::to_string(regex_from_llm->str().length()) + '\n');

delete regex_from_llm;
regex_from_llm = nullptr;

delete regex_from_dfa;
regex_from_dfa = nullptr;

return results;
}

void Driver::set_option(const Option::Name option) {
switch (option) {
case Option::Name::USE_SIGNED_INTEGERS:
Expand Down
1 change: 1 addition & 0 deletions src/interface/Driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class Driver {
std::vector<std::string> GetSimpleRegexes(std::string re_var, int num_regexes = 1, int alpha = 0, int omega = 0);
std::vector<std::string> GetNumRandomModels(std::vector<std::string> model_variables, unsigned long num_random_models, int min, int max);
std::vector<Theory::BigInteger> MeasureDistance(std::string variable, std::string regex, int bound);
std::vector<std::string> CompareRegexes(std::string variable, std::string regex);

void set_option(const Option::Name option);
void set_option(const Option::Name option, const int value);
Expand Down
5 changes: 5 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,11 @@ int main(const int argc, const char **argv) {
LOG(INFO) << "report baseline_not_synthesized:" << results[2];
LOG(INFO) << "report not_baseline_synthesized:" << results[3];

auto qual_results = driver.CompareRegexes(regex_compare_variable, line);
for(auto q : qual_results) {
LOG(INFO) << q;
}

delete regex_file;
}

Expand Down
135 changes: 123 additions & 12 deletions src/theory/Automaton.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2836,28 +2836,16 @@ Util::RegularExpression_ptr Automaton::DFAToRE() {
delete transition_table[i+1][j+1];
transition_table[i+1][j+1] = tmp_regex;
} else {
// LOG(INFO) << "NOT SAME";
auto it_vec = std::vector<char>(it.begin(),it.end());
it_vec.pop_back();
// for(auto ii : it_vec) {
// LOG(INFO) << ii;
// }

std::vector<char> decoded_transitions = decodeException(it_vec);

for(auto decoded_it : decoded_transitions) {
//unsigned char ch = strtobin(&it[0],8);
std::string str;
//str += static_cast<char>(ch);
// LOG(INFO) << int(decoded_it);
str += static_cast<char>(decoded_it);

// LOG(INFO) << str << " , " << decoded_it;

auto it_regex = Util::RegularExpression::makeString(str);
// LOG(INFO) << "it_regex = " << it_regex->str();
auto tmp_regex = Util::RegularExpression::makeUnion(transition_table[i+1][j+1]->clone(),it_regex->clone());
// LOG(INFO) << "tmp_regex = " << tmp_regex->str();
delete it_regex;
delete transition_table[i+1][j+1];
transition_table[i+1][j+1] = tmp_regex;
Expand Down Expand Up @@ -3026,6 +3014,129 @@ Util::RegularExpression_ptr Automaton::DFAToRE() {
return transition_table[0][final_state];
}

// Util::RegularExpression_ptr Automaton::DFA2Reg(const DFA_ptr dfa, const int number_of_bdd_variables) {


// // convert dfa to table
// int num_states = dfa->ns + 2; // new initial and final state
// int to_state = -1;
// paths state_paths = nullptr, pp = nullptr;
// trace_descr tp = nullptr;
// int* indices = Automaton::GetBddVariableIndices(number_of_bdd_variables);
// int sink = DFAGetSinkState(dfa);
// Util::RegularExpression_ptr dfa_table[num_states][num_states];
// for(int i = 0; i < num_states; i++) {
// for(int j = 0; j < num_states; j++) {
// dfa_table[i][j] = Util::RegularExpression::makeEmpty();
// }
// }

// // new initial state
// dfa_table[0][1] = Util::RegularExpression::makeString("");

// for(int i = 1; i < num_states-1; i++) {
// // if final, add empty string transition to new final state
// if(dfa->f[i-1] == 1) {
// dfa_table[i][num_states-1] = Util::RegularExpression::makeString("");
// }
// state_paths = pp = make_paths(dfa->bddm, dfa->q[i-1]);
// while (pp) {
// if (sink && pp->to == (unsigned)sink) {
// pp = pp->next;
// continue;
// }
// to_state = pp->to;
// std::string current_exception = "";
// for (int j = 0; j < number_of_bdd_variables; j++) {
// for (tp = pp->trace; tp && (tp->index != (unsigned)indices[j]); tp = tp->next);
// if (tp) {
// if (tp->value) {
// current_exception.push_back('1');
// } else {
// current_exception.push_back('0');
// }
// } else {
// current_exception.push_back('X');
// }
// }

// // convert symbolic path to regex
// LOG(INFO) << current_exception;
// std::vector<std::string> transitions = Automaton::ExpandException(current_exception);
// // Util::RegularExpression_ptr current_regex = Util::RegularExpression::makeString(transitions[0]);
// for(int j = 0; j < transitions.size(); j++) {
// LOG(INFO) << " -> " << transitions[j];
// auto r1 = Util::RegularExpression::makeString(transitions[j]);
// auto r2 = dfa_table[i][to_state+1];
// dfa_table[i][to_state+1] = Util::RegularExpression::makeUnion(r1,r2);
// // delete r1; r1 = nullptr;
// // delete r2; r2 = nullptr;
// }
// // LOG(INFO) << dfa_table[i][to_state+1]->str();
// // std::cin.get();

// tp = nullptr;
// pp = pp->next;
// }
// }

// LOG(INFO) << "---------------------";
// std::cin.get();

// // now that dfa is a table, apply extraction algo until 2 states left (initial and final)
// // since initial is guaranteed to be 0 and final is num_states-1, just iterate between them
// for(int i = 1; i < num_states-1; i++) {
// // loop through states from incoming transitions (q_in)
// for(int j = 0; j < num_states; j++) {
// if(i != j && dfa_table[j][i]->type() != Util::RegularExpression::Type::EMPTY) {
// // loop through states from outgoing transitions (q_out)
// for(int k = 0; k < num_states; k++) {
// if(i != k) {
// // gather transitions
// auto r_in = dfa_table[j][i];
// auto r_out = dfa_table[i][k];
// auto r_rip = dfa_table[i][i];
// auto r_dir = dfa_table[j][k];

// // remove transitions
// dfa_table[j][i] = Util::RegularExpression::makeEmpty();
// dfa_table[i][k] = Util::RegularExpression::makeEmpty();
// dfa_table[i][i] = Util::RegularExpression::makeEmpty();

// // r_dir = r_dir + r_in . (r_rip)* . r_out
// auto r1 = Util::RegularExpression::makeRepeatStar(r_rip);
// // delete r_rip; r_rip = nullptr;

// auto r2 = Util::RegularExpression::makeConcatenation(r_in, r1);
// // delete r_in; r_in = nullptr;
// // delete r1; r1 = nullptr;

// auto r3 = Util::RegularExpression::makeConcatenation(r2,r_out);
// // delete r_out; r_out = nullptr;
// // delete r2; r2 = nullptr;

// auto r4 = Util::RegularExpression::makeUnion(r_dir, r3);
// // delete r_dir; r_dir = nullptr;
// // delete r3; r3 = nullptr;

// dfa_table[j][k] = r4;
// }
// }
// }
// }
// }

// // cleanup
// auto regex = dfa_table[0][num_states-1]->clone();
// for(int i = 0; i < num_states; i++) {
// for(int j = 0; j < num_states; j++) {
// delete dfa_table[i][j]; dfa_table[i][j] = nullptr;
// }
// }

// return regex;
// }

void Automaton::getTransitionCharsHelper(pCharPair result[], char* transitions, int* indexInResult, int currentBit, int var){
int i;
boolean allX;
Expand Down
1 change: 1 addition & 0 deletions src/theory/Automaton.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ class Automaton {

Util::RegularExpression_ptr DFAToRE();


friend std::ostream& operator<<(std::ostream& os, const Automaton& automaton);

static void CleanUp();
Expand Down
62 changes: 62 additions & 0 deletions src/utils/RegularExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,68 @@ void RegularExpression::set_escape(bool escape) {
escape_ = escape;
}

int RegularExpression::ops() {
switch(type_) {
case RegularExpression::Type::NONE:
return 0;
break;
case RegularExpression::Type::UNION:
return 1 + exp1_->ops() + exp2_->ops();
break;
case RegularExpression::Type::CONCATENATION:
return 1 + exp1_->ops() + exp2_->ops();
break;
case RegularExpression::Type::INTERSECTION:
return 1 + exp1_->ops() + exp2_->ops();
break;
case RegularExpression::Type::OPTIONAL:
return 1 + exp1_->ops();
break;
case RegularExpression::Type::REPEAT_STAR:
return 1 + exp1_->ops();
break;
case RegularExpression::Type::REPEAT_PLUS:
return 1 + exp1_->ops();
break;
case RegularExpression::Type::REPEAT_MIN:
return 1 + exp1_->ops();
break;
case RegularExpression::Type::REPEAT_MINMAX:
return 1 + exp1_->ops();
break;
case RegularExpression::Type::COMPLEMENT:
return 1 + exp1_->ops();
break;
case RegularExpression::Type::CHAR:
return 1;
break;
case RegularExpression::Type::CHAR_RANGE:
return 1;
break;
case RegularExpression::Type::ANYCHAR:
return 1;
break;
case RegularExpression::Type::EMPTY:
return 1;
break;
case RegularExpression::Type::STRING:
return 1;
break;
case RegularExpression::Type::ANYSTRING:
return 1;
break;
case RegularExpression::Type::AUTOMATON:
return 1;
break;
case RegularExpression::Type::INTERVAL:
return 1;
break;
default:
LOG(FATAL) << "can't determine number of obs for undefined type";
return 0;
}
}

std::ostream& operator<<(std::ostream& os, const RegularExpression& regex) {
return os << regex.str();
}
Expand Down
1 change: 1 addition & 0 deletions src/utils/RegularExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ class RegularExpression {
std::vector<std::string> enumerate();
void simplify(int alpha, int omega, int depth);
void set_escape(bool escape);
int ops();

friend std::ostream& operator<<(std::ostream& os, const RegularExpression& regex);
private:
Expand Down

0 comments on commit 8d81611

Please sign in to comment.