From 8d81611f31b4fa2f066087cb6de6930705a7c57c Mon Sep 17 00:00:00 2001 From: William Eiers Date: Sun, 7 Jul 2024 10:03:47 -0400 Subject: [PATCH] added compare regexes qualitatitive comparison --- src/interface/Driver.cpp | 37 +++++++++ src/interface/Driver.h | 1 + src/main.cpp | 5 ++ src/theory/Automaton.cpp | 135 +++++++++++++++++++++++++++++--- src/theory/Automaton.h | 1 + src/utils/RegularExpression.cpp | 62 +++++++++++++++ src/utils/RegularExpression.h | 1 + 7 files changed, 230 insertions(+), 12 deletions(-) diff --git a/src/interface/Driver.cpp b/src/interface/Driver.cpp index e444566..b9ce547 100644 --- a/src/interface/Driver.cpp +++ b/src/interface/Driver.cpp @@ -121,6 +121,9 @@ void Driver::InitializeSolver() { constraint_sorter.start(); } + // Solver::NormalizationRenamer renamer(script_, symbol_table_); + // renamer.start(); + } void Driver::Solve() { @@ -663,6 +666,40 @@ std::vector Driver::MeasureDistance(std::string var_name, st return results; } +// 'qualitative' comparison results +// ONLY FOR STRING AUTOMATON: ENFORCES ONLY PRINTABLE ASCII CHARACTERS +std::vector Driver::CompareRegexes(std::string var_name, std::string var_regex) { + auto var = symbol_table_->get_variable(var_name); + auto var_value = symbol_table_->get_value_at_scope(script_,var); + auto var_value_auto = var_value->getStringAutomaton(); + auto projected_var_value_auto = var_value_auto->GetAutomatonForVariable(var_name); + auto printable_ascii_auto = Theory::StringAutomaton::MakeRegexAuto("[ -~]*"); + auto tmp = projected_var_value_auto->Intersect(printable_ascii_auto); + + delete projected_var_value_auto; projected_var_value_auto = nullptr; + delete printable_ascii_auto; printable_ascii_auto = nullptr; + var_value_auto = tmp; + + auto regex_from_dfa = var_value_auto->DFAToRE(); + auto regex_from_llm = new Util::RegularExpression(var_regex); + + std::vector results; + results.push_back("report regex_from_dfa: " + regex_from_dfa->str() + '\n'); + results.push_back("report regex_from_llm: " + regex_from_llm->str() + '\n'); + results.push_back("report ops_regex_from_dfa: " + std::to_string(regex_from_dfa->ops()) + '\n'); + results.push_back("report ops_regex_from_llm: " + std::to_string(regex_from_llm->ops()) + '\n'); + results.push_back("report length_regex_from_dfa: " + std::to_string(regex_from_dfa->str().length()) + '\n'); + results.push_back("report length_regex_from_llm: " + std::to_string(regex_from_llm->str().length()) + '\n'); + + delete regex_from_llm; + regex_from_llm = nullptr; + + delete regex_from_dfa; + regex_from_dfa = nullptr; + + return results; +} + void Driver::set_option(const Option::Name option) { switch (option) { case Option::Name::USE_SIGNED_INTEGERS: diff --git a/src/interface/Driver.h b/src/interface/Driver.h index 0b1fa34..1299693 100644 --- a/src/interface/Driver.h +++ b/src/interface/Driver.h @@ -107,6 +107,7 @@ class Driver { std::vector GetSimpleRegexes(std::string re_var, int num_regexes = 1, int alpha = 0, int omega = 0); std::vector GetNumRandomModels(std::vector model_variables, unsigned long num_random_models, int min, int max); std::vector MeasureDistance(std::string variable, std::string regex, int bound); + std::vector CompareRegexes(std::string variable, std::string regex); void set_option(const Option::Name option); void set_option(const Option::Name option, const int value); diff --git a/src/main.cpp b/src/main.cpp index 337bf98..e225af4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -413,6 +413,11 @@ int main(const int argc, const char **argv) { LOG(INFO) << "report baseline_not_synthesized:" << results[2]; LOG(INFO) << "report not_baseline_synthesized:" << results[3]; + auto qual_results = driver.CompareRegexes(regex_compare_variable, line); + for(auto q : qual_results) { + LOG(INFO) << q; + } + delete regex_file; } diff --git a/src/theory/Automaton.cpp b/src/theory/Automaton.cpp index 2881834..9be33e5 100644 --- a/src/theory/Automaton.cpp +++ b/src/theory/Automaton.cpp @@ -2836,28 +2836,16 @@ Util::RegularExpression_ptr Automaton::DFAToRE() { delete transition_table[i+1][j+1]; transition_table[i+1][j+1] = tmp_regex; } else { - // LOG(INFO) << "NOT SAME"; auto it_vec = std::vector(it.begin(),it.end()); it_vec.pop_back(); - // for(auto ii : it_vec) { - // LOG(INFO) << ii; - // } - std::vector decoded_transitions = decodeException(it_vec); for(auto decoded_it : decoded_transitions) { - //unsigned char ch = strtobin(&it[0],8); std::string str; - //str += static_cast(ch); - // LOG(INFO) << int(decoded_it); str += static_cast(decoded_it); - // LOG(INFO) << str << " , " << decoded_it; - auto it_regex = Util::RegularExpression::makeString(str); - // LOG(INFO) << "it_regex = " << it_regex->str(); auto tmp_regex = Util::RegularExpression::makeUnion(transition_table[i+1][j+1]->clone(),it_regex->clone()); - // LOG(INFO) << "tmp_regex = " << tmp_regex->str(); delete it_regex; delete transition_table[i+1][j+1]; transition_table[i+1][j+1] = tmp_regex; @@ -3026,6 +3014,129 @@ Util::RegularExpression_ptr Automaton::DFAToRE() { return transition_table[0][final_state]; } +// Util::RegularExpression_ptr Automaton::DFA2Reg(const DFA_ptr dfa, const int number_of_bdd_variables) { + + +// // convert dfa to table +// int num_states = dfa->ns + 2; // new initial and final state +// int to_state = -1; +// paths state_paths = nullptr, pp = nullptr; +// trace_descr tp = nullptr; +// int* indices = Automaton::GetBddVariableIndices(number_of_bdd_variables); +// int sink = DFAGetSinkState(dfa); +// Util::RegularExpression_ptr dfa_table[num_states][num_states]; +// for(int i = 0; i < num_states; i++) { +// for(int j = 0; j < num_states; j++) { +// dfa_table[i][j] = Util::RegularExpression::makeEmpty(); +// } +// } + +// // new initial state +// dfa_table[0][1] = Util::RegularExpression::makeString(""); + +// for(int i = 1; i < num_states-1; i++) { +// // if final, add empty string transition to new final state +// if(dfa->f[i-1] == 1) { +// dfa_table[i][num_states-1] = Util::RegularExpression::makeString(""); +// } +// state_paths = pp = make_paths(dfa->bddm, dfa->q[i-1]); +// while (pp) { +// if (sink && pp->to == (unsigned)sink) { +// pp = pp->next; +// continue; +// } +// to_state = pp->to; +// std::string current_exception = ""; +// for (int j = 0; j < number_of_bdd_variables; j++) { +// for (tp = pp->trace; tp && (tp->index != (unsigned)indices[j]); tp = tp->next); +// if (tp) { +// if (tp->value) { +// current_exception.push_back('1'); +// } else { +// current_exception.push_back('0'); +// } +// } else { +// current_exception.push_back('X'); +// } +// } + +// // convert symbolic path to regex +// LOG(INFO) << current_exception; +// std::vector transitions = Automaton::ExpandException(current_exception); +// // Util::RegularExpression_ptr current_regex = Util::RegularExpression::makeString(transitions[0]); +// for(int j = 0; j < transitions.size(); j++) { +// LOG(INFO) << " -> " << transitions[j]; +// auto r1 = Util::RegularExpression::makeString(transitions[j]); +// auto r2 = dfa_table[i][to_state+1]; +// dfa_table[i][to_state+1] = Util::RegularExpression::makeUnion(r1,r2); +// // delete r1; r1 = nullptr; +// // delete r2; r2 = nullptr; +// } +// // LOG(INFO) << dfa_table[i][to_state+1]->str(); +// // std::cin.get(); + +// tp = nullptr; +// pp = pp->next; +// } +// } + +// LOG(INFO) << "---------------------"; +// std::cin.get(); + +// // now that dfa is a table, apply extraction algo until 2 states left (initial and final) +// // since initial is guaranteed to be 0 and final is num_states-1, just iterate between them +// for(int i = 1; i < num_states-1; i++) { +// // loop through states from incoming transitions (q_in) +// for(int j = 0; j < num_states; j++) { +// if(i != j && dfa_table[j][i]->type() != Util::RegularExpression::Type::EMPTY) { +// // loop through states from outgoing transitions (q_out) +// for(int k = 0; k < num_states; k++) { +// if(i != k) { +// // gather transitions +// auto r_in = dfa_table[j][i]; +// auto r_out = dfa_table[i][k]; +// auto r_rip = dfa_table[i][i]; +// auto r_dir = dfa_table[j][k]; + +// // remove transitions +// dfa_table[j][i] = Util::RegularExpression::makeEmpty(); +// dfa_table[i][k] = Util::RegularExpression::makeEmpty(); +// dfa_table[i][i] = Util::RegularExpression::makeEmpty(); + +// // r_dir = r_dir + r_in . (r_rip)* . r_out +// auto r1 = Util::RegularExpression::makeRepeatStar(r_rip); +// // delete r_rip; r_rip = nullptr; + +// auto r2 = Util::RegularExpression::makeConcatenation(r_in, r1); +// // delete r_in; r_in = nullptr; +// // delete r1; r1 = nullptr; + +// auto r3 = Util::RegularExpression::makeConcatenation(r2,r_out); +// // delete r_out; r_out = nullptr; +// // delete r2; r2 = nullptr; + +// auto r4 = Util::RegularExpression::makeUnion(r_dir, r3); +// // delete r_dir; r_dir = nullptr; +// // delete r3; r3 = nullptr; + +// dfa_table[j][k] = r4; +// } +// } +// } +// } +// } + +// // cleanup +// auto regex = dfa_table[0][num_states-1]->clone(); +// for(int i = 0; i < num_states; i++) { +// for(int j = 0; j < num_states; j++) { +// delete dfa_table[i][j]; dfa_table[i][j] = nullptr; +// } +// } + +// return regex; +// } + void Automaton::getTransitionCharsHelper(pCharPair result[], char* transitions, int* indexInResult, int currentBit, int var){ int i; boolean allX; diff --git a/src/theory/Automaton.h b/src/theory/Automaton.h index 2c2c534..b8f32bf 100644 --- a/src/theory/Automaton.h +++ b/src/theory/Automaton.h @@ -236,6 +236,7 @@ class Automaton { Util::RegularExpression_ptr DFAToRE(); + friend std::ostream& operator<<(std::ostream& os, const Automaton& automaton); static void CleanUp(); diff --git a/src/utils/RegularExpression.cpp b/src/utils/RegularExpression.cpp index 96216b9..a36f904 100644 --- a/src/utils/RegularExpression.cpp +++ b/src/utils/RegularExpression.cpp @@ -1034,6 +1034,68 @@ void RegularExpression::set_escape(bool escape) { escape_ = escape; } +int RegularExpression::ops() { + switch(type_) { + case RegularExpression::Type::NONE: + return 0; + break; + case RegularExpression::Type::UNION: + return 1 + exp1_->ops() + exp2_->ops(); + break; + case RegularExpression::Type::CONCATENATION: + return 1 + exp1_->ops() + exp2_->ops(); + break; + case RegularExpression::Type::INTERSECTION: + return 1 + exp1_->ops() + exp2_->ops(); + break; + case RegularExpression::Type::OPTIONAL: + return 1 + exp1_->ops(); + break; + case RegularExpression::Type::REPEAT_STAR: + return 1 + exp1_->ops(); + break; + case RegularExpression::Type::REPEAT_PLUS: + return 1 + exp1_->ops(); + break; + case RegularExpression::Type::REPEAT_MIN: + return 1 + exp1_->ops(); + break; + case RegularExpression::Type::REPEAT_MINMAX: + return 1 + exp1_->ops(); + break; + case RegularExpression::Type::COMPLEMENT: + return 1 + exp1_->ops(); + break; + case RegularExpression::Type::CHAR: + return 1; + break; + case RegularExpression::Type::CHAR_RANGE: + return 1; + break; + case RegularExpression::Type::ANYCHAR: + return 1; + break; + case RegularExpression::Type::EMPTY: + return 1; + break; + case RegularExpression::Type::STRING: + return 1; + break; + case RegularExpression::Type::ANYSTRING: + return 1; + break; + case RegularExpression::Type::AUTOMATON: + return 1; + break; + case RegularExpression::Type::INTERVAL: + return 1; + break; + default: + LOG(FATAL) << "can't determine number of obs for undefined type"; + return 0; + } +} + std::ostream& operator<<(std::ostream& os, const RegularExpression& regex) { return os << regex.str(); } diff --git a/src/utils/RegularExpression.h b/src/utils/RegularExpression.h index 659dcfc..c257ab6 100644 --- a/src/utils/RegularExpression.h +++ b/src/utils/RegularExpression.h @@ -171,6 +171,7 @@ class RegularExpression { std::vector enumerate(); void simplify(int alpha, int omega, int depth); void set_escape(bool escape); + int ops(); friend std::ostream& operator<<(std::ostream& os, const RegularExpression& regex); private: