I'm trying to diff two strings to determine whether or not they solely vary in one numerical subset of the string structure; for example,
varies_in_single_number_field('foo7bar', 'foo123bar')
# Returns True, because 7 != 123, and there's only one varying
# number region between the two strings.
In Python I can use the difflib module to accomplish this:
import difflib, doctest
def varies_in_single_number_field(str1, str2):
    """
    Report whether two strings differ in exactly one contiguous run of digits.

    The strings are compared character by character with difflib.Differ.
    Every added or removed character must be a digit, and all of them must
    belong to a single contiguous diff region.

    A typical use case is as follows:
    >>> varies_in_single_number_field('foo7bar00', 'foo123bar00')
    True

    Numerical variation in two dimensions is no good:
    >>> varies_in_single_number_field('foo7bar00', 'foo123bar01')
    False

    Varying in a nonexistent field is okay:
    >>> varies_in_single_number_field('foobar00', 'foo123bar00')
    True

    Identical strings don't *vary* in any number field:
    >>> varies_in_single_number_field('foobar00', 'foobar00')
    False

    A varying field at the very end of the string counts as well:
    >>> varies_in_single_number_field('aaa', 'aaa7')
    True
    """
    in_differing_substring = False
    passed_differing_substring = False  # There should be only one.
    differ = difflib.Differ()
    for letter_diff in differ.compare(str1, str2):
        # Each diff entry is a two-character tag followed by the character.
        letter = letter_diff[2:]
        if letter_diff.startswith(('-', '+')):
            if passed_differing_substring:  # Already saw a varying field.
                return False
            in_differing_substring = True
            if not letter.isdigit():  # Non-digit diff character.
                return False
        elif in_differing_substring:  # Diff character not found - end of diff.
            in_differing_substring = False
            passed_differing_substring = True
    # A diff region that runs to the end of the strings is never "closed" by
    # a matching character, so it has to be counted here as well.
    # No variation if no diff was seen at all.
    return passed_differing_substring or in_differing_substring
if __name__ == '__main__': doctest.testmod()
But I have no idea how to find something like difflib for C++. Alternative approaches welcome. :)
This might work; it at least passes your demonstration tests. EDIT: I've made some modifications to deal with some string indexing issues. I believe it should be good now.
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cctype>
/**
 * Tests whether one string is a prefix of another.
 *
 * Note the argument order: the candidate prefix comes first.
 *
 * @param s1 The candidate prefix.
 * @param s2 The string that is examined.
 * @return true if s2 begins with s1 (an empty s1 always matches).
 */
bool starts_with(const std::string &s1, const std::string &s2) {
    const std::string::size_type prefix_len = s1.size();
    // A prefix can never be longer than the string it is a prefix of.
    if (prefix_len > s2.size()) {
        return false;
    }
    return s2.compare(0, prefix_len, s1) == 0;
}
/**
 * Tests whether one string is a suffix of another.
 *
 * Note the argument order: the candidate suffix comes first.
 *
 * @param s1 The candidate suffix.
 * @param s2 The string that is examined.
 * @return true if s2 ends with s1 (an empty s1 always matches).
 */
bool ends_with(const std::string &s1, const std::string &s2) {
    const std::string::size_type suffix_len = s1.size();
    // A suffix can never be longer than the string it is a suffix of.
    if (suffix_len > s2.size()) {
        return false;
    }
    const std::string::size_type tail_start = s2.size() - suffix_len;
    return s2.compare(tail_start, suffix_len, s1) == 0;
}
/**
 * Checks whether a string consists solely of decimal digit characters.
 *
 * @param s The string to inspect.
 * @return true if every character of s is a digit; an empty string
 *         therefore yields true. Signs, decimal points and whitespace
 *         are not digits and make the result false.
 */
bool is_numeric(const std::string &s) {
    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
        // std::isdigit has undefined behaviour for negative arguments other
        // than EOF, which a plain char holding a non-ASCII byte can produce,
        // so the character is converted to unsigned char first.
        if (!std::isdigit(static_cast<unsigned char>(*it))) {
            return false;
        }
    }
    return true;
}
/**
 * Helper: checks that every character of s in the half-open index range
 * [first, last) is a decimal digit. An empty range yields true.
 */
static bool digits_only_between(const std::string &s,
                                std::string::size_type first,
                                std::string::size_type last) {
    for (std::string::size_type i = first; i < last; ++i) {
        // Convert to unsigned char: std::isdigit is undefined for negative
        // values, which plain char may hold for non-ASCII bytes.
        if (!std::isdigit(static_cast<unsigned char>(s[i]))) {
            return false;
        }
    }
    return true;
}

/**
 * Determines whether two strings differ only in a single numeric field.
 *
 * Both strings are split into a shared prefix, a differing middle and a
 * shared suffix. The result is true when the strings are not identical and
 * the middle part of *each* string consists solely of digits (one of the
 * two middles may be empty, e.g. "foobar" vs "foo123bar").
 *
 * @param s1 First string.
 * @param s2 Second string.
 * @return true if the strings vary in exactly one digit-only region,
 *         false if they are identical or differ in any non-digit character.
 *         The result does not depend on the order of the arguments.
 */
bool varies_in_single_number_field(std::string s1, std::string s2) {
    // Identical strings do not vary at all.
    if (s1 == s2) {
        return false;
    }

    const std::string::size_type len1 = s1.length();
    const std::string::size_type len2 = s2.length();
    const std::string::size_type shorter = std::min(len1, len2);

    // Length of the longest common prefix.
    std::string::size_type prefix = 0;
    while (prefix < shorter && s1[prefix] == s2[prefix]) {
        ++prefix;
    }

    // Length of the longest common suffix that does not overlap the prefix
    // in the shorter string; this bound keeps all index arithmetic below
    // from underflowing.
    std::string::size_type suffix = 0;
    while (suffix < shorter - prefix &&
           s1[len1 - 1 - suffix] == s2[len2 - 1 - suffix]) {
        ++suffix;
    }

    // Whatever lies between prefix and suffix is the differing field; it has
    // to be purely numeric in both strings, not just in the longer one.
    return digits_only_between(s1, prefix, len1 - suffix) &&
           digits_only_between(s2, prefix, len2 - suffix);
}
/**
 * Demo driver: runs varies_in_single_number_field on a fixed list of string
 * pairs and prints each result as "true" or "false" on its own line.
 */
int main() {
    // Each row holds the two arguments of one call, in call order.
    static const char *const kSamplePairs[][2] = {
        {"foo7bar00", "foo123bar00"},
        {"foo7bar00", "foo123bar01"},
        {"foobar00", "foo123bar00"},
        {"foobar00", "foobar00"},
        {"7aaa", "aaa"},
        {"aaa7", "aaa"},
        {"aaa", "7aaa"},
        {"aaa", "aaa7"},
    };
    const std::size_t pair_count = sizeof(kSamplePairs) / sizeof(kSamplePairs[0]);

    // boolalpha is a sticky stream flag, so setting it once covers every line.
    std::cout << std::boolalpha;
    for (std::size_t row = 0; row < pair_count; ++row) {
        const bool verdict =
            varies_in_single_number_field(kSamplePairs[row][0], kSamplePairs[row][1]);
        std::cout << verdict << std::endl;
    }
}
Basically, it treats the strings as having three parts: a common beginning (part 1), a common ending (part 3), and a differing middle (part 2) that must consist only of digits.
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With