Is there a way to make the Boost tokenizer split the string below without splitting the quoted part?
string s = "1st 2nd \"3rd with some comment\" 4th";
Expected output:
1st
2nd
3rd with some comment
4th
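You can use an escaped_list_separator from the tokenizer library. Its default separator is a comma, so construct it with a space as the separator and a double quote as the quote character, e.g. boost::escaped_list_separator<char>('\\', ' ', '"'). See this question for more details on how to apply it to your problem.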
C++11 solution
#include <iostream>
#include <string>
#include <vector>
// Splits `str` into tokens separated by spaces, keeping any text enclosed in
// double quotes together as one token (the quote characters are dropped).
//
// Behaviour worth knowing:
//   - Runs of spaces outside quotes never produce empty tokens.
//   - A closing quote always emits the accumulated token, even if it is empty,
//     so `""` yields one empty token.
//   - Characters accumulated directly before an opening quote are kept and
//     joined with the quoted text (`ab"c d"` yields `abc d`).
//   - If the input ends inside an unterminated quote, whatever was accumulated
//     is emitted as the final token when it is non-empty.
std::vector<std::string> tokenize(const std::string& str) {
    std::vector<std::string> result;
    std::string current;
    bool quoted = false;

    for (const char ch : str) {
        switch (ch) {
        case '"':
            // Leaving a quoted section flushes the token unconditionally.
            if (quoted) {
                result.push_back(current);
                current.clear();
            }
            quoted = !quoted;
            break;

        case ' ':
            if (quoted) {
                // Inside quotes a space is ordinary token content.
                current += ch;
            } else if (!current.empty()) {
                // Outside quotes a space ends the token, if there is one.
                result.push_back(current);
                current.clear();
            }
            break;

        default:
            current += ch;
            break;
        }
    }

    // Flush a trailing token that was not followed by a delimiter.
    if (!current.empty()) {
        result.push_back(current);
    }
    return result;
}
int main() {
    std::string s = "1st 2nd \"3rd with some comment\" 4th";
    std::vector<std::string> tokens = tokenize(s);
    for (auto iter = tokens.cbegin(); iter != tokens.cend(); ++iter) {
        std::cout << *iter << "\n";
    }
}
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With