#include <range/v3/view/chunk_by.hpp>
#include <range/v3/view/split_when.hpp>
import std;
struct case_insensitive_hash{
constexpr std::size_t operator()(std::string_view text) const{
return std::accumulate(text.begin(), text.end(), 0UZ, [](std::size_t acc, char c){
return acc ^ std::hash<char>{}(c) + 0x9e3779b9 + (acc << 6) + (acc >> 2);
});
}
};
struct case_insensitive_equal{
constexpr bool operator()(std::string_view lhs, std::string_view rhs) const{
if (lhs.size() != rhs.size()){
return false;
}
return std::ranges::equal(lhs, rhs, [](char c1, char c2) {
return std::tolower(c1) == std::tolower(c2);
});
}
};
std::string read_file(const std::filesystem::path &path){
std::fstream file { path };
std::stringstream buffer;
buffer << file.rdbuf();
return buffer.str();
}
std::string to_lower(std::string text){
std::ranges::for_each(text, [](char &c){
c = std::tolower(c);
});
return text;
}
int main(int argc, char **argv) {
auto text = read_file(argv[1]);
auto words = text
| ranges::views::split_when([](char c) {
return !std::isalpha(c);
})
| std::views::transform([](auto &&rng) {
return std::string_view { &*rng.begin(), static_cast<std::size_t>(std::ranges::distance(rng)) };
});
std::unordered_map<std::string_view, std::size_t, case_insensitive_hash, case_insensitive_equal> frequencies;
for (std::string_view word : words){
++frequencies[word];
}
auto sorted_frequencies = frequencies
| std::views::transform([](auto &&kv){
auto &&[key, value] = kv;
return std::make_pair(value, key);
})
| std::ranges::to<std::vector>();
std::ranges::sort(sorted_frequencies, std::greater{}, [](const auto &kv) { return kv.first; });
auto result = sorted_frequencies
| ranges::views::chunk_by([](const auto &kv1, const auto &kv2) {
return kv1.first == kv2.first;
})
| std::views::transform([](auto &&rng) {
return std::make_pair(
rng.front().first,
rng | std::views::values
| std::views::transform([](std::string_view text) {
return to_lower(std::string { text });
})
| std::ranges::to<std::vector>()
);
});
auto header = std::format("{: >10} | {: >10} | {}", "Occurrence", "Count", "Words");
std::println("{}\n{:-^{}}", header, "", header.size());
for (auto &&[frequency, words] : result){
std::println("{: >10} | {: >10} | {}", frequency, words.size(), words);
}
}
No heap allocation except loading text into string, unordered_map construction, and lowering words at the final stage. Hashing and equal comparsion is consistent for character casing.
Compiler: Clang 17
External libraries: range-v3