STL Set Operations

A summary with code samples of the set operations that come with STL: union, intersection, symmetrical difference and set difference. For consistency, the two sets of integer vectors used in each example are the same and are: [code language="cpp"] vals1 = { 1, 2, 4 } vals2 = { 1, 2, 5, 7 }; [/code] Unions The union of two sets is formed by the elements that are present in either one of the sets, or in both. [code language="cpp"] #include <iostream> #include <algorithm> #include <iterator> #include <string> #include <vector> typedef std::vector<int>::iterator iter; template<typename T, typename InputIterator> void Print(std::ostream& ostr, InputIterator itbegin, InputIterator itend, const std::string& delimiter) { std::copy(itbegin, itend, std::ostream_iterator<T>(ostr, delimiter.c_str())); } int main() { // Initialise sample data sets and vectors int vals1[] = { 1, 2, 4 }; int vals2[] = { 1, 2, 5, 7 }; int size1 = sizeof( vals1 ) / sizeof ( vals1[ 0 ] ); int size2 = sizeof( vals2 ) / sizeof ( vals2[ 0 ] ); std::vector<int> v1( vals1, vals1 + size1 ); std::vector<int> v2( vals2, vals2 + size2 ); std::vector<int> v( size1 + size2 ); std::vector<int>::iterator it_union; std::vector<int>::iterator it; // Sort the vectors (essential) std::sort( v1.begin(), v1.end() ); std::sort( v2.begin(), v2.end() ); // Find the union of the two vector sets: elements present in either // of the sets or both it_union = set_union ( v1.begin(), v1.end(), v2.begin(), v2.end(), v.begin()); int union_size = int( it_union - v.begin() ); std::cout << "Union size = " << union_size << std::endl; // Print the union Print<int, iter>( std::cout, v.begin(), it_union, " " ); return 0; } [/code] Output: [code language="txt"] Union size = 5 1 2 4 5 7 [/code] Intersections Intersections consist of elements present in both sets at the same time. [code language="cpp"] #include <iostream> #include <algorithm> #include <iterator> #include <string> #include <vector> typedef std::vector<int>::iterator iter; template<typename T, typename InputIterator> void Print(std::ostream& ostr, InputIterator itbegin, InputIterator itend, const std::string& delimiter) { std::copy(itbegin, itend, std::ostream_iterator<T>(ostr, delimiter.c_str())); } int main() { // Initialise sample data sets and vectors int vals1[] = { 1, 2, 4 }; int vals2[] = { 1, 2, 5, 7 }; int size1 = sizeof( vals1 ) / sizeof ( vals1[ 0 ] ); int size2 = sizeof( vals2 ) / sizeof ( vals2[ 0 ] ); std::vector<int> v1( vals1, vals1 + size1 ); std::vector<int> v2( vals2, vals2 + size2 ); std::vector<int> v( size1 + size2 ); std::vector<int>::iterator it_intersect; std::vector<int>::iterator it; // Sort the vectors (essential) std::sort( v1.begin(), v1.end() ); std::sort( v2.begin(), v2.end() ); // Find the intersection of the two vector sets: elements present in both sets it_intersect = set_intersection( v1.begin(), v1.end(), v2.begin(), v2.end(), v.begin()); int intersect_size = int( it_intersect - v.begin() ); std::cout << "Intersection size = " << intersect_size << std::endl; // Print the intersection Print<int, iter>( std::cout, v.begin(), it_intersect, " " ); return 0; } [/code] Output: [code language="txt"] Intersection size = 2 1 2 [/code] Symmetric Differences The set of elements that are present in one of the sets, but not in the other. [code language="cpp"] #include <iostream> #include <algorithm> #include <iterator> #include <string> #include <vector> typedef std::vector<int>::iterator iter; template<typename T, typename InputIterator> void Print(std::ostream& ostr, InputIterator itbegin, InputIterator itend, const std::string& delimiter) { std::copy(itbegin, itend, std::ostream_iterator<T>(ostr, delimiter.c_str())); } int main() { // Initialise sample data sets and vectors int vals1[] = { 1, 2, 4 }; int vals2[] = { 1, 2, 5, 7 }; int size1 = sizeof( vals1 ) / sizeof ( vals1[ 0 ] ); int size2 = sizeof( vals2 ) / sizeof ( vals2[ 0 ] ); std::vector<int> v1( vals1, vals1 + size1 ); std::vector<int> v2( vals2, vals2 + size2 ); std::vector<int> v( size1 + size2 ); std::vector<int>::iterator it_symm_diff; std::vector<int>::iterator it; // Sort the vectors (essential) std::sort( v1.begin(), v1.end() ); std::sort( v2.begin(), v2.end() ); // The symmetric difference of two sets is formed by the elements that are present in one // of the sets, but not in the other. it_symm_diff = set_symmetric_difference( v1.begin(), v1.end(), v2.begin(), v2.end(), v.begin()); int symm_diff_size = int( it_symm_diff - v.begin() ); std::cout << "Symmetric difference size = " << symm_diff_size << std::endl; // Print the symmetric difference Print<int, iter>( std::cout, v.begin(), it_symm_diff, " " ); return 0; } [/code] Output: [code language="txt"] Symmetric difference size = 3 4 5 7 [/code] Set Differences The elements that present in the first set, but not in the second one. [code language="cpp"] #include <iostream> #include <algorithm> #include <iterator> #include <string> #include <vector> typedef std::vector<int>::iterator iter; template<typename T, typename InputIterator> void Print(std::ostream& ostr, InputIterator itbegin, InputIterator itend, const std::string& delimiter) { std::copy(itbegin, itend, std::ostream_iterator<T>(ostr, delimiter.c_str())); } int main() { // Initialise sample data sets and vectors int vals1[] = { 1, 2, 4 }; int vals2[] = { 1, 2, 5, 7 }; int size1 = sizeof( vals1 ) / sizeof ( vals1[ 0 ] ); int size2 = sizeof( vals2 ) / sizeof ( vals2[ 0 ] ); std::vector<int> v1( vals1, vals1 + size1 ); std::vector<int> v2( vals2, vals2 + size2 ); std::vector<int> v( size1 + size2 ); std::vector<int>::iterator it_set_diff; std::vector<int>::iterator it; // Sort the vectors (essential) std::sort( v1.begin(), v1.end() ); std::sort( v2.begin(), v2.end() ); // The set difference of two sets is formed by the elements that are present in the first // sets, but not in the second. it_set_diff = set_difference( v1.begin(), v1.end(), v2.begin(), v2.end(), v.begin()); int set_diff_size = int( it_set_diff - v.begin() ); std::cout << "Set difference size = " << set_diff_size << std::endl; // Print the set difference Print<int, iter>( std::cout, v.begin(), it_set_diff, " " ); return 0; } [/code] Output: [code language="txt"] Set difference size = 1 4 [/code] Practical example: using union / set intersection to calculate the Jaccard Index The Jaccard index is a means of measuring the similarity and/or diversity of sample sets: [code language="cpp"] #include <iostream> #include <vector> #include <algorithm> #include <iterator> #include <string> class Token { private: std::string str; public: Token() {} Token( std::string val ) : str( val ) {} std::string Word() const { return str; } }; struct Ascending { bool operator() ( Token& start, Token& end ) { return start.Word() < end.Word(); } }; int main() { std::string str1[] = { "The", "crazy", "cat", "sat", "on", "the", "mat" }; std::string str2[] = { "The", "cat", "sat", "on", "the", "red", "mat" }; std::vector<Token>::iterator tok_intersect, tok_union; int size1 = sizeof( str1 ) / sizeof( str1[ 0 ] ); int size2 = sizeof( str2 ) / sizeof( str2[ 0 ] ); std::vector<Token> tokens1( str1, str1 + size1 ); std::vector<Token> tokens2( str2, str2 + size2 ); std::vector<Token> tokens( size1 + size2 ); std::sort( tokens1.begin(), tokens1.end(), Ascending() ); std::sort( tokens2.begin(), tokens2.end(), Ascending() ); tok_intersect = std::set_intersection( tokens1.begin(), tokens1.end(), tokens2.begin(), tokens2.end(), tokens.begin(), Ascending() ); int intersect_size = int( tok_intersect - tokens.begin() ); tok_union = std::set_union ( tokens1.begin(), tokens1.end(), tokens2.begin(), tokens2.end(), tokens.begin(), Ascending() ); int union_size = int( tok_union - tokens.begin() ); double JaccardIndex = (double) intersect_size / (double) union_size; return 0; } [/code] In this example, the size of the union set is 8, since there are 8 words in either one of the sets, or in both: "The", "crazy", "cat", "sat", "on", "the", "red", "mat". The intersection size is 6 since there are 6 words present in both sets at the same time: "The", "cat", "sat", "on", "the", "mat". Thus the Jaccard Index is calculated as 6 / 8 = 0.75.

Comments

Popular posts from this blog

Using the Supervisor Controller Pattern to access View controls in MVVM

Getting started with client-server applications in C++

How to send an e-mail via Google SMTP using C#