== Char counter ==

* Can you implement a program to read a text file, and count how many times each character occurs in that file?

  #include <climits>
  #include <fstream>
  #include <iostream>

  char* countChars(char* fileName) {
    char* charTable = NULL;
    std::ifstream infile(fileName);
    if (!infile.is_open()) {
      std::cerr << "Failed to open " << fileName << std::endl;
      return NULL;
    } else {
      charTable = new char[UCHAR_MAX];
      std::string data;
      for (int i = 0; i < UCHAR_MAX; i++) {
        charTable[i] = 0;
      }
      while (infile >> data) {
        for (char c: data) {
          charTable[c]++;
        }
      }
    }
    return charTable;
  }

  int main(int argc, char** argv) {
    if (argc != 2) {
      std::cerr << "Syntax: cmd \n";
      return 1;
    } else {
      char *charTable = countChars(argv[1]);
      for (int i = 0; i < UCHAR_MAX; i++) {
        std::cout << (char)i << ": " << std::to_string(charTable[i]) << std::endl;
      }
      return 0;
    }
  }

* How can we improve the program above?

* We can use a library function instead of a hand-written loop to initialize the array:

  charTable = new char[UCHAR_MAX];
  std::fill_n(charTable, UCHAR_MAX, 0);

* We can add a conditional to avoid printing entries whose count is zero:

  if (charTable[i]) {
    std::cout << (char)i << ": " << std::to_string(charTable[i]) << std::endl;
  }

== Recursive Trees ==

* What is a tree? How can you define it?

  struct TreeNode {
    TreeNode(const int dd, const TreeNode* ll, const TreeNode *rr): left(ll), right(rr), data(dd) {}
    const int data;
    const TreeNode* left;
    const TreeNode* right;
  };

* Can you create a function to insert elements in a tree? This function should ensure that all the elements to the left of the root are less than all the elements to the right of the root.

  const TreeNode* insert(const TreeNode* root, const int data) {
    if (root) {
      if (data < root->data) {
        return new TreeNode(root->data, insert(root->left, data), root->right);
      } else if (data > root->data) {
        return new TreeNode(root->data, root->left, insert(root->right, data));
      } else {
        return root;
      }
    } else {
      return new TreeNode(data, NULL, NULL);
    }
  }

* Can you create a documented interface for this method?
  /**
   * This function inserts a new element into the tree rooted at 'root'. Insertion
   * preserves the search property. That is to say, elements less than the root
   * data are inserted on the left of the root, and elements greater than the root
   * data are inserted on the right of the root.
   * @param root the root of the tree where the element will be inserted.
   * @param data the data that is about to be inserted into the tree.
   */
  const TreeNode* insert(const TreeNode* root, const int data);

* What do we need to test this method?

* We need to have a way to traverse and print the elements in the tree. Can you write such a method?

  void inOrder(const TreeNode* root) {
    if (root) {
      inOrder(root->left);
      std::cout << root->data << std::endl;
      inOrder(root->right);
    }
  }

* If we assume that elements on the left are less than the root data, and elements to the right are greater, can you prove that this method prints all the elements in the tree in sorted order?

* Can you create a documented interface for this method?

  /**
   * This method traverses the tree in-order. This traversal ordering ensures
   * that elements on the left branch are visited before the root, and elements
   * on the right branch are visited after the node.
   * @param root the root of the tree that is about to be traversed.
   */
  void inOrder(const TreeNode* root);

* Can you write a method that creates a tree out of numbers in a file?

  const TreeNode* create(char* fileName) {
    std::ifstream infile(fileName);
    if (!infile.is_open()) {
      std::cerr << "Failed to open " << fileName << std::endl;
      return NULL;
    }
    int data = 0;
    const TreeNode* root = NULL;
    while (infile >> data) {
      root = insert(root, data);
    }
    return root;
  }

* Can you write a harness to test 'create' and 'inOrder'?

  int main(int argc, char** argv) {
    if (argc != 2) {
      std::cerr << "Syntax: cmd \n";
      return 1;
    } else {
      const TreeNode *root = create(argv[1]);
      inOrder(root);
      return 0;
    }
  }

* Again, why are the elements always printed in sorted order?
* Can you write a function to give you the size of a tree?

  int size(const TreeNode* root) {
    return root ? 1 + size(root->left) + size(root->right) : 0;
  }

* Write a driver to test it:

  int main(int argc, char** argv) {
    if (argc != 2) {
      std::cerr << "Syntax: cmd \n";
      return 1;
    } else {
      const TreeNode *root = create(argv[1]);
      inOrder(root);
      std::cout << "Size = " << size(root) << std::endl;
      return 0;
    }
  }

* How can we define the height of a tree?
  - The length of the longest path from root to some leaf.

* Can you write a function to compute the height of a tree?

  int height(const TreeNode* root) {
    if (root) {
      const int leftHeight = height(root->left);
      const int rightHeight = height(root->right);
      const int maxHeight = leftHeight > rightHeight ? leftHeight : rightHeight;
      return 1 + maxHeight;
    } else {
      return 0;
    }
  }

* Can you write code to test this program?

  int main(int argc, char** argv) {
    if (argc != 2) {
      std::cerr << "Syntax: cmd \n";
      return 1;
    } else {
      const TreeNode *root = create(argv[1]);
      inOrder(root);
      std::cout << "Size = " << size(root) << std::endl;
      std::cout << "Height = " << height(root) << std::endl;
      return 0;
    }
  }

* We could use either technique to sort numbers: a linked list, or a tree. Which method is better?

  #include <cstdlib>
  #include "tree.h"
  #include "list.h"

  int main(int argc, char** argv) {
    if (argc != 4) {
      std::cerr << "Syntax: cmd \n";
      return 1;
    } else {
      const int num_ints = atoi(argv[1]);
      const int largest_int = atoi(argv[2]);
      const char mode = argv[3][0];
      if (mode == 'l') {
        // Test the list:
        const Node *l = NULL;
        for (int i = 0; i < num_ints; i++) {
          l = new Node(rand() % largest_int, l);
        }
        const Node *ls = mergeSort(l);
      } else {
        // Test the tree:
        const TreeNode *t = NULL;
        for (int i = 0; i < num_ints; i++) {
          t = insert(t, rand() % largest_int);
        }
      }
      return 0;
    }
  }

  $ time ./a.out 60000 10000 l
  real 0m0.179s
  user 0m0.161s
  sys 0m0.013s

  $ time ./a.out 60000 10000 t
  real 0m0.108s
  user 0m0.094s
  sys 0m0.011s

* Why is the tree-based technique so much faster?
* Is the tree-based technique always faster?

  // Not really. Imagine that we insert the elements already in order:

  #include <cstdlib>
  #include "tree.h"
  #include "list.h"

  int main(int argc, char** argv) {
    if (argc != 4) {
      std::cerr << "Syntax: cmd \n";
      return 1;
    } else {
      const int num_ints = atoi(argv[1]);
      const int largest_int = atoi(argv[2]);
      const char mode = argv[3][0];
      if (mode == 'l') {
        // Test the list:
        const Node *l = NULL;
        for (int i = 0; i < num_ints; i++) {
          l = new Node(i, l);
        }
        const Node *ls = mergeSort(l);
      } else {
        // Test the tree:
        const TreeNode *t = NULL;
        for (int i = 0; i < num_ints; i++) {
          t = insert(t, i);
        }
      }
      return 0;
    }
  }

* What would the relative runtimes be in this case?

  $ time ./a.out 10000 10000 t
  real 0m5.334s
  user 0m4.686s
  sys 0m0.634s

  $ time ./a.out 10000 10000 l
  real 0m0.048s
  user 0m0.028s
  sys 0m0.007s

* Why does the tree perform so badly when the input is already sorted?

== Bit Sets ==

* Do you remember the fundamental set operations?
  - insert
  - remove
  - contains
  - new

* Can you define this interface in a header file?

  struct Set {
    Set (unsigned);
    bool contains(unsigned);
    void insert(unsigned);
    void remove(unsigned);
  private:
    char *_data;
    unsigned _capacity;
  };

* Can you create a program to test this code using assertions?

  #include <cassert>
  #include "set.h"

  int main() {
    Set s0(10);
    s0.insert(1);
    s0.insert(3);
    assert(s0.contains(1) == 1);
    assert(s0.contains(2) == 0);
    assert(s0.contains(3) == 1);
    s0.remove(3);
    assert(s0.contains(1) == 1);
    assert(s0.contains(2) == 0);
    assert(s0.contains(3) == 0);
  }

* Can you implement this type? You can start with minimal code that passes some of the tests.

  #include "set.h"

  Set::Set(unsigned capacity) : _capacity(capacity) {}

  bool Set::contains(unsigned element) { return false; }

  void Set::insert(unsigned element) {}

  void Set::remove(unsigned element) {}

* Can you now implement code that passes all the tests? Try to use a bit map.
  #include <climits>
  #include "set.h"

  Set::Set(unsigned capacity) : _capacity(capacity) {
    const unsigned size = _capacity/CHAR_BIT + 1;
    _data = new char[size];
    for (int i = 0; i < size; i++) {
      _data[i] = false;
    }
  }

  bool Set::contains(unsigned element) {
    unsigned index = element / CHAR_BIT;
    char offset = element % CHAR_BIT;
    char bit = 1 << offset;
    return _data[index] & bit;
  }

  void Set::insert(unsigned element) {
    unsigned index = element / CHAR_BIT;
    char offset = element % CHAR_BIT;
    char bit = 1 << offset;
    char mask = _data[index] | bit;
    _data[index] = mask;
  }

  void Set::remove(unsigned element) {
    unsigned index = element / CHAR_BIT;
    char offset = element % CHAR_BIT;
    char bit = 1 << offset;
    char mask = _data[index] & ~bit;
    _data[index] = mask;
  }

* What do you think about the program below? Should it work?

  #include <cassert>
  #include "set.h"

  int main() {
    Set s0(10);
    s0.insert(12);
    assert(s0.contains(12) == 1);
  }