Find k max elements in array of N size (C/C++)

findKtop.cpp

First, we will see a version that uses an std::vector, thus it’s for C++. Then, we will see a version with arrays, that can be used both in C and C++.

#include <iostream>
#include <vector>

typedef float FT;
void swap(FT *a, FT *b) {
  *a = *a + *b;
  *b = *a - *b;
  *a = *a - *b;
}

void minHeapify(FT a[], int size, int i) {
  int l = 2* i ;
  int r = 2* i + 1;
  int smallest = i;
  if (l < size && a[l] < a[smallest])
    smallest = l;
  if (r < size && a[r] < a[smallest])
    smallest = r;
  if (smallest != i) {
    swap(&a[i], &a[smallest]);
    minHeapify(a, size, smallest);
  }

}

void buildMinHeap(FT a[], int size) {
  int i;
  for (i = size / 2; i >= 0; i--)
    minHeapify(a, size, i);

}

int kthLargest(std::vector<FT> a, int size, int k) {
  FT minHeap[k];
  int i;
  for (i = 0; i < k; i++)
    minHeap[i] = a[i];
  buildMinHeap(minHeap, k);
  for (i = k; i < size; i++) {
    if (a[i] > minHeap[0]) {
      minHeap[0] = a[i];
      minHeapify(minHeap, k, 0);
    }
  }
  return minHeap[0];
}

void find_k_max(std::vector<FT> a, int k, size_t indices[]) {
  FT kth = kthLargest(a, a.size(), k);
  int j = 0;
  for (size_t i = 0; i < a.size(); ++i)
    if (a[i] >= kth)
      indices[j++] = i;
}

int main() {
  std::vector<FT> v;
  int k = 5;
  for(int i = 0; i < 20; ++i)
    v.push_back(-i - 10);
  for(size_t i = 0; i < 20; ++i)
    std::cout<<v[i]<<std::endl;
  size_t max_ind[k];
  find_k_max(v, k, max_ind);
  for(int i = 0; i < k; ++i)
    std::cout << max_ind[i] << "\n";
  return 0;
}

Now the array version.

typedef float FT;
void swap(FT *a, FT *b) {
  *a = *a + *b;
  *b = *a - *b;
  *a = *a - *b;
}

void minHeapify(FT a[], int size, int i) {
  int l = 2* i ;
  int r = 2* i + 1;
  int smallest = i;
  if (l < size && a[l] < a[smallest])
    smallest = l;
  if (r < size && a[r] < a[smallest])
    smallest = r;
  if (smallest != i) {
    swap(&a[i], &a[smallest]);
    minHeapify(a, size, smallest);
  }

}

void buildMinHeap(FT a[], int size) {
  int i;
  for (i = size / 2; i >= 0; i--)
    minHeapify(a, size, i);

}

int kthLargest(FT a[], int size, int k) {
  FT minHeap[k];
  int i;
  for (i = 0; i < k; i++)
    minHeap[i] = a[i];
  buildMinHeap(minHeap, k);
  for (i = k; i < size; i++) {
    if (a[i] > minHeap[0]) {
      minHeap[0] = a[i];
      minHeapify(minHeap, k, 0);
    }
  }
  return minHeap[0];
}

void find_k_max(FT a[], int size, int k, int indices[]) {
  FT kth = kthLargest(a, size, k);
  int j = 0, i = 0;
  for (; i < size; ++i)
    if (a[i] >= kth)
      indices[j++] = i;
}

The algorithm (which uses a min heap) goes like this:

  1. Create a min heap of the k first elements in the array/vector.
    This costs O(k).
  2. For each element (not including in the first k of step 1), compare it with the root of min heap.
    If the element is greater than the root then make it root and call heapify for min heap.
    else ignore it.
    This costs O( (n-k) * logk ).
  3. Finally, min heap has k largest elements and root of the min heap is the kth largest element.

Time complexity: O(k + (n-k)Logk) without sorted output. If sorted output is needed then O(k + (n-k)Logk + kLogk).

The code above could be improved. We want the indices of the k max elements of the array/vector and in order to achieve that we find the k-th max element and then compare it with brute force to find all the k max elements.

We can obtain the indices in the same pass as we build the heap. That way we do not need to use brute force.

This approach is faster than the above, when N is big.

#include <stdio.h>
#include <iostream>
#include <vector>
#include <queue>

using namespace std;

typedef int FT;
void swap(Point::FT &a, Point::FT &b) {
  a = a + b;
  b = a - b;
  a = a - b;
}

// to swap the indices
void swap(int& a, int& b) {
	a = a + b;
	b = a - b;
	a = a - b;
}

void minHeapify(FT a[], int size, int i, size_t indices[]) {
  int l = 2* i ;
  int r = 2* i + 1;
  int smallest = i;
  if (l < size && a[l] < a[smallest])
    smallest = l;
  if (r < size && a[r] < a[smallest])
    smallest = r;
  if (smallest != i) {
    swap(a[i], a[smallest]);
    swap(indices[i], indices[smallest]);
    minHeapify(a, size, smallest, indices);
  }

}

void buildMinHeap(FT a[], int size, size_t indices[]) {
  int i;
  for (i = size / 2; i >= 0; i--)
    minHeapify(a, size, i, indices);
}

void kthLargest(FT a[], int size, int k, size_t indices[]) {
  FT minHeap[k];
  int i;
  for (i = 0; i < k; i++) {
    minHeap[i] = a[i];
    indices[i] = i;
  }
  buildMinHeap(minHeap, k, indices);
  for (i = k; i < size; i++) {
    if (a[i] > minHeap[0]) {
      minHeap[0] = a[i];
      indices[0] = i;
      minHeapify(minHeap, k, 0, indices);
    }
  }
}

int main() {
	int a[] = { 916, 17, 2666, 4, 12, 9, 5, 100 };
	int size = sizeof(a) / sizeof(a[0]);
	int k = 5;
	//printf("\n%d ",kthLargest(a,size,k));
	int ind[k];
	int i;
	kthLargest(a, size, k, ind);
	for (i = 0; i < k; ++i)
		printf("%d ", ind[i]);
	std::cout << "\n";

	// priority queue to test our results
	std::vector<int> test = { 916, 17, 2666, 4, 12, 9, 5, 100 };
	std::priority_queue<std::pair<int, int> > q;
	int min = test[0]; // this should be a limit
	for(int i = 0; i < k; ++i) {
		q.push(std::pair<int, int>(test[i], i));
		if(test[i] < min)
			min = test[i];
	}
	for (int i =k; i < (int) test.size(); ++i) {
		if(min <= test[i]) {
			// if you don't add things that are smaller than the smallest item
			//already on the queue.
			q.push(std::pair<int, int>(test[i], i));
		}
	}
	for (int i = 0; i < k; ++i) {
		int ki = q.top().second;
		std::cout << "index[" << i << "] = " << ki << std::endl;
		q.pop();
	}

	return 0;
}

This code can be improved too, by using only the indices array, which will index the actual elements of the array we are looking into. However this will alter the array we are looking into.

Also, the initialization of the k first elements could be skipped, if pointers/iterators were used.

This code was partially developed by me, Georgios Samaras and resources from the internet. The idea is to find the k-th largest element using a min-heap and then iterate over the array and collect all the elements that are greater or equal to this element. That way, we obtain the k maximum elements in the array. The code is written so that C and C++ can handle it.

Have questions about this code? Comments? Did you find a bug? Let me know!😀
Page created by G. (George) Samaras (DIT)

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s