Finding median of a stream

Jyothi
3 min readFeb 11, 2023

--

Basic idea is to use two heaps here. min-heap and max-heap. First half of elements are maintained using max-heap and next half of elements are maintained using min-heap. Median is found using the root elements of max heap and min heap.

I have written the below python code. I didnt use any built-in class here:


import math

# insert an element at end do min heapify
class median_info:
def __init__(self):
self.min_heap = []
self.max_heap = []

def bot_heapify_max_heap(self, ind):
#here the index will be pointing to the end.
# calculate the parent index and heapifyin bottom up approach
#if ind is 0 , its a base case, return
if (ind == 0):
return
pind = int (ind/2)
#if ind is even , decrement pind to get the right parent index
if (ind %2 ==0):
pind -=1
right = ind
left = ind-1
else:
right = ind+1
left = ind
ii = pind
if (left < len(self.max_heap) and self.max_heap[ii] < self.max_heap[left]) :
ii = left
if (right < len(self.max_heap) and self.max_heap[ii] < self.max_heap[right]) :
ii = right
if (ii != pind):
self.max_heap[ii], self.max_heap[pind] = self.max_heap[pind], self.max_heap[ii]
self.bot_heapify_max_heap(pind)

def heapify_max_heap(self, ind):
ii = ind
left= 2*ind +1
right = 2 *ind +2
if (left < len(self.max_heap) and self.max_heap[ii] < self.max_heap[left]) :
ii = left
if (right < len(self.max_heap) and self.max_heap[ii] < self.max_heap[right]) :
ii = right
if (ii != ind):
self.max_heap[ii], self.max_heap[ind] = self.max_heap[ind], self.max_heap[ii]
self.heapify_max_heap(ii)

def bot_heapify_min_heap(self, ind):
#here the index will be pointing to the end.
# calculate the parent index and heapify in bottom up approach
#if ind is 0 , its a base case, return
if (ind == 0):
return
pind = int (ind/2)
#if ind is even , decrement pind to get the right parent index
if (ind %2 ==0):
pind -=1
right = ind
left = ind-1
else:
right = ind+1
left = ind
ii = pind
if (left < len(self.min_heap) and self.min_heap[ii] > self.min_heap[left]) :
ii = left
if (right < len(self.max_heap) and self.min_heap[ii] > self.min_heap[right]) :
ii = right
if (ii != pind):
self.min_heap[ii], self.min_heap[pind] = self.min_heap[pind], self.min_heap[ii]
self.bot_heapify_min_heap(pind)

def heapify_min_heap(self, ind):
ii = ind
left= 2*ind +1
right = 2 *ind +2
if (left < len(self.min_heap) and self.min_heap[ii] > self.min_heap[left]) :
ii = left
if (right < len(self.max_heap) and self.min_heap[ii] > self.min_heap[right]) :
ii = right
if (ii != ind):
self.min_heap[ii], self.min_heap[ind] = self.min_heap[ind], self.min_heap[ii]
self.heapify_min_heap(ii)

def insert_to_maxheap(self, ele):
self.max_heap.append(ele)
self.bot_heapify_max_heap(len(self.max_heap)-1)

def insert_to_minheap(self, ele):
self.min_heap.append(ele)
self.bot_heapify_min_heap(len(self.min_heap)-1)

def insert(self, ele):
if len(self.max_heap) == 0 or ele < self.max_heap[0] :
self.insert_to_maxheap(ele)
else:
self.insert_to_minheap(ele)
#re balance
"""
print("before rebalance\n")
print(self.max_heap)
print(self.min_heap)
"""
self.rebalance_heaps()
"""
print("after rebalance\n")
print(self.max_heap)
print(self.min_heap)
"""

def median(self):
if len(self.max_heap) == len(self.min_heap) + 1:
return (self.max_heap[0])
if len(self.max_heap) == len(self.min_heap) :
return math.floor((self.max_heap[0] + self.min_heap[0])/2)

def rebalance_heaps(self):
if (len(self.max_heap) > len(self.min_heap) +1):
ele = self.max_heap[0]
self.max_heap[0] = self.max_heap.pop()
self.heapify_max_heap(0)
self.insert_to_minheap(ele)
elif (len(self. min_heap) > len(self.max_heap)):
ele = self.min_heap[0]
self.min_heap[0] =self.min_heap.pop()
self.heapify_min_heap(0)
self.insert_to_maxheap(ele)


def online_median(stream):
"""
Args:
stream(list_int32)
Returns:
list_int32


logic for this solution is to maintain min heap and max heap.
This method is faster than only maintaining min heap. While maintaining min heap we should extract n/2 elements to get the median
and again to put back those n/2 elements
This is overhead.
While maintaining min heap and max heap where first half of elements maintained in max heap, and next half of elements maintained in min heap.
median is :
if min heap size not same as max heap size
median = root of max heap
else
median = (root of min heap + root of max heap)/2
This computation is O(1)

framing min heap and max heaps:
if max heap is empty or the element value is less than max heap
insert the element into max heap
else
insert the element into min heap

balancing of max heap and min heap is a must to keep the sizes inline.
if max heap size is greater than min-heap size +1
remove the root of max heap and insert in min heap
else if min heap size is greater than max heap size
remove the root of min heap and insert in max heap
"""
# Write your code here.
mc = median_info()
le = len(stream)
res=[]
for ii in stream:
mc.insert(ii)
res.append(mc.median())
return res

Please give comments if it is correct or any issues with the code.

--

--