Popcorn Hack 2

import time
import random

# Generate a large sorted list
data_size = 10000000
sorted_data = sorted(random.sample(range(100000000), data_size))

# Target to find (worst case for linear search)
target = sorted_data[-1]  # Last element

# O(n) - Linear Search
def linear_search(arr, target):
    for i, element in enumerate(arr):
        if element == target:
            return i
    return -1

# O(log n) - Binary Search
def binary_search(arr, target):
    left, right = 0, len(arr) - 1
    
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    
    return -1

# Compare performance
print("Testing with data size:", data_size)

start = time.time()
linear_result = linear_search(sorted_data, target)
linear_time = time.time() - start
print(f"Linear search: {linear_time:.6f} seconds")

start = time.time()
binary_result = binary_search(sorted_data, target)
binary_time = time.time() - start
print(f"Binary search: {binary_time:.6f} seconds")

print(f"Binary search is approximately {linear_time/binary_time:.0f}x faster")
Testing with data size: 10000000
Linear search: 2.623466 seconds
Binary search: 0.000368 seconds
Binary search is approximately 7131x faster

1) What is the time complexity of each algorithm?

  • Linear search has a time complexity of O(n) because it checks each element one by one until it finds the target

  • Binary search has a time complexity of O(log n) because it divides the search space in half with each step

  • Binary search is approximately 62,194 times faster than linear search in this test with 10 million elements

3) What happens if you increase data_size to 20000000?

  • Linear search will take about twice as long, because it scales linearly with the size of the list

  • Binary search will only take a slightly longer time, since log base 2 of 20 million is only a bit more than of 10 million

Homework Hack 1

import time
import random

# Bubble Sort
def bubble_sort(arr):
    n = len(arr)
    for i in range(n):
        for j in range(0, n-i-1):
            if arr[j] > arr[j+1]:
                arr[j], arr[j+1] = arr[j+1], arr[j]

# Merge Sort
def merge_sort(arr):
    if len(arr) > 1:
        mid = len(arr) // 2
        left = arr[:mid]
        right = arr[mid:]

        merge_sort(left)
        merge_sort(right)

        i = j = k = 0
        while i < len(left) and j < len(right):
            if left[i] < right[j]:
                arr[k] = left[i]
                i += 1
            else:
                arr[k] = right[j]
                j += 1
            k += 1

        while i < len(left):
            arr[k] = left[i]
            i += 1
            k += 1
        while j < len(right):
            arr[k] = right[j]
            j += 1
            k += 1

# Generate a list of 100 random numbers between 1 and 1000
arr = random.sample(range(1, 1001), 100)

# Time the Bubble Sort
start_time = time.time()
bubble_sort(arr.copy())
bubble_time = time.time() - start_time

# Time the Merge Sort
start_time = time.time()
merge_sort(arr.copy())
merge_time = time.time() - start_time

print(f"Bubble Sort took {bubble_time:.6f} seconds.")
print(f"Merge Sort took {merge_time:.6f} seconds.")

if bubble_time < merge_time:
    print("Bubble Sort is faster.")
else:
    print("Merge Sort is faster.")
Bubble Sort took 0.000965 seconds.
Merge Sort took 0.000242 seconds.
Merge Sort is faster.

Merge Sort consistently outperforms Bubble Sort because Merge Sort is O(n log n), which is much faster than Bubble Sort’s O(n^2) time complexity, especially for larger data sets

import random
import time

# Linear Search
def linear_search(arr, target):
    count = 0
    for i in range(len(arr)):
        count += 1
        if arr[i] == target:
            return count
    return -1

# Binary Search
def binary_search(arr, target):
    left, right = 0, len(arr) - 1
    count = 0
    while left <= right:
        count += 1
        mid = (left + right) // 2
        if arr[mid] == target:
            return count
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1

# Generate a sorted list of 100,000 numbers from 1 to 100,000
arr = sorted(random.sample(range(1, 100001), 100000))

# Pick a random target in the list
target = random.choice(arr)

# Time the Linear Search
start_time = time.time()
linear_comparisons = linear_search(arr, target)
linear_time = time.time() - start_time

# Time the Binary Search
start_time = time.time()
binary_comparisons = binary_search(arr, target)
binary_time = time.time() - start_time

print(f"Linear Search took {linear_time:.6f} seconds with {linear_comparisons} comparisons.")
print(f"Binary Search took {binary_time:.6f} seconds with {binary_comparisons} comparisons.")
Linear Search took 0.008154 seconds with 92575 comparisons.
Binary Search took 0.000189 seconds with 16 comparisons.

Binary Search is faster because it uses O(log n) time, reducing the number of comparisons significantly compared to Linear Search, which uses O(n) time

If the list is unsorted, Linear Search would still work, but Binary Search would fail as it assumes the list is sorted