algo-ds-01

1.000

18/18 tests· algorithms

Challenge · difficulty 5/5

# Range-assign array with sum and maximum-subarray queries

Implement a file **`solution.py`** containing a class `RangeArray` that maintains an
array of integers under **range-assignment** updates while answering two kinds of
range queries efficiently: the **sum** of a range, and the **maximum-subarray sum**
within a range.

```python
class RangeArray:
    def __init__(self, data):
        """Build the structure from an iterable of ints. `len(data) >= 1`."""

    def assign(self, l, r, v):
        """Set a[i] = v for every index i with l <= i < r."""

    def sum(self, l, r):
        """Return the sum of a[l:r]."""

    def max_subarray(self, l, r):
        """Return the maximum sum over all NON-EMPTY contiguous subarrays that lie
        entirely within a[l:r]."""
```

## Indexing and ranges

- Indices are **0-based**.
- Every range `[l, r)` is **half-open**: it covers indices `l, l+1, ..., r-1`.
- All three methods require a **valid, non-empty** range: `0 <= l < r <= n`, where
  `n` is the length of the array. If the range is invalid (out of bounds, or `l >= r`),
  the method must raise **`IndexError`**.
- Constructing a `RangeArray` from an **empty** iterable must raise **`ValueError`**.

## Semantics

- **`assign(l, r, v)`** overwrites every element in `[l, r)` with the integer `v`.
  Elements outside the range are untouched. Values (both stored and assigned) may be
  **negative, zero, or positive**, and may be large.
- **`sum(l, r)`** returns `a[l] + a[l+1] + ... + a[r-1]` reflecting **all** updates
  applied so far.
- **`max_subarray(l, r)`** returns the largest possible value of
  `a[i] + a[i+1] + ... + a[j]` over all `l <= i <= j < r`. The subarray must be
  **non-empty**, so it always contains at least one element. Consequently, when every
  element in the range is negative the answer is the single **largest** (least
  negative) element — the empty subarray is **not** allowed.

The number of operations can be large, so both queries and updates must be
**sub-linear per call** in the array length (a lazy segment tree is the intended
approach). A solution that scans the affected range on every operation will be too
slow on the larger tests.

## Worked example

```python
ra = RangeArray([2, -3, 4, -1, 2, 1, -5, 4])
assert ra.max_subarray(0, 8) == 6   # [4, -1, 2, 1]
assert ra.sum(0, 8) == 4
assert ra.max_subarray(2, 6) == 6   # [4, -1, 2, 1] within a[2:6]

ra.assign(2, 4, -100)               # a = [2, -3, -100, -100, 2, 1, -5, 4]
assert ra.sum(0, 8) == -199
assert ra.max_subarray(0, 8) == 4   # the trailing single 4
assert ra.max_subarray(4, 6) == 3   # [2, 1]

ra.assign(0, 8, 5)                  # all fives
assert ra.max_subarray(0, 8) == 40
assert ra.sum(3, 7) == 20

neg = RangeArray([-4, -2, -9, -1, -6])
assert neg.max_subarray(0, 5) == -1  # best non-empty subarray is a single element
```

tests/test_range_array.py

import random
import time

import pytest

from solution import RangeArray


# --------------------------------------------------------------------------
# Brute-force oracle over a plain Python list.
# --------------------------------------------------------------------------

class Brute:
    def __init__(self, data):
        self.a = list(data)

    def assign(self, l, r, v):
        for i in range(l, r):
            self.a[i] = v

    def sum(self, l, r):
        return sum(self.a[l:r])

    def max_subarray(self, l, r):
        return _kadane(self.a[l:r])


def _kadane(vals):
    best = vals[0]
    cur = vals[0]
    for x in vals[1:]:
        cur = max(x, cur + x)
        best = max(best, cur)
    return best


# --------------------------------------------------------------------------
# Basic / worked-example behaviour.
# --------------------------------------------------------------------------

def test_worked_example_sum_and_max_subarray():
    ra = RangeArray([2, -3, 4, -1, 2, 1, -5, 4])
    # Whole array max subarray is [4, -1, 2, 1] = 6.
    assert ra.max_subarray(0, 8) == 6
    assert ra.sum(0, 8) == 4
    # Sub-range [2, 6) = [4, -1, 2, 1] -> best 6, sum 6.
    assert ra.max_subarray(2, 6) == 6
    assert ra.sum(2, 6) == 6


def test_single_element_ranges():
    ra = RangeArray([5, -7, 3])
    assert ra.max_subarray(0, 1) == 5
    assert ra.max_subarray(1, 2) == -7   # forced to take the single element
    assert ra.max_subarray(2, 3) == 3
    assert ra.sum(1, 2) == -7


def test_all_negative_forces_single_best():
    ra = RangeArray([-4, -2, -9, -1, -6])
    # Best non-empty subarray is the single largest element (-1).
    assert ra.max_subarray(0, 5) == -1
    assert ra.max_subarray(0, 3) == -2
    assert ra.sum(0, 5) == -22


def test_assign_updates_both_queries():
    ra = RangeArray([1, 1, 1, 1, 1])
    assert ra.max_subarray(0, 5) == 5
    ra.assign(1, 4, -3)          # -> [1, -3, -3, -3, 1]
    assert ra.sum(0, 5) == -7
    assert ra.max_subarray(0, 5) == 1     # best is a single boundary 1
    ra.assign(0, 5, 2)           # -> all 2s
    assert ra.max_subarray(0, 5) == 10
    assert ra.sum(1, 3) == 4


def test_assign_positive_then_negative_block():
    ra = RangeArray([0] * 6)
    ra.assign(0, 6, 5)           # all 5
    assert ra.max_subarray(0, 6) == 30
    ra.assign(2, 4, -100)        # [5,5,-100,-100,5,5]
    assert ra.max_subarray(0, 6) == 10
    assert ra.max_subarray(0, 2) == 10
    assert ra.max_subarray(4, 6) == 10
    assert ra.max_subarray(2, 4) == -100
    assert ra.sum(0, 6) == -180


def test_zero_assignment():
    ra = RangeArray([-1, -1, -1])
    ra.assign(0, 3, 0)
    assert ra.max_subarray(0, 3) == 0
    assert ra.sum(0, 3) == 0


def test_partial_query_crossing_lazy_boundaries():
    ra = RangeArray(list(range(1, 17)))   # 1..16
    ra.assign(3, 12, -1)   # zero out the middle with -1
    # a = [1,2,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,13,14,15,16]
    assert ra.sum(0, 16) == 1 + 2 + 3 + (-1) * 9 + 13 + 14 + 15 + 16
    assert ra.max_subarray(0, 16) == 13 + 14 + 15 + 16
    # A query window that starts and ends inside assigned/unassigned regions.
    # a[1:13] = [2,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,13]; best is the single 13.
    assert ra.max_subarray(1, 13) == 13
    assert ra.max_subarray(2, 5) == 3           # [3,-1,-1] -> 3


def test_invalid_ranges_raise():
    ra = RangeArray([1, 2, 3])
    for bad in [(-1, 2), (0, 0), (2, 1), (0, 4), (1, 5)]:
        with pytest.raises(IndexError):
            ra.sum(*bad)
        with pytest.raises(IndexError):
            ra.max_subarray(*bad)
    with pytest.raises(IndexError):
        ra.assign(0, 0, 9)
    with pytest.raises(IndexError):
        ra.assign(1, 5, 9)


def test_empty_construction_rejected():
    with pytest.raises(ValueError):
        RangeArray([])


# --------------------------------------------------------------------------
# Randomized correctness against the brute-force oracle.
# --------------------------------------------------------------------------

@pytest.mark.parametrize("seed", [0, 1, 2, 3, 4])
def test_random_small_against_brute(seed):
    rng = random.Random(seed)
    n = rng.randint(1, 40)
    data = [rng.randint(-9, 9) for _ in range(n)]
    ra = RangeArray(data)
    br = Brute(data)
    for _ in range(400):
        l = rng.randint(0, n - 1)
        r = rng.randint(l + 1, n)
        op = rng.random()
        if op < 0.4:
            v = rng.randint(-9, 9)
            ra.assign(l, r, v)
            br.assign(l, r, v)
        elif op < 0.7:
            assert ra.sum(l, r) == br.sum(l, r)
        else:
            assert ra.max_subarray(l, r) == br.max_subarray(l, r)


@pytest.mark.parametrize("seed", [10, 11])
def test_random_medium_against_brute(seed):
    rng = random.Random(seed)
    n = rng.randint(200, 500)
    data = [rng.randint(-1000, 1000) for _ in range(n)]
    ra = RangeArray(data)
    br = Brute(data)
    for _ in range(1500):
        l = rng.randint(0, n - 1)
        r = rng.randint(l + 1, n)
        op = rng.random()
        if op < 0.45:
            v = rng.randint(-1000, 1000)
            ra.assign(l, r, v)
            br.assign(l, r, v)
        elif op < 0.7:
            assert ra.sum(l, r) == br.sum(l, r)
        else:
            assert ra.max_subarray(l, r) == br.max_subarray(l, r)


def test_full_range_max_matches_kadane_after_updates():
    rng = random.Random(99)
    n = 300
    data = [rng.randint(-50, 50) for _ in range(n)]
    ra = RangeArray(data)
    mirror = list(data)
    for _ in range(300):
        l = rng.randint(0, n - 1)
        r = rng.randint(l + 1, n)
        v = rng.randint(-50, 50)
        ra.assign(l, r, v)
        mirror[l:r] = [v] * (r - l)
        assert ra.max_subarray(0, n) == _kadane(mirror)
        assert ra.sum(0, n) == sum(mirror)


# --------------------------------------------------------------------------
# Larger / performance-oriented input. A naive O(n) per operation solution
# would time out here; a proper lazy segment tree runs comfortably.
# Correctness is spot-checked on a C-level mirror.
# --------------------------------------------------------------------------

def test_large_performance_and_spot_correctness():
    rng = random.Random(2024)
    n = 20000
    data = [rng.randint(-100, 100) for _ in range(n)]
    ra = RangeArray(data)
    mirror = list(data)   # updated with C-level slice assignment (cheap)

    ops = 40000
    checks = 0
    start = time.time()
    for k in range(ops):
        l = rng.randint(0, n - 1)
        r = rng.randint(l + 1, n)
        roll = rng.random()
        if roll < 0.5:
            v = rng.randint(-100, 100)
            ra.assign(l, r, v)
            mirror[l:r] = [v] * (r - l)
        elif roll < 0.75:
            got = ra.sum(l, r)
            if k % 200 == 0:          # spot check (O(n) each, sampled)
                assert got == sum(mirror[l:r])
                checks += 1
        else:
            got = ra.max_subarray(l, r)
            if k % 200 == 0:
                assert got == _kadane(mirror[l:r])
                checks += 1
    elapsed = time.time() - start
    assert checks > 0
    # Generous ceiling: the reference finishes well under this.
    assert elapsed < 25.0

System prompt

You are an expert programmer. Solve the task exactly as specified. Output your solution as fenced code blocks using the required file name(s) and the exact function/type signatures requested. Prefer correctness; do not include prose outside code unless asked.

Proposed solution

Looking at this problem, I need to implement a data structure that supports:
1. Range assignment (set all elements in [l,r) to value v)
2. Range sum query
3. Range maximum subarray sum query

This requires a lazy segment tree that can handle range assignment updates and track the necessary information for maximum subarray sums.

For each segment, I need to store:
- Sum of the segment
- Maximum subarray sum in the segment
- Maximum prefix sum
- Maximum suffix sum
- Minimum prefix sum (for handling assignments properly)
- Minimum suffix sum

Actually, for range assignment updates, I'll need a lazy propagation mechanism that can handle setting a range to a constant value.

Let me implement a segment tree where each node stores:
- `sum`: total sum of the segment
- `max_sub`: maximum subarray sum in the segment
- `max_prefix`: maximum prefix sum (starting from left)
- `max_suffix`: maximum suffix sum (ending at right)

For range assignment, when we assign value `v` to a segment of length `len`, the values become:
- `sum = v * len`
- `max_sub = v * len` (since all elements are same, best subarray is entire segment)
- `max_prefix = v * len` (entire prefix)
- `max_suffix = v * len` (entire suffix)

Wait, that's not quite right. For a segment of length `len` with all values `v`:
- `sum = v * len`
- `max_sub = v * len` (if v >= 0) or `v` (if v < 0, best is single element)
- Actually, for uniform value `v`, the maximum subarray is the entire segment if v >= 0, otherwise just one element (the largest single element, which is v)

So for uniform value `v` in a segment of length `len`:
- `sum = v * len`
- `max_sub = max(v, v * len)` but actually it's `v * len` if v >= 0, else `v`
- More precisely: `max_sub = max(v, v * len)` is wrong. It should be: if v >= 0, then entire segment; if v < 0, then single element (v).

Actually, for all elements equal to v:
- If v >= 0: max_subarray = v * len
- If v < 0: max_subarray = v (single element)

But we can write this as: `max_sub = max(v, v * len)` only works when len >= 1. Let me think again.

For a segment of length n with all values v:
- The maximum subarray sum is max(v, 2*v, 3*v, ..., n*v)
- If v >= 0: this is n*v
- If v < 0: this is v (the first element)

So `max_sub = v * len if v >= 0 else v`

But actually, we can express it as: `max_sub = max(v * i for i in range(1, len+1))`
Which equals: `v * len if v >= 0 else v`

Let me implement a segment tree with lazy propagation for range assignment.

```python
class RangeArray:
    def __init__(self, data):
        if not data:
            raise ValueError("Cannot create RangeArray from empty iterable")
        
        self.n = len(data)
        self.data = list(data)
        
        # Build segment tree
        self.size = 1
        while self.size < self.n:
            self.size *= 2
        
        # Tree arrays: sum, max_sub, max_prefix, max_suffix
        self.tree_sum = [0] * (2 * self.size)
        self.tree_max_sub = [0] * (2 * self.size)
        self.tree_max_prefix = [0] * (2 * self.size)
        self.tree_max_suffix = [0] * (2 * self.size)
        
        # Lazy propagation: None means no pending assignment, otherwise the value to assign
        self.lazy = [None] * (2 * self.size)
        
        # Build leaves
        for i in range(self.n):
            val = self.data[i]
            idx = self.size + i
            self.tree_sum[idx] = val
            self.tree_max_sub[idx] = val
            self.tree_max_prefix[idx] = val
            self.tree_max_suffix[idx] = val
        
        # Fill remaining leaves with 0 (but they won't be used)
        for i in range(self.n, self.size):
            idx = self.size + i
            self.tree_sum[idx] = 0
            self.tree_max_sub[idx] = float('-inf')  # Invalid for max subarray
            self.tree_max_prefix[idx] = float('-inf')
            self.tree_max_suffix[idx] = float('-inf')
        
        # Build internal nodes
        for i in range(self.size - 1, 0, -1):
            self._merge(i)
    
    def _merge(self, idx):
        left = 2 * idx
        right = 2 * idx + 1
        
        # Sum is straightforward
        self.tree_sum[idx] = self.tree_sum[left] + self.tree_sum[right]
        
        # For max_subarray, we consider three cases:
        # 1. Best subarray entirely in left child
        # 2. Best subarray entirely in right child  
        # 3. Best subarray crossing the boundary (suffix of left + prefix of right)
        cross = self.tree_max_suffix[left] + self.tree_max_prefix[right]
        self.tree_max_sub[idx] = max(self.tree_max_sub[left], self.tree_max_sub[right], cross)
        
        # Max prefix: either entire left child + prefix of right, or just prefix of left
        self.tree_max_prefix[idx] = max(self.tree_max_prefix[left], 
                                        self.tree_sum[left] + self.tree_max_prefix[right])
        
        # Max suffix: either entire right child + suffix of left, or just suffix of right
        self.tree_max_suffix[idx] = max(self.tree_max_suffix[right],
                                        self.tree_sum[right] + self.tree_max_suffix[left])
    
    def _apply_assignment(self, idx, l, r, val):
        """Apply assignment to node covering [l, r) range in the original array."""
        length = r - l
        
        # For a segment of uniform value val
        self.tree_sum[idx] = val * length
        self.tree_max_sub[idx] = val * length if val >= 0 else val
        self.tree_max_prefix[idx] = val * length
        self.tree_max_suffix[idx] = val * length
    
    def _push(self, idx, l, r):
        """Push lazy assignment to children."""
        if self.lazy[idx] is not None:
            val = self.lazy[idx]
            mid = (l + r) // 2
            
            # Push to left child [l, mid)
            self.lazy[2 * idx] = val
            self._apply_assignment(2 * idx, l, mid, val)
            
            # Push to right child [mid, r)
            self.lazy[2 * idx + 1] = val
            self._apply_assignment(2 * idx + 1, mid, r, val)
            
            self.lazy[idx] = None
    
    def _update_range(self, l, r, val, idx=1, seg_l=0, seg_r=None):
        if seg_r is None:
            seg_r = self.size
        
        # Out of range
        if r <= seg_l or l >= seg_r:
            return
        
        # Current segment completely inside [l, r)
        if l <= seg_l and seg_r <= r:
            self.lazy[idx] = val
            self._apply_assignment(idx, seg_l, seg_r, val)
            return
        
        # Partial overlap - push down first
        self._push(idx, seg_l, seg_r)
        
        mid = (seg_l + seg_r) // 2
        self._update_range(l, r, val, 2 * idx, seg_l, mid)
        self._update_range(l, r, val, 2 * idx + 1, mid, seg_r)
        
        # Update current node from children
        self._merge(idx)
    
    def _query_sum(self, l, r, idx=1, seg_l=0, seg_r=None):
        if seg_r is None:
            seg_r = self.size
        
        if r <= seg_l or l >= seg_r:
            return 0
        
        if l <= seg_l and seg_r <= r:
            return self.tree_sum[idx]
        
        self._push(idx, seg_l, seg_r)
        
        mid = (seg_l + seg_r) // 2
        left_sum = self._query_sum(l, r, 2 * idx, seg_l, mid)
        right_sum = self._query_sum(l, r, 2 * idx + 1, mid, seg_r)
        return left_sum + right_sum
    
    def _query_max_sub(self, l, r, idx=1, seg_l=0, seg_r=None):
        if seg_r is None:
            seg_r = self.size
        
        if r <= seg_l or l >= seg_r:
            # Return invalid values for out of range
            return (float('-inf'), float('-inf'), float('-inf'), 0)
            # (max_sub, max_prefix, max_suffix, sum)
        
        if l <= seg_l and seg_r <= r:
            return (self.tree_max_sub[idx], self.tree_max_prefix[idx], 
                   self.tree_max_suffix[idx], self.tree_sum[idx])
        
        self._push(idx, seg_l, seg_r)
        
        mid = (seg_l + seg_r) // 2
        left_res = self._query_max_sub(l, r, 2 * idx, seg_l, mid)
        right_res = self._query_max_sub(l, r, 2 * idx + 1, mid, seg_r)
        
        # If one side is invalid, return the other
        if left_res[0] == float('-inf'):
            return right_res
        if right_res[0] == float('-inf'):
            return left_res
        
        left_max_sub, left_max_prefix, left_max_suffix, left_sum = left_res
        right_max_sub, right_max_prefix, right_max_suffix, right_sum = right_res
        
        # Merge the two segments
        cross = left_max_suffix + right_max_prefix
        max_sub = max(left_max_sub, right_max_sub, cross)
        max_prefix = max(left_max_prefix, left_sum + right_max_prefix)
        max_suffix = max(right_max_suffix, right_sum + left_max_suffix)
        total_sum = left_sum + right_sum
        
        return (max_sub, max_prefix, max_suffix, total_sum)
    
    def assign(self, l, r, v):
        if not (0 <= l < r <= self.n):
            raise IndexError("Invalid range")
        self._update_range(l, r, v)
    
    def sum(self, l, r):
        if not (0 <= l < r <= self.n):
            raise IndexError("Invalid range")
        return self._query_sum(l, r)
    
    def max_subarray(self, l, r):
        if not (0 <= l < r <= self.n):
            raise IndexError("Invalid range")
        result = self._query_max_sub(l, r)
        return result[0]
```

Test output (stdout)

..................                                                       [100%]
18 passed in 0.95s