algo-ds-01
1.000
Challenge · difficulty 5/5
# Range-assign array with sum and maximum-subarray queries
Implement a file **`solution.py`** containing a class `RangeArray` that maintains an
array of integers under **range-assignment** updates while answering two kinds of
range queries efficiently: the **sum** of a range, and the **maximum-subarray sum**
within a range.
```python
class RangeArray:
def __init__(self, data):
"""Build the structure from an iterable of ints. `len(data) >= 1`."""
def assign(self, l, r, v):
"""Set a[i] = v for every index i with l <= i < r."""
def sum(self, l, r):
"""Return the sum of a[l:r]."""
def max_subarray(self, l, r):
"""Return the maximum sum over all NON-EMPTY contiguous subarrays that lie
entirely within a[l:r]."""
```
## Indexing and ranges
- Indices are **0-based**.
- Every range `[l, r)` is **half-open**: it covers indices `l, l+1, ..., r-1`.
- All three methods require a **valid, non-empty** range: `0 <= l < r <= n`, where
`n` is the length of the array. If the range is invalid (out of bounds, or `l >= r`),
the method must raise **`IndexError`**.
- Constructing a `RangeArray` from an **empty** iterable must raise **`ValueError`**.
## Semantics
- **`assign(l, r, v)`** overwrites every element in `[l, r)` with the integer `v`.
Elements outside the range are untouched. Values (both stored and assigned) may be
**negative, zero, or positive**, and may be large.
- **`sum(l, r)`** returns `a[l] + a[l+1] + ... + a[r-1]` reflecting **all** updates
applied so far.
- **`max_subarray(l, r)`** returns the largest possible value of
`a[i] + a[i+1] + ... + a[j]` over all `l <= i <= j < r`. The subarray must be
**non-empty**, so it always contains at least one element. Consequently, when every
element in the range is negative the answer is the single **largest** (least
negative) element — the empty subarray is **not** allowed.
The number of operations can be large, so both queries and updates must be
**sub-linear per call** in the array length (a lazy segment tree is the intended
approach). A solution that scans the affected range on every operation will be too
slow on the larger tests.
## Worked example
```python
ra = RangeArray([2, -3, 4, -1, 2, 1, -5, 4])
assert ra.max_subarray(0, 8) == 6 # [4, -1, 2, 1]
assert ra.sum(0, 8) == 4
assert ra.max_subarray(2, 6) == 6 # [4, -1, 2, 1] within a[2:6]
ra.assign(2, 4, -100) # a = [2, -3, -100, -100, 2, 1, -5, 4]
assert ra.sum(0, 8) == -199
assert ra.max_subarray(0, 8) == 4 # the trailing single 4
assert ra.max_subarray(4, 6) == 3 # [2, 1]
ra.assign(0, 8, 5) # all fives
assert ra.max_subarray(0, 8) == 40
assert ra.sum(3, 7) == 20
neg = RangeArray([-4, -2, -9, -1, -6])
assert neg.max_subarray(0, 5) == -1 # best non-empty subarray is a single element
```
tests/test_range_array.py
import random
import time
import pytest
from solution import RangeArray
# --------------------------------------------------------------------------
# Brute-force oracle over a plain Python list.
# --------------------------------------------------------------------------
class Brute:
def __init__(self, data):
self.a = list(data)
def assign(self, l, r, v):
for i in range(l, r):
self.a[i] = v
def sum(self, l, r):
return sum(self.a[l:r])
def max_subarray(self, l, r):
return _kadane(self.a[l:r])
def _kadane(vals):
best = vals[0]
cur = vals[0]
for x in vals[1:]:
cur = max(x, cur + x)
best = max(best, cur)
return best
# --------------------------------------------------------------------------
# Basic / worked-example behaviour.
# --------------------------------------------------------------------------
def test_worked_example_sum_and_max_subarray():
ra = RangeArray([2, -3, 4, -1, 2, 1, -5, 4])
# Whole array max subarray is [4, -1, 2, 1] = 6.
assert ra.max_subarray(0, 8) == 6
assert ra.sum(0, 8) == 4
# Sub-range [2, 6) = [4, -1, 2, 1] -> best 6, sum 6.
assert ra.max_subarray(2, 6) == 6
assert ra.sum(2, 6) == 6
def test_single_element_ranges():
ra = RangeArray([5, -7, 3])
assert ra.max_subarray(0, 1) == 5
assert ra.max_subarray(1, 2) == -7 # forced to take the single element
assert ra.max_subarray(2, 3) == 3
assert ra.sum(1, 2) == -7
def test_all_negative_forces_single_best():
ra = RangeArray([-4, -2, -9, -1, -6])
# Best non-empty subarray is the single largest element (-1).
assert ra.max_subarray(0, 5) == -1
assert ra.max_subarray(0, 3) == -2
assert ra.sum(0, 5) == -22
def test_assign_updates_both_queries():
ra = RangeArray([1, 1, 1, 1, 1])
assert ra.max_subarray(0, 5) == 5
ra.assign(1, 4, -3) # -> [1, -3, -3, -3, 1]
assert ra.sum(0, 5) == -7
assert ra.max_subarray(0, 5) == 1 # best is a single boundary 1
ra.assign(0, 5, 2) # -> all 2s
assert ra.max_subarray(0, 5) == 10
assert ra.sum(1, 3) == 4
def test_assign_positive_then_negative_block():
ra = RangeArray([0] * 6)
ra.assign(0, 6, 5) # all 5
assert ra.max_subarray(0, 6) == 30
ra.assign(2, 4, -100) # [5,5,-100,-100,5,5]
assert ra.max_subarray(0, 6) == 10
assert ra.max_subarray(0, 2) == 10
assert ra.max_subarray(4, 6) == 10
assert ra.max_subarray(2, 4) == -100
assert ra.sum(0, 6) == -180
def test_zero_assignment():
ra = RangeArray([-1, -1, -1])
ra.assign(0, 3, 0)
assert ra.max_subarray(0, 3) == 0
assert ra.sum(0, 3) == 0
def test_partial_query_crossing_lazy_boundaries():
ra = RangeArray(list(range(1, 17))) # 1..16
ra.assign(3, 12, -1) # zero out the middle with -1
# a = [1,2,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,13,14,15,16]
assert ra.sum(0, 16) == 1 + 2 + 3 + (-1) * 9 + 13 + 14 + 15 + 16
assert ra.max_subarray(0, 16) == 13 + 14 + 15 + 16
# A query window that starts and ends inside assigned/unassigned regions.
# a[1:13] = [2,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,13]; best is the single 13.
assert ra.max_subarray(1, 13) == 13
assert ra.max_subarray(2, 5) == 3 # [3,-1,-1] -> 3
def test_invalid_ranges_raise():
ra = RangeArray([1, 2, 3])
for bad in [(-1, 2), (0, 0), (2, 1), (0, 4), (1, 5)]:
with pytest.raises(IndexError):
ra.sum(*bad)
with pytest.raises(IndexError):
ra.max_subarray(*bad)
with pytest.raises(IndexError):
ra.assign(0, 0, 9)
with pytest.raises(IndexError):
ra.assign(1, 5, 9)
def test_empty_construction_rejected():
with pytest.raises(ValueError):
RangeArray([])
# --------------------------------------------------------------------------
# Randomized correctness against the brute-force oracle.
# --------------------------------------------------------------------------
@pytest.mark.parametrize("seed", [0, 1, 2, 3, 4])
def test_random_small_against_brute(seed):
rng = random.Random(seed)
n = rng.randint(1, 40)
data = [rng.randint(-9, 9) for _ in range(n)]
ra = RangeArray(data)
br = Brute(data)
for _ in range(400):
l = rng.randint(0, n - 1)
r = rng.randint(l + 1, n)
op = rng.random()
if op < 0.4:
v = rng.randint(-9, 9)
ra.assign(l, r, v)
br.assign(l, r, v)
elif op < 0.7:
assert ra.sum(l, r) == br.sum(l, r)
else:
assert ra.max_subarray(l, r) == br.max_subarray(l, r)
@pytest.mark.parametrize("seed", [10, 11])
def test_random_medium_against_brute(seed):
rng = random.Random(seed)
n = rng.randint(200, 500)
data = [rng.randint(-1000, 1000) for _ in range(n)]
ra = RangeArray(data)
br = Brute(data)
for _ in range(1500):
l = rng.randint(0, n - 1)
r = rng.randint(l + 1, n)
op = rng.random()
if op < 0.45:
v = rng.randint(-1000, 1000)
ra.assign(l, r, v)
br.assign(l, r, v)
elif op < 0.7:
assert ra.sum(l, r) == br.sum(l, r)
else:
assert ra.max_subarray(l, r) == br.max_subarray(l, r)
def test_full_range_max_matches_kadane_after_updates():
rng = random.Random(99)
n = 300
data = [rng.randint(-50, 50) for _ in range(n)]
ra = RangeArray(data)
mirror = list(data)
for _ in range(300):
l = rng.randint(0, n - 1)
r = rng.randint(l + 1, n)
v = rng.randint(-50, 50)
ra.assign(l, r, v)
mirror[l:r] = [v] * (r - l)
assert ra.max_subarray(0, n) == _kadane(mirror)
assert ra.sum(0, n) == sum(mirror)
# --------------------------------------------------------------------------
# Larger / performance-oriented input. A naive O(n) per operation solution
# would time out here; a proper lazy segment tree runs comfortably.
# Correctness is spot-checked on a C-level mirror.
# --------------------------------------------------------------------------
def test_large_performance_and_spot_correctness():
rng = random.Random(2024)
n = 20000
data = [rng.randint(-100, 100) for _ in range(n)]
ra = RangeArray(data)
mirror = list(data) # updated with C-level slice assignment (cheap)
ops = 40000
checks = 0
start = time.time()
for k in range(ops):
l = rng.randint(0, n - 1)
r = rng.randint(l + 1, n)
roll = rng.random()
if roll < 0.5:
v = rng.randint(-100, 100)
ra.assign(l, r, v)
mirror[l:r] = [v] * (r - l)
elif roll < 0.75:
got = ra.sum(l, r)
if k % 200 == 0: # spot check (O(n) each, sampled)
assert got == sum(mirror[l:r])
checks += 1
else:
got = ra.max_subarray(l, r)
if k % 200 == 0:
assert got == _kadane(mirror[l:r])
checks += 1
elapsed = time.time() - start
assert checks > 0
# Generous ceiling: the reference finishes well under this.
assert elapsed < 25.0
System prompt
You are an expert programmer. Solve the task exactly as specified. Output your solution as fenced code blocks using the required file name(s) and the exact function/type signatures requested. Prefer correctness; do not include prose outside code unless asked.
Proposed solution
Looking at this problem, I need to implement a data structure that supports:
1. Range assignment (set all elements in [l,r) to value v)
2. Range sum query
3. Range maximum subarray sum query
This requires a lazy segment tree that can handle range assignment updates and track the necessary information for maximum subarray sums.
For each segment, I need to store:
- Sum of the segment
- Maximum subarray sum in the segment
- Maximum prefix sum
- Maximum suffix sum
- Minimum prefix sum (for handling assignments properly)
- Minimum suffix sum
Actually, for range assignment updates, I'll need a lazy propagation mechanism that can handle setting a range to a constant value.
Let me implement a segment tree where each node stores:
- `sum`: total sum of the segment
- `max_sub`: maximum subarray sum in the segment
- `max_prefix`: maximum prefix sum (starting from left)
- `max_suffix`: maximum suffix sum (ending at right)
For range assignment, when we assign value `v` to a segment of length `len`, the values become:
- `sum = v * len`
- `max_sub = v * len` (since all elements are same, best subarray is entire segment)
- `max_prefix = v * len` (entire prefix)
- `max_suffix = v * len` (entire suffix)
Wait, that's not quite right. For a segment of length `len` with all values `v`:
- `sum = v * len`
- `max_sub = v * len` (if v >= 0) or `v` (if v < 0, best is single element)
- Actually, for uniform value `v`, the maximum subarray is the entire segment if v >= 0, otherwise just one element (the largest single element, which is v)
So for uniform value `v` in a segment of length `len`:
- `sum = v * len`
- `max_sub = max(v, v * len)` but actually it's `v * len` if v >= 0, else `v`
- More precisely: `max_sub = max(v, v * len)` is wrong. It should be: if v >= 0, then entire segment; if v < 0, then single element (v).
Actually, for all elements equal to v:
- If v >= 0: max_subarray = v * len
- If v < 0: max_subarray = v (single element)
But we can write this as: `max_sub = max(v, v * len)` only works when len >= 1. Let me think again.
For a segment of length n with all values v:
- The maximum subarray sum is max(v, 2*v, 3*v, ..., n*v)
- If v >= 0: this is n*v
- If v < 0: this is v (the first element)
So `max_sub = v * len if v >= 0 else v`
But actually, we can express it as: `max_sub = max(v * i for i in range(1, len+1))`
Which equals: `v * len if v >= 0 else v`
Let me implement a segment tree with lazy propagation for range assignment.
```python
class RangeArray:
def __init__(self, data):
if not data:
raise ValueError("Cannot create RangeArray from empty iterable")
self.n = len(data)
self.data = list(data)
# Build segment tree
self.size = 1
while self.size < self.n:
self.size *= 2
# Tree arrays: sum, max_sub, max_prefix, max_suffix
self.tree_sum = [0] * (2 * self.size)
self.tree_max_sub = [0] * (2 * self.size)
self.tree_max_prefix = [0] * (2 * self.size)
self.tree_max_suffix = [0] * (2 * self.size)
# Lazy propagation: None means no pending assignment, otherwise the value to assign
self.lazy = [None] * (2 * self.size)
# Build leaves
for i in range(self.n):
val = self.data[i]
idx = self.size + i
self.tree_sum[idx] = val
self.tree_max_sub[idx] = val
self.tree_max_prefix[idx] = val
self.tree_max_suffix[idx] = val
# Fill remaining leaves with 0 (but they won't be used)
for i in range(self.n, self.size):
idx = self.size + i
self.tree_sum[idx] = 0
self.tree_max_sub[idx] = float('-inf') # Invalid for max subarray
self.tree_max_prefix[idx] = float('-inf')
self.tree_max_suffix[idx] = float('-inf')
# Build internal nodes
for i in range(self.size - 1, 0, -1):
self._merge(i)
def _merge(self, idx):
left = 2 * idx
right = 2 * idx + 1
# Sum is straightforward
self.tree_sum[idx] = self.tree_sum[left] + self.tree_sum[right]
# For max_subarray, we consider three cases:
# 1. Best subarray entirely in left child
# 2. Best subarray entirely in right child
# 3. Best subarray crossing the boundary (suffix of left + prefix of right)
cross = self.tree_max_suffix[left] + self.tree_max_prefix[right]
self.tree_max_sub[idx] = max(self.tree_max_sub[left], self.tree_max_sub[right], cross)
# Max prefix: either entire left child + prefix of right, or just prefix of left
self.tree_max_prefix[idx] = max(self.tree_max_prefix[left],
self.tree_sum[left] + self.tree_max_prefix[right])
# Max suffix: either entire right child + suffix of left, or just suffix of right
self.tree_max_suffix[idx] = max(self.tree_max_suffix[right],
self.tree_sum[right] + self.tree_max_suffix[left])
def _apply_assignment(self, idx, l, r, val):
"""Apply assignment to node covering [l, r) range in the original array."""
length = r - l
# For a segment of uniform value val
self.tree_sum[idx] = val * length
self.tree_max_sub[idx] = val * length if val >= 0 else val
self.tree_max_prefix[idx] = val * length
self.tree_max_suffix[idx] = val * length
def _push(self, idx, l, r):
"""Push lazy assignment to children."""
if self.lazy[idx] is not None:
val = self.lazy[idx]
mid = (l + r) // 2
# Push to left child [l, mid)
self.lazy[2 * idx] = val
self._apply_assignment(2 * idx, l, mid, val)
# Push to right child [mid, r)
self.lazy[2 * idx + 1] = val
self._apply_assignment(2 * idx + 1, mid, r, val)
self.lazy[idx] = None
def _update_range(self, l, r, val, idx=1, seg_l=0, seg_r=None):
if seg_r is None:
seg_r = self.size
# Out of range
if r <= seg_l or l >= seg_r:
return
# Current segment completely inside [l, r)
if l <= seg_l and seg_r <= r:
self.lazy[idx] = val
self._apply_assignment(idx, seg_l, seg_r, val)
return
# Partial overlap - push down first
self._push(idx, seg_l, seg_r)
mid = (seg_l + seg_r) // 2
self._update_range(l, r, val, 2 * idx, seg_l, mid)
self._update_range(l, r, val, 2 * idx + 1, mid, seg_r)
# Update current node from children
self._merge(idx)
def _query_sum(self, l, r, idx=1, seg_l=0, seg_r=None):
if seg_r is None:
seg_r = self.size
if r <= seg_l or l >= seg_r:
return 0
if l <= seg_l and seg_r <= r:
return self.tree_sum[idx]
self._push(idx, seg_l, seg_r)
mid = (seg_l + seg_r) // 2
left_sum = self._query_sum(l, r, 2 * idx, seg_l, mid)
right_sum = self._query_sum(l, r, 2 * idx + 1, mid, seg_r)
return left_sum + right_sum
def _query_max_sub(self, l, r, idx=1, seg_l=0, seg_r=None):
if seg_r is None:
seg_r = self.size
if r <= seg_l or l >= seg_r:
# Return invalid values for out of range
return (float('-inf'), float('-inf'), float('-inf'), 0)
# (max_sub, max_prefix, max_suffix, sum)
if l <= seg_l and seg_r <= r:
return (self.tree_max_sub[idx], self.tree_max_prefix[idx],
self.tree_max_suffix[idx], self.tree_sum[idx])
self._push(idx, seg_l, seg_r)
mid = (seg_l + seg_r) // 2
left_res = self._query_max_sub(l, r, 2 * idx, seg_l, mid)
right_res = self._query_max_sub(l, r, 2 * idx + 1, mid, seg_r)
# If one side is invalid, return the other
if left_res[0] == float('-inf'):
return right_res
if right_res[0] == float('-inf'):
return left_res
left_max_sub, left_max_prefix, left_max_suffix, left_sum = left_res
right_max_sub, right_max_prefix, right_max_suffix, right_sum = right_res
# Merge the two segments
cross = left_max_suffix + right_max_prefix
max_sub = max(left_max_sub, right_max_sub, cross)
max_prefix = max(left_max_prefix, left_sum + right_max_prefix)
max_suffix = max(right_max_suffix, right_sum + left_max_suffix)
total_sum = left_sum + right_sum
return (max_sub, max_prefix, max_suffix, total_sum)
def assign(self, l, r, v):
if not (0 <= l < r <= self.n):
raise IndexError("Invalid range")
self._update_range(l, r, v)
def sum(self, l, r):
if not (0 <= l < r <= self.n):
raise IndexError("Invalid range")
return self._query_sum(l, r)
def max_subarray(self, l, r):
if not (0 <= l < r <= self.n):
raise IndexError("Invalid range")
result = self._query_max_sub(l, r)
return result[0]
```Test output (stdout)
.................. [100%] 18 passed in 0.95s