In Python, it's known that the most efficient way to create a list with n repetitions of the same element (let's say the string 's') is by using list multiplication, as shown below:
lst = ['s'] * 1000
However, when the list is non-empty initially, what would be the most optimal method to append the same element n times?
Here are a couple of methods that come to mind:
Method1:
lst = [1,2,3]
for _ in range(1000):
lst.append('s')
Method2:
lst = [1,2,3]
lst.extend(['s'] * 1000)
# or
# lst.extend(['s' for _ in range(1000)])
But it's worth noting that Method 2 does create a temporary long list, e.g. ['s' for _ in range(1000)].
Are there any alternative approaches that are more efficient, both in terms of time complexity and space usage? Or among the existing methods, which one is deemed the most efficient?
I propose:
def addition(N):
lst = [1, 2, 3]
return lst+['s']*N
I profiled the approaches:
import itertools
from performance_measurement import run_performance_comparison
def method1(N):
lst = [1, 2, 3]
for _ in range(N):
lst.append("s")
return lst
def method2(N):
lst = [1, 2, 3]
lst.extend(["s"] * N)
return lst
def generator_approach(N):
#@user2390182
lst = [1, 2, 3]
lst.extend("s" for _ in range(N))
return lst
def addition(N):
lst = [1, 2, 3]
return lst + ["s"] * N
def itertools_approach(N):
#@juanpa.arrivillaga
lst = [1, 2, 3]
lst.extend(itertools.repeat("s", N))
return lst
approaches = [method1, method2, generator_approach, addition, itertools_approach]
for approach in approaches[1:]:
data = [100]
assert approach(*data) == approaches[0](*data)
run_performance_comparison(
approaches,
[
1_000,
3_000,
5_000,
10_000,
30_000,
100_000,
300_000,
500_000,
1_000_000,
],
title="Performance Comparison",
number_of_repetitions=10,
)

It seems to me ['s']*N is very performant, but itertools beats it.
Profiling code:
import timeit
from functools import partial
import matplotlib.pyplot as plt
from typing import List, Dict, Callable
from contextlib import contextmanager
@contextmanager
def data_provider(data_size, setup=lambda N: N, teardown=lambda: None):
data = setup(data_size)
yield data
teardown()
def run_performance_comparison(approaches: List[Callable],
data_size: List[int],
setup=lambda N: [N],
teardown=lambda: None,
number_of_repetitions=5, title='Performance Comparison', data_name='N'):
approach_times: Dict[Callable, List[float]] = {approach: [] for approach in approaches}
for N in data_size:
with data_provider(N, setup, teardown) as data:
for approach in approaches:
function = partial(approach, *data)
approach_time = min(timeit.Timer(function).repeat(repeat=number_of_repetitions, number=1))
approach_times[approach].append(approach_time)
for approach in approaches:
plt.plot(data_size, approach_times[approach], label=approach.__name__)
plt.yscale('log')
plt.xscale('log')
plt.xlabel(data_name)
plt.ylabel('Execution Time (seconds)')
plt.title(title)
plt.legend()
plt.show()
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With