Iterator vs Generator¶
Generator is a special case of Iterator.
Generator is easy and convenient to use but at additional cost (memory and speed).
If you need performance, use plain iterator (with the help of the itertools
module).
If you need convenience and concise code, use generator.
Please refer to Python Generator vs Iterator for more detailed discussions.
When Not to Use Generator¶
You need to access the data multiple times (i.e. cache the results instead of recomputing them).
for i in outer: # used once, okay to be a generator or return a list for j in inner: # used multiple times, reusing a list is better ...
You need random access (or any access other than forward sequential order).
for i in reversed(data): ... # generators aren't reversible s[i], s[j] = s[j], s[i] # generators aren't indexable
You need to join strings (which requires two passes over the data).
s = ''.join(data) # lists are faster than generators in this use case
You are using PyPy which sometimes can't optimize generator code as much as it can with normal function calls and list manipulations.
import itertools as it
builtins.enumerate¶
words = ["how", "are", "you"]
for idx, word in enumerate(words):
print(f"{idx}: {word}")
words = ["how", "are", "you"]
for idx, word in enumerate(words, 10):
print(f"{idx}: {word}")
builtins.all¶
all([True, True, True, True])
all([True, False, True, True])
builtins.any¶
any([True, True, True, True])
any([True, False, True, True])
any([False, False, False, False])
builtins.max¶
max([1, 2, 3], default=0)
max([])
max([], default=0)
itertools.accumulate¶
import operator
list(it.accumulate([1, 2, 3, 4], operator.mul))
import operator
list(it.accumulate([1, 2, 3, 4], operator.add))
count (range)¶
Notice that count
does NOT count the number of elements in an iterator,
but instead generate an iterator of arithematic sequence of integers.
x = it.count(0, 1)
list(it.takewhile(lambda x: x < 10, x))
cycle¶
x = it.cycle("abcd")
list(it.islice(x, 0, 10))
itertools.chain¶
You can use itertools.chain
to combine/join multiple iterable objects.
x = it.chain("abcd", "efgh", "ijkl")
list(x)
it.chain((1, 2), [3, 4])
it.chain(*[[1, 2], [3, 4]])
list(it.chain(*[[1, 2], [3, 4]]))
it.chain.from_iterable([[1, 2], [3, 4]])
list(it.chain.from_iterable([[1, 2], [3, 4]]))
compress¶
x = it.compress([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
list(x)
dropwhile¶
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
list(it.dropwhile(lambda i: i < 5, x))
x = [0, 1, 2, 3, 4, 5, 4, 3, 2, 1]
list(it.dropwhile(lambda i: i < 5, x))
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
list(it.dropwhile(lambda i: i % 2 == 0, x))
combinations¶
x = [0, 1, 2, 3, 4]
list(it.combinations(x, 3))
combinations_with_replacement¶
x = [0, 1, 2, 3]
list(it.combinations_with_replacement(x, 3))
groupby¶
itertools.groupby
is a convenient way to group elements into groups of elements.
Its return type is Iterable[Tuple[KeyType, Iterable[ValueType]]]
.
Notice that itertools.groupby
iterate through the original collection only once,
so you must consume the result of itertools.groupby
all at one time (convert all iterators at one time).
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
it.groupby(x, lambda e: e // 3)
The following way of consuming the resutl of itertools.groupby
is not right
as it does not convert all iterators at one time.
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
groups = it.groupby(x, lambda e: e // 3)
groups = list(groups)
groups
groups[0]
Got no values for the key 0
as the iterator as already been consumed
the first time it.groupby(x, lambda e: e // 3)
is converted to a list.
list(groups[0][1])
Below is the right way to convert the result of itertools.groupby
to a list of tuples.
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
groups = it.groupby(x, lambda e: e // 3)
[(key, list(val)) for key, val in groups]
Below is the right way to convert the result of itertools.groupby
to a dict.
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
groups = it.groupby(x, lambda e: e // 3)
{key: list(val) for key, val in groups}
Below is a the right way to extract groups of values (without keys).
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
groups = it.groupby(x, lambda e: e // 3)
[list(val) for _, val in groups]
Notice that you can also use itertools.zip_longest
to group values into equal size of groups
according to the iteration order of elements.
x = "abcdefghij"
myit = it.zip_longest(*([iter(x)] * 3))
[list(val) for val in myit]
filter¶
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
list(filter(lambda e: e % 2 == 0, x))
filterfalse¶
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
list(it.filterfalse(lambda e: e % 2 == 0, x))
from_iterable¶
list(it.chain.from_iterable(["abcd", "efgh", "ijkl"]))
islice¶
list(it.islice("abcd", 0, 2))
list(it.islice("abcd", 2, 2))
list(it.islice("abcd", 2, 10))
next¶
next
(when used with a conditon) is equivalent to Seq.find
in Scala.
A default value can be passed to next
.
def fibonacci(x1, x2):
while True:
yield x1
x1, x2 = x2, x1 + x2
fiter = fibonacci(1, 1)
next(fiter)
fiter = fibonacci(1, 1)
next(f for f in fiter if f > 100)
fiter = fibonacci(1, 1)
next((f for f in fiter if f > 100), 100)
slice¶
slice(10)
x = [1, 2, 3, 4]
x[0:2]
x = [1, 2, 3, 4]
x[slice(2)]
map¶
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
list(map(lambda e: e * e, x))
permutations¶
x = [0, 1, 2, 3, 4]
list(it.permutations(x, 2))
product¶
list(it.product("abcd", "1234"))
repeat¶
x = it.repeat("abcd")
list(it.islice(x, 0, 10))
starmap¶
x = [[0, 1], [2, 3], [4, 5]]
list(it.starmap(pow, x))
takewhile¶
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
list(it.takewhile(lambda i: i < 5, x))
x = [0, 1, 2, 3, 4, 5, 4, 3, 2, 1]
list(it.takewhile(lambda i: i < 5, x))
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
list(it.takewhile(lambda i: i % 2 == 0, x))
tee¶
it1, it2 = it.tee("abcd")
print(list(it1))
print(list(it2))
zip_longest¶
list(it.zip_longest("abc", [1, 2, 3, 4, 5]))
Empty Iterator or Not¶
it = [1, 2, 3]
not any(True for _ in it)
it = []
not any(True for _ in it)
Iterator Receipes¶
def take(n, iterable):
"Return first n items of the iterable as a list"
return list(islice(iterable, n))
def tabulate(function, start=0):
"Return function(0), function(1), ..."
return map(function, count(start))
def tail(n, iterable):
"Return an iterator over the last n items"
# tail(3, 'ABCDEFG') --> E F G
return iter(collections.deque(iterable, maxlen=n))
def consume(iterator, n):
"Advance the iterator n-steps ahead. If n is none, consume entirely."
# Use functions that consume iterators at C speed.
if n is None:
# feed the entire iterator into a zero-length deque
collections.deque(iterator, maxlen=0)
else:
# advance to the empty slice starting at position n
next(islice(iterator, n, n), None)
def nth(iterable, n, default=None):
"Returns the nth item or a default value"
return next(islice(iterable, n, None), default)
def all_equal(iterable):
"Returns True if all the elements are equal to each other"
g = groupby(iterable)
return next(g, True) and not next(g, False)
def quantify(iterable, pred=bool):
"Count how many times the predicate is true"
return sum(map(pred, iterable))
def padnone(iterable):
"""Returns the sequence elements and then returns None indefinitely.
Useful for emulating the behavior of the built-in map() function.
"""
return chain(iterable, repeat(None))
def ncycles(iterable, n):
"Returns the sequence elements n times"
return chain.from_iterable(repeat(tuple(iterable), n))
def dotproduct(vec1, vec2):
return sum(map(operator.mul, vec1, vec2))
def flatten(listOfLists):
"Flatten one level of nesting"
return chain.from_iterable(listOfLists)
def repeatfunc(func, times=None, *args):
"""Repeat calls to func with specified arguments.
Example: repeatfunc(random.random)
"""
if times is None:
return starmap(func, repeat(args))
return starmap(func, repeat(args, times))
def pairwise(iterable):
"s -> (s0,s1), (s1,s2), (s2, s3), ..."
a, b = tee(iterable)
next(b, None)
return zip(a, b)
def grouper(iterable, n, fillvalue=None):
"Collect data into fixed-length chunks or blocks"
# grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
args = [iter(iterable)] * n
return zip_longest(*args, fillvalue=fillvalue)
def roundrobin(*iterables):
"roundrobin('ABC', 'D', 'EF') --> A D E B F C"
# Recipe credited to George Sakkis
pending = len(iterables)
nexts = cycle(iter(it).__next__ for it in iterables)
while pending:
try:
for next in nexts:
yield next()
except StopIteration:
pending -= 1
nexts = cycle(islice(nexts, pending))
def partition(pred, iterable):
"Use a predicate to partition entries into false entries and true entries"
# partition(is_odd, range(10)) --> 0 2 4 6 8 and 1 3 5 7 9
t1, t2 = tee(iterable)
return filterfalse(pred, t1), filter(pred, t2)
def powerset(iterable):
"powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
def unique_everseen(iterable, key=None):
"List unique elements, preserving order. Remember all elements ever seen."
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D
seen = set()
seen_add = seen.add
if key is None:
for element in filterfalse(seen.__contains__, iterable):
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element
def unique_justseen(iterable, key=None):
"List unique elements, preserving order. Remember only the element just seen."
# unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
# unique_justseen('ABBCcAD', str.lower) --> A B C A D
return map(next, map(itemgetter(1), groupby(iterable, key)))
def iter_except(func, exception, first=None):
"""Call a function repeatedly until an exception is raised.
Converts a call-until-exception interface to an iterator interface.
Like builtins.iter(func, sentinel) but uses an exception instead
of a sentinel to end the loop.
Examples:
iter_except(functools.partial(heappop, h), IndexError) # priority queue iterator
iter_except(d.popitem, KeyError) # non-blocking dict iterator
iter_except(d.popleft, IndexError) # non-blocking deque iterator
iter_except(q.get_nowait, Queue.Empty) # loop over a producer Queue
iter_except(s.pop, KeyError) # non-blocking set iterator
"""
try:
if first is not None:
yield first() # For database APIs needing an initial cast to db.first()
while True:
yield func()
except exception:
pass
def first_true(iterable, default=False, pred=None):
"""Returns the first true value in the iterable.
If no true value is found, returns *default*
If *pred* is not None, returns the first item
for which pred(item) is true.
"""
# first_true([a,b,c], x) --> a or b or c or x
# first_true([a,b], x, f) --> a if f(a) else b if f(b) else x
return next(filter(pred, iterable), default)
def random_product(*args, repeat=1):
"Random selection from itertools.product(*args, **kwds)"
pools = [tuple(pool) for pool in args] * repeat
return tuple(random.choice(pool) for pool in pools)
def random_permutation(iterable, r=None):
"Random selection from itertools.permutations(iterable, r)"
pool = tuple(iterable)
r = len(pool) if r is None else r
return tuple(random.sample(pool, r))
def random_combination(iterable, r):
"Random selection from itertools.combinations(iterable, r)"
pool = tuple(iterable)
n = len(pool)
indices = sorted(random.sample(range(n), r))
return tuple(pool[i] for i in indices)
def random_combination_with_replacement(iterable, r):
"Random selection from itertools.combinations_with_replacement(iterable, r)"
pool = tuple(iterable)
n = len(pool)
indices = sorted(random.randrange(n) for i in range(r))
return tuple(pool[i] for i in indices)