Functions, Modules, and Pythonic Patterns
45 min read · notebook · Python Foundations for ML
3 of 24 · Python for Machine Learning
Functions, Modules, and Pythonic Patterns
Well-structured functions and modules are what separate quick experiments from production-ready ML code. This lesson covers every function feature you'll use day-to-day, plus Pythonic patterns that make your code cleaner and more efficient.
1. Defining Functions
def train_model(X, y, n_epochs=10, learning_rate=0.01):
    """Run a fixed number of epochs and collect the loss of each one.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    n_epochs : int, default=10
        Number of loss evaluations to perform.
    learning_rate : float, default=0.01
        Accepted for API completeness; this placeholder body does not use it.

    Returns
    -------
    list[float]
        Loss at each epoch, in epoch order.
    """
    # compute_loss is a placeholder for a real training step; it is called
    # once per epoch with the same inputs.
    return [compute_loss(X, y) for _ in range(n_epochs)]
Key points:
- Use default arguments for optional parameters.
- Always write a docstring — your future self will thank you.
- Return explicit values; avoid mutating inputs in-place when possible.
2. *args and **kwargs
# Extra positional arguments are gathered into the tuple named after the "*"
def log_metrics(*metrics):
    """Print every (name, value) pair on its own line, value to 4 decimals."""
    for metric in metrics:
        name, value = metric
        print(f" {name}: {value:.4f}")


log_metrics(("loss", 0.342), ("accuracy", 0.891))
# Extra keyword arguments are gathered into the dict named after the "**"
def create_model(model_class, **hyperparams):
    """Announce and build an instance of ``model_class``.

    Every keyword argument is forwarded unchanged to the class constructor,
    and the newly created instance is returned.
    """
    class_name = model_class.__name__
    print(f"Creating {class_name} with {hyperparams}")
    instance = model_class(**hyperparams)
    return instance
# Useful for wrapping sklearn estimators: create_model forwards the keyword
# arguments below straight to the RandomForestClassifier constructor.
from sklearn.ensemble import RandomForestClassifier
model = create_model(RandomForestClassifier, n_estimators=200, max_depth=10)
3. Lambda, map, and filter
lambda_demo.py
Run
# A lambda is an anonymous function whose body is a single expression
square = lambda x: x ** 2
print(square(5))  # 25

# map calls the given function on each element in turn
raw_scores = ["0.82", "0.91", "0.76", "0.88"]
scores = [*map(float, raw_scores)]
print(scores)  # [0.82, 0.91, 0.76, 0.88]

# filter keeps only the elements for which the function returns True
passing = [*filter(lambda score: score >= 0.80, scores)]
print(passing)  # [0.82, 0.91, 0.88]

# A key function tells the sort which part of each item to compare
models = [("RF", 0.91), ("SVM", 0.88), ("LR", 0.85)]
models = sorted(models, key=lambda entry: entry[1], reverse=True)
print(models)  # [('RF', 0.91), ('SVM', 0.88), ('LR', 0.85)]
25
[0.82, 0.91, 0.76, 0.88]
[0.82, 0.91, 0.88]
[('RF', 0.91), ('SVM', 0.88), ('LR', 0.85)]
4. Type Hints (PEP 484)
from typing import Optional
def normalize(
    values: list[float],
    method: str = "minmax",
    clip: Optional[tuple[float, float]] = None,
) -> list[float]:
    """Normalize a list of floats.

    Parameters
    ----------
    values : list[float]
        Numbers to rescale. An empty list yields an empty list.
    method : {"minmax", "zscore"}, default="minmax"
        ``"minmax"`` maps the smallest value to 0 and the largest to 1.
        ``"zscore"`` subtracts the mean and divides by the population
        standard deviation.
    clip : tuple[float, float], optional
        ``(low, high)`` bounds applied to the result after normalization.

    Returns
    -------
    list[float]
        A new list; ``values`` is not modified.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method not in ("minmax", "zscore"):
        raise ValueError(f"Unknown method: {method}")
    if not values:
        # min()/max() and the mean are undefined for empty input.
        return []

    if method == "minmax":
        lo, hi = min(values), max(values)
        span = hi - lo
        if span:
            result = [(v - lo) / span for v in values]
        else:
            # Constant input has zero range; map everything to 0.0 instead
            # of dividing by zero.
            result = [0.0 for _ in values]
    else:
        mu = sum(values) / len(values)
        sigma = (sum((v - mu) ** 2 for v in values) / len(values)) ** 0.5
        if sigma:
            result = [(v - mu) / sigma for v in values]
        else:
            # Zero variance: every value equals the mean, so each z-score is 0.
            result = [0.0 for _ in values]

    if clip is not None:
        # Separate names so the clip bounds are not confused with the
        # data's own min/max computed above.
        clip_lo, clip_hi = clip
        result = [max(clip_lo, min(clip_hi, v)) for v in result]
    return result
5. Decorators
A decorator wraps a function to add behaviour without modifying its source code. You'll encounter them in Flask routes, PyTorch hooks, and timing utilities.
decorator_demo.py
Run
import time
from functools import wraps
def timer(func):
    """Decorator that prints how long a function takes.

    The wrapped function's return value is passed through unchanged, and any
    exception it raises propagates to the caller. The elapsed time is printed
    in both cases.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            # Runs on normal return and on exceptions alike, so a failing
            # call still reports how long it ran.
            elapsed = time.perf_counter() - start
            print(f" {func.__name__} took {elapsed:.4f}s")
    return wrapper
@timer
def slow_sum(n):
    """Add up 0, 1, ..., n - 1 one term at a time (deliberately unoptimised)."""
    running = 0
    for term in range(n):
        running = running + term
    return running


result = slow_sum(1_000_000)
print(f"Result: {result}")
 slow_sum took 0.0621s
Result: 499999500000
6. Generators and yield
Generators produce values lazily — one at a time — without storing the entire sequence in memory. This is essential when working with datasets that don't fit in RAM.
generator_pipeline.py
Run
def read_batches(data, batch_size):
    """Yield successive batches from a sequence.

    Parameters
    ----------
    data : sequence
        Anything supporting ``len()`` and slicing (list, tuple, str, ...).
    batch_size : int
        Maximum number of items per batch; must be positive. The final batch
        is shorter when ``len(data)`` is not a multiple of ``batch_size``.

    Yields
    ------
    sequence
        Consecutive slices of ``data``, in order.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive. Because this is a generator, the
        check runs when iteration starts, not when the function is called.
    """
    if batch_size <= 0:
        # A negative step would make range() empty and silently drop all the
        # data; zero would fail with an unrelated range() error message.
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size!r}"
        )
    for start in range(0, len(data), batch_size):
        yield data[start : start + batch_size]
# Stand-in dataset: the ten integers 0 through 9
dataset = [*range(10)]
for index, chunk in enumerate(read_batches(dataset, 3)):
    print(f"Batch {index}: {chunk}")

# A generator expression looks like a list comprehension but computes each
# value only when it is requested
squares_gen = (value * value for value in range(1_000_000))
print(f"\nType: {type(squares_gen)}")
first_five = [next(squares_gen) for _ in range(5)]
print(f"First 5: {first_five}")
Batch 0: [0, 1, 2]
Batch 1: [3, 4, 5]
Batch 2: [6, 7, 8]
Batch 3: [9]

Type: <class 'generator'>
First 5: [0, 1, 4, 9, 16]
7. Context Managers
# The "with" statement ensures resources are properly released.
# An explicit encoding keeps the file's bytes the same on every platform
# instead of depending on the locale's default.
with open("results.csv", "w", encoding="utf-8") as f:
    f.write("model,accuracy\n")
    f.write("RF,0.91\n")
# file is automatically closed here, even if an exception occurred
# You can write your own context manager
from contextlib import contextmanager
@contextmanager
def training_phase(model_name):
    """Context manager that prints a start and an end marker around a block.

    The "[END]" line is printed even when the body of the ``with`` statement
    raises; the exception then propagates to the caller as usual.
    """
    print(f"[START] Training {model_name}")
    try:
        yield
    finally:
        # Without try/finally, an exception raised inside the with-block
        # would be re-raised at the yield and skip this line.
        print(f"[END] Training {model_name}")


with training_phase("RandomForest"):
    print(" ... fitting ...")
    print(" ... evaluating ...")
8. Modules and __name__ == "__main__"
# utils.py
def preprocess(data):
    """Return a new list with each string stripped of outer whitespace and lower-cased."""

    def _normalise(text):
        # Strip first, then lower-case, matching the order applied to every item.
        return text.strip().lower()

    return [_normalise(entry) for entry in data]
def main():
    """Run preprocess on a small hard-coded sample and print the result."""
    print(preprocess([" Hello ", "WORLD ", " Python "]))


# Executing this file directly sets __name__ to "__main__", so main() runs;
# importing it as a module gives __name__ the module's name and skips the call.
if __name__ == "__main__":
    main()
# train.py — importing from utils
# Importing utils does not run its main(): the __name__ == "__main__" guard
# in utils.py only fires when that file is executed directly.
from utils import preprocess
raw = load_data() # placeholder: supply your own data loading function
clean = preprocess(raw)
Up next · File I/O and Working with Data Formats