Functions,
Modules, and Pythonic Patterns

45 min read · notebook · Python Foundations for ML

3 of 24 · Python for Machine Learning

Functions, Modules, and Pythonic Patterns

Well-structured functions and modules are what separate quick experiments from production-ready ML code. This lesson covers every function feature you'll use day-to-day, plus Pythonic patterns that make your code cleaner and more efficient.

1. Defining Functions

python

def train_model(X, y, n_epochs=10, learning_rate=0.01):
    """Collect one loss value per epoch and return them as a list.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Passed unchanged to ``compute_loss``.
    y : array-like, shape (n_samples,)
        Passed unchanged to ``compute_loss``.
    n_epochs : int, default=10
        How many times ``compute_loss`` is evaluated.
    learning_rate : float, default=0.01
        Part of the signature, but this placeholder body never reads it.

    Returns
    -------
    list[float]
        The ``n_epochs`` loss values, in epoch order.
    """
    # compute_loss is a placeholder that this lesson does not define.
    return [compute_loss(X, y) for _ in range(n_epochs)]

Key points:

Use default arguments for optional parameters.
Always write a docstring — your future self will thank you.
Return explicit values; avoid mutating inputs in-place when possible.

2. *args and **kwargs

python

# *args collects extra positional arguments into a tuple
def log_metrics(*metrics):
    """Print each (name, value) pair on its own line, value to 4 decimals.

    Every positional argument must be a two-item iterable: the metric's
    name followed by a number that supports the ``.4f`` format.
    """
    for entry in metrics:
        name, value = entry
        print(f"  {name}: {value:.4f}")

log_metrics(("loss", 0.342), ("accuracy", 0.891))

# **kwargs collects extra keyword arguments into a dict
def create_model(model_class, **hyperparams):
    """Announce and build an instance of ``model_class``.

    All keyword arguments are gathered into ``hyperparams`` and forwarded
    unchanged to ``model_class``; the new instance is returned.
    """
    print(f"Creating {model_class.__name__} with {hyperparams}")
    instance = model_class(**hyperparams)
    return instance

# Useful for wrapping sklearn estimators
# create_model forwards n_estimators and max_depth as keyword arguments
# to the RandomForestClassifier constructor.
from sklearn.ensemble import RandomForestClassifier
model = create_model(RandomForestClassifier, n_estimators=200, max_depth=10)

3. Lambda, map, and filter

lambda_demo.py Run

python

# Lambda — a one-line anonymous function
# (PEP 8 recommends a regular `def` when a function is bound to a name;
# the assignment here only demonstrates the syntax.)
square = lambda x: x ** 2
print(square(5))              # 25

# map — apply a function to every element
raw_scores = ["0.82", "0.91", "0.76", "0.88"]
scores = list(map(float, raw_scores))
print(scores)                 # [0.82, 0.91, 0.76, 0.88]

# filter — keep elements where function returns True
passing = list(filter(lambda s: s >= 0.80, scores))
print(passing)                # [0.82, 0.91, 0.88]

# Sorting with a key function
# The input is deliberately out of order so the effect of sort() is visible.
models = [("SVM", 0.88), ("LR", 0.85), ("RF", 0.91)]
# sort() reorders the list in place; the key picks the score (index 1)
# and reverse=True puts the highest score first.
models.sort(key=lambda m: m[1], reverse=True)
print(models)  # [('RF', 0.91), ('SVM', 0.88), ('LR', 0.85)]

25
[0.82, 0.91, 0.76, 0.88]
[0.82, 0.91, 0.88]
[('RF', 0.91), ('SVM', 0.88), ('LR', 0.85)]

4. Type Hints (PEP 484)

python

from typing import Optional

def normalize(
    values: list[float],
    method: str = "minmax",
    clip: Optional[tuple[float, float]] = None,
) -> list[float]:
    """Normalize a list of floats.

    Parameters
    ----------
    values : list[float]
        Numbers to rescale. An empty input yields an empty list.
    method : {"minmax", "zscore"}, default="minmax"
        ``"minmax"`` maps the smallest value to 0.0 and the largest to 1.0.
        ``"zscore"`` subtracts the mean and divides by the population
        standard deviation.
    clip : tuple[float, float], optional
        ``(low, high)`` bounds applied to the normalized values.

    Returns
    -------
    list[float]
        Normalized values in input order. When every input value is the
        same there is no spread to divide by, so every output is 0.0.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"minmax"`` nor ``"zscore"``.
    """
    # Validate first so a bad method is reported even for empty input.
    if method not in ("minmax", "zscore"):
        raise ValueError(f"Unknown method: {method}")
    if len(values) == 0:
        return []

    lo, hi = min(values), max(values)
    if lo == hi:
        # Constant input: both methods would otherwise divide by zero.
        result = [0.0] * len(values)
    elif method == "minmax":
        span = hi - lo
        result = [(v - lo) / span for v in values]
    else:
        n = len(values)
        mu = sum(values) / n
        sigma = (sum((v - mu) ** 2 for v in values) / n) ** 0.5
        # Differences too small to survive squaring can still give sigma == 0.
        result = [(v - mu) / sigma for v in values] if sigma else [0.0] * n

    if clip:
        clip_lo, clip_hi = clip
        result = [max(clip_lo, min(clip_hi, v)) for v in result]
    return result

5. Decorators

A decorator wraps a function to add behaviour without modifying its source code. You'll encounter them in Flask routes, PyTorch hooks, and timing utilities.

decorator_demo.py Run

python

import time
from functools import wraps

def timer(func):
    """Wrap ``func`` so each call prints its wall-clock duration.

    The returned function forwards all positional and keyword arguments to
    ``func``, prints the elapsed time once ``func`` returns, and hands back
    ``func``'s result. ``functools.wraps`` copies the wrapped function's
    metadata onto the wrapper.
    """
    @wraps(func)
    def timed(*args, **kwargs):
        t0 = time.perf_counter()
        value = func(*args, **kwargs)
        elapsed = time.perf_counter() - t0
        print(f"  {func.__name__} took {elapsed:.4f}s")
        return value

    return timed

@timer
def slow_sum(n):
    """Add up the integers 0 through n - 1 with an explicit loop."""
    running = 0
    for term in range(n):
        running = running + term
    return running

result = slow_sum(1_000_000)
print(f"Result: {result}")

  slow_sum took 0.0621s
Result: 499999500000

6. Generators and yield

Generators produce values lazily — one at a time — without storing the entire sequence in memory. This is essential when working with datasets that don't fit in RAM.

generator_pipeline.py Run

python

def read_batches(data, batch_size):
    """Yield successive batches from a list.

    Parameters
    ----------
    data : sequence
        Anything that supports ``len()`` and slicing.
    batch_size : int
        Maximum number of items per batch; must be positive.

    Yields
    ------
    sequence
        Consecutive slices of ``data``. The last one may be shorter than
        ``batch_size``.

    Raises
    ------
    ValueError
        If ``batch_size`` is zero or negative. Because this is a generator,
        the check runs when the first batch is requested, not at call time.
    """
    # Zero would fail inside range() with an unrelated message, and a
    # negative step would silently produce no batches at all.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size!r}")
    for start in range(0, len(data), batch_size):
        yield data[start : start + batch_size]

# Simulate a dataset of 10 samples
dataset = list(range(10))

# 10 items in batches of 3: the final batch holds the one leftover item.
for batch_num, batch in enumerate(read_batches(dataset, 3)):
    print(f"Batch {batch_num}: {batch}")

# Generator expression (like list comp, but lazy)
# Creating it computes nothing; a square is produced only when next() asks.
squares_gen = (x**2 for x in range(1_000_000))
print(f"\nType: {type(squares_gen)}")
# Five next() calls pull only the first five squares from the generator.
print(f"First 5: {[next(squares_gen) for _ in range(5)]}")

Batch 0: [0, 1, 2]
Batch 1: [3, 4, 5]
Batch 2: [6, 7, 8]
Batch 3: [9]

Type: <class 'generator'>
First 5: [0, 1, 4, 9, 16]

7. Context Managers

python

# The "with" statement ensures resources are properly released
# An explicit encoding keeps the written bytes the same on every platform
# instead of depending on the locale's default encoding.
with open("results.csv", "w", encoding="utf-8") as f:
    f.write("model,accuracy\n")
    f.write("RF,0.91\n")
# file is automatically closed here, even if an exception occurred

# You can write your own context manager
from contextlib import contextmanager

@contextmanager
def training_phase(model_name):
    """Print a start marker on entry and an end marker on exit.

    Parameters
    ----------
    model_name : str
        Label inserted into both messages.

    The ``yield`` sits inside ``try``/``finally`` so the end marker is
    printed even when the body of the ``with`` block raises. The exception
    itself still propagates to the caller.
    """
    print(f"[START] Training {model_name}")
    try:
        yield
    finally:
        print(f"[END]   Training {model_name}")

with training_phase("RandomForest"):
    print("  ... fitting ...")
    print("  ... evaluating ...")

8. Modules and __name__ == "__main__"

python

# utils.py
def preprocess(data):
    """Return a new list with each string stripped and lower-cased."""
    trimmed = (text.strip() for text in data)
    return [text.lower() for text in trimmed]

def main():
    """Run preprocess on a small hard-coded sample and print the result."""
    cleaned = preprocess(["  Hello ", "WORLD  ", " Python "])
    print(cleaned)

if __name__ == "__main__":
    # Reached only when this file is executed as a script; importing the
    # module skips the call.
    main()

python

# train.py — importing from utils
# Importing utils does not call its main(): that call sits behind the
# `if __name__ == "__main__"` guard in utils.py.
from utils import preprocess

# load_data is not defined in this snippet; supply your own loader.
raw = load_data()           # your data loading function
clean = preprocess(raw)

← Previous lesson · Python Refresher: Data Types and Control Flow

Up next · File I/O and Working with Data Formats