Numeric Computing

NumPy is a powerful Python library that provides support for large, multi-dimensional arrays and matrices, along with a vast collection of high-level mathematical functions to operate on these arrays.

import numpy as np

# Define the original triangle vertices (in 2D)
triangle = np.array([[0, 0], [1, 0], [0.5, np.sqrt(3)/2]])

# Define a function to rotate the triangle using matrix multiplication
def rotate_triangle(triangle, angle_degrees):
    angle_radians = np.radians(angle_degrees)
    rotation_matrix = np.array([[np.cos(angle_radians), -np.sin(angle_radians)],
                                [np.sin(angle_radians), np.cos(angle_radians)]])
    # Apply the rotation matrix to each vertex of the triangle
    rotated_triangle = triangle @ rotation_matrix.T
    return rotated_triangle

# Rotate the triangle by 45 degrees
rotated_triangle = rotate_triangle(triangle, 45)
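
Rotation matrices preserve distances, so the rotated triangle's side lengths should match the original's. A quick sanity check:

# Side vectors are differences between consecutive vertices (np.roll wraps around)
original_sides = np.linalg.norm(np.roll(triangle, -1, axis=0) - triangle, axis=1)
rotated_sides = np.linalg.norm(np.roll(rotated_triangle, -1, axis=0) - rotated_triangle, axis=1)
print(np.allclose(original_sides, rotated_sides))  # True: rotation is an isometry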

Scientific Computing

SciPy provides functions for numerical calculus, optimisation, signal processing, and statistics, among other areas.

from scipy.optimize import minimize

# A simple quadratic: f(x) = x^2 + 4x + 4 = (x + 2)^2, so the minimum
# is at x = -2, where y = 0 (see the DataViz example above)
def f(x):
    return x**2 + 4*x + 4

# Minimize the function, i.e., search for the x at which f(x) is smallest
result = minimize(f, x0=0)  # x0=0 is the initial guess for the search
print(f"Minimum at x = {result.x[0]:.4f}, y = {result.fun:.4f}")

Machine Learning

scikit-learn provides simple and efficient tools for machine learning, data mining, and data analysis.

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

# Define the quadratic function
def f(x): return x**2 + 4*x + 4

# Generate 'x' values (as a column vector, since sklearn expects 2-D inputs)
# and the corresponding 'y' values from the quadratic function
x = np.linspace(-10, 10, 400).reshape(-1, 1)
y = f(x)

# Expand x into polynomial features [1, x, x^2]
poly = PolynomialFeatures(degree=2, include_bias=True)
X = poly.fit_transform(x)

# Split data into training and test sets (fixed random_state for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Linear regression; the bias column from PolynomialFeatures plays
# the role of the intercept, so fit_intercept is disabled
regressor = LinearRegression(fit_intercept=False)
regressor.fit(X_train, y_train)
predictions = regressor.predict(poly.transform(np.array([[-8], [-6], [-4], [-2], [0], [2], [4], [6], [8]])))
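
Because the training data is a noise-free quadratic, the fitted coefficients should recover the true values almost exactly (the expected values below are stated from the construction of the data, not taken from a run):

print("Coefficients (bias, x, x^2):", regressor.coef_.ravel())  # expect approximately [4, 4, 1]
print("Test R^2:", regressor.score(X_test, y_test))             # expect approximately 1.0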

Statistics and Data Analytics

pandas provides a wide range of functionalities for handling, analyzing, and summarizing datasets. It is one of the most powerful libraries for data manipulation in Python, often used in data science and analytics workflows.

import numpy as np
import pandas as pd

# Creating a mock dataset for multimodal product sales (A, B, C, D)
np.random.seed(0)  # For reproducibility

# Generating multimodal data for each product
data = {
    'Product A': np.concatenate([np.random.normal(200, 30, 50), np.random.normal(300, 20, 50)]),  # Bimodal
    'Product B': np.random.normal(150, 20, 100),  # Unimodal
    'Product C': np.concatenate([np.random.normal(300, 50, 30), np.random.normal(400, 30, 30), np.random.normal(250, 20, 40)]),  # Trimodal
    'Product D': np.concatenate([np.random.normal(250, 40, 70), np.random.normal(180, 25, 30)])  # Bimodal
}

# Create a DataFrame
df = pd.DataFrame(data)

# Show descriptive statistics
statistics = df.describe()

print("The statistics of the data is ", statistics)

Deep Learning

PyTorch is a common choice for deep learning tasks; TensorFlow is an alternative, though it is less popular in academia.

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Load datasets (normalize the 3 RGB channels to [-1, 1])
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,)*3, (0.5,)*3)])
trainset = torchvision.datasets.CIFAR10('./data', train=True, transform=transform, download=True)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)
testset = torchvision.datasets.CIFAR10('./data', train=False, transform=transform, download=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)

# Define the neural network using Sequential
# 32x32 input -> Conv(3->32, k=3): 30x30 -> MaxPool(2): 15x15
#             -> Conv(32->64, k=3): 13x13 -> MaxPool(2): 6x6, hence 64*6*6 flattened features
net = nn.Sequential(
    nn.Conv2d(3, 32, 3), nn.ReLU(), nn.MaxPool2d(2, 2),
    nn.Conv2d(32, 64, 3), nn.ReLU(), nn.MaxPool2d(2, 2),
    nn.Flatten(), nn.Linear(64 * 6 * 6, 512), nn.ReLU(), nn.Linear(512, 10)
)

# Setup optimizer and loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Training loop
for epoch in range(10):
    for i, (inputs, labels) in enumerate(trainloader):
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 200 == 199:
            print(f"[{epoch + 1}, {i + 1}] loss: {loss.item():.3f}")

# Evaluation: switch to eval mode and disable gradient tracking for inference
net.eval()
with torch.no_grad():
    correct = sum((net(images).argmax(1) == labels).sum().item() for images, labels in testloader)
print(f"Accuracy: {100 * correct / len(testset):.2f}%")

Data Storage

SQLAlchemy, redis-py, and pymongo enable interaction with various storage solutions, from relational databases to NoSQL data stores and in-memory caches.

from sqlalchemy import create_engine, Column, Integer, String, Sequence
from sqlalchemy.orm import declarative_base, sessionmaker  # declarative_base lives in sqlalchemy.orm since 1.4

Base = declarative_base()

# 1. Connect to SQLite and create a new db file named "example.db"
engine = create_engine('sqlite:///example.db')

# 2. Define a new table (a new class) named "User"
class User(Base):
    __tablename__ = 'users'
    id = Column(Integer, Sequence('user_id_seq'), primary_key=True)
    name = Column(String(50))
    age = Column(Integer)
    address = Column(String(100))

# Create the table in the database
Base.metadata.create_all(engine)

# 3. Insert some records
Session = sessionmaker(bind=engine)
session = Session()

# Adding single record
new_user = User(name='John Doe', age=28, address='1234 Elm Street')
session.add(new_user)

# Adding multiple records
users = [
    User(name='Jane Smith', age=25, address='5678 Oak Street'),
    User(name='Mike Johnson', age=31, address='9102 Pine Avenue')
]
session.add_all(users)

session.commit()

# 4. Query the records
for user in session.query(User).order_by(User.id):
    print(user.id, user.name, user.age, user.address)

session.close()
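
The paragraph above also names redis-py; for contrast with the relational example, here is a minimal sketch against an in-memory cache, assuming a Redis server is reachable on localhost:6379:

import redis

r = redis.Redis(host='localhost', port=6379, db=0)
r.set('user:1:name', 'John Doe')      # store a value under a simple namespaced key
print(r.get('user:1:name').decode())  # values come back as bytes; decode to str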

Data Communication

pyzmq is the Python binding for ZeroMQ, allowing developers to implement complex communication patterns with ease. The example below requires you to run two Python files, e.g., from two different terminals.