From 0639ea9fb47c76ea5c8505714815d60b24b6c97d Mon Sep 17 00:00:00 2001
From: William Davies <williamthadiusdavies@gmail.com>
Date: Sun, 7 Jun 2026 21:26:45 -0500
Subject: [PATCH] Added Fama-French 3-factor and 5-factor expected return
 models

---
 README.md                      |   4 +
 docs/ExpectedReturns.rst       |  16 +-
 pypfopt/expected_returns.py    | 125 ++++++++++++++-
 tests/test_expected_returns.py | 274 +++++++++++++++++++++++++++++++++
 4 files changed, 417 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6aaa632e..9405f790 100755
--- a/README.md
+++ b/README.md
@@ -211,6 +211,10 @@ A far more comprehensive version of this can be found on [ReadTheDocs](https://p
 - Capital Asset Pricing Model (CAPM):
   - a simple model to predict returns based on the beta to the market
   - this is used all over finance!
+- Fama-French 3-factor model:
+  - estimates expected returns using exposures to market, size, and value factors
+- Fama-French 5-factor model:
+  - extends FF3 with profitability and investment factors
 
 ### Risk models (covariance)
 
diff --git a/docs/ExpectedReturns.rst b/docs/ExpectedReturns.rst
index a1c7247c..2cbc6e54 100755
--- a/docs/ExpectedReturns.rst
+++ b/docs/ExpectedReturns.rst
@@ -6,7 +6,7 @@ Expected Returns
 
 Mean-variance optimization requires knowledge of the expected returns. In practice,
 these are rather difficult to know with any certainty. Thus the best we can do is to
-come up with estimates, for example by extrapolating historical data, This is the
+come up with estimates, for example by extrapolating historical data or using factor models. This is the
 main flaw in mean-variance optimization – the optimization procedure is sound, and provides
 strong mathematical guarantees, *given the correct inputs*. This is one of the reasons
 why I have emphasised modularity: users should be able to come up with their own
@@ -51,6 +51,20 @@ superior models and feed them into the optimizer.
     .. autofunction:: returns_from_prices
 
     .. autofunction:: prices_from_returns
+
+    .. autofunction:: ff_return
+
+        Estimates expected returns using the Fama-French factor models.
+
+        Set ``model="ff3"`` to use the three-factor specification with
+        market (Mkt-RF), size (SMB), and value (HML) factors.
+
+        Set ``model="ff5"`` to use the five-factor specification, which
+        additionally includes profitability (RMW) and investment (CMA)
+        factors.
+
+        Factor data must be supplied via the ``factor_data`` argument and
+        contain the required factor return columns indexed by date.
 
 
 .. References
diff --git a/pypfopt/expected_returns.py b/pypfopt/expected_returns.py
index bbbf69c6..2a739206 100644
--- a/pypfopt/expected_returns.py
+++ b/pypfopt/expected_returns.py
@@ -16,6 +16,7 @@
     - mean historical return
     - exponentially weighted mean historical return
     - CAPM estimate of returns
+    - Fama-French 3-factor and 5-factor estimates of returns
 
 Additionally, we provide utility functions to convert from returns to prices and vice-versa.
 """
@@ -105,6 +106,8 @@ def return_model(prices, method="mean_historical_return", **kwargs):
         - ``mean_historical_return``
         - ``ema_historical_return``
         - ``capm_return``
+        - ``ff3_return``
+        - ``ff5_return``
 
     Raises
     ------
@@ -114,7 +117,7 @@ def return_model(prices, method="mean_historical_return", **kwargs):
     Returns
     -------
     pd.DataFrame
-        annualised sample covariance matrix
+        annualised expected return estimate for each asset
     """
     if method == "mean_historical_return":
         return mean_historical_return(prices, **kwargs)
@@ -122,6 +125,10 @@ def return_model(prices, method="mean_historical_return", **kwargs):
         return ema_historical_return(prices, **kwargs)
     elif method == "capm_return":
         return capm_return(prices, **kwargs)
+    elif method == "ff3_return":
+        return ff_return(prices, model="ff3", **kwargs)
+    elif method == "ff5_return":
+        return ff_return(prices, model="ff5", **kwargs)
     else:
         raise NotImplementedError("Return model {} not implemented".format(method))
 
@@ -313,3 +320,119 @@ def capm_return(
 
     # CAPM formula
     return risk_free_rate + betas * (mkt_mean_ret - risk_free_rate)
+
+
+def ff_return(
+    prices,
+    factor_data,
+    returns_data=False,
+    model="ff3",
+    compounding=True,
+    frequency=252,
+    log_returns=False,
+):
+    """
+    Compute a return estimate using the Fama-French factor model.
+
+    Parameters
+    ----------
+    prices : pd.DataFrame
+        adjusted closing prices of the assets.
+    factor_data : pd.DataFrame
+        factor returns indexed by date.
+
+        Required columns for ff3:
+        - RF
+        - Mkt-RF
+        - SMB
+        - HML
+
+        Additional required columns for ff5:
+        - RMW
+        - CMA
+
+    returns_data : bool, optional
+        if true, prices is interpreted as returns.
+    model : str, optional
+        one of {"ff3", "ff5"}.
+    compounding : bool, optional
+        use geometric annualisation if True.
+    frequency : int, optional
+        periods per year.
+    log_returns : bool, optional
+        whether to compute log returns.
+
+    Returns
+    -------
+    pd.Series
+        annualised expected returns.
+    """
+
+    if not isinstance(prices, pd.DataFrame):
+        warnings.warn("prices are not in a dataframe", RuntimeWarning)
+        prices = pd.DataFrame(prices)
+
+    if not isinstance(factor_data, pd.DataFrame):
+        warnings.warn("factor_data is not in a dataframe", RuntimeWarning)
+        factor_data = pd.DataFrame(factor_data)
+
+    if model not in {"ff3", "ff5"}:
+        raise ValueError("model must be either 'ff3' or 'ff5'")
+
+    if returns_data:
+        returns = prices.copy()
+    else:
+        returns = returns_from_prices(prices, log_returns)
+
+    _check_returns(returns)
+
+    required = ["RF", "Mkt-RF", "SMB", "HML"]
+    if model == "ff5":
+        required.extend(["RMW", "CMA"])
+
+    missing = [c for c in required if c not in factor_data.columns]
+    if missing:
+        raise ValueError(f"factor_data missing required columns: {missing}")
+
+    common_index = returns.index.intersection(factor_data.index)
+    if len(common_index) == 0:
+        raise ValueError("No overlapping dates between asset returns and factor data")
+
+    returns = returns.loc[common_index]
+    factors = factor_data.loc[common_index, required].copy()
+
+    data = returns.join(factors, how="inner").dropna()
+    if data.empty:
+        raise ValueError("No valid rows after aligning returns and factor data")
+
+    returns = data[returns.columns]
+    factors = data[required]
+
+    excess_returns = returns.sub(factors["RF"], axis=0)
+
+    factor_cols = ["Mkt-RF", "SMB", "HML"]
+    if model == "ff5":
+        factor_cols.extend(["RMW", "CMA"])
+
+    X = np.column_stack([np.ones(len(factors)), factors[factor_cols].to_numpy()])
+    factor_means = factors[factor_cols].mean().to_numpy()
+
+    expected_returns = {}
+    rf_mean = factors["RF"].mean()
+
+    for asset in excess_returns.columns:
+        y = excess_returns[asset].to_numpy()
+        beta = np.linalg.lstsq(X, y, rcond=None)[0]
+        alpha = beta[0]
+        factor_loadings = beta[1:]
+
+        expected_period_return = rf_mean + alpha + factor_loadings @ factor_means
+
+        if compounding:
+            expected_return = (1 + expected_period_return) ** frequency - 1
+        else:
+            expected_return = expected_period_return * frequency
+
+        expected_returns[asset] = expected_return
+
+    return pd.Series(expected_returns, dtype="float64")
diff --git a/tests/test_expected_returns.py b/tests/test_expected_returns.py
index 4e16583b..d82112be 100644
--- a/tests/test_expected_returns.py
+++ b/tests/test_expected_returns.py
@@ -299,3 +299,277 @@ def test_log_return_passthrough():
         except AssertionError:
             return
         assert False
+
+
+def _make_ff_test_data(model="ff3", n_periods=120, n_assets=4, seed=42):
+    rng = np.random.default_rng(seed)
+    dates = pd.date_range("2020-01-01", periods=n_periods, freq="B")
+
+    factor_cols = ["Mkt-RF", "SMB", "HML"]
+    if model == "ff5":
+        factor_cols += ["RMW", "CMA"]
+
+    factors = pd.DataFrame(
+        {
+            "RF": rng.normal(0.0001, 0.00002, n_periods),
+            "Mkt-RF": rng.normal(0.0004, 0.008, n_periods),
+            "SMB": rng.normal(0.0002, 0.004, n_periods),
+            "HML": rng.normal(0.0001, 0.004, n_periods),
+        },
+        index=dates,
+    )
+
+    if model == "ff5":
+        factors["RMW"] = rng.normal(0.00015, 0.003, n_periods)
+        factors["CMA"] = rng.normal(0.0001, 0.003, n_periods)
+
+    betas = rng.normal(0.6, 0.15, size=(n_assets, len(factor_cols)))
+    alphas = rng.normal(0.0002, 0.00005, size=n_assets)
+
+    factor_matrix = factors[factor_cols].to_numpy()
+    excess_returns = alphas + factor_matrix @ betas.T
+    returns = excess_returns + factors["RF"].to_numpy()[:, None]
+
+    returns_df = pd.DataFrame(
+        returns, index=dates, columns=[f"Asset {i+1}" for i in range(n_assets)]
+    )
+    prices = expected_returns.prices_from_returns(returns_df)
+    prices.columns = returns_df.columns
+
+    return prices, returns_df, factors
+
+
+def test_ff3_return():
+    prices, _, factors = _make_ff_test_data(model="ff3")
+    mu = expected_returns.ff_return(prices, factors, model="ff3")
+
+    assert isinstance(mu, pd.Series)
+    assert list(mu.index) == list(prices.columns)
+    assert mu.notnull().all()
+    assert mu.dtype == "float64"
+
+
+def test_ff5_return():
+    prices, _, factors = _make_ff_test_data(model="ff5")
+    mu = expected_returns.ff_return(prices, factors, model="ff5")
+
+    assert isinstance(mu, pd.Series)
+    assert list(mu.index) == list(prices.columns)
+    assert mu.notnull().all()
+    assert mu.dtype == "float64"
+
+
+def test_ff_return_missing_columns():
+    prices, _, factors = _make_ff_test_data(model="ff3")
+    factors = factors.drop(columns=["HML"])
+
+    with pytest.raises(ValueError, match="missing required columns"):
+        expected_returns.ff_return(prices, factors, model="ff3")
+
+
+def test_ff_return_invalid_model():
+    prices, _, factors = _make_ff_test_data(model="ff3")
+
+    with pytest.raises(ValueError, match="model must be either"):
+        expected_returns.ff_return(prices, factors, model="ff4")
+
+
+def test_ff_return_no_overlap():
+    prices, _, factors = _make_ff_test_data(model="ff3")
+    factors.index = pd.date_range("2030-01-01", periods=len(factors), freq="B")
+
+    with pytest.raises(ValueError, match="No overlapping dates"):
+        expected_returns.ff_return(prices, factors, model="ff3")
+
+
+def test_return_model_ff3_return():
+    prices, _, factors = _make_ff_test_data(model="ff3")
+
+    mu1 = expected_returns.return_model(prices, method="ff3_return", factor_data=factors)
+    mu2 = expected_returns.ff_return(prices, factors)
+
+    pd.testing.assert_series_equal(mu1, mu2)
+
+
+def test_return_model_ff5_return():
+    prices, _, factors = _make_ff_test_data(model="ff5")
+
+    mu1 = expected_returns.return_model(prices, method="ff5_return", factor_data=factors)
+    # Compare against the same five-factor model (ff_return defaults to "ff3").
+    mu2 = expected_returns.ff_return(prices, factors, model="ff5")
+    pd.testing.assert_series_equal(mu1, mu2)
+
+
+def _make_ff_known_data(model="ff3"):
+    rng = np.random.default_rng(7)
+    dates = pd.date_range("2021-01-01", periods=80, freq="B")
+
+    factors = pd.DataFrame(
+        {
+            "RF": rng.normal(0.0001, 0.00002, len(dates)),
+            "Mkt-RF": rng.normal(0.0004, 0.008, len(dates)),
+            "SMB": rng.normal(0.0002, 0.004, len(dates)),
+            "HML": rng.normal(0.0001, 0.004, len(dates)),
+        },
+        index=dates,
+    )
+
+    if model == "ff5":
+        factors["RMW"] = rng.normal(0.00015, 0.003, len(dates))
+        factors["CMA"] = rng.normal(0.00010, 0.003, len(dates))
+
+    factor_cols = ["Mkt-RF", "SMB", "HML"]
+    if model == "ff5":
+        factor_cols += ["RMW", "CMA"]
+
+    alphas = np.array([0.00020, 0.00010, -0.00005])
+    betas = np.array(
+        [
+            [1.10, 0.40, -0.20, 0.00, 0.00],
+            [0.80, -0.10, 0.35, 0.00, 0.00],
+            [1.25, 0.05, -0.15, 0.00, 0.00],
+        ]
+    )
+
+    if model == "ff3":
+        betas = betas[:, :3]
+
+    factor_matrix = factors[factor_cols].to_numpy()
+    returns = factors["RF"].to_numpy()[:, None] + alphas + factor_matrix @ betas.T
+
+    returns_df = pd.DataFrame(
+        returns, index=dates, columns=["Asset 1", "Asset 2", "Asset 3"]
+    )
+    return returns_df, factors, alphas, betas, factor_cols
+
+
+def test_ff5_return_missing_columns():
+    prices, _, factors = _make_ff_test_data(model="ff5")
+    factors = factors.drop(columns=["CMA"])
+
+    with pytest.raises(ValueError, match="missing required columns"):
+        expected_returns.ff_return(prices, factors, model="ff5")
+
+
+def test_ff_return_no_valid_rows_after_dropna():
+    prices, _, factors = _make_ff_test_data(model="ff3")
+    factors = factors.copy()
+    factors[["RF", "Mkt-RF", "SMB", "HML"]] = np.nan
+
+    with pytest.raises(ValueError, match="No valid rows after aligning"):
+        expected_returns.ff_return(prices, factors, model="ff3")
+
+
+def test_ff3_return_recovers_known_linear_model():
+    returns_df, factors, alphas, betas, factor_cols = _make_ff_known_data(model="ff3")
+
+    mu = expected_returns.ff_return(
+        returns_df,
+        factors,
+        returns_data=True,
+        model="ff3",
+        compounding=False,
+        frequency=1,
+    )
+
+    expected = (
+        factors["RF"].mean()
+        + alphas
+        + betas @ factors[factor_cols].mean().to_numpy()
+    )
+    expected = pd.Series(expected, index=returns_df.columns, dtype="float64")
+
+    pd.testing.assert_series_equal(mu, expected, check_exact=False, rtol=1e-10, atol=1e-12)
+
+
+def test_ff5_return_recovers_known_linear_model():
+    returns_df, factors, alphas, betas, factor_cols = _make_ff_known_data(model="ff5")
+
+    mu = expected_returns.ff_return(
+        returns_df,
+        factors,
+        returns_data=True,
+        model="ff5",
+        compounding=False,
+        frequency=1,
+    )
+
+    expected = (
+        factors["RF"].mean()
+        + alphas
+        + betas @ factors[factor_cols].mean().to_numpy()
+    )
+
+    expected = pd.Series(
+        expected,
+        index=returns_df.columns,
+        dtype="float64",
+    )
+
+    pd.testing.assert_series_equal(
+        mu,
+        expected,
+        check_exact=False,
+        rtol=1e-10,
+        atol=1e-12,
+    )
+
+
+def test_ff_return_returns_data():
+    prices, _, factors = _make_ff_test_data(model="ff3")
+
+    returns_df = expected_returns.returns_from_prices(prices)
+
+    mu_from_prices = expected_returns.ff_return(
+        prices,
+        factors,
+        model="ff3",
+    )
+
+    mu_from_returns = expected_returns.ff_return(
+        returns_df,
+        factors,
+        model="ff3",
+        returns_data=True,
+    )
+
+    pd.testing.assert_series_equal(mu_from_prices, mu_from_returns)
+
+
+def test_ff_return_compounding_branch():
+    returns_df, factors, alphas, betas, factor_cols = _make_ff_known_data(model="ff3")
+
+    mu = expected_returns.ff_return(
+        returns_df,
+        factors,
+        returns_data=True,
+        model="ff3",
+        compounding=True,
+        frequency=252,
+    )
+
+    expected_period_return = (
+        factors["RF"].mean()
+        + alphas
+        + betas @ factors[factor_cols].mean().to_numpy()
+    )
+    expected = pd.Series(
+        (1 + expected_period_return) ** 252 - 1,
+        index=returns_df.columns,
+        dtype="float64",
+    )
+
+    pd.testing.assert_series_equal(mu, expected, check_exact=False, rtol=1e-10, atol=1e-12)
+
+
+def test_ff_return_ignores_extra_factor_columns():
+    prices, _, factors = _make_ff_test_data(model="ff3")
+    factors = factors.copy()
+    factors["Unused"] = 123.456
+
+    mu_with_extra = expected_returns.ff_return(prices, factors, model="ff3")
+    mu_without_extra = expected_returns.ff_return(
+        prices, factors.drop(columns=["Unused"]), model="ff3"
+    )
+
+    pd.testing.assert_series_equal(mu_with_extra, mu_without_extra)