# Source code for vistock.stock_indices

"""
Functions for retrieving and managing stock market indices data.

This module provides functions for retrieving ticker symbols for various stock
market indices and identifying index names from their ticker symbols. It
supports querying index tickers from specified sources and obtaining the name
of an index based on its symbol.

Main Functions:
~~~~~~~~~~~~~~~
- get_tickers(source): Retrieve ticker symbols for a specified stock market
  index source.
- get_name(index_symbol): Retrieve the name of an index from its ticker symbol.
- ticker_from_name(name): Get the ticker symbol of an index from its long name.

Usage Examples:
~~~~~~~~~~~~~~~
::

    from vistock.stock_indices import get_tickers, get_name

    # Get tickers for the S&P 500 index
    spx_tickers = get_tickers('SPX')

    # Get tickers for the Philadelphia Semiconductor
    sox_tickers = get_tickers('SOX')

    # Get tickers for the SPX and the SOX
    tickers = get_tickers('SPX+SOX')

    # Get the name of an index from its symbol
    index_name = get_name('^NDX')
"""
# Module metadata.
__version__ = "2.8"
__author__ = "York <york.jong@gmail.com>"
__date__ = "2024/08/06 (initial version) ~ 2024/10/01 (last revision)"

# Public API: the names exported by a star-import of this module.
__all__ = [
    'get_tickers',
    'get_name',
    'ticker_from_name',
]

import functools
from io import StringIO

import requests
import pandas as pd
from bs4 import BeautifulSoup
import yfinance as yf

from . import tw

#------------------------------------------------------------------------------
# Wikipedia look-up
#------------------------------------------------------------------------------

def table_from_wikipedia(article, class_, id):
    """
    Fetches a table from a Wikipedia article and returns it as a pandas
    DataFrame.

    The first ``<table>`` element matching the requested ``class`` and/or
    ``id`` attributes is parsed. A falsy `class_` or `id` (e.g. ``None``)
    means "do not filter on that attribute".

    Parameters
    ----------
    article: str
        The name of the Wikipedia article, as it appears in the URL path
        (it is inserted into the URL as-is, so it must already be
        URL-encoded where necessary).
    class_: str or None
        The class attribute of the table to retrieve.
    id: str or None
        The id attribute of the table to retrieve.

    Returns
    -------
    pandas.DataFrame
        The retrieved table.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    requests.Timeout
        If the server does not answer within the time limit.
    ValueError
        If the article contains no table matching the given attributes.
    """
    url = f"https://en.wikipedia.org/wiki/{article}"
    # Bound the wait so an unresponsive server cannot hang the caller forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    attrs = {}
    if class_:
        attrs['class'] = class_
    if id:
        attrs['id'] = id
    tag = soup.find('table', attrs=attrs)
    if tag is None:
        # Fail with a message that names the article and the filter instead
        # of handing the text 'None' to the HTML table parser.
        raise ValueError(
            f"No table matching {attrs!r} found in Wikipedia article "
            f"'{article}'.")
    return pd.read_html(StringIO(str(tag)))[0]


def symbols_from_wikipedia_table(article,
                                 class_='wikitable sortable',
                                 id='constituents'):
    """
    Extracts stock symbols from a table in a Wikipedia article.

    The table is fetched with `table_from_wikipedia`; the symbols are read
    from its 'Symbol' column or, when that column is absent, from its
    'Ticker' column.

    Parameters
    ----------
    article: str
        The name of the Wikipedia article.
    class_: str, optional
        The class attribute of the table. Defaults to 'wikitable sortable'.
    id: str, optional
        The id attribute of the table. Defaults to 'constituents'.

    Returns
    -------
    list
        A list of stock symbols, or an empty list when the table has neither
        a 'Symbol' nor a 'Ticker' column.
    """
    table = table_from_wikipedia(article, class_, id)
    # 'Symbol' is checked first, so it wins when both columns are present.
    for column in ('Symbol', 'Ticker'):
        if column in table.columns:
            return table[column].tolist()
    return []


# Zero-argument constituent look-ups, each bound to one Wikipedia article.

# S&P 500: class_=None switches off the class filter, so the table is
# located by the default id ('constituents') alone.
spx_tickers = functools.partial(
        symbols_from_wikipedia_table, 'List_of_S%26P_500_companies',
        class_=None)
# Dow Jones Industrial Average: default class and id filters.
djia_tickers = functools.partial(
        symbols_from_wikipedia_table, 'Dow_Jones_Industrial_Average')
# NASDAQ-100: default class and id filters.
ndx_tickers = functools.partial(
        symbols_from_wikipedia_table, 'Nasdaq-100')
# Russell 1000: id=None switches off the id filter, so the first table with
# the default class ('wikitable sortable') is used.
rui_tickers = functools.partial(
        symbols_from_wikipedia_table, 'Russell_1000_Index', id=None)

#------------------------------------------------------------------------------
# bullishbears.com look-up
#------------------------------------------------------------------------------

def table_from_bullishbears(article):
    """
    Reads the first HTML table of a Bullish Bears article.

    Parameters
    ----------
    article: str
        The relative path of the article on the Bullish Bears website; it is
        appended to ``https://bullishbears.com/``.

    Returns
    -------
    pd.DataFrame
        The first table found at that URL, parsed into a Pandas DataFrame.
    """
    tables = pd.read_html(f'https://bullishbears.com/{article}')
    return tables[0]


def symbols_from_bullishbears_table(article):
    """
    Collects the stock symbols listed in a Bullish Bears article table.

    Parameters
    ----------
    article: str
        The relative path of the article on the Bullish Bears website.

    Returns
    -------
    list
        The string entries of the table's "Symbol" column, in table order.
        Entries that are not strings are left out.

    Examples
    --------
    >>> symbols = symbols_from_bullishbears_table('sp500-stocks-list')
    >>> len(symbols)
    502
    >>> symbols = symbols_from_bullishbears_table('dow-jones-stocks-list')
    >>> len(symbols)
    30
    >>> symbols = symbols_from_bullishbears_table('nasdaq-stocks-list')
    >>> len(symbols)
    100
    >>> symbols = symbols_from_bullishbears_table('russell-2000-stocks-list')
    >>> 'SMCI' in symbols
    True
    >>> len(symbols) > 1990
    True
    """
    table = table_from_bullishbears(article)
    # Non-string cells (presumably blanks parsed as NaN) are not symbols.
    return [entry for entry in table['Symbol'] if isinstance(entry, str)]


# Russell 2000 constituents: zero-argument look-up bound to the Bullish Bears
# 'russell-2000-stocks-list' article.
rut_tickers = functools.partial(
        symbols_from_bullishbears_table, 'russell-2000-stocks-list')


#------------------------------------------------------------------------------
# StatementDog.com look-up
#------------------------------------------------------------------------------

def us_listed_tickers():
    """
    Fetches all symbols of U.S. listed stocks from the StatementDog US stock
    list page and returns them as a list.

    This function mimics the Wilshire 5000 Index by using the StatementDog
    website's US stock list as a proxy for all listed U.S. stocks. The list
    includes stocks from both the New York Stock Exchange (NYSE) and the
    NASDAQ, and serves as a comprehensive dataset of publicly traded companies
    in the U.S.

    Returns
    -------
    list
        A list of stock symbols from the StatementDog US stock list.
        Returns an empty list if the request fails, i.e. on a connection
        error, a timeout, or any response status other than 200. A message
        is printed in that case.

    Examples
    --------
    >>> tickers = us_listed_tickers()
    >>> len(tickers) > 5000
    True
    >>> 'AAPL' in tickers
    True
    """
    url = 'https://statementdog.com/us-stock-list'

    # Connection errors and timeouts are treated like a bad status code so
    # that the documented "empty list on failure" contract really holds.
    # The timeout keeps an unresponsive server from hanging the caller.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        print("Failed to retrieve the page.")
        return []

    if response.status_code != 200:
        print("Failed to retrieve the page.")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Each ticker is the text of a <span class="us-stock-company-ticker">.
    return [span.text for span
            in soup.find_all('span', class_='us-stock-company-ticker')]


#------------------------------------------------------------------------------
# Manually keyed stock list
#------------------------------------------------------------------------------

def sox_tickers():
    """
    Return the constituents of the SOX (PHLX Semiconductor) index.

    The symbols come from a hand-maintained list embedded in this function;
    no network access is involved.

    Note
    ----
    This list may not be up-to-date and requires periodic updates.

    Returns
    -------
    list
        A new list of SOX tickers on every call.

    Examples
    --------
    >>> tickers = sox_tickers()
    >>> len(tickers) == 30
    True
    >>> 'NVDA' in tickers
    True
    >>> 'AVGO' in tickers
    True
    >>> 'TSM' in tickers
    True
    """
    constituents = (
        'AMD', 'ADI', 'AMAT', 'ASML', 'AZTA', 'AVGO',
        'COHR', 'ENTG', 'GFS', 'INTC', 'IPGP', 'KLAC',
        'LRCX', 'LSCC', 'MRVL', 'MCHP', 'MU', 'MPWR',
        'NOVT', 'NVDA', 'NXPI', 'ON', 'QRVO', 'QCOM',
        'SWKS', 'SYNA', 'TSM', 'TER', 'TXN', 'WOLF',
    )
    return list(constituents)

#------------------------------------------------------------------------------
# Published Functions
#------------------------------------------------------------------------------


# [docs] -- link marker left over from the rendered documentation page, not code
def get_tickers(source):
    """
    Retrieve a list of tickers for the specified index or combined indices.

    Every requested source is validated before any data is fetched, and a
    look-up shared by several aliases (e.g. 'SPX+^GSPC') is performed only
    once.

    Parameters
    ----------
    source: str
        The ticker symbol or common abbreviation for the index or indices.
        Matching is case-insensitive and ignores surrounding whitespace.

        - Yahoo Finance ticker symbols (e.g., '^GSPC' for S&P 500, '^NDX'
          for NASDAQ-100).
        - Common abbreviations (e.g., 'SPX' for S&P 500, 'NDX' for
          NASDAQ-100).
        - Multiple indices can be combined using '+' (e.g., '^GSPC+^NDX').

        Possible values include:

        - '^GSPC', 'SPX': S&P 500
        - '^DJI', 'DJIA': Dow Jones Industrial Average
        - '^NDX', 'NDX': NASDAQ-100
        - '^RUI', 'RUI', 'R1000': Russell 1000
        - '^RUT', 'RUT', 'R2000': Russell 2000
        - '^SOX', 'SOX': PHLX Semiconductor
        - '^W5000', 'W5000': Wilshire 5000 Total Market Index (served by
          the U.S. listed stock list)
        - 'U.S.Listed', 'U.S. Listed', 'USLS': U.S. listed stocks
        - '^TWII', 'TWII', 'TWSE': Taiwan Stock Exchange
        - 'TPEX': Taipei Exchange
        - 'ESB': Emerging Stock Board

    Returns
    -------
    list
        A sorted list of unique tickers for the specified source. Symbols
        containing '.' are omitted unless they end with '.TW' or '.TWO'.

    Raises
    ------
    KeyError
        If the provided source is not recognized or does not
        correspond to a known index.

    Examples
    --------
    >>> len(get_tickers('SPX')) >= 500
    True
    >>> len(get_tickers('^GSPC')) >= 500
    True
    >>> len(get_tickers('^NDX')) >= 100
    True
    >>> len(get_tickers('^RUI')) >= 1000
    True
    >>> len(get_tickers('^RUT')) > 1900
    True
    >>> len(get_tickers('^W5000')) > 5000
    True
    >>> len(get_tickers('U.S.Listed')) > 5000
    True
    >>> len(get_tickers('USLS')) > 5000
    True
    >>> 500 < len(get_tickers('^GSPC+^NDX')) < (500+100)
    True
    >>> 500 < len(get_tickers('SPX+SOX+NDX')) < (500+30+100)
    True
    >>> get_tickers('^UNKNOWN')
    Traceback (most recent call last):
        ...
    KeyError: "Index symbol '^UNKNOWN' not found."
    """
    dic = {
        '^GSPC': spx_tickers,
        '^DJI': djia_tickers,
        '^NDX': ndx_tickers,
        '^RUI': rui_tickers,
        '^RUT': rut_tickers,
        '^SOX': sox_tickers,
        '^W5000': us_listed_tickers,
        '^TWII': tw.get_twse_tickers,
        'SPX': spx_tickers,
        'DJIA': djia_tickers,
        'NDX': ndx_tickers,
        'SOX': sox_tickers,
        'RUI': rui_tickers,
        'RUT': rut_tickers,
        'R1000': rui_tickers,
        'R2000': rut_tickers,
        'W5000': us_listed_tickers,
        'U.S.LISTED': us_listed_tickers,
        'U.S. LISTED': us_listed_tickers,
        'USLS': us_listed_tickers,
        'TWII': tw.get_twse_tickers,
        'TWSE': tw.get_twse_tickers,
        'TPEX': tw.get_tpex_tickers,
        'ESB': tw.get_esb_tickers,
    }

    sources = [s.strip().upper() for s in source.split('+')]

    # Resolve every source up front: an unknown name is reported before any
    # (slow) network look-up runs, and aliases of the same look-up collapse
    # into a single call.
    fetchers = []
    for s in sources:
        if s not in dic:
            raise KeyError(f"Index symbol '{s}' not found.")
        fetcher = dic[s]
        if fetcher not in fetchers:
            fetchers.append(fetcher)

    tickers = set()
    for fetch in fetchers:
        tickers.update(fetch())

    # Drop non-string cells (e.g. NaN from a scraped table), which cannot be
    # sorted or tested for '.'. Also filter out symbols containing '.',
    # except for Taiwan stocks (.TW, .TWO), as others may not be supported
    # by yfinance.
    return sorted(
        symbol for symbol in tickers
        if isinstance(symbol, str)
        and ('.' not in symbol or symbol.endswith(('.TW', '.TWO')))
    )




# [docs] -- link marker left over from the rendered documentation page, not code
def ticker_from_name(name):
    """
    Look up the ticker symbol that belongs to an index's long name.

    Parameters
    ----------
    name: str
        The long name of an index, spelled exactly as in the table below
        (the look-up is case-sensitive).

    Returns
    -------
    str
        The ticker symbol.

    Raises
    ------
    KeyError
        If `name` is not one of the known long names.

    Examples
    --------
    >>> ticker_from_name('S&P 500')
    '^GSPC'
    >>> ticker_from_name('Dow Jones Industrial Average')
    '^DJI'
    >>> ticker_from_name('NASDAQ 100')
    '^NDX'
    >>> ticker_from_name('Russell 1000')
    '^RUI'
    >>> ticker_from_name('Wilshire 5000 Total Market Index')
    '^W5000'
    >>> ticker_from_name('Taiwan Weighted Index')
    '^TWII'
    """
    name_ticker_pairs = (
        # United States
        ("S&P 500", "^GSPC"),
        ("Dow Jones Industrial Average", "^DJI"),
        ("NASDAQ 100", "^NDX"),
        ('NASDAQ Composite', "^IXIC"),
        ("Russell 1000", "^RUI"),
        ("Russell 2000", "^RUT"),
        ("PHLX Semiconductor", "^SOX"),
        ("Wilshire 5000 Total Market Index", "^W5000"),
        ("U.S. Listed Stocks", "USLS"),
        # Taiwan
        ("Taiwan Weighted Index", "^TWII"),
        # Europe
        ('Euro Stoxx 50', '^STOXX50E'),
        ('FTSE 100', '^FTSE'),              # London, UK
        ('DAX', '^GDAXI'),                  # Frankfurt, Germany
        ('CAC 40', '^FCHI'),                # Paris, France
        # Canada
        ('S&P/TSX Composite', '^GSPTSE'),
        # Asia
        ('Nikkei 225', '^N225'),            # Japan
        ('Hang Seng Index', '^HSI'),        # Hong Kong
    )
    tickers_by_name = dict(name_ticker_pairs)
    return tickers_by_name[name]




# [docs] -- link marker left over from the rendered documentation page, not code
def get_name(index_symbol):
    """
    Return the name of the index based on the provided symbol.

    Known symbols are answered from a built-in table. For any other symbol a
    best-effort look-up is attempted: Chinese names and Taiwan stock symbols
    are returned unchanged, and symbols that Yahoo Finance reports as an ETF
    or an index are translated to their short name. If that look-up fails for
    any reason, the symbol itself is returned.

    Parameters
    ----------
    index_symbol: str
        The ticker symbol or common abbreviation for the index. The built-in
        table is matched case-sensitively.

        - Yahoo Finance ticker symbols (e.g., '^GSPC' for S&P 500, '^NDX' for
          NASDAQ-100).
        - Common abbreviations (e.g., 'SPX' for S&P 500, 'NDX' for
          NASDAQ-100).

        Possible values include:

        - '^GSPC', 'SPX': S&P 500
        - '^DJI', 'DJIA': Dow Jones Industrial Average
        - '^NDX', 'NDX': NASDAQ 100
        - '^IXIC', 'COMP': NASDAQ Composite
        - '^RUI', 'RUI', 'R1000': Russell 1000
        - '^RUT', 'RUT', 'R2000': Russell 2000
        - '^SOX', 'SOX': PHLX Semiconductor
        - '^NYA', 'NYA': NYSE Composite
        - '^MID', 'MID': S&P MidCap 400
        - '^TWII', 'TWII': Taiwan Weighted Index
        - 'TWSE': Taiwan Stock Exchange
        - 'TPEX': Taipei Exchange
        - 'ESB': Emerging Stock Board
        - '^W5000', 'W5000': Wilshire 5000 Total Market Index
        - 'USLS': U.S. Listed Stocks
        - '^STOXX50E': Euro Stoxx 50
        - '^FTSE': FTSE 100
        - '^GDAXI': DAX
        - '^FCHI': CAC 40
        - '^GSPTSE': S&P/TSX Composite
        - '^N225': Nikkei 225
        - '^HSI': Hang Seng Index

    Returns
    -------
    str
        The name of the index if found. If not found, returns the original index_symbol.

    Examples
    --------
    >>> get_name('SPX')
    'S&P 500'
    >>> get_name('^GSPC')
    'S&P 500'
    >>> get_name('^DJI')
    'Dow Jones Industrial Average'
    >>> get_name('^IXIC')
    'NASDAQ Composite'
    >>> get_name('^NDX')
    'NASDAQ 100'
    >>> get_name('^RUI')
    'Russell 1000'
    >>> get_name('^RUT')
    'Russell 2000'
    >>> get_name('^SOX')
    'PHLX Semiconductor'
    >>> get_name('^W5000')
    'Wilshire 5000 Total Market Index'
    >>> get_name('^TWII')
    'Taiwan Weighted Index'
    >>> get_name('^HSI')
    'Hang Seng Index'
    >>> get_name('SOXX')
    'iShares Semiconductor ETF'
    >>> get_name('AAPL')
    'AAPL'
    """
    dic = {
        '^GSPC': 'S&P 500',
        '^DJI': 'Dow Jones Industrial Average',
        '^NDX': 'NASDAQ 100',
        '^IXIC': 'NASDAQ Composite',
        '^RUI': 'Russell 1000',
        '^RUT': 'Russell 2000',
        '^SOX': 'PHLX Semiconductor',
        '^W5000': 'Wilshire 5000 Total Market Index',
        '^NYA': 'NYSE Composite',
        '^MID': 'S&P MidCap 400',
        '^TWII': 'Taiwan Weighted Index',
        'SPX': 'S&P 500',
        'DJIA': 'Dow Jones Industrial Average',
        'NDX': 'NASDAQ 100',
        'COMP': 'NASDAQ Composite',
        'RUI': 'Russell 1000',
        'RUT': 'Russell 2000',
        'R1000': 'Russell 1000',
        'R2000': 'Russell 2000',
        'SOX': 'PHLX Semiconductor',
        'W5000': 'Wilshire 5000 Total Market Index',
        'USLS': 'U.S. Listed Stocks',
        'NYA': 'NYSE Composite',
        'MID': 'S&P MidCap 400',
        'TWII': 'Taiwan Weighted Index',
        'TWSE': 'Taiwan Stock Exchange',
        'TPEX': 'Taipei Exchange',
        'ESB': 'Emerging Stock Board',      # was misspelled 'Boar'
        '^STOXX50E': 'Euro Stoxx 50',       # Europe
        '^FTSE': 'FTSE 100',                # London, UK
        '^GDAXI': 'DAX',                    # Frankfurt, Germany
        '^FCHI': 'CAC 40',                  # Paris, France
        '^GSPTSE': 'S&P/TSX Composite',     # Canada
        '^N225': 'Nikkei 225',              # Japan
        '^HSI': 'Hang Seng Index',          # Hong Kong
    }
    if index_symbol in dic:
        return dic[index_symbol]
    try:
        if tw.is_chinese(index_symbol) or tw.is_taiwan_stock(index_symbol):
            return index_symbol
        # Build the Ticker once and reuse its info for both fields instead
        # of constructing a second Ticker for the same symbol.
        info = yf.Ticker(index_symbol).info
        if info['quoteType'] in ('ETF', 'INDEX'):
            return info['shortName']
    except Exception:
        # Deliberate best effort: any look-up failure (network error,
        # missing field, unknown symbol, ...) falls back to the symbol
        # itself. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt and SystemExit still propagate.
        pass
    return index_symbol



#------------------------------------------------------------------------------
# Test
#------------------------------------------------------------------------------

if __name__ == '__main__':
    # Run the module's doctests and report how long they took.
    import doctest
    import time

    began = time.time()
    doctest.testmod()
    elapsed = time.time() - began
    print(f"Execution time: {elapsed:.4f} seconds")