Streamlit Apps

Python SQL Streamlit Pandas

About the project

I started using Streamlit almost a year ago and since then any time I need to visualize data for either reports or displaying live data, I use Streamlit. It is so simple and since I have a background using Python, it makes visualization a piece of cake. Below you can find 3 projects I have worked on using Streamlit, eahc using a different source of data (Google Sheet, CSV file, and from a PG Database).

Staff Message Data

Version 1

My first project was using a csv file as input to display data from staff on a month to month basis. Initially the data was stored on a database and then a csv was generated to then use for the Streamlit app. Below you can find an example of the app with some data selected as well as the code.

import streamlit as st
from urllib.error import URLError
import pandas as pd
import altair as alt
from datetime import datetime
import hmac

def check_password():
    """Returns `True` if the user had the correct password."""

    def password_entered():
        """Checks whether a password entered by the user is correct."""
        if hmac.compare_digest(st.session_state["password"], st.secrets["password"]):
            st.session_state["password_correct"] = True
            del st.session_state["password"]  # Don't store the password.
        else:
            st.session_state["password_correct"] = False

    # Return True if the passward is validated.
    if st.session_state.get("password_correct", False):
        return True

    # Show input for password.
    st.text_input(
        "Password", type="password", on_change=password_entered, key="password"
    )
    if "password_correct" in st.session_state:
        st.error("😕 Password incorrect")
    return False


if not check_password():
    st.stop()  # Do not continue if check_password is not True.

@st.cache_data
def get_UN_data():
    df = pd.read_csv("full_staff_dataset.csv")
    df = df.drop('nameid', axis=1)
    return df

    print(df)

def remove_partial_elements(main_list, filter_list):
    filtered_list = []
    
    for item in main_list:
        if any(part in item for part in filter_list):
            filtered_list.append(item)
    
    return filtered_list

try:
    # Title and removing excess space
    st.set_page_config(layout="wide")
    st.title("Staff Data")
    st.markdown("""
        <style>
               .block-container {
                    padding-top: 1rem;
                    padding-bottom: 0rem;
                    padding-left: 5rem;
                    padding-right: 5rem;
                }
        </style>
        """, unsafe_allow_html=True)

    # Get DF
    df = get_UN_data()

    # Hardset Name column
    selected_col = "name"

    # Allow 2 dropdowns side by side
    dropdown_columns = st.columns(2)
    dropdown_columns2 = st.columns(2)

    # Select Rows
    with dropdown_columns[0]:
        staff = st.multiselect(
            "Choose Staff Member(s)", df['name']
        )
    #Filter Rows based on staff wanted
    filtered_df = df[df['name'].isin(staff)]

    # Select date(s) you want
    # Gets the current date
    date_options = []
    current_month = datetime.now().month
    current_year = datetime.now().year
    monthyear = ""
    if current_month > 9:
        monthyear = str(current_month) + "/" + str(current_year).replace("20", "")
        current_year = int(str(current_year).replace("20", ""))
    else:
        monthyear = "0" + str(current_month) + "/" + str(current_year).replace("20", "")
        current_month = int("0" + str(current_month))
        current_year = int(str(current_year).replace("20", ""))
    # Gets a list of available dates
    start_month = 11
    start_year = 22
    while True:
        date_options.append(str(start_month) + "/" + str(start_year))
        if start_month >= current_month and start_year >= current_year:
            break
        if start_month == 12:
            start_year = start_year + 1
            start_month = 1
        else:
            start_month = start_month + 1

    with dropdown_columns[1]:
        dates = st.multiselect(
            'What date(s) to filter?',
            date_options
        )

    # Select if you want messages, characters or words
    types_mapping = {'messages':'msg', 'words':'word', 'characters':'char'}
    with dropdown_columns2[0]:
        types = st.multiselect(
            'What type(s) to filter?',
            types_mapping.keys()
        )

    types_values = [types_mapping[label] for label in types]

    # Select what Discord category to filter
    category_mapping = {'Canada':'ca', 'General':'gen', 'Advice & Support':'gen2', 'Important':'imp', 'Sneaker Info':'snki', 'Releases':'snkr', 'Staff':'sta', 'Support':'sup'}
    with dropdown_columns2[1]:
        category = st.multiselect(
            'What Discord category(ies) would you like to filter?',
            category_mapping.keys()
        )
    category_values = [category_mapping[label] for label in category]

    columns = []
    first_row = df.columns.tolist()
    first_row.pop(0)

    print(first_row)

    columns_dates = remove_partial_elements(first_row, dates)
    print(columns_dates)
    columns_types = remove_partial_elements(columns_dates, types_values)
    columns = remove_partial_elements(columns_types, category_values)

    # First checks for no staff selected, then continues
    if not columns:
        st.error("Please select at least one option from each drop down.")
    else:
        if columns:

            #Remove if not asked for 
            if "gen2" not in category_values and "gen" in category_values:
                columns = [ x for x in columns if "gen2" not in x ]

            # Adding default column to the selected ones.
            sel = [selected_col] + columns
            st.write("List of staff and their data.", filtered_df[sel])
        else:
            st.write(filtered_df)

        sel = [selected_col] + columns
        data = filtered_df[sel]
        #data.set_index('name', inplace=True)
        #data = data.T

        altair_df = data.melt(id_vars='name', var_name='Item', value_name='Value')

        # , height=90, width={"step": 100}
        chart = alt.Chart(altair_df).mark_bar(size=10).encode(
            x=alt.X('name:N', title='Name'),
            y=alt.Y('sum(Value):Q', title='Total Msg/Word/Char'),
            color='name:N',
            column='Item:N'
        ).properties(
            width=alt.Step(30)
        )

        cols = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1])
        with cols[0]:
            st.altair_chart(chart)

except URLError as e:
    st.error(
        """
        **This demo requires internet access.**
        Connection error: %s
    """
        % e.reason
    )

Version 2

This version was for another group and they requested something similar to what I created. After some back and fourth we landed on just pulling the data from the db table so that the data could be viewed live rather than month to month. This version is in my opinion much better (especially with the graphs) and much more hands off than the first version. You can find an example and the code below.

import streamlit as st
import psycopg2
from urllib.error import URLError
import pandas as pd
import altair as alt
from datetime import datetime
import hmac

DBTable = st.secrets["DBTable"]
DBHost = st.secrets["DBHost"]
DBUsr = st.secrets["DBUsr"]
DBPass = st.secrets["DBPass"]
DBPort = st.secrets["DBPort"]

def check_password():
    """Returns `True` if the user had the correct password."""

    def password_entered():
        """Checks whether a password entered by the user is correct."""
        if hmac.compare_digest(st.session_state["password"], st.secrets["password"]):
            st.session_state["password_correct"] = True
            del st.session_state["password"]  # Don't store the password.
        else:
            st.session_state["password_correct"] = False

    # Return True if the passward is validated.
    if st.session_state.get("password_correct", False):
        return True

    # Show input for password.
    st.text_input(
        "Password", type="password", on_change=password_entered, key="password"
    )
    if "password_correct" in st.session_state:
        st.error("😕 Password incorrect")
    return False


if not check_password():
    st.stop()  # Do not continue if check_password is not True.

@st.cache_data
def get_UN_data():
    # Connect to the database
    conn = psycopg2.connect(
        dbname=DBTable,
        user=DBUsr,
        password=DBPass,
        host=DBHost,
        port=DBPort
    )

    cur = conn.cursor()
    cur.execute("SELECT * FROM staff_message_data")
    rows = cur.fetchall()
    # Get column names
    columns = [desc[0] for desc in cur.description]
    df = pd.DataFrame(rows, columns=columns)
    cur.close()
    conn.close()

    # Drop the first three columns
    df = df.drop(columns=['id', 'nameid'])
    df['name'] = df['name'].str.replace(r'#0$', '', regex=True)

    return df

def remove_partial_elements(main_list, filter_list):
    filtered_list = []
    
    for item in main_list:
        if any(part in item for part in filter_list):
            filtered_list.append(item)
    
    return filtered_list

def filter_columns(main_list, filter_value):
    return [item for item in main_list if filter_value in item]

try:
    # Title and removing excess space
    st.set_page_config(layout="wide")
    st.title("Staff Data")
    st.markdown("""
        <style>
               .block-container {
                    padding-top: 1rem;
                    padding-bottom: 1rem; /* Adjusted padding */
                    padding-left: 2rem;   /* Adjusted padding */
                    padding-right: 2rem;  /* Adjusted padding */
                }
        </style>
        """, unsafe_allow_html=True)

    # Get DF
    df = get_UN_data()

    # Get unique staff members
    unique_staff = df['name'].drop_duplicates()

    # Hardset columns
    name_col = "name"
    cat_col = "category_name"

    # Allow 2 dropdowns side by side
    dropdown_columns = st.columns(2)
    dropdown_columns2 = st.columns(2)

    # Get unique staff members and sort them alphabetically
    unique_staff_sorted = sorted(unique_staff.tolist())

    # Select Rows
    with dropdown_columns[0]:
        staff = st.multiselect(
            "Choose Staff Member(s)", unique_staff_sorted
        )

    # Filter Rows based on staff wanted
    filtered_df = df[df['name'].isin(staff)]

    # Select date(s) you want
    # Gets the current date
    date_options = []
    current_month = datetime.now().month
    current_month = 4
    current_year = datetime.now().year
    current_year = 24
    str_year = str(current_year).replace("20", "")
    current_year = int(str_year)
    # Gets a list of available dates
    start_month = 2
    start_year = 24
    while True:
        date_options.append(str(start_month) + "/" + str(start_year))
        if start_month >= current_month and start_year >= current_year:
            break
        if start_month == 12:
            start_year = start_year + 1
            start_month = 1
        else:
            start_month = start_month + 1

    with dropdown_columns[1]:
        dates = st.multiselect(
            'What date(s) to filter?',
            date_options
        )

    # Replace '/' with '_'
    dates = [date.replace('/', '_') for date in dates]

    # Select if you want messages, characters, or words
    types_mapping = {'messages': 'msg', 'words': 'word', 'characters': 'char'}
    with dropdown_columns2[0]:
        selected_type = st.selectbox(
            'What type to filter?',
            types_mapping.keys()
        )

    selected_type_value = types_mapping[selected_type]

    # Select what Discord category to filter
    category_mapping = {
        'Polar Chefs Welcome Tickets': 1078079796342444133,
        'Polar Chefs Support Tickets': 703068599107059742, 
        'Polar Chefs Cancel Tickets': 1084954405650038944,
        'Polar Plus General Tickets': 1140344840022921336,
        'Polar Chefs ACO Tickets': 989572256114049104,
        'Polar Chefs Announcements/Pings': 498203333123768320, 
        'Polar Chefs Drop Day': 1162481850669871134,
        'Polar Chefs Collectible Flips': 1158582720331513866,
        'Polar Chefs Streetwear': 1100828042831597588,
        'Polar Chefs Tickets Reselling': 1159295090972446740, 
        'Polar Chefs Community/Chat': 496470678476816404,
        'Polar Chefs Future Releases': 572914169372737567,
        'Polar Plus Announcements': 1075157727485120543,
        'Polar Plus Chat': 1150653565740781638, 
        'Polar Plus Lupes': 1143047473217142824,
        'Credit & Cashouts': 1127116011380932608, 
        'Polar Plus Trivia': 1089050877538615346
    }

    with dropdown_columns2[1]:
        selected_categories = st.multiselect(
            'What Discord category(ies) would you like to filter?',
            category_mapping.keys()
        )

    # Get the category IDs for the selected categories
    selected_category_ids = [category_mapping[category_name] for category_name in selected_categories]
    # Get only rows that are given in the selection
    filtered_df = filtered_df[filtered_df['cat_id'].isin(selected_category_ids)]
    
    # Convert 'cat_id' values to strings before mapping
    filtered_df['category_name'] = filtered_df['cat_id'].map({v: k for k, v in category_mapping.items()})
    # Drop the 'cat_id' column if you no longer need it
    filtered_df.drop(columns=['cat_id'], inplace=True)

    columns = []
    first_row = filtered_df.columns.tolist()
    first_row.pop(0)

    columns_dates = remove_partial_elements(first_row, dates)
    columns = filter_columns(columns_dates, selected_type_value)

    # Define a function to format the column names
    def format_column_name(col):
        if col.startswith(('msg_', 'word_', 'char_')):
            parts = col.split('_')
            return f"{parts[1]}/{parts[2]}"
        else:
            return col

    # First checks for no staff selected, then continues
    if not columns:
        st.error("Please select at least one option from each drop down.")
    else:
        sel = [name_col] + [cat_col] + columns

        # Make a copy of the filtered DataFrame
        filtered_df_filtered = filtered_df[sel].copy()

        # Rename the columns in the copied DataFrame
        filtered_df_filtered.rename(columns=format_column_name, inplace=True)

        if columns:
            # Sort the DataFrame based on the "name" column
            filtered_df_filtered.sort_values(by='name', inplace=True)

            # Adding default column to the selected ones.
            st.write("List of staff and their data.", filtered_df_filtered)
        else:
            st.write(filtered_df_filtered)

        data = filtered_df_filtered

        # Group data by name and category_name
        grouped_df = data.groupby(['name', 'category_name']).sum().reset_index()

        # Melt the DataFrame to long format
        value = "Total " + selected_type_value
        melted_df = pd.melt(grouped_df, id_vars=['name', 'category_name'], var_name='date', value_name=value)

        # Create line charts for each category
        charts = []
        for category_name in melted_df['category_name'].unique():
            category_data = melted_df[melted_df['category_name'] == category_name]
            line_chart = alt.Chart(category_data).mark_line().encode(
                x=alt.X('date', title='Date', axis=alt.Axis(labelAngle=0)),  # Set labelAngle to 0 for horizontal text
                y=value,
                color='name',
                tooltip=['name', 'date', value]
            )

            # Add dots to the data points
            point_chart = alt.Chart(category_data).mark_circle().encode(
                x=alt.X('date'),
                y=value,
                tooltip=['name', 'date', value]
            )

            # Combine line and point charts
            chart = (line_chart + point_chart).properties(
                title=category_name,
                width=1000,
                height=300 
            )
            charts.append(chart)

        # Display the charts
        st.write(alt.vconcat(*charts))      

except URLError as e:
    st.error(
        """
        **This demo requires internet access.**
        Connection error: %s
    """
        % e.reason
    )

Sports Betting W/L Tracker

This project utilized data that was on a Google Sheet. Betters update their bets daily/weekly and then the Streamlit app will pull the data from the sheet and display it. This method is sadly the slowest but is still fast. Below you can find an example and the code.

import streamlit as st
from urllib.error import URLError
import pandas as pd
import altair as alt
from datetime import datetime

sheets_to_fetch = ['caleb%20recap', 'yous%20recap', 'serbian%20recap', 'cryp%20esports']
names_to_fetch = ['Caleb', 'Yous', 'Serbian', 'Cryp Esports']

def get_UN_data(indices, truncate_index):
    # Define column names
    column_names = ['Name', 'Date', 'Play', 'Odds', 'Units Risked', 'Result', 'Units Won/Lost']

    combined_df = pd.DataFrame()

    selected_sheets = [sheets_to_fetch[idx] for idx in indices]
    selected_names = [names_to_fetch[idx] for idx in indices]

    for sheet_name, name in zip(selected_sheets, selected_names):
        # Read the data while skipping rows that contain the misnamed cells 
        spreadsheet = pd.read_csv(f'https://docs.google.com/spreadsheets/d/12sCC92_qOCOTEGDsst_elS7gT4I1ko2CmPBqg9Y6NEU/gviz/tq?tqx=out:csv&sheet={sheet_name}', header=None, skiprows=1)

        # Add person's name to the beginning of the DataFrame
        spreadsheet.insert(0, 'Name', name)

        # Truncate each row after the specified column index
        spreadsheet = spreadsheet.iloc[:, :truncate_index + 1]  # +2 to keep the columns up to the desired index

        # Assign column names
        spreadsheet.columns = column_names

        # Remove 'u' character from 'Units Won/Lost' column
        spreadsheet['Units Won/Lost'] = spreadsheet['Units Won/Lost'].astype(str).str.replace('u', '')

        # Convert 'Units Won/Lost' column to numeric
        spreadsheet['Units Won/Lost'] = pd.to_numeric(spreadsheet['Units Won/Lost'], errors='coerce')

        # Append modified spreadsheet to the combined DataFrame 
        combined_df = pd.concat([combined_df, spreadsheet], ignore_index=True)

    return combined_df

def filter_dataframe_by_column_value(dataframe, column_name, values_to_keep):
    # Convert NaN values to empty string or any other suitable placeholder
    dataframe[column_name].fillna('', inplace=True)

    return dataframe[dataframe[column_name].astype(str).str.contains('|'.join(values_to_keep))]

def main():
    try:
        # Title and removing excess space
        st.set_page_config(layout="wide")
        st.title("Notify Betting Recaps")
        st.markdown("""
            <style>
                   .block-container {
                        padding-top: 1rem;
                        padding-bottom: 0rem;
                        padding-left: 5rem;
                        padding-right: 5rem;
                    }
            </style>
            """, unsafe_allow_html=True)

        # Allow 2 dropdowns side by side
        dropdown_columns = st.columns(2)
        table_columns = st.columns(2)

        # Select Betting Staff
        with dropdown_columns[0]:
            staff = st.multiselect(
                "Choose Staff Member(s)", names_to_fetch
            )

        if not staff:
            st.error("Please select at least one staff memeber and at least 2 dates or All.")
        else:
            # Get the indices of selected items
            indices = [names_to_fetch.index(name) for name in staff]

            df = get_UN_data(indices, 6)

            # Select date(s) you want
            # Gets the current date
            date_options = ["All"]
            current_month = datetime.now().month
            current_year = datetime.now().year
            monthyear = ""
            if current_month > 9:
                monthyear = str(current_month) + "/" + str(current_year)
                current_year = int(str(current_year))
            else:
                monthyear = "0" + str(current_month) + "/" + str(current_year)
                current_month = int("0" + str(current_month))
                current_year = int(str(current_year))
            # Gets a list of available dates
            start_month = 3
            start_year = 2023
            while True:
                date_options.append(str(start_month) + "/" + str(start_year))
                if start_month >= current_month and start_year >= current_year:
                    break
                if start_month == 12:
                    start_year = start_year + 1
                    start_month = 1
                else:
                    start_month = start_month + 1

            with dropdown_columns[1]:
                dates = st.multiselect(
                    'What date(s) to filter? (XX/MM/YY)',
                    date_options
                )

            # Convert the 'Date' to datetime format
            df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

            # Reformat the date to 'dd/mm/yyyy' format as a string
            df['Date'] = df['Date'].dt.strftime('%d/%m/%Y')

            # Filter the DataFrame based on selected dates
            if not dates or (len(dates) < 2 and "All" not in dates):
                st.error("Please select at least 2 dates or All.")
            else:
                if "All" in dates:
                    filtered_df = df
                else:
                    filtered_df = filter_dataframe_by_column_value(df, 'Date', dates)
                
                with table_columns[0]:
                    st.write("List of Selected Staff and Dates.", filtered_df)

                # Convert the 'Date' column to datetime with month and year only
                filtered_df['Date'] = pd.to_datetime(filtered_df['Date'], format='%d/%m/%Y').dt.to_period('M')

                name_count = filtered_df.groupby('Name')['Units Won/Lost'].count().reset_index()
                name_count.columns = ['Name', 'Total Bets']

                # Group by 'Date' and 'Name', summing 'Profit_Loss'
                grouped = filtered_df.groupby(['Date', 'Name'])['Units Won/Lost'].sum().unstack(fill_value=0)
                
                # Reset index to access 'Date' and 'Name' as columns for plotting
                grouped = grouped.reset_index()

                # Streamlit app
                st.title('Month-to-month PnL (Units)')

                # Melt the DataFrame to long format for Altair
                melted = pd.melt(grouped, id_vars='Date', var_name='Name', value_name='Units W/L (Monthly)')

                # Calculate the sum of 'Units W/L' for each 'Name'
                name_total = melted.groupby('Name')['Units W/L (Monthly)'].sum().reset_index()
                name_total.columns = ["Name", "Units W/L"]

                name_summary = pd.merge(name_total, name_count, on='Name')

                with table_columns[1]:
                    st.write("Summary Over Selected Period", name_summary)

                # Convert 'Date' back to datetime format from period
                melted['Date'] = melted['Date'].dt.to_timestamp()

                # Create an Altair chart
                line = alt.Chart(melted).mark_line().encode(
                    x='Date:T',
                    y='Units W/L (Monthly):Q',
                    color='Name:N'
                ).properties(
                    width=800,
                    height=400
                )

                # Create an Altair chart
                circles = alt.Chart(melted).mark_circle().encode(
                    x='Date:T',
                    y='Units W/L (Monthly):Q',
                    color='Name:N'
                ).properties(
                    width=800,
                    height=400
                )

                chart = line + circles

                # Show the Altair chart in Streamlit
                st.altair_chart(chart, use_container_width=True)

    except URLError as e:
        st.error(
            """
            **This demo requires internet access.**
            Connection error: %s
        """
            % e.reason
        )

    except Exception as e:
        st.error("Failed to fetch data. Please check the Google Sheet URL.")
        st.error(e)

if __name__ == "__main__":
    main()