import plotly.offline
import plotly.express as px

# Switch plotly's offline module into notebook mode before any figure is built.
plotly.offline.init_notebook_mode()

import pandas as pd

# Load one raw survey export per year (2023/2024 from CSV, 2021/2022 from Excel).
s2024 = pd.read_csv('./Post-2024_Survey2025-02-05_15_25_01.csv')
s2023 = pd.read_csv('./Post-2023_Survey2025-02-05_15_36_54.csv')
s2022 = pd.read_excel('./2022_eoy_survey_full.xlsx')
s2021 = pd.read_excel('./eoy-survey-2021 (2).xlsx')

# Stack the yearly frames. The dict keys become the outer level of the row
# index, so rows can later be selected or grouped by survey year.
df = pd.concat(
    {
        2021: s2021,
        2022: s2022,
        2023: s2023,
        2024: s2024,
    }
)

# Name the outer index level 'year'; the inner level stays unnamed.
df.index.names = ('year', None)
# Bare expression: evaluates to the combined column index.
df.columns

Index(['Submission Date', 'How long have you been in Python Discord?',
       'What do you like about Python Discord? What keeps you involved in the server?',
       'Is there anything you would change that would get you more involved in Python Discord?',
       'How long have you been programming in Python?',
       'How long have you been programming in general?',
       'How would you describe yourself?',
       'What other programming languages do you know/use?',
       'Which operating system(s) do you use when programming?',
       'What other tech communities do you participate in, if any?',
       'Was python your first exposure to programming?',
       'What do you use Python for?',
       'Have you used Python Discord's list of learning resources?',
       'What have been your primary resources for learning Python?',
       'How often do you seek help in this server?',
       'What timezone do you live in? (We are interested in your timezone, not your specific location.)',
       'How old are you?', 'What do you most closely identify as?',
       'Where do you primarily seek help?',
       'How satisfied were you with the help you've received on the server?',
       'How often do you help others in this server?',
       'Where do you primarily help others?',
       'What motivates you to help others in the server?',
       'Is there anything we can improve about giving or receiving help on our server?',
       'Which of these events have you participated in on the server?',
       'How was your experience with the Code Jam?',
       'How was your experience with the PyWeek Game Jam?',
       'How was your experience with Pixels?',
       'How was your experience with Advent of Code?',
       'How was your experience with the stage channels?',
       'How was your experience with the livestreams?',
       'What did you enjoy most about the events?',
       'What did you not enjoy about the events?',
       'Would you be interested in any of these events in the future?',
       'How often do you join one of our voice channels?',
       'Are any of these causing you to use our voice channels less than you want to?',
       'Is contributing to Python Discord's open source projects something you are interested in doing?',
       'What do you feel is blocking you from contributing to Python Discord's open source projects?',
       'Is there anything you would like to see added to Python Discord's open source projects?',
       'Do you have any other comments or feedback?',
       'Was Python your first exposure to programming?',
       'What months/time of year works best for you to participate in long events such as the code jam?',
       'How do you feel about how often our branding (logo/banner) changes?',
       'What are your preferred Python dependency management systems?',
       'What sort of projects do you generally seek help for in the server?',
       'How was your experience with Revival of Code?',
       'What do you wish you could change about the server?',
       'How was your experience with the Trivia Night?',
       'Where are you? Answer this question by clicking on your country within your timezone, or select from the dropdown menu. ',
       'What are you interested to learn more about?',
       'Which operating system(s) or platforms do you use when programming?',
       'Why do you ask for help on Python Discord instead of using AI tools like ChatGPT or Copilot?',
       'How helpful do you find Python Discord compared to AI tools like ChatGPT or Copilot?',
       'How do you use AI tools like ChatGPT or Copilot?',
       'Where are you? Answer this question by clicking on your country within your timezone, or select from the dropdown menu. (We are only interested in your country and timezone, not your precise location.)',
       'What were you looking for when you didn't find anything helpful on our resources page?',
       'What could we do to improve the resources page?',
       'Do you use any assistive technologies (such as screen readers or contrast settings) when you use Discord?',
       'What assistive technologies do you use?',
       'What are some navigation or engagement issues you've experienced in the community?',
       'How can we make engaging with the server more fluid?'],
      dtype='object')

# Number of rows per value of the outer ('year') index level.
df.groupby(level=0).size()

year
2021    1048
2022    1736
2023    1220
2024     517
dtype: int64

import plotly.graph_objects as go

# Overlay one histogram per year showing how long after that year's first
# submission each response arrived.
fig = go.Figure()

for year in [2023, 2024]:
    # Parse that year's submission timestamps.
    time = df.loc[year, 'Submission Date'].pipe(pd.to_datetime)
    # Elapsed time since the year's earliest submission, in fractional days
    # (86400 seconds per day).
    days_since = time.sub(time.min()).dt.total_seconds() / 86400
    fig.add_trace(go.Histogram(x=days_since, name=str(year)))

# Limit the x axis to the range [-0.3, 14] days, centre the title and label the axes.
fig.update_layout(xaxis_range=[-.3, 14], title="Survey submissions per day", title_x=.5, xaxis_title="Days since survey was posted", yaxis_title="Number of responses that day")
fig

def year_normalize(s: pd.Series, **kwargs):
    """Return each answer's share of the responses within its group.

    The series is grouped on its outermost index level (the survey year in
    this file) and the relative frequency of every distinct value is
    computed inside each group.

    Args:
        s: Answers whose index level 0 identifies the group.
        **kwargs: Accepted but not used.

    Returns:
        The grouped relative frequencies with the index reset into columns:
        the grouping level, a ``value`` column holding the answer, and the
        frequency column.
    """
    answers = s.rename('value')
    per_group = answers.groupby(level=0)
    shares = per_group.value_counts(normalize=True)
    return shares.reset_index()


def _make_histogram(d: pd.DataFrame, inverted=False, order=None, height=400):
    """Build a grouped plotly express histogram of the ``proportion`` column.

    Args:
        d: Frame with ``value``, ``year`` and ``proportion`` columns.
        inverted: When false, bars are positioned by ``value`` and coloured
            by ``year``; when true the two roles are exchanged.
        order: Accepted but not used by this function.
        height: Accepted but not used by this function.

    Returns:
        The figure created by ``px.histogram``.
    """
    palette = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA']
    # Colours are keyed by the year rendered as a string.
    year_colors = {str(year): shade for year, shade in zip(range(2021, 2025), palette)}

    if inverted:
        axis_column, color_column = 'year', 'value'
    else:
        axis_column, color_column = 'value', 'year'

    return px.histogram(
        d,
        x=axis_column,
        y='proportion',
        color=color_column,
        barmode='group',
        color_discrete_map=year_colors,
    )


def _secret_make_histogram(d: pd.DataFrame, title, order=None, height=400, horizontal=False):
    """Build one figure holding two alternative groupings plus a toggle button.

    Two histograms are created from ``d`` with ``_make_histogram`` (one
    regular, one ``inverted``); their traces are placed in a single figure
    with only the first set initially visible, and a button is added whose
    arguments carry the two visibility lists.

    Args:
        d: Frame with ``year``, ``value`` and ``proportion`` columns.
        title: Figure title.
        order: Optional explicit ordering of the ``value`` categories. When
            omitted, the sorted unique values of ``d['value']`` are used for
            row sorting.
        height: Figure height passed to the layout.
        horizontal: When true, every trace is switched to horizontal
            orientation and the button's two visibility lists are exchanged.

    Returns:
        The combined ``go.Figure``.
    """
    # Sort rows by position in this list: the years 2021-2024 first, then the
    # category order. The same key function is applied to both sort columns.
    # A value that is missing from the list makes ``list.index`` raise
    # ValueError.
    year_order_1 = list(range(2021, 2025)) + (order or sorted(d['value'].unique()))
    # sort_values returns a new frame, so the column assignment below does
    # not touch the frame the caller passed in.
    d = d.sort_values(by=['year', 'value'], key=lambda s: s.apply(year_order_1.index))
    # _make_histogram keys its colour map by the year as a string.
    d['year'] = d['year'].astype(str)
    
    # One histogram per grouping direction.
    fig_group = _make_histogram(d, order=order, height=height)
    fig_year = _make_histogram(d, order=order, height=height, inverted=True)

    # Put both trace sets in a single figure: regular traces first, inverted after.
    fig = go.Figure()
    fig.add_traces(fig_group.data)
    fig.add_traces(fig_year.data)

    fig.update_layout(yaxis_title=None, xaxis_title=None, title_text=title, height=height, title_x=.5)

    n_group = len(fig_group.data)
    n_year = len(fig_year.data)

    # Per-trace flags: the regular traces are shown, the inverted ones hidden;
    # inverse_visibility is the element-wise negation.
    visibility = ([True, ] * n_group) + ([False, ] * n_year)
    inverse_visibility = [not x for x in visibility]

    # The initial state always uses the un-swapped list, even when
    # ``horizontal`` exchanges the two lists further down.
    for i, b in enumerate(visibility):
        fig.data[i].visible = b

    # Category order for the x axis: year strings first, then ``order`` (if any).
    year_order_2 = [str(y) for y in range(2021, 2025)] + (order or [])
    fig.update_xaxes(categoryorder="array", categoryarray=year_order_2)

    if horizontal:
        # Exchange the lists handed to the button below; the visibility
        # already assigned to the traces above is not changed here.
        visibility, inverse_visibility = inverse_visibility, visibility
        for h in fig.data:
            # Rotate the trace: horizontal orientation with x and y swapped.
            h.orientation = 'h'
            h.x, h.y = h.y, h.x
            # Initially hidden traces get their legend group as bar text.
            if not h.visible:
                h.text = h.legendgroup

    # A single button using the 'update' method. ``args`` and ``args2`` hold
    # the two visibility lists in opposite order.
    # NOTE(review): which of args/args2 fires on the first click depends on
    # plotly's active-button handling - confirm against the plotly docs.
    fig.update_layout(
        updatemenus=[
            {
                'buttons': [
                    {
                        'label': 'Toggle Histogram',
                        'method': 'update',
                        'args': [
                            {'visible': visibility},
                            {'visible': inverse_visibility},
                        ],
                        'args2': [
                            {'visible': inverse_visibility},
                            {'visible': visibility},
                        ]
                    }
                ],
                'direction': 'up',
                'showactive': True,
                'type': 'buttons'
            }
        ][::-1]  # not me reversing the list retroactively (one element, so the order is unchanged)
    )

    return fig


def make_histogram(d: pd.DataFrame, column: str, order=None, height=400):
    """Plot the per-year answer shares of one survey column.

    Args:
        d: Frame holding the survey responses.
        column: Column to plot; also used as the figure title.
        order: Optional category ordering forwarded to the figure builder.
        height: Figure height forwarded to the figure builder.

    Returns:
        The figure returned by ``_secret_make_histogram``.
    """
    shares = year_normalize(d[column])
    return _secret_make_histogram(shares, title=column, order=order, height=height)


# Age distribution, with the brackets listed from youngest to oldest.
make_histogram(df, 'How old are you?', order=['< 18', '18 - 20', '21 - 25', '26 - 30', '31 - 40', '41 - 50', '51+'])

# No explicit order: categories fall back to sorted unique values.
make_histogram(df, 'What do you most closely identify as?')

# Time spent in the server, shortest to longest.
make_histogram(df, 'How long have you been in Python Discord?', order=['< 1 month', '1 - 6 months', '6 - 12 months', '1 - 2 years', '2+ years'])

# Unify the spelling "python" -> "Python" inside the answers so they match the
# capitalised label used in the order list below.
df['How long have you been programming in Python?'] = df['How long have you been programming in Python?'].str.replace("python", "Python")

make_histogram(
    df, 'How long have you been programming in Python?',
    order=[ 
        'I don\'t program in Python',
        '< 1 month',
        '1 - 6 months',
        '6 months - 2 years',
        '2 - 5 years',
        '5 - 10 years',
        '10+ years',
    ],
    height=600
)

make_histogram(
    df, 'How long have you been programming in general?',
    order=[
        'I don\'t program at all',
        '< 1 month',
        '1 - 6 months',
        '6 months - 2 years',
        '2 - 5 years',
        '5 - 10 years',
        '10+ years',
    ]
)

# Bar chart of per-year answer shares, one bar per year, coloured by answer.
# NOTE(review): the combined frame also has a column spelled
# 'Was python your first exposure to programming?' (lowercase "python");
# answers stored under that header are not part of this chart - confirm
# whether the two columns should be merged.
px.bar(
    df['Was Python your first exposure to programming?'].pipe(year_normalize),
    x='year', color='value', y='proportion'
)

# NOTE(review): make_horizontal_histogram is defined further down in this
# file, after this call; running the file top to bottom would raise NameError
# here - confirm the intended execution order.
make_horizontal_histogram(df, 'How would you describe yourself?')

import datetime as dt
import pytz


# Per-year counts of the timezone named in the older timezone question.
# Rows are timezone names and columns are survey years; combinations that
# never occur are filled with 0.
timezones_before_2023 = (
    df['What timezone do you live in? (We are interested in your timezone, not your specific location.)']
    # Keep only the text before the first '(' and trim surrounding whitespace.
    .str.extract(r'([^\(]+)', expand=False).str.strip()
    .dropna()
    # Group on the outer ('year') index level. The `axis` keyword is left out
    # on purpose: a Series only has axis 0, and pandas 3.0 removes the
    # keyword from groupby.
    .groupby(level=0)
    .value_counts()
    .unstack(level=0)
    .fillna(0)
)

# Drop the index name left over from the grouped value level.
timezones_before_2023.index.name = None


# 2023 question: pull the first 'Region/City'-style token (optionally with
# more '/'-separated parts) out of each answer and turn it into a pytz
# timezone object; the series is renamed to the year.
timezones_2023 = (
    s2023['Where are you? Answer this question by clicking on your country within your timezone, or select from the dropdown menu. ']
    .str.extract(r'(\w+/(?:\w+/)*\w+)', expand=False)
    .dropna()
    .apply(lambda s: pytz.timezone(s))
    .rename(2023)
)

# Same extraction for the 2024 question, whose header text differs.
timezones_2024 = (
    s2024['Where are you? Answer this question by clicking on your country within your timezone, or select from the dropdown menu. (We are only interested in your country and timezone, not your precise location.)']
    .str.extract(r'(\w+/(?:\w+/)*\w+)', expand=False)
    .dropna()
    .apply(lambda s: pytz.timezone(s))
    .rename(2024)
)

# Add 2023 and 2024 count columns to the earlier table. The timezone objects
# are converted back to strings before counting so they can match the
# string row labels. `join` is called with its default `how`, so only rows
# already present in timezones_before_2023 are kept.
timezones = (
    timezones_before_2023
    .join(timezones_2023.astype(str).value_counts().rename(2023))
    .join(timezones_2024.astype(str).value_counts().rename(2024))
    .fillna(0)
    .astype(int)
)

def get_offset(tzname: str):
    """Return the UTC offset currently in effect in *tzname*.

    Args:
        tzname: Timezone name understood by ``pytz.timezone``.

    Returns:
        The offset formatted with ``strftime('%z')``.
    """
    zone = pytz.timezone(tzname)
    current_time = dt.datetime.now(zone)
    return current_time.strftime('%z')
    
# Attach each timezone's current UTC offset, looked up from the row label.
# NOTE(review): get_offset returns a '%z'-formatted string, so the integer
# cast presumably yields HHMM-style numbers (e.g. '+0530' -> 530) rather
# than a count of hours - confirm this is intended.
timezones['offset'] = timezones.index.to_series().apply(get_offset).astype(int)
# Sum the per-year counts of all timezones sharing an offset, then draw a KDE
# plot of the resulting year columns.
timezones.groupby('offset').sum().plot.kde(figsize=(10, 6))

<Axes: ylabel='Density'>

# Pair each 2024 respondent's age bracket with their extracted timezone
# (joined on the row index, as a string column named 'timezone').
age_time = s2024[['How old are you?']].join(timezones_2024.astype(str).rename('timezone'))
# The region is the part of the timezone name before the first '/'.
age_time['region'] = age_time['timezone'].str.extract(r"^([^/]+)/", expand=False)
# Count respondents per (age bracket, region) cell.
age_by_region = (
    age_time.pivot_table(index='How old are you?', columns='region', aggfunc='count')
    .fillna(0).astype(int)
    # Drop the outer column level added by pivot_table, keeping the region names.
    .droplevel(axis=1, level=0)
    # Fix the display order of regions and age brackets.
    .reindex(
        columns=['America', 'Atlantic', 'Africa', 'Europe', 'Indian', 'Asia', 'Australia', 'Pacific'],
        index=['< 18', '18 - 20', '21 - 25', '26 - 30', '31 - 40', '41 - 50', '51+']
    )
)
age_by_region

def multiselect_preprocess(s: pd.Series, replacements=None, top_n=5):
    """Return the per-group share of the most common multi-select answers.

    Each response may hold several choices separated by newlines. The
    choices are split apart, lower-cased and optionally relabelled; then,
    for every value of the outermost index level (the survey year in this
    file), the relative frequency of each choice is computed and only the
    ``top_n`` most frequent choices are kept.

    Args:
        s: Responses whose index level 0 identifies the group.
        replacements: Optional mapping from a lower-cased choice to the label
            that should replace it.
        top_n: Number of most frequent choices to keep per group.

    Returns:
        A DataFrame with one row per kept (group, choice) pair and the
        columns ``<group level name>``, ``value`` and ``proportion``, in
        that order. An unnamed group level is labelled ``level_0``.
    """
    # Same label reset_index() would give an unnamed first level.
    group_label = s.index.names[0]
    if group_label is None:
        group_label = 'level_0'

    choices = (
        s.rename('value')
        .str.split('\n')
        .explode()
        .str.lower()
        .replace(replacements or {})
    )
    shares = choices.groupby(level=0).apply(
        lambda answers: answers.value_counts(normalize=True).head(top_n)
    )

    # Name the result and both index levels explicitly instead of renaming
    # whatever reset_index() produces: the automatic names differ between
    # pandas versions (the choice level is unnamed before 2.0 and named after
    # the Series from 2.0 on, where it would collide with the Series name).
    return (
        shares.rename('proportion')
        .rename_axis([group_label, 'value'])
        .fillna(0)
        .reset_index()
    )


def make_horizontal_histogram(df, title: str, top_n=5, replacements: dict = None, height=1000):
    """Plot the most common answers of a multi-select column as horizontal bars.

    Args:
        df: Frame holding the survey responses.
        title: Column to plot; also used as the figure title.
        top_n: Number of most frequent choices kept per group.
        replacements: Optional relabelling passed to ``multiselect_preprocess``.
        height: Figure height forwarded to the figure builder.

    Returns:
        The figure returned by ``_secret_make_histogram``.
    """
    shares = multiselect_preprocess(df[title], replacements, top_n)
    return _secret_make_histogram(shares, title=title, height=height, horizontal=True)


# Learning resources, with one long answer relabelled. The replacement key is
# lower-case because answers are lower-cased before the mapping is applied.
make_horizontal_histogram(
    df, title='What have been your primary resources for learning Python?', 
    replacements={'finding answers to individual questions on sites such as stackoverflow': 'sites like SO'},
)

# Default top_n (5) and height.
make_horizontal_histogram(df, 'What other programming languages do you know/use?')

# Only the three most common project types, in a shorter figure.
make_horizontal_histogram(df, 'What sort of projects do you generally seek help for in the server?', top_n=3, height=400)

# Share of every help venue among all selections, per survey year.
# Rows are venues and columns are survey years.
where_get_help = (
    df['Where do you primarily seek help?']
    # One row per selected venue (choices are newline-separated).
    .str.split('\n')
    .explode()
    .dropna()
    # Group on the outer ('year') index level. The `axis` keyword is left out
    # on purpose: a Series only has axis 0, and pandas 3.0 removes the
    # keyword from groupby.
    .groupby(level=0)
    .value_counts(normalize=True)
    .unstack(level=0)
)

# Fill the 2021/2022 cells of two rows from differently named rows.
# NOTE(review): presumably these venues were renamed between surveys - confirm.
where_get_help.loc['#python-discussion', [2021, 2022]] = where_get_help.loc['#python-general']
where_get_help.loc['Help channels/help forum', [2021, 2022]] = where_get_help.loc['Help channels']

# Drop the rows that were merged above (plus one discussion-channel row),
# then reshape to long form: one row per (venue, year) with its proportion.
d = (
    where_get_help
    .drop(index=[
        'Discussion channels (i.e. advanced discussion and career discussion)',
        '#python-general',
        'Help channels',
    ])
    .fillna(0)
    .reset_index()
    .melt(id_vars=["Where do you primarily seek help?"], var_name="Year", value_name="Proportion")
)


# Horizontal grouped bars: one group per venue, one bar per year.
fig = px.bar(
    d,
    x="Proportion",
    y="Where do you primarily seek help?",
    color="Year",
    orientation='h',
    title="Where do you primarily seek help?",
    barmode='group',
    height=1000,
)

# Show figure
fig.show()

# Satisfaction with help received; categories use the default sorted order.
make_histogram(df, 'How satisfied were you with the help you\'ve received on the server?')

# Multi-select questions, keeping the eight most common choices per year.
make_horizontal_histogram(df, 'What are your preferred Python dependency management systems?', top_n=8)

make_horizontal_histogram(df, 'Which operating system(s) or platforms do you use when programming?', top_n=8)

# Pie chart of the raw 2024 answer counts for the learning-resources question.
s2024['Have you used Python Discord\'s list of learning resources?']\
    .value_counts().plot.pie(title='Have you used Python Discord\'s list of learning resources?')

<Axes: title={'center': "Have you used Python Discord's list of learning resources?"}, ylabel='count'>

# Pie chart of the three most common 2024 answers to the branding question.
# Answers are lower-cased and stripped first so that variants differing only
# in case or surrounding whitespace are counted together.
(
    s2024['How do you feel about how often our branding (logo/banner) changes?']
    .str.lower()
    .str.strip()
    .value_counts()
    .head(3)
    .plot.pie(title='How do you feel about how often our branding (logo/banner) changes?')
)

<Axes: title={'center': 'How do you feel about how often our branding (logo/banner) changes?'}, ylabel='count'>

import numpy as np
from PIL import Image
from pathlib import Path
from wordcloud import WordCloud as WC, ImageColorGenerator, STOPWORDS
from string import ascii_lowercase
import unicodedata

# Characters allowed to survive filtering: lowercase ASCII letters plus a few
# punctuation marks (space, apostrophe, hyphen, dot, colon, slash, backslash).
GOOD_CHARS = ascii_lowercase + " '-.:/\\"


def text_filter(text):
    """Reduce free text to space-separated lowercase words.

    The text is NFKD-normalised and lower-cased, split on whitespace, and
    every character outside ``GOOD_CHARS`` is removed from each word. Words
    that end up with fewer than two characters are discarded.

    Args:
        text: The raw text.

    Returns:
        The surviving words joined by single spaces.
    """
    folded = unicodedata.normalize("NFKD", text).lower()

    kept_words = []
    for token in folded.split():
        cleaned = ''.join([symbol for symbol in token if symbol in GOOD_CHARS]).strip()
        if len(cleaned) > 1:
            kept_words.append(cleaned)

    return ' '.join(kept_words)


def generate(image_path: Path, text_series: 'pd.Series[str]', scale=1):
    """Render a word cloud shaped and coloured by an image.

    Args:
        image_path: Image used both as the word-cloud mask and as the source
            of the word colours.
        text_series: Free-text answers; they are concatenated with spaces and
            cleaned with ``text_filter`` before the cloud is generated.
        scale: Forwarded to the word cloud's ``scale`` argument.

    Returns:
        The image produced by the word cloud's ``to_image()``.
    """
    # Read the pixels inside a context manager so the image file is closed
    # as soon as the array has been built.
    with Image.open(image_path) as source_image:
        mask = np.asarray(source_image)
    color_gen = ImageColorGenerator(mask)

    text: str = text_filter(text_series.str.cat(sep=' '))

    wc = WC(
        prefer_horizontal=.95,
        color_func=color_gen,
        background_color="#36393F",
        mask=mask,
        scale=scale,
        # Default stop words plus a few low-information survey answers.
        stopwords=STOPWORDS | set("nope really idk nothing dont cant none".split()),
    ).generate(text)

    return wc.to_image()


# Word cloud of what 2024 respondents like about the server, rendered at half scale.
generate(
    Path('./images/logo_code.png'),
    s2024['What do you like about Python Discord? What keeps you involved in the server?'].dropna(),
    scale=.5
)

# 2024 answers comparing the server with AI tools, without missing values.
compared_to_chatpgt = s2024['How helpful do you find Python Discord compared to AI tools like ChatGPT or Copilot?'].dropna()

# Leave out the answers that start with "I don't use AI".
generate(
    Path('./images/ducky_lemon.JPG'),
    compared_to_chatpgt[~compared_to_chatpgt.str.startswith('I don\'t use AI')]
)

# 2023 answers on what respondents wish they could change (passed without dropna()).
generate(
    Path('./images/lemoji_wink.JPG'),
    s2023['What do you wish you could change about the server?']
)

# Ten most common learning interests per year.
make_horizontal_histogram(df, 'What are you interested to learn more about?', top_n=10)

region	America	Atlantic	Africa	Europe	Indian	Asia	Australia	Pacific
How old are you?
< 18	37	0	4	63	1	40	4	1
18 - 20	29	0	1	45	1	24	3	2
21 - 25	15	0	5	37	0	19	2	0
26 - 30	16	0	1	18	1	6	0	0
31 - 40	31	0	0	22	0	4	3	0
41 - 50	20	1	0	11	0	1	0	1
51+	12	0	1	4	0	0	0	0

Python Discord 2024 Survey Report