(pl)(census) update Python script for 2024 questions

This commit is contained in:
Andrea Vos 2024-02-25 13:31:24 +00:00
parent ef69fd23f4
commit 68bdf2a41f

View File

@@ -183,7 +183,14 @@ def analyse(group: str, df: pd.DataFrame, echo: bool = False, diffs: List[str] =
'namesAggr': extractQuestion(df, 16, includeAnswers=False, includeAggregates=True), 'namesAggr': extractQuestion(df, 16, includeAnswers=False, includeAggregates=True),
'english': extractQuestion(df, 17, includeAggregates=True), 'english': extractQuestion(df, 17, includeAggregates=True),
'labelsGender': extractQuestion(df, 18, includeAggregates=True, removeUnderscores=False), 'labelsGender': extractQuestion(df, 18, includeAggregates=True, removeUnderscores=False),
'labelsSexuality': extractQuestion(df, 19, includeAggregates=True, removeUnderscores=False), 'labelsSexuality': extractQuestion(df, 20, includeAggregates=True, removeUnderscores=False),
'labelsRomantic': extractQuestion(df, 21, includeAggregates=True, removeUnderscores=False),
'transitionSocial': extractQuestionSingle(df, 23),
'transitionName': extractQuestionSingle(df, 24),
'transitionMarker': extractQuestionSingle(df, 25),
'transitionPhysical': extractQuestionSingle(df, 26),
'transitionHormonal': extractQuestionSingle(df, 27),
'transitionMedical': extractQuestionSingle(df, 28),
} }
stats_json = { stats_json = {
@@ -194,7 +201,10 @@ def analyse(group: str, df: pd.DataFrame, echo: bool = False, diffs: List[str] =
stats_json['diff'] = {} stats_json['diff'] = {}
for prev_year in (diffs or []): for prev_year in (diffs or []):
with open(outputDir.parent / prev_year / group / 'stats.json', 'r') as f: file_path = outputDir.parent / prev_year / group / 'stats.json'
if not file_path.exists():
continue
with open(file_path, 'r') as f:
prev_stats = json.load(f) prev_stats = json.load(f)
stats_json['diff'][prev_year] = {} stats_json['diff'][prev_year] = {}
for k, v in stats_json.items(): for k, v in stats_json.items():
@@ -246,7 +256,7 @@ if __name__ == '__main__':
df.loc[:, 'age'] = year - df['3_'] df.loc[:, 'age'] = year - df['3_']
df.loc[df['age'] > 100, 'age'] = None df.loc[df['age'] > 100, 'age'] = None
diffs = ['spis-2022'] diffs = ['spis-2022', 'spis-2023']
print(df) print(df)
@@ -257,6 +267,8 @@ if __name__ == '__main__':
'agab_f': analyse('agab_f', df[df['1_'] == 'żeńską'], args.echo, diffs), 'agab_f': analyse('agab_f', df[df['1_'] == 'żeńską'], args.echo, diffs),
'agab_m': analyse('agab_m', df[df['1_'] == 'męską'], args.echo, diffs), 'agab_m': analyse('agab_m', df[df['1_'] == 'męską'], args.echo, diffs),
# 'agab_x': analyse('agab_x', df[df['1_'] == 'inną (w jurysdykcjach, gdzie to możliwe)'], args.echo, diffs), # 'agab_x': analyse('agab_x', df[df['1_'] == 'inną (w jurysdykcjach, gdzie to możliwe)'], args.echo, diffs),
'younger': analyse('younger', df[df['age'] < 25], args.echo, diffs),
'older': analyse('older', df[df['age'] >= 25], args.echo, diffs),
} }
comparisons = { comparisons = {
@@ -270,6 +282,11 @@ if __name__ == '__main__':
'agab_f': 'AFAB', 'agab_f': 'AFAB',
'agab_m': 'AMAB', 'agab_m': 'AMAB',
}, },
'by_age': {
'general': 'Ogół',
'younger': 'Młodsze',
'older': 'Starsze',
},
} }
graphs = { graphs = {
@@ -290,6 +307,13 @@ if __name__ == '__main__':
'english': 'Zaimki w języku angielskim', 'english': 'Zaimki w języku angielskim',
'labelsGender': 'Etykietki opisujące płeć', 'labelsGender': 'Etykietki opisujące płeć',
'labelsSexuality': 'Etykietki opisujące orientację seksualną', 'labelsSexuality': 'Etykietki opisujące orientację seksualną',
'labelsRomantic': 'Etykietki opisujące orientację romantyczną',
'transitionSocial': 'Tranzycja społeczna',
'transitionName': 'Tranzycja prawna imię',
'transitionMarker': 'Tranzycja prawna znacznik płci',
'transitionPhysical': 'Tranzycja medyczna bindery, packery, itp.',
'transitionHormonal': 'Tranzycja medyczna hormony',
'transitionMedical': 'Tranzycja medyczna zmiany chirurgiczne',
} }
for group, group_stats in stats.items(): for group, group_stats in stats.items():