One: Task: Count the number of male and female respondents in each country in the mental health survey data
(1) First, let's observe this data table
(2) The code is as follows:
(3) The statistical data table is as follows:
Two: The complete code:
# -*- coding: utf-8 -*-
import csv
# Data set path.
# Raw string: in a normal literal the backslashes before "a" and "f" would be
# read as the BEL and form-feed escapes and silently corrupt the Windows path.
data_path = r'G:\BaiduDownload\python\advanced python data\first lecture codes\codes\lect01_proj\survey.csv'
def _tally_genders(rows):
    """Count male and female entries per country.

    Args:
        rows: Iterable of CSV data rows (header already consumed). Index 2
            of each row is read as the gender and index 3 as the country.

    Returns:
        A dict mapping each country value to a two-item list
        ``[male_count, female_count]``, in first-seen order.
    """
    male_set = {'male', 'm'}  # accepted spellings of "male"
    female_set = {'female', 'f'}  # accepted spellings of "female"
    result_dict = {}

    # Start at 1 so the progress counter matches the file's row index
    # (the header was row 0).
    for i, row in enumerate(rows, start=1):
        if i % 50 == 0:
            print('Processing row {} data...'.format(i))

        # Blank or truncated lines have no gender/country columns; treat
        # them as noise instead of failing with IndexError.
        if len(row) < 4:
            continue

        # Drop any spaces and ignore case so that e.g. " M " matches "m".
        gender_val = row[2].replace(' ', '').lower()
        country_val = row[3]

        # Register the country even when the gender turns out to be noise,
        # so every country seen in the input appears in the result.
        counts = result_dict.setdefault(country_val, [0, 0])

        if gender_val in male_set:
            counts[0] += 1
        elif gender_val in female_set:
            counts[1] += 1
        # Any other value is noise data and is not counted.

    return result_dict


def run_main(input_path=None, output_path=None):
    """Write per-country male/female counts from the survey CSV to a new CSV.

    The first row of the input is treated as a header and skipped. The
    output has the header ``country, male, female`` followed by one row
    per country, and is written with UTF-16 encoding.

    Args:
        input_path: Path of the survey CSV. Defaults to the module-level
            ``data_path``.
        output_path: Path of the CSV to write. Defaults to
            ``gender_country.csv`` in the original project directory.

    Raises:
        OSError: If either file cannot be opened.
    """
    if input_path is None:
        input_path = data_path
    if output_path is None:
        # Raw string so the Windows backslashes are not read as escapes.
        output_path = r'G:\BaiduDownload\python\advanced python data\first lecture codes\codes\lect01_proj\gender_country.csv'

    # No explicit encoding: the input is read with the platform default,
    # as the original script did.
    with open(input_path, 'r', newline='') as csvfile:
        rows = csv.reader(csvfile)
        next(rows, None)  # skip the header row (no-op on an empty file)
        result_dict = _tally_genders(rows)

    # Write the result to a file
    with open(output_path, 'w', newline='', encoding='utf-16') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        # write header
        csvwriter.writerow(['country', 'male', 'female'])
        # write statistical results, in the same column order as the header
        for country, (male_count, female_count) in result_dict.items():
            csvwriter.writerow([country, male_count, female_count])
# Script entry point: run the statistics only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_main()