A simple Python data analysis example -- data statistics (4)

One: Task: Count how the mental health survey respondents are distributed between men and women in each country

(1) First, let's observe this data table


(2) The code is as follows:


(3) The statistical data table is as follows:


Two: The complete code:

# -*- coding: utf-8 -*-
import csv
# Data set path. Written as a raw string so that the Windows backslashes
# (e.g. "\a" and "\f") are kept literally instead of being read as escapes.
data_path = r'G:\BaiduDownload\python\advanced python data\first lecture codes\codes\lect01_proj\survey.csv'
def run_main(input_path=None, output_path=None):
    """Count survey respondents per country, split by gender, and save a CSV.

    The input CSV is read row by row; the first row is treated as a header
    and skipped. The gender value is taken from column index 2 and the
    country from column index 3. Gender values are lower-cased and have
    their spaces removed before being matched against the accepted
    spellings ('male'/'m' and 'female'/'f'); anything else is ignored.
    A country whose rows contain only unrecognised gender values still
    appears in the output with zero counts.

    The result is written as a UTF-16 encoded CSV with the header
    ``country, male, female`` and one row per country, in the order the
    countries were first seen. A progress line is printed every 50 rows.

    Args:
        input_path: Path of the survey CSV. Defaults to the module-level
            ``data_path`` when omitted.
        output_path: Path of the CSV to create. Defaults to
            ``gender_country.csv`` in the original project folder.

    Returns:
        None. The statistics are only written to ``output_path``.

    Raises:
        OSError: If either file cannot be opened.
    """
    if input_path is None:
        input_path = data_path
    if output_path is None:
        # Raw string: keeps "\a" / "\f" in the Windows path from being
        # interpreted as escape sequences.
        output_path = r'G:\BaiduDownload\python\advanced python data\first lecture codes\codes\lect01_proj\gender_country.csv'

    gender_col = 2   # zero-based index, i.e. the third column
    country_col = 3  # zero-based index, i.e. the fourth column

    male_set = {'male', 'm'}      # accepted spellings for "male"
    female_set = {'female', 'f'}  # accepted spellings for "female"
    result_dict = {}              # country -> [female_count, male_count]

    with open(input_path, 'r', newline='') as csvfile:
        rows = csv.reader(csvfile)
        for i, row in enumerate(rows):
            if i == 0:
                # Skip the header row
                continue
            if i % 50 == 0:
                print('Processing row {} data...'.format(i))
            if len(row) <= country_col:
                # Blank or truncated line: nothing usable to count
                continue

            # Normalise the gender value: drop spaces, then lower-case
            gender_val = row[gender_col].replace(' ', '').lower()
            country_val = row[country_col]

            # Every country seen gets an entry, even if no gender matches
            counts = result_dict.setdefault(country_val, [0, 0])
            if gender_val in female_set:
                counts[0] += 1
            elif gender_val in male_set:
                counts[1] += 1
            # Any other value is noise and is deliberately not counted

    # Write the result to a file
    with open(output_path, 'w', newline='', encoding='utf-16') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        csvwriter.writerow(['country', 'male', 'female'])
        for country, (female_count, male_count) in result_dict.items():
            # Emit the counts in the same order as the header columns
            csvwriter.writerow([country, male_count, female_count])
# Script entry point: run the statistics only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    run_main()

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324519495&siteId=291194637