
import csv

from utils import sqlConnect

connect = sqlConnect.MySQLConnection()


def getSpeciesData(ncbi_taxon_id):
    large = ['Rectum', 'Colon', 'Cecum']
    small = ['Ileum', 'Duodenum']
    large_disease_relative_abundance = 0
    large_disease_prevalence = 0
    large_healthy_relative_abundance = 0
    large_healthy_prevalence = 0
    small_disease_relative_abundance = 0
    small_disease_prevalence = 0
    small_healthy_relative_abundance = 0
    small_healthy_prevalence = 0
    result = []
    # 获取菌种的健康、疾病样本总量
    sql_healthy_total = 'SELECT SUM(loaded_uid_num_QC1) healthy_total FROM mbodymap_species_to_samples WHERE ncbi_taxon_id={} AND disease="D006262"'.format(
        ncbi_taxon_id)
    healthy_total = connect.queryOne(sql_healthy_total)['healthy_total']
    ## healthy为0时情况处理
    if healthy_total is None:
        healthy_total = 0
    else:
        healthy_total = int(healthy_total)
    sql_disease_total = 'SELECT SUM(loaded_uid_num_QC1) disease_total FROM mbodymap_species_to_samples WHERE ncbi_taxon_id={} AND disease!="D006262"'.format(
        ncbi_taxon_id)
    disease_total = connect.queryOne(sql_disease_total)['disease_total']
    ## disease为0时情况处理
    if disease_total is None:
        disease_total = 0
    else:
        disease_total = int(disease_total)
    # 获取菌种的所有部位
    sql_bodysites = 'SELECT DISTINCT BodySite FROM mbodymap_species_to_site WHERE ncbi_taxon_id={}'.format(
        ncbi_taxon_id)
    bodysites = connect.query(sql_bodysites)
    # 获取每个部位的相对丰度和流行度
    for bodysite in bodysites:
        # 相对丰度： relative_abundance_sum之和/loaded_uid_num之和
        # 疾病
        sql_disease_relative_abundance = 'SELECT SUM(loaded_uid_num_QC1) disease_sample_total, SUM(relative_abundance_sum) disease_sum_total FROM mbodymap_species_to_samples WHERE ncbi_taxon_id={} AND BodySite="{}" AND disease!="D006262"'.format(
            ncbi_taxon_id, bodysite['BodySite'])
        disease_relative_abundance_data = connect.queryOne(sql_disease_relative_abundance)
        # print(disease_relative_abundance_data, ncbi_taxon_id, bodysite['BodySite'])

        # 健康
        sql_healthy_relative_abundance = 'SELECT SUM(loaded_uid_num_QC1) healthy_sample_total, SUM(relative_abundance_sum) healthy_sum_total FROM mbodymap_species_to_samples WHERE ncbi_taxon_id={} AND BodySite="{}" AND disease="D006262"'.format(
            ncbi_taxon_id, bodysite['BodySite'])
        healthy_relative_abundance_data = connect.queryOne(sql_healthy_relative_abundance)
        # print(healthy_relative_abundance_data, ncbi_taxon_id, bodysite['BodySite'])

        # 样本量为0时情况处理
        if disease_relative_abundance_data['disease_sample_total'] is None or int(
                disease_relative_abundance_data['disease_sample_total']) == 0:
            disease_relative_abundance = disease_prevalence = 0
        else:
            # 疾病相对丰度
            disease_relative_abundance = float(disease_relative_abundance_data['disease_sum_total']) / int(
                disease_relative_abundance_data['disease_sample_total'])
            ## 疾病流行度
            disease_prevalence = int(disease_relative_abundance_data['disease_sample_total']) / disease_total

        if healthy_relative_abundance_data['healthy_sample_total'] is None or int(
                healthy_relative_abundance_data['healthy_sample_total']) == 0:
            healthy_relative_abundance = healthy_prevalence = 0
        else:
            # 健康相对丰度
            healthy_relative_abundance = float(healthy_relative_abundance_data['healthy_sum_total']) / int(
                healthy_relative_abundance_data['healthy_sample_total'])
            ## 健康流行度
            healthy_prevalence = int(healthy_relative_abundance_data['healthy_sample_total']) / healthy_total

        # 大小肠处理
        if bodysite['BodySite'] in large:
            large_disease_relative_abundance += disease_relative_abundance
            large_disease_prevalence += disease_prevalence
            large_healthy_relative_abundance += healthy_relative_abundance
            large_healthy_prevalence += healthy_prevalence
            result.append({
                "ncbi_taxon_id": ncbi_taxon_id,
                "BodySite": bodysite['BodySite'],
                "disease_relative_abundance": disease_relative_abundance,
                "disease_prevalence": disease_prevalence,
                "healthy_relative_abundance": healthy_relative_abundance,
                "healthy_prevalence": healthy_prevalence
            })
        elif bodysite['BodySite'] in small:
            small_disease_relative_abundance += disease_relative_abundance
            small_disease_prevalence += disease_prevalence
            small_healthy_relative_abundance += healthy_relative_abundance
            small_healthy_prevalence += healthy_prevalence
            result.append({
                "ncbi_taxon_id": ncbi_taxon_id,
                "BodySite": bodysite['BodySite'],
                "disease_relative_abundance": disease_relative_abundance,
                "disease_prevalence": disease_prevalence,
                "healthy_relative_abundance": healthy_relative_abundance,
                "healthy_prevalence": healthy_prevalence
            })
        else:
            result.append({
                "ncbi_taxon_id": ncbi_taxon_id,
                "BodySite": bodysite['BodySite'],
                "disease_relative_abundance": disease_relative_abundance,
                "disease_prevalence": disease_prevalence,
                "healthy_relative_abundance": healthy_relative_abundance,
                "healthy_prevalence": healthy_prevalence
            })

    result.append({
        "ncbi_taxon_id": ncbi_taxon_id,
        "BodySite": 'The large intestine',
        "disease_relative_abundance": large_disease_relative_abundance,
        "disease_prevalence": large_disease_prevalence,
        "healthy_relative_abundance": large_healthy_relative_abundance,
        "healthy_prevalence": large_healthy_prevalence
    })

    result.append({
        "ncbi_taxon_id": ncbi_taxon_id,
        "BodySite": 'The small intestine',
        "disease_relative_abundance": small_disease_relative_abundance,
        "disease_prevalence": small_disease_prevalence,
        "healthy_relative_abundance": small_healthy_relative_abundance,
        "healthy_prevalence": small_healthy_prevalence
    })

    return result


def getAllSpecies():
    sql_allSpecies = 'SELECT DISTINCT ncbi_taxon_id FROM mbodymap_species_to_samples'
    allSpecies = connect.query(sql_allSpecies)
    result = []
    for species in allSpecies:
        speciesResult = getSpeciesData(species['ncbi_taxon_id'])
        for item in speciesResult:
            result.append(item)
    writeInCsv(
        ['ncbi_taxon_id', 'BodySite', 'disease_relative_abundance', 'disease_prevalence', 'healthy_relative_abundance',
         'healthy_prevalence'], result, 'species.csv')
    print('已完成')


def writeInCsv(keys, lists, file):
    headers = keys
    values = lists
    with open(file, "w", encoding="utf-8", newline="") as fp:
        writer = csv.DictWriter(fp, headers)
        writer.writeheader()
        writer.writerows(values)


getAllSpecies()
