我们拿到了一个 file.tsv 文件,需要编写一个函数来处理它。其中一项要求是:如果某一列(此处名为“low_confidence_variant”)的值为 True,则删除对应的行。这一部分我一直没能做对。另外,有什么优化建议吗?我们还需要根据处理结果绘制迈阿密图(Miami plot)。以下是我目前写的代码,任何提示都会很有帮助:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def read_file(file, chromosome):
    """Load summary statistics for a single chromosome from a TSV file.

    Rows with any missing value and rows whose ``low_confidence_variant``
    flag is true are removed. P-values equal to 0 are replaced by 1e-274 so
    that the logarithm stays finite, and a ``log10`` column holding
    ``-log10(pval)`` is added.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``. It
            must be tab-separated and contain the columns ``chromosome``,
            ``position``, ``pval`` and ``low_confidence_variant``.
        chromosome: Value of the ``chromosome`` column to keep.

    Returns:
        A DataFrame with the rows of the requested chromosome and the
        columns read from the file plus ``log10``.

    Raises:
        KeyError: If no row for ``chromosome`` remains after filtering.
    """
    df = pd.read_csv(
        file,
        sep='\t',
        usecols=['chromosome', 'position', 'pval', 'low_confidence_variant'],
    )

    # dropna() returns a new frame, so the result has to be re-assigned.
    df = df.dropna()

    # Keep only the rows that are NOT flagged as low confidence.
    # NOTE(review): assumes pandas parsed the flag column as booleans
    # (true/false, True/False) - confirm against the real file.
    df = df[~df['low_confidence_variant'].astype(bool)]

    # A boolean mask replaces groupby(...).get_group(...): it avoids building
    # every group just to select one of them.
    df = df[df['chromosome'] == chromosome]
    if df.empty:
        # get_group() raised KeyError for an unknown chromosome; keep that.
        raise KeyError(chromosome)

    # Work on an independent copy so the column assignments below do not
    # write into a slice of the filtered frame.
    df = df.copy()
    # A p-value of exactly 0 would give log10 = -inf; clamp it first.
    df['pval'] = df['pval'].replace(0, 1e-274)
    df['log10'] = -np.log10(df['pval'])
    return df
# Build a Miami plot for chromosome 1: males in the upper panel, females
# mirrored in the lower panel.
df1 = read_file('vitamin_d.females.tsv.gz', 1)
df2 = read_file('vitamin_d.males.tsv.gz', 1)

# Upper panel (males): -log10(pval) plotted as is.
xa = df2['position']
ya = df2['log10']
# Lower panel (females): the sign is flipped so the points grow downwards.
xb = df1['position']
yb = df1['log10'] * -1

# Significance line at p = 5e-8, computed once and mirrored for the lower panel.
threshold = -np.log10(5*10**-8)

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(12, 4))

ax1.scatter(xa, ya, s=1, c="tab:blue")
# Raw strings: "\i" is not a valid escape sequence in a normal string literal.
ax1.set_ylabel(r'males $\it{-log_{10}(pval)}$')
ax1.set_title('vitamin D (nmol/L)', fontweight='bold')
ax1.axhline(threshold, c='darkgray', ls='--')

ax2.scatter(xb, yb, s=1, c="tab:blue")
ax2.set_ylabel(r'females $\it{log_{10}(pval)}$')
ax2.axhline(-threshold, c='darkgray', ls='--')
ax2.set_xlabel('Chromosome 1 positions')

# Remove the vertical gap so both panels share one x axis line.
plt.subplots_adjust(hspace=.0)

# Save before show(): show() blocks, and the window may be closed (and the
# figure torn down) before the file would otherwise be written.
fig.savefig(fname='miami.png', dpi=300, bbox_inches='tight', format='png')
plt.show()
largeQ
相关分类