#######################
### By Joyful Bioinfo
#######################
import glob, os, sys
def Make_dic_of_GeneSymbo_info(Geneinfo_file):
    """Build gene-ID lookup tables from a tab-separated annotation file.

    The first line of the file is treated as a header and skipped. For every
    following non-blank line, the first field is the gene ID, the third field
    (if present) is the gene symbol and the fourth field (if present) is the
    gene description. The second field is not used. Missing symbol or
    description fields are recorded as ".".

    Args:
        Geneinfo_file: Path to the tab-separated annotation file.

    Returns:
        A tuple ``(Dic_Geneid_GeneSymbol, Dic_Geneid_GeneInfo)`` of dicts
        mapping gene ID to symbol and gene ID to description respectively.
        If a gene ID occurs more than once, the last occurrence wins.
    """
    Dic_Geneid_GeneSymbol = {}
    Dic_Geneid_GeneInfo = {}
    # The context manager guarantees the handle is closed, even on error.
    with open(Geneinfo_file, "r") as infile:
        infile.readline()  # discard the header line
        for sLine in infile:
            # strip() also drops trailing tabs, so rows whose last columns
            # are empty come out shorter rather than with "" fields.
            sList = sLine.strip().split("\t")
            sGeneId = sList[0]
            if not sGeneId:
                # Blank line: nothing to register.
                continue
            # Pick fields by position so that rows with fewer than 3 or more
            # than 4 columns never inherit values from the previous row.
            sGeneSymbol = sList[2] if len(sList) > 2 else "."
            sGeneInfo = sList[3] if len(sList) > 3 else "."
            Dic_Geneid_GeneSymbol[sGeneId] = sGeneSymbol
            Dic_Geneid_GeneInfo[sGeneId] = sGeneInfo
    print("Dic Done")
    return Dic_Geneid_GeneSymbol, Dic_Geneid_GeneInfo
def Open_cvs_file_and_write(Dic_Geneid_GeneSymbol, Dic_Geneid_GeneInfo,CountFile,OutfileName):
    """Annotate a tab-separated count table with gene symbol and description.

    The first line of ``CountFile`` is the header: its first column is
    replaced by the three columns ``Ensembl_Gene_ID``, ``Gene_Symbol`` and
    ``Gene_Info``, and the remaining header columns are copied through.
    For each following non-blank row, the first field (with any double
    quotes removed) is used as the gene ID; its symbol and description are
    looked up in the two dictionaries, with "NA" written when the ID is
    absent. The remaining fields of the row are copied unchanged.

    Args:
        Dic_Geneid_GeneSymbol: Mapping of gene ID to gene symbol.
        Dic_Geneid_GeneInfo: Mapping of gene ID to gene description.
        CountFile: Path to the tab-separated input table.
        OutfileName: Path of the annotated table to write (overwritten).

    Returns:
        None. The result is written to ``OutfileName``.
    """
    # Open the input first so a missing CountFile does not leave behind an
    # empty (or truncated) output file; ``with`` closes both on any exit.
    with open(CountFile, "r") as infile, open(OutfileName, "w") as outfile:
        # Strip the line ending before splitting and add it back explicitly,
        # so the header is always newline-terminated even when the input
        # header has a single column or no trailing newline.
        FirstLine_list = infile.readline().rstrip("\r\n").split("\t")
        # NOTE(review): the first header column is dropped on the assumption
        # that it labels the gene-ID column - confirm for tables whose header
        # has one column fewer than the data rows.
        outfile.write(
            "Ensembl_Gene_ID\tGene_Symbol\tGene_Info\t"
            + "\t".join(FirstLine_list[1:])
            + "\n"
        )
        for sLine in infile:
            sList = sLine.strip().split("\t")
            sGeneId = sList[0].replace('"', '')
            if not sList[0]:
                # Blank line: skip instead of emitting an all-"NA" row.
                continue
            sGeneSymbol = Dic_Geneid_GeneSymbol.get(sGeneId, "NA")
            sGeneInfo = Dic_Geneid_GeneInfo.get(sGeneId, "NA")
            outfile.write(
                sGeneId + "\t" + sGeneSymbol + "\t" + sGeneInfo + "\t"
                + "\t".join(sList[1:]) + "\n"
            )
if __name__ == "__main__" :
    # Command-line entry point. Expects three positional arguments:
    #   1. count table to annotate (tab-separated)
    #   2. gene annotation file (tab-separated; gene ID, unused, symbol, info)
    #   3. output file path
    # Any additional arguments are ignored.
    if len(sys.argv) < 4:
        # Exit with a readable usage message rather than an IndexError.
        sys.exit(
            "Usage: " + sys.argv[0] + " <CountFile> <EnsemblFile> <OutfileName>"
        )
    CountFile = sys.argv[1]
    EnsemblFile = sys.argv[2]
    OutfileName = sys.argv[3]
    Dic_Geneid_GeneSymbol, Dic_Geneid_GeneInfo = Make_dic_of_GeneSymbo_info(EnsemblFile)
    Open_cvs_file_and_write(Dic_Geneid_GeneSymbol, Dic_Geneid_GeneInfo,CountFile,OutfileName)