Créée par Vincent dans le projet yapp_forked, elle consiste en la correction de petits bugs et l’amélioration générale du code. Focus sur un des points :
Problème si pednode.indiv est un str.
Solution : créer des index.
Paramètre idx -> on parcourt le pedigree par individu.
version de add_indiv de Pedigree
version de add_indiv de PedigreePeel
Lors de la création d’un pedigree, chaque individu est ajouté à l’aide de la fonction add_indiv :
@classmethod
def from_fam_file(cls, path, prt_from_FID=False, default_prt=MALE:
[...]
if rel[1] is not None:
ped.set_father(rel[0], rel[1])
if rel[2] is not None:
ped.set_mother(rel[0], rel[2])
if rel[1] is None and rel[2] is None:
ped.add_indiv(rel[0])
if rel[1] is not None and rel[2] is not None:
ped.set_fam_peel(rel[1], rel[2], rel[0])
_ = ped.nodes[rel[0]]
return ped
☑️ Créer un index relatif à chaque individu.
Il existe plusieurs types de fichiers de génotypage. Deux fonctions permettent de lire différents types de fichiers :
def LoadGenotypes(file: str, ped: "PedigreePeel", sep: str = "\t", unknowGenotype=3) -> np.ndarray:
    """Load a delimited genotype file into an array indexed by pedigree order.

    Each non-empty row holds an individual identifier in its first column,
    followed by one value per marker. The values of a row are stored at
    array row ``ped.nodes[identifier].idx``.

    Args:
        file: Path of the genotype file.
        ped: Pedigree whose ``nodes`` mapping gives, for each identifier, a
            node exposing an ``idx`` attribute.
        sep: Column delimiter of the file.
        unknowGenotype: Fill value for individuals absent from the file.

    Returns:
        Array of shape ``(len(ped.nodes), n_columns - 1)``, where
        ``n_columns`` is the width of the first non-empty row.

    Raises:
        ValueError: If the file contains no data row.
    """
    genotypes_array = None
    with open(file, newline="") as csvfile:
        for row in csv.reader(csvfile, delimiter=sep):
            # csv.reader yields an empty list for a blank line.
            if not row:
                continue
            if genotypes_array is None:
                # The first data row fixes the number of markers.
                genotypes_array = np.full(
                    (len(ped.nodes), len(row) - 1), unknowGenotype
                )
            # Only the pedigree lookup is guarded, and the first row goes
            # through the same path as every other row.
            try:
                idx = ped.nodes[row[0]].idx
            except KeyError:
                print(row[0], "is missing in the pedigree: individual ignored")
                continue
            # Values are assigned as read (strings); NumPy converts them to
            # the dtype of the array.
            genotypes_array[idx, ...] = row[1:]
    if genotypes_array is None:
        raise ValueError(f"{file}: genotype file contains no data row")
    return genotypes_array


def ReadVCF(vcfile: str, ped, mode: str = "likelihood", baseValue: float = 0.0) -> np.ndarray:
    """Read a VCF file into a single array covering every region.

    Args:
        vcfile: Path of the VCF file, passed to ``vcf.vcf2fph``.
        ped: Pedigree; iterating it yields nodes exposing ``idx`` and
            ``indiv``, and ``ped.nodes`` is keyed by sample identifier.
        mode: ``"likelihood"`` or ``"genotype"``.
        baseValue: Fill value for entries not provided by the VCF.

    Returns:
        ``float32`` array of shape ``(nLoci, 3, nInd)`` in likelihood mode,
        ``int8`` array of shape ``(nLoci, nInd)`` in genotype mode, where
        ``nLoci`` is the total number of variants over all regions.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
    """
    # Validate before parsing the file, and without ``assert`` so that the
    # check survives ``python -O``.
    if mode not in ("likelihood", "genotype"):
        raise ValueError("mode can only be likelihood or genotype.")

    samples = list(ped.nodes.keys())  # Try to remove list() when it's done
    vcfdat = vcf.vcf2fph(vcfile, samples=samples, mode=mode)

    # vcfdat is organized per region, so the regions are merged along the
    # locus axis of one pre-allocated table.
    nInd = len(samples)
    nLoci = sum(len(variants) for variants in vcfdat["variants"].values())
    if mode == "likelihood":
        genotypes = np.full((nLoci, 3, nInd), baseValue, dtype=np.float32)
    else:
        genotypes = np.full((nLoci, nInd), baseValue, dtype=np.int8)

    start = 0
    for region in vcfdat["regions"]:
        # NOTE(review): assumes vcfdat["variants"] is keyed by the same
        # region identifiers as vcfdat["regions"] and vcfdat["data"], and
        # that each individual's data has one entry per variant of the
        # region - confirm against vcf.vcf2fph.
        stop = start + len(vcfdat["variants"][region])
        for node in ped:
            try:
                # Write this region into its own slice of the locus axis
                # instead of overwriting the whole axis.
                genotypes[start:stop, ..., node.idx] = vcfdat["data"][region][node.indiv]
            except KeyError:
                logger.warning(f"Individual {node.indiv} missing in vcf.")
        start = stop
    return genotypes
Pied de page