-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgetMSRandomSubset.py
executable file
·64 lines (56 loc) · 2.19 KB
/
getMSRandomSubset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
import sys, gzip, random
msFileName, numReps, header, headerNumReps = sys.argv[1:]
if headerNumReps != "actual":
if headerNumReps < 0:
sys.exit("headerNumReps must be < 0 or 'na' or 'actual'. AAAARRRGGGHHH!!!\n")
elif headerNumReps == "na":
assert header == "no_header"
else:
headerNumReps = int(headerNumReps)
numReps = int(numReps)
if not header in ["header", "no_header"]:
sys.exit("header must be set to either 'header' or 'no_header'. AAAARRRRGGGGHHHHHHHHHH!!!!\n")
def readAllMSRepsFromFile(msFileName):
msStream = open(msFileName)
header = msStream.readline().strip().split()
program,numSamples,numSims = header[:3]
if len(header) > 3:
otherParams = " " + " ".join(header[3:])
else:
otherParams = ""
numSamples, numSims = int(numSamples),int(numSims)
#advance to first simulation
line = msStream.readline()
while line.strip() != "//":
line = msStream.readline()
repLs = []
while line:
if line.strip() != "//":
sys.exit("Malformed ms-style output file: read '%s' instead of '//'. AAAARRRRGGHHH!!!!!\n" %(line.strip()))
repStr = ["\n//"]
repStr.append(msStream.readline().strip()) #segsites line
positionsLine = msStream.readline().strip()
if not positionsLine.startswith("positions:"):
sys.exit("Malformed ms-style output file. AAAARRRRGGHHH!!!!!\n")
repStr.append(positionsLine) #positions line
for i in range(numSamples):
currLine = msStream.readline()
repStr.append(currLine.strip())
line = msStream.readline()
#advance to the next non-empty line or EOF
while line and line.strip() == "":
line = msStream.readline()
repStr = "\n".join(repStr)
repLs.append(repStr)
msStream.close()
return numSamples, repLs
numSamples, repLs = readAllMSRepsFromFile(msFileName)
if header == "header":
if headerNumReps == "actual":
print "./msStyle %s %s\nblah" %(numSamples, numReps)
else:
print "./msStyle %s %s\nblah" %(numSamples, headerNumReps)
if numReps > 0:
random.shuffle(repLs)
print "\n".join(repLs[:numReps])