#!/usr/bin/python
import os, sys, string

# Read length of the sequencing libraries; change this to match the data.
READ_LENGTH = 150

# A segment is reported as a repeat when its coverage is more than this many
# times the average coverage over all segments.
REPEAT_COV_FACTOR = 4

# Directory that holds one FASTA file per scaffold, named <scaffold>.fa.
SCAFFOLD_DIR = "all_scafs"

# FASTA file the repeat sequences are written to (relative to the cwd).
OUTPUT_PATH = "repeats.fa"


def _parse_region(region):
    """Split a ``scaffold:start-end`` label into ``(scaffold, start, end)``.

    The label is split at the LAST ``:`` so that scaffold names which
    themselves contain ``:`` or ``-`` are parsed correctly. ``start`` and
    ``end`` are returned as integers exactly as written in the label.

    Raises ValueError if the label does not have the expected shape or the
    coordinates are not integers.
    """
    scaf, colon, span = region.rpartition(":")
    start, dash, end = span.partition("-")
    if not colon or not dash:
        raise ValueError(
            "malformed region %r (expected scaffold:start-end)" % region)
    return scaf, int(start), int(end)


def _read_segments(path):
    """Parse the coverage table at *path*.

    Every non-blank line must hold at least three whitespace-separated
    fields: a ``scaffold:start-end`` label, the segment length, and a count
    (presumably the number of reads on the segment -- it is multiplied by
    READ_LENGTH downstream; confirm against the tool producing the table).

    Returns a list of ``(region, length, count)`` tuples in file order, with
    length and count converted to float.
    """
    segments = []
    with open(path, "r") as handle:
        for line in handle:
            words = line.split()
            if not words:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            segments.append((words[0], float(words[1]), float(words[2])))
    return segments


def _segment_coverage(length, count, read_length):
    """Return count * read_length / (length + read_length) for one segment."""
    return count * read_length / (length + read_length)


def _average_coverage(segments, read_length):
    """Return the pooled coverage over all segments.

    This is sum(count * read_length) / sum(length + read_length). An empty
    segment list yields 0.0 instead of dividing by zero.
    """
    total_length = sum(length + read_length for _, length, _ in segments)
    total_cov = sum(count * read_length for _, _, count in segments)
    if not total_length:
        return 0.0
    return total_cov / total_length


def _find_repeats(segments, cov, read_length, factor):
    """Return the high-coverage segments, merging adjacent ones.

    A segment qualifies when its coverage is strictly greater than
    ``cov * factor``. A qualifying segment is merged into the previous
    qualifying one when both lie on the same scaffold and the previous end
    coordinate equals the new start coordinate.

    Returns a list of ``[scaffold, start, end]`` lists in input order, with
    coordinates as written in the input labels.
    """
    threshold = cov * factor
    repeats = []
    for region, length, count in segments:
        if _segment_coverage(length, count, read_length) > threshold:
            scaf, start, end = _parse_region(region)
            last = repeats[-1] if repeats else None
            if last is not None and last[0] == scaf and last[2] == start:
                # Directly adjacent to the previous repeat: extend it.
                last[2] = end
            else:
                repeats.append([scaf, start, end])
    return repeats


def _group_by_scaffold(repeats):
    """Group repeats per scaffold, keeping first-appearance order.

    Returns ``(order, grouped)`` where *order* lists the scaffold names in
    the order they were first seen and *grouped* maps each name to a list of
    ``(start - 1, end)`` pairs, i.e. ready to be used as a Python slice.
    """
    order = []
    grouped = {}
    for scaf, start, end in repeats:
        if scaf not in grouped:
            order.append(scaf)
            grouped[scaf] = []
        grouped[scaf].append((start - 1, end))
    return order, grouped


def _read_sequence(path):
    """Return the sequence stored in the FASTA file at *path*.

    The first line is skipped as the header and every following line is
    concatenated, so the file is expected to hold a single record. Only the
    line terminator is stripped, so a final line without a trailing newline
    keeps its last base and CRLF files do not leak ``\\r`` into the sequence.
    """
    with open(path, "r") as handle:
        lines = handle.readlines()
    return "".join(line.rstrip("\r\n") for line in lines[1:])


def main(argv=None):
    """Extract repeat sequences for the coverage table named in *argv*.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the coverage table.
    Writes one FASTA record per repeat to OUTPUT_PATH, with the header
    ``>scaffold:start-end`` where start is the input start minus one and end
    is clipped to the scaffold length.

    Returns 0 on success and 1 when the table argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: python <script> <coverage_table>\n")
        return 1

    segments = _read_segments(argv[1])
    cov = _average_coverage(segments, READ_LENGTH)
    repeats = _find_repeats(segments, cov, READ_LENGTH, REPEAT_COV_FACTOR)
    order, grouped = _group_by_scaffold(repeats)

    with open(OUTPUT_PATH, "w") as out:
        for scaf in order:
            seq = _read_sequence(os.path.join(SCAFFOLD_DIR, scaf + ".fa"))
            for start, end in grouped[scaf]:
                # Never report an end beyond the actual scaffold length.
                end = min(end, len(seq))
                out.write(
                    ">" + scaf + ":" + str(start) + "-" + str(end) + "\n")
                out.write(seq[start:end] + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())
