
# Cross-tabulate `a` against `b` and return counts[1, 2] / sum(counts[, 2]):
# among the observations in the second category of `b`, the share that falls
# in the first category of `a` (categories in table() order).
#
# a, b: vectors or factors of equal length, as accepted by table().
# Returns a single number.
# Stops when the cross-tabulation has no rows or fewer than two columns.
fraction <- function(a, b) {
  counts <- table(a, b)
  if (nrow(counts) < 1 || ncol(counts) < 2) {
    stop("fraction() needs at least one category in 'a' and two in 'b'")
  }
  counts[1, 2] / sum(counts[, 2])
}

# For the rows of `x` whose `mods` entry matches `mod`, determine a "core"
# set of names and report how much of the isPr > 0 subset they account for.
#
# `x` is a free variable (not a parameter): it must be visible from this
# function's environment and provide the fields name, mods, ndelta and isPr.
#
# mod: regular expression passed to grep() against x$mods.
#
# Returns an unnamed numeric vector of length 6:
#   [1] share of isPr > 0 rows that belong to core names
#   [2] share of ndelta in isPr > 0 rows that belongs to core names
#   [3] size of the core
#   [4] number of distinct names among the matching rows
#   [5] number of core names that occur in the isPr > 0 rows
#   [6] number of distinct names in the isPr > 0 rows
fixFraction <- function(mod) {
  ind <- rep(FALSE, length(x$name))
  ind[grep(mod, x$mods)] <- TRUE

  # Names ordered by decreasing row count; ties keep table() order.
  counts <- table(x$name[ind])
  nn <- names(sort(-counts))
  o <- match(nn, names(counts))
  o <- o[!is.na(o)]

  # Column 1: cumulative share of rows; column 2: cumulative share of ndelta.
  tmp <- cbind(
    cumsum(counts[o]) / length(x$name[ind]),
    cumsum(tapply(x$ndelta[ind], x$name[ind], sum)[o]) / sum(x$ndelta[ind])
  )

  # Core size: the prefix whose mean cumulative share is closest to 83%.
  ncoreall <- order(abs((tmp[, 1] + tmp[, 2]) / 2 * 100 - 83))[1]
  core <- nn[seq_len(ncoreall)]

  # Restrict to the isPr > 0 rows and measure the core's share there.
  ind1 <- ind & x$isPr > 0
  counts1 <- table(x$name[ind1])
  o <- match(core, names(counts1))
  o <- o[!is.na(o)]
  c(
    sum(counts1[o]) / length(x$name[ind1]),
    sum(tapply(x$ndelta[ind1], x$name[ind1], sum)[o]) / sum(x$ndelta[ind1]),
    ncoreall,
    length(nn),
    length(o),
    length(counts1)
  )
}

# List the "core" names for the rows of `x` whose `mods` entry matches `mod`,
# once over all matching rows and once over the matching rows dated after
# 2000 (x$time is converted as time / 3600 / 24 / 365.25 + 1970).
#
# `x` is a free variable (not a parameter): it must be visible from this
# function's environment and provide the fields name, mods, ndelta and time.
#
# mod: regular expression passed to grep() against x$mods.
#
# Returns list(core = <names over all matching rows>,
#              corenew = <names over matching rows after 2000>).
listCore <- function(mod) {
  # Rank names within the rows selected by `sel` and pick the core size:
  # the prefix whose mean cumulative share of rows and ndelta is closest
  # to 83%.
  rank_names <- function(sel) {
    counts <- table(x$name[sel])
    ranked <- names(sort(-counts))
    o <- match(ranked, names(counts))
    o <- o[!is.na(o)]
    shares <- cbind(
      cumsum(counts[o]) / length(x$name[sel]),
      cumsum(tapply(x$ndelta[sel], x$name[sel], sum)[o]) / sum(x$ndelta[sel])
    )
    list(
      names = ranked,
      ncore = order(abs((shares[, 1] + shares[, 2]) / 2 * 100 - 83))[1]
    )
  }

  ind <- rep(FALSE, length(x$name))
  ind[grep(mod, x$mods)] <- TRUE
  overall <- rank_names(ind)

  recent_ind <- ind & (x$time / 3600 / 24 / 365.25 + 1970) > 2000
  recent <- rank_names(recent_ind)

  list(
    core = overall$names[1:overall$ncore],
    corenew = recent$names[1:recent$ncore]
  )
}

# Print a one-line numeric summary for the rows of `x` selected by `ind`.
#
# `x` is a free variable (not a parameter): it must be visible from this
# function's environment and provide the fields mr, ndelta, nadd, name, time.
#
# ind:   logical vector selecting rows of x.
# title: label printed before the summary.
# tmp:   matrix of cumulative shares (two columns are used) whose row names
#        are contributor names, ordered so that the first `ncore` rows are
#        the core.
#
# Prints `title`, then the vector
#   nmr, ndelta, nadd, time span in years, ncore, ntotal,
#   pmr / 1000, pdelta / 1000, padd / 1000
# where the p* values are totals for the core divided by the summed length
# of the core members' activity intervals. Returns "".
report <- function(ind, title, tmp) {
  # Core size: the prefix whose mean cumulative share is closest to 83%.
  ncore <- order(abs((tmp[, 1] + tmp[, 2]) / 2 * 100 - 83))[1]
  ntotal <- dim(tmp)[1]
  mrs <- names(table(x$mr[ind]))
  nmr <- length(mrs)
  # NOTE(review): match() yields positions within `mrs`, which are then used
  # as row indices into x$ndelta / x$nadd. That looks unintended (a membership
  # test or match(mrs, x$mr) seems more likely) -- behaviour kept as is until
  # the intent is confirmed.
  ndelta <- sum(x$ndelta[match(x$mr, as.character(mrs), nomatch = 0)])
  nadd <- sum(x$nadd[match(x$mr, as.character(mrs), nomatch = 0)])

  # this is less precise but works
  devs <- dimnames(tmp)[[1]]
  str <- devs[1]
  dind <- rep(FALSE, sum(ind))
  dind[grep(devs[1], x$name[ind])] <- TRUE
  interval <- range(x$time[ind][dind]) / 3600 / 24 / 365.25
  avint <- interval[2] - interval[1]
  # seq_len(ncore)[-1] is empty when ncore == 1; `2:ncore` would have run
  # backwards over c(2, 1) and counted the first contributor twice.
  for (dev in devs[seq_len(ncore)[-1]]) {
    # `str` accumulates a regular-expression alternation of all core names.
    str <- paste(str, dev, sep = "|")
    dind <- rep(FALSE, sum(ind))
    dind[grep(dev, x$name[ind])] <- TRUE
    interval <- range(x$time[ind][dind]) / 3600 / 24 / 365.25
    if (is.na(interval[2] - interval[1])) {
      print(dev)
    }
    avint <- avint + interval[2] - interval[1]
  }
  dind <- rep(FALSE, sum(ind))
  dind[grep(str, x$name[ind])] <- TRUE
  dmrs <- names(table(x$mr[ind][dind]))
  pmr <- length(dmrs) / avint
  # NOTE(review): same position-as-index pattern as for ndelta/nadd above.
  pdelta <- sum(x$ndelta[ind][dind][match(x$mr[ind][dind], as.character(dmrs), nomatch = 0)]) / (avint)
  padd <- sum(x$nadd[ind][dind][match(x$mr[ind][dind], as.character(dmrs), nomatch = 0)]) / (avint)

  # this is more precise but uses up all the memory
  # pmr <- 0; pdelta <- 0; padd <- 0;
  # for (dev in dimnames(tmp)[[1]][1:ncore]) {
  #   dind <- rep(F, sum(ind)); dind[grep(dev, x$name[ind])] <- T;
  #   interval <- range(x$time[ind][dind])/3600/24/365.25;
  # #  dmrs <- names(table(x$mr[ind][dind]));
  #   pmr <- pmr + length(dmrs)/(interval[2]-interval[1]);
  #   pdelta <- pdelta + sum(x$ndelta[ind][dind][match(x$mr[ind][dind],as.character(dmrs),nomatch=0)])/(interval[2]-interval[1]);
  #   padd <- padd + sum(x$nadd[ind][dind][match(x$mr[ind][dind],as.character(dmrs),nomatch=0)])/(interval[2]-interval[1]);
  # }
  # pmr <- pmr/ncore; pdelta <- pdelta/ncore; padd <- padd/ncore;

  # Overall time span of the selected rows, in years.
  interval <- range(x$time[ind]) / 3600 / 24 / 365.25
  print(title)
  print(c(
    round(c(nmr, ndelta, nadd), 0),
    round(interval[2] - interval[1], 3),
    ncore,
    ntotal,
    round(c(pmr, pdelta, padd) / 1000, 2)
  ))
  ""
}


# Plot four series against time as lines and add a legend.
#
# nlog, nmr, ndelta, nadd: numeric vectors of equal length; they become the
#   four plotted columns. names(ndelta) must be numeric strings; the x axis is
#   1970 + as.numeric(names(ndelta)).
# leg: legend labels, one per series.
# i:   plot title.
#
# The last element of every series is left out of the plot. The y axis is
# fixed to [0, 0.6] and the legend is anchored at x = 1998.2, y = 0.6.
# Returns the value of legend() invisibly.
plotTrend <- function(nlog, nmr, ndelta, nadd, leg, i) {
  last <- length(ndelta)
  series <- cbind(nlog, nmr, ndelta, nadd)
  matplot(
    1970 + as.numeric(names(ndelta))[-last],
    # drop = FALSE keeps a one-row matrix when only two periods are given.
    series[-last, , drop = FALSE],
    type = "l", xlab = "years", col = c(1, 2, 4, 6), ylim = c(0, .6),
    ylab = "fraction", lwd = 3, main = i
  )
  legend(1998.2, .6, col = c(1, 2, 4, 6), lty = 1:4, legend = leg, lwd = 3, cex = 1.8)
}

# Plot cumulative shares by contributor for the rows of `x` selected by `ind`
# and return the plotted matrix.
#
# Free variables (not parameters) that must be visible from this function's
# environment:
#   x  - provides the fields name, ndelta, nadd and ndel
#   nn - character vector of names in the order they are to be accumulated
#   o0 - upper limit of the (logarithmic) x axis
#
# ind:   logical vector selecting rows of x.
# title: plot title.
#
# Returns a four-column matrix of cumulative shares, one row per name of `nn`
# that occurs in the selected rows. The columns are the shares of rows, of
# ndelta, of nadd and of ndel, labelled in the legend as MRs, Delta, Lines
# Added and Lines Deleted.
plotCode <- function(ind, title) {
  counts <- table(x$name[ind])
  o <- match(nn, names(counts))
  o <- o[!is.na(o)]
  tmp <- cbind(
    cumsum(counts[o]) / length(x$name[ind]),
    cumsum(tapply(x$ndelta[ind], x$name[ind], sum)[o]) / sum(x$ndelta[ind]),
    cumsum(tapply(x$nadd[ind], x$name[ind], sum)[o]) / sum(x$nadd[ind]),
    cumsum(tapply(x$ndel[ind], x$name[ind], sum)[o]) / sum(x$ndel[ind])
  )
  # print(cumsum(table(x$name[ind])[o])[c(10,15,20)]/sum(ind)*100);
  matplot(
    1:dim(tmp)[1], tmp,
    type = "l", lty = 1:4, lwd = 3, col = 1:4, log = "x",
    xlim = c(1, o0), ylim = c(0, 1),
    xlab = "Number of individuals", ylab = "", cex = 1, main = title
  )
  legend(
    20, .2,
    lty = 1:4, col = 1:4, lwd = 3,
    c("Fraction of MRs", "Fraction of Delta", "Fraction of Lines Added", "Fraction of Lines Deleted"),
    cex = 1
  )
  tmp
}

# For each pattern in `mods`, draw the cumulative share of ndelta by
# contributor (contributors ordered by decreasing ndelta) on a common plot
# with a logarithmic x axis.
#
# `x` is a free variable (not a parameter): it must be visible from this
# function's environment and provide the fields name, mods and ndelta.
#
# mods: character vector of regular expressions matched against x$mods; the
#       patterns are also used as legend labels.
#
# The x axis is fixed to [1, 174]. Returns the value of legend() invisibly,
# or invisible NULL when `mods` is empty (nothing is drawn).
plotCode1 <- function(mods) {
  if (length(mods) == 0) {
    return(invisible(NULL))
  }

  # seq_along() instead of 1:length(mods), which counts 1, 0 on empty input.
  for (i in seq_along(mods)) {
    ind <- rep(FALSE, length(x$name))
    ind[grep(mods[i], x$mods)] <- TRUE
    # ind[x$isPr>0] <- F;
    nn <- names(sort(-tapply(x$ndelta[ind], x$name[ind], sum)))
    o <- match(nn, names(table(x$name[ind])))
    o <- o[!is.na(o)]
    tmp <- cumsum(tapply(x$ndelta[ind], x$name[ind], sum)[o]) / sum(x$ndelta[ind])
    if (i == 1) {
      # The first pattern sets up the axes; the others are overlaid.
      plot(
        1:length(tmp), tmp,
        type = "l", lty = i, lwd = 3, col = i,
        log = "x", xlim = c(1, 174), ylim = c(0, 1),
        xlab = "Number of individuals", ylab = "", cex = 1.8, main = ""
      )
    } else {
      lines(1:length(tmp), tmp, lty = i, lwd = 3, col = i)
    }
  }
  # lines (c(15,15),c(0,1));
  legend(20, .6, lty = seq_along(mods), col = seq_along(mods), lwd = 3, mods, cex = 1.8)
}

# Draw four plots of the distribution of x$c - x$o (seconds, shown as
# "Days open +1" on a logarithmic axis) for the rows of `x` selected by `ind`,
# broken down by: fix (whether x$datef is non-missing), priority (x$pri),
# module (x$mod1) and change (x$isCh).
#
# `x` is a free variable (not a parameter): it must be visible from this
# function's environment and provide the fields c, o, datef, pri, mod1, isCh.
#
# ind:   logical vector selecting rows of x.
# title: text appended to each plot title.
#
# The priority plot expects exactly five distinct x$pri values and the fix and
# change plots exactly two groups each. Only modules with more than 350 rows
# in all of x are shown in the module plot. Quartiles of the two change groups
# are printed in days. Returns the value of the last legend() call invisibly.
plotBugs <- function(ind, title) {
  probs <- 0:100 / 100

  # broken down by fix
  tmp <- tapply((x$c - x$o)[ind], !is.na(x$datef[ind]), quantile, probs = probs)
  matplot(cbind(tmp[[1]], tmp[[2]]) / 3600 / 24 + 1, probs,
    log = "x", type = "l", xlab = "Days open +1", ylab = "Cumulative probability",
    xlim = c(1, 550), ylim = c(0, 1),
    lty = 1:2, col = 1:2, lwd = 3, main = paste("PRs by fix", title)
  )
  lines(c(1, 550), c(.5, .5))
  legend(1, 1, names(tmp), lty = 1:2, col = 1:2, lwd = 3, cex = .5)

  # broken down by priority
  tmp <- tapply((x$c - x$o)[ind], x$pri[ind], quantile, probs = probs)
  matplot(cbind(tmp[[1]], tmp[[2]], tmp[[3]], tmp[[4]], tmp[[5]]) / 3600 / 24 + 1, probs,
    log = "x", type = "l", xlab = "Days open +1", ylab = "Cumulative probability",
    xlim = c(1, 550), ylim = c(0, 1),
    lty = 1:5, col = 1:5, lwd = 3, main = paste("PRs by priority", title)
  )
  lines(c(1, 550), c(.5, .5))
  legend(1, 1, names(tmp), lty = 1:5, col = 1:5, lwd = 3, cex = .5)

  # broken down by module
  ind1 <- match(x$mod1,
    names(table(x$mod1))[table(x$mod1) > 350],
    nomatch = 0
  )
  ind1[ind1 > 0] <- 1
  tmp <- tapply((x$c - x$o)[ind1 > 0 & ind], x$mod1[ind1 > 0 & ind], quantile, probs = probs)
  res <- tmp[[1]]
  # seq_along(...)[-1] is empty for a single module; `2:length(...)` would
  # have counted 2, 1 and failed on the missing second group.
  for (i in seq_along(names(tmp))[-1]) res <- cbind(res, tmp[[i]])

  matplot(res / 3600 / 24 + 1, probs,
    log = "x", type = "l", xlab = "Days open +1", ylab = "Cumulative probability",
    lty = 1:length(names(tmp)), col = 1:length(names(tmp)), lwd = 3,
    xlim = c(1, 550), ylim = c(0, 1),
    main = paste("PRs by module", title)
  )
  lines(c(1, 550), c(.5, .5))
  legend(1, 1, names(tmp),
    lty = 1:length(names(tmp)), lwd = 3,
    col = 1:length(names(tmp)), cex = .5
  )

  # broken down by isChange
  tmp <- tapply((x$c - x$o)[ind], x$isCh[ind], quantile, probs = probs)
  # Element k of the quantile vector is the (k - 1)th percentile, so the
  # quartiles sit at positions 1, 26, 51, 76 and 101.
  print(tmp[[1]][c(1, 26, 51, 76, 101)] / 3600 / 24)
  print(tmp[[2]][c(1, 26, 51, 76, 101)] / 3600 / 24)
  matplot(cbind(tmp[[1]], tmp[[2]]) / 3600 / 24 + 1, probs,
    xlim = c(1, 550), ylim = c(0, 1),
    log = "x", type = "l", xlab = "Days open +1", ylab = "Cumulative probability",
    lty = 1:2, col = 1:2, lwd = 3, main = paste("PRs by change", title)
  )
  lines(c(1, 550), c(.5, .5))
  legend(1, 1, names(tmp), lty = 1:2, col = 1:2, lwd = 3, cex = .6)
}

# Draw four plots of the distribution of x$c - x$o (seconds, shown as
# "Days open + 1" on a logarithmic axis) for the rows of `x` selected by
# `ind`, broken down by: fix ("FIXED" when x$datef is present, "OTHER"
# otherwise), priority (x$pri), module (x$mod1, restricted to `mods`) and
# the caller-supplied `prepost` grouping.
#
# x:       list or data frame with the fields c, o, datef, pri and mod1.
# ind:     logical vector selecting rows of x.
# prepost: grouping vector aligned with the rows of x; two groups expected.
# title:   text appended to each plot title.
# mods:    module names to include in the module plot.
#
# The priority plot expects exactly five distinct x$pri values.
# Returns the value of the last legend() call invisibly.
plotBugs1 <- function(x, ind, prepost, title, mods) {
  probs <- 0:100 / 100

  # broken down by fix
  fff <- rep("FIXED", sum(ind))
  fff[is.na(x$datef[ind])] <- "OTHER"
  tmp <- tapply((x$c - x$o)[ind], fff, quantile, probs = probs)
  matplot(cbind(tmp[[1]], tmp[[2]]) / 3600 / 24 + 1, probs,
    log = "x", type = "l", xlab = "Days open + 1", ylab = "Cumulative probability",
    xlim = c(1, 550), ylim = c(0, 1),
    lty = 1:2, col = 1:2, lwd = 3, main = paste("PRs by fix", title)
  )
  lines(c(1, 550), c(.5, .5))
  legend(1, 1, names(tmp), lty = 1:2, col = 1:2, lwd = 3, cex = 1.5)

  # broken down by priority
  tmp <- tapply((x$c - x$o)[ind], x$pri[ind], quantile, probs = probs)
  matplot(cbind(tmp[[1]], tmp[[2]], tmp[[3]], tmp[[4]], tmp[[5]]) / 3600 / 24 + 1, probs,
    log = "x", type = "l", xlab = "Days open + 1", ylab = "Cumulative probability",
    xlim = c(1, 550), ylim = c(0, 1),
    lty = 1:5, col = 1:5, lwd = 3, main = paste("PRs by priority", title)
  )
  lines(c(1, 550), c(.5, .5))
  legend(1, 1, names(tmp), lty = 1:5, col = 1:5, lwd = 3, cex = 1.5)

  # broken down by module
  ind1 <- match(x$mod1, mods, nomatch = 0)
  ind1[ind1 > 0] <- 1
  tmp <- tapply((x$c - x$o)[ind1 > 0 & ind], x$mod1[ind1 > 0 & ind], quantile, probs = probs)
  res <- tmp[[1]]
  # seq_along(...)[-1] is empty for a single module; `2:length(...)` would
  # have counted 2, 1 and failed on the missing second group.
  for (i in seq_along(names(tmp))[-1]) res <- cbind(res, tmp[[i]])
  matplot(res / 3600 / 24 + 1, probs,
    log = "x", type = "l", xlab = "Days open + 1", ylab = "Cumulative probability",
    lty = 1:length(names(tmp)), col = 1:length(names(tmp)), lwd = 3,
    xlim = c(1, 550), ylim = c(0, 1),
    main = paste("PRs by module", title)
  )
  lines(c(1, 550), c(.5, .5))
  legend(1, 1, names(tmp),
    lty = 1:length(names(tmp)), lwd = 3,
    col = 1:length(names(tmp)), cex = 1.4
  )

  # broken down by prepost
  # Uses the `prepost` argument; the earlier spelling `prepos` referred to a
  # name that is not defined in this function.
  tmp <- tapply((x$c - x$o)[ind], prepost[ind], quantile, probs = probs)
  # print (tmp[[1]][c(1,25,50,75, 100)]/3600/24)
  # print (tmp[[2]][c(1,25,50,75, 100)]/3600/24)
  matplot(cbind(tmp[[1]], tmp[[2]]) / 3600 / 24 + 1, probs,
    xlim = c(1, 550), ylim = c(0, 1),
    log = "x", type = "l", xlab = "Days open + 1", ylab = "Cumulative probability",
    lty = 1:2, col = 1:2, lwd = 3, main = paste("PRs by date", title)
  )
  lines(c(1, 550), c(.5, .5))
  legend(1, 1, names(tmp), lty = 1:2, col = 1:2, lwd = 3, cex = 1.5)
}




















