#Author: Kristina L. Cockle

# Morphometrics of Eleothreptus anomalus and sexing of juveniles by morphometry
# Multiple logistic regression models with Monte Carlo Cross Validation, in a for loop

#This code reproduces the morphometric analysis for adults and juveniles in:
#Cockle KL, Fariña N, Villalba O, Kopuchian C, Kraemer S, Bodrati A, Pagano LG, Pietrek AG. 
#Females are larger and less detected than males but experience similar annual survival in the lek-displaying Eleothreptus anomalus (Sickle-winged Nightjar)
#Ornithology. Accepted 2 December 2025.

#Last updated 9 Jan 2026 by Kristina Cockle (simplified code for sharing online with datafiles)

#**********************************
######    Load packages and data #####
#**********************************
library(dplyr) # package to perform data manipulation 
library(tidyverse) # package to perform data manipulation and visualization 
library(caTools) # package Used to split the data used during classification into train and test subsets 
library(data.table) #used to randomly sample out the excess males from the training dataset
library(jtools) #plot model parameters
library(broom.mixed) #plot model parameters
library(ROCR)
library(logistf) #Firth's logistic regression
library(brglm2) # a different penalized logistic regression
library(ggstatsplot) #plot model parameters for logistf models (didn't work with model lists though)
library(RColorBrewer) # palette
library(Polychrome) # another palette
library(scales) #choose colors from palette
library(gridExtra) #arrange ggplot figs
library(glmmTMB) # for ICC

capturas<- read.csv("Eleothreptus_measurements.csv")
months_dat<-read.csv("Eleothreptus_months.csv", header = TRUE)


#*******************************************************
######    Get some numbers for the Methods and Results #####
#*******************************************************

length(unique(capturas$codigo_salida)) #total nights out capturing
length(unique(capturas$codigo_salida[capturas$Localidad=="RNRSM"])) #For MEthods: total nights out in RNRSM
table(capturas$Localidad[capturas$Localidad!="RNRSM"], 
      capturas$codigo_salida[capturas$Localidad!="RNRSM"] ) #For Methods: number of nights at other sites

cap<-data.frame(ID=capturas$ID_Final, 
                Localidad = capturas$Localidad, codigo_salida=capturas$codigo_salida, Year=capturas$Year, 
                Wt = capturas$Peso, Tail = capturas$Cola, Wing = capturas$Ala, Largo=capturas$Largo_total,
                Sec = capturas$Secundarias, Tarso = capturas$Tarso, Tarso_completo = capturas$Tarso_completo)

cap$Sexo_final<- capturas$Sexo_final
cap$Age <- capturas$Edad_final 
cap$Sexo_mol<-capturas$Sexado_Mol
cap$Sexo_plum<-capturas$Sexo_plum

cap<-arrange(cap, codigo_salida)
#cap<-subset(cap, cap$Localidad =="RNRSM") #we can keep birds from Urutaú, Don Luis y CSJ (total: 3 HA, 1 MA) for morfometrics part.
cap<-droplevels(cap)

head(cap)
summary(cap)

#***********************************************************************
######    A bit of data management     ###################
#***********************************************************************

cap[cap == ""] <- NA
cap[cap == "?"] <- NA
cap$Tarso[cap$Year<2019] <-NA #remove tarsus measurements prior to 2019 because only on 15 oct 2018 we started measuring consistently
#and remove doubtful IDs; they won't be used
cap<-subset(cap, cap$ID!="dudoso")
cap<-subset(cap, cap$ID!="escapo")
table(cap$ID, cap$Sexo_plum)

length(cap$ID) #Total number of captures with known ID
length(unique(cap$ID)) #Number of Individuals captured

# Dataset of individuals sexed by plumage
plum<-subset(cap, !is.na(cap$Sexo_plum))
length(unique(plum$ID)) #total individuals sexed by plumage
plumF<-subset(plum, plum$Sexo_plum == "H")
length(unique(plumF$ID)) #total Females sexed by plumaje
plumM<-subset(plum, plum$Sexo_plum == "M")
length(unique(plumM$ID)) #total Females sexed by plumaje

#aggregate by ID so using each individual only once.
#Take the first capture for each individual so we get them as juveniles

agg.sexofinal<-aggregate(c(cap$Sexo_final), by = list(cap$ID), FUN=last)
colnames(agg.sexofinal)<-c("ID", "Sexo_final")

agg.sexoplum<-aggregate(c(cap$Sexo_plum), by = list(cap$ID), FUN=last)
colnames(agg.sexoplum)<-c("ID", "Sexo_plum")

agg.sexomol<-aggregate(c(cap$Sexo_mol), by = list(cap$ID), FUN=first)
colnames(agg.sexomol)<-c("ID", "Sexo_mol")

agg.age<-aggregate(c(cap$Age), by = list(cap$ID), FUN=first)
colnames(agg.age)<-c("ID", "Age")

agg<-merge(agg.sexofinal, agg.sexomol)
agg<-merge(agg,agg.age)
agg<-merge(agg, agg.sexoplum)

table(agg$Sexo_plum)

table(agg$Sexo_mol, agg$Sexo_final)
table(agg$Sexo_mol, agg$Sexo_plum)

### how many of the unknown sex birds are known juveniles?
unk.sex<-subset(agg, is.na(agg$Sexo_final))
table(unk.sex$Age)
#remove the ones with age = Pichón (these are very small babies) and age = NA
#keep only individuals with Age = J
unk.sex<-subset(unk.sex, unk.sex$Age=="J")

# sexed individuals (sexo final is M or H). How many captures of juvenile M, juvenile H.
capM<-subset(cap, cap$Sexo_final=="M")
table(capM$Age)
capH<-subset(cap, cap$Sexo_final=="H")
table(capH$Age)

#total captures &individuals by sex - RESTRICT TO RNRSM for reporting to co-authors
length(capH$ID[capH$Localidad=="RNRSM"])
length(capM$ID[capM$Localidad =="RNRSM"])
length(unique(capH$ID[capH$Localidad=="RNRSM"]))
length(unique(capM$ID[capM$Localidad=="RNRSM"]))

HJ<-subset(capH, capH$Age=="J")
length(HJ$ID[HJ$Localidad=="RNRSM"]) # number of captures
length(unique(HJ$ID[HJ$Localidad=="RNRSM"])) #number of individuals
HI<-subset(capH, capH$Age=="I")
length(HI$ID[HI$Localidad=="RNRSM"])
length(unique(HI$ID[HI$Localidad=="RNRSM"]))
HA<-subset(capH, capH$Age=="A")
length(HA$ID[HA$Localidad=="RNRSM"])
length(unique(HA$ID[HA$Localidad=="RNRSM"]))

MJ<-subset(capM, capM$Age=="J")
length(MJ$ID[MJ$Localidad=="RNRSM"])
length(unique(MJ$ID[MJ$Localidad=="RNRSM"]))
MI<-subset(capM, capM$Age=="I")
length(MI$ID[MI$Localidad=="RNRSM"])
length(unique(MI$ID[MI$Localidad=="RNRSM"]))
MA<-subset(capM, capM$Age=="A")
length(MA$ID[MA$Localidad=="RNRSM"])
length(unique(MA$ID[MA$Localidad=="RNRSM"]))

#Juveniles sexed by molecular
jjj<-subset(cap, cap$Age=="J")
jjj.mol<-subset(jjj, !is.na(jjj$Sexo_mol))
length(unique(jjj.mol$ID)) #total number of juveniles sexados molecularmente a macho o hembra
jjj.molF<-subset(jjj.mol, jjj.mol$Sexo_mol=="H") # sexados-molcularmente a hembra
length(unique(jjj.molF$ID)) # N juveniles sexados molecularmente a hembra
jjj.molM<-subset(jjj.mol, jjj.mol$Sexo_mol=="M") # sexados-molcularmente a macho
length(unique(jjj.molM$ID))

jjj.mol$coincide_sexo<- ifelse(jjj.mol$Sexo_mol==jjj.mol$Sexo_plum, "1", "0")
table(jjj.mol$Sexo_mol, jjj.mol$coincide_sexo)
jjcoincide<-subset(jjj.mol, jjj.mol$coincide_sexo==1)
length(unique(jjcoincide$ID))

#***********************************************************************
######    Basic morphometry                                          #####
#***********************************************************************

# Make a dataset with the birds for which we have known sex
cap$SexAge<-paste(cap$Sexo_final, cap$Age)
cap$SexAge<-as.factor(cap$SexAge)

#remove birds of unknown sex or age for dispersion diagram
capo<-cap
capo[capo == ""] <- NA
capo[capo == "?"] <- NA
capo<-subset(capo, !is.na(capo$Sexo_final))
capo<-subset(capo, !is.na(capo$Age))
#remove intermediates
capo<-subset(capo, capo$Age != "I")
capo<-droplevels(capo)
summary(capo)
summary(capo$Wt) # make sure it came in as a number, otherwise have to fix in excel
summary(capo$Tail)# make sure it came in as a number
summary(capo$Wing)# make sure it came in as a number
summary(capo$Sec) #make sure it came in as a number
summary(capo$Tarso)#make sure it came in as a number
summary(capo$Tarso_completo)#make sure it came in as a number
summary(capo$Largo)

#drop a newbaby
capo<-subset(capo, capo$Wing>100)

#run histograms to make sure the measurements look good. Check for outliers!
par(mfrow=c(2,2))
hist(capo$Wt[capo$SexAge=="H A"], xlim=c(25,80), las=1, breaks=10)
hist(capo$Wt[capo$SexAge=="M A"], xlim=c(25,80),las=1, breaks=10)
hist(capo$Wt[capo$SexAge=="H J"], xlim=c(25,80),las=1, breaks=10)
hist(capo$Wt[capo$SexAge=="M J"], xlim=c(25,80),las=1, breaks=10)

hist(capo$Tail[capo$SexAge=="H A"],xlim=c(65,90), las=1, breaks=10)
hist(capo$Tail[capo$SexAge=="M A"], xlim=c(65,90), las=1,breaks=10)
hist(capo$Tail[capo$SexAge=="H J"],xlim=c(65,90), las=1, breaks=10)
hist(capo$Tail[capo$SexAge=="M J"], xlim=c(65,90), las=1,breaks=10)

hist(capo$Wing[capo$SexAge=="H A"], xlim=c(70,150), las=1, breaks=10)
hist(capo$Wing[capo$SexAge=="M A"], xlim=c(70,150),las=1, breaks=10)
hist(capo$Wing[capo$SexAge=="H J"], xlim=c(70,150),las=1, breaks=10)
hist(capo$Wing[capo$SexAge=="M J"], xlim=c(70,150),las=1, breaks=10)

hist(capo$Sec[capo$SexAge=="H A"], xlim=c(45,90), las=1, breaks=10, main = "", xlab = "Secundarias HA")
hist(capo$Sec[capo$SexAge=="M A"], xlim=c(45,90),las=1, breaks=10, main = "", xlab="Secundarias MA")
hist(capo$Sec[capo$SexAge=="H J"], xlim=c(45,90),las=1, breaks=10, main = "", xlab = "Secundarias HJ")
hist(capo$Sec[capo$SexAge=="M J"], xlim=c(45,90),las=1, breaks=10, main = "", xlab= "Secundarias MJ")

hist(capo$Tarso[capo$SexAge=="H A"], xlim=c(17,23), las=1)
hist(capo$Tarso[capo$SexAge=="M A"], xlim=c(17,23),las=1)
hist(capo$Tarso[capo$SexAge=="H J"], xlim=c(17,23),las=1)
hist(capo$Tarso[capo$SexAge=="M J"], xlim=c(17,23),las=1)

hist(capo$Tarso_completo[capo$SexAge=="H A"], xlim=c(20,26), las=1)
hist(capo$Tarso_completo[capo$SexAge=="M A"], xlim=c(20,26),las=1)
hist(capo$Tarso_completo[capo$SexAge=="M J"], xlim=c(20,26),las=1, breaks = 20)
hist(capo$Tarso_completo[capo$SexAge=="H J"], xlim=c(20,26),las=1)

hist(capo$Largo[capo$SexAge=="H A"],  las=1, breaks=10)
hist(capo$Largo[capo$SexAge=="M A"],las=1, breaks=10)
hist(capo$Largo[capo$SexAge=="M J"],las=1, breaks=10)
hist(capo$Larg[capo$SexAge=="H J"], las=1, breaks=10)

#Tarso completo looks a bit better than normal tarso. We'll use tarso completo.

#*********************************************************************************
######    Get means, SD, range, N, and Wilcoxon test for Supplementary Table S1 #######
#*********************************************************************************

capo$Wt<-as.numeric(capo$Wt)

mean(capo$Wt[capo$SexAge=="H A"], na.rm=TRUE)
sd(capo$Wt[capo$SexAge=="H A"], na.rm=TRUE)
min(capo$Wt[capo$SexAge=="H A"], na.rm=TRUE)
max(capo$Wt[capo$SexAge=="H A"], na.rm=TRUE)
length(na.omit(capo$Wt[capo$SexAge=="H A"]))

mean(capo$Wt[capo$SexAge=="H J"], na.rm=TRUE)
sd(capo$Wt[capo$SexAge=="H J"], na.rm=TRUE)
min(capo$Wt[capo$SexAge=="H J"], na.rm=TRUE)
max(capo$Wt[capo$SexAge=="H J"], na.rm=TRUE)
length(na.omit(capo$Wt[capo$SexAge=="H J"]))

mean(capo$Wt[capo$SexAge=="M A"], na.rm=TRUE)
sd(capo$Wt[capo$SexAge=="M A"], na.rm=TRUE)
min(capo$Wt[capo$SexAge=="M A"], na.rm=TRUE)
max(capo$Wt[capo$SexAge=="M A"], na.rm=TRUE)
length(na.omit(capo$Wt[capo$SexAge=="M A"]))

mean(capo$Wt[capo$SexAge=="M J"], na.rm=TRUE)
sd(capo$Wt[capo$SexAge=="M J"], na.rm=TRUE)
min(capo$Wt[capo$SexAge=="M J"], na.rm=TRUE)
max(capo$Wt[capo$SexAge=="M J"], na.rm=TRUE)
length(na.omit(capo$Wt[capo$SexAge=="M J"]))

#how much larger (% weight) is female compared to male?
(65-50)/50*100 #adults
#adults
(mean(capo$Wt[capo$SexAge=="H A"], na.rm=TRUE)-mean(capo$Wt[capo$SexAge=="M A"], na.rm=TRUE))/mean(capo$Wt[capo$SexAge=="M A"], na.rm=TRUE)*100
#juveniles
(mean(capo$Wt[capo$SexAge=="H J"], na.rm=TRUE)-mean(capo$Wt[capo$SexAge=="M J"], na.rm=TRUE))/mean(capo$Wt[capo$SexAge=="M J"], na.rm=TRUE)*100

is.numeric(capo$Tail)

mean(capo$Tail[capo$SexAge=="H A"], na.rm=TRUE)
sd(capo$Tail[capo$SexAge=="H A"], na.rm=TRUE)
min(capo$Tail[capo$SexAge=="H A"], na.rm=TRUE)
max(capo$Tail[capo$SexAge=="H A"], na.rm=TRUE)
length(na.omit(capo$Tail[capo$SexAge=="H A"]))

mean(capo$Tail[capo$SexAge=="H J"], na.rm=TRUE)
sd(capo$Tail[capo$SexAge=="H J"], na.rm=TRUE)
min(capo$Tail[capo$SexAge=="H J"], na.rm=TRUE)
max(capo$Tail[capo$SexAge=="H J"], na.rm=TRUE)
length(na.omit(capo$Tail[capo$SexAge=="H J"]))

mean(capo$Tail[capo$SexAge=="M A"], na.rm=TRUE)
sd(capo$Tail[capo$SexAge=="M A"], na.rm=TRUE)
min(capo$Tail[capo$SexAge=="M A"], na.rm=TRUE)
max(capo$Tail[capo$SexAge=="M A"], na.rm=TRUE)
length(na.omit(capo$Tail[capo$SexAge=="M A"]))

mean(capo$Tail[capo$SexAge=="M J"], na.rm=TRUE)
sd(capo$Tail[capo$SexAge=="M J"], na.rm=TRUE)
min(capo$Tail[capo$SexAge=="M J"], na.rm=TRUE)
max(capo$Tail[capo$SexAge=="M J"], na.rm=TRUE)
length(na.omit(capo$Tail[capo$SexAge=="M J"]))

#female % larger than male
(mean(capo$Tail[capo$SexAge=="H A"], na.rm=TRUE)-mean(capo$Tail[capo$SexAge=="M A"], na.rm=TRUE))/mean(capo$Tail[capo$SexAge=="M A"], na.rm=TRUE)*100
(mean(capo$Tail[capo$SexAge=="H J"], na.rm=TRUE)-mean(capo$Tail[capo$SexAge=="M J"], na.rm=TRUE))/mean(capo$Tail[capo$SexAge=="M J"], na.rm=TRUE)*100


is.numeric(capo$Wing)

mean(capo$Wing[capo$SexAge=="H A"], na.rm=TRUE)
sd(capo$Wing[capo$SexAge=="H A"], na.rm=TRUE)
min(capo$Wing[capo$SexAge=="H A"], na.rm=TRUE)
max(capo$Wing[capo$SexAge=="H A"], na.rm=TRUE)
length(na.omit(capo$Wing[capo$SexAge=="H A"]))

mean(capo$Wing[capo$SexAge=="H J"], na.rm=TRUE)
sd(capo$Wing[capo$SexAge=="H J"], na.rm=TRUE)
min(capo$Wing[capo$SexAge=="H J"], na.rm=TRUE)
max(capo$Wing[capo$SexAge=="H J"], na.rm=TRUE)
length(na.omit(capo$Wing[capo$SexAge=="H J"]))

mean(capo$Wing[capo$SexAge=="M A"], na.rm=TRUE)
sd(capo$Wing[capo$SexAge=="M A"], na.rm=TRUE)
min(capo$Wing[capo$SexAge=="M A"], na.rm=TRUE)
max(capo$Wing[capo$SexAge=="M A"], na.rm=TRUE)
length(na.omit(capo$Wing[capo$SexAge=="M A"]))

mean(capo$Wing[capo$SexAge=="M J"], na.rm=TRUE)
sd(capo$Wing[capo$SexAge=="M J"], na.rm=TRUE)
min(capo$Wing[capo$SexAge=="M J"], na.rm=TRUE)
max(capo$Wing[capo$SexAge=="M J"], na.rm=TRUE)
length(na.omit(capo$Wing[capo$SexAge=="M J"]))

(mean(capo$Wing[capo$SexAge=="H A"], na.rm=TRUE)-mean(capo$Wing[capo$SexAge=="M A"], na.rm=TRUE))/mean(capo$Wing[capo$SexAge=="M A"], na.rm=TRUE)*100
(mean(capo$Wing[capo$SexAge=="H J"], na.rm=TRUE)-mean(capo$Wing[capo$SexAge=="M J"], na.rm=TRUE))/mean(capo$Wing[capo$SexAge=="M J"], na.rm=TRUE)*100


is.numeric(capo$Tarso_completo)

mean(capo$Tarso_completo[capo$SexAge=="H A"], na.rm=TRUE)
sd(capo$Tarso_completo[capo$SexAge=="H A"], na.rm=TRUE)
min(capo$Tarso_completo[capo$SexAge=="H A"], na.rm=TRUE)
max(capo$Tarso_completo[capo$SexAge=="H A"], na.rm=TRUE)
length(na.omit(capo$Tarso_completo[capo$SexAge=="H A"]))

mean(capo$Tarso_completo[capo$SexAge=="H J"], na.rm=TRUE)
sd(capo$Tarso_completo[capo$SexAge=="H J"], na.rm=TRUE)
min(capo$Tarso_completo[capo$SexAge=="H J"], na.rm=TRUE)
max(capo$Tarso_completo[capo$SexAge=="H J"], na.rm=TRUE)
length(na.omit(capo$Tarso_completo[capo$SexAge=="H J"]))

mean(capo$Tarso_completo[capo$SexAge=="M A"], na.rm=TRUE)
sd(capo$Tarso_completo[capo$SexAge=="M A"], na.rm=TRUE)
min(capo$Tarso_completo[capo$SexAge=="M A"], na.rm=TRUE)
max(capo$Tarso_completo[capo$SexAge=="M A"], na.rm=TRUE)
length(na.omit(capo$Tarso_completo[capo$SexAge=="M A"]))

mean(capo$Tarso_completo[capo$SexAge=="M J"], na.rm=TRUE)
sd(capo$Tarso_completo[capo$SexAge=="M J"], na.rm=TRUE)
min(capo$Tarso_completo[capo$SexAge=="M J"], na.rm=TRUE)
max(capo$Tarso_completo[capo$SexAge=="M J"], na.rm=TRUE)
length(na.omit(capo$Tarso_completo[capo$SexAge=="M J"]))

(mean(capo$Tarso_completo[capo$SexAge=="H A"], na.rm=TRUE)-mean(capo$Tarso_completo[capo$SexAge=="M A"], na.rm=TRUE))/mean(capo$Tarso_completo[capo$SexAge=="M A"], na.rm=TRUE)*100
(mean(capo$Tarso_completo[capo$SexAge=="H J"], na.rm=TRUE)-mean(capo$Tarso_completo[capo$SexAge=="M J"], na.rm=TRUE))/mean(capo$Tarso_completo[capo$SexAge=="M J"], na.rm=TRUE)*100


is.numeric(capo$Sec)

mean(capo$Sec[capo$SexAge=="H A"], na.rm=TRUE)
sd(capo$Sec[capo$SexAge=="H A"], na.rm=TRUE)
min(capo$Sec[capo$SexAge=="H A"], na.rm=TRUE)
max(capo$Sec[capo$SexAge=="H A"], na.rm=TRUE)
length(na.omit(capo$Sec[capo$SexAge=="H A"]))

mean(capo$Sec[capo$SexAge=="H J"], na.rm=TRUE)
sd(capo$Sec[capo$SexAge=="H J"], na.rm=TRUE)
min(capo$Sec[capo$SexAge=="H J"], na.rm=TRUE)
max(capo$Sec[capo$SexAge=="H J"], na.rm=TRUE)
length(na.omit(capo$Sec[capo$SexAge=="H J"]))

mean(capo$Sec[capo$SexAge=="M A"], na.rm=TRUE)
sd(capo$Sec[capo$SexAge=="M A"], na.rm=TRUE)
min(capo$Sec[capo$SexAge=="M A"], na.rm=TRUE)
max(capo$Sec[capo$SexAge=="M A"], na.rm=TRUE)
length(na.omit(capo$Sec[capo$SexAge=="M A"]))

mean(capo$Sec[capo$SexAge=="M J"], na.rm=TRUE)
sd(capo$Sec[capo$SexAge=="M J"], na.rm=TRUE)
min(capo$Sec[capo$SexAge=="M J"], na.rm=TRUE)
max(capo$Sec[capo$SexAge=="M J"], na.rm=TRUE)
length(na.omit(capo$Sec[capo$SexAge=="M J"]))

(mean(capo$Sec[capo$SexAge=="H A"], na.rm=TRUE)-mean(capo$Sec[capo$SexAge=="M A"], na.rm=TRUE))/mean(capo$Sec[capo$SexAge=="M A"], na.rm=TRUE)*100
(mean(capo$Sec[capo$SexAge=="H J"], na.rm=TRUE)-mean(capo$Sec[capo$SexAge=="M J"], na.rm=TRUE))/mean(capo$Sec[capo$SexAge=="M J"], na.rm=TRUE)*100

is.numeric(capo$Largo)

mean(capo$Largo[capo$SexAge=="H A"], na.rm=TRUE)
sd(capo$Largo[capo$SexAge=="H A"], na.rm=TRUE)
min(capo$Largo[capo$SexAge=="H A"], na.rm=TRUE)
max(capo$Largo[capo$SexAge=="H A"], na.rm=TRUE)
length(na.omit(capo$Largo[capo$SexAge=="H A"]))

mean(capo$Largo[capo$SexAge=="H J"], na.rm=TRUE)
sd(capo$Largo[capo$SexAge=="H J"], na.rm=TRUE)
min(capo$Largo[capo$SexAge=="H J"], na.rm=TRUE)
max(capo$Largo[capo$SexAge=="H J"], na.rm=TRUE)
length(na.omit(capo$Largo[capo$SexAge=="H J"]))

mean(capo$Largo[capo$SexAge=="M A"], na.rm=TRUE)
sd(capo$Largo[capo$SexAge=="M A"], na.rm=TRUE)
min(capo$Largo[capo$SexAge=="M A"], na.rm=TRUE)
max(capo$Largo[capo$SexAge=="M A"], na.rm=TRUE)
length(na.omit(capo$Largo[capo$SexAge=="M A"]))

mean(capo$Largo[capo$SexAge=="M J"], na.rm=TRUE)
sd(capo$Largo[capo$SexAge=="M J"], na.rm=TRUE)
min(capo$Largo[capo$SexAge=="M J"], na.rm=TRUE)
max(capo$Largo[capo$SexAge=="M J"], na.rm=TRUE)
length(na.omit(capo$Largo[capo$SexAge=="M J"]))

(mean(capo$Largo[capo$SexAge=="H A"], na.rm=TRUE)-mean(capo$Largo[capo$SexAge=="M A"], na.rm=TRUE))/mean(capo$Largo[capo$SexAge=="M A"], na.rm=TRUE)*100
(mean(capo$Largo[capo$SexAge=="H J"], na.rm=TRUE)-mean(capo$Largo[capo$SexAge=="M J"], na.rm=TRUE))/mean(capo$Largo[capo$SexAge=="M J"], na.rm=TRUE)*100

wilcox.test(capo$Wt[capo$SexAge=="H A"], capo$Wt[capo$SexAge=="M A"])
wilcox.test(capo$Wt[capo$SexAge=="H J"], capo$Wt[capo$SexAge=="M J"])

wilcox.test(capo$Tail[capo$SexAge=="H A"], capo$Tail[capo$SexAge=="M A"])
wilcox.test(capo$Tail[capo$SexAge=="H J"], capo$Tail[capo$SexAge=="M J"])

wilcox.test(capo$Wing[capo$SexAge=="H A"], capo$Wing[capo$SexAge=="M A"])
wilcox.test(capo$Wing[capo$SexAge=="H J"], capo$Wing[capo$SexAge=="M J"])

wilcox.test(capo$Tarso_completo[capo$SexAge=="H A"], capo$Tarso_completo[capo$SexAge=="M A"])
wilcox.test(capo$Tarso_completo[capo$SexAge=="H J"], capo$Tarso_completo[capo$SexAge=="M J"])

wilcox.test(capo$Sec[capo$SexAge=="H A"], capo$Sec[capo$SexAge=="M A"])
wilcox.test(capo$Sec[capo$SexAge=="H J"], capo$Sec[capo$SexAge=="M J"])

wilcox.test(capo$Largo[capo$SexAge=="H A"], capo$Largo[capo$SexAge=="M A"])
wilcox.test(capo$Largo[capo$SexAge=="H J"], capo$Largo[capo$SexAge=="M J"])


##*******************************************
######    Produce dataset for violin plots #####
#*********************************************
  
#Can include same individual at different ages. Separate juveniles vs adults (2 datasets)
capjuv<-subset(capo, capo$Age=="J")
capad<-subset(capo, capo$Age=="A")

#aggregate by ID so using each individual only once. 
#use MEDIAN value to balance out possible errors in measurement
ad.wt<-aggregate(c(capad$Wt), by = list(capad$ID), FUN=median)
colnames(ad.wt)<-c("ID", "Wt")
ad.tail<-aggregate(c(capad$Tail), by = list(capad$ID), FUN=median)
colnames(ad.tail)<-c("ID", "Tail")
ad.sex<-aggregate(c(capad$Sexo_final), by = list(capad$ID), FUN=last)
colnames(ad.sex)<-c("ID", "Sex")
ad.wing<-aggregate(c(capad$Wing), by = list(capad$ID), FUN=median)
colnames(ad.wing)<-c("ID", "Wing")
ad.sec<-aggregate(c(capad$Sec), by = list(capad$ID), FUN=median)
colnames(ad.sec)<-c("ID", "Sec")
ad.tars<-aggregate(c(capad$Tarso_completo), by = list(capad$ID), FUN = median)
colnames(ad.tars)<-c("ID", "TarsC")
ad.largo<-aggregate(c(capad$Largo), by = list(capad$ID), FUN = median)
colnames(ad.largo)<-c("ID", "Largo")

juv.wt<-aggregate(c(capjuv$Wt), by = list(capjuv$ID), FUN=median)
colnames(juv.wt)<-c("ID", "Wt")
juv.tail<-aggregate(c(capjuv$Tail), by = list(capjuv$ID), FUN=median)
colnames(juv.tail)<-c("ID", "Tail")
juv.wing<-aggregate(c(capjuv$Wing), by = list(capjuv$ID), FUN=median)
colnames(juv.wing)<-c("ID", "Wing")
juv.sec<-aggregate(c(capjuv$Sec), by = list(capjuv$ID), FUN=median)
colnames(juv.sec)<-c("ID", "Sec")
juv.sex<-aggregate(c(capjuv$Sexo_final), by = list(capjuv$ID), FUN=last)
colnames(juv.sex)<-c("ID", "Sex")
juv.tars<-aggregate(c(capjuv$Tarso_completo), by = list(capjuv$ID), FUN = median)
colnames(juv.tars)<-c("ID", "TarsC")
juv.largo<-aggregate(c(capjuv$Largo), by = list(capjuv$ID), FUN = median)
colnames(juv.largo)<-c("ID", "Largo")

ads<-merge(ad.wt, ad.tail)
ads<-merge(ads, ad.sex)
ads<-merge(ads, ad.wing)
ads<-merge(ads, ad.sec)
ads<-merge(ads, ad.tars)
ads<-merge(ads, ad.largo)

juvs<-merge(juv.wt, juv.tail)
juvs<-merge(juvs, juv.wing)
juvs<-merge(juvs, juv.sex)
juvs<-merge(juvs, juv.sec)
juvs<-merge(juvs, juv.tars)
juvs<-merge(juvs, juv.largo)

#remove RSM496 from juvs; it was a very young baby in october.
juvs<-subset(juvs, juvs$ID !="RSM496")

#Remove 652 from the adults; the first bird captured and very unusually small, measurement errors?
ads<-subset(ads, ads$ID != "652")


#*****************************************************************************
#
######    Create violin plots (Fig 2)                          ######
#
#*****************************************************************************

#combine juvs and adults for violin plots
juvs$Age<-"J"
ads$Age<-"A"
juvs$SexAge<-paste(juvs$Sex,juvs$Age)
ads$SexAge<-paste(ads$Sex,ads$Age)
allbirds<-rbind(juvs, ads)

## ggplot customization:
theme_set(theme_bw())

scale_colour_discrete <- function(...,palette="Set1") {
  scale_colour_brewer(...,palette=palette)
}
scale_colour_orig <- ggplot2::scale_colour_discrete
scale_fill_discrete <- function(...,palette="Set1") {
  scale_fill_brewer(...,palette=palette)
}

#get N for each measurement, to put on figures
length(na.omit(allbirds$Wt))
length(na.omit(allbirds$Tail))
length(na.omit(allbirds$Wing))
length(na.omit(allbirds$TarsC))
length(na.omit(allbirds$Sec))
length(na.omit(allbirds$Largo))

#use the "Paired" palette in RColorBrewer
display.brewer.pal(n=8, name="Paired")
brewer.pal(n = 8, name = "Paired")
#colors to use: 
#FF7F00 F juv 
#FDBF6F F adult
#1F78B4 M juv
#A6CEE3 M adult

g1<-ggplot(allbirds, aes(x = SexAge, y = Wt, fill = SexAge)) +
  scale_fill_manual(values=c("#FDBF6F","#FF7F00", "#A6CEE3", "#1F78B4")) +
  geom_violin() +
  geom_dotplot(binaxis= "y",stackdir = "center", dotsize = 0.25, fill = 1) +
  labs(y = "Weight (g)", x = "") +
  scale_x_discrete(breaks=c("H A","H J", "M A", "M J"),
                   labels=c("F Adult", "F Juv", "M Adult", "M Juv")) +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none") +
  annotate("text", x = 0.65, y = 80, label = "A") +
  annotate("text", x = 3.8, y = 80, label = "n = 286")

g2<-ggplot(allbirds, aes(x = SexAge, y = Tail, fill = SexAge)) +
  scale_fill_manual(values=c("#FDBF6F","#FF7F00", "#A6CEE3", "#1F78B4")) +
  geom_violin() +
  geom_dotplot(binaxis= "y",stackdir = "center", dotsize = 0.25,fill = 1) +
  labs(y = "Length of tail (mm)", x = "") +
  scale_x_discrete(breaks=c("H A","H J", "M A", "M J"),
                   labels=c("F Adult", "F Juv", "M Adult", "M Juv")) +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none") +
  annotate("text", x = 0.65, y = 92,
           label = "B") +
  annotate ("text", x = 3.8, y = 92, label = "n = 285")

g3<-ggplot(allbirds, aes(x = SexAge, y = Wing, fill = SexAge)) +
  scale_fill_manual(values=c("#FDBF6F","#FF7F00", "#A6CEE3", "#1F78B4")) +
  geom_violin() +
  geom_dotplot(binaxis= "y", stackdir = "center", dotsize = 0.25, fill = 1) +
  labs(y = "Wing chord (mm)", x = "") +
  scale_x_discrete(breaks=c("H A","H J", "M A", "M J"),
                   labels=c("F Adult", "F Juv", "M Adult", "M Juv")) +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none") +
  annotate("text", x = 0.65, y = 145,
           label = "C") +
  annotate("text", x = 3.8, y = 145, label = "n = 292")

g4<-ggplot(allbirds, aes(x = SexAge, y = TarsC, fill = SexAge)) +
  scale_fill_manual(values=c("#FDBF6F","#FF7F00", "#A6CEE3", "#1F78B4")) +
  geom_violin() +
  geom_dotplot(binaxis= "y", stackdir = "center", dotsize = 0.25, fill = 1) +
  labs(y = "Tarsus (mm)", x = "") +
  scale_x_discrete(breaks=c("H A","H J", "M A", "M J"),
                   labels=c("F Adult", "F Juv", "M Adult", "M Juv")) +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none") +
  annotate("text", x = 0.65, y = 25, label = "D") +
  annotate("text", x = 3.8, y = 25, label = "n = 151" )

g5<-ggplot(allbirds, aes(x = SexAge, y = Sec, fill = SexAge)) +
  scale_fill_manual(values=c("#FDBF6F","#FF7F00", "#A6CEE3", "#1F78B4")) +
  geom_violin() +
  geom_dotplot(binaxis= "y", stackdir = "center", dotsize = 0.25, fill = 1) +
  labs(y = "Length of secondaries (mm)", x = "") +
  scale_x_discrete(breaks=c("H A","H J", "M A", "M J"),
                   labels=c("F Adult", "F Juv", "M Adult", "M Juv")) +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none")+
  annotate("text", x = 0.65, y = 90, label = "E") +
  annotate("text", x = 3.8, y = 90, label = "n = 188")

g6<-ggplot(allbirds, aes(x = SexAge, y = Largo, fill = SexAge)) +
  scale_fill_manual(values=c("#FDBF6F","#FF7F00", "#A6CEE3", "#1F78B4")) +
  geom_violin() +
  geom_dotplot(binaxis= "y", stackdir = "center", dotsize = 0.25, fill = 1) +
  labs(y = "Full length (mm)", x = "") +
  scale_x_discrete(breaks=c("H A","H J", "M A", "M J"),
                   labels=c("F Adult", "F Juv", "M Adult", "M Juv")) +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none") +
  annotate("text", x = 0.65, y = 210, label = "F") +
  annotate("text", x = 3.8, y = 210, label = "n = 112" )

grid.arrange(g1, g2, g3, g4, g5, g6, nrow = 2)

#***********************************************************************
######    A little more data management: looking for some weird cases #####
#***********************************************************************
heavymales<-subset(juvs, juvs$Sex=="M")
heavymales<-subset(heavymales, heavymales$Wt>60)

lightfemales<-subset(juvs, juvs$Sex=="H")
lightfemales<-subset(lightfemales, lightfemales$Wt<50)


##***********************************************************************************************
##*****************                                                              *********************
######    Prep for logistic regression prediction with cross-validation         ##################
##*****************                                                              *********************
##***********************************************************************************************

#use the 'juvs' dataframe for training and validation datasets
class(juvs$Sex) #need it to be a factor
juvs$Sex<-as.factor(juvs$Sex) #check that it kept it as M and H but now is a factor
class(juvs$Sex)

#remove rows without measurement of wing, wt, cola, secondaries
#don't worry about Tarsus - too many missing data and it's not a good separator of sexes.
juvs<-subset(juvs, !is.na (juvs$Wt))
juvs<-subset(juvs, !is.na (juvs$Wing))
juvs<-subset(juvs, !is.na (juvs$Sec)) 
juvs<-subset(juvs, !is.na (juvs$Tail))
summary(juvs$Sex)

#dataset for predicting sex of unknown-sex juveniles
un.J<-subset(cap, cap$Age=="J")
un.J<-subset(un.J, is.na(un.J$Sexo_final))

#remove observations with NA in Tail Wing Wt or secondaries 
un.J<-subset(un.J, !is.na(un.J$Tail))
un.J<-subset(un.J, !is.na(un.J$Wing))
un.J<-subset(un.J, !is.na(un.J$Wt))
un.J<-subset(un.J, !is.na(un.J$Sec))

class(un.J$Tail)
class(un.J$Wing)
class(un.J$Wt)
class(un.J$Sec)
#numeric.

#make sure the variables aren't too correlated with each other!
ju<-data.frame(Wt=juvs$Wt, Tail=juvs$Tail, Wing=juvs$Wing, Sec=juvs$Sec)
res <- cor(ju)
round(res, 2)

#Wing and Sec have a correlation of 0.82
#Wing and Tail have a correlation of 0.76
#Tail and Sec have a correlation of 0.70
#Wt and Wing have a correlation of 0.69

#combinations correlated <0.75: 
#Wt-Sec
#Wt-Tail
#Wt-Wing
#Tail-Sec
#These could be combined in models (8 models total)


##*****************************************************************************
##
######    Trying out the Firth and Bias-reduced Logistic Regression             #####
##
##*****************************************************************************

juvs$Sex1<-ifelse(juvs$Sex=="H",1,0) #female = 1, male = 0 #this code repeats below in the loop
juvs$Sex1<-as.factor(juvs$Sex1) #this code repeats below
summary (juvs$Sex1)

spl <- sample.split(juvs$Sex, SplitRatio = 0.8) #all this code repeats below in the loop
train <- subset(juvs, spl == TRUE) # the training dataset
Nmachos<-length(train$Sex[train$Sex=="M"])
Nhembras<-length(train$Sex[train$Sex=="H"])
ExcessMachos<-Nmachos-Nhembras
ExcessMachos
test <- subset(juvs, spl == FALSE) #the initial test dataset
DT <- data.table(train)
train2<-DT[-sample(which(Sex=="M"), ExcessMachos)] #balance the sexes in the training dataset

mf3<-logistf(Sex1 ~ Sec, data = train2, 
             control=logistf.control(maxit=1000, maxstep = 100), plcontrol=logistpl.control(maxit=1000), pl=FALSE)
summary(mf3)
testy<-test
testy$predicted3 <- predict(mf3, newdata = testy, type = "response")

mf32<-logistf(Sex1 ~ Sec + Wt, data = train2, 
             control=logistf.control(maxit=1000, maxstep = 100), plcontrol=logistpl.control(maxit=1000), pl=FALSE)
summary(mf32)


ggcoefstats(x = mf32, stats.labels=FALSE, title = "", exclude.intercept = TRUE)

## Optimistically trying out a regular logistic regression again ##
#lr.m1<-glm(Sex1 ~ Sec, data = train2, family = binomial) # didn't work in 2024, did work when 2025 data added.

#trying brglm because hoping it will work with the coefficient plotting package
hi2<- glm(Sex1~ Sec + Wt, data=train2, family=binomial(logit), method = "brglmFit")
summary(hi2)
plot_coefs(hi2,  point.shape = F) + 
  theme(legend.position="none") #remove the legend!


#***********************************************************************************
#******                                                                             **
######    Monte Carlo Cross-Validation (logistic regression models in a for loop)            #########
#****            (Also called Repeated random subsampling validation)                 **
#*  https://towardsdatascience.com/cross-validation-k-fold-vs-monte-carlo-e54df2fc179b/  **
#*                                                                                 **
#***********************************************************************************

juvs$Sex1<-ifelse(juvs$Sex=="H",1,0) #female = 1, male = 0
juvs$Sex1<-as.factor(juvs$Sex1)

# Read here for "next" statement https://www.tutorialspoint.com/r/r_next_statement.htm

nsims = 100 #repetir todo el loop (la division de datos + analisis) 'nsims' veces

out_test <- list() #list of dataframes, one for each iteration of loop, each contains test data and respective predictions for each model
out_un<-list() #List of dataframes, one for each iteration of loop; each df contains the IDs and all model predictions for unsexed juveniles
models1=list() #list of m1 models, one for each iteration of loop
models2=list() #list of m2 models, one for each iteration of loop
models3=list() #...
models4=list() 
models5=list()
models6=list()
models7=list()
models8=list()
perfs1=list() # list of AUCs, model m1
perfs2=list() # list of AUCs, model m2
perfs3=list() # 
perfs4=list()
perfs5=list()
perfs6=list()
perfs7=list()
perfs8=list()
out_smooth4<-list() #list of dataframes to make smooth lines with the parameters for Model 4 (100 iterations)

for(sim in 1:nsims){
  set.seed(12+sim) #el numero 12 puede ser qualquer numero
  print(paste("Starting", sim, "of", nsims, "simulations")) #Tells us which iteration we're on as it runs through the loop
  
## separacion de los datos de juveniles de sexo conocido, en test y validation sets
  spl <- sample.split(juvs$Sex, SplitRatio = 0.8) 
  train <- subset(juvs, spl == TRUE) # the training dataset
  Nmachos<-length(train$Sex[train$Sex=="M"])
  Nhembras<-length(train$Sex[train$Sex=="H"])
  ExcessMachos<-Nmachos-Nhembras
  ExcessMachos
  test <- subset(juvs, spl == FALSE) #the initial test dataset
  DT <- data.table(train)
  train2<-DT[-sample(which(Sex=="M"), ExcessMachos)] #balance the sexes in the training dataset
  ## corre los modelos
  m1<- glm(Sex1~ Wt, data=train2, family=binomial(logit), method = "brglmFit")
  m2<- glm(Sex1~ Tail, data = train2, family = binomial(logit), method = "brglmFit")
  m3<- glm(Sex1~ Wing, data = train2, family = binomial(logit), method = "brglmFit")
  m4<- glm(Sex1~ Sec, data = train2, family = binomial(logit), method = "brglmFit")
  m5<- glm(Sex1~ Wt + Tail, data = train2, family = binomial(logit), method = "brglmFit")
  m6<- glm(Sex1~ Wt + Sec, data = train2, family = binomial(logit), method = "brglmFit")
  m7<- glm(Sex1~ Wt + Wing, data = train2, family = binomial(logit), method = "brglmFit")  
  m8<- glm(Sex1~ Tail + Sec, data = train2, family = binomial(logit), method = "brglmFit")  
  
  #The following next statement was removed because it didn't run with the Firth logistic regression. It doesn't appear necessary in the brglm because all models converge.
  if (m1$converged == FALSE || m2$converged ==FALSE || m3$converged==FALSE || 
     m4$converged==FALSE || m5$converged==FALSE || m6$converged==FALSE || m7$converged==FALSE) { #exclude any iterations where 1 or more models didn't converge!
    next
  }
  testy<-test
  testy$predicted1 <- predict(m1, newdata = testy, type = "response")
  Validating1 <- data.frame(predictions=testy$predicted1, labels=testy$Sex1)
  pred1 <- prediction(Validating1$predictions, Validating1$labels)
  perf1<-performance(pred1, "auc")
  
  testy$predicted2 <- predict(m2, newdata = testy, type = "response")
  Validating2 <- data.frame(predictions=testy$predicted2, labels=testy$Sex1)
  pred2 <- prediction(Validating2$predictions, Validating2$labels)
  perf2<-performance(pred2, "auc")
  
  testy$predicted3 <- predict(m3, newdata = testy, type = "response")
  Validating3 <- data.frame(predictions=testy$predicted3, labels=testy$Sex1)
  pred3 <- prediction(Validating3$predictions, Validating3$labels)
  perf3<-performance(pred3, "auc")
  
  testy$predicted4 <- predict(m4, newdata = testy, type = "response")
  Validating4 <- data.frame(predictions=testy$predicted4, labels=testy$Sex1)
  pred4 <- prediction(Validating4$predictions, Validating4$labels)
  perf4<-performance(pred4, "auc")
  
  testy$predicted5 <- predict(m5, newdata = testy, type = "response")
  Validating5 <- data.frame(predictions=testy$predicted5, labels=testy$Sex1)
  pred5 <- prediction(Validating5$predictions, Validating5$labels)
  perf5<-performance(pred5, "auc")
  
  testy$predicted6 <- predict(m6, newdata = testy, type = "response")
  Validating6 <- data.frame(predictions=testy$predicted6, labels=testy$Sex1)
  pred6 <- prediction(Validating6$predictions, Validating6$labels)
  perf6<-performance(pred6, "auc")
  
  testy$predicted7 <- predict(m7, newdata = testy, type = "response")
  Validating7 <- data.frame(predictions=testy$predicted7, labels=testy$Sex1)
  pred7 <- prediction(Validating7$predictions, Validating7$labels)
  perf7<-performance(pred7, "auc")
  
  testy$predicted8 <- predict(m8, newdata = testy, type = "response")
  Validating8 <- data.frame(predictions=testy$predicted8, labels=testy$Sex1)
  pred8 <- prediction(Validating8$predictions, Validating8$labels)
  perf8<-performance(pred8, "auc")
  
  # the predictions on the unknown sex individuals
  un<-data.frame(ID = un.J$ID) #dataset of unknown-sex juveniles
  un$predicted1 = predict(m1, newdata = un.J, type = "response") 
  un$predicted2 = predict(m2, newdata = un.J, type = "response")
  un$predicted3 = predict(m3, newdata = un.J, type = "response")
  un$predicted4 = predict(m4, newdata = un.J, type = "response")
  un$predicted5 = predict(m5, newdata = un.J, type = "response")
  un$predicted6 = predict(m6, newdata = un.J, type = "response")
  un$predicted7 = predict(m7, newdata = un.J, type = "response")
  un$predicted8 = predict(m8, newdata = un.J, type = "response")
  
  #predictions for a smooth curve
  smoothy<-data.frame(Sec = seq(min(juvs$Sec), max(juvs$Sec), 0.1))
  smoothy$predicted4 = predict(m4, newdata=smoothy, type = "response")
  
    out_test[[sim]] <- c(testy) #guardar las predicciones del validation dataset en la lista para los 4 models
    out_un[[sim]]<-c(un) #guardar las predicciones de los un-sexed individuals en otra lista 
    out_smooth4 [[sim]] <-c(smoothy) #guardar las predicciones para la curva smooth en otra lista
    models1= c(models1,list(m1)) #this syntax allows them to be functioning glm models, not lists
    models2=c(models2, list(m2))
    models3=c(models3, list(m3))
    models4=c(models4, list(m4))
    models5=c(models5, list(m5))
    models6=c(models6, list(m6))
    models7=c(models7, list(m7))
    models8=c(models8, list(m8))
    
    perfs1[[sim]]<-c(perf1@y.values [[1]])
    perfs2[[sim]]<-c(perf2@y.values [[1]])
    perfs3[[sim]]<-c(perf3@y.values [[1]])
    perfs4[[sim]]<-c(perf4@y.values [[1]])
    perfs5[[sim]]<-c(perf5@y.values [[1]])
    perfs6[[sim]]<-c(perf6@y.values [[1]])
    perfs7[[sim]]<-c(perf7@y.values [[1]])
    perfs8[[sim]]<-c(perf8@y.values [[1]])
    
} #close the loop

class(out_test[[1]]) #class should be list
class(out_un[[1]]) #class should be list
class(models2[[3]]) # the 2 and [[3]] can be any number within the number of models/iterations run. Class Should be glm lm.
class(perfs1[[1]]) #should be numeric
class(out_smooth4[[1]])

# bind all the test results together for the boxplot prediction graphic
Test.df<-rbindlist(out_test)
un.df<-rbindlist(out_un)
perf.df<-data.frame(Model1 = unlist(perfs1), Model2 = unlist(perfs2), 
                    Model3 = unlist(perfs3), Model4 = unlist(perfs4), Model5 = unlist(perfs5), 
                    Model6 = unlist(perfs6), Model7 = unlist(perfs7), Model8 = unlist(perfs8))

#Graph the AUC of the various models in all their iterations

perf.df %>% 
  pivot_longer(everything()) %>%
  ggplot(aes(x=  name, y = value, fill = NA))+
  geom_boxplot(show.legend = FALSE)+
  xlab("Model") + 
  ylab("AUC") +
  scale_x_discrete(breaks=c("Model1","Model2", "Model3", "Model4", 
                            "Model5", "Model6", "Model7", "Model8"),
                   labels=c("Weight", "Tail", "Wing", "Secondaries", 
                            "Weight + Tail", "Weight + Secondaries", "Weight + Wing", "Tail + Secondaries")) +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  ggplot2::coord_flip()

#**********************************************************************************************
######    Graph the parameter estimates of the 8 models in all their iterations (1 at a time) (Fig S6) #####
# This figure goes in Supplementary Material                                
#**********************************************************************************************

#you have to give it something for 'colors' and many palettes don't have 100 colors. It wouldn't let me just put "black"
P100 = createPalette(100,  c("#ff0000", "#00ff00", "#0000ff")) #uses library Polychrome
#for whatever reason this code does not do what I expected, but it produces the greyscale figure that we wanted!


c1<-plot_coefs(models1, colors = P100, point.shape = F) +
  labs(y = '', x = '') +
  ggtitle("Model 1") +
  theme(plot.title = element_text(size=9))+
  theme(legend.position="none")  #remove the legend!
c2<-plot_coefs(models2, colors = P100, point.shape = F) + 
  labs(y = '', x = '') +
  ggtitle("Model 2") +
  theme(plot.title = element_text(size=9))+
  theme(legend.position="none") 
c3<-plot_coefs(models3, colors = P100, point.shape = F) + 
  labs(y = '', x = '') +
  ggtitle("Model 3") +
  theme(plot.title = element_text(size=9))+
  theme(legend.position="none") 
c4<-plot_coefs(models4, colors = P100, point.shape = F) + 
  labs(y = '', x = '') +
  ggtitle("Model 4") +
  theme(plot.title = element_text(size=9))+
  theme(legend.position="none") 
c5<-plot_coefs(models5, colors = P100, point.shape = F) + 
  labs(y = '', x = '') +
  ggtitle("Model 5") +
  theme(plot.title = element_text(size=9))+
  theme(legend.position="none") 
c6<-plot_coefs(models6, colors = P100, point.shape = F) + 
  labs(y = '', x = '') +
  ggtitle("Model 6") +
  theme(plot.title = element_text(size=9))+
  theme(legend.position="none") 
c7<-plot_coefs(models7, colors = P100, point.shape = F) + 
  labs(y = '', x = '') +
  ggtitle("Model 7") +
  theme(plot.title = element_text(size=9))+
  theme(legend.position="none") 
c8<-plot_coefs(models8, colors = P100, point.shape = F) + 
  labs(y = '', x = '') +
  ggtitle("Model 8") +
  theme(plot.title = element_text(size=9))+
  theme(legend.position="none")

grid.arrange(c1, c2, c3, c4, c5, c6, c7, c8, nrow = 4)



#**********************************************************************************************
######    Graph the predictions in 3 panels (Fig 3 for Main Text and Figs for SOM)             #####
#**********************************************************************************************

par(mfrow=c(3,1), oma=c(0.1,0.3,0.1,0.1), mai = c(.7, 0.9, 0.3, 0.1))

#colors 
# Formerly FDBF6F F juv border -> now this is the F adult
# Formerly A6CEE3 M juv border

# Now use these colors: "#FF7F00", F juv border;  "#1F78B4" M juv border

# MODEL 4 # Our top model for Main Text
#repeat for other models below (change predicted4 to the model number we want)
#PANEL 1: Known females
femTest<-subset(Test.df, Test.df$Sex=="H")
boxplot(predicted4 ~ reorder(ID, predicted4, FUN = median), data = femTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="", col="white", 
        border = c(rep("darkgrey",6), rep("#FF7F00",25)))
        
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "A. Juvenile females validation dataset", cex=.8)
#PANEL 2: Known males
maleTest<-subset(Test.df, Test.df$Sex=="M")
boxplot(predicted4 ~ reorder(ID, predicted4, FUN = median), data = maleTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="Probability that the bird is female", cex.lab=1,
        col= "white", border = c(rep("#1F78B4",46), rep("darkgrey",4), "#FF7F00"))
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "B. Juvenile males validation dataset", cex=0.8)
mtext(side=2, line =4, "Sex ~ Length of secondaries", cex=1)
#PANEL 3: Unknown Sex
boxplot(predicted4 ~ reorder(ID, predicted4, FUN = median), data = un.df, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylab="", col = "white", 
        border = c(rep("#1F78B4", 24),"darkgrey","#1F78B4", rep("darkgrey",5), rep("#FF7F00",13)))
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "C. Juveniles of unknown sex", cex=.8)



# MODEL 6 # Weight + Secondaries for SOM
#(change predicted4 to predicted6, fix Y axis title, fix colors)

#PANEL 1: Known females
femTest<-subset(Test.df, Test.df$Sex=="H")
boxplot(predicted6 ~ reorder(ID, predicted6, FUN = median), data = femTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="", col="white", 
        border = c(rep("darkgrey",6), "#FF7F00", "darkgrey", rep("#FF7F00",23)))

abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "A. Juvenile females validation dataset", cex=.8)
#PANEL 2: Known males
maleTest<-subset(Test.df, Test.df$Sex=="M")
boxplot(predicted6 ~ reorder(ID, predicted6, FUN = median), data = maleTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="Probability that the bird is female", cex.lab=1,
        col= "white", 
        border = c(rep("#1F78B4",44), rep("darkgrey",6), "#FF7F00"))
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "B. Juvenile males validation dataset", cex=0.8)
mtext(side=2, line =4, "Sex ~ Weight + Length of secondaries", cex=1)
#PANEL 3: Unknown Sex
boxplot(predicted6 ~ reorder(ID, predicted6, FUN = median), data = un.df, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylab="", col = "white", 
        border = c(rep("#1F78B4", 23), rep("darkgrey",8), rep("#FF7F00",15)))
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "C. Juveniles of unknown sex", cex=.8)


# MODEL 8 # Tail + Secondaries for SOM
#(change predicted4 to predicted8, fix Y axis title, fix colors)
#PANEL 1: Known females
femTest<-subset(Test.df, Test.df$Sex=="H")
boxplot(predicted8 ~ reorder(ID, predicted8, FUN = median), data = femTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="", col="white", 
        border = c(rep("darkgrey",6), rep("#FF7F00",25)))

abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "A. Juvenile females validation dataset", cex=.8)
#PANEL 2: Known males
maleTest<-subset(Test.df, Test.df$Sex=="M")
boxplot(predicted8 ~ reorder(ID, predicted8, FUN = median), data = maleTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="Probability that the bird is female", cex.lab=1,
        col= "white", border = c(rep("#1F78B4",42), rep("darkgrey",8), "#FF7F00"))
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "B. Juvenile males validation dataset", cex=0.8)
mtext(side=2, line =4, "Sex ~ Tail + Length of secondaries", cex=1)
#PANEL 3: Unknown Sex
boxplot(predicted8 ~ reorder(ID, predicted8, FUN = median), data = un.df, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylab="", col = "white", 
        border = c(rep("#1F78B4", 21),"darkgrey",rep("#1F78B4", 2), 
                   rep("darkgrey",5), rep("#FF7F00",16)))
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "C. Juveniles of unknown sex", cex=.8)



#PREDICTIONS FROM OTHER MODELS (NOT INCLUDED IN PAPER)
# MODEL 1 # Weight only to check what happens with male RSM1237 (which had large secondary measurements, measured by a volunteer)
#(change predicted4 to predicted1, fix Y axis title)
# Known males
par(mfcol=c(1,1))
maleTest<-subset(Test.df, Test.df$Sex=="M")
boxplot(predicted1 ~ reorder(ID, predicted1, FUN = median), data = maleTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="Probability that the bird is female", cex.lab=1)
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "B. Juvenile males validation dataset", cex=0.8)
#RSM1237 has an ambiguous classification in this model.

#MODEL 2 Tail length only to check what happens with male RSM1237
maleTest<-subset(Test.df, Test.df$Sex=="M")
boxplot(predicted2 ~ reorder(ID, predicted2, FUN = median), data = maleTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="Probability that the bird is female", cex.lab=1)
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "B. Juvenile males validation dataset", cex=0.8)
#RSM1237 has an ambiguous classification in this model (much closer to male).

#MODEL 3 Wing length only to check what happens with male RSM1237
maleTest<-subset(Test.df, Test.df$Sex=="M")
boxplot(predicted3 ~ reorder(ID, predicted3, FUN = median), data = maleTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="Probability that the bird is female", cex.lab=1)
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "B. Juvenile males validation dataset", cex=0.8)
#RSM1237 is classified as a male in this model.

#MODEL 5 Weight + tail to check what happens with male RSM1237
maleTest<-subset(Test.df, Test.df$Sex=="M")
boxplot(predicted5 ~ reorder(ID, predicted5, FUN = median), data = maleTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="Probability that the bird is female", cex.lab=1)
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "B. Juvenile males validation dataset", cex=0.8)
#RSM1237 is ambiguous in this model

#MODEL 7 Weight + Wing
maleTest<-subset(Test.df, Test.df$Sex=="M")
boxplot(predicted7 ~ reorder(ID, predicted7, FUN = median), data = maleTest, 
        las=2, cex.axis=0.8, range = 0, xlab="", ylim=c(0,1), ylab="Probability that the bird is female", cex.lab=1)
abline(h=0.8, lty=2)
abline(h=0.2, lty=2)
mtext(side=3, line =.5, "B. Juvenile males validation dataset", cex=0.8)
#RSM1237 is male in this model

#**********************************************************************************************
######    Graph smooth lines of Probability (Female) vs Secondaries length (Fig 4)                          ####
#**********************************************************************************************
dev.off()
plot(out_smooth4[[1]][["Sec"]], out_smooth4[[1]][["predicted4"]], xaxt='n',
     xlab="Length of secondaries (mm)", ylab = "Probability that the bird is female", pch=21, col = "white", bg="white", cex = 0.5, las = 1)
axis(side=1, at=seq(68, 90, by=2))
lines(out_smooth4[[1]][["Sec"]], out_smooth4[[1]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[2]][["Sec"]], out_smooth4[[2]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[3]][["Sec"]], out_smooth4[[3]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[4]][["Sec"]], out_smooth4[[4]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[5]][["Sec"]], out_smooth4[[5]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[6]][["Sec"]], out_smooth4[[6]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[7]][["Sec"]], out_smooth4[[7]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[8]][["Sec"]], out_smooth4[[8]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[9]][["Sec"]], out_smooth4[[9]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[10]][["Sec"]], out_smooth4[[10]][["predicted4"]], col = "darkgrey" )
lines(out_smooth4[[11]][["Sec"]], out_smooth4[[11]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[12]][["Sec"]], out_smooth4[[12]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[13]][["Sec"]], out_smooth4[[13]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[14]][["Sec"]], out_smooth4[[14]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[15]][["Sec"]], out_smooth4[[15]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[16]][["Sec"]], out_smooth4[[16]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[17]][["Sec"]], out_smooth4[[17]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[18]][["Sec"]], out_smooth4[[18]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[19]][["Sec"]], out_smooth4[[19]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[20]][["Sec"]], out_smooth4[[20]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[21]][["Sec"]], out_smooth4[[21]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[22]][["Sec"]], out_smooth4[[22]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[23]][["Sec"]], out_smooth4[[23]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[24]][["Sec"]], out_smooth4[[24]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[25]][["Sec"]], out_smooth4[[25]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[26]][["Sec"]], out_smooth4[[26]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[27]][["Sec"]], out_smooth4[[27]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[28]][["Sec"]], out_smooth4[[28]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[29]][["Sec"]], out_smooth4[[29]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[30]][["Sec"]], out_smooth4[[30]][["predicted4"]], col = "darkgrey" )
lines(out_smooth4[[31]][["Sec"]], out_smooth4[[31]][["predicted4"]], col = "darkgrey" )
lines(out_smooth4[[32]][["Sec"]], out_smooth4[[32]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[33]][["Sec"]], out_smooth4[[33]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[34]][["Sec"]], out_smooth4[[34]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[35]][["Sec"]], out_smooth4[[35]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[36]][["Sec"]], out_smooth4[[36]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[37]][["Sec"]], out_smooth4[[37]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[38]][["Sec"]], out_smooth4[[38]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[39]][["Sec"]], out_smooth4[[39]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[40]][["Sec"]], out_smooth4[[40]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[41]][["Sec"]], out_smooth4[[41]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[42]][["Sec"]], out_smooth4[[42]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[43]][["Sec"]], out_smooth4[[43]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[44]][["Sec"]], out_smooth4[[44]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[45]][["Sec"]], out_smooth4[[45]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[46]][["Sec"]], out_smooth4[[46]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[47]][["Sec"]], out_smooth4[[47]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[48]][["Sec"]], out_smooth4[[48]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[49]][["Sec"]], out_smooth4[[49]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[50]][["Sec"]], out_smooth4[[50]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[51]][["Sec"]], out_smooth4[[51]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[52]][["Sec"]], out_smooth4[[52]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[53]][["Sec"]], out_smooth4[[53]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[54]][["Sec"]], out_smooth4[[54]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[55]][["Sec"]], out_smooth4[[55]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[56]][["Sec"]], out_smooth4[[56]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[57]][["Sec"]], out_smooth4[[57]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[58]][["Sec"]], out_smooth4[[58]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[59]][["Sec"]], out_smooth4[[59]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[60]][["Sec"]], out_smooth4[[60]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[61]][["Sec"]], out_smooth4[[61]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[62]][["Sec"]], out_smooth4[[62]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[63]][["Sec"]], out_smooth4[[63]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[64]][["Sec"]], out_smooth4[[64]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[65]][["Sec"]], out_smooth4[[65]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[66]][["Sec"]], out_smooth4[[66]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[67]][["Sec"]], out_smooth4[[67]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[68]][["Sec"]], out_smooth4[[68]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[69]][["Sec"]], out_smooth4[[69]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[70]][["Sec"]], out_smooth4[[70]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[71]][["Sec"]], out_smooth4[[71]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[72]][["Sec"]], out_smooth4[[72]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[73]][["Sec"]], out_smooth4[[73]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[74]][["Sec"]], out_smooth4[[74]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[75]][["Sec"]], out_smooth4[[75]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[76]][["Sec"]], out_smooth4[[76]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[77]][["Sec"]], out_smooth4[[77]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[78]][["Sec"]], out_smooth4[[78]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[79]][["Sec"]], out_smooth4[[79]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[80]][["Sec"]], out_smooth4[[80]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[81]][["Sec"]], out_smooth4[[81]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[82]][["Sec"]], out_smooth4[[82]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[83]][["Sec"]], out_smooth4[[83]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[84]][["Sec"]], out_smooth4[[84]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[85]][["Sec"]], out_smooth4[[85]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[86]][["Sec"]], out_smooth4[[86]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[87]][["Sec"]], out_smooth4[[87]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[88]][["Sec"]], out_smooth4[[88]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[89]][["Sec"]], out_smooth4[[89]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[90]][["Sec"]], out_smooth4[[90]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[91]][["Sec"]], out_smooth4[[91]][["predicted4"]] , col = "darkgrey")
lines(out_smooth4[[92]][["Sec"]], out_smooth4[[92]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[93]][["Sec"]], out_smooth4[[93]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[94]][["Sec"]], out_smooth4[[94]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[95]][["Sec"]], out_smooth4[[95]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[96]][["Sec"]], out_smooth4[[96]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[97]][["Sec"]], out_smooth4[[97]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[98]][["Sec"]], out_smooth4[[98]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[99]][["Sec"]], out_smooth4[[99]][["predicted4"]], col = "darkgrey")
lines(out_smooth4[[100]][["Sec"]], out_smooth4[[100]][["predicted4"]], col = "darkgrey")

#**********************************************************************************************
######    Reviewer question 1: Add quantitative repeatability measures to show whether measurements are repeatable within individuals (Table S1) #####
#                                    (Requested by reviewer)
#**********************************************************************************************

#dataset to use is capo, before aggregating by ID
#need to include only the individuals that were measured more than once in the same age class
# Mixed model: measurement ~ 1 + 1|Bird_ID
#among individual variance Va = Random Effect variance
#within individual variance Vw = Residual variance
#ICC = Va/(Va + Vw)

#do juveniles only
capoj<-capo
capoj<-subset(capoj, capoj$Age=="J")
#tarso
capojt<-subset(capoj, capoj$Tarso_completo!="NA")
tab1t<-table(capojt$ID)
df1t<-as.data.frame(tab1t)
df2t<-subset(df1t, df1t$Freq>1)
capo2t<-filter(capojt, capojt$ID %in% df2t$Var1)
TarsusICC<-glmmTMB(Tarso_completo ~ 1 + (1 | ID), data = capo2t)
summary(TarsusICC)
0.5998/(0.5998+0.2318)
#ICC 0.7212602

#secondaries
capojs<-subset(capoj, capoj$Sec!="NA")
tab1s<-table(capojs$ID)
df1s<-as.data.frame(tab1s)
df2s<-subset(df1s, df1s$Freq>1)
capo2s<-filter(capojs, capojs$ID %in% df2s$Var1)
SecondariesICC<-glmmTMB(Sec ~ 1 + (1 | ID), data = capo2s)
summary(SecondariesICC)
19.988/(19.988+1.723)
#ICC 0.9206393

#Weight
capojw<-subset(capoj, capoj$Wt!="NA")
tab1w<-table(capojw$ID)
df1w<-as.data.frame(tab1w)
df2w<-subset(df1w, df1w$Freq>1)
capo2w<-filter(capojw, capojw$ID %in% df2w$Var1)
WtICC<-glmmTMB(Wt ~ 1 + (1 | ID), data = capo2w)
summary(WtICC)
29.30/(29.30+12.29)
#ICC 0.704

#Wing chord
capojg<-subset(capoj, capoj$Wing!="NA")
tab1g<-table(capojg$ID)
df1g<-as.data.frame(tab1g)
df2g<-subset(df1g, df1g$Freq>1)
capo2g<-filter(capojg, capojg$ID %in% df2g$Var1)
WingICC<-glmmTMB(Wing ~ 1 + (1 | ID), data = capo2g)
summary(WingICC)
26.023/(26.023+4.098)
#ICC 0.8639487

#Tail
capojl<-subset(capoj, capoj$Tail!="NA")
tab1l<-table(capojl$ID)
df1l<-as.data.frame(tab1l)
df2l<-subset(df1l, df1l$Freq>1)
capo2l<-filter(capojl, capojl$ID %in% df2l$Var1)
TailICC<-glmmTMB(Tail ~ 1 + (1 | ID), data = capo2l)
summary(TailICC)
14.953/(14.953+7.632)
#ICC 0.662

#Total Length
capojtl<-subset(capoj, capoj$Largo!="NA")
tab1tl<-table(capojtl$ID)
df1tl<-as.data.frame(tab1tl)
df2tl<-subset(df1tl, df1tl$Freq>1)
capo2tl<-filter(capojtl, capojtl$ID %in% df2tl$Var1)
Total_lengthICC<-glmmTMB(Largo ~ 1 + (1 | ID), data = capo2tl)
summary(Total_lengthICC)
51.726/(51.726+9.005)
#ICC 0.852


#******************************************************************************************************
######    Reviewer question 2: Does dimorphism (of juveniles) in any of the traits increase with body size? (Fig S3) #####
#******************************************************************************************************

par(mfrow=c(1,3))
# Now use these colors: "#FF7F00", F juv;  "#1F78B4" M juv
capojw<-subset(capoj, capoj$Wt!="NA")
capojws<-subset(capojw, capojw$Sec!="NA")
plot (capojws$Wt, capojws$Sec, las=1, col = ifelse(capojws$Sexo_final=="M", "#1F78B4", "#FF7F00"),
      xlab="Weight (g)", ylab = "Length of secondaries (mm)")
lm1<-lm(Sec ~ Wt*Sexo_final, data = capojws)
summary(lm1)
prM<-data.frame(Wt=seq(min(capojws$Wt[capojws$Sexo_final=="M"]),max(capojws$Wt[capojws$Sexo_final=="M"]),0.2))
prM$Sexo_final<-"M"
prM$fitM<-predict.lm(lm1, newdata=prM, type= "response")
lines(prM$Wt, prM$fitM, col="#1F78B4")
prF<-data.frame(Wt=seq(min(capojws$Wt[capojws$Sexo_final=="H"]),max(capojws$Wt[capojws$Sexo_final=="H"]),0.2))
prF$Sexo_final<-"H"
prF$fitF<-predict.lm(lm1, newdata=prF, type= "response")
lines(prF$Wt, prF$fitF, col="#FF7F00")

capojww<-subset(capojw, capojw$Wing!="NA")
plot (capojww$Wt, capojww$Wing, las=1, col = ifelse(capojww$Sexo_final=="M", "#1F78B4", "#FF7F00"),
      xlab="Weight (g)", ylab = "Wing chord (mm)")
lm2<-lm(Wing ~ Wt*Sexo_final, data = capojww)
summary(lm2)
prM2<-data.frame(Wt=seq(min(capojww$Wt[capojww$Sexo_final=="M"]),max(capojww$Wt[capojww$Sexo_final=="M"]),0.2))
prM2$Sexo_final<-"M"
prM2$fitM<-predict.lm(lm2, newdata=prM2, type= "response")
lines(prM2$Wt, prM2$fitM, col="#1F78B4")
prF2<-data.frame(Wt=seq(min(capojww$Wt[capojww$Sexo_final=="H"]),max(capojww$Wt[capojww$Sexo_final=="H"]),0.2))
prF2$Sexo_final<-"H"
prF2$fitF<-predict.lm(lm2, newdata=prF2, type= "response")
lines(prF2$Wt, prF2$fitF, col="#FF7F00")

capojwt<-subset(capojw, capojw$Tail!="NA")
plot (capojwt$Wt, capojwt$Tail, las=1, col = ifelse(capojwt$Sexo_final=="M", "#1F78B4", "#FF7F00"),
      xlab="Weight (g)", ylab = "Tail (mm)")
lm3<-lm(Tail ~ Wt*Sexo_final, data = capojwt)
summary(lm3)
prM3<-data.frame(Wt=seq(min(capojwt$Wt[capojwt$Sexo_final=="M"]),max(capojwt$Wt[capojwt$Sexo_final=="M"]),0.2))
prM3$Sexo_final<-"M"
prM3$fitM<-predict.lm(lm3, newdata=prM3, type= "response")
lines(prM3$Wt, prM3$fitM, col="#1F78B4")
prF3<-data.frame(Wt=seq(min(capojwt$Wt[capojwt$Sexo_final=="H"]),max(capojwt$Wt[capojwt$Sexo_final=="H"]),0.2))
prF3$Sexo_final<-"H"
prF3$fitF<-predict.lm(lm3, newdata=prF3, type= "response")
lines(prF3$Wt, prF3$fitF, col="#FF7F00")

#****************************************************************************************************************
######    Reviewer question 3: histogram captures by month and year; intra-annual variation in adult weight (Supplementary Figures S9 and S4) #####
#****************************************************************************************************************


months_dat$YearMonth<-paste(months_dat$Year, months_dat$Month)
tabb<-table(months_dat$YearMonth)
tab.df<-as.data.frame(tabb)
colnames(tab.df)<-c("YearMonth", "Freq")

all.months<-data.frame(Year = c(rep(2016,12),rep(2017,12),rep(2018,12), rep(2019,12), rep(2020,12), rep(2021,12),
                                rep(2022,12), rep(2023,12), rep(2024,12), rep(2025,12)))
all.months$Month<-seq(1,12,1)
all.months$YearMonth<-paste(all.months$Year, all.months$Month)
joined<-all.months %>% left_join(y=tab.df, by=c("YearMonth"))
joined$Freq[is.na(joined$Freq)] <- 0
joined$Date = as.Date(paste(joined$Year, joined$Month, 01), "%Y %m %d")

par(mfrow=c(1,1))

joined$recruit<-ifelse(joined$Month==12,1,
                       ifelse(joined$Month==1,1,0))

barplot(height=joined$Freq, names=joined$Date, las=1, ylab= "Number of captures", col = ifelse(joined$recruit==1,"red","grey"), 
        xaxt='n', ylim=c(0,40))
mtext("2016             2017             2018            2019            2020            2021            2022            2023            2024            2025"  , side=1)
#stretch the graphics window to make the labels fit

#Intra-annual variation in adult male (and female) weight. Are adult males lighter during the lekking season? Are females heavier?
males_dat<-subset(months_dat, months_dat$Sexo_final == "M")
males_dat<-subset(males_dat, males_dat$Edad_final=="A")
males_dat<-subset(males_dat, males_dat$Peso!="NA")

wilcox.test(males_dat$Peso[males_dat$Lek_season==1], males_dat$Peso[males_dat$Lek_season==0])

males_dat$Lek_season<-as.factor(males_dat$Lek_season)

ggplot(males_dat, aes(x = Lek_season, y = Peso)) +
  geom_violin(fill="#A6CEE3") +
  geom_dotplot(binaxis= "y", stackdir = "center", dotsize = 0.2, fill = 1) +
  labs(y = "Weight (g)", x = "Season") +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none")

females_dat<-subset(months_dat, months_dat$Sexo_final == "H")
females_dat<-subset(females_dat, females_dat$Edad_final=="A")
females_dat<-subset(females_dat, females_dat$Peso!="NA")
females_dat$Lek_season<-as.factor(females_dat$Lek_season)
ggplot(females_dat, aes(x = Lek_season, y = Peso)) +
  geom_violin(fill="#FDBF6F") +
  geom_dotplot(binaxis= "y", stackdir = "center", dotsize = 0.2, fill = 1) +
  labs(y = "Weight (g)", x = "Season") +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none")

Adults_dat<-subset(months_dat, months_dat$Edad_final == "A")
Adults_dat<-subset(Adults_dat, Adults_dat$Peso!="NA")
Adults_dat$Lek_season<-as.factor(Adults_dat$Lek_season)
Adults_dat$Lek_Sex<-paste(Adults_dat$Lek_season, Adults_dat$Sexo_final)

dev.off()
ggplot(Adults_dat, aes(x = Lek_Sex, y = Peso, fill = Sexo_final)) +
  scale_fill_manual(values=c("#FDBF6F", "#A6CEE3", "#FDBF6F", "#A6CEE3")) +
  geom_violin() +
  geom_dotplot(binaxis= "y", stackdir = "center", dotsize = 0.15, fill = 1) +
  labs(y = "Weight (g)", x = "Season") +
  scale_x_discrete(breaks=c("0 H","0 M", "1 H", "1 M"),
                   labels=c("", "", "", "")) +
  theme(aspect.ratio=3/3,panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "none")+
  theme(axis.ticks.x = element_blank())


