* Start from an empty workspace, set the memory allocation, and load the replication data.
clear
set mem 400m
use "Gravity_Replication.dta"


* Close any log left open by an earlier run, then open a fresh log (overwriting the old file).
capture log close
log using "GravityPower_Replication.log", replace

/* This do-file replicates analyses for the article:

Hegre, Håvard, 2008. 'Gravitating toward War. Preponderance May Pacify, but Power Kills'. 
	Journal of Conflict Resolution 52(4): 566-589

The dataset is generated based on a replication dataset for John R. Oneal and Bruce Russett, 2005. 'Rule of Three, Let It Be: When More 
Really Is Better', Conflict Management and Peace Science 22(4): 293-310. Thanks to John Oneal for sharing data for some variables
not contained in their replication dataset.
*/


set more off


/* System-size term: Nt is one plus the number of non-missing stateb values among the
   rows with statea == 2 in a given year; lnNt is its centred log. */
capture drop Nt 
capture drop lnNt
bysort year statea: egen Nt = count(stateb) if statea == 2
replace Nt = Nt + 1
* Copy the yearly value down to the other rows of the same year. This relies on the
* statea == 2 rows sorting first within each year.
replace Nt = Nt[_n-1] if year == year[_n-1] & statea != 2
generate lnNt = ln(Nt)
summarize lnNt
* Centre lnNt (3.583519 is approximately ln(36)), then set it to zero for contiguous dyads.
replace lnNt = lnNt - 3.583519
replace lnNt = 0 if dircont == 1


/* Transforming variables: decay functions of py that halve every 8 units and every 1 unit of py */
generate py8 = 2^(-py/8)
generate py1 = 2^(-py)

* gen onemajor=majpower


/* Weak link measures */
generate ln_sml_depend = ln(smldep + 1e-07)

/* Generating trade, gdp, population variables for each dyad member (suffix a / b).
   The i / j versions are built further down from a_largecnt, which compares the two
   populations (not GDP). */
foreach pattern in lngdp* trade* lntrade* lnpop* lnigo* {
    capture drop `pattern'
}
capture drop demi demj


* Log total GDP = log(GDP per capita * population), for a then b.
foreach s in a b {
    generate lngdp`s' = ln(rgdp96pc`s'*tpop_`s')
}
* Log population, for a then b.
foreach s in a b {
    generate lnpop`s' = ln(tpop_`s')
}

generate gdpb = rgdp96pcb*tpop_b

* Trade level implied by dependence (in percent) times GDP, plus a constant of 0.2.
foreach s in a b {
    generate trade`s' = (depend`s'*rgdp96pc`s'*tpop_`s'/100) + 0.2
}

generate lntrade = lnrtrade

/* CALCULATE THE NEW WEAK LINK MEASURES */
/* a_largecnt, defined in terms of population: 1 when state a's log population is at least
   state b's, 0 when b's is larger, missing when either log population is missing. */
capture drop a_largecnt
generate a_largecnt = (lnpopa >= lnpopb) if !missing(lnpopa, lnpopb)


/* i takes the value of the state flagged by a_largecnt, j the value of the other state;
   all of these stay missing when a_largecnt is missing. */
generate lngdpi = cond(a_largecnt == 1, lngdpa, lngdpb) if !missing(a_largecnt)
generate lngdpj = cond(a_largecnt == 1, lngdpb, lngdpa) if !missing(a_largecnt)

generate lnpopi = cond(a_largecnt == 1, ln(tpop_a), ln(tpop_b)) if !missing(a_largecnt)
generate lnpopj = cond(a_largecnt == 1, ln(tpop_b), ln(tpop_a)) if !missing(a_largecnt)

* Log GDP per capita = log GDP minus log population.
generate lninci = lngdpi - lnpopi
generate lnincj = lngdpj - lnpopj


generate demi = cond(a_largecnt == 1, polity_a, polity_b) if !missing(a_largecnt)
generate demj = cond(a_largecnt == 1, polity_b, polity_a) if !missing(a_largecnt)

* Product of the two polity scores.
capture drop demij
generate demij = demi*demj



/* Only cyear is rebuilt here. dyadid is used further down (cluster(dyadid), sort dyadid)
   and is never regenerated in this file, so it must not be dropped. Dropping
   "dyadid cyear" in one command only left dyadid intact when cyear did not yet exist.
   Stop early with a clear error if dyadid is not in the data. */
capture drop cyear
confirm variable dyadid

gen cyear = year - 1975

/* Carry contiguity forward within each dyad for years after 2000.
   The comparison with the previous observation is only meaningful when the rows of a dyad
   are adjacent and ordered by year. The data were sorted by year statea above, where
   (assuming one row per dyad-year) consecutive rows never belong to the same dyad, so the
   replace is done by dyad in year order. The summaries before and after show its effect. */
summarize dircont if year == 2001
bysort statea stateb (year): replace dircont = 1 if year > 2000 & dircont == 0 & dircont[_n-1] == 1
summarize dircont if year == 2001
* NOTE(review): lnNt was set to 0 for dircont == 1 rows earlier in the file, before this
* update; rows changed here keep their previous lnNt. Confirm that this is intended.
* Return to the year/statea ordering that was in place before this step.
sort year statea


/* Decomposing size */
/* Log capabilities for state i (flagged by a_largecnt) and state j (the other state),
   their sum, and the log of the summed raw capabilities. */
capture drop lncapi lncapj
generate lncapi = cond(a_largecnt == 1, ln(cap_a), ln(cap_b)) if !missing(a_largecnt)
generate lncapj = cond(a_largecnt == 1, ln(cap_b), ln(cap_a)) if !missing(a_largecnt)
generate lntotcap = lncapi + lncapj
generate lnsumcap = ln(cap_a + cap_b)



* Log capabilities per capita: log capabilities minus log population.
capture drop lnmili lnmilj


generate lnmili = lncapi - lnpopi
generate lnmilj = lncapj - lnpopj

correlate lnpopi lnpopj lncapi lncapj lnmili lnmilj

* Log capability ratio (i relative to j) and its square.
capture drop lnasym*
generate lnasymi = lncapi - lncapj
generate lnasym2 = lnasymi^2


/* Cases to include in analysis: include is 1 when every variable listed below is
   different from . and year is at most 2001, and 0 otherwise. */
capture drop include
generate include = lnpopi != . & lnpopj != . & lncapi != . & lncapj != . ///
    & demi != . & demj != . & lndstab != . & dircont != . & py1 != . & lnNt != . ///
    & year <= 2001 & allies != .


/* Relationship between variable names in paper and names in do file 

Name in paper | Name in do-file | Description
---------------------------------------------------
lnP_i		  | lnpopi		  | Population, most capable country
lnP_j		  | lnpopj		  | Population, least capable country
lnM_i		  | lncapi		  | Military capabilities, most capable country
lnM_j		  | lncapj		  | Military capabilities, least capable country
lnm_i		  | lnmili		  | Capabilities per capita, most capable country
lnm_j		  | lnmilj		  | Capabilities per capita, least capable country
Democracy_i	  | demi		  | Polity index, most capable country
Democracy_j	  | demj		  | Polity index, least capable country
Democracy_ij  | demij		  | Product of two democracy scores
Cij		  | dircont		  | Direct contiguity between countries in dyad
lnDij		  | lndstab		  | Log distance in km between countries in dyad
lnNt		  | lnNt		  | System size correction
Brevity	  | py1		  | Brevity of peace, decay function of time since war in dyad

Note: in this do-file the i/j roles are assigned by a_largecnt, which compares the two
populations (i = the state with the larger population, ties going to state a).

*/

/* Gravity models */

/* Table 1: log capabilities on log population, using the year-2000 rows with stateb == 986 */
regress lncapi lnpopi if stateb == 986 & year == 2000



/* Table 2: descriptive statistics for the included cases */
summarize lnpopi lnpopj lncapi lncapj lnmili lnmilj lntotcap lnsumcap lnasymi if include == 1


/* Table 3, estimated on included cases that also have GDP data for both states.
   Shared pieces of the specifications are kept in locals. */
local controls demi demj demij dircont lndstab lnNt py1
local capvars  lncapi lncapj
local decomp   lnpopi lnpopj lnmili lnmilj
local gdpsmpl  include == 1 & lngdpi !=. & lngdpj !=.
local nomiss   lnmili!=. & lnmilj!=. & lnpopi!=. & lnpopj!=. & lngdpi!=. & lngdpj!=.

/* Table 3, column A: controls only */
logit mzfatal1 `controls' if `gdpsmpl', cluster(dyadid)

/* Table 3, column B: adds log capabilities of i and j */
logit mzfatal1 `capvars' `controls' if `gdpsmpl', cluster(dyadid)

    /* Input to discussion re col. B */
    /* Remove Contiguity and distance */
    logit mzfatal1 demi demj demij lnNt py1 if `nomiss', cluster (dyadid)

    /* Remove democracy */
    logit mzfatal1 dircont lndstab lnNt py1 if `nomiss' & demi!=. & demj !=., cluster (dyadid)

/* Table 3, column C: as column B, with mzcowwar1 as the outcome */
logit mzcowwar1 `capvars' `controls' if `gdpsmpl', cluster(dyadid)

/* Table 3, column D: capabilities decomposed into population and capabilities per capita */
logit mzfatal1 `decomp' `controls' if `gdpsmpl', cluster(dyadid)

/* Table 3, column E: as column D, with mzcowwar1 as the outcome */
logit mzcowwar1 `decomp' `controls' if `gdpsmpl', cluster(dyadid)

/* Note: The analysis above is restricted to dyads with data for GDP, since the paper at some
	point decomposes military capabilities into what is explained by GDP and other factors.
	Replicating the analysis including dyads without data for GDP yields very similar results: */
local controls demi demj demij dircont lndstab lnNt py1
local capvars  lncapi lncapj
local decomp   lnpopi lnpopj lnmili lnmilj

/* Table 3, column A */
logit mzfatal1 `controls' if include == 1 , cluster(dyadid)

/* Table 3, column B */
logit mzfatal1 `capvars' `controls' if include == 1 , cluster(dyadid)

/* Table 3, column C */
logit mzcowwar1 `capvars' `controls' if include == 1 , cluster(dyadid)

/* Table 3, column D */
logit mzfatal1 `decomp' `controls' if include == 1 , cluster(dyadid)

/* Table 3, column E */
logit mzcowwar1 `decomp' `controls' if include == 1 , cluster(dyadid)





/* Table 4: Split-sample analysis */
/* Table 4 randomly draws 20 splits of the dataset and generates predictions as detailed in
   Section 5.5. Since the draws are random, the replication results differ somewhat from the
   results reported in Table 4. */
/* Averaging over 20 seeds */

/* include2 marks the rows eligible for the split-sample exercise. */
capture drop include2
gen include2 = 0
replace include2 = 1 if lnmili!=. & lnmilj!=. & lnpopi!=. & lnpopj!=. & lngdpi!=. & lngdpj!=. & py1 !=. & mzfatal1 !=. & demi !=. & demj !=.

forvalues i = 1001(1)1020 {
    display `i'
    capture drop sampler
    capture drop dyad_sampler
    capture drop estsample
    capture drop ds_median
    capture drop tradpr*
    capture drop gravpr*

    /* Split by dyad: every eligible row gets a uniform draw, each dyad gets the mean of its
       rows' draws, and dyads whose mean exceeds the median form the estimation sample
       (estsample == 1); the remaining rows are the hold-out sample. */
    set seed `i'
    gen sampler = uniform() if include2 == 1
    sort dyadid
    by dyadid: egen dyad_sampler = mean(sampler)
    gen estsample = 0
    sum dyad_sampler
    egen ds_median = median(dyad_sampler) if include2 == 1
    replace estsample = 1 if dyad_sampler > ds_median

    /* Model without capability terms, estimated on the estimation sample. */
    quietly logit mzfatal1 demi demj demij dircont lndstab lnNt py1 if estsample == 1 & include2 == 1, cluster(dyadid) nolog
    predict tradprob, p
    /* Classify at the .50, .25 and .10 thresholds. Stata treats missing as larger than any
       number, so "tradprob >= x" is true for a missing prediction; requiring tradprob < .
       keeps rows without a prediction out of the classification instead of counting them
       as predicted conflicts. */
    gen tradpred   = (tradprob >= .50) if tradprob < . & include2 == 1
    gen tradpred25 = (tradprob >= .25) if tradprob < . & include2 == 1
    gen tradpred10 = (tradprob >= .10) if tradprob < . & include2 == 1

    /* Hold-out classification tables. */
    tab mzfatal1 tradpred if estsample == 0 & include2 == 1, row col
    tab mzfatal1 tradpred25 if estsample == 0 & include2 == 1, row col
    tab mzfatal1 tradpred10 if estsample == 0 & include2 == 1, row col

    /* Model with population and capabilities per capita, same estimation sample. */
    quietly logit mzfatal1 lnpopi lnpopj lnmili lnmilj demi demj demij dircont lndstab lnNt py1 if estsample == 1 & include2 == 1, cluster(dyadid) nolog
    predict gravprob, p
    /* Same thresholds and the same guard against missing predictions. */
    gen gravpred   = (gravprob >= .50) if gravprob < . & include2 == 1
    gen gravpred25 = (gravprob >= .25) if gravprob < . & include2 == 1
    gen gravpred10 = (gravprob >= .10) if gravprob < . & include2 == 1

    tab mzfatal1 gravpred if estsample == 0 & include2 == 1, row col
    tab mzfatal1 gravpred25 if estsample == 0 & include2 == 1, row col
    tab mzfatal1 gravpred10 if estsample == 0 & include2 == 1, row col
} /* end forvalues split */


/* Table 5: models with the log capability ratio (lnasymi), estimated on included cases with
   GDP data for both states. Shared pieces of the specifications are kept in locals. */
local controls demi demj demij dircont lndstab lnNt py1
local gdpsmpl  include == 1 &  lngdpi !=. & lngdpj !=.

/* Table 5, column A: asymmetry only */
logit mzfatal1 lnasymi `controls' if `gdpsmpl', cluster (dyadid)

/* Table 5, column B: adds log capabilities of i */
logit mzfatal1 lnasymi lncapi `controls' if `gdpsmpl', cluster (dyadid)

/* Table 5, column C: adds the sum of the two log capabilities */
logit mzfatal1 lnasymi lntotcap `controls' if `gdpsmpl', cluster (dyadid)

/* Table 5, column D: adds the log of the summed capabilities */
logit mzfatal1 lnasymi lnsumcap `controls' if `gdpsmpl', cluster (dyadid)

/* Table 5, column E: as column D plus squared asymmetry */
logit mzfatal1 lnasymi lnasym2 lnsumcap `controls' if `gdpsmpl', cluster (dyadid)

