Module gegravity.MonteCarloGE

Expand source code
__Author__ = "Peter Herman"
__Project__ = "gegravity"
__Created__ = "May 05, 2020"
__Description__ = '''A method for Generating Monte Carlo GE gegravity using the distributions of parameter estimates from 
the empirical model '''
__all__ = ['MonteCarloGE']

import numpy as np
import pandas as pd
from pandas import DataFrame
from gme.estimate.EstimationModel import EstimationModel
from .OneSectorGE import OneSectorGE, CostCoeffs, _GEMetaData
from .BaselineData import BaselineData
from scipy.stats import multivariate_normal
from statsmodels.genmod.generalized_linear_model import GLMResultsWrapper
from typing import List

class MonteCarloGE(object):
    def __init__(self,
                 baseline: BaselineData,
                 year:str,
                 trials: int,
                 sigma: float,
                 reference_importer: str,
                 cost_variables: list,
                 cost_coeff_values: CostCoeffs,
                 expend_var_name: str = None,
                 output_var_name: str = None,
                 mc_variables: list = None,
                 seed: int = 0,
                 allow_singular_covar:bool = False):
        '''
        Define a Monte Carlo GE model.
        Args:
            baseline (BaselineData): Baseline data defined using the gegravity BaselineData class structure.
            year (str): The year to be used for the baseline model. Works best if estimation_model year column has been
                cast as string.
            trials (int): The number of trial simulations to conduct.
            expend_var_name (str): Column name of variable containing expenditure data in estimation_model.
            output_var_name (str): Column name of variable containing output data in estimation_model.
            sigma (float): Elasticity of substitution.
            reference_importer (str): Identifier for the country to be used as the reference importer (inward
                multilateral resistance normalized to 1 and other multilateral resistances solved relative to it).
            cost_variables (List[str]): (optional) A list of variables to use to compute bilateral trade costs. By
                default, all included non-fixed effect variables are used.
            mc_variables (List[str]): (optional) A subset of the cost_variables to randomly sample in the Monte Carlo
                experiment. Coefficients for the variables in this list are randomly drawn based on their estimated mean
                 and variance/covariance. Those excluded use their gravity estimated values only. By default, the model
                uses all cost variables (or those supplied to cost_variables argument) are
            cost_coeff_values (CostCoeffs): (optional) A set of parameter values or estimates to use for constructing
                trade costs. Should be of type gegravity.CostCoeffs, statsmodels.GLMResultsWrapper, or
                gme.SlimResults. If no values are provided, the estimates in the EstimationModel are used.
            seed (int): (optional) The seed to use for the random draws of cost coefficients in order to provide
                unchanging, consistent draws across runs. By default, the seed is randomly determined each time the
                model is constructed.
            allow_singular_covar (bool): If true, allow the covariance matrix to be singular when drawing coefficient
                values from multivariate normal distribution. Default is False.

        Attributes:
            baseline_data (pandas.DataFrame): Baseline data supplied to model in gme.EstimationModel.
            coeff_sample (pandas.DataFrame): The randomly drawn sample of cost coefficients for each cost variable. Each
                column corresponds to a different trial.
            main_coeffs (pandas.Series): The main coefficient estimates for the cost varaibles supplied to the model.
            main_stderrs (pandas.Series): The standard errors for the main cost coefficients.
            trials (int): The number of trial simulations of the model.
            sample_stats (pandas.DataFrame): A dataframe depicting both the initially supplied estimate values for each
                cost variable ('beta_estimate' and 'stderr_estimate') as well as descriptive statistics for the randomly
                drawn values across all trials.
            sigma (int): The elasticity of substitution parameter value.
            ---
            **Attributes containing results populated after MonteCarloGE.run_trials()**:\n\n
            aggregate_trade_results (Pandas.DataFrame): Aggregate trade results summarized across all trials. See
                OneSectorGE ResultsLabels for description of results.
            bilateral_costs (Pandas.DataFrame): Baseline and counterfactual bilateral trade costs summarized across all
                trials. See OneSectorGE ResultsLabels for description of results.
            bilateral_trade_results (Pandas.DataFrame): Bilateral trade results summarized across all trials.
                See OneSectorGE ResultsLabels for description of results.
            country_mr_terms (Pandas.DataFrame): Baseline and experiment multilateral resistance terms summarized
                across all trials. See OneSectorGE ResultsLabels for description of results.
            country_results (Pandas.DataFrame): The main country level results summarized across
                all trials. See OneSectorGE ResultsLabels for description of results.
            factory_gate_prices (Pandas.DataFrame): Factory gate prices summarized across all trials. See OneSectorGE
                ResultsLabels for description of results.
            num_failed_trials (int): The number of trials for which the model failed to solve.
            replacement_sample (Padas.DataFrame): If run_trials options set to True, the vectors of additional parameter
                values are stored in this attribute.
            outputs_expenditures (Pandas.DataFrame): Baseline and experiment output and expenditure values summarized
                across all trials. See OneSectorGE ResultsLabels for description of results.
            solver_diagnostics (dict): A dictionary containing diagnostic information for each individual trial. The
                solver diagnostics for each trial correspond to the three solution routines: baseline
                multilateral resistances, conditional multilateral resistances (partial equilibrium counterfactual
                effects) and the full GE model. See the diagnostic info from scipy.optimize.root for more details.
            ---
            **Additional results populated if MonteCarloGE.run_trials(all_results=True)**\n\n
            all_aggregate_trade_results (Pandas.DataFrame): All aggregate trade results for each individual trial.
                Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels
                for description of results.
            all_bilateral_costs (Pandas.DataFrame): Bilateral trade costs for each individual trial. Columns are
                multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of
                results.
            all_bilateral_trade_results (Pandas.DataFrame): Bilateral trade results for all individual trials.
                Columns are multi-indexed by the trial number and type of result.
                See OneSectorGE ResultsLabels for description of results.
            all_country_mr_terms (Pandas.DataFrame): All baseline and experiment multilateral resistance terms for each
                individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE
                ResultsLabels for description of results.
            all_country_results (Pandas.DataFrame): Main results for all individual trials.
                Columns are multi-indexed by the trial number and type of result.
                See OneSectorGE ResultsLabels for description of results.
            all_factory_gate_prices (Pandas.DataFrame): Factory gate prices for each individual trial. Columns are
                multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of
                results.
            all_outputs_expenditures (Pandas.DataFrame): Baseline and experiment output and expenditure values for each
                individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE
                ResultsLabels for description of results.
            trial_models (gegravity.OneSectorGE): A list of OneSectorGE models corresponding to each trial.

        Examples:

            Create a MonteCarloGE model instance using BaselineData and CostCoeff instances (see respective APIs). Model
            will solve 10 randomly drawn trials.
            >>> baseline = ge.BaselineData(...)
            >>> cost_params = ge.CostCoeffs(...)
            >>> mc_model = ge.MonteCarloGE(
            ...            baseline,
            ...            year = '2006',
            ...            trials = 10,
            ...            reference_importer='DEU',
            ...            sigma=7,
            ...            cost_variables=['lndist', 'contiguity', 'common_language', 'pta', 'international', 'constant'],
            ...            cost_coeff_values=cost_params,
            ...            seed = 1)
        '''


        # Store some inputs in model object
        self._baseline = baseline

        # Determine whether to use expend_var_name and/or output_var_name from MonteCarlo inputs or Baseline meta data.
        # If both are supplied, the values given in MonteCarlo definition are used.
        if expend_var_name != None:
            use_expend_var_name = expend_var_name
        else:
            use_expend_var_name = self._baseline.meta_data.expend_var_name
        if output_var_name != None:
            use_output_var_name = output_var_name
        else:
            use_output_var_name = self._baseline.meta_data.output_var_name
        if use_output_var_name is None:
            raise ValueError("Must supply a variable name for the column containing output values.")
        if use_expend_var_name is None:
            raise ValueError("Must supply a variable name for the column containing output values.")


        self.meta_data = _GEMetaData(baseline.meta_data, use_expend_var_name, use_output_var_name)
        self._year = str(year)
        self.sigma = sigma
        self._reference_importer = reference_importer
        self._cost_variables = cost_variables
        self._cost_coeffs = cost_coeff_values
        self._allow_singular = allow_singular_covar
        if mc_variables is None:
            self._mc_variables = self._cost_variables
        else:
            self._mc_variables = mc_variables
        if seed is None:
            self._seed = np.random.randint(0,10000)
        else:
            self._seed = seed

        # Define Parameter values
        self.main_coeffs = self._cost_coeffs.params
        self.main_stderrs = self._cost_coeffs.bse
        self.main_covar_mat = self._cost_coeffs.covar
        self.trials = trials

        # Generate Sampling Distribution
        self.coeff_sample = self._draw_mc_trade_costs()

        ##
        # Define Results attributes
        ##
        self.num_failed_trials = None
        self.replacement_sample = None
        self.all_country_results = None
        self.country_results = None
        self.all_country_mr_terms = None
        self.country_mr_terms = None
        self.all_outputs_expenditures = None
        self.outputs_expenditures = None
        self.all_factory_gate_prices = None
        self.factory_gate_prices = None
        self.all_aggregate_trade_results = None
        self.aggregate_trade_results = None
        self.all_bilateral_trade_results = None
        self.bilateral_trade_results = None
        self.all_bilateral_costs = None
        self.bilateral_costs = None
        self.solver_diagnostics = None
        self.trial_models = None


        # prep baseline data
        _baseline_data = self._baseline.baseline_data.copy()
        _baseline_data[self.meta_data.year_var_name] = _baseline_data[self.meta_data.year_var_name].astype(str)
        self.baseline_data = _baseline_data.loc[_baseline_data[self.meta_data.year_var_name] == self._year, :].copy()
        if self.baseline_data.shape[0] == 0:
            raise ValueError("There are no observations corresponding to the supplied 'year'")

        # Create summary of sample distribution
        sample_stats = self.coeff_sample.copy()
        sample_stats.set_index(self._cost_coeffs._identifier_col, inplace= True)
        sample_stats = sample_stats.T.describe().T

        # sample_stats = self.coeff_sample.T.describe().T
        new_col_names = ['sample_{}'.format(col) for col in sample_stats]
        sample_stats.columns = new_col_names
        main_cost_ests = pd.DataFrame({'beta_estimate': self.main_coeffs[self._mc_variables],
                                       'stderr_estimate': self.main_stderrs[self._mc_variables]})
        self.sample_stats = pd.concat([main_cost_ests, sample_stats], axis=1)

    def _draw_mc_trade_costs(self):
        '''
        Draw coefficient values from multivariate normal distribution. For Poisson MLE,
        B-hat ~ Normal(B, (X'WX)^{-1}) where (X'WX)^{-1} is the covariance matrix. See An Introduction to Generalized
        Linear Models (2nd Ed) Annette J. Dobson, Chapman & Hall/CRC, Boca Raton Florida, Section 5.4.

        Returns: A dataframe of random draws of coefficients. Rows are cost variables from self._mc_variables, columns
            are different draws with the exception of a column with corresponding cost variable names ('index')
        '''
        print('Deriving sample of cost parameters...')
        # Get results and check that all needed info is available (i.e. covariance matrix in estimation model)
        betas = self.main_coeffs
        cov = self.main_covar_mat
        # Define distribution of beta parameters
        distribution_alt = multivariate_normal(betas, cov, seed=self._seed, allow_singular=self._allow_singular)
        draws = list()
        for i in range(self.trials):
            draws.append(pd.Series(distribution_alt.rvs()))
        all_draws = pd.concat(draws, axis=1)
        all_draws.index = betas.index
        all_draws = all_draws.loc[self._mc_variables,:]
        return all_draws.reset_index()


    def run_trials(self,
                   experiment_data:pd.DataFrame,
                   omr_rescale: float = 1,
                   imr_rescale: float = 1,
                   mr_method: str = 'hybr',
                   mr_max_iter: int = 1400,
                   mr_tolerance: float = 1e-8,
                   ge_method:str = 'hybr',
                   ge_tolerance: float = 1e-8,
                   ge_max_iter: int = 1000,
                   quiet: bool = False,
                   result_stats:list = ['mean', 'std', 'sem'],
                   all_results:bool = False,
                   redraw_failed_trials: bool = False,
                   trial_omr_rescale: dict = None):
        '''
        Conduct Monte Carlo Simulation of OneSectorGE gravity model.
        Args:
            experiment_data (pandas.DataFrame): A dataframe containing the counterfactual trade-cost data to use for the
                experiment. The best approach for creating this data is to copy the baseline data
                (MonteCarloGE.baseline_data.copy()) and modify columns/rows to reflect desired counterfactual experiment.
            omr_rescale (int): (optional) This value rescales the OMR values to assist in convergence. Often, OMR values
                are orders of magnitude different than IMR values, which can make convergence difficult. Scaling by a
                different order of magnitude can help. Values should be of the form 10^n for positive or negative
                integer n. By default, this value is 1 (10^0). However, users should be careful with this choice as
                results, even when convergent, may not be fully robust to any selection. The method
                MonteCarlo.check_omr_rescale() can help identify and compare feasible values for a given model.
            imr_rescale (int): (optional) This value rescales the IMR values to potentially aid in conversion. However,
                because the IMR for the reference importer is normalized to one, it is unlikely that there will be because
                because changing the default value, which is 1.
            mr_method (str): This parameter determines the type of non-linear solver used for solving the baseline and
                experiment MR terms. See the documentation for scipy.optimize.root for alternative methods. the default
                value is 'hybr'. (See also OneSectorGE.build_baseline())
            mr_max_iter (int): This parameter sets the maximum limit on the number of iterations conducted
                by the solver used to solve for MR terms. The default value is 1400.
                (See also OneSectorGE.build_baseline())
            mr_tolerance (float): This parameter sets the convergence tolerance level for the solver used to
                solve for MR terms. The default value is 1e-8. (See also OneSectorGE.build_baseline())
            ge_method (str): The solver method to use for the full GE non-linear solver. See scipy.root()
                documentation for option. Default is 'hybr'.
            ge_tolerance (float): The tolerance for determining if the GE system of equations is solved.
                Default is 1e-8.
            ge_max_iter (int): The maximum number of iterations allowed for the full GE nonlinear solver.
                Default is 1000.
            quiet (bool): If True, suppress console printouts detailing the solver success/failures of each trial.
                Default is False.
            result_stats (list): A list of functions to compute in order to summarize the results across trials. The
                default is ['mean', 'std', 'sem'], which computes the mean, standard deviation, and standard mean error
                of the results, respectively. The model should accept any function that can be used with the
                pandas.DataFrame.agg() function.
            all_results (bool): If true, MonteCarloGE attributes containing individual results for all trials are
                populated. Default is False to reduce memory use.
            redraw_failed_trials (bool): If True, draw additional trials to replace failed trials. Additional trials
                will be run until enough have succeeded to match the number that originally failed (up to a maximum of
                the number of trials originally specified). This helps insure that the model is solved for as many
                trials as were specified. E.g. If 10 trials are specified and 2 fail to solve, additional trials will be
                attempted with additional random draws until 2 additional models have been solved successfully, resulting
                in 10 successful trials, or 10 additional trails are run without 2 successes.
            trial_omr_rescale (dict): (option) An option to provide alternative OMR rescale factors for specific trials.
                The argument should be a dictionary keyed with the trial number (0 to N-1) and value equal to the
                desired rescale factor (10^n) with n a positive or negative integer (e.g. {0:0.001, 3:10} Any trial not
                specified here will be run using the value specified by the omr_rescale argument (default of 1).
        Returns:
            None: No return but populates many results attributes of the MonteCarloGE model.

        Examples:
             Run simulations for mc_model, which is an instance of MonteCarloGE (see respective documentation).
             'counterfactual_dataframe' is a counterfactual version of the baseline data to use for the experiment.
            >>> monte_model = ge.MonteCarloGE(...)
            >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
            ...                        omr_rescale = 1)
            * Simulating trial 0 *
            Solving for baseline MRs...
            The solution converged.
            Solving for conditional MRs...
            The solution converged.
            Solving full GE model...
            The solution converged.
            * Simulating trial 1 *
            Solving for baseline MRs...
            The solution converged.
            Solving for conditional MRs...
            The solution converged.
            Solving full GE model...
            The solution converged.
            (truncated...)

            Return all info about each trial, not just summary info across trials
            >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
            ...                        omr_rescale = 1,
            ...                        all_results=True)

            Specify alternative OMR rescale factors for trials 2 and 3, while using the value of 1 for all other trials.
            >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
            ...                        omr_rescale = 1,
            ...                        trail_rescale_factors = {2:0.001, 3:100})

             Draw additional random cost values to replace trials that failed to solve
             >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
             ...                        omr_rescale = 1,
             ...                        redraw_failed_trials=True)
        '''
        models = list()
        failed_trials = list()
        num_failed_iterations = 0
        for trial in range(self.trials):
            if not quiet:
                print("\n* Simulating trial {} *".format(trial))
            # Define a new CostCoeff instance using one of the trial values
            param_values = CostCoeffs(self.coeff_sample, coeff_col=trial, identifier_col=self._cost_coeffs._identifier_col)
            try:
                if (trial_omr_rescale is not None) and (trial in trial_omr_rescale):
                    # Use alternative omr if supplied
                    omr_rescale_use = trial_omr_rescale[trial]
                else:
                    omr_rescale_use = omr_rescale

                trial_model = self._run_single_trial(param_values=param_values, experiment_data=experiment_data,
                                                     quiet = quiet, omr_rescale = omr_rescale_use,
                                                     imr_rescale = imr_rescale, mr_method = mr_method,
                                                     mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                     ge_method = ge_method, ge_tolerance = ge_tolerance,
                                                     ge_max_iter = ge_max_iter)
                models.append(trial_model)
            except:
                if not quiet:
                    print("Failed to solve model.\n")
                num_failed_iterations+=1
                failed_trials.append(trial)
        self.num_failed_trials = num_failed_iterations

        # For failed trails, redraw new estimates and solve new models for each failed trial
        if redraw_failed_trials:
            # Define distribution of beta parameters to draw new values from (sets new seed to draw from)
            new_dist = multivariate_normal(self.main_coeffs, self.main_covar_mat, seed=(1 + self._seed),
                                           allow_singular=self._allow_singular)
            # Create lists/variables to track successes/failures
            new_draws = list()
            new_successes = 0
            new_failures = 0
            tries = 0
            max_retries = self.trials
            while new_successes < self.num_failed_trials and tries <= max_retries:
                # Tick up try counter and created column name for random draw

                try_num = self.trials+tries
                # Create and format coefficient draw
                new_draw = pd.DataFrame(new_dist.rvs())
                new_draw.index = self.main_coeffs.index
                new_draw.rename(columns={0: str(try_num)}, inplace=True)
                new_draws.append(new_draw.copy())
                new_draw.reset_index(inplace = True)
                new_params = CostCoeffs(new_draw, coeff_col=str(try_num), identifier_col=self._cost_coeffs._identifier_col)

                if not quiet:
                    print("\n* Simulating failed trial replacement {} *".format(try_num))
                try:
                    new_trial_model = self._run_single_trial(param_values=new_params, experiment_data=experiment_data,
                                                             quiet = quiet, omr_rescale = omr_rescale,
                                                             imr_rescale = imr_rescale, mr_method = mr_method,
                                                             mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                             ge_method = ge_method, ge_tolerance = ge_tolerance,
                                                             ge_max_iter = ge_max_iter)
                    models.append(new_trial_model)
                    new_successes+=1
                except:
                    if not quiet:
                        print("Failed to solve redrawn replacement model.\n")
                    new_failures += 1
                    failed_trials.append(try_num)

                tries += 1
            if len(new_draws)>0:
                self.replacement_sample = pd.concat(new_draws, axis=1)
            else:
                self.replacement_sample = new_draws



        # Get results labels from one of the OneSectorGE gegravity
        self.labels = models[0].labels
        if all_results:
            self.trial_models = models
        self.failed_trials = failed_trials
        self.all_country_results, self.country_results = self._compile_results(models, 'country_results', result_stats, all_results)
        self.all_country_mr_terms, self.country_mr_terms = self._compile_results(models, 'mr_terms', result_stats, all_results)
        self.all_outputs_expenditures, self.outputs_expenditures = self._compile_results(models, 'outputs_expenditures', result_stats, all_results)
        self.all_factory_gate_prices, self.factory_gate_prices = self._compile_results(models, 'factory_gate_prices', result_stats, all_results)
        self.all_aggregate_trade_results, self.aggregate_trade_results = self._compile_results(models, 'aggregate_trade_results', result_stats, all_results)
        self.all_bilateral_trade_results, self.bilateral_trade_results = self._compile_results(models, 'bilateral_trade', result_stats, all_results)
        self.all_bilateral_costs, self.bilateral_costs = self._compile_results(models, 'bilateral_costs', result_stats, all_results)
        self._compile_diagnostics(models)
        # ToDo: build some method for confidence intervals from Anderson Yotov (2016)



    def _run_single_trial(self, param_values: CostCoeffs, experiment_data, quiet, omr_rescale, imr_rescale, mr_method,
                          mr_max_iter, mr_tolerance, ge_method, ge_tolerance, ge_max_iter):
        '''
        Create and solve a single OneSectorGE model using given inputs. Inputs the same as OneSectorGE.
        Args:
            See arguments for OneSectorGE.

        Returns:
            Solved OneSectorGE model instance
        '''
        trial_model = OneSectorGE(self._baseline,
                                  year=self._year,
                                  reference_importer=self._reference_importer,
                                  expend_var_name=self.meta_data.expend_var_name,
                                  output_var_name=self.meta_data.output_var_name,
                                  sigma=self.sigma,
                                  cost_variables=self._cost_variables,
                                  cost_coeff_values=param_values,
                                  quiet=quiet)
        trial_model.build_baseline(omr_rescale=omr_rescale,
                                           imr_rescale=imr_rescale,
                                           mr_method=mr_method,
                                           mr_max_iter=mr_max_iter,
                                           mr_tolerance=mr_tolerance)
        trial_model.define_experiment(experiment_data)
        trial_model.simulate(ge_method=ge_method,
                                     ge_tolerance=ge_tolerance,
                                     ge_max_iter=ge_max_iter)
        return trial_model



    def _compile_results(self, models, result_type, result_stats, all_results):
        '''
        Compile results across all trials.
        :param models: (List[OneSectorGE]) A list of solved OneSectorGE gegravity.
        :param result_type: (str) Type of results to compile. Function works with:
            'country_results' - compiles results from OneSectorGE.country_mr_results
            'mr_terms' - compiles results from OneSectorGE.country_mr_terms
            'output_expenditures' - compiles results from OneSectorGE.output_expenditures
            'factory_gate_prices - compiles results from OneSectorGE.factory_gate_prices
            'aggregate_trade_results' - compiles results from OneSectorGE.aggregate_trade_results
            'bilateral_trade' - complies results from OneSectorGE.bilateral_trade_results
            'bilateral_costs' - compiles results from OneSectorGE.bilateral_costs
        :return:(pd.DataFrame, pd.DataFrame) Two dataframes. The first contains all results for each trial, with
            multiindex columns labeled (trial, result type). The second provides summary stats from all trials (mean,
            std, stderr)
        '''
        # Combine all results
        combined_results_list = list()
        for num, model in enumerate(models):
            if result_type == 'country_results':
                model_results = model.country_results.copy()
            if result_type == 'mr_terms':
                model_results = model.country_mr_terms
            if result_type == 'outputs_expenditures':
                model_results = model.outputs_expenditures
            if result_type == 'factory_gate_prices':
                model_results = model.factory_gate_prices
            if result_type == 'aggregate_trade_results':
                model_results = model.aggregate_trade_results
            if result_type == 'bilateral_trade':
                model_results = model.bilateral_trade_results
            if result_type == 'bilateral_costs':
                model_results = model.bilateral_costs

            # Label columns via multiindex with (trial #, result label)
            multi_columns = [(num,col) for col in model_results.columns]
            model_results.columns = pd.MultiIndex.from_tuples(multi_columns)
            combined_results_list.append(model_results)
        combined_results = pd.concat(combined_results_list, axis = 1)

        # Reshape trials to long format
        summary_results = combined_results.copy()
        if result_type in ['bilateral_trade','bilateral_costs']:
            # Bilateral trade has a two-part index (exporter and importer) and must be treated separately.
            summary_results = summary_results.stack(0, future_stack=True).reset_index(level=2)
            summary_results.rename(columns={'level_2': 'trial'}, inplace=True)
        else:
            summary_results = summary_results.stack(0, future_stack=True).reset_index(level=1)
            summary_results.rename(columns={'level_1': 'trial'}, inplace=True)

        # Compute mean and std across trials
        agg_dict = dict()
        var_list = list(summary_results.columns)
        var_list.remove('trial')
        for var in var_list:
            agg_dict[var] = result_stats
        if result_type in ['bilateral_trade','bilateral_costs']:
            summary_results = summary_results.groupby(level=[0, 1]).agg(agg_dict)
        else:
            summary_results = summary_results.groupby(level=0).agg(agg_dict)

        # Compute standard error for each result type
        # for col in summary_results.columns:
        #     if col[1] == 'std':
        #         summary_results[(col[0], 'stderr')] = summary_results[col] / (self.trials ** 0.5)
        if result_type in ['bilateral_trade','bilateral_costs']:
            summary_results = summary_results.stack(level=1, future_stack=True).reset_index(level=2)
            summary_results.rename(columns = {'level_2':'statistic'},inplace = True)
        else:
            summary_results = summary_results.stack(level=1, future_stack=True).reset_index(level=1)
            summary_results.rename(columns = {'level_1':'statistic'},inplace = True)
        if all_results:
            return combined_results, summary_results
        else:
            return None, summary_results

    def _compile_diagnostics(self, models):
        '''
        Compiles the diagnostics from each trial into a single dictionary, indexed by the trial number.
        Args:
            models: the list of OneSectorGE gegravity associated with each trial

        Returns: None, Populates the attribute self.solver_daignostics

        '''
        combined_diagnostics = dict()
        for trial in range(len(models)):
            combined_diagnostics[trial] = models[trial].solver_diagnostics
        self.solver_diagnostics = combined_diagnostics

    def export_results(self, directory:str = None, name:str = '',
                       country_names:DataFrame = None, country:bool = True, bilateral:bool = True,
                       diagnostics:bool = True):
        '''
        Export results to csv files. Three files are stored containing (1) country-level results, (2) bilateral results,
        and (3) solver diagnostics.
        Args:
            directory (str): (optional) Directory in which to write results files. If no directory is supplied,
                three compiled dataframes are returned as a tuple in the order (Country-level results, bilateral
                results, solver diagnostics).
            name (str): (optional) Name of the simulation to prefix to the result file names.
            include_levels (bool): (optional) If True, includes additional columns reflecting the simulated changes in
                levels based on observed trade flows (rather than modeled trade flows). Values are those from the
                method calculate_levels.
            country_names (pandas.DataFrame): (optional) Adds alternative identifiers such as names to the returned
                results tables. The supplied DataFrame should include exactly two columns. The first column must be
                the country identifiers used in the model. The second column must be the alternative identifiers to
                add.
            country (boolean): (optional) If True, export country-level results to csv. If False, skip these results.
                Default is True.
            bilateral (boolean): (optional) If True, export bilateral results to csv. If False, skip these results.
                Default is True.
            diagnostics (boolean): (optional) If True, export diagnostic information to csv. If False, skip these
                results. Default is True.

        Returns:
            None or Tuple[DataFrame, DataFrame, DataFrame]: If a directory argument is supplied, the method returns
                nothing and writes three .csv files instead. If no directory is supplied, it returns a tuple of
                DataFrames.

        Examples:
            Export all three results files with the file prefix "example_monte_carlo_results"
            >>> mc_model.export_results(directory="C:\simulation_results\", name = "example_monte_carlo_results")

            Export only the country level results (i.e. do not export the bilateral and diagnostic results)
            >>> mc_model.export_results(directory="C:\simulation_results\", name = "example_monte_carlo_results",
            ...                         bilateral = False, diagnostics = False)

        '''

        importer_col = self.meta_data.imp_var_name
        exporter_col = self.meta_data.exp_var_name

        country_result_set = [self.country_results, self.factory_gate_prices, self.aggregate_trade_results,
                              self.outputs_expenditures, self.country_mr_terms]
        country_results = pd.concat(country_result_set, axis = 1)
        # Order and select columns for inclusion, drop duplicates.
        country_results_cols = country_results.columns
        labs = self.labels
        # Country results to include
        results_cols = ['statistic'] + self.labels.country_level_labels
        included_columns = [col for col in results_cols if col in country_results_cols]
        country_results = country_results[included_columns]
        country_results = country_results.loc[:, ~country_results.columns.duplicated()]

        bilateral_results = self.bilateral_trade_results.reset_index()

        if country_names is not None:
            if country_names.shape[1]!=2:
                raise ValueError("country_names should have exactly 2 columns, not {}".format(country_names.shape[1]))
            code_col = country_names.columns[0]
            name_col = country_names.columns[1]
            country_names.set_index(code_col, inplace = True, drop = True)
            country_results = country_names.merge(country_results, how = 'right', left_index = True, right_index=True)

            # Add names to bilateral data
            for side in [exporter_col, importer_col]:
                side_names = country_names.copy()
                side_names.reset_index(inplace = True)
                side_names.rename(columns = {code_col:side, name_col:"{} {}".format(side,name_col)}, inplace = True)
                bilateral_results = bilateral_results.merge(side_names, how = 'left', on = side)

        # Create Dataframe with Diagnostic results
        column_list = list()
        diagnostic_info = self.solver_diagnostics
        for trial_num, trial in diagnostic_info.items():
            for results_type, results in trial.items():
                for key, value in results.items():
                    # Single Entry fields must be converted to list before creating DataFrame
                    if key in ['success', 'status', 'nfev', 'message']:
                        frame = pd.DataFrame({("trial_{}".format(trial_num), results_type, key): [value]})
                        column_list.append(frame)
                    # Vector-like fields Can be used as is. Several available fields are not included: 'fjac','r', and 'qtf'
                    elif key in ['x', 'fun']:
                        frame = pd.DataFrame({("trial_{}".format(trial_num), results_type, key): value})
                        column_list.append(frame)
        diag_frame = pd.concat(column_list, axis=1)
        diag_frame = diag_frame.fillna('')

        if directory is not None:
            if country:
                country_results.to_csv("{}/{}_country_results.csv".format(directory, name))
            if bilateral:
                bilateral_results.to_csv("{}/{}_bilateral_results.csv".format(directory, name), index=False)
            if diagnostics:
                diag_frame.to_csv("{}/{}_solver_diagnostics.csv".format(directory, name), index=False)
        else:
            return country_results, bilateral_results, diag_frame

    def check_omr_rescale(self,
                          omr_rescale_range: int = 10,
                          trials: List[int] = None,
                          mr_method: str = 'hybr',
                          mr_max_iter: int = 1400,
                          mr_tolerance: float = 1e-8,
                          countries: List[str] = []):
        '''
        Analyze different Outward Multilarteral Resistance (OMR) term rescale factors. This method can help identify
            feasible values to use for the omr_rescale argument in OneSectorGE.build_baseline().
        Args:
            omr_rescale_range (int): This parameter allows you to set the scope of the values tested. For example,
                if omr_rescale_range = 3, the model will check for convergence using omr_rescale values from the set
                [10^-3, 10^-2, 10^-1, 10^0, ..., 10^3]. The default value is 10.
            trials (List[int]): (optional) A list of trials by number (0 to (N-1)) corresponding to columns of the
                derived coefficient sample (MonteCarloGE.coeff_sample) for which to analyze OMR terms. If no input is
                provided, all trials are considered.
            mr_method (str): This parameter determines the type of non-linear solver used for solving the baseline and
                experiment MR terms. See the documentation for scipy.optimize.root for alternative methods. the default
                value is 'hybr'.
            mr_max_iter (int): (optional) This parameter sets the maximum limit on the number of iterations conducted
                by the solver used to solve for MR terms. The default value is 1400.
            mr_tolerance (float): (optional) This parameter sets the convergence tolerance level for the solver used to
                solve for MR terms. The default value is 1e-8.
            countries (List[str]):  A list of countries for which to return the estimated OMR values for user
                evaluation.
        Returns:
            pandas.DataFrame: A dataframe of diagnostic information for users to compare different omr_rescale factors.
                The returned dataframe contains the following columns:\n
                'trial': The trial number corresponding to the sample parameters for which the test was run.
                'omr_rescale': The rescale factor used\n
                'omr_rescale (alt format)': A string representation of the rescale factor as an exponential expression.\n
                'solved': If True, the MR model solved successfully. If False, it did not solve.\n
                'message': Description of the outcome of the solver.\n
                '..._func_value': Three columns reflecting the maximum, mean, and median values from the solver
                    objective functions. Function values closer to zero imply a better solution to system of equations.
                'reference_importer_omr': The solution value for the reference importer's OMR value.\n
                '..._omr': The solution value(s) for the user supplied countries.

        Examples:
            Check for potential OMR rescale factors for all trials within a range of 10^-10 to 10^10.
            >>> omr_all_trials = mc_model.check_omr_rescale(omr_rescale_range=10)

            Check for potential OMR rescale factors for trials 1, 2, 8, and 9 within a range of 10^-3 to 10^3.
            >>> omr_trial_subset = mc_model.check_omr_rescale(omr_rescale_range=3, trials = [1, 2, 8, 9])
        '''

        # Check trials argument if provided
        if trials is None:
            trials_list = range(self.trials)
        else:
            if not isinstance(trials, list):
                raise ValueError("trials argument must be a list of integer values")
            else:
                trials_list = trials

        all_outputs = list()
        for trial in trials_list:
            print("\n* Checking trial {} *".format(trial))
            trial_params = self.coeff_sample[[self._cost_coeffs._identifier_col ,trial]]
            trial_params_obj = CostCoeffs(trial_params, identifier_col=self._cost_coeffs._identifier_col,
                                          coeff_col=trial)

            omr_test_gemodel = OneSectorGE(self._baseline, year=self._year, reference_importer=self._reference_importer,
                                    output_var_name=self.meta_data.output_var_name,
                                    expend_var_name=self.meta_data.expend_var_name, sigma=self.sigma,
                                    cost_variables=self._cost_variables,
                                    cost_coeff_values=trial_params_obj)
            omr_checks = omr_test_gemodel.check_omr_rescale(omr_rescale_range, mr_method = mr_method,
                                                            mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                            countries = countries)
            # Add column with trial number to the *beginning* of dataframe
            omr_checks.insert(loc=0, column = 'trail', value = trial)
            all_outputs.append(omr_checks)
        omr_outcomes = pd.concat(all_outputs, axis = 0)
        return omr_outcomes

Classes

class MonteCarloGE (baseline: BaselineData, year: str, trials: int, sigma: float, reference_importer: str, cost_variables: list, cost_coeff_values: CostCoeffs, expend_var_name: str = None, output_var_name: str = None, mc_variables: list = None, seed: int = 0, allow_singular_covar: bool = False)

Define a Monte Carlo GE model.

Args

baseline : BaselineData
Baseline data defined using the gegravity BaselineData class structure.
year : str
The year to be used for the baseline model. Works best if estimation_model year column has been cast as string.
trials : int
The number of trial simulations to conduct.
expend_var_name : str
Column name of variable containing expenditure data in estimation_model.
output_var_name : str
Column name of variable containing output data in estimation_model.
sigma : float
Elasticity of substitution.
reference_importer : str
Identifier for the country to be used as the reference importer (inward multilateral resistance normalized to 1 and other multilateral resistances solved relative to it).
cost_variables : List[str]
(optional) A list of variables to use to compute bilateral trade costs. By default, all included non-fixed effect variables are used.
mc_variables : List[str]
(optional) A subset of the cost_variables to randomly sample in the Monte Carlo experiment. Coefficients for the variables in this list are randomly drawn based on their estimated mean and variance/covariance. Those excluded use their gravity estimated values only. By default, the model uses all cost variables (or those supplied to cost_variables argument) are
cost_coeff_values : CostCoeffs
(optional) A set of parameter values or estimates to use for constructing trade costs. Should be of type gegravity.CostCoeffs, statsmodels.GLMResultsWrapper, or gme.SlimResults. If no values are provided, the estimates in the EstimationModel are used.
seed : int
(optional) The seed to use for the random draws of cost coefficients in order to provide unchanging, consistent draws across runs. By default, the seed is randomly determined each time the model is constructed.
allow_singular_covar : bool
If true, allow the covariance matrix to be singular when drawing coefficient values from multivariate normal distribution. Default is False.

Attributes

baseline_data : pandas.DataFrame
Baseline data supplied to model in gme.EstimationModel.
coeff_sample : pandas.DataFrame
The randomly drawn sample of cost coefficients for each cost variable. Each column corresponds to a different trial.
main_coeffs : pandas.Series
The main coefficient estimates for the cost varaibles supplied to the model.
main_stderrs : pandas.Series
The standard errors for the main cost coefficients.
trials : int
The number of trial simulations of the model.
sample_stats : pandas.DataFrame
A dataframe depicting both the initially supplied estimate values for each cost variable ('beta_estimate' and 'stderr_estimate') as well as descriptive statistics for the randomly drawn values across all trials.
sigma : int
The elasticity of substitution parameter value.

Attributes containing results populated after MonteCarloGE.run_trials():

aggregate_trade_results : Pandas.DataFrame
Aggregate trade results summarized across all trials. See OneSectorGE ResultsLabels for description of results.
bilateral_costs : Pandas.DataFrame
Baseline and counterfactual bilateral trade costs summarized across all trials. See OneSectorGE ResultsLabels for description of results.
bilateral_trade_results : Pandas.DataFrame
Bilateral trade results summarized across all trials. See OneSectorGE ResultsLabels for description of results.
country_mr_terms : Pandas.DataFrame
Baseline and experiment multilateral resistance terms summarized across all trials. See OneSectorGE ResultsLabels for description of results.
country_results : Pandas.DataFrame
The main country level results summarized across all trials. See OneSectorGE ResultsLabels for description of results.
factory_gate_prices : Pandas.DataFrame
Factory gate prices summarized across all trials. See OneSectorGE ResultsLabels for description of results.
num_failed_trials : int
The number of trials for which the model failed to solve.
failed_trials : list
List of the trials that failed to solve.
replacement_sample : Padas.DataFrame
If run_trials options set to True, the vectors of additional parameter values are stored in this attribute.
outputs_expenditures : Pandas.DataFrame
Baseline and experiment output and expenditure values summarized across all trials. See OneSectorGE ResultsLabels for description of results.
solver_diagnostics : dict
A dictionary containing diagnostic information for each individual trial. The solver diagnostics for each trial correspond to the three solution routines: baseline multilateral resistances, conditional multilateral resistances (partial equilibrium counterfactual effects) and the full GE model. See the diagnostic info from scipy.optimize.root for more details.

Additional results populated if MonteCarloGE.run_trials(all_results=True)

all_aggregate_trade_results : Pandas.DataFrame
All aggregate trade results for each individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of results.
all_bilateral_costs : Pandas.DataFrame
Bilateral trade costs for each individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of results.
all_bilateral_trade_results : Pandas.DataFrame
Bilateral trade results for all individual trials. Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of results.
all_country_mr_terms : Pandas.DataFrame
All baseline and experiment multilateral resistance terms for each individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of results.
all_country_results : Pandas.DataFrame
Main results for all individual trials. Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of results.
all_factory_gate_prices : Pandas.DataFrame
Factory gate prices for each individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of results.
all_outputs_expenditures : Pandas.DataFrame
Baseline and experiment output and expenditure values for each individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of results.
trial_models : gegravity.OneSectorGE
A list of OneSectorGE models corresponding to each trial.

Examples

Create a MonteCarloGE model instance using BaselineData and CostCoeff instances (see respective APIs). Model will solve 10 randomly drawn trials.

>>> baseline = ge.BaselineData(...)
>>> cost_params = ge.CostCoeffs(...)
>>> mc_model = ge.MonteCarloGE(
...            baseline,
...            year = '2006',
...            trials = 10,
...            reference_importer='DEU',
...            sigma=7,
...            cost_variables=['lndist', 'contiguity', 'common_language', 'pta', 'international', 'constant'],
...            cost_coeff_values=cost_params,
...            seed = 1)
Expand source code
class MonteCarloGE(object):
    def __init__(self,
                 baseline: BaselineData,
                 year:str,
                 trials: int,
                 sigma: float,
                 reference_importer: str,
                 cost_variables: list,
                 cost_coeff_values: CostCoeffs,
                 expend_var_name: str = None,
                 output_var_name: str = None,
                 mc_variables: list = None,
                 seed: int = 0,
                 allow_singular_covar:bool = False):
        '''
        Define a Monte Carlo GE model.
        Args:
            baseline (BaselineData): Baseline data defined using the gegravity BaselineData class structure.
            year (str): The year to be used for the baseline model. Works best if estimation_model year column has been
                cast as string.
            trials (int): The number of trial simulations to conduct.
            expend_var_name (str): Column name of variable containing expenditure data in estimation_model.
            output_var_name (str): Column name of variable containing output data in estimation_model.
            sigma (float): Elasticity of substitution.
            reference_importer (str): Identifier for the country to be used as the reference importer (inward
                multilateral resistance normalized to 1 and other multilateral resistances solved relative to it).
            cost_variables (List[str]): (optional) A list of variables to use to compute bilateral trade costs. By
                default, all included non-fixed effect variables are used.
            mc_variables (List[str]): (optional) A subset of the cost_variables to randomly sample in the Monte Carlo
                experiment. Coefficients for the variables in this list are randomly drawn based on their estimated mean
                 and variance/covariance. Those excluded use their gravity estimated values only. By default, the model
                uses all cost variables (or those supplied to cost_variables argument) are
            cost_coeff_values (CostCoeffs): (optional) A set of parameter values or estimates to use for constructing
                trade costs. Should be of type gegravity.CostCoeffs, statsmodels.GLMResultsWrapper, or
                gme.SlimResults. If no values are provided, the estimates in the EstimationModel are used.
            seed (int): (optional) The seed to use for the random draws of cost coefficients in order to provide
                unchanging, consistent draws across runs. By default, the seed is randomly determined each time the
                model is constructed.
            allow_singular_covar (bool): If true, allow the covariance matrix to be singular when drawing coefficient
                values from multivariate normal distribution. Default is False.

        Attributes:
            baseline_data (pandas.DataFrame): Baseline data supplied to model in gme.EstimationModel.
            coeff_sample (pandas.DataFrame): The randomly drawn sample of cost coefficients for each cost variable. Each
                column corresponds to a different trial.
            main_coeffs (pandas.Series): The main coefficient estimates for the cost varaibles supplied to the model.
            main_stderrs (pandas.Series): The standard errors for the main cost coefficients.
            trials (int): The number of trial simulations of the model.
            sample_stats (pandas.DataFrame): A dataframe depicting both the initially supplied estimate values for each
                cost variable ('beta_estimate' and 'stderr_estimate') as well as descriptive statistics for the randomly
                drawn values across all trials.
            sigma (int): The elasticity of substitution parameter value.
            ---
            **Attributes containing results populated after MonteCarloGE.run_trials()**:\n\n
            aggregate_trade_results (Pandas.DataFrame): Aggregate trade results summarized across all trials. See
                OneSectorGE ResultsLabels for description of results.
            bilateral_costs (Pandas.DataFrame): Baseline and counterfactual bilateral trade costs summarized across all
                trials. See OneSectorGE ResultsLabels for description of results.
            bilateral_trade_results (Pandas.DataFrame): Bilateral trade results summarized across all trials.
                See OneSectorGE ResultsLabels for description of results.
            country_mr_terms (Pandas.DataFrame): Baseline and experiment multilateral resistance terms summarized
                across all trials. See OneSectorGE ResultsLabels for description of results.
            country_results (Pandas.DataFrame): The main country level results summarized across
                all trials. See OneSectorGE ResultsLabels for description of results.
            factory_gate_prices (Pandas.DataFrame): Factory gate prices summarized across all trials. See OneSectorGE
                ResultsLabels for description of results.
            num_failed_trials (int): The number of trials for which the model failed to solve.
            replacement_sample (Padas.DataFrame): If run_trials options set to True, the vectors of additional parameter
                values are stored in this attribute.
            outputs_expenditures (Pandas.DataFrame): Baseline and experiment output and expenditure values summarized
                across all trials. See OneSectorGE ResultsLabels for description of results.
            solver_diagnostics (dict): A dictionary containing diagnostic information for each individual trial. The
                solver diagnostics for each trial correspond to the three solution routines: baseline
                multilateral resistances, conditional multilateral resistances (partial equilibrium counterfactual
                effects) and the full GE model. See the diagnostic info from scipy.optimize.root for more details.
            ---
            **Additional results populated if MonteCarloGE.run_trials(all_results=True)**\n\n
            all_aggregate_trade_results (Pandas.DataFrame): All aggregate trade results for each individual trial.
                Columns are multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels
                for description of results.
            all_bilateral_costs (Pandas.DataFrame): Bilateral trade costs for each individual trial. Columns are
                multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of
                results.
            all_bilateral_trade_results (Pandas.DataFrame): Bilateral trade results for all individual trials.
                Columns are multi-indexed by the trial number and type of result.
                See OneSectorGE ResultsLabels for description of results.
            all_country_mr_terms (Pandas.DataFrame): All baseline and experiment multilateral resistance terms for each
                individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE
                ResultsLabels for description of results.
            all_country_results (Pandas.DataFrame): Main results for all individual trials.
                Columns are multi-indexed by the trial number and type of result.
                See OneSectorGE ResultsLabels for description of results.
            all_factory_gate_prices (Pandas.DataFrame): Factory gate prices for each individual trial. Columns are
                multi-indexed by the trial number and type of result. See OneSectorGE ResultsLabels for description of
                results.
            all_outputs_expenditures (Pandas.DataFrame): Baseline and experiment output and expenditure values for each
                individual trial. Columns are multi-indexed by the trial number and type of result. See OneSectorGE
                ResultsLabels for description of results.
            trial_models (gegravity.OneSectorGE): A list of OneSectorGE models corresponding to each trial.

        Examples:

            Create a MonteCarloGE model instance using BaselineData and CostCoeff instances (see respective APIs). Model
            will solve 10 randomly drawn trials.
            >>> baseline = ge.BaselineData(...)
            >>> cost_params = ge.CostCoeffs(...)
            >>> mc_model = ge.MonteCarloGE(
            ...            baseline,
            ...            year = '2006',
            ...            trials = 10,
            ...            reference_importer='DEU',
            ...            sigma=7,
            ...            cost_variables=['lndist', 'contiguity', 'common_language', 'pta', 'international', 'constant'],
            ...            cost_coeff_values=cost_params,
            ...            seed = 1)
        '''


        # Store some inputs in model object
        self._baseline = baseline

        # Determine whether to use expend_var_name and/or output_var_name from MonteCarlo inputs or Baseline meta data.
        # If both are supplied, the values given in MonteCarlo definition are used.
        if expend_var_name != None:
            use_expend_var_name = expend_var_name
        else:
            use_expend_var_name = self._baseline.meta_data.expend_var_name
        if output_var_name != None:
            use_output_var_name = output_var_name
        else:
            use_output_var_name = self._baseline.meta_data.output_var_name
        if use_output_var_name is None:
            raise ValueError("Must supply a variable name for the column containing output values.")
        if use_expend_var_name is None:
            raise ValueError("Must supply a variable name for the column containing output values.")


        self.meta_data = _GEMetaData(baseline.meta_data, use_expend_var_name, use_output_var_name)
        self._year = str(year)
        self.sigma = sigma
        self._reference_importer = reference_importer
        self._cost_variables = cost_variables
        self._cost_coeffs = cost_coeff_values
        self._allow_singular = allow_singular_covar
        if mc_variables is None:
            self._mc_variables = self._cost_variables
        else:
            self._mc_variables = mc_variables
        if seed is None:
            self._seed = np.random.randint(0,10000)
        else:
            self._seed = seed

        # Define Parameter values
        self.main_coeffs = self._cost_coeffs.params
        self.main_stderrs = self._cost_coeffs.bse
        self.main_covar_mat = self._cost_coeffs.covar
        self.trials = trials

        # Generate Sampling Distribution
        self.coeff_sample = self._draw_mc_trade_costs()

        ##
        # Define Results attributes
        ##
        self.num_failed_trials = None
        self.replacement_sample = None
        self.all_country_results = None
        self.country_results = None
        self.all_country_mr_terms = None
        self.country_mr_terms = None
        self.all_outputs_expenditures = None
        self.outputs_expenditures = None
        self.all_factory_gate_prices = None
        self.factory_gate_prices = None
        self.all_aggregate_trade_results = None
        self.aggregate_trade_results = None
        self.all_bilateral_trade_results = None
        self.bilateral_trade_results = None
        self.all_bilateral_costs = None
        self.bilateral_costs = None
        self.solver_diagnostics = None
        self.trial_models = None


        # prep baseline data
        _baseline_data = self._baseline.baseline_data.copy()
        _baseline_data[self.meta_data.year_var_name] = _baseline_data[self.meta_data.year_var_name].astype(str)
        self.baseline_data = _baseline_data.loc[_baseline_data[self.meta_data.year_var_name] == self._year, :].copy()
        if self.baseline_data.shape[0] == 0:
            raise ValueError("There are no observations corresponding to the supplied 'year'")

        # Create summary of sample distribution
        sample_stats = self.coeff_sample.copy()
        sample_stats.set_index(self._cost_coeffs._identifier_col, inplace= True)
        sample_stats = sample_stats.T.describe().T

        # sample_stats = self.coeff_sample.T.describe().T
        new_col_names = ['sample_{}'.format(col) for col in sample_stats]
        sample_stats.columns = new_col_names
        main_cost_ests = pd.DataFrame({'beta_estimate': self.main_coeffs[self._mc_variables],
                                       'stderr_estimate': self.main_stderrs[self._mc_variables]})
        self.sample_stats = pd.concat([main_cost_ests, sample_stats], axis=1)

    def _draw_mc_trade_costs(self):
        '''
        Draw coefficient values from multivariate normal distribution. For Poisson MLE,
        B-hat ~ Normal(B, (X'WX)^{-1}) where (X'WX)^{-1} is the covariance matrix. See An Introduction to Generalized
        Linear Models (2nd Ed) Annette J. Dobson, Chapman & Hall/CRC, Boca Raton Florida, Section 5.4.

        Returns: A dataframe of random draws of coefficients. Rows are cost variables from self._mc_variables, columns
            are different draws with the exception of a column with corresponding cost variable names ('index')
        '''
        print('Deriving sample of cost parameters...')
        # Get results and check that all needed info is available (i.e. covariance matrix in estimation model)
        betas = self.main_coeffs
        cov = self.main_covar_mat
        # Define distribution of beta parameters
        distribution_alt = multivariate_normal(betas, cov, seed=self._seed, allow_singular=self._allow_singular)
        draws = list()
        for i in range(self.trials):
            draws.append(pd.Series(distribution_alt.rvs()))
        all_draws = pd.concat(draws, axis=1)
        all_draws.index = betas.index
        all_draws = all_draws.loc[self._mc_variables,:]
        return all_draws.reset_index()


    def run_trials(self,
                   experiment_data:pd.DataFrame,
                   omr_rescale: float = 1,
                   imr_rescale: float = 1,
                   mr_method: str = 'hybr',
                   mr_max_iter: int = 1400,
                   mr_tolerance: float = 1e-8,
                   ge_method:str = 'hybr',
                   ge_tolerance: float = 1e-8,
                   ge_max_iter: int = 1000,
                   quiet: bool = False,
                   result_stats:list = ['mean', 'std', 'sem'],
                   all_results:bool = False,
                   redraw_failed_trials: bool = False,
                   trial_omr_rescale: dict = None):
        '''
        Conduct Monte Carlo Simulation of OneSectorGE gravity model.
        Args:
            experiment_data (pandas.DataFrame): A dataframe containing the counterfactual trade-cost data to use for the
                experiment. The best approach for creating this data is to copy the baseline data
                (MonteCarloGE.baseline_data.copy()) and modify columns/rows to reflect desired counterfactual experiment.
            omr_rescale (int): (optional) This value rescales the OMR values to assist in convergence. Often, OMR values
                are orders of magnitude different than IMR values, which can make convergence difficult. Scaling by a
                different order of magnitude can help. Values should be of the form 10^n for positive or negative
                integer n. By default, this value is 1 (10^0). However, users should be careful with this choice as
                results, even when convergent, may not be fully robust to any selection. The method
                MonteCarlo.check_omr_rescale() can help identify and compare feasible values for a given model.
            imr_rescale (int): (optional) This value rescales the IMR values to potentially aid in conversion. However,
                because the IMR for the reference importer is normalized to one, it is unlikely that there will be because
                because changing the default value, which is 1.
            mr_method (str): This parameter determines the type of non-linear solver used for solving the baseline and
                experiment MR terms. See the documentation for scipy.optimize.root for alternative methods. the default
                value is 'hybr'. (See also OneSectorGE.build_baseline())
            mr_max_iter (int): This parameter sets the maximum limit on the number of iterations conducted
                by the solver used to solve for MR terms. The default value is 1400.
                (See also OneSectorGE.build_baseline())
            mr_tolerance (float): This parameter sets the convergence tolerance level for the solver used to
                solve for MR terms. The default value is 1e-8. (See also OneSectorGE.build_baseline())
            ge_method (str): The solver method to use for the full GE non-linear solver. See scipy.root()
                documentation for option. Default is 'hybr'.
            ge_tolerance (float): The tolerance for determining if the GE system of equations is solved.
                Default is 1e-8.
            ge_max_iter (int): The maximum number of iterations allowed for the full GE nonlinear solver.
                Default is 1000.
            quiet (bool): If True, suppress console printouts detailing the solver success/failures of each trial.
                Default is False.
            result_stats (list): A list of functions to compute in order to summarize the results across trials. The
                default is ['mean', 'std', 'sem'], which computes the mean, standard deviation, and standard mean error
                of the results, respectively. The model should accept any function that can be used with the
                pandas.DataFrame.agg() function.
            all_results (bool): If true, MonteCarloGE attributes containing individual results for all trials are
                populated. Default is False to reduce memory use.
            redraw_failed_trials (bool): If True, draw additional trials to replace failed trials. Additional trials
                will be run until enough have succeeded to match the number that originally failed (up to a maximum of
                the number of trials originally specified). This helps insure that the model is solved for as many
                trials as were specified. E.g. If 10 trials are specified and 2 fail to solve, additional trials will be
                attempted with additional random draws until 2 additional models have been solved successfully, resulting
                in 10 successful trials, or 10 additional trails are run without 2 successes.
            trial_omr_rescale (dict): (option) An option to provide alternative OMR rescale factors for specific trials.
                The argument should be a dictionary keyed with the trial number (0 to N-1) and value equal to the
                desired rescale factor (10^n) with n a positive or negative integer (e.g. {0:0.001, 3:10} Any trial not
                specified here will be run using the value specified by the omr_rescale argument (default of 1).
        Returns:
            None: No return but populates many results attributes of the MonteCarloGE model.

        Examples:
             Run simulations for mc_model, which is an instance of MonteCarloGE (see respective documentation).
             'counterfactual_dataframe' is a counterfactual version of the baseline data to use for the experiment.
            >>> monte_model = ge.MonteCarloGE(...)
            >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
            ...                        omr_rescale = 1)
            * Simulating trial 0 *
            Solving for baseline MRs...
            The solution converged.
            Solving for conditional MRs...
            The solution converged.
            Solving full GE model...
            The solution converged.
            * Simulating trial 1 *
            Solving for baseline MRs...
            The solution converged.
            Solving for conditional MRs...
            The solution converged.
            Solving full GE model...
            The solution converged.
            (truncated...)

            Return all info about each trial, not just summary info across trials
            >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
            ...                        omr_rescale = 1,
            ...                        all_results=True)

            Specify alternative OMR rescale factors for trials 2 and 3, while using the value of 1 for all other trials.
            >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
            ...                        omr_rescale = 1,
            ...                        trail_rescale_factors = {2:0.001, 3:100})

             Draw additional random cost values to replace trials that failed to solve
             >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
             ...                        omr_rescale = 1,
             ...                        redraw_failed_trials=True)
        '''
        models = list()
        failed_trials = list()
        num_failed_iterations = 0
        for trial in range(self.trials):
            if not quiet:
                print("\n* Simulating trial {} *".format(trial))
            # Define a new CostCoeff instance using one of the trial values
            param_values = CostCoeffs(self.coeff_sample, coeff_col=trial, identifier_col=self._cost_coeffs._identifier_col)
            try:
                if (trial_omr_rescale is not None) and (trial in trial_omr_rescale):
                    # Use alternative omr if supplied
                    omr_rescale_use = trial_omr_rescale[trial]
                else:
                    omr_rescale_use = omr_rescale

                trial_model = self._run_single_trial(param_values=param_values, experiment_data=experiment_data,
                                                     quiet = quiet, omr_rescale = omr_rescale_use,
                                                     imr_rescale = imr_rescale, mr_method = mr_method,
                                                     mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                     ge_method = ge_method, ge_tolerance = ge_tolerance,
                                                     ge_max_iter = ge_max_iter)
                models.append(trial_model)
            except:
                if not quiet:
                    print("Failed to solve model.\n")
                num_failed_iterations+=1
                failed_trials.append(trial)
        self.num_failed_trials = num_failed_iterations

        # For failed trails, redraw new estimates and solve new models for each failed trial
        if redraw_failed_trials:
            # Define distribution of beta parameters to draw new values from (sets new seed to draw from)
            new_dist = multivariate_normal(self.main_coeffs, self.main_covar_mat, seed=(1 + self._seed),
                                           allow_singular=self._allow_singular)
            # Create lists/variables to track successes/failures
            new_draws = list()
            new_successes = 0
            new_failures = 0
            tries = 0
            max_retries = self.trials
            while new_successes < self.num_failed_trials and tries <= max_retries:
                # Tick up try counter and created column name for random draw

                try_num = self.trials+tries
                # Create and format coefficient draw
                new_draw = pd.DataFrame(new_dist.rvs())
                new_draw.index = self.main_coeffs.index
                new_draw.rename(columns={0: str(try_num)}, inplace=True)
                new_draws.append(new_draw.copy())
                new_draw.reset_index(inplace = True)
                new_params = CostCoeffs(new_draw, coeff_col=str(try_num), identifier_col=self._cost_coeffs._identifier_col)

                if not quiet:
                    print("\n* Simulating failed trial replacement {} *".format(try_num))
                try:
                    new_trial_model = self._run_single_trial(param_values=new_params, experiment_data=experiment_data,
                                                             quiet = quiet, omr_rescale = omr_rescale,
                                                             imr_rescale = imr_rescale, mr_method = mr_method,
                                                             mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                             ge_method = ge_method, ge_tolerance = ge_tolerance,
                                                             ge_max_iter = ge_max_iter)
                    models.append(new_trial_model)
                    new_successes+=1
                except:
                    if not quiet:
                        print("Failed to solve redrawn replacement model.\n")
                    new_failures += 1
                    failed_trials.append(try_num)

                tries += 1
            if len(new_draws)>0:
                self.replacement_sample = pd.concat(new_draws, axis=1)
            else:
                self.replacement_sample = new_draws



        # Get results labels from one of the OneSectorGE gegravity
        self.labels = models[0].labels
        if all_results:
            self.trial_models = models
        self.failed_trials = failed_trials
        self.all_country_results, self.country_results = self._compile_results(models, 'country_results', result_stats, all_results)
        self.all_country_mr_terms, self.country_mr_terms = self._compile_results(models, 'mr_terms', result_stats, all_results)
        self.all_outputs_expenditures, self.outputs_expenditures = self._compile_results(models, 'outputs_expenditures', result_stats, all_results)
        self.all_factory_gate_prices, self.factory_gate_prices = self._compile_results(models, 'factory_gate_prices', result_stats, all_results)
        self.all_aggregate_trade_results, self.aggregate_trade_results = self._compile_results(models, 'aggregate_trade_results', result_stats, all_results)
        self.all_bilateral_trade_results, self.bilateral_trade_results = self._compile_results(models, 'bilateral_trade', result_stats, all_results)
        self.all_bilateral_costs, self.bilateral_costs = self._compile_results(models, 'bilateral_costs', result_stats, all_results)
        self._compile_diagnostics(models)
        # ToDo: build some method for confidence intervals from Anderson Yotov (2016)



    def _run_single_trial(self, param_values: CostCoeffs, experiment_data, quiet, omr_rescale, imr_rescale, mr_method,
                          mr_max_iter, mr_tolerance, ge_method, ge_tolerance, ge_max_iter):
        '''
        Create and solve a single OneSectorGE model using given inputs. Inputs the same as OneSectorGE.
        Args:
            See arguments for OneSectorGE.

        Returns:
            Solved OneSectorGE model instance
        '''
        trial_model = OneSectorGE(self._baseline,
                                  year=self._year,
                                  reference_importer=self._reference_importer,
                                  expend_var_name=self.meta_data.expend_var_name,
                                  output_var_name=self.meta_data.output_var_name,
                                  sigma=self.sigma,
                                  cost_variables=self._cost_variables,
                                  cost_coeff_values=param_values,
                                  quiet=quiet)
        trial_model.build_baseline(omr_rescale=omr_rescale,
                                           imr_rescale=imr_rescale,
                                           mr_method=mr_method,
                                           mr_max_iter=mr_max_iter,
                                           mr_tolerance=mr_tolerance)
        trial_model.define_experiment(experiment_data)
        trial_model.simulate(ge_method=ge_method,
                                     ge_tolerance=ge_tolerance,
                                     ge_max_iter=ge_max_iter)
        return trial_model



    def _compile_results(self, models, result_type, result_stats, all_results):
        '''
        Compile results across all trials.
        :param models: (List[OneSectorGE]) A list of solved OneSectorGE gegravity.
        :param result_type: (str) Type of results to compile. Function works with:
            'country_results' - compiles results from OneSectorGE.country_mr_results
            'mr_terms' - compiles results from OneSectorGE.country_mr_terms
            'output_expenditures' - compiles results from OneSectorGE.output_expenditures
            'factory_gate_prices - compiles results from OneSectorGE.factory_gate_prices
            'aggregate_trade_results' - compiles results from OneSectorGE.aggregate_trade_results
            'bilateral_trade' - complies results from OneSectorGE.bilateral_trade_results
            'bilateral_costs' - compiles results from OneSectorGE.bilateral_costs
        :return:(pd.DataFrame, pd.DataFrame) Two dataframes. The first contains all results for each trial, with
            multiindex columns labeled (trial, result type). The second provides summary stats from all trials (mean,
            std, stderr)
        '''
        # Combine all results
        combined_results_list = list()
        for num, model in enumerate(models):
            if result_type == 'country_results':
                model_results = model.country_results.copy()
            if result_type == 'mr_terms':
                model_results = model.country_mr_terms
            if result_type == 'outputs_expenditures':
                model_results = model.outputs_expenditures
            if result_type == 'factory_gate_prices':
                model_results = model.factory_gate_prices
            if result_type == 'aggregate_trade_results':
                model_results = model.aggregate_trade_results
            if result_type == 'bilateral_trade':
                model_results = model.bilateral_trade_results
            if result_type == 'bilateral_costs':
                model_results = model.bilateral_costs

            # Label columns via multiindex with (trial #, result label)
            multi_columns = [(num,col) for col in model_results.columns]
            model_results.columns = pd.MultiIndex.from_tuples(multi_columns)
            combined_results_list.append(model_results)
        combined_results = pd.concat(combined_results_list, axis = 1)

        # Reshape trials to long format
        summary_results = combined_results.copy()
        if result_type in ['bilateral_trade','bilateral_costs']:
            # Bilateral trade has a two-part index (exporter and importer) and must be treated separately.
            summary_results = summary_results.stack(0, future_stack=True).reset_index(level=2)
            summary_results.rename(columns={'level_2': 'trial'}, inplace=True)
        else:
            summary_results = summary_results.stack(0, future_stack=True).reset_index(level=1)
            summary_results.rename(columns={'level_1': 'trial'}, inplace=True)

        # Compute mean and std across trials
        agg_dict = dict()
        var_list = list(summary_results.columns)
        var_list.remove('trial')
        for var in var_list:
            agg_dict[var] = result_stats
        if result_type in ['bilateral_trade','bilateral_costs']:
            summary_results = summary_results.groupby(level=[0, 1]).agg(agg_dict)
        else:
            summary_results = summary_results.groupby(level=0).agg(agg_dict)

        # Compute standard error for each result type
        # for col in summary_results.columns:
        #     if col[1] == 'std':
        #         summary_results[(col[0], 'stderr')] = summary_results[col] / (self.trials ** 0.5)
        if result_type in ['bilateral_trade','bilateral_costs']:
            summary_results = summary_results.stack(level=1, future_stack=True).reset_index(level=2)
            summary_results.rename(columns = {'level_2':'statistic'},inplace = True)
        else:
            summary_results = summary_results.stack(level=1, future_stack=True).reset_index(level=1)
            summary_results.rename(columns = {'level_1':'statistic'},inplace = True)
        if all_results:
            return combined_results, summary_results
        else:
            return None, summary_results

    def _compile_diagnostics(self, models):
        '''
        Compiles the diagnostics from each trial into a single dictionary, indexed by the trial number.
        Args:
            models: the list of OneSectorGE gegravity associated with each trial

        Returns: None, Populates the attribute self.solver_daignostics

        '''
        combined_diagnostics = dict()
        for trial in range(len(models)):
            combined_diagnostics[trial] = models[trial].solver_diagnostics
        self.solver_diagnostics = combined_diagnostics

    def export_results(self, directory:str = None, name:str = '',
                       country_names:DataFrame = None, country:bool = True, bilateral:bool = True,
                       diagnostics:bool = True):
        '''
        Export results to csv files. Three files are stored containing (1) country-level results, (2) bilateral results,
        and (3) solver diagnostics.
        Args:
            directory (str): (optional) Directory in which to write results files. If no directory is supplied,
                three compiled dataframes are returned as a tuple in the order (Country-level results, bilateral
                results, solver diagnostics).
            name (str): (optional) Name of the simulation to prefix to the result file names.
            include_levels (bool): (optional) If True, includes additional columns reflecting the simulated changes in
                levels based on observed trade flows (rather than modeled trade flows). Values are those from the
                method calculate_levels.
            country_names (pandas.DataFrame): (optional) Adds alternative identifiers such as names to the returned
                results tables. The supplied DataFrame should include exactly two columns. The first column must be
                the country identifiers used in the model. The second column must be the alternative identifiers to
                add.
            country (boolean): (optional) If True, export country-level results to csv. If False, skip these results.
                Default is True.
            bilateral (boolean): (optional) If True, export bilateral results to csv. If False, skip these results.
                Default is True.
            diagnostics (boolean): (optional) If True, export diagnostic information to csv. If False, skip these
                results. Default is True.

        Returns:
            None or Tuple[DataFrame, DataFrame, DataFrame]: If a directory argument is supplied, the method returns
                nothing and writes three .csv files instead. If no directory is supplied, it returns a tuple of
                DataFrames.

        Examples:
            Export all three results files with the file prefix "example_monte_carlo_results"
            >>> mc_model.export_results(directory="C:\simulation_results\", name = "example_monte_carlo_results")

            Export only the country level results (i.e. do not export the bilateral and diagnostic results)
            >>> mc_model.export_results(directory="C:\simulation_results\", name = "example_monte_carlo_results",
            ...                         bilateral = False, diagnostics = False)

        '''

        importer_col = self.meta_data.imp_var_name
        exporter_col = self.meta_data.exp_var_name

        country_result_set = [self.country_results, self.factory_gate_prices, self.aggregate_trade_results,
                              self.outputs_expenditures, self.country_mr_terms]
        country_results = pd.concat(country_result_set, axis = 1)
        # Order and select columns for inclusion, drop duplicates.
        country_results_cols = country_results.columns
        labs = self.labels
        # Country results to include
        results_cols = ['statistic'] + self.labels.country_level_labels
        included_columns = [col for col in results_cols if col in country_results_cols]
        country_results = country_results[included_columns]
        country_results = country_results.loc[:, ~country_results.columns.duplicated()]

        bilateral_results = self.bilateral_trade_results.reset_index()

        if country_names is not None:
            if country_names.shape[1]!=2:
                raise ValueError("country_names should have exactly 2 columns, not {}".format(country_names.shape[1]))
            code_col = country_names.columns[0]
            name_col = country_names.columns[1]
            country_names.set_index(code_col, inplace = True, drop = True)
            country_results = country_names.merge(country_results, how = 'right', left_index = True, right_index=True)

            # Add names to bilateral data
            for side in [exporter_col, importer_col]:
                side_names = country_names.copy()
                side_names.reset_index(inplace = True)
                side_names.rename(columns = {code_col:side, name_col:"{} {}".format(side,name_col)}, inplace = True)
                bilateral_results = bilateral_results.merge(side_names, how = 'left', on = side)

        # Create Dataframe with Diagnostic results
        column_list = list()
        diagnostic_info = self.solver_diagnostics
        for trial_num, trial in diagnostic_info.items():
            for results_type, results in trial.items():
                for key, value in results.items():
                    # Single Entry fields must be converted to list before creating DataFrame
                    if key in ['success', 'status', 'nfev', 'message']:
                        frame = pd.DataFrame({("trial_{}".format(trial_num), results_type, key): [value]})
                        column_list.append(frame)
                    # Vector-like fields Can be used as is. Several available fields are not included: 'fjac','r', and 'qtf'
                    elif key in ['x', 'fun']:
                        frame = pd.DataFrame({("trial_{}".format(trial_num), results_type, key): value})
                        column_list.append(frame)
        diag_frame = pd.concat(column_list, axis=1)
        diag_frame = diag_frame.fillna('')

        if directory is not None:
            if country:
                country_results.to_csv("{}/{}_country_results.csv".format(directory, name))
            if bilateral:
                bilateral_results.to_csv("{}/{}_bilateral_results.csv".format(directory, name), index=False)
            if diagnostics:
                diag_frame.to_csv("{}/{}_solver_diagnostics.csv".format(directory, name), index=False)
        else:
            return country_results, bilateral_results, diag_frame

    def check_omr_rescale(self,
                          omr_rescale_range: int = 10,
                          trials: List[int] = None,
                          mr_method: str = 'hybr',
                          mr_max_iter: int = 1400,
                          mr_tolerance: float = 1e-8,
                          countries: List[str] = []):
        '''
        Analyze different Outward Multilarteral Resistance (OMR) term rescale factors. This method can help identify
            feasible values to use for the omr_rescale argument in OneSectorGE.build_baseline().
        Args:
            omr_rescale_range (int): This parameter allows you to set the scope of the values tested. For example,
                if omr_rescale_range = 3, the model will check for convergence using omr_rescale values from the set
                [10^-3, 10^-2, 10^-1, 10^0, ..., 10^3]. The default value is 10.
            trials (List[int]): (optional) A list of trials by number (0 to (N-1)) corresponding to columns of the
                derived coefficient sample (MonteCarloGE.coeff_sample) for which to analyze OMR terms. If no input is
                provided, all trials are considered.
            mr_method (str): This parameter determines the type of non-linear solver used for solving the baseline and
                experiment MR terms. See the documentation for scipy.optimize.root for alternative methods. the default
                value is 'hybr'.
            mr_max_iter (int): (optional) This parameter sets the maximum limit on the number of iterations conducted
                by the solver used to solve for MR terms. The default value is 1400.
            mr_tolerance (float): (optional) This parameter sets the convergence tolerance level for the solver used to
                solve for MR terms. The default value is 1e-8.
            countries (List[str]):  A list of countries for which to return the estimated OMR values for user
                evaluation.
        Returns:
            pandas.DataFrame: A dataframe of diagnostic information for users to compare different omr_rescale factors.
                The returned dataframe contains the following columns:\n
                'trial': The trial number corresponding to the sample parameters for which the test was run.
                'omr_rescale': The rescale factor used\n
                'omr_rescale (alt format)': A string representation of the rescale factor as an exponential expression.\n
                'solved': If True, the MR model solved successfully. If False, it did not solve.\n
                'message': Description of the outcome of the solver.\n
                '..._func_value': Three columns reflecting the maximum, mean, and median values from the solver
                    objective functions. Function values closer to zero imply a better solution to system of equations.
                'reference_importer_omr': The solution value for the reference importer's OMR value.\n
                '..._omr': The solution value(s) for the user supplied countries.

        Examples:
            Check for potential OMR rescale factors for all trials within a range of 10^-10 to 10^10.
            >>> omr_all_trials = mc_model.check_omr_rescale(omr_rescale_range=10)

            Check for potential OMR rescale factors for trials 1, 2, 8, and 9 within a range of 10^-3 to 10^3.
            >>> omr_trial_subset = mc_model.check_omr_rescale(omr_rescale_range=3, trials = [1, 2, 8, 9])
        '''

        # Check trials argument if provided
        if trials is None:
            trials_list = range(self.trials)
        else:
            if not isinstance(trials, list):
                raise ValueError("trials argument must be a list of integer values")
            else:
                trials_list = trials

        all_outputs = list()
        for trial in trials_list:
            print("\n* Checking trial {} *".format(trial))
            trial_params = self.coeff_sample[[self._cost_coeffs._identifier_col ,trial]]
            trial_params_obj = CostCoeffs(trial_params, identifier_col=self._cost_coeffs._identifier_col,
                                          coeff_col=trial)

            omr_test_gemodel = OneSectorGE(self._baseline, year=self._year, reference_importer=self._reference_importer,
                                    output_var_name=self.meta_data.output_var_name,
                                    expend_var_name=self.meta_data.expend_var_name, sigma=self.sigma,
                                    cost_variables=self._cost_variables,
                                    cost_coeff_values=trial_params_obj)
            omr_checks = omr_test_gemodel.check_omr_rescale(omr_rescale_range, mr_method = mr_method,
                                                            mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                            countries = countries)
            # Add column with trial number to the *beginning* of dataframe
            omr_checks.insert(loc=0, column = 'trail', value = trial)
            all_outputs.append(omr_checks)
        omr_outcomes = pd.concat(all_outputs, axis = 0)
        return omr_outcomes

Methods

def check_omr_rescale(self, omr_rescale_range: int = 10, trials: List[int] = None, mr_method: str = 'hybr', mr_max_iter: int = 1400, mr_tolerance: float = 1e-08, countries: List[str] = [])

Analyze different Outward Multilarteral Resistance (OMR) term rescale factors. This method can help identify feasible values to use for the omr_rescale argument in OneSectorGE.build_baseline().

Args

omr_rescale_range : int
This parameter allows you to set the scope of the values tested. For example, if omr_rescale_range = 3, the model will check for convergence using omr_rescale values from the set [10^-3, 10^-2, 10^-1, 10^0, …, 10^3]. The default value is 10.
trials : List[int]
(optional) A list of trials by number (0 to (N-1)) corresponding to columns of the derived coefficient sample (MonteCarloGE.coeff_sample) for which to analyze OMR terms. If no input is provided, all trials are considered.
mr_method : str
This parameter determines the type of non-linear solver used for solving the baseline and experiment MR terms. See the documentation for scipy.optimize.root for alternative methods. the default value is 'hybr'.
mr_max_iter : int
(optional) This parameter sets the maximum limit on the number of iterations conducted by the solver used to solve for MR terms. The default value is 1400.
mr_tolerance : float
(optional) This parameter sets the convergence tolerance level for the solver used to solve for MR terms. The default value is 1e-8.
countries : List[str]
A list of countries for which to return the estimated OMR values for user evaluation.

Returns

pandas.DataFrame

A dataframe of diagnostic information for users to compare different omr_rescale factors. The returned dataframe contains the following columns:

'trial': The trial number corresponding to the sample parameters for which the test was run. 'omr_rescale': The rescale factor used

'omr_rescale (alt format)': A string representation of the rescale factor as an exponential expression.

'solved': If True, the MR model solved successfully. If False, it did not solve.

'message': Description of the outcome of the solver.

'…_func_value': Three columns reflecting the maximum, mean, and median values from the solver objective functions. Function values closer to zero imply a better solution to system of equations. 'reference_importer_omr': The solution value for the reference importer's OMR value.

'…_omr': The solution value(s) for the user supplied countries.

Examples

Check for potential OMR rescale factors for all trials within a range of 10^-10 to 10^10.

>>> omr_all_trials = mc_model.check_omr_rescale(omr_rescale_range=10)

Check for potential OMR rescale factors for trials 1, 2, 8, and 9 within a range of 10^-3 to 10^3.

>>> omr_trial_subset = mc_model.check_omr_rescale(omr_rescale_range=3, trials = [1, 2, 8, 9])
Expand source code
def check_omr_rescale(self,
                      omr_rescale_range: int = 10,
                      trials: List[int] = None,
                      mr_method: str = 'hybr',
                      mr_max_iter: int = 1400,
                      mr_tolerance: float = 1e-8,
                      countries: List[str] = []):
    '''
    Analyze different Outward Multilarteral Resistance (OMR) term rescale factors. This method can help identify
        feasible values to use for the omr_rescale argument in OneSectorGE.build_baseline().
    Args:
        omr_rescale_range (int): This parameter allows you to set the scope of the values tested. For example,
            if omr_rescale_range = 3, the model will check for convergence using omr_rescale values from the set
            [10^-3, 10^-2, 10^-1, 10^0, ..., 10^3]. The default value is 10.
        trials (List[int]): (optional) A list of trials by number (0 to (N-1)) corresponding to columns of the
            derived coefficient sample (MonteCarloGE.coeff_sample) for which to analyze OMR terms. If no input is
            provided, all trials are considered.
        mr_method (str): This parameter determines the type of non-linear solver used for solving the baseline and
            experiment MR terms. See the documentation for scipy.optimize.root for alternative methods. the default
            value is 'hybr'.
        mr_max_iter (int): (optional) This parameter sets the maximum limit on the number of iterations conducted
            by the solver used to solve for MR terms. The default value is 1400.
        mr_tolerance (float): (optional) This parameter sets the convergence tolerance level for the solver used to
            solve for MR terms. The default value is 1e-8.
        countries (List[str]):  A list of countries for which to return the estimated OMR values for user
            evaluation.
    Returns:
        pandas.DataFrame: A dataframe of diagnostic information for users to compare different omr_rescale factors.
            The returned dataframe contains the following columns:\n
            'trial': The trial number corresponding to the sample parameters for which the test was run.
            'omr_rescale': The rescale factor used\n
            'omr_rescale (alt format)': A string representation of the rescale factor as an exponential expression.\n
            'solved': If True, the MR model solved successfully. If False, it did not solve.\n
            'message': Description of the outcome of the solver.\n
            '..._func_value': Three columns reflecting the maximum, mean, and median values from the solver
                objective functions. Function values closer to zero imply a better solution to system of equations.
            'reference_importer_omr': The solution value for the reference importer's OMR value.\n
            '..._omr': The solution value(s) for the user supplied countries.

    Examples:
        Check for potential OMR rescale factors for all trials within a range of 10^-10 to 10^10.
        >>> omr_all_trials = mc_model.check_omr_rescale(omr_rescale_range=10)

        Check for potential OMR rescale factors for trials 1, 2, 8, and 9 within a range of 10^-3 to 10^3.
        >>> omr_trial_subset = mc_model.check_omr_rescale(omr_rescale_range=3, trials = [1, 2, 8, 9])
    '''

    # Check trials argument if provided
    if trials is None:
        trials_list = range(self.trials)
    else:
        if not isinstance(trials, list):
            raise ValueError("trials argument must be a list of integer values")
        else:
            trials_list = trials

    all_outputs = list()
    for trial in trials_list:
        print("\n* Checking trial {} *".format(trial))
        trial_params = self.coeff_sample[[self._cost_coeffs._identifier_col ,trial]]
        trial_params_obj = CostCoeffs(trial_params, identifier_col=self._cost_coeffs._identifier_col,
                                      coeff_col=trial)

        omr_test_gemodel = OneSectorGE(self._baseline, year=self._year, reference_importer=self._reference_importer,
                                output_var_name=self.meta_data.output_var_name,
                                expend_var_name=self.meta_data.expend_var_name, sigma=self.sigma,
                                cost_variables=self._cost_variables,
                                cost_coeff_values=trial_params_obj)
        omr_checks = omr_test_gemodel.check_omr_rescale(omr_rescale_range, mr_method = mr_method,
                                                        mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                        countries = countries)
        # Add column with trial number to the *beginning* of dataframe
        omr_checks.insert(loc=0, column = 'trail', value = trial)
        all_outputs.append(omr_checks)
    omr_outcomes = pd.concat(all_outputs, axis = 0)
    return omr_outcomes
def export_results(self, directory: str = None, name: str = '', country_names: pandas.core.frame.DataFrame = None, country: bool = True, bilateral: bool = True, diagnostics: bool = True)

Export results to csv files. Three files are stored containing (1) country-level results, (2) bilateral results, and (3) solver diagnostics.

Args

directory : str
(optional) Directory in which to write results files. If no directory is supplied, three compiled dataframes are returned as a tuple in the order (Country-level results, bilateral results, solver diagnostics).
name : str
(optional) Name of the simulation to prefix to the result file names.
include_levels : bool
(optional) If True, includes additional columns reflecting the simulated changes in levels based on observed trade flows (rather than modeled trade flows). Values are those from the method calculate_levels.
country_names : pandas.DataFrame
(optional) Adds alternative identifiers such as names to the returned results tables. The supplied DataFrame should include exactly two columns. The first column must be the country identifiers used in the model. The second column must be the alternative identifiers to add.
country : boolean
(optional) If True, export country-level results to csv. If False, skip these results. Default is True.
bilateral : boolean
(optional) If True, export bilateral results to csv. If False, skip these results. Default is True.
diagnostics : boolean
(optional) If True, export diagnostic information to csv. If False, skip these results. Default is True.

Returns

None or Tuple[DataFrame, DataFrame, DataFrame]
If a directory argument is supplied, the method returns nothing and writes three .csv files instead. If no directory is supplied, it returns a tuple of DataFrames.

Examples

Export all three results files with the file prefix "example_monte_carlo_results"

>>> mc_model.export_results(directory="C:\simulation_results", name = "example_monte_carlo_results")

Export only the country level results (i.e. do not export the bilateral and diagnostic results)

>>> mc_model.export_results(directory="C:\simulation_results", name = "example_monte_carlo_results",
...                         bilateral = False, diagnostics = False)
Expand source code
def export_results(self, directory:str = None, name:str = '',
                   country_names:DataFrame = None, country:bool = True, bilateral:bool = True,
                   diagnostics:bool = True):
    '''
    Export results to csv files. Three files are stored containing (1) country-level results, (2) bilateral results,
    and (3) solver diagnostics.
    Args:
        directory (str): (optional) Directory in which to write results files. If no directory is supplied,
            three compiled dataframes are returned as a tuple in the order (Country-level results, bilateral
            results, solver diagnostics).
        name (str): (optional) Name of the simulation to prefix to the result file names.
        include_levels (bool): (optional) If True, includes additional columns reflecting the simulated changes in
            levels based on observed trade flows (rather than modeled trade flows). Values are those from the
            method calculate_levels.
        country_names (pandas.DataFrame): (optional) Adds alternative identifiers such as names to the returned
            results tables. The supplied DataFrame should include exactly two columns. The first column must be
            the country identifiers used in the model. The second column must be the alternative identifiers to
            add.
        country (boolean): (optional) If True, export country-level results to csv. If False, skip these results.
            Default is True.
        bilateral (boolean): (optional) If True, export bilateral results to csv. If False, skip these results.
            Default is True.
        diagnostics (boolean): (optional) If True, export diagnostic information to csv. If False, skip these
            results. Default is True.

    Returns:
        None or Tuple[DataFrame, DataFrame, DataFrame]: If a directory argument is supplied, the method returns
            nothing and writes three .csv files instead. If no directory is supplied, it returns a tuple of
            DataFrames.

    Examples:
        Export all three results files with the file prefix "example_monte_carlo_results"
        >>> mc_model.export_results(directory="C:\simulation_results\", name = "example_monte_carlo_results")

        Export only the country level results (i.e. do not export the bilateral and diagnostic results)
        >>> mc_model.export_results(directory="C:\simulation_results\", name = "example_monte_carlo_results",
        ...                         bilateral = False, diagnostics = False)

    '''

    importer_col = self.meta_data.imp_var_name
    exporter_col = self.meta_data.exp_var_name

    country_result_set = [self.country_results, self.factory_gate_prices, self.aggregate_trade_results,
                          self.outputs_expenditures, self.country_mr_terms]
    country_results = pd.concat(country_result_set, axis = 1)
    # Order and select columns for inclusion, drop duplicates.
    country_results_cols = country_results.columns
    labs = self.labels
    # Country results to include
    results_cols = ['statistic'] + self.labels.country_level_labels
    included_columns = [col for col in results_cols if col in country_results_cols]
    country_results = country_results[included_columns]
    country_results = country_results.loc[:, ~country_results.columns.duplicated()]

    bilateral_results = self.bilateral_trade_results.reset_index()

    if country_names is not None:
        if country_names.shape[1]!=2:
            raise ValueError("country_names should have exactly 2 columns, not {}".format(country_names.shape[1]))
        code_col = country_names.columns[0]
        name_col = country_names.columns[1]
        country_names.set_index(code_col, inplace = True, drop = True)
        country_results = country_names.merge(country_results, how = 'right', left_index = True, right_index=True)

        # Add names to bilateral data
        for side in [exporter_col, importer_col]:
            side_names = country_names.copy()
            side_names.reset_index(inplace = True)
            side_names.rename(columns = {code_col:side, name_col:"{} {}".format(side,name_col)}, inplace = True)
            bilateral_results = bilateral_results.merge(side_names, how = 'left', on = side)

    # Create Dataframe with Diagnostic results
    column_list = list()
    diagnostic_info = self.solver_diagnostics
    for trial_num, trial in diagnostic_info.items():
        for results_type, results in trial.items():
            for key, value in results.items():
                # Single Entry fields must be converted to list before creating DataFrame
                if key in ['success', 'status', 'nfev', 'message']:
                    frame = pd.DataFrame({("trial_{}".format(trial_num), results_type, key): [value]})
                    column_list.append(frame)
                # Vector-like fields Can be used as is. Several available fields are not included: 'fjac','r', and 'qtf'
                elif key in ['x', 'fun']:
                    frame = pd.DataFrame({("trial_{}".format(trial_num), results_type, key): value})
                    column_list.append(frame)
    diag_frame = pd.concat(column_list, axis=1)
    diag_frame = diag_frame.fillna('')

    if directory is not None:
        if country:
            country_results.to_csv("{}/{}_country_results.csv".format(directory, name))
        if bilateral:
            bilateral_results.to_csv("{}/{}_bilateral_results.csv".format(directory, name), index=False)
        if diagnostics:
            diag_frame.to_csv("{}/{}_solver_diagnostics.csv".format(directory, name), index=False)
    else:
        return country_results, bilateral_results, diag_frame
def run_trials(self, experiment_data: pandas.core.frame.DataFrame, omr_rescale: float = 1, imr_rescale: float = 1, mr_method: str = 'hybr', mr_max_iter: int = 1400, mr_tolerance: float = 1e-08, ge_method: str = 'hybr', ge_tolerance: float = 1e-08, ge_max_iter: int = 1000, quiet: bool = False, result_stats: list = ['mean', 'std', 'sem'], all_results: bool = False, redraw_failed_trials: bool = False, trial_omr_rescale: dict = None)

Conduct Monte Carlo Simulation of OneSectorGE gravity model.

Args

experiment_data : pandas.DataFrame
A dataframe containing the counterfactual trade-cost data to use for the experiment. The best approach for creating this data is to copy the baseline data (MonteCarloGE.baseline_data.copy()) and modify columns/rows to reflect desired counterfactual experiment.
omr_rescale : int
(optional) This value rescales the OMR values to assist in convergence. Often, OMR values are orders of magnitude different than IMR values, which can make convergence difficult. Scaling by a different order of magnitude can help. Values should be of the form 10^n for positive or negative integer n. By default, this value is 1 (10^0). However, users should be careful with this choice as results, even when convergent, may not be fully robust to any selection. The method MonteCarlo.check_omr_rescale() can help identify and compare feasible values for a given model.
imr_rescale : int
(optional) This value rescales the IMR values to potentially aid in conversion. However, because the IMR for the reference importer is normalized to one, it is unlikely that there will be because because changing the default value, which is 1.
mr_method : str
This parameter determines the type of non-linear solver used for solving the baseline and experiment MR terms. See the documentation for scipy.optimize.root for alternative methods. the default value is 'hybr'. (See also OneSectorGE.build_baseline())
mr_max_iter : int
This parameter sets the maximum limit on the number of iterations conducted by the solver used to solve for MR terms. The default value is 1400. (See also OneSectorGE.build_baseline())
mr_tolerance : float
This parameter sets the convergence tolerance level for the solver used to solve for MR terms. The default value is 1e-8. (See also OneSectorGE.build_baseline())
ge_method : str
The solver method to use for the full GE non-linear solver. See scipy.root() documentation for option. Default is 'hybr'.
ge_tolerance : float
The tolerance for determining if the GE system of equations is solved. Default is 1e-8.
ge_max_iter : int
The maximum number of iterations allowed for the full GE nonlinear solver. Default is 1000.
quiet : bool
If True, suppress console printouts detailing the solver success/failures of each trial. Default is False.
result_stats : list
A list of functions to compute in order to summarize the results across trials. The default is ['mean', 'std', 'sem'], which computes the mean, standard deviation, and standard mean error of the results, respectively. The model should accept any function that can be used with the pandas.DataFrame.agg() function.
all_results : bool
If true, MonteCarloGE attributes containing individual results for all trials are populated. Default is False to reduce memory use.
redraw_failed_trials : bool
If True, draw additional trials to replace failed trials. Additional trials will be run until enough have succeeded to match the number that originally failed (up to a maximum of the number of trials originally specified). This helps insure that the model is solved for as many trials as were specified. E.g. If 10 trials are specified and 2 fail to solve, additional trials will be attempted with additional random draws until 2 additional models have been solved successfully, resulting in 10 successful trials, or 10 additional trails are run without 2 successes.
trial_omr_rescale : dict
(option) An option to provide alternative OMR rescale factors for specific trials. The argument should be a dictionary keyed with the trial number (0 to N-1) and value equal to the desired rescale factor (10^n) with n a positive or negative integer (e.g. {0:0.001, 3:10} Any trial not specified here will be run using the value specified by the omr_rescale argument (default of 1).

Returns

None
No return but populates many results attributes of the MonteCarloGE model.

Examples

Run simulations for mc_model, which is an instance of MonteCarloGE (see respective documentation). 'counterfactual_dataframe' is a counterfactual version of the baseline data to use for the experiment.

>>> monte_model = ge.MonteCarloGE(...)
>>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
...                        omr_rescale = 1)
* Simulating trial 0 *
Solving for baseline MRs...
The solution converged.
Solving for conditional MRs...
The solution converged.
Solving full GE model...
The solution converged.
* Simulating trial 1 *
Solving for baseline MRs...
The solution converged.
Solving for conditional MRs...
The solution converged.
Solving full GE model...
The solution converged.
(truncated...)

Return all info about each trial, not just summary info across trials

>>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
...                        omr_rescale = 1,
...                        all_results=True)

Specify alternative OMR rescale factors for trials 2 and 3, while using the value of 1 for all other trials.

>>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
...                        omr_rescale = 1,
...                        trail_rescale_factors = {2:0.001, 3:100})

Draw additional random cost values to replace trials that failed to solve

monte_model.run_trials(experiment_data = counterfactual_dataframe, … omr_rescale = 1, … redraw_failed_trials=True)

Expand source code
def run_trials(self,
               experiment_data:pd.DataFrame,
               omr_rescale: float = 1,
               imr_rescale: float = 1,
               mr_method: str = 'hybr',
               mr_max_iter: int = 1400,
               mr_tolerance: float = 1e-8,
               ge_method:str = 'hybr',
               ge_tolerance: float = 1e-8,
               ge_max_iter: int = 1000,
               quiet: bool = False,
               result_stats:list = ['mean', 'std', 'sem'],
               all_results:bool = False,
               redraw_failed_trials: bool = False,
               trial_omr_rescale: dict = None):
    '''
    Conduct Monte Carlo Simulation of OneSectorGE gravity model.
    Args:
        experiment_data (pandas.DataFrame): A dataframe containing the counterfactual trade-cost data to use for the
            experiment. The best approach for creating this data is to copy the baseline data
            (MonteCarloGE.baseline_data.copy()) and modify columns/rows to reflect desired counterfactual experiment.
        omr_rescale (int): (optional) This value rescales the OMR values to assist in convergence. Often, OMR values
            are orders of magnitude different than IMR values, which can make convergence difficult. Scaling by a
            different order of magnitude can help. Values should be of the form 10^n for positive or negative
            integer n. By default, this value is 1 (10^0). However, users should be careful with this choice as
            results, even when convergent, may not be fully robust to any selection. The method
            MonteCarlo.check_omr_rescale() can help identify and compare feasible values for a given model.
        imr_rescale (int): (optional) This value rescales the IMR values to potentially aid in conversion. However,
            because the IMR for the reference importer is normalized to one, it is unlikely that there will be because
            because changing the default value, which is 1.
        mr_method (str): This parameter determines the type of non-linear solver used for solving the baseline and
            experiment MR terms. See the documentation for scipy.optimize.root for alternative methods. the default
            value is 'hybr'. (See also OneSectorGE.build_baseline())
        mr_max_iter (int): This parameter sets the maximum limit on the number of iterations conducted
            by the solver used to solve for MR terms. The default value is 1400.
            (See also OneSectorGE.build_baseline())
        mr_tolerance (float): This parameter sets the convergence tolerance level for the solver used to
            solve for MR terms. The default value is 1e-8. (See also OneSectorGE.build_baseline())
        ge_method (str): The solver method to use for the full GE non-linear solver. See scipy.root()
            documentation for option. Default is 'hybr'.
        ge_tolerance (float): The tolerance for determining if the GE system of equations is solved.
            Default is 1e-8.
        ge_max_iter (int): The maximum number of iterations allowed for the full GE nonlinear solver.
            Default is 1000.
        quiet (bool): If True, suppress console printouts detailing the solver success/failures of each trial.
            Default is False.
        result_stats (list): A list of functions to compute in order to summarize the results across trials. The
            default is ['mean', 'std', 'sem'], which computes the mean, standard deviation, and standard mean error
            of the results, respectively. The model should accept any function that can be used with the
            pandas.DataFrame.agg() function.
        all_results (bool): If true, MonteCarloGE attributes containing individual results for all trials are
            populated. Default is False to reduce memory use.
        redraw_failed_trials (bool): If True, draw additional trials to replace failed trials. Additional trials
            will be run until enough have succeeded to match the number that originally failed (up to a maximum of
            the number of trials originally specified). This helps insure that the model is solved for as many
            trials as were specified. E.g. If 10 trials are specified and 2 fail to solve, additional trials will be
            attempted with additional random draws until 2 additional models have been solved successfully, resulting
            in 10 successful trials, or 10 additional trails are run without 2 successes.
        trial_omr_rescale (dict): (option) An option to provide alternative OMR rescale factors for specific trials.
            The argument should be a dictionary keyed with the trial number (0 to N-1) and value equal to the
            desired rescale factor (10^n) with n a positive or negative integer (e.g. {0:0.001, 3:10} Any trial not
            specified here will be run using the value specified by the omr_rescale argument (default of 1).
    Returns:
        None: No return but populates many results attributes of the MonteCarloGE model.

    Examples:
         Run simulations for mc_model, which is an instance of MonteCarloGE (see respective documentation).
         'counterfactual_dataframe' is a counterfactual version of the baseline data to use for the experiment.
        >>> monte_model = ge.MonteCarloGE(...)
        >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
        ...                        omr_rescale = 1)
        * Simulating trial 0 *
        Solving for baseline MRs...
        The solution converged.
        Solving for conditional MRs...
        The solution converged.
        Solving full GE model...
        The solution converged.
        * Simulating trial 1 *
        Solving for baseline MRs...
        The solution converged.
        Solving for conditional MRs...
        The solution converged.
        Solving full GE model...
        The solution converged.
        (truncated...)

        Return all info about each trial, not just summary info across trials
        >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
        ...                        omr_rescale = 1,
        ...                        all_results=True)

        Specify alternative OMR rescale factors for trials 2 and 3, while using the value of 1 for all other trials.
        >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
        ...                        omr_rescale = 1,
        ...                        trail_rescale_factors = {2:0.001, 3:100})

         Draw additional random cost values to replace trials that failed to solve
         >>> monte_model.run_trials(experiment_data = counterfactual_dataframe,
         ...                        omr_rescale = 1,
         ...                        redraw_failed_trials=True)
    '''
    models = list()
    failed_trials = list()
    num_failed_iterations = 0
    for trial in range(self.trials):
        if not quiet:
            print("\n* Simulating trial {} *".format(trial))
        # Define a new CostCoeff instance using one of the trial values
        param_values = CostCoeffs(self.coeff_sample, coeff_col=trial, identifier_col=self._cost_coeffs._identifier_col)
        try:
            if (trial_omr_rescale is not None) and (trial in trial_omr_rescale):
                # Use alternative omr if supplied
                omr_rescale_use = trial_omr_rescale[trial]
            else:
                omr_rescale_use = omr_rescale

            trial_model = self._run_single_trial(param_values=param_values, experiment_data=experiment_data,
                                                 quiet = quiet, omr_rescale = omr_rescale_use,
                                                 imr_rescale = imr_rescale, mr_method = mr_method,
                                                 mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                 ge_method = ge_method, ge_tolerance = ge_tolerance,
                                                 ge_max_iter = ge_max_iter)
            models.append(trial_model)
        except:
            if not quiet:
                print("Failed to solve model.\n")
            num_failed_iterations+=1
            failed_trials.append(trial)
    self.num_failed_trials = num_failed_iterations

    # For failed trails, redraw new estimates and solve new models for each failed trial
    if redraw_failed_trials:
        # Define distribution of beta parameters to draw new values from (sets new seed to draw from)
        new_dist = multivariate_normal(self.main_coeffs, self.main_covar_mat, seed=(1 + self._seed),
                                       allow_singular=self._allow_singular)
        # Create lists/variables to track successes/failures
        new_draws = list()
        new_successes = 0
        new_failures = 0
        tries = 0
        max_retries = self.trials
        while new_successes < self.num_failed_trials and tries <= max_retries:
            # Tick up try counter and created column name for random draw

            try_num = self.trials+tries
            # Create and format coefficient draw
            new_draw = pd.DataFrame(new_dist.rvs())
            new_draw.index = self.main_coeffs.index
            new_draw.rename(columns={0: str(try_num)}, inplace=True)
            new_draws.append(new_draw.copy())
            new_draw.reset_index(inplace = True)
            new_params = CostCoeffs(new_draw, coeff_col=str(try_num), identifier_col=self._cost_coeffs._identifier_col)

            if not quiet:
                print("\n* Simulating failed trial replacement {} *".format(try_num))
            try:
                new_trial_model = self._run_single_trial(param_values=new_params, experiment_data=experiment_data,
                                                         quiet = quiet, omr_rescale = omr_rescale,
                                                         imr_rescale = imr_rescale, mr_method = mr_method,
                                                         mr_max_iter = mr_max_iter, mr_tolerance = mr_tolerance,
                                                         ge_method = ge_method, ge_tolerance = ge_tolerance,
                                                         ge_max_iter = ge_max_iter)
                models.append(new_trial_model)
                new_successes+=1
            except:
                if not quiet:
                    print("Failed to solve redrawn replacement model.\n")
                new_failures += 1
                failed_trials.append(try_num)

            tries += 1
        if len(new_draws)>0:
            self.replacement_sample = pd.concat(new_draws, axis=1)
        else:
            self.replacement_sample = new_draws



    # Get results labels from one of the OneSectorGE gegravity
    self.labels = models[0].labels
    if all_results:
        self.trial_models = models
    self.failed_trials = failed_trials
    self.all_country_results, self.country_results = self._compile_results(models, 'country_results', result_stats, all_results)
    self.all_country_mr_terms, self.country_mr_terms = self._compile_results(models, 'mr_terms', result_stats, all_results)
    self.all_outputs_expenditures, self.outputs_expenditures = self._compile_results(models, 'outputs_expenditures', result_stats, all_results)
    self.all_factory_gate_prices, self.factory_gate_prices = self._compile_results(models, 'factory_gate_prices', result_stats, all_results)
    self.all_aggregate_trade_results, self.aggregate_trade_results = self._compile_results(models, 'aggregate_trade_results', result_stats, all_results)
    self.all_bilateral_trade_results, self.bilateral_trade_results = self._compile_results(models, 'bilateral_trade', result_stats, all_results)
    self.all_bilateral_costs, self.bilateral_costs = self._compile_results(models, 'bilateral_costs', result_stats, all_results)
    self._compile_diagnostics(models)
    # ToDo: build some method for confidence intervals from Anderson Yotov (2016)