Real World Health Care Data Analysis. Uwe Siebert

Читать онлайн книгу.

Real World Health Care Data Analysis - Uwe Siebert


Скачать книгу

      SET imbalance_new;

      BY _var2 _var1 variable2 variable1;

      IF vartype1 = ‘C’ AND LAST.variable2 THEN

      DELETE;

      RUN;

      PROC SORT DATA = imbalance_new;

      BY _var1 variable1 _var2 variable2;

      RUN;

      %* Dataset IMBALANCE is to contain all interaction terms and whether they are

       in the model;

      DATA imbalance;

      MERGE imbalance (WHERE = (_var2 ^= ‘’)) imbalance_new (KEEP = _var1

       _var2 IN = in0 OBS = 1);

      BY _var1 _var2;

      iter = 0;

      out = 0;

      in = 0;

      IF in0 THEN

      in = 1;

      RUN;

      %* Dataset ALLINTER is the dataset contain all interaction terms already in

       the model plus the one to be added.;

      DATA allinter;

      SET imbalance_new (IN = in0);

      IF in0 THEN

      iter = &count + 1;

      RUN;

      %LET n_inter = 0;

      %LET new_n_inter = 1;

      %LET _n_imbal_new = &_n_imbal;

      %LET _n_imbal_start = &_n_imbal;

      %* Add interaction terms to model and recalculate PS, _strata and

       standardized bias until no more interaction terms have standardized

       bias of more than &imbal_strata_crit and are not already in the model;

      %DO %WHILE (&new_n_inter > 0 AND &count < &maxiter AND &_n_imbal_new ^= 0);

      %LET count = %EVAL(&count + 1);

      %LET n_inter = &new_n_inter;

      %* Fill INTERACTIONSIN with all interaction to be fitted to the model

       of this step;

      DATA _NULL_;

      SET allinter END = last;

      CALL SYMPUT(‘_ibint’||COMPRESS(PUT(_n_, BEST.)),

       COMPRESS(variable1||’*’||variable2));

      IF last THEN

      CALL SYMPUT(‘_nibint’, COMPRESS(PUT(_n_, BEST.)));

      RUN;

      %LET interactionsin =;

      %DO iloop = 1 %TO &_nibint;

      %LET interactionsin = &interactionsin &&_ibint&iloop;

      %END;

      %* Run PSMATCH to create PS and derive _strata_ *;

      PROC PSMATCH DATA = _indata_ps REGION = ALLOBS;

      CLASS _cohort &classvars_bin_model;

      PSMODEL _cohort(Treated = “1”) = &contvars &classvars_bin_model

       &always_int &interactionsin;

      OUTPUT OUT = ps PS = _ps_;

      RUN;

      PROC SUMMARY DATA = ps NWAY;

      CLASS _mergekey _cohort;

      VAR _ps_;

      OUTPUT OUT = ps MEAN =;

      RUN;

      PROC PSMATCH DATA = ps REGION = ALLOBS;

      CLASS _cohort;

      PSDATA TREATVAR = _cohort(Treated = “1”) PS = _ps_;

      STRATA NSTRATA = &nstrata KEY = TOTAL;

      OUTPUT OUT (OBS = REGION) = ps;

      RUN;

      DATA ps;

      MERGE _indata ps;

      BY _mergekey;

      RUN;

      %* Calculate standardized bias;

      %_ps_stddiff_apmb (indata = ps);

      %* Calculate IMBALANCE as ABS(stddiff) > &imbal_strata_crit and count

       the number of imbalanced over strata per interaction.;

      DATA _stddiff;

      SET _stddiff;

      stddiff = ABS(stddiff);

      IF stddiff > &imbal_strata_crit THEN

      imbalance = 1;

      ELSE imbalance = 0;

      IF vartype1 = ‘C’ THEN

      DO;

      _var1 = UPCASE(REVERSE(variable1));

      _var1 = REVERSE(SUBSTR(_var1, INDEX(_var1, ‘_’) +

       1));

      END;

      ELSE _var1 = variable1;

      IF vartype2 = ‘C’ THEN

      DO;

      _var2 = UPCASE(REVERSE(variable2));

      _var2 = REVERSE(SUBSTR(_var2, INDEX(_var2, ‘_’) +

       1));

      END;

      ELSE _var2 = variable2;

      RUN;

      PROC SORT DATA = _stddiff;

      BY _var1 _var2;

      RUN;

      DATA imbalance_old;

      SET imbalance_new;

      RUN;

      PROC SUMMARY DATA = _stddiff NWAY MISSING;

      CLASS variable1 _var1 variable2 _var2;

      VAR imbalance stddiff;

      OUTPUT OUT = imbalance_new SUM = imbalance dum1 MEAN = dum2

       stddiff;

      RUN;

      %* For interaction involving class variable the maximum number and

       maximum mean over categories is taken;

      PROC SUMMARY DATA = imbalance_new NWAY MISSING;

      CLASS _var1 _var2;

      VAR imbalance stddiff;

      OUTPUT OUT = imbalance_new MAX = imbalance max;

      RUN;

      %* Macro variable _N_IMBAL_NEW with number of terms (main and

       interaction) with more than &imbal_nstrata_crit imbalanced strata is

       created;

      PROC SQL NOPRINT;

      SELECT MEAN(max) INTO: _max_new FROM imbalance_new;

      SELECT COMPRESS(PUT(COUNT(max), BEST.)) INTO: _n_imbal_new FROM

       imbalance_new WHERE (imbalance >= &imbal_nstrata_crit);

      QUIT;

      %* If no improvement since last step then remove the term from the

       existing terms by removing from dataset ALLINTER and setting

       variables IN = 0, OUT = 1 in dataset IMBALANCE.

      Select the record from dataset IMBALANCE with the next highest number

       of imbalanced


Скачать книгу