SAS macro to calculate Krippendorff's alpha (nominal, ordinal and interval level), Cohen's kappa, Perreault's index and Bennett's index

 

This macro was created by Kang, Kara, Laskey, and Seaton (1993). Thanks to James Helgeson, who provided this electronic version and offers the following advice: "It likes data saved as text files, with spaces between observations, and the files stored where SAS looks for things by default on your system. Getting the article and using the data in Appendix C and matching your results to those in the article will get you started."

 

/* Splash screen: static text only, no input fields.               */
/* Each entry is  #row @column "text" [ATTR=...].                   */
%WINDOW TITLE
     #3  @15 "A SAS MACRO FOR CALCULATING INTERCODER AGREEMENT IN" ATTR=HIGHLIGHT
     #4  @32 "CONTENT ANALYSIS"                                    ATTR=HIGHLIGHT
     #7  @30 "COPYRIGHT(C) 1992"
     #11 @16 "DEPARTMENT OF MARKETING AND BUSINESS ENVIRONMENT"
     #12 @23 "FLORIDA INTERNATIONAL UNIVERSITY"
     #18 @45 "PRESS"
         @51 "ENTER"                                               ATTR=UNDERLINE
         @57 "TO CONTINUE"
;
/* Show the splash screen and wait for the user. */
%DISPLAY TITLE;
/* Four one-field prompt windows.  Each field name (CT, COD, OBS,   */
/* DFILE) is the macro variable that receives what the user types;  */
/* the number after the name is the field width.                    */

/* CT: number of coding categories (3-character field). */
%WINDOW ASK1
     #7 @10 "PLEASE ENTER NUMBER OF CODING CATEGORIES"
        @53 CT 3 ATTR=UNDERLINE
;
%DISPLAY ASK1;

/* COD: number of coders (3-character field). */
%WINDOW ASK2
     #7 @10 "PLEASE ENTER NUMBER OF CODERS"
        @50 COD 3 ATTR=UNDERLINE
;
%DISPLAY ASK2;

/* OBS: number of stimuli/objects (4-character field). */
%WINDOW ASK3
     #7 @10 "PLEASE ENTER NUMBER OF STIMULI/OBJECTS"
        @52 OBS 4 ATTR=UNDERLINE
;
%DISPLAY ASK3;

/* DFILE: name of the raw data file (12-character field). */
%WINDOW ASK4
     #6 @10 "PLEASE ENTER NAME OF DATA FILE"
     #7 @10 "(NO MORE THAN 12 CHARACTERS)"
        @43 DFILE 12 ATTR=UNDERLINE
;
%DISPLAY ASK4;
/* CELL = CT squared: the number of cells in a CT x CT table.       */
/* %NEWDATA uses it to select COL1-COL&CELL.                        */
%LET CELL = %EVAL(&CT * &CT);

/* Limit printed output to 79 columns. */
OPTIONS LINESIZE=79;

/* Read the raw codings from the file named at the ASK4 prompt.     */
/* List input: each observation holds &COD numeric values, C1 to    */
/* C&COD, one per coder.                                            */
DATA INTER;
     INFILE "&DFILE";
     INPUT C1 - C&COD;
RUN;
/*------------------------------------------------------------------*/
/* %NEWDATA                                                          */
/* Builds two inputs for the later PROC IML step from data set       */
/* INTER (C1-C&COD):                                                 */
/*   AGREE - the C1 x C2 cross-tabulation of the first two coders,   */
/*           reshaped to &CT rows of X1-X&CT via file CROSS.DAT.     */
/*   NEW   - one row per observation of INTER with CAT1-CAT&CT,      */
/*           where CATj counts how many coders chose category j.     */
/* Uses global macro variables CT, COD and CELL.                     */
/* Side effects: writes CROSS.DAT; creates WORK data sets CROSS, B,  */
/* TRS, NEWW, AGREE, CAT and NEW.                                    */
/* Categories must be coded as the integers 1..&CT (see the          */
/* IF C&I=&J test below).                                            */
/* NOTE(review): SPARSE only fills in combinations of levels that    */
/* actually occur for C1 and C2.  If some category is never used by  */
/* one of those two coders, CROSS presumably has fewer than &CELL    */
/* rows and KEEP COL1-COL&CELL would fail - confirm.                 */
/*------------------------------------------------------------------*/
%MACRO NEWDATA;
     /* Keep the loop indices out of the global symbol table. */
     %LOCAL I J;

/* Cell counts of coder 1 by coder 2, one row per cell. */
PROC FREQ DATA=INTER;
     TABLES C1*C2 / LIST SPARSE MISSPRINT OUT=CROSS NOPRINT;
RUN;

DATA B;
     SET CROSS;
     KEEP COUNT;
RUN;

/* Turn the column of counts into a single row COL1, COL2, ... */
PROC TRANSPOSE DATA=B OUT=TRS;
RUN;

DATA NEWW;
     SET TRS;
     KEEP COL1-COL&CELL;
RUN;

/* Write the counts as &CT lines of &CT values each.               */
/* _NULL_ (not NULL) so that no output data set is created.        */
DATA _NULL_;
     SET NEWW;
     FILE 'CROSS.DAT';
     %DO I=1 %TO &CELL %BY &CT;
          %LET J=%EVAL(&I+&CT-1);      /* last column of this row */
          PUT COL&I-COL&J;
     %END;
RUN;

/* Read the square table back in as &CT observations. */
DATA AGREE;
     INFILE 'CROSS.DAT';
     INPUT X1-X&CT;
RUN;

/* Add zero-initialised tally variables CAT1-CAT&CT to every       */
/* observation of INTER.                                           */
DATA CAT;
     SET INTER;
     ARRAY TALLY{*} CAT1-CAT&CT;
     DO _K=1 TO DIM(TALLY);
          TALLY{_K}=0;
     END;
     DROP _K;
RUN;

/* For each observation count, per category, how many coders       */
/* assigned it; keep only the tallies.                             */
DATA NEW(KEEP=CAT1-CAT&CT);
     SET CAT;
     %DO I=1 %TO &COD;
          %DO J=1 %TO &CT;
               IF C&I=&J THEN CAT&J=CAT&J+1;
          %END;
     %END;
RUN;
%MEND NEWDATA;
%NEWDATA

/* Flip NEW so the tallies become columns COL1-COL&OBS.  The output */
/* data set gets an automatic name and is picked up by the          */
/* argument-less SET below.                                         */
PROC TRANSPOSE DATA=NEW;
RUN;

/* KI: same table with every zero tally replaced by a missing value. */
DATA KI;
     SET;
     ARRAY GT{*} COL1-COL&OBS;
     DO _K = 1 TO DIM(GT);
          IF GT{_K} = 0 THEN GT{_K} = .;
     END;
     DROP _K;
RUN;

/* KO: each tally n replaced by n*(n-1); only COL1-COL&OBS are kept. */
/* The argument-less SET reads the most recently created data set,   */
/* which is KI.                                                      */
DATA KO(KEEP=COL1-COL&OBS);
     SET;
     ARRAY MT{*} COL1-COL&OBS;
     DO _K = 1 TO DIM(MT);
          MT{_K} = MT{_K} * (MT{_K} - 1);
     END;
RUN;
/*------------------------------------------------------------------*/
/* %ORDINAL                                                          */
/* Builds data set MAT (X1-X&CT, &CT rows): the matrix of ordinal    */
/* distances between categories, derived from the category totals    */
/* of data set NEW.                                                  */
/* For categories i <= j the entry is                                */
/*      2*(X_i + ... + X_j) - X_i - X_j                              */
/* where X_c is the column total of CATc in NEW.  The matrix is      */
/* symmetric with a zero diagonal.                                   */
/* Uses global macro variable CT.                                    */
/* Side effects: writes DBC.MAT; creates WORK data sets LAST, ORDD   */
/* and MAT.                                                          */
/* Distance variables are named DBC<i>_<j>.  The underscore keeps    */
/* the two indices apart, so names stay unique when CT >= 10 (with   */
/* plain concatenation DBC112 could mean (1,12) or (11,2)).          */
/*------------------------------------------------------------------*/
%MACRO ORDINAL;
     /* Keep the loop indices out of the global symbol table. */
     %LOCAL I J L;

/* Category totals: column sums of NEW, stored as one row X1-X&CT. */
PROC IML;
     USE NEW;
     READ ALL INTO XX;
     ORD=XX(|+,|);
     VARS="X1":"X&CT";
     CREATE LAST FROM ORD [COLNAME=VARS];
     APPEND FROM ORD;
QUIT;

/* One observation holding every pairwise distance. */
DATA ORDD;
     SET LAST;
     %DO I=1 %TO &CT;
          %DO J=&I %TO &CT;
               /* SUM = total frequency of categories i..j */
               SUM=0;
               %DO L=&I %TO &J;
                    SUM=SUM+X&L;
               %END;
               SUM1=SUM-X&I;
               SUM2=SUM-X&J;
               /* fill both triangles */
               DBC&I._&J=SUM1+SUM2;
               DBC&J._&I=SUM1+SUM2;
          %END;
     %END;
RUN;

/* Write the matrix row by row.  Variables are listed explicitly   */
/* rather than as a numbered range so the list cannot expand to    */
/* unintended names.  _NULL_ (not NULL) so that no output data set */
/* is created.                                                     */
DATA _NULL_;
     SET ORDD;
     FILE 'DBC.MAT';
     %DO I=1 %TO &CT;
          PUT
          %DO J=1 %TO &CT;
               DBC&I._&J
          %END;
          ;
     %END;
RUN;

/* Read the matrix back as &CT observations of X1-X&CT. */
DATA MAT;
     INFILE 'DBC.MAT';
     INPUT X1-X&CT;
RUN;
%MEND ORDINAL;
%ORDINAL;
/*------------------------------------------------------------------*/
/* Reliability calculations.                                         */
/* Inputs (WORK data sets):                                          */
/*   INTER - raw codings; only its row and column counts are used    */
/*   NEW   - per-observation category tallies                        */
/*   MAT   - ordinal distance matrix (squared elementwise below)     */
/*   KO    - n*(n-1) tallies, one row per category                   */
/*   AGREE - coder 1 x coder 2 cross-tabulation                      */
/* Prints: agreement matrix; Krippendorff's alpha (nominal, ordinal, */
/* interval); kappa; Perreault's index with lower bound; Bennett's   */
/* index.                                                            */
/*------------------------------------------------------------------*/
PROC IML;
     USE INTER;
          READ ALL INTO COD;
          CODER=NCOL(COD);    /* number of columns of INTER */
          NOBS=NROW(COD);     /* number of rows of INTER    */
          FREE COD;
     USE NEW;
          READ ALL INTO XX;
     USE MAT;
          READ ALL INTO DC;
          DBC=DC#DC;          /* elementwise square of the ordinal distances */
     USE KO;
          READ ALL INTO Y;
          /* Agreement matrix FINAL: off-diagonal cells come from XX`*XX, */
          /* diagonal cells are the row sums of KO.                       */
          X=t(XX)*XX;
          DG=VECDIAG(X);
          DDD=DIAG(DG);
          X=X-DDD;            /* zero the diagonal of XX`*XX */
          YYY=Y(|,+|);
          DP=DIAG(YYY);
          FINAL=X+DP;
PRINT'AGREEMENT MATRIX',FINAL;
     SSS=FINAL(|+,|);         /* column totals of FINAL         */
     TTT=SUM(FINAL);          /* grand total of FINAL           */
     VVV=t(SSS)*SSS;          /* products of the column totals  */
     G1=TTT-CODER+1;
     G4=TTT*G1;
     /* Nominal: only off-diagonal cells count as disagreement. */
     CCSUM=(SUM(VVV)-SUM(VECDIAG(VVV)))/G4;        /* expected */
     DDSUM=(SUM(FINAL)-SUM(VECDIAG(FINAL)))/TTT;   /* observed */
     G5=DDSUM/CCSUM;
     ALPHA=1-G5;
PRINT'KRIPPENDORFF ALPHA FOR NOMINAL DATA', ALPHA;
     /* Ordinal: cells weighted by the squared ordinal distances. */
     AAA=DBC#VVV;
     AASUM=(SUM(AAA))/G4;
     BBB=DBC#FINAL;
     BBSUM=(SUM(BBB))/TTT;
     BBBB=BBSUM/AASUM;
     ALPHA=1-BBBB;
PRINT'KRIPPENDORFF ALPHA FOR ORDINAL DATA', ALPHA;
     /* Interval: cells weighted by (i-j)**2. */
     R=TOEPLITZ(0:&CT-1);     /* R(i,j)=|i-j| */
     A=R#R;
     AA=A#VVV;
     ASUM=(SUM(AA))/G4;
     BB=A#FINAL;
     BSUM=(SUM(BB))/TTT;
     BBB=BSUM/ASUM;           /* BBB is reused here as a scalar */
     ALPHA=1-BBB;
PRINT'KRIPPENDORFF ALPHA FOR INTERVAL DATA', ALPHA;
     UUU=SUM(DP);             /* total of the diagonal of FINAL */
     CAT=NCOL(FINAL);         /* number of categories           */
     /* Kappa from the per-observation tallies; printed when CODER is not 2. */
     FFF=XX##2;
     PPP=XX(|+,|);
     HHH=CODER*NOBS;
     GGG=PPP/HHH;             /* share of all codings per category */
     P_BAR=(SUM(FFF)-(NOBS*CODER))/(NOBS*CODER*(CODER-1));
     PE=SUM(GGG#GGG);
     KAPPA=(P_BAR-PE)/(1-PE);
     /* Kappa from the coder 1 x coder 2 table; printed when CODER=2. */
     USE AGREE;
     READ ALL INTO MAT;
     ROW=MAT(|+,|);           /* column totals (row vector)    */
     COL=MAT(|,+|);           /* row totals (column vector)    */
     TOT=SUM(MAT);
     M=t(ROW)#COL;
     MAR=M/TOT;               /* chance-expected diagonal counts */
     D=VECDIAG(MAT);
     FO=SUM(D);               /* observed agreements */
     FC=SUM(MAR);             /* expected agreements */
     K=(FO-FC)/(TOT-FC);
IF CODER=2 THEN PRINT'COHEN"S KAPPA', K;
ELSE PRINT'COHEN"S KAPPA', KAPPA;
     D1=UUU/TTT;              /* observed proportion of agreement */
     D2=1/CAT;                /* agreement expected by chance     */
     D3=CAT/(CAT-1);
     S=((D1-D2)*D3);
     /* Perreault's index is the square root of S.  When agreement is */
     /* below chance S is negative, so the radicand is clamped at 0   */
     /* and the index is reported as 0 instead of failing in SQRT.    */
     PI=SQRT(MAX(S,0));
     LO_PI=PI-1.96*SQRT(PI*(1-PI)/TTT);
PRINT'PERREAULT"S INDEX AND 95 PERCENT LOWER CFI', PI LO_PI;
PRINT'BENNETT"S INDEX', S;