Clinical Trials – Make SDTM DM and EX datasets
SAS programs for making SDTM DM and EX datasets
Yupeng Wang, Ph.D., Data Scientist
Raw data
1. Metadata
1
Clinical Trials – Make SDTM DM and EX datasets
2. DM raw data
3. Dosing raw data
2
Clinical Trials – Make SDTM DM and EX datasets
Programs
Program 1: make_empty_dataset.sas
/*make_empty_dataset.sas creates a zero record dataset based on a dataset metadata
spreadsheet. The dataset created is called EMPTY_ where "" is the name of the
dataset. This macro also creates a global macro variable called KEEPSTRING that
holds the dataset variables desired and listed in the order they should appear.
[The variable order is dictated by VARNUM in the metadata spreadsheet.]
MACRO PARAMETERS:
metadatafile = the MS Excel file containing the dataset metadata
dataset = the dataset or domain name you want to extract*/
%macro make_empty_dataset(metadatafile=,dataset=);
proc import
datafile="&metadatafile"
out=_temp
dbms=xlsx
replace;
sheet="VARIABLE_METADATA";
run;
** sort the dataset by expected specified variable order;
proc sort
data=_temp;
where domain = "&dataset";
by varnum;
run;
** create keepstring macro variable and load metadata
** information into macro variables;
%global &[Link];
data _null_;
set _temp nobs=nobs end=eof;
if _n_=1 then
call symput("vars", compress(put(nobs,3.)));
call symputx('var' || compress(put(_n_, 3.)), variable);
call symputx('label' || compress(put(_n_, 3.)), label);
call symputx('length' || compress(put(_n_, 3.)), put(length, 3.));
** valid ODM types include TEXT, INTEGER, FLOAT, DATETIME,
** DATE, TIME and map to SAS numeric or character;
if upcase(type) in ("INTEGER", "FLOAT") then
call symputx('type' || compress(put(_n_, 3.)), "");
else if upcase(type) in ("TEXT", "DATE", "DATETIME",
"DATE", "TIME") then
call symputx('type' || compress(put(_n_, 3.)), "$");
else
put "ERR" "OR: not using a valid ODM type. " type=;
** create **KEEPSTRING macro variable;
length keepstring $ 32767;
3
Clinical Trials – Make SDTM DM and EX datasets
retain keepstring;
keepstring = compress(keepstring) || "|" || left(variable);
if eof then
call symputx(upcase(compress("&dataset" || 'KEEPSTRING')),
left(trim(translate(keepstring," ","|"))));
run;
** create a 0-observation template data set used for assigning
** variable attributes to the actual data sets;
data EMPTY_&dataset;
%do i=1 %to &vars;
attrib &&var&i label="&&label&i" length=&&type&i.&&length&i...;
%if &&type&i=$ %then
retain &&var&i '';
%else
retain &&var&i .;
;
%end;
if 0;
run;
%mend make_empty_dataset;
Program 2: [Link]
/*Process the raw demographic data from a CSV file*/
%include '/folders/myfolders/test1/[Link]';
%common;
filename infl '/folders/myfolders/test1/[Link]';
proc format;
value trt
1 = "Active"
0 = "Placebo";
value gender
1 = "M"
2 = "F"
3 = "U";
value race
1 = "White"
2 = "Black"
3 = "Other";
run;
data [Link];
infile infl dlm='2C0D'x dsd missover;
length dob1 $10
randdt1 $10;
input subject trt gender race orace $ dob1 $ randdt1 $;
dob=input(dob1,mmddyy10.);
randdt=input(randdt1,mmddyy10.);
format dob randdt mmddyy10.;
uniqueid = 'UNI' || put(subject,3.);
gender1=put(gender,gender.);
4
Clinical Trials – Make SDTM DM and EX datasets
race1=put(race,race.);
trt1=put(trt,trt.);
label subject = "Subject Number"
trt = "Treatment"
gender = "Gender"
race = "Race"
orace = "Oher Race Specify"
dob = "Date of Birth"
uniqueid = "Company Wide Subject ID"
randdt = "Randomization Date";
drop dob1 randdt1 gender race;
rename gender1=gender race1=race;
run;
Program 3: [Link]
/*Process the raw dose data from an Excel file.
Key points: 1) make up missing dates; 2) convert Excel dates to SAS dates*/
%include '/folders/myfolders/test1/[Link]';
%common;
libname ds xlsx "/folders/myfolders/test1/[Link]";
data [Link];
set ds.Sheet1;
if find(startdt,'/') then
do;
array var1(3) $4.;
do i=1 to 3;
var1(i)=scan(startdt,i,"/");
if var1(i)=' ' then
var1(i)='1';
end;
startdt1=mdy(input(var1(1),$4.), input(var1(2),$4.) , input(var1(3),$4.));
end;
else
startdt1=input(startdt,$10.)-21916;
if find(enddt,'/') then
do;
array var2(3) $4.;
do i=1 to 3;
var2(i)=scan(enddt,i,"/");
if var2(i)=' ' then
var2(i)='1';
end;
enddt1=mdy(input(var2(1),$4.), input(var2(2),$4.) , input(var2(3),$4.));
end;
else
enddt1=input(enddt,$10.)-21916;
uniqueid = 'UNI' || put(subject,3.);
format startdt1 enddt1 mmddyy10.;
drop i startdt enddt var11-var13 var21-var23;
rename startdt1=startdt enddt1=enddt;
run;
5
Clinical Trials – Make SDTM DM and EX datasets
Program 4: make_sort_order.sas
/* make_sort_order.sas creates a global macro variable called SORTSTRING where ** is the name of the dataset that
contains the metadata specified sort order for a given dataset.
MACRO PARAMETERS:
metadatafile = the file containing the dataset metadata
dataset = the dataset or domain name*/
%macro make_sort_order(metadatafile=,dataset=);
proc import
datafile="&metadatafile"
out=_temp
dbms=xlsx
replace;
sheet="TOC_METADATA";
run;
** create **SORTSTRING macro variable;
%global &[Link];
data _null_;
set _temp;
where name = "&dataset";
call symputx(compress("&dataset" || "SORTSTRING"),
translate(domainkeys," ",","));
run;
%mend make_sort_order;
Program 5: STDM_DM.sas
/* STDM_DM.sas creates the SDTM DM and SUPPDM datasets and saves them as permanent SAS datasets to the target
libref */
%include '/folders/myfolders/test1/[Link]';
%common;
**** CREATE EMPTY DM DATASET CALLED EMPTY_DM;
%include '/folders/myfolders/test1/make_empty_dataset.sas';
%make_empty_dataset(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=DM);
**** GET FIRST AND LAST DOSE DATE FOR RFSTDTC AND RFENDTC;
proc sort
data=[Link](keep=subject startdt enddt)
out=dosing;
by subject startdt;
run;
**** FIRSTDOSE=FIRST DOSING AND LASTDOSE=LAST DOSING;
data dosing;
set dosing;
by subject;
6
Clinical Trials – Make SDTM DM and EX datasets
format firstdose lastdose mmddyy10.;
retain firstdose lastdose;
if [Link] then
do;
firstdose = .;
lastdose = .;
end;
firstdose = min(firstdose,startdt,enddt);
lastdose = max(lastdose,startdt,enddt);
drop startdt enddt;
if [Link];
run;
**** GET DEMOGRAPHICS DATA;
proc sort
data=[Link]
out=demographic;
by subject;
run;
data demog_dose;
merge demographic
dosing;
by subject;
run;
**** DERIVE THE MAJORITY OF SDTM DM VARIABLES;
data dm;
set EMPTY_DM
demog_dose;
studyid = 'XYZ123';
domain = 'DM';
usubjid = left(uniqueid);
subjid = put(subject,3.);
rfstdtc = put(firstdose,yymmdd10.);
rfendtc = put(lastdose,yymmdd10.);
siteid = substr(subjid,1,1) || "00";
brthdtc = put(dob,yymmdd10.);
age = floor ((intck('month',dob,firstdose) -
(day(firstdose) < day(dob))) / 12);
if age ne . then
ageu = 'YEARS';
country = "USA";
sex=gender;
arm=trt1;
armcd=put(trt,3.);
drop gender trt trt1;
run;
%include '/folders/myfolders/test1/make_sort_order.sas';
%make_sort_order(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=DM);
proc sort
data=dm(keep = &DMKEEPSTRING)
7
Clinical Trials – Make SDTM DM and EX datasets
out=[Link];
by &DMSORTSTRING;
run;
**** CREATE EMPTY SUPPDM DATASET CALLED EMPTY_DM;
%make_empty_dataset(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=SUPPDM);
data suppdm;
set EMPTY_SUPPDM
dm;
keep &SUPPDMKEEPSTRING;
**** OUTPUT OTHER RACE AS A SUPPDM VALUE;
if orace ne '' then
do;
rdomain = 'DM';
qnam = 'RACEOTH';
qlabel = 'Race, Other';
qval = left(orace);
qorig = 'CRF';
output;
end;
**** OUTPUT RANDOMIZATION DATE AS SUPPDM VALUE;
if randdt ne . then
do;
rdomain = 'DM';
qnam = 'RANDDTC';
qlabel = 'Randomization Date';
qval = left(put(randdt,yymmdd10.));
qorig = 'CRF';
output;
end;
run;
%make_sort_order(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=SUPPDM);
proc sort
data=suppdm
out=[Link];
by &SUPPDMSORTSTRING;
run;
Program 6: STDM_EX.sas
/* STDM_EX.sas creates the SDTM EX dataset and saves it as a permanent SAS dataset to the target libref */
%include '/folders/myfolders/test1/[Link]';
%common;
**** CREATE EMPTY DM DATASET CALLED EMPTY_DM;
%include '/folders/myfolders/test1/make_empty_dataset.sas';
8
Clinical Trials – Make SDTM DM and EX datasets
%make_empty_dataset(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,datase
t=EX);
%include '/folders/myfolders/test1/make_sdtm_dy2.sas';
%include '/folders/myfolders/test1/make_sort_order.sas';
**** DERIVE THE MAJORITY OF SDTM EX VARIABLES;
data ex;
set EMPTY_EX
[Link];
studyid = 'XYZ123';
domain = 'EX';
usubjid = left(uniqueid);
exdose = dailydose;
exdostot = dailydose;
exdosu = 'mg';
exdosfrm = 'TABLET, COATED';
exstdtc=put(startdt,yymmdd10.);
exendtc=put(enddt,yymmdd10.);
run;
proc sort
data=ex;
by usubjid;
run;
**** CREATE SDTM STUDYDAY VARIABLES AND INSERT EXTRT;
data ex;
merge ex(in=inex) [Link](keep=usubjid rfstdtc arm);
by usubjid;
if inex;
%make_sdtm_dy(refdate=rfstdtc,date=exstdtc);
%make_sdtm_dy(refdate=rfstdtc,date=exendtc);
**** in this simplistic case all subjects received the treatment they were
randomized to;
extrt = arm;
run;
**** CREATE SEQ VARIABLE;
proc sort
data=ex;
by studyid usubjid extrt exstdtc;
run;
OPTIONS MISSING = ' ';
data ex;
retain STUDYID DOMAIN USUBJID EXSEQ EXTRT EXDOSE EXDOSU EXDOSFRM EXDOSTOT
EXSTDTC EXENDTC EXSTDY EXENDY;
set ex(drop=exseq);
by studyid usubjid extrt exstdtc;
if not ([Link] and [Link]) then
put "WARN" "ING: key variables do not define an unique record. " usubjid=;
9
Clinical Trials – Make SDTM DM and EX datasets
retain exseq;
if [Link] then
exseq = 1;
else
exseq = exseq + 1;
label exseq = "Sequence Number";
run;
**** SORT EX ACCORDING TO METADATA AND SAVE PERMANENT DATASET;
%make_sort_order(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=E
X);
proc sort
data=ex(keep = &EXKEEPSTRING)
out=[Link];
by &EXSORTSTRING;
run;
10