{"@context":{"@language":"en","@vocab":"https:\/\/schema.org\/","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http:\/\/mlcommons.org\/croissant\/","rai":"http:\/\/mlcommons.org\/croissant\/RAI\/","dct":"http:\/\/purl.org\/dc\/terms\/","data":{"@id":"cr:data","@type":"@id"},"dataType":{"@id":"cr:dataType","@type":"@id"},"examples":{"@id":"cr:examples","@type":"@id"},"extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform","sc":"https:\/\/schema.org\/","nada":"http:\/\/nada.org\/terms\/"},"@type":"sc:Dataset","conformsTo":"http:\/\/mlcommons.org\/croissant\/1.0","@id":"WLD_2023_SYNTH-SVY-EN_v01_M","name":"Synthetic Data for an Imaginary Country, Sample, 2023","alternateName":null,"identifier":"WLD_2023_SYNTH-SVY-EN_v01_M","description":"The dataset is a relational dataset of 8,000 households, representing a sample of the population of an imaginary middle-income country. The dataset contains two data files: one with variables at the household level, the other one with variables at the individual level. It includes variables that are typically collected in population censuses (demography, education, occupation, dwelling characteristics, fertility, mortality, and migration) and in household surveys (household expenditure, anthropometric data for children, assets ownership). The data only includes ordinary households (no community households). The dataset was created using REaLTabFormer, a model that leverages deep learning methods. The dataset was created for the purpose of training and simulation and is not intended to be representative of any specific country.  \n\nThe full-population dataset (with about 10 million individuals) is also distributed as open data.","creator":[{"@type":"Organization","name":"Development Data Group, Data Analytics Unit"}],"publisher":[],"dateCreated":"2024-06-26","datePublished":"2023-05-01T04:00:00.000Z","version":"V. 2023-05-01 8K HH EN","spatialCoverage":[{"@type":"Place","name":"World"}],"temporalCoverage":"2023","license":null,"url":"https:\/\/nada-demo.ihsn.org\/index.php\/catalog\/study\/WLD_2023_SYNTH-SVY-EN_v01_M","keywords":["synthetic data","open data","safe data","demographics","education","mortality","fertility","child malnutrition","labor, employment","housing","dwelling","water and sanitation","household expenditure","migration"],"distribution":[{"@type":"cr:FileObject","@id":"349","name":"synthetic_survey_questionnaire_info.xlsx","encodingFormat":"application\/vnd.openxmlformats-officedocument.spreadsheetml.sheet","md5":"6bc717f28846f9f998998b4d5eea97ba","contentSize":"50417 B","description":"A fake questionnaire and some additional information corresponding to the variables included in the synthetic dataset, intended to be used as training material only. Contains this information in English and French.","contentUrl":"https:\/\/nada-demo.ihsn.org\/index.php\/catalog\/135\/download\/349"},{"@type":"cr:FileObject","@id":"350","name":"synthetic_data_technical_documentation.pdf","encodingFormat":"application\/pdf","md5":"ddc78e1ea887c83b8bdaccc8b72c1acf","contentSize":"1257763 B","description":"A technical description of the process of generating the synthetic dataset.","contentUrl":"https:\/\/nada-demo.ihsn.org\/index.php\/catalog\/135\/download\/350"},{"@type":"cr:FileObject","@id":"352","name":"20230505_draw_sample.R","encodingFormat":"application\/octet-stream","md5":"7232e3a8394b8cd7191f82ad412d86b1","contentSize":"16371 B","description":"An R script used to extract the sample of 8,000 households from the full synthetic dataset.","contentUrl":"https:\/\/nada-demo.ihsn.org\/index.php\/catalog\/135\/download\/352"},{"@type":"cr:FileObject","@id":"353","name":"WLD_2023_SYNTH-SVY-EN_v01_M_STATA17.zip","encodingFormat":"application\/zip","md5":"76dba078608ccf1d0c869ba9dbb64978","contentSize":"827856 B","description":"Sample synthetic dataset (household and individual-level data files); microdata in Stata format","contentUrl":"https:\/\/nada-demo.ihsn.org\/index.php\/catalog\/135\/download\/353"},{"@type":"cr:FileObject","@id":"354","name":"WLD_2023_SYNTH-SVY-EN_v01_M_SPSS.zip","encodingFormat":"application\/zip","md5":"d2623aedfdbb7d6c75e26218ca3e5555","contentSize":"1137194 B","description":"Sample synthetic dataset (household and individual-level data files); microdata in SPSS format","contentUrl":"https:\/\/nada-demo.ihsn.org\/index.php\/catalog\/135\/download\/354"},{"@type":"cr:FileObject","@id":"364","name":"WLD_2023_SYNTH-SVY-IND-EN_v01_M.csv","encodingFormat":"text\/csv","md5":"b37017fa3e189f80eddbf8b44b549b85","contentSize":"2572448 B","description":"","contentUrl":"https:\/\/nada-demo.ihsn.org\/index.php\/catalog\/135\/download\/364"}],"recordSet":[{"@id":"310","@type":"cr:RecordSet","field":[{"@type":"cr:Field","name":"hid","description":"Unique household identifier","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"idno","description":"Person identification number","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"relation","description":"Relationship to the head of household","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"sex","description":"Sex","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"age","description":"Age in years","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"age_month","description":"Age in months","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"marstat","description":"Marital status","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"religion","description":"Religion","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"school_attend","description":"School attendance","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"educ_attain","description":"Education attainment","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"yrs_school","description":"Years of schooling","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"literacy","description":"Literacy status","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"act_status","description":"Activity status","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"labor_force","description":"Labor force status","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"occupation","description":"Main occupation","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"industry","description":"Industry of main occupation","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"migrate_recent","description":"Recent migration","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"disability","description":"Has a disability","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"blind","description":"Blind","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"deaf","description":"Deaf","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"mental","description":"Mental disability","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"ch_weight","description":"Child weight (for 0 to 59 months)","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"ch_height","description":"Child height (for 0 to 59 months)","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"children_born","description":"Number of children ever born","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"children_surv","description":"Number of surviving children","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"births_12m","description":"Number of births in past 12 months","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}},{"@type":"cr:Field","name":"hhweight","description":"Household weight","dataType":"sc:Text","source":{"fileObject":{"@id":"364"}}}]}]}