Skip to contents

SurveyCTO Factor Generator

This package wrangles categorical variables for SurveyCTO. In the language of R, categorical variables are factors with levels and labels.

Motivation

SurveyCTO is one of the most widely used survey platforms in the world. Based on ODK, it helps users to collect clean and tidy data efficiently.

Its desktop software integrates nicely with Stata, Excel, and SPSS.

This package offers direct access to labeled survey datasets through the SurveyCTO API, rather than through a workaround such as importing SPSS or Stata files into R.

Pre-flight check

You will need access to a SurveyCTO server for this to work. Store the following credentials in a plaintext file:

  • servername
  • username
  • password

See the rsurveycto package for details.

API access

The {rsurveycto} package offers several methods to pull data and forms from the server via API. The first one requires an authorization block. Note that we store it in memory for this session.

library(rsurveycto)
auth <- scto_auth("./auth_details")

This function returns a boolean TRUE to confirm that you’ve saved your authentication block correctly.

check_scto_api()
#> [1] TRUE

Which form do I need?

cat("How many forms do we have on the server?")
#> How many forms do we have on the server?
scto_catalog(auth) |> nrow()
#>  Reading metadata for server `sctofactorgen`.
#> [1] 2

cat("Which one is for github?")
#> Which one is for github?
scto_catalog(auth) |> filter(str_detect(title, "github"))
#>  Reading metadata for server `sctofactorgen`.
#> Key: <type, id, title, version, group_id, group_title>
#>      type                     id                  title    version group_id
#>    <char>                 <char>                 <char>      <num>    <int>
#> 1:   form        demo_for_github        demo_for_github 2404261415        1
#> 2:   form github_with_dup_labels github_with_dup_labels 2405082252        1
#>                                    group_title
#>                                         <char>
#> 1: The root or home group (outside all groups)
#> 2: The root or home group (outside all groups)

Labels with {sctofactorgen}

Everything else can be done with sctofactorgen.

Load the response data.

df <- scto_responses(.formid = "demo_for_github") 
#>  Reading metadata for server `sctofactorgen`.
#>  Reading form `demo_for_github`.
glimpse(df[,1:20])
#> Rows: 11
#> Columns: 20
#> $ CompletionDate    <dttm> 2024-04-26 18:13:43, 2024-04-26 18:15:51, 2024-04-2…
#> $ SubmissionDate    <dttm> 2024-04-26 18:13:43, 2024-04-26 18:15:51, 2024-04-2…
#> $ starttime         <dttm> 2024-04-26 18:13:19, 2024-04-26 18:13:45, 2024-04-2…
#> $ endtime           <dttm> 2024-04-26 18:13:42, 2024-04-26 18:15:51, 2024-04-2…
#> $ deviceid          <chr> "(web)", "(web)", "(web)", "(web)", "(web)", "(web)"…
#> $ device_info       <chr> "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebK…
#> $ duration          <chr> "23", "126", "18", "22", "16", "18", "17", "19", "14…
#> $ consent           <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"
#> $ name              <chr> "Paul Ellis", "Lyndon Riley", "Vivian Fowler", "Tara…
#> $ age               <chr> "28", "26", "24", "30", "24", "42", "61", "27", "24"…
#> $ best_jollof       <chr> "6", "2", "6", "5", "2", "7", "1", "3", "7", "6", "5"
#> $ recommend_kenya_3 <chr> "1", NA, NA, "1", "1", NA, NA, NA, NA, "1", NA
#> $ recommend_kenya_2 <chr> "1", "1", NA, NA, "1", NA, NA, NA, NA, "1", NA
#> $ instanceID        <chr> "uuid:b86c516f-f5e0-4fdb-a9ba-a4b520b618bc", "uuid:1…
#> $ formdef_version   <chr> "2404261409", "2404261409", "2404261415", "240426141…
#> $ review_status     <chr> "APPROVED", "APPROVED", "APPROVED", "APPROVED", "APP…
#> $ KEY               <chr> "uuid:b86c516f-f5e0-4fdb-a9ba-a4b520b618bc", "uuid:1…
#> $ recommend_kenya_8 <chr> NA, "1", NA, "1", NA, NA, NA, NA, NA, "1", NA
#> $ recommend_kenya_6 <chr> NA, "1", "1", NA, NA, NA, NA, "1", NA, "1", NA
#> $ recommend_kenya_5 <chr> NA, NA, "1", "1", NA, NA, "1", NA, NA, "1", NA

Load the questionnaire.

qnr <- scto_form("demo_for_github")
names(qnr)
#> [1] "settings"                        "choices"                        
#> [3] "fields"                          "formulasConvertedToStaticValues"
lapply(qnr, class)
#> $settings
#> [1] "data.table" "data.frame"
#> 
#> $choices
#> [1] "data.table" "data.frame"
#> 
#> $fields
#> [1] "data.table" "data.frame"
#> 
#> $formulasConvertedToStaticValues
#> [1] "logical"

Read the choices.

df_choices <- read_choices(qnr) |> glimpse()
#> Rows: 19
#> Columns: 5
#> $ list_name <chr> "yesno", "yesno", "jollofrice", "jollofrice", "jollofrice", …
#> $ value     <chr> "1", "0", "1", "2", "3", "4", "5", "6", "7", "1", "2", "3", …
#> $ label     <chr> "Yes", "No", "Senegal", "Gambia, The", "Ghana", "Nigeria", "…
#> $ image     <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", …
#> $ filter    <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", …

Read the questions.

df_questions <- read_questions(qnr) |> glimpse()
#> Rows: 19
#> Columns: 23
#> $ type               <chr> "start", "end", "deviceid", "phonenumber", "calcula…
#> $ name               <chr> "starttime", "endtime", "deviceid", "devicephonenum…
#> $ label              <chr> "", "", "", "", "", "", "", "Welcome to the sample …
#> $ hint               <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ default            <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ appearance         <chr> "", "", "", "", "", "", "", "intro", "", "quickcomp…
#> $ constraint         <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ constraint_message <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ relevance          <chr> "", "", "", "", "", "", "", "", "", "", "", "${cons…
#> $ disabled           <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ required           <chr> "", "", "", "", "", "", "", "", "", "yes", "", "", …
#> $ required_message   <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ read_only          <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ calculation        <chr> "", "", "", "", "device-info()", "duration()", "", …
#> $ repeat_count       <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ media_image        <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ media_audio        <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ media_video        <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ choice_filter      <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ note               <chr> "", "", "", "", "", "", "", "", "", "", "", "(Group…
#> $ response_note      <chr> "", "", "", "", "", "", "", "", "", "O", "", "", ""…
#> $ publishable        <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…
#> $ minimum_seconds    <chr> "", "", "", "", "", "", "", "", "", "", "", "", "",…

Attach variable labels to single selects.

cat("Labeling single select questions, question text as the attribute `label`.")
#> Labeling single select questions, question text as the attribute `label`.
df <- singsel_label(df, thefields = df_questions)
#> Prepare a list of variable labels.

Attach variable labels to multi selects.

cat("Labeling multiple select questions, question text and response value as the attribute `label`.")
#> Labeling multiple select questions, question text and response value as the attribute `label`.
df <- multisel_label(df, thefields = df_questions, thechoices = df_choices)
#> Loaded multiselect quetions. 
#> Generated the list of variable labels for multiselect columns.

Convert single selects to factor levels.

cat("Converting single select questions to factors.")
#> Converting single select questions to factors.
df <- singsel_asfactor(df, thefields = df_questions, thechoices = df_choices)
#> Loaded choices

Convert multi selects to integers.

cat("Converting multi select questions to integers.")
#> Converting multi select questions to integers.
df <- multisel_levels(df, thefields = df_questions, thechoices = df_choices)
#> Loaded multiselect quetions.

Attach variable labels to single selects again, now that the columns have been converted.

cat("Labeling single select questions, question text as the attribute `label`.")
#> Labeling single select questions, question text as the attribute `label`.
df <- singsel_label(df, thefields = df_questions)
#> Prepare a list of variable labels.

Attach variable labels to multi selects again, now that the columns have been converted.

cat("Labeling multiple select questions, question text and response value as the attribute `label`.")
#> Labeling multiple select questions, question text and response value as the attribute `label`.
df <- multisel_label(df, thefields = df_questions, thechoices = df_choices)
#> Loaded multiselect quetions. 
#> Generated the list of variable labels for multiselect columns.

Explore

Nice and tidy.

glimpse(df)
#> Rows: 11
#> Columns: 25
#> $ CompletionDate     <dttm> 2024-04-26 18:13:43, 2024-04-26 18:15:51, 2024-04-…
#> $ SubmissionDate     <dttm> 2024-04-26 18:13:43, 2024-04-26 18:15:51, 2024-04-…
#> $ starttime          <dttm> 2024-04-26 18:13:19, 2024-04-26 18:13:45, 2024-04-…
#> $ endtime            <dttm> 2024-04-26 18:13:42, 2024-04-26 18:15:51, 2024-04-…
#> $ deviceid           <chr> "(web)", "(web)", "(web)", "(web)", "(web)", "(web)…
#> $ device_info        <chr> "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWeb…
#> $ duration           <chr> "23", "126", "18", "22", "16", "18", "17", "19", "1…
#> $ consent            <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Y…
#> $ name               <chr> "Paul Ellis", "Lyndon Riley", "Vivian Fowler", "Tar…
#> $ age                <chr> "28", "26", "24", "30", "24", "42", "61", "27", "24…
#> $ best_jollof        <fct> "Sierra Leone", "Gambia, The", "Sierra Leone", "Lib…
#> $ recommend_kenya_3  <dbl> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0
#> $ recommend_kenya_2  <dbl> 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0
#> $ instanceID         <chr> "uuid:b86c516f-f5e0-4fdb-a9ba-a4b520b618bc", "uuid:…
#> $ formdef_version    <chr> "2404261409", "2404261409", "2404261415", "24042614…
#> $ review_status      <chr> "APPROVED", "APPROVED", "APPROVED", "APPROVED", "AP…
#> $ KEY                <chr> "uuid:b86c516f-f5e0-4fdb-a9ba-a4b520b618bc", "uuid:…
#> $ recommend_kenya_8  <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0
#> $ recommend_kenya_6  <dbl> 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0
#> $ recommend_kenya_5  <dbl> 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0
#> $ recommend_kenya_1  <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0
#> $ recommend_kenya_9  <dbl> 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0
#> $ recommend_kenya_4  <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
#> $ recommend_kenya_7  <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0
#> $ recommend_kenya_97 <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1
labelled::get_variable_labels(df) |> enframe() |> unnest(value) |> knitr::kable()
name value
consent Would you like to continue?
name What is your name?
age How old are you?
best_jollof Which country makes the best jollof rice?
recommend_kenya_3 On your first visit to Kenya, which foods should you try? ~ Nyama Choma
recommend_kenya_2 On your first visit to Kenya, which foods should you try? ~ Sukuma Wiki
recommend_kenya_8 On your first visit to Kenya, which foods should you try? ~ Tilapia Fry
recommend_kenya_6 On your first visit to Kenya, which foods should you try? ~ Githeri
recommend_kenya_5 On your first visit to Kenya, which foods should you try? ~ Mutura
recommend_kenya_1 On your first visit to Kenya, which foods should you try? ~ Ugali
recommend_kenya_9 On your first visit to Kenya, which foods should you try? ~ Pilau
recommend_kenya_4 On your first visit to Kenya, which foods should you try? ~ Chips Mayai
recommend_kenya_7 On your first visit to Kenya, which foods should you try? ~ Mandazi
recommend_kenya_97 On your first visit to Kenya, which foods should you try? ~ I can’t choose
str(df)
#> Classes 'data.table' and 'data.frame':   11 obs. of  25 variables:
#>  $ CompletionDate    : POSIXct, format: "2024-04-26 18:13:43" "2024-04-26 18:15:51" ...
#>  $ SubmissionDate    : POSIXct, format: "2024-04-26 18:13:43" "2024-04-26 18:15:51" ...
#>  $ starttime         : POSIXct, format: "2024-04-26 18:13:19" "2024-04-26 18:13:45" ...
#>  $ endtime           : POSIXct, format: "2024-04-26 18:13:42" "2024-04-26 18:15:51" ...
#>  $ deviceid          : chr  "(web)" "(web)" "(web)" "(web)" ...
#>  $ device_info       : chr  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"| __truncated__ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"| __truncated__ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"| __truncated__ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"| __truncated__ ...
#>  $ duration          : chr  "23" "126" "18" "22" ...
#>  $ consent           : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
#>   ..- attr(*, "label")= chr "Would you like to continue?"
#>  $ name              : chr  "Paul Ellis" "Lyndon Riley" "Vivian Fowler" "Tara Murray" ...
#>   ..- attr(*, "label")= chr "What is your name?"
#>  $ age               : chr  "28" "26" "24" "30" ...
#>   ..- attr(*, "label")= chr "How old are you?"
#>  $ best_jollof       : Factor w/ 7 levels "Senegal","Gambia, The",..: 6 2 6 5 2 7 1 3 7 6 ...
#>   ..- attr(*, "label")= chr "Which country makes the best jollof rice?"
#>  $ recommend_kenya_3 : num  1 0 0 1 1 0 0 0 0 1 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Nyama Choma"
#>  $ recommend_kenya_2 : num  1 1 0 0 1 0 0 0 0 1 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Sukuma Wiki"
#>  $ instanceID        : chr  "uuid:b86c516f-f5e0-4fdb-a9ba-a4b520b618bc" "uuid:1f61c00c-2c82-425e-af6d-4b44ef7b7345" "uuid:a80b4daf-abfa-4f37-b248-d79710e9ddb4" "uuid:98705aa6-c97a-47c5-b4d4-6356104340b5" ...
#>  $ formdef_version   : chr  "2404261409" "2404261409" "2404261415" "2404261415" ...
#>  $ review_status     : chr  "APPROVED" "APPROVED" "APPROVED" "APPROVED" ...
#>  $ KEY               : chr  "uuid:b86c516f-f5e0-4fdb-a9ba-a4b520b618bc" "uuid:1f61c00c-2c82-425e-af6d-4b44ef7b7345" "uuid:a80b4daf-abfa-4f37-b248-d79710e9ddb4" "uuid:98705aa6-c97a-47c5-b4d4-6356104340b5" ...
#>  $ recommend_kenya_8 : num  0 1 0 1 0 0 0 0 0 1 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Tilapia Fry"
#>  $ recommend_kenya_6 : num  0 1 1 0 0 0 0 1 0 1 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Githeri"
#>  $ recommend_kenya_5 : num  0 0 1 1 0 0 1 0 0 1 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Mutura"
#>  $ recommend_kenya_1 : num  0 0 1 1 0 0 0 0 0 0 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Ugali"
#>  $ recommend_kenya_9 : num  0 0 1 0 0 0 0 1 0 0 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Pilau"
#>  $ recommend_kenya_4 : num  0 0 0 1 0 0 0 0 0 0 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Chips Mayai"
#>  $ recommend_kenya_7 : num  0 0 0 1 0 0 0 0 1 1 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ Mandazi"
#>  $ recommend_kenya_97: num  0 0 0 0 0 1 0 0 0 0 ...
#>   ..- attr(*, "label")= chr "On your first visit to Kenya, which foods should you try? ~ I can't choose"
#>  - attr(*, ".internal.selfref")=<externalptr> 
#>  - attr(*, "scto_type")= chr "form"
select(df, where(is.factor)) |> map(~janitor::tabyl(.) |> janitor::adorn_pct_formatting() |> arrange(-n))
#> $consent
#>    .  n percent
#>  Yes 11  100.0%
#>   No  0    0.0%
#> 
#> $best_jollof
#>             . n percent
#>  Sierra Leone 3   27.3%
#>   Gambia, The 2   18.2%
#>       Liberia 2   18.2%
#>      Cameroon 2   18.2%
#>       Senegal 1    9.1%
#>         Ghana 1    9.1%
#>       Nigeria 0    0.0%
select(df, where(is.numeric)) |> map(~janitor::tabyl(.) |> janitor::adorn_pct_formatting())
#> $recommend_kenya_3
#>  . n percent
#>  0 7   63.6%
#>  1 4   36.4%
#> 
#> $recommend_kenya_2
#>  . n percent
#>  0 7   63.6%
#>  1 4   36.4%
#> 
#> $recommend_kenya_8
#>  . n percent
#>  0 8   72.7%
#>  1 3   27.3%
#> 
#> $recommend_kenya_6
#>  . n percent
#>  0 7   63.6%
#>  1 4   36.4%
#> 
#> $recommend_kenya_5
#>  . n percent
#>  0 7   63.6%
#>  1 4   36.4%
#> 
#> $recommend_kenya_1
#>  . n percent
#>  0 9   81.8%
#>  1 2   18.2%
#> 
#> $recommend_kenya_9
#>  . n percent
#>  0 9   81.8%
#>  1 2   18.2%
#> 
#> $recommend_kenya_4
#>  .  n percent
#>  0 10   90.9%
#>  1  1    9.1%
#> 
#> $recommend_kenya_7
#>  . n percent
#>  0 8   72.7%
#>  1 3   27.3%
#> 
#> $recommend_kenya_97
#>  . n percent
#>  0 9   81.8%
#>  1 2   18.2%