% Schonlau, van Soest, Kapteyn, Couper (2009): propensity-score weighting as a
% correction for selection bias in web surveys, using an Internet sample drawn
% from the Health and Retirement Study.
% NOTE(review): the export carried a publisher field equal to the volume number
% (37); it was dropped as bogus. Add the real publisher only if the style needs it.
@article{7319,
  author   = {Schonlau, Matthias and van Soest, Arthur H. O. and Kapteyn, Arie and Couper, Mick P.},
  title    = {Selection Bias in Web Surveys and the Use of Propensity Scores},
  journal  = {Sociological Methods and Research},
  volume   = {37},
  year     = {2009},
  pages    = {291--318},
  doi      = {10.1177/0049124108327128},
  keywords = {Methodology},
  abstract = {Web surveys have several advantages compared to more traditional surveys with in-person interviews, telephone interviews, or mail surveys. Their most obvious potential drawback is that they may not be representative of the population of interest because the sub-population with access to Internet is quite specific. This paper investigates propensity scores as a method for dealing with selection bias in web surveys. The authors{\textquoteright} main example has an unusually rich sampling design, where the Internet sample is drawn from an existing much larger probability sample that is representative of the US 50+ population and their spouses (the Health and Retirement Study). They use this to estimate propensity scores and to construct weights based on the propensity scores to correct for selectivity. They investigate whether propensity weights constructed on the basis of a relatively small set of variables are sufficient to correct the distribution of other variables so that these distributions become representative of the population. If this is the case, information about these other variables could be collected over the Internet only. Using a backward stepwise regression they find that at a minimum all demographic variables are needed to construct the weights. The propensity adjustment works well for many but not all variables investigated. For example, they find that correcting on the basis of socio-economic status by using education level and personal income is not enough to get a representative estimate of stock ownership. This casts some doubt on the common procedure to use a few basic variables to blindly correct for selectivity in convenience samples drawn over the Internet. Alternatives include providing non-Internet users with access to the Web or conducting web surveys in the context of mixed mode surveys.},
}

% Couper, Kapteyn, Schonlau, Winter (2007): correlates of noncoverage and
% nonresponse in an Internet survey conducted as part of the Health and
% Retirement Study.
% NOTE(review): the export carried a publisher field equal to the volume number
% (36); it was dropped as bogus. The export listed only the start page (131);
% the end page 148 was supplied from memory of the published record - confirm
% against the DOI metadata.
@article{7121,
  author   = {Couper, Mick P. and Kapteyn, Arie and Schonlau, Matthias and Winter, Joachim},
  title    = {Noncoverage and Nonresponse in an {Internet} Survey},
  journal  = {Social Science Research},
  volume   = {36},
  year     = {2007},
  pages    = {131--148},
  doi      = {10.1016/j.ssresearch.2005.10.002},
  keywords = {Methodology},
  abstract = {We explore the correlates of noncoverage and nonresponse in an Internet survey conducted as part of the Health and Retirement Study (HRS), a panel study of persons 50 years old and older in the US. About 30\% of HRS respondents indicated they used the Internet. Of these, 73\% expressed willingness to do a Web survey. A subset of this group was subsequently sent a mailed invitation to participate in a Web survey and 78\% completed the survey. Using multivariate models, we find significant demographic, financial, and health-related differences in access, consistent with other research. There are fewer differences in willingness (given access) and response (given willingness). However, disparities in health and socio-economic status persist after controlling for demographic differences in coverage and response. Weighting on demographics alone is thus unlikely to yield a representative sample in such surveys. Noncoverage (lack of access to the Internet) appears to be of greater concern than nonresponse (unwillingness to participate given access) for representation in Internet surveys of this age group.},
}

% Schonlau, van Soest, Couper, Kapteyn, Winter (2004): proceedings paper on
% adjusting for selection bias with propensity scores, using HRS 2002 wave
% respondents and the subset who answered an additional web survey.
% NOTE(review): only a URL is available for this entry; add urldate (biblatex)
% once the access date is known.
@inproceedings{6907,
  author    = {Schonlau, Matthias and van Soest, Arthur H. O. and Couper, Mick P. and Kapteyn, Arie and Winter, Joachim},
  title     = {Adjust for Selection Bias in Web Surveys with Propensity Scores: The Case of the {Health and Retirement Study}},
  booktitle = {Proceedings of the American Statistical Association},
  year      = {2004},
  url       = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.538.1141\&rep=rep1\&type=pdf},
  keywords  = {Methodology},
  abstract  = {Many web surveys allow respondents to self-select into the survey. Making inference about the population from a self-selected survey is very difficult. We analyzed data from the Health and Retirement Study (HRS) respondents of the 2002 wave as well as supplementary information about which subset of HRS respondents also responded to an additional web survey (web responders). The HRS is a longitudinal study of health, retirement and aging. The target population of the HRS includes all adults in the contiguous United States, aged 51 and over, who reside in households. We investigated whether it is possible to adjust for selection bias using propensity scores. We found that it is possible to make inferences for financial assets based on data from web responders only. However, making inferences about home values was not possible based on data from the web responders only.},
}