This Linux script pulls down all the play-by-play files that have been posted at Retrosheet.org and pushes them through Chadwick's cwbox application, thereby generating one SportsML file for every "Retrofile."
This script was donated by XML Team, to be used with no strings attached or obligations of any kind. No warranties, either! But it's simple enough that any Linux rookie should be able to see what's going on.
Note that as of March 6, 2008, Retrosheet still has not posted game data for the 1999 season.
# retrosheet-spider.sh
#
# by Alan Karben, XML Team Solutions
# Start from a clean slate. Comment out the announcement and the rm line
# below if you would rather keep the output of a previous run.
printf '%s\n' 'Removing Old Directories'
rm -rf source sportsml

# Append a start marker and the current time to the run log.
{
  printf '%s\n' 'Started'
  date
} >> log.txt

# Create the download area (with its zip archive folder) and the output area.
printf '%s\n' 'Starting to pull RetroFiles'
mkdir -p source/zips sportsml
# process_year YEAR ARCHIVE...
#
# Downloads each named archive from http://www.retrosheet.org/YEAR/, keeps it
# under source/zips/, unpacks it into source/YEAR/, and then runs cwbox once
# for every game id listed in the unpacked event files (*.EV*), writing
# sportsml/YEAR/GAMEID.xml.
#
# Must be called from the directory that holds source/ and sportsml/.
# The function body is a subshell, so the "cd" and the working variables
# below never leak into the caller, and "exit" only leaves the function.
#
# Returns 0 when the year was processed, non-zero when it was skipped
# (no archive could be downloaded, or a directory could not be used).
process_year() (
  year=$1
  shift

  echo "Processing Year ${year}"
  mkdir -p source/zips "source/${year}" "sportsml/${year}" || exit 1

  downloaded=0
  for archive in "$@"; do
    zip_path="source/zips/${archive}"
    if ! wget -q -O "${zip_path}" \
      "http://www.retrosheet.org/${year}/${archive}"; then
      # wget -O creates the output file up front, so a failed transfer
      # leaves an empty or partial file behind; do not keep it.
      rm -f "${zip_path}"
      echo "Could not download ${archive}; skipping it" >&2
      continue
    fi
    downloaded=$((downloaded + 1))
    # -u extracts new files and only replaces older ones, so unzip never
    # stops to ask about names that appear in more than one archive.
    unzip -qu "${zip_path}" -d "source/${year}" ||
      echo "unzip reported problems with ${archive}" >&2
  done

  if [ "${downloaded}" -eq 0 ]; then
    echo "No archives could be downloaded for ${year}; skipping year" >&2
    exit 1
  fi

  # The event files are processed from inside the year directory, as before.
  cd "source/${year}" || exit 1
  for evfile in *.EV*; do
    # An unmatched glob stays literal; skip it instead of processing "*.EV*".
    [ -e "${evfile}" ] || continue
    echo "Processing Event File ${evfile}"
    dos2unix "${evfile}"
    # Game ids are the second comma-separated field of every "id," record.
    grep '^id,' "${evfile}" | cut -d , -f 2 |
      while IFS= read -r gameid; do
        echo "Processing Year: ${year} | Event File: ${evfile} | Game: ${gameid}"
        # stdin is pointed at /dev/null so cwbox cannot consume the id list.
        cwbox -q -i "${gameid}" -y "${year}" -S "${evfile}" \
          < /dev/null > "../../sportsml/${year}/${gameid}.xml" ||
          echo "cwbox failed for game ${gameid} (${evfile})" >&2
      done
  done
)

# 1957-1997: separate American League and National League archives.
# A skipped year has already been reported by process_year; carry on.
for YEAR in $(seq 1957 1997); do
  process_year "${YEAR}" "${YEAR}al.zip" "${YEAR}nl.zip" || continue
done

# 1998-2007: a single combined major-league archive per year.
for YEAR in $(seq 1998 2007); do
  process_year "${YEAR}" "${YEAR}ml.zip" || continue
done
# Append the completion marker and the finish time to the run log.
{
  printf '%s\n' 'Ended'
  date
} >> log.txt