Commit 05a10ec4 authored by jon r
Browse files

Update dataset and tooling to new source files

- get up-to-date source CSV
- repair multiline CSV
- account for additional metadata
- build CSV from GeoJSON
- build outliers JSON
- add csv2geojson and geojson2csv as explicit dependencies and use them without assuming a --global install
parent 1e36aba2
......@@ -19,7 +19,7 @@ fi
echo "Project: $a"
echo "Run: $b"
hash csv2geojson 2>/dev/null || { echo >&2 "csv2geojson not installed, check your Node.js/NVM setup. Aborting." ; exit 1; }
if [ ! -d ../node_modules ]; then { printf >&2 'ERROR: node_modules not present.\n\nTry running npm install.\n\nAborting.' ; exit 1; } fi
source 99_template_session
export ext=csv
......@@ -31,3 +31,4 @@ source 99_template_ext
./replace_types.js $out $out-tags
./04_bundle.sh
#!/usr/bin/env bash
wget -O $i https://discourse.transformap.co/uploads/default/original/1X/bf3b2aa34e93becb84ab94482902f5cb30e42a06.csv
wget -O $i https://discourse.transformap.co/uploads/default/original/1X/c8738fb5ce0097c9256778995a65c2bd4961800e.csv
#!/usr/bin/env bash
cp $i $o
dos2unix $o
vim "+set nomore" "+bufdo set fileencoding=utf8 | w" "+q" $o
# CSV files created in Windows come with unusable text encoding, to be fixed here.
#dos2unix $o
#vim "+set nomore" "+bufdo set fileencoding=utf8 | w" "+q" $o
xargs -a regex-repair -I {} perl -i -0777 -pe '{}' $o
xargs -a regex-unicode -I {} perl -CSD -pi -e '{}' $o
# Remove lines without coordinates
# Find new lines within double quotes and replace them by spaces.
# Many kudos to https://www.linuxquestions.org/questions/linux-newbie-8/removing-multiple-lines-from-cell-data-in-a-csv-file-4175538718/#post5343293
# Requires gawk >= 4.1.0 https://stackoverflow.com/questions/16529716/awk-save-modifications-in-place
gawk -i inplace 'FPAT="([^,]+)|(\"[^\"]+\")"{x=$0;while((gsub(/\"/,"\"",x)%2)!=0){getline;x=x " ";x=x $0};$0=x;print}' $o
# Remove lines without coordinates and add those to a separate list of outliers
head -n 1 < $o > ${out}liers.csv
grep ";;;" $o >> ${out}liers.csv
sed -ni '/;;;/!p' $o
#!/usr/bin/env bash
csv2geojson --lat "lat" --lon "lon" --delimiter ";" $out.csv > $out.json
npm run --silent csv2geojson -- --lat "lat" --lon "lon" --delimiter ";" data/$out.csv > $out.json 2> ${out}liers.json
cat $out.json | jq -r '.features | map (.properties.types | split(";#") ) | { tags: . }' > $out-tags.json
npm run geojson2csv -- data/$out.json data/$out-verlan.csv
#!/usr/bin/env bash
cp $out-tags-merged.json gartenkarte.json
cp $out-verlan.csv gartenkarte.csv
cp ${out}liers.csv gartenkarte-outliers.csv
git add -f gartenkarte.json gartenkarte-outliers.csv
git commit -m "update gartenkarte.json"
cp ${out}liers.json gartenkarte-outliers.json
git add -f gartenkarte.json gartenkarte.csv gartenkarte-outliers.csv gartenkarte-outliers.json
git commit -m "build and outliers for $session"
......@@ -4,4 +4,6 @@ s/Entwicklungsstand:/status/g
s/Stadtackertyp/types/g
s/ID/id/g
s/(?<=[0-9]),(?=[0-9])/./g
s/Adressenzusatz/comment/g
s/Internetauftritt/url/g
......@@ -5,6 +5,8 @@
"license": "WTFPL",
"dependencies": {},
"devDependencies": {
"csv2geojson": "^5.0.2",
"geojson2csv": "git+https://github.com/tsamaya/geojson2csv.git#cd5435b",
"less": "^2.7.2",
"less-plugin-clean-css": "^1.5.1",
"uglify-es": "^3.1.0"
......@@ -13,7 +15,9 @@
"test": "echo \"Error: no test specified\" && exit 1",
"build": "npm run build-js && npm run build-css",
"build-css": "lessc --clean-css styles/less/style.less styles/css/style.css && cat styles/css/style.css > dist/site.css",
"build-js": "uglifyjs scripts/red_fetch.js scripts/map.js > dist/site.js"
"build-js": "uglifyjs scripts/red_fetch.js scripts/map.js > dist/site.js",
"csv2geojson": "csv2geojson",
"geojson2csv": "geojson2csv"
},
"repository": {
"type": "git",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment