Commit cf197acb authored by jon r's avatar jon r
Browse files

clean outliers after conversion, and differently. fixes #7

parent 6daf369e
......@@ -10,7 +10,3 @@ xargs -a regex-unicode -I {} perl -CSD -pi -e '{}' $o
# Join records whose quoted cell is spread over several lines into one line,
# rewriting $o in place.
# Many kudos to https://www.linuxquestions.org/questions/linux-newbie-8/removing-multiple-lines-from-cell-data-in-a-csv-file-4175538718/#post5343293
# Requires gawk >= 4.1.0 https://stackoverflow.com/questions/16529716/awk-save-modifications-in-place
# How the awk program works:
#   - The FPAT assignment sits in the pattern position; its value is a
#     non-empty string, so the action runs for every record.
#   - gsub(/\"/,"\"",x) replaces each double quote with itself, so x is left
#     unchanged and the return value is the number of quotes in x.
#   - While that count is odd, the next input line is fetched with getline and
#     appended to x after a single space.
#   - The joined text is assigned to $0 and printed.
# NOTE(review): getline's return value is not checked; confirm the loop still
# terminates when a quote is left unbalanced at end of file.
gawk -i inplace 'FPAT="([^,]+)|(\"[^\"]+\")"{x=$0;while((gsub(/\"/,"\"",x)%2)!=0){getline;x=x " ";x=x $0};$0=x;print}' $o
# Remove lines without coordinates and add those to a separate list of outliers.
# The header line of $o is copied first so the outlier file stays a valid CSV;
# every line containing ";;;" is then appended to it and deleted from $o.
# All expansions are quoted so paths with spaces or glob characters survive.
head -n 1 < "$o" > "${out}liers.csv"
grep -- ';;;' "$o" >> "${out}liers.csv"
# Deleting the matching lines is equivalent to the former `-n` + `!p` form.
sed -i '/;;;/d' "$o"
......@@ -3,3 +3,8 @@
# Run the csv2geojson npm script on data/$out.csv (";"-delimited, coordinates
# in the "lat" and "lon" columns). Its stdout goes to $out.json and its stderr
# is captured in ${out}liers.json.
npm run --silent csv2geojson -- --lat "lat" --lon "lon" --delimiter ";" "data/$out.csv" > "$out.json" 2> "${out}liers.json"
# Collect each feature's properties.types, split on ";#", under a "tags" key.
# jq reads the file directly; no extra cat process is needed.
jq -r '.features | map (.properties.types | split(";#") ) | { tags: . }' "$out.json" > "$out-tags.json"
# NOTE(review): this reads data/$out.json while the conversion above writes
# $out.json -- confirm which path is intended.
npm run geojson2csv -- "data/$out.json" "data/$out-verlan.csv"
# Remove lines without coordinates and add those to a separate list of outliers.
# The header line of $o is copied first so the outlier file stays a valid CSV;
# every line ending in ";;" is then appended to it and deleted from $o.
# grep and sed share the same pattern so both select exactly the same lines.
head -n 1 < "$o" > "${out}liers.csv"
grep -- ';;$' "$o" >> "${out}liers.csv"
# Plain -i: the former `-in` was parsed as -i with backup suffix "n" and left
# a stray "<file>n" copy behind on every run.
sed -i '/;;$/d' "$o"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment