从零开始,构建电子地图网站:0_3_数据处理python(1)
一、环境搭建
Python往postgresql中导数据,至少需要3个库包,gdal、shaply、psycopg2。
先一个一个来。
1.安装gdal
直接安会有问题,先去下个安装程序,https://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal
根据python的版本选择程序。
如果不知道python的版本,可以打开cmd,输入python,回车。
根据python版本和电脑位数,选了GDAL-3.0.1-cp37-cp37m-win32.whl下载。
安装的时候,打开cmd,运行语句:pip install D:\gismap\软件\ GDAL-3.0.1-cp37-cp37m-win32.whl,输入,回车。
如果提示,
提示要升级pip,写啥就敲啥。python -m pip install --upgrade pip
升级完了,接着运行上一个语句,把GADL安装了。
2.安装shaply
先去下个安装程序,https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely
根据python版本和电脑位数,选了Shapely-1.6.4.post2-cp37-cp37m-win32.whl下载。
安装的时候,打开cmd,运行语句:pip install D:\gismap\软件\Shapely-1.6.4.post2-cp37-cp37m-win32.whl,输入,回车。
如果提示,
提示要升级pip,写啥就敲啥。python -m pip install --upgrade pip
升级完了,接着运行上一个语句,把shapely安装了。
3.安装psycopg2
这个直接在pycharm上安装就行。
File——Settings,选择Project Interpreter,选择python.exe路径,点击加号,加库包,搜索需要的库包,选中,点击Install Package安装。
装pgcopy,能把psycopg2都装上。
二、测试库包
先建个txt,readshptopg.txt,改成readshptopg.py,用pycharm打开,直接拖拽到界面里就可以,如果显示,No interpreter,就设置一下,引到python.exe所在的路径下。
敲入:
import shapely
import psycopg2
import gdal
如果有代码补全,那么就说明安装得差不多了。
然后开始一个一个包来引用。
三、确定编码
首先确定电脑的编码。
打开cmd,输入chcp,我的电脑返回“活动代码页:936”,这说明我的电脑默认编码格式是gbk的。
- 脚本结构定义
Py脚本怎么写都能执行,但还是将结构规范化一下,这是代码的整洁之美。
# coding=gbk
Import 库包
Def 函数():
if __name__ == '__main__':
执行函数
四、读取shp文件
这个是读shp,返回一个list的方法,再把list写入一个文本文档的方法。
# coding=gbk try: from osgeo import gdal from osgeo import ogr except ImportError: import gdal import ogr # pathStr,shp文件的全路径 def ReadVectorFile(pathStr): # 返回结果是一个list result=[] # 支持中文路径 gdal.SetConfigOption("GDAL_FILENAME_IS_UTF8", "NO") # 属性表字段支持中文 gdal.SetConfigOption("SHAPE_ENCODING", "") strVectorFile = pathStr # 注册所有的驱动 ogr.RegisterAll() # 打开数据 ds = ogr.Open(strVectorFile, 0) # 获取该数据源中的图层个数,一般shp数据图层只有一个,如果是mdb、dxf等图层就会有多个 iLayerCount = ds.GetLayerCount() # 获取第一个图层 oLayer = ds.GetLayerByIndex(0) # 对图层进行初始化 oLayer.ResetReading() # 获取图层中的属性表表头并输出,可以定义建表语句 print("属性表结构信息:") oDefn = oLayer.GetLayerDefn() iFieldCount = oDefn.GetFieldCount() for iAttr in range(iFieldCount): oField = oDefn.GetFieldDefn(iAttr) print("%s: %s(%d.%d)" % ( \ \ oField.GetNameRef(), \ \ oField.GetFieldTypeName(oField.GetType()), \ \ oField.GetWidth(), \ \ oField.GetPrecision())) # 输出图层中的要素个数 print("要素个数 = ", oLayer.GetFeatureCount(0)) oFeature = oLayer.GetNextFeature() # 下面开始遍历图层中的要素,将对象都作为string输出 while oFeature is not None: # 获取要素中的属性表内容 lineStr=[] for iField in range(iFieldCount): lineStr.append(oFeature.GetFieldAsString(iField)) # 获取要素中的几何体 oGeometry = oFeature.GetGeometryRef() lineStr.append(str(oGeometry)) # print(lineStr) result.append(lineStr) # 循环 oFeature = oLayer.GetNextFeature() print("数据集关闭!") return result if __name__ == '__main__': result=ReadVectorFile(r'D:\gismap\data\v6_time_cnty_pts_utf_wgs84\v6_time_cnty_pts_utf_wgs84.shp') f_new=open(r'D:\gismap\data\v6_time_cnty_pts_utf_wgs84\v6_time_cnty_pts_utf_wgs84.txt','a',encoding='utf-8') for r in result: for p in r: f_new.write(p+'\t') f_new.write('\n') f_new.close()
好像这里没有能用的到shapely的方法,shapely是相对轻量级的gis库包,可以用来构建rtree和进行点面判断等。
五、Pg库建表语句
直接将之前shp生成的txt文件作为源,写入数据库。
既然要写入,至少要进行两个步骤的操作。
其一是建表。
其二是插入。
先写v6_time_cnty_pts_utf_wgs84.shp这个的建表语句,建表的时候最好加上字段说明,否则时间长了,都不知道这个表是干什么的了。
建表语句如下,顺便见了个索引。
--建表
CREATE TABLE public. v6_time_cnty_pts_utf_wgs84(
gid SERIAL8 PRIMARY KEY NOT NULL,
name_py varchar(40),
name_ch varchar(45),
name_ft varchar(45),
x_coor float8,
y_coor float8,
pres_loc varchar(60),
type_py varchar(15),
type_ch varchar(15),
lev_rank varchar(1),
beg_yr int8,
beg_rule varchar(1),
end_yr int8,
end_rule varchar(1),
note_id int8,
obj_type varchar(7),
sys_id int8,
geo_src varchar(10),
compiler varchar(12),
gecomplr varchar(10),
checker varchar(10),
ent_date varchar(10),
beg_chg_ty varchar(21),
end_chg_ty varchar(30),
geom geometry
);
--建立索引
CREATE INDEX v6_time_cnty_pts_utf_wgs84_index ON v6_time_cnty_pts_utf_wgs84 USING btree(gid);
--表说明
COMMENT ON TABLE public.v6_time_cnty_pts_utf_wgs84 IS '第6版中国历史地理时间序列点数据';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.gid IS '主键ID';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.name_py IS '拼音名称';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.name_ch IS '简体中文名称';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.name_ft IS '繁体中文名称';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.x_coor IS '经度';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.y_coor IS '纬度';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.pres_loc IS '现所在地';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.type_py IS '建制类型拼音';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.type_ch IS '建制类型简体中文';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.lev_rank IS '建制等级';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.beg_yr IS '建制开始时间';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.beg_rule IS '开始时间精度';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.end_yr IS '建制结束时间';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.end_rule IS '结束时间精度';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.note_id IS '系统id';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.obj_type IS 'geometry对象类型';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.sys_id IS '系统id';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.geo_src IS 'geometry数据来源';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.compiler IS '编辑人员';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.gecomplr IS '绘制人员';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.checker IS '审核人员';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.ent_date IS '结束时间';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.beg_chg_ty IS '建制开始原因';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.end_chg_ty IS '建制结束原因';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.geom IS 'geometry对象';
六、Insert语句
插一条数据进去。
INSERT INTO v6_time_cnty_pts_utf_wgs84(name_py,name_ch,name_ft,x_coor,y_coor,pres_loc,type_py,type_ch,lev_rank,beg_yr,beg_rule,end_yr,end_rule,note_id,obj_type,sys_id,geo_src,compiler,gecomplr,checker,ent_date,beg_chg_ty,end_chg_ty,geom) VALUES('Luowubu','罗婺部','羅婺部',102.40378,25.88668,'云南省禄劝彝族苗族自治县西北七十二里云龙','Bu','部','6',960,null,1253,null,80317,' POINT',80317,'FROM_FD',null,null,null,null,'新建','撤销',st_geomfromtext('POINT(102.4037799950270653 25.88667999033716072)',4326))
七、Python写入pg库
先测试一下,能不能连接成功
# coding=gbk import psycopg2 conn = psycopg2.connect(database="postgres", user="postgres", password="123456", host="127.0.0.1", port="5432") print('连接成功')
打印“连接成功就说明连接成功了。
先执行个建表语句。
# coding=gbk
import psycopg2
# 连接数据库
conn = psycopg2.connect(database="postgres", user="postgres", password="123456", host="127.0.0.1", port="5432")
print('连接成功')
# 建立游标
cur = conn.cursor()
cur.execute('''--建表
CREATE TABLE public. v6_time_cnty_pts_utf_wgs84(
gid SERIAL8 PRIMARY KEY NOT NULL,
name_py varchar(40),
name_ch varchar(45),
name_ft varchar(45),
x_coor float8,
y_coor float8,
pres_loc varchar(60),
type_py varchar(15),
type_ch varchar(15),
lev_rank varchar(1),
beg_yr int8,
beg_rule varchar(1),
end_yr int8,
end_rule varchar(1),
note_id int8,
obj_type varchar(7),
sys_id int8,
geo_src varchar(10),
compiler varchar(12),
gecomplr varchar(10),
checker varchar(10),
ent_date varchar(10),
beg_chg_ty varchar(21),
end_chg_ty varchar(30),
geom geometry
);
--建立索引
CREATE INDEX v6_time_cnty_pts_utf_wgs84_index ON v6_time_cnty_pts_utf_wgs84 USING btree(gid);
--表说明
COMMENT ON TABLE public.v6_time_cnty_pts_utf_wgs84 IS '第6版中国历史地理时间序列点数据';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.gid IS '主键ID';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.name_py IS '拼音名称';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.name_ch IS '简体中文名称';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.name_ft IS '繁体中文名称';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.x_coor IS '经度';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.y_coor IS '纬度';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.pres_loc IS '现所在地';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.type_py IS '建制类型拼音';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.type_ch IS '建制类型简体中文';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.lev_rank IS '建制等级';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.beg_yr IS '建制开始时间';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.beg_rule IS '开始时间精度';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.end_yr IS '建制结束时间';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.end_rule IS '结束时间精度';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.note_id IS '系统id';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.obj_type IS 'geometry对象类型';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.sys_id IS '系统id';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.geo_src IS 'geometry数据来源';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.compiler IS '编辑人员';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.gecomplr IS '绘制人员';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.checker IS '审核人员';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.ent_date IS '结束时间';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.beg_chg_ty IS '建制开始原因';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.end_chg_ty IS '建制结束原因';
COMMENT ON COLUMN public.v6_time_cnty_pts_utf_wgs84.geom IS 'geometry对象';''')
建表完成之后,用之前生成的txt文件写库。
# coding=gbk import psycopg2 # 连接数据库 conn = psycopg2.connect(database="postgres", user="postgres", password="123456", host="127.0.0.1", port="5432") print('连接成功') # 建立游标 cur = conn.cursor() f=open(r'D:\gismap\data\v6_time_cnty_pts_utf_wgs84\v6_time_cnty_pts_utf_wgs84.txt','r',encoding='utf-8') # 按行读入txt flines=f.readlines() for line in flines: # 去掉干扰,切词 abbrlist=line.replace("'"," ").split('\t') # name_py,name_ch,name_ft,x_coor,y_coor,pres_loc,type_py,type_ch,lev_rank,beg_yr,beg_rule,end_yr,end_rule,note_id,obj_type,sys_id,geo_src,compiler,gecomplr,checker,ent_date,beg_chg_ty,end_chg_ty,geom name_py='null' if (abbrlist[0]!=''): name_py="'"+abbrlist[0]+"'" name_ch='null' if (abbrlist[1]!=''): name_ch="'"+abbrlist[1]+"'" name_ft='null' if (abbrlist[2]!=''): name_ft="'"+abbrlist[2]+"'" x_coor='null' if (abbrlist[3]!=''): x_coor=abbrlist[3] y_coor='null' if (abbrlist[4]!=''): y_coor=abbrlist[4] pres_loc='null' if (abbrlist[5]!=''): pres_loc="'"+abbrlist[5]+"'" type_py='null' if (abbrlist[6]!=''): type_py="'"+abbrlist[6]+"'" type_ch='null' if (abbrlist[7]!=''): type_ch="'"+abbrlist[7]+"'" lev_rank='null' if (abbrlist[8]!=''): lev_rank="'"+abbrlist[8]+"'" beg_yr='null' if (abbrlist[9]!=''): beg_yr=abbrlist[9] beg_rule='null' if (abbrlist[10]!=''): beg_rule="'"+abbrlist[10]+"'" end_yr='null' if (abbrlist[11]!=''): end_yr=abbrlist[11] end_rule='null' if (abbrlist[12]!=''): end_rule="'"+abbrlist[12]+"'" note_id='null' if (abbrlist[13]!=''): note_id=abbrlist[13] obj_type='null' if (abbrlist[14]!=''): obj_type="'"+abbrlist[14]+"'" sys_id='null' if (abbrlist[15]!=''): sys_id=abbrlist[15] geo_src='null' if (abbrlist[16]!=''): geo_src="'"+abbrlist[16]+"'" compiler='null' if (abbrlist[17]!=''): compiler="'"+abbrlist[17]+"'" gecomplr='null' if (abbrlist[18]!=''): gecomplr="'"+abbrlist[18]+"'" checker='null' if (abbrlist[19]!=''): checker="'"+abbrlist[19]+"'" ent_date='null' if (abbrlist[20]!=''): ent_date="'"+abbrlist[20]+"'" beg_chg_ty='null' if (abbrlist[21]!=''): beg_chg_ty="'"+abbrlist[21]+"'" end_chg_ty='null' if (abbrlist[22]!=''): end_chg_ty="'"+abbrlist[22]+"'" geom='null' if (abbrlist[23]!=''): geom="st_geomfromtext('"+abbrlist[23]+"',4326)" # 拼接sql语句 sqltxt="INSERT INTO v6_time_cnty_pts_utf_wgs84(" \ "name_py,name_ch,name_ft,x_coor,y_coor,pres_loc,type_py,type_ch,lev_rank,beg_yr,beg_rule," \ "end_yr,end_rule,note_id,obj_type,sys_id,geo_src,compiler,gecomplr,checker,ent_date," \ "beg_chg_ty,end_chg_ty,geom) VALUES("+name_py+","+name_ch+","+name_ft+","+x_coor+","+y_coor+","\ +pres_loc+","+type_py+","+type_ch+","+lev_rank+","+beg_yr+","+beg_rule+","+end_yr+","+end_rule+","\ +note_id+","+obj_type+","+sys_id+","+geo_src+","+compiler+","+gecomplr+","+checker+","+ent_date+","\ +beg_chg_ty+","+end_chg_ty+","+geom+")" print(sqltxt) # 执行sql cur.execute(sqltxt) # 关闭连接 conn.commit() conn.close() print('插入完成')
另外的文件就按照这个流程来就可以,polygon格式有点不同,详见下一篇。
要是觉得写脚本麻烦,就直接用postgis导入也可以。