Commit d6c7b891 authored by xiaxiaoyun's avatar xiaxiaoyun

Initial commit

parents
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.csv
# images
*.tif
*.tiff
*.TIFF
*.Tiff
*.img
*.IMG
*.bmp
*.png
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
.static_storage/
.media/
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# PyCharm
.idea/*
# Test Files
test
src/viirs
\ No newline at end of file
FROM harbor.gagogroup.cn/mill/centos7-python:latest

ENV LANG C.UTF-8
# MAINTAINER has been deprecated since Docker 1.13 -- use a LABEL instead.
LABEL maintainer="xiaxiaoyun@gagogroup.cn"

# create directories
WORKDIR /opt/
RUN mkdir log/ project/ project/bin project/deploy/ project/src/

# install python dependencies first, so this layer stays cached while the
# source code changes (COPY is preferred over ADD for plain local files)
COPY ./requirements.txt project/
WORKDIR project/
ENV PATH=/root/miniconda3/bin:/opt/project/bin:$PATH
ENV LD_LIBRARY_PATH=/root/miniconda3/lib:$LD_LIBRARY_PATH
RUN pip install --upgrade pip && pip install -r requirements.txt

WORKDIR /opt/
# copy bin and deploy files
COPY ./bin project/bin/
COPY ./deploy project/deploy/
# copy source files
# NOTE(review): this copies the *contents* of ./src directly into
# /opt/project (not /opt/project/src), which is what the supervisord
# "celery -A tasks" command expects; the project/src/ directory created
# above therefore stays empty.
COPY ./src project/

# set working directory
WORKDIR /opt/project/
# 佳格夜光日频影像下载
## 项目说明
> 每天定时下载NOAA日频夜光影像数据,原始文件上传oss。
## 项目架构
> celery定时任务
## 部署
> 本项目公司采用宁夏aliyun和oss进行生产部署。
采用docker-compose部署,yaml文件在deploy文件夹内
进入到项目工程路径
1. docker镜像打包:
sh bin/build_docker_image.sh
2. 部署
cd bin
sh start_compose.sh
## 运行
celery自动生产
\ No newline at end of file
#!/usr/bin/env bash
# Build, tag and push the daily-download docker image, then remove any
# dangling (<none>) images left behind by previous builds.
set -euo pipefail

IMAGE="harbor.gagogroup.cn/yaogan/night_light_daily_produce_daily_download"

read -r -p "Current VERSION is ? (1.0.0):" VERSION
# Apply the advertised default when the user just presses Enter.
# (The original left VERSION empty, producing a malformed ":"-terminated tag.)
VERSION="${VERSION:-1.0.0}"

# build image from docker file
docker build -t "${IMAGE}:${VERSION}" -t "${IMAGE}:latest" .

# push them
docker push "${IMAGE}:${VERSION}"
docker push "${IMAGE}:latest"

# delete dangling images (tag <none>); -r stops xargs from running
# "docker rmi" with no arguments, and "|| true" keeps the script's exit
# status clean when there is nothing to delete.
docker images | grep none | awk '{print $3}' | xargs -r docker rmi || true
File added
#!/usr/bin/env bash
# Container entrypoint.
# Put miniconda's interpreter first on PATH so "python" resolves to python3.
export PATH="/root/miniconda3/bin:$PATH"
# Run supervisord (foreground per its config) so the container stays alive;
# it in turn starts the celery worker + beat process.
/usr/bin/supervisord -c /opt/project/deploy/supervisord.conf
#!/usr/bin/env bash
# Redeploy helper: force-remove the running container and its local image,
# then recreate the whole stack from the compose file in detached mode.
# (Run from the bin/ directory -- the compose path is relative.)
docker rm -f night_light_daily_produce_daily_download
docker rmi harbor.gagogroup.cn/yaogan/night_light_daily_produce_daily_download
docker-compose -f ../deploy/compose.yaml up -d
version: "3"
services:
  night_light_daily_produce_daily_download:
    image: harbor.gagogroup.cn/yaogan/night_light_daily_produce_daily_download:1.0.0
    container_name: night_light_daily_produce_daily_download
    volumes:
      - "/var/log/npp_daily_produce_tiler:/opt/log"
      # NOTE(review): the original also mounted "/mysqldata:/mnt/resources",
      # which collides with the mount below (same container target -- docker
      # rejects duplicate mount points). Kept the night-light mount, which
      # matches WORKING_DIR; re-add /mysqldata under a different target if it
      # is still needed.
      - "/mnt/night_light_daily_produce_daily_download:/mnt/resources"
    # The original file declared "environment:" twice (a mapping and a list);
    # duplicate YAML keys are invalid and lenient parsers silently drop the
    # first block. Merged into a single list here.
    environment:
      - LC_ALL=en_US.utf-8
      - LANG=en_US.utf-8
      - RABBITMQ_BROKER=amqp://test:test@172.31.24.224
      - OS_STORAGE_SERVICE=oss
      # SECURITY NOTE(review): real credentials committed in plain text --
      # move them to an env_file or a secret store.
      - oss_ACCESS_ID=LTAI4G5utRueY8A1iUrixWFg
      - oss_ACCESS_KEY=BTm4iNYdkGrflmRzm5Qt6818LC8xTU
      - oss_STORAGE_REGION=oss-cn-zhangjiakou.aliyuncs.com
      - SRC_BUCKET_NAME=gago-data-test
      - SRC_NL_DAILY_PREFIX=data-source/satellite-data/npp_nightlight/daily
      - WORKING_DIR=/mnt/resources/night_light_daily_produce_daily_download
      - DEBUG_MODE=False
    command: /bin/sh /opt/project/bin/start.sh
    restart: always
    privileged: true
  rabbitmq:
    image: rabbitmq:3-management
    container_name: night_light_daily_produce_daily_download_rabbitmq
    environment:
      - RABBITMQ_DEFAULT_USER=test
      - RABBITMQ_DEFAULT_PASS=test
    ports:
      - "4370:4369"
      - "5670:5671"
      - "5673:5672"
      # NOTE(review): 15671 is rabbitmq's TLS management port; this mapping
      # looks like a typo for 15671:15671 or is redundant with the next line
      # -- confirm before changing.
      - "15672:15671"
      - "15673:15672"
      - "25673:25672"
    restart: always
    privileged: true
; Supervisor configuration for the night-light daily-download container.
; nodaemon=true keeps supervisord in the foreground so docker tracks it.

[unix_http_server]
file=/var/run/supervisor.sock ; (the path to the socket file)
chmod=0700 ; socket file mode (default 0700)

[supervisord]
logfile=/opt/log/supervisord.master.log ; (main log file;default $CWD/supervisord.log)
logfile_maxbytes=50MB ; (max main logfile bytes b4 rotation;default 50MB)
logfile_backups=10 ; (num of main logfile rotation backups;default 10)
loglevel=info ; (log level;default info; others: debug,warn,trace)
pidfile=/var/run/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
nodaemon=true ; (start in foreground if true;default false)
minfds=1024 ; (min. avail startup file descriptors;default 1024)
minprocs=200 ; (min. avail process descriptors;default 200)

; required for supervisorctl to talk to the supervisord daemon
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[supervisorctl]
serverurl=unix:///var/run/supervisor.sock ; use a unix:// URL for a unix socket

; Celery worker (autoscale between 3 and 5 processes) with an embedded
; beat scheduler (-B) that triggers the daily download task.
[program:night_light_daily_produce_daily_download]
directory=/opt/project
command=celery -A tasks worker -B -l info --autoscale=5,3
autostart = true
autorestart = true
stderr_logfile = /opt/log/night_light_daily_produce_daily_download.stderr.log
stdout_logfile = /opt/log/night_light_daily_produce_daily_download.stdout.log
# NOTE(review): only the AWS SDK is pinned; consider pinning every
# dependency for reproducible docker builds.
boto3==1.9.25
botocore==1.12.25
# gagoos is the in-house object-storage wrapper (company registry package)
gagoos
requests
arrow
celery
fire
mercantile
geojson
geodex
pygeotile
# pyproj
# bs4 is a thin wrapper that installs beautifulsoup4
bs4
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2019/4/17 6:55 PM
# @Author : Dutt
# @Email : dutengteng1@163.com
# @File    : __init__.py
\ No newline at end of file
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# IDE: PyCharm 2017.2.4
# Author: dutt<dutengteng@gagogroup.com>
# Created on 2020-06-10
from os import getenv

from celery import Celery
# crontab is defined in celery.schedules; importing it from celery.beat
# only works because beat re-imports it.
from celery.schedules import crontab

# Broker URL, overridable per deployment via the environment.
RABBITMQ_BROKER = getenv('RABBITMQ_BROKER', 'amqp://test:test@localhost')

celery_app = Celery(
    'night_light_daily_produce_daily_download',
    broker=RABBITMQ_BROKER,
    include=['tasks']
)


class Config:
    """Celery configuration object.

    NOTE: ``config_from_object()`` resets any configuration applied
    earlier, so the original pattern of calling ``conf.update(...,
    beat_schedule=...)`` *before* ``config_from_object(Config)`` silently
    wiped the beat schedule and the daily task never fired.  All settings
    -- including the beat schedule -- therefore live on this one object.
    """
    task_serializer = 'json'
    enable_utc = True
    task_track_started = True
    # always reconnect
    broker_connection_max_retries = 0
    # fetch only one task at a time
    worker_prefetch_multiplier = 1
    # worker should send task events so we can track the task state
    worker_send_task_events = 1
    task_reject_on_worker_lost = True
    # together with task_reject_on_worker_lost this ensures the task
    # is re-delivered even if the worker dies mid-task
    task_acks_late = True
    # recycle worker children to avoid memory leaks
    worker_max_tasks_per_child = 10
    # daily download task, fired at 05:43 every day
    beat_schedule = {
        'npp_daily_produce_task': {
            'task': 'npp_daily_produce_task',
            'schedule': crontab(minute='43', hour='5'),
            'args': ()
        }
    }


celery_app.config_from_object(Config)

# Publish retry policy: retry forever with a short, capped back-off.
retry_policy = {
    'max_retries': None,
    'interval_start': 0,
    'interval_step': 1,
    'interval_max': 5,
}
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# IDE: PyCharm 2017.2.4
# Author: dutt<dutengteng@gagogroup.com>
# Created on 2020-06-10
import os

# Local working directory used for temporary downloads.
WORKING_DIR: str = os.getenv(
    'WORKING_DIR',
    '/mnt/resources'
)

# Object-storage connection settings.
# SECURITY NOTE(review): real access credentials should not be hard-coded
# as defaults here -- they belong in the deployment environment only.
STORAGE_CONFIG: dict = {
    'service': os.getenv(
        'OS_STORAGE_SERVICE',
        'oss'
    ),
    'access_key': os.getenv(
        'oss_ACCESS_ID',
        'LTAI4G5utRueY8A1iUrixWFg'
    ),
    'secret_key': os.getenv(
        'oss_ACCESS_KEY',
        'BTm4iNYdkGrflmRzm5Qt6818LC8xTU'
    ),
    'region': os.getenv(
        'oss_STORAGE_REGION',
        "oss-cn-zhangjiakou.aliyuncs.com"
    )
}

# Object-key prefix under which daily night-light source files are stored.
SRC_NL_DAILY_PREFIX: str = os.getenv(
    "SRC_NL_DAILY_PREFIX",
    "data-source/satellite-data/npp_nightlight/daily"
)

# Bucket holding the source imagery.
SRC_BUCKET_NAME: str = os.getenv(
    'SRC_BUCKET_NAME',
    'gago-data-test'
)

# Flag is True only when the env var is exactly the string "True".
# (The original compared against str(True) and had a stray '<' merge
# artifact on the following line that broke the module.)
TEST_SRC_FILE: bool = os.getenv(
    'TEST_SRC_FILE',
    'False'
) == 'True'
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020/7/27 10:18 上午
# @Author : Xiaoyun Xia
# @Site :
# @File : download_image_and_filedate_localproduce.py
# @Software: PyCharm
import os
import gdal
import requests
import datetime
import sqlite3
import logging
import uuid
from bs4 import BeautifulSoup
from osgeo import gdal
from gago.common.wget_download import download_file_with_wget
# One-time root logger setup for this script: timestamped INFO output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
def create_table(db_dir: str,
filename: str,
filedate: str
):
"""
新建数据库,插入需下载、更新的时间和文件名
:param filename:
:param filedate:
:return:
"""
print(db_dir)
conn = sqlite3.connect(db_dir)
try:
create_tb_cmd = '''
CREATE TABLE IF NOT EXISTS USER
(FileName TEXT(100) PRIMARY KEY NOT NULL,
DowloadFirstDate TEXT NOT NULL,
SearchFileDate TEXT NOT NULL
);
'''
conn.execute(create_tb_cmd)
except Exception as e:
print(e)
print("Create table failed...")
return False
try:
insert_dt_cmd = "INSERT OR IGNORE INTO USER(FileName, DowloadFirstDate, SearchFileDate) " \
"VALUES('{0}', '{1}', '{2}')".format(filename, filedate,
filedate)
conn.execute(insert_dt_cmd)
except:
print("Insert table failed...")
finally:
updata_dt_cmd = "UPDATE USER set SearchFileDate = '{1}' where FileName='{0}'".format(
filename, filedate)
conn.execute(updata_dt_cmd)
cur = conn.cursor()
cur.execute("SELECT * FROM USER")
print(cur.fetchall())
conn.commit()
conn.close()
def search_date(db_dir: str,
                filename: str,
                ) -> str:
    """Look up the last recorded SearchFileDate for a file.

    :param db_dir: path of the sqlite database file
    :param filename: remote image file name
    :return: the stored date string, or "no one" when the file is unknown
        or the lookup failed.
    """
    # Initialized up front: in the original, a failing SELECT left this
    # name unbound and the final return raised UnboundLocalError,
    # masking the real error.
    search_file_date = "no one"
    conn = sqlite3.connect(db_dir)
    try:
        cur = conn.cursor()
        # Parameterized query instead of str.format.
        cur.execute(
            "SELECT SearchFileDate FROM USER WHERE FileName = ?",
            (filename,))
        row = cur.fetchone()
        if row is not None:
            search_file_date = row[0]
    except sqlite3.Error as e:
        logging.getLogger(__name__).error("search_date failed: %s", e)
    finally:
        # Read-only access: no commit needed, just release the handle.
        conn.close()
    return search_file_date
class NppNightLightDataDownloader:
def __init__(self,
from_date :datetime,
to_date: datetime,
tilenames: list or None):
"""
NPP日频夜光影像下载器。用于下载指定日期范围内,指定分带的日频夜光影像。
:param from_date: 起始日期
:param to_date: 终止日期
:param tilenames: 要下载的分带列表。如中国区分带为["75N060E"]
"""
all_tilenames = ["75N060E", "75N060W", "75N180W",
"00N060W", "00N060E", "00N180W"]
self.all_tilenames = all_tilenames
self._logger = logging.getLogger(__name__)
self._logger.info("NPP月频夜光影像下载...")
if to_date < from_date:
self._logger.error("The end date is earlier than the start date! ")
return
if len(tilenames) == 0:
self._logger.error("The tilename list is empty!")
return
for tn in tilenames:
if tn not in all_tilenames:
self._logger.error("The input tilename: %s list is wrong!" % tn)
return
self.start = from_date
self.end = to_date
self.tilenames = tilenames
    def get_urls(self) -> list:
        """Build the list of image URLs that still need downloading.

        Walks every day in [self.start, self.end], fetches the NOAA
        mosaics directory listing for that day and keeps only
        "rade9.tif" files matching the requested tile names.  A local
        sqlite database remembers the file date last seen for each file,
        so files whose remote date is unchanged are skipped.

        :return: list of download URLs
        """
        s_date = self.start
        e_date = self.end
        tilenames = self.tilenames
        urls = []
        # NOTE(review): hard-coded db path (with typos "night_Light" /
        # "dowload"); presumably this should derive from the WORKING_DIR
        # setting -- confirm against deployment before changing.
        db_file = ('/mnt/resources/night_Light_daily_dowload.db')
        self._logger.info('获取所有数据的url......')
        # Inclusive loop over every day in the range.
        while (e_date - s_date).days >= 0:
            for tilename in tilenames:
                # Daily listing URL; the trailing "//" is accepted by the
                # NOAA server as-is.
                file_url = ("https://data.ngdc.noaa.gov/instruments/remote-"
                            "sensing/passive/spectrometers-radiometers/imaging/"
                            "viirs/mosaics//"
                            + str(s_date.strftime("%Y%m%d"))
                            + "/")
                session = requests.session()
                # NOTE(review): the listing page is fetched twice (once for
                # the status code, once for the body); reusing a single
                # response object would halve the requests.
                res_staus_code = session.get(file_url).status_code
                res_text = session.get(file_url).text
                if res_staus_code == 200:
                    # print(file_url)
                    soup = BeautifulSoup(res_text, 'html.parser')
                    # NOTE(review): assumes the listing table always has at
                    # least 21 rows (rows 0-1 look like header/parent-dir
                    # entries); an IndexError escapes if the page is shorter.
                    for i in range(2, 21):
                        file_text = soup.find('table').find_all('tr')[i].find_all("td")
                        file_name = str(file_text[0].text)
                        if (tilename in file_name) and ("rade9.tif" in file_name):
                            file_date = str(file_text[1].text)
                            print(file_name)
                            if not os.path.exists(db_file):
                                # First run: create the tracking db, record
                                # the file and queue it for download.
                                create_table(db_file, file_name, file_date)
                                url = file_url + file_name
                                urls.append(url)
                            else:
                                search_file_date = search_date(db_file,file_name)
                                print(search_file_date)
                                # Re-download only when the remote file date
                                # changed since our last visit (search_date
                                # returns "no one" for unknown files, which
                                # also differs and triggers a download).
                                if search_file_date != file_date:
                                    url = file_url + file_name
                                    urls.append(url)
                                    create_table(db_file,file_name,file_date)
            s_date = s_date + datetime.timedelta(days=1)
        return urls
def save_data(self, location: str, urls_todo=None or list):
"""
保存影像到本地。
:param location: 保存路径
:param urls_todo: 指定需要下载的下载地址。如果None则下载日期范围内的所有数据。
:return:
"""
if urls_todo is None:
urls = self.get_urls()
elif isinstance(urls_todo, list):
urls = urls_todo
else:
self._logger.error("The type of urls_todo must be None or list")
return