Completely reworked configuration

This commit is contained in:
SoniEx2 2019-06-19 21:49:27 -03:00
parent 65db4df76c
commit 752f4d36ad
2 changed files with 226 additions and 186 deletions

View File

@ -24,31 +24,37 @@ import hashlib
import hmac
import jinja2
import re
import qtoml
from collections import defaultdict
from urllib.parse import urlparse
MIGRATIONS = {
"gen-index": (
"toml-config": (
(
"""ALTER TABLE "config" ADD COLUMN "title" TEXT"""),
(),
"supports generating an index page"
'''UPDATE "repo_history" SET "project" = (SELECT "git_commit" FROM "config") WHERE "project" IS NULL''',
'''ALTER TABLE "repos" RENAME TO "repos_old"''',),
(
'''UPDATE "repo_history" SET "project" = NULL WHERE "project" = (SELECT "git_commit" FROM "config")''',
'''ALTER TABLE "repos_old" RENAME TO "repos"''',),
"switches to toml config format. the old 'repos' table is preserved as 'repos_old'"
),
"better-project-management": (
(
"""ALTER TABLE "repos" ADD COLUMN "branch" TEXT""",
"""ALTER TABLE "repos" ADD COLUMN "project" TEXT""",
"""CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")""",
"""CREATE INDEX "repos_project" ON "repos" ("project")""",
"""ALTER TABLE "repo_history" ADD COLUMN "branch" TEXT""",
"""ALTER TABLE "repo_history" ADD COLUMN "project" TEXT""",
"""CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")"""),
'''ALTER TABLE "repos" ADD COLUMN "branch" TEXT''',
'''ALTER TABLE "repos" ADD COLUMN "project" TEXT''',
'''CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")''',
'''CREATE INDEX "repos_project" ON "repos" ("project")''',
'''ALTER TABLE "repo_history" ADD COLUMN "branch" TEXT''',
'''ALTER TABLE "repo_history" ADD COLUMN "project" TEXT''',
'''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''',),
(
"""DELETE FROM "repos" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL""",
"""DELETE FROM "repo_history" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL"""),
'''DELETE FROM "repos" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''',
'''DELETE FROM "repo_history" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''',),
"supports multiple projects, and allows choosing non-default branches"
),
"test": (
("""-- apply""",),
("""-- revert""",),
('''-- apply''',),
('''-- revert''',),
"does nothing"
)
}
@ -120,6 +126,17 @@ def get_template_loader():
</p>
</body>
</html>
""",
## index.toml
'index.toml': """# Generated by GAnarchy
{%- for project, repos in config.projects.items() %}
[projects.{{project}}]
{%- for repo_url, branches in repos.items() %}{% for branch, options in branches.items() %}{% if options.active %}
"{{repo_url|tomle}}".{% if branch %}"{{branch|tomle}}"{% else %}HEAD{% endif %} = { active=true }
{%- endif %}{% endfor %}
{%- endfor %}
{% endfor -%}
""",
## project.html FIXME
'project.html': """<!DOCTYPE html>
@ -169,6 +186,24 @@ def get_template_loader():
})
])
# Translation table for the contents of a TOML basic (double-quoted) string.
# Every control character U+0000-U+001F and U+007F (DEL) is escaped, together
# with the quotation mark and the backslash.  The short escapes (\b, \t, \n,
# \f, \r) override the generic \uXXXX form where TOML defines one.
tomletrans = str.maketrans({
    **{code: '\\u%04X' % code for code in range(0x20)},
    0x7F: '\\u007F',
    8: '\\b', 9: '\\t', 10: '\\n', 12: '\\f', 13: '\\r',
    '"': '\\"', '\\': '\\\\',
})


def tomlescape(value):
    """Escape ``value`` so it can be placed between double quotes in TOML.

    value -- the string to escape.

    Returns a new string in which every character listed in ``tomletrans``
    is replaced by its escape sequence; other characters are left untouched.
    """
    return value.translate(tomletrans)
def get_env():
    """Build the jinja2 environment used to render the built-in templates.

    Autoescaping is off; the TOML escaping filter is registered under both
    its full name (``tomlescape``) and the short alias (``tomle``).
    """
    environment = jinja2.Environment(autoescape=False, loader=get_template_loader())
    environment.filters.update({'tomlescape': tomlescape, 'tomle': tomlescape})
    return environment
@click.group()
def ganarchy():
@ -180,118 +215,8 @@ def initdb():
os.makedirs(data_home, exist_ok=True)
conn = sqlite3.connect(data_home + "/ganarchy.db")
c = conn.cursor()
c.execute('''CREATE TABLE "repos" ("url" TEXT PRIMARY KEY, "active" INT, "branch" TEXT, "project" TEXT)''')
c.execute('''CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")''')
c.execute('''CREATE INDEX "repos_project" ON "repos" ("project")''')
c.execute('''CREATE INDEX "repos_active" ON "repos" ("active")''')
c.execute('''CREATE TABLE "repo_history" ("entry" INTEGER PRIMARY KEY ASC AUTOINCREMENT, "url" TEXT, "count" INTEGER, "head_commit" TEXT, "branch" TEXT, "project" TEXT)''')
c.execute('''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''')
c.execute('''CREATE TABLE "config" ("git_commit" TEXT, "base_url" TEXT, "title" TEXT)''')
c.execute('''INSERT INTO "config" VALUES ('', '', '')''')
conn.commit()
conn.close()
@ganarchy.command()
@click.argument('commit')
def set_commit(commit):
    """Sets the commit that represents the project."""
    # Only a full 40-hex-digit hash is accepted; reject anything else up front.
    if re.fullmatch("[a-fA-F0-9]{40}", commit) is None:
        raise click.BadArgumentUsage("COMMIT must be a git commit hash")
    db = sqlite3.connect(data_home + "/ganarchy.db")
    cur = db.cursor()
    # No WHERE clause: the statement updates every row of "config".
    cur.execute('''UPDATE "config" SET "git_commit"=?''', (commit,))
    db.commit()
    db.close()
@ganarchy.command()
@click.argument('base-url')
def set_base_url(base_url):
    """Sets the GAnarchy instance's base URL. Used for the URI handler."""
    db = sqlite3.connect(data_home + "/ganarchy.db")
    cur = db.cursor()
    # The value is stored verbatim; no validation is performed here.
    cur.execute('''UPDATE "config" SET "base_url"=?''', (base_url,))
    db.commit()
    db.close()
@ganarchy.command()
@click.argument('title')
def set_title(title):
    """Sets the GAnarchy instance's title. This title is displayed on the index."""
    db = sqlite3.connect(data_home + "/ganarchy.db")
    cur = db.cursor()
    # The value is stored verbatim; no validation is performed here.
    cur.execute('''UPDATE "config" SET "title"=?''', (title,))
    db.commit()
    db.close()
# Command group for the "repo" subcommands registered via @repo.command().
# TODO: consider declaring the shared --branch option on the group instead.
@ganarchy.group()
def repo():
    """Modifies repos to track."""
@repo.command()
@click.option('--branch', default=None, help="Sets the branch to be used for the repo")
@click.option('--project', default=None, help="Sets the project commit to be used for the repo")
@click.option('--disabled', default=False, is_flag=True, help="Mark the repo as disabled")
@click.argument('url')
def add(branch, project, disabled, url):
    """Adds a repo to track."""
    db = sqlite3.connect(data_home + "/ganarchy.db")
    cur = db.cursor()
    cur.execute('''SELECT "git_commit", "base_url" FROM "config"''')
    default_commit, _base_url = cur.fetchone()
    # A project equal to the configured commit is stored as NULL.
    project = None if default_commit == project else project
    active = int(not disabled)
    cur.execute('''INSERT INTO "repos" ("url", "active", "branch", "project") VALUES (?, ?, ?, ?)''', (url, active, branch, project))
    db.commit()
    db.close()
@repo.command()
@click.option('--branch', default=None, help="Sets the branch to be used for the repo")
@click.option('--project', default=None, help="Sets the project commit to be used for the repo")
@click.argument('url')
def enable(branch, project, url):
    """Enables tracking of a repo."""
    db = sqlite3.connect(data_home + "/ganarchy.db")
    cur = db.cursor()
    cur.execute('''SELECT "git_commit", "base_url" FROM "config"''')
    default_commit, _base_url = cur.fetchone()
    # A project equal to the configured commit is matched as NULL.
    project = None if default_commit == project else project
    # "IS ?" (rather than "=") lets NULL branch/project values match.
    row_key = (url, branch, project)
    cur.execute('''UPDATE "repos" SET "active"=1 WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', row_key)
    db.commit()
    db.close()
@repo.command()
@click.option('--branch', default=None, help="Sets the branch to be used for the repo")
@click.option('--project', default=None, help="Sets the project commit to be used for the repo")
@click.argument('url')
def disable(branch, project, url):
    """Disables tracking of a repo."""
    db = sqlite3.connect(data_home + "/ganarchy.db")
    cur = db.cursor()
    cur.execute('''SELECT "git_commit", "base_url" FROM "config"''')
    default_commit, _base_url = cur.fetchone()
    # A project equal to the configured commit is matched as NULL.
    project = None if default_commit == project else project
    # "IS ?" (rather than "=") lets NULL branch/project values match.
    row_key = (url, branch, project)
    cur.execute('''UPDATE repos SET "active"=0 WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', row_key)
    db.commit()
    db.close()
@repo.command()
@click.option('--branch', default=None, help="Sets the branch to be used for the repo")
@click.option('--project', default=None, help="Sets the project commit to be used for the repo")
@click.argument('url')
def remove(branch, project, url):
    """Stops tracking a repo."""
    # abort=True makes click raise click.Abort when the user declines.  Without
    # it the answer was discarded and the rows were deleted even after a "no".
    click.confirm("WARNING: This operation does not delete the commits associated with the given repo! Are you sure you want to continue? This operation cannot be undone.", abort=True)
    conn = sqlite3.connect(data_home + "/ganarchy.db")
    try:
        c = conn.cursor()
        c.execute('''SELECT "git_commit", "base_url" FROM "config"''')
        (project_commit, base_url) = c.fetchone()
        # A project equal to the configured commit is matched as NULL.
        if project_commit == project:
            project = None
        # Remove both the tracking entry and its recorded history; "IS ?" lets
        # NULL branch/project values match.
        c.execute('''DELETE FROM "repos" WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project))
        c.execute('''DELETE FROM "repo_history" WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project))
        conn.commit()
    finally:
        # Release the database handle even if a statement above failed.
        conn.close()
@ -363,10 +288,11 @@ class Git:
GIT = Git(cache_home)
class Repo:
def __init__(self, dbconn, project, url, branch, head_commit, list_metadata=False):
def __init__(self, dbconn, project_commit, url, branch, head_commit, list_metadata=False):
self.url = url
self.branch = branch
self.project_commit = project.commit
self.project_commit = project_commit
self.erroring = False
if not branch:
self.branchname = "gan" + hashlib.sha256(url.encode("utf-8")).hexdigest()
@ -378,9 +304,10 @@ class Repo:
if head_commit:
self.hash = head_commit
else:
try:
try: # FIXME should we even do this?
self.hash = GIT.get_hash(self.branchname)
except GitError:
self.erroring = True
self.hash = None
self.message = None
@ -388,6 +315,7 @@ class Repo:
try:
self.update_metadata()
except GitError:
self.erroring = True
pass
def update_metadata(self):
@ -402,14 +330,15 @@ class Repo:
except subprocess.CalledProcessError as e:
# This may error for various reasons, but some are important: dead links, etc
click.echo(e.output, err=True)
self.erroring = True
return None
pre_hash = self.hash
try:
post_hash = GIT.get_hash(self.branchname)
except GitError as e:
# This should never happen, but maybe there's some edge cases?
# Can you force-push an empty branch?
# TODO
# TODO check
self.erroring = True
return None
self.hash = post_hash
if not pre_hash:
@ -421,18 +350,21 @@ class Repo:
try:
subprocess.check_call(["git", "-C", cache_home, "merge-base", "--is-ancestor", self.project_commit, self.branchname], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
self.update_metadata()
return count, post_hash, self.message
return count
except (subprocess.CalledProcessError, GitError) as e:
click.echo(e, err=True)
self.erroring = True
return None
class Project:
def __init__(self, dbconn, ganarchy, project_commit, list_repos=False):
def __init__(self, dbconn, project_commit, list_repos=False):
self.commit = project_commit
if ganarchy.project_commit == project_commit:
project_commit = None
self.refresh_metadata()
self.repos = None
if list_repos:
self.list_repos(dbconn)
def list_repos(self, dbconn):
repos = []
with dbconn:
for (e, url, branch, head_commit) in dbconn.execute('''SELECT "max"("e"), "url", "branch", "head_commit" FROM (SELECT "max"("T1"."entry") "e", "T1"."url", "T1"."branch", "T1"."head_commit" FROM "repo_history" "T1"
@ -440,11 +372,9 @@ class Project:
GROUP BY "T1"."url", "T1"."branch"
UNION
SELECT null, "T3"."url", "T3"."branch", null FROM "repos" "T3" WHERE "active" AND "project" IS ?1)
GROUP BY "url" ORDER BY "e"''', (project_commit,)):
repos.append(Repo(dbconn, self, url, branch, head_commit))
GROUP BY "url" ORDER BY "e"''', (self.commit,)):
repos.append(Repo(dbconn, self.commit, url, branch, head_commit))
self.repos = repos
else:
self.repos = None
def refresh_metadata(self):
try:
@ -452,8 +382,8 @@ class Project:
project_title, project_desc = (lambda x: x.groups() if x is not None else ('', None))(re.fullmatch('^\\[Project\\]\s+(.+?)(?:\n\n(.+))?$', project, flags=re.ASCII|re.DOTALL|re.IGNORECASE))
if not project_title.strip(): # FIXME
project_title, project_desc = ("Error parsing project commit",)*2
if project_desc: # FIXME
project_desc = project_desc.strip()
# if project_desc: # FIXME
# project_desc = project_desc.strip()
self.commit_body = project
self.title = project_title
self.description = project_desc
@ -463,74 +393,183 @@ class Project:
self.description = None
def update(self):
# TODO
# TODO? check if working correctly
results = [(repo, repo.update()) for repo in self.repos]
self.refresh_metadata()
return results
class GAnarchy:
def __init__(self, dbconn, list_projects=False):
with dbconn:
# TODO
#(project_commit, base_url, title) = dbconn.execute('''SELECT "git_commit", "base_url", "title" FROM "config"''').fetchone()
(project_commit, base_url) = dbconn.execute('''SELECT "git_commit", "base_url" FROM "config"''').fetchone()
title = None
self.project_commit = project_commit
self.base_url = base_url
def __init__(self, dbconn, config, list_projects=False, list_repos=False):
base_url = config.base_url
title = config.title
if not base_url:
pass ## TODO
# FIXME use a more appropriate error type
raise ValueError
if not title:
from urllib.parse import urlparse
title = "GAnarchy on " + urlparse(base_url).hostname
self.title = title
self.base_url = base_url
# load config onto DB
c = dbconn.cursor()
c.execute('''CREATE TEMPORARY TABLE "repos" ("url" TEXT PRIMARY KEY, "active" INT, "branch" TEXT, "project" TEXT)''')
c.execute('''CREATE UNIQUE INDEX "temp"."repos_url_branch_project" ON "repos" ("url", "branch", "project")''')
c.execute('''CREATE INDEX "temp"."repos_project" ON "repos" ("project")''')
c.execute('''CREATE INDEX "temp"."repos_active" ON "repos" ("active")''')
for (project_commit, repos) in config.projects.items():
for (repo_url, branches) in repos.items():
for (branchname, options) in branches.items():
if options['active']: # no need to insert inactive repos since they get ignored anyway
c.execute('''INSERT INTO "repos" VALUES (?, ?, ?, ?)''', (repo_url, 1, branchname, project_commit))
dbconn.commit()
if list_projects:
projects = []
with dbconn:
for (project,) in dbconn.execute('''SELECT DISTINCT "project" FROM "repos" '''): # FIXME? *maybe* sort by activity in the future
if project == None:
project = self.project_commit
projects.append(Project(dbconn, ganarchy, project))
projects.sort(key=lambda project: project.title)
projects.append(Project(dbconn, project, list_repos=list_repos))
projects.sort(key=lambda project: project.title) # sort projects by title
self.projects = projects
else:
self.projects = None
class Config:
    """In-memory view of a TOML config file, optionally layered over a base config.

    Attributes set by ``__init__``:
      title, base_url -- taken from the file being loaded ('' when absent).
      projects -- nested mapping: project commit -> repo URL -> branch name
          -> options; the branch name ``None`` stands for ``HEAD``.
      blocked_domains, blocked_domain_suffixes -- tuples, currently always
          empty (reading them from the config is still a TODO).
    """

    def __init__(self, toml_file, base=None, remove=True):
        """Parse ``toml_file`` and merge its projects on top of ``base``.

        toml_file -- open text file handed to ``qtoml.load``.
        base -- a previously built ``Config`` whose projects are copied in
            first, or None.
        remove -- when true, an ``active = false`` entry in this file turns
            off a branch that ``base`` had active.
        """
        self.projects = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict))))
        config_data = qtoml.load(toml_file)
        self.title = config_data.get('title', '')
        self.base_url = config_data.get('base_url', '')
        # TODO blocked domains (but only read them from config_data if remove is True)
        self.blocked_domains = []
        self.blocked_domain_suffixes = []
        self.blocked_domains.sort()
        self.blocked_domain_suffixes.sort(key=lambda x: x[::-1])
        # FIXME remove duplicates and process invalid entries
        self.blocked_domains = tuple(self.blocked_domains)
        self.blocked_domain_suffixes = tuple(self.blocked_domain_suffixes)  # MUST be tuple
        # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$")
        if base:
            # Already sanitized when ``base`` was built.  ``remove`` is a
            # required argument; self.projects is still empty at this point,
            # so no branch can be active yet and False loses nothing.
            self._update_projects(base.projects, remove=False, sanitize=False)
        projects = config_data.get('projects', {})
        self._update_projects(projects, remove=remove)

    def _update_projects(self, projects, remove, sanitize=True):
        """Merge ``projects`` into ``self.projects``.

        projects -- mapping project commit -> repo URL -> branch -> options.
        remove -- when true, an inactive incoming entry deactivates a branch
            that is currently active.
        sanitize -- when true, malformed entries are skipped silently and the
            branch name "HEAD" is stored as ``None``; when false, a bad URL,
            a literal "HEAD" branch or a non-boolean ``active`` raises
            ValueError.
        """
        for (project_commit, repos) in projects.items():
            if sanitize and not isinstance(repos, dict):
                # TODO emit warnings?
                continue
            if sanitize and not re.fullmatch("[0-9a-fA-F]{40}|[0-9a-fA-F]{64}", project_commit):  # future-proofing: sha256 support
                # TODO emit warnings?
                continue
            project = self.projects[project_commit]
            for (repo_url, branches) in repos.items():
                if sanitize and not isinstance(branches, dict):
                    # TODO emit warnings?
                    continue
                try:
                    u = urlparse(repo_url)
                    getattr(u, 'port')  # raises ValueError if port is invalid
                    if u.scheme in ('file', ''):
                        raise ValueError
                    hostname = u.hostname
                    if not hostname:
                        # urlparse yields hostname None for URLs such as
                        # "mailto:x"; treat them as invalid instead of
                        # failing on None.endswith() below.
                        raise ValueError
                    if (hostname in self.blocked_domains) or (hostname.endswith(self.blocked_domain_suffixes)):
                        raise ValueError
                except ValueError:
                    if sanitize:
                        # TODO emit warnings?
                        continue
                    else:
                        raise
                repo = project[repo_url]
                for (branchname, options) in branches.items():
                    if sanitize and not isinstance(options, dict):
                        # TODO emit warnings?
                        continue
                    if branchname == "HEAD":
                        if sanitize:
                            # feels weird, but generally makes things easier
                            # DO NOT emit warnings here. this is deliberate.
                            branchname = None
                        else:
                            raise ValueError
                    branch = repo[branchname]
                    active = options.get('active', False)
                    if active not in (True, False):
                        if sanitize:
                            # TODO emit warnings?
                            continue
                        else:
                            raise ValueError
                    ## | remove | branch.active | options.active | result |
                    ## |    x   |     false     |     false      |  false |
                    ## |    x   |     false     |     true       |  true  |
                    ## |    x   |     true      |     true       |  true  |
                    ## |  false |     true      |     false      |  true  |
                    ## |  true  |     true      |     false      |  false |
                    branch['active'] = branch.get('active', False) or active
                    if remove and not active:
                        branch['active'] = False
@ganarchy.command()
@click.option('--skip-errors/--no-skip-errors', default=False)
@click.argument('files', type=click.File('r', encoding='utf-8'), nargs=-1)
def merge_configs(skip_errors, files):
    """Merges config files."""
    merged = None
    for toml_file in files:
        try:
            # Turn off newline translation before the file is parsed.
            toml_file.reconfigure(newline='')
            # Each file is layered over the result so far; remove=False means
            # an inactive entry never turns off an already-active branch.
            merged = Config(toml_file, merged, remove=False)
        except (UnicodeDecodeError, qtoml.decoder.TOMLDecodeError):
            if skip_errors:
                continue
            raise
    if not merged:
        # No file was given, or every file was skipped: print nothing.
        return
    renderer = get_env()
    toml_template = renderer.get_template('index.toml')
    click.echo(toml_template.render(config=merged))
@ganarchy.command()
@click.argument('project', required=False)
def cron_target(project):
"""Runs ganarchy as a cron target."""
conf = None
# reverse order is intentional
for d in reversed(config_dirs):
try:
conf = Config(open(d + "/config.toml", 'r', encoding='utf-8', newline=''), conf)
except (OSError, UnicodeDecodeError, qtoml.decoder.TOMLDecodeError):
pass
with open(config_home + "/config.toml", 'r', encoding='utf-8', newline='') as f:
conf = Config(f, conf)
env = get_env()
if project == "config":
# render the config
# doesn't have access to a GAnarchy object. this is deliberate.
template = env.get_template('index.toml')
click.echo(template.render(config = conf))
return
# make sure the cache dir exists
os.makedirs(cache_home, exist_ok=True)
# make sure it is a git repo
subprocess.call(["git", "-C", cache_home, "init", "-q"])
conn = sqlite3.connect(data_home + "/ganarchy.db")
instance = GAnarchy(conn, list_projects=project=="index")
env = jinja2.Environment(loader=get_template_loader(), autoescape=False)
instance = GAnarchy(conn, conf, list_projects=project in ["index", "config"])
if project == "index":
# render the index
template = env.get_template('index.html')
click.echo(template.render(ganarchy = instance))
return
project_commit = instance.project_commit
base_url = instance.base_url
if not base_url or not (project or project_commit):
if not instance.base_url or not project:
click.echo("No base URL or project commit specified", err=True)
return
if project_commit == project:
project = None
elif project is not None:
project_commit = project
entries = []
generate_html = []
c = conn.cursor()
p = Project(conn, instance, project_commit, list_repos=True)
# FIXME: this should be moved into Project.update()
for repo in p.repos:
result = repo.update()
if result is not None:
count, post_hash, msg = result
entries.append((repo.url, count, post_hash, repo.branch, project))
generate_html.append((repo.url, msg, count, repo.branch))
p.refresh_metadata()
p = Project(conn, project, list_repos=True)
results = p.update()
for (repo, count) in results:
if count is not None:
entries.append((repo.url, count, repo.hash, repo.branch, project))
generate_html.append((repo.url, repo.message, count, repo.branch))
# sort stuff twice because reasons
entries.sort(key=lambda x: x[1], reverse=True)
generate_html.sort(key=lambda x: x[2], reverse=True)
@ -547,7 +586,7 @@ def cron_target(project):
project_body = p.commit_body,
project_commit = p.commit,
repos = html_entries,
base_url = base_url,
base_url = instance.base_url,
# I don't think this thing supports deprecating the above?
project = p,
ganarchy = instance))

View File

@ -1,2 +1,3 @@
Click==7.0
Jinja2==2.10.1
qtoml==0.2.4