Completely reworked configuration

This commit is contained in:
SoniEx2 2019-06-19 21:49:27 -03:00
parent 65db4df76c
commit 752f4d36ad
2 changed files with 226 additions and 186 deletions

View File

@ -24,31 +24,37 @@ import hashlib
import hmac
import jinja2
import re
import qtoml
from collections import defaultdict
from urllib.parse import urlparse
MIGRATIONS = {
"gen-index": (
"toml-config": (
(
"""ALTER TABLE "config" ADD COLUMN "title" TEXT"""),
(),
"supports generating an index page"
'''UPDATE "repo_history" SET "project" = (SELECT "git_commit" FROM "config") WHERE "project" IS NULL''',
'''ALTER TABLE "repos" RENAME TO "repos_old"''',),
(
'''UPDATE "repo_history" SET "project" = NULL WHERE "project" = (SELECT "git_commit" FROM "config")''',
'''ALTER TABLE "repos_old" RENAME TO "repos"''',),
"switches to toml config format. the old 'repos' table is preserved as 'repos_old'"
),
"better-project-management": (
(
"""ALTER TABLE "repos" ADD COLUMN "branch" TEXT""",
"""ALTER TABLE "repos" ADD COLUMN "project" TEXT""",
"""CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")""",
"""CREATE INDEX "repos_project" ON "repos" ("project")""",
"""ALTER TABLE "repo_history" ADD COLUMN "branch" TEXT""",
"""ALTER TABLE "repo_history" ADD COLUMN "project" TEXT""",
"""CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")"""),
'''ALTER TABLE "repos" ADD COLUMN "branch" TEXT''',
'''ALTER TABLE "repos" ADD COLUMN "project" TEXT''',
'''CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")''',
'''CREATE INDEX "repos_project" ON "repos" ("project")''',
'''ALTER TABLE "repo_history" ADD COLUMN "branch" TEXT''',
'''ALTER TABLE "repo_history" ADD COLUMN "project" TEXT''',
'''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''',),
(
"""DELETE FROM "repos" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL""",
"""DELETE FROM "repo_history" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL"""),
'''DELETE FROM "repos" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''',
'''DELETE FROM "repo_history" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''',),
"supports multiple projects, and allows choosing non-default branches"
),
"test": (
("""-- apply""",),
("""-- revert""",),
('''-- apply''',),
('''-- revert''',),
"does nothing"
)
}
@ -120,6 +126,17 @@ def get_template_loader():
</p>
</body>
</html>
""",
## index.toml
'index.toml': """# Generated by GAnarchy
{%- for project, repos in config.projects.items() %}
[projects.{{project}}]
{%- for repo_url, branches in repos.items() %}{% for branch, options in branches.items() %}{% if options.active %}
"{{repo_url|tomle}}".{% if branch %}"{{branch|tomle}}"{% else %}HEAD{% endif %} = { active=true }
{%- endif %}{% endfor %}
{%- endfor %}
{% endfor -%}
""",
## project.html FIXME
'project.html': """<!DOCTYPE html>
@ -169,6 +186,24 @@ def get_template_loader():
})
])
# Translation table that turns arbitrary text into the body of a TOML
# basic (double-quoted) string.  TOML forbids raw control characters
# (U+0000-U+001F and U+007F), the quotation mark and the backslash inside
# basic strings, so each of them is mapped to an escape sequence.
tomletrans = str.maketrans({
    # Generic \uXXXX form for every control character, including DEL.
    **{chr(cp): '\\u{:04X}'.format(cp) for cp in (*range(0x20), 0x7F)},
    # The short escapes TOML defines take precedence over the generic form.
    '\b': '\\b',
    '\t': '\\t',
    '\n': '\\n',
    '\f': '\\f',
    '\r': '\\r',
    '"': '\\"',
    '\\': '\\\\',
})


def tomlescape(value):
    """Escape ``value`` for use between the quotes of a TOML basic string.

    Args:
        value: The text to escape.

    Returns:
        A copy of ``value`` in which control characters, ``"`` and ``\\``
        are replaced by their TOML escape sequences.  The surrounding
        quotation marks are NOT added.
    """
    return value.translate(tomletrans)
def get_env():
    """Build the Jinja2 environment used to render the built-in templates.

    Autoescaping is disabled; the TOML escaping filter is registered under
    both its long name (``tomlescape``) and its short alias (``tomle``).
    """
    environment = jinja2.Environment(loader=get_template_loader(), autoescape=False)
    # Both names refer to the very same callable.
    environment.filters.update(tomlescape=tomlescape, tomle=tomlescape)
    return environment
@click.group()
def ganarchy():
@ -180,118 +215,8 @@ def initdb():
os.makedirs(data_home, exist_ok=True)
conn = sqlite3.connect(data_home + "/ganarchy.db")
c = conn.cursor()
c.execute('''CREATE TABLE "repos" ("url" TEXT PRIMARY KEY, "active" INT, "branch" TEXT, "project" TEXT)''')
c.execute('''CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")''')
c.execute('''CREATE INDEX "repos_project" ON "repos" ("project")''')
c.execute('''CREATE INDEX "repos_active" ON "repos" ("active")''')
c.execute('''CREATE TABLE "repo_history" ("entry" INTEGER PRIMARY KEY ASC AUTOINCREMENT, "url" TEXT, "count" INTEGER, "head_commit" TEXT, "branch" TEXT, "project" TEXT)''')
c.execute('''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''')
c.execute('''CREATE TABLE "config" ("git_commit" TEXT, "base_url" TEXT, "title" TEXT)''')
c.execute('''INSERT INTO "config" VALUES ('', '', '')''')
conn.commit()
conn.close()
@ganarchy.command()
@click.argument('commit')
def set_commit(commit):
    """Sets the commit that represents the project."""
    # Only a full 40-digit hexadecimal hash is accepted.
    if re.fullmatch("[a-fA-F0-9]{40}", commit) is None:
        raise click.BadArgumentUsage("COMMIT must be a git commit hash")
    db = sqlite3.connect(data_home + "/ganarchy.db")
    db.execute('''UPDATE "config" SET "git_commit"=?''', (commit,))
    db.commit()
    db.close()
@ganarchy.command()
@click.argument('base-url')
def set_base_url(base_url):
    """Sets the GAnarchy instance's base URL. Used for the URI handler."""
    # The value is stored as given; no validation is performed here.
    db = sqlite3.connect(data_home + "/ganarchy.db")
    db.execute('''UPDATE "config" SET "base_url"=?''', (base_url,))
    db.commit()
    db.close()
@ganarchy.command()
@click.argument('title')
def set_title(title):
    """Sets the GAnarchy instance's title. This title is displayed on the index."""
    # The "config" table is updated without a WHERE clause: every row gets the title.
    db = sqlite3.connect(data_home + "/ganarchy.db")
    db.execute('''UPDATE "config" SET "title"=?''', (title,))
    db.commit()
    db.close()
# TODO move --branch into here?
# Command group only: it carries no logic of its own and exists so that the
# repo subcommands can be registered on it via @repo.command().
@ganarchy.group()
def repo():
    """Modifies repos to track."""
@repo.command()
@click.option('--branch', default=None, help="Sets the branch to be used for the repo")
@click.option('--project', default=None, help="Sets the project commit to be used for the repo")
@click.option('--disabled', default=False, is_flag=True, help="Mark the repo as disabled")
@click.argument('url')
def add(branch, project, disabled, url):
    """Adds a repo to track."""
    db = sqlite3.connect(data_home + "/ganarchy.db")
    default_commit, _base_url = db.execute('''SELECT "git_commit", "base_url" FROM "config"''').fetchone()
    # The instance's configured project commit is stored as NULL, not by hash.
    if project == default_commit:
        project = None
    active = 0 if disabled else 1
    db.execute('''INSERT INTO "repos" ("url", "active", "branch", "project") VALUES (?, ?, ?, ?)''', (url, active, branch, project))
    db.commit()
    db.close()
@repo.command()
@click.option('--branch', default=None, help="Sets the branch to be used for the repo")
@click.option('--project', default=None, help="Sets the project commit to be used for the repo")
@click.argument('url')
def enable(branch, project, url):
    """Enables tracking of a repo."""
    db = sqlite3.connect(data_home + "/ganarchy.db")
    default_commit, _base_url = db.execute('''SELECT "git_commit", "base_url" FROM "config"''').fetchone()
    # The instance's configured project commit is stored as NULL, not by hash.
    if project == default_commit:
        project = None
    # "IS" (rather than "=") lets NULL branch/project values match.
    db.execute('''UPDATE "repos" SET "active"=1 WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project))
    db.commit()
    db.close()
@repo.command()
@click.option('--branch', default=None, help="Sets the branch to be used for the repo")
@click.option('--project', default=None, help="Sets the project commit to be used for the repo")
@click.argument('url')
def disable(branch, project, url):
    """Disables tracking of a repo."""
    db = sqlite3.connect(data_home + "/ganarchy.db")
    default_commit, _base_url = db.execute('''SELECT "git_commit", "base_url" FROM "config"''').fetchone()
    # The instance's configured project commit is stored as NULL, not by hash.
    if project == default_commit:
        project = None
    # "IS" (rather than "=") lets NULL branch/project values match.
    db.execute('''UPDATE repos SET "active"=0 WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project))
    db.commit()
    db.close()
@repo.command()
@click.option('--branch', default=None, help="Sets the branch to be used for the repo")
@click.option('--project', default=None, help="Sets the project commit to be used for the repo")
@click.argument('url')
def remove(branch, project, url):
    """Stops tracking a repo."""
    # abort=True makes a negative answer raise click.Abort.  Without it the
    # return value of confirm() was ignored and the rows were deleted even
    # when the user answered "no".
    click.confirm("WARNING: This operation does not delete the commits associated with the given repo! Are you sure you want to continue? This operation cannot be undone.", abort=True)
    conn = sqlite3.connect(data_home + "/ganarchy.db")
    try:
        c = conn.cursor()
        c.execute('''SELECT "git_commit", "base_url" FROM "config"''')
        (project_commit, base_url) = c.fetchone()
        # The instance's configured project commit is stored as NULL, not by hash.
        if project_commit == project:
            project = None
        # "IS" (rather than "=") lets NULL branch/project values match.
        c.execute('''DELETE FROM "repos" WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project))
        c.execute('''DELETE FROM "repo_history" WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project))
        conn.commit()
    finally:
        # Release the database handle even if a statement fails.
        conn.close()
@ -363,10 +288,11 @@ class Git:
GIT = Git(cache_home)
class Repo:
def __init__(self, dbconn, project, url, branch, head_commit, list_metadata=False):
def __init__(self, dbconn, project_commit, url, branch, head_commit, list_metadata=False):
self.url = url
self.branch = branch
self.project_commit = project.commit
self.project_commit = project_commit
self.erroring = False
if not branch:
self.branchname = "gan" + hashlib.sha256(url.encode("utf-8")).hexdigest()
@ -378,9 +304,10 @@ class Repo:
if head_commit:
self.hash = head_commit
else:
try:
try: # FIXME should we even do this?
self.hash = GIT.get_hash(self.branchname)
except GitError:
self.erroring = True
self.hash = None
self.message = None
@ -388,6 +315,7 @@ class Repo:
try:
self.update_metadata()
except GitError:
self.erroring = True
pass
def update_metadata(self):
@ -402,14 +330,15 @@ class Repo:
except subprocess.CalledProcessError as e:
# This may error for various reasons, but some are important: dead links, etc
click.echo(e.output, err=True)
self.erroring = True
return None
pre_hash = self.hash
try:
post_hash = GIT.get_hash(self.branchname)
except GitError as e:
# This should never happen, but maybe there's some edge cases?
# Can you force-push an empty branch?
# TODO
# TODO check
self.erroring = True
return None
self.hash = post_hash
if not pre_hash:
@ -421,30 +350,31 @@ class Repo:
try:
subprocess.check_call(["git", "-C", cache_home, "merge-base", "--is-ancestor", self.project_commit, self.branchname], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
self.update_metadata()
return count, post_hash, self.message
return count
except (subprocess.CalledProcessError, GitError) as e:
click.echo(e, err=True)
self.erroring = True
return None
class Project:
def __init__(self, dbconn, ganarchy, project_commit, list_repos=False):
def __init__(self, dbconn, project_commit, list_repos=False):
self.commit = project_commit
if ganarchy.project_commit == project_commit:
project_commit = None
self.refresh_metadata()
self.repos = None
if list_repos:
repos = []
with dbconn:
for (e, url, branch, head_commit) in dbconn.execute('''SELECT "max"("e"), "url", "branch", "head_commit" FROM (SELECT "max"("T1"."entry") "e", "T1"."url", "T1"."branch", "T1"."head_commit" FROM "repo_history" "T1"
WHERE (SELECT "active" FROM "repos" "T2" WHERE "url" = "T1"."url" AND "branch" IS "T1"."branch" AND "project" IS ?1)
GROUP BY "T1"."url", "T1"."branch"
UNION
SELECT null, "T3"."url", "T3"."branch", null FROM "repos" "T3" WHERE "active" AND "project" IS ?1)
GROUP BY "url" ORDER BY "e"''', (project_commit,)):
repos.append(Repo(dbconn, self, url, branch, head_commit))
self.repos = repos
else:
self.repos = None
self.list_repos(dbconn)
def list_repos(self, dbconn):
repos = []
with dbconn:
for (e, url, branch, head_commit) in dbconn.execute('''SELECT "max"("e"), "url", "branch", "head_commit" FROM (SELECT "max"("T1"."entry") "e", "T1"."url", "T1"."branch", "T1"."head_commit" FROM "repo_history" "T1"
WHERE (SELECT "active" FROM "repos" "T2" WHERE "url" = "T1"."url" AND "branch" IS "T1"."branch" AND "project" IS ?1)
GROUP BY "T1"."url", "T1"."branch"
UNION
SELECT null, "T3"."url", "T3"."branch", null FROM "repos" "T3" WHERE "active" AND "project" IS ?1)
GROUP BY "url" ORDER BY "e"''', (self.commit,)):
repos.append(Repo(dbconn, self.commit, url, branch, head_commit))
self.repos = repos
def refresh_metadata(self):
try:
@ -452,8 +382,8 @@ class Project:
project_title, project_desc = (lambda x: x.groups() if x is not None else ('', None))(re.fullmatch('^\\[Project\\]\s+(.+?)(?:\n\n(.+))?$', project, flags=re.ASCII|re.DOTALL|re.IGNORECASE))
if not project_title.strip(): # FIXME
project_title, project_desc = ("Error parsing project commit",)*2
if project_desc: # FIXME
project_desc = project_desc.strip()
# if project_desc: # FIXME
# project_desc = project_desc.strip()
self.commit_body = project
self.title = project_title
self.description = project_desc
@ -463,74 +393,183 @@ class Project:
self.description = None
def update(self):
# TODO
# TODO? check if working correctly
results = [(repo, repo.update()) for repo in self.repos]
self.refresh_metadata()
return results
class GAnarchy:
def __init__(self, dbconn, list_projects=False):
with dbconn:
# TODO
#(project_commit, base_url, title) = dbconn.execute('''SELECT "git_commit", "base_url", "title" FROM "config"''').fetchone()
(project_commit, base_url) = dbconn.execute('''SELECT "git_commit", "base_url" FROM "config"''').fetchone()
title = None
self.project_commit = project_commit
self.base_url = base_url
if not base_url:
pass ## TODO
if not title:
from urllib.parse import urlparse
title = "GAnarchy on " + urlparse(base_url).hostname
self.title = title
def __init__(self, dbconn, config, list_projects=False, list_repos=False):
base_url = config.base_url
title = config.title
if not base_url:
# FIXME use a more appropriate error type
raise ValueError
if not title:
title = "GAnarchy on " + urlparse(base_url).hostname
self.title = title
self.base_url = base_url
# load config onto DB
c = dbconn.cursor()
c.execute('''CREATE TEMPORARY TABLE "repos" ("url" TEXT PRIMARY KEY, "active" INT, "branch" TEXT, "project" TEXT)''')
c.execute('''CREATE UNIQUE INDEX "temp"."repos_url_branch_project" ON "repos" ("url", "branch", "project")''')
c.execute('''CREATE INDEX "temp"."repos_project" ON "repos" ("project")''')
c.execute('''CREATE INDEX "temp"."repos_active" ON "repos" ("active")''')
for (project_commit, repos) in config.projects.items():
for (repo_url, branches) in repos.items():
for (branchname, options) in branches.items():
if options['active']: # no need to insert inactive repos since they get ignored anyway
c.execute('''INSERT INTO "repos" VALUES (?, ?, ?, ?)''', (repo_url, 1, branchname, project_commit))
dbconn.commit()
if list_projects:
projects = []
with dbconn:
for (project,) in dbconn.execute('''SELECT DISTINCT "project" FROM "repos" '''): # FIXME? *maybe* sort by activity in the future
if project == None:
project = self.project_commit
projects.append(Project(dbconn, ganarchy, project))
projects.sort(key=lambda project: project.title)
projects.append(Project(dbconn, project, list_repos=list_repos))
projects.sort(key=lambda project: project.title) # sort projects by title
self.projects = projects
else:
self.projects = None
class Config:
    """Project/repository configuration loaded from a TOML file.

    ``projects`` is a nested mapping
    ``project_commit -> repo_url -> branch_name -> {'active': bool}``
    in which the default branch ("HEAD" in the file) is keyed by ``None``.
    Configs can be layered: pass an already loaded ``Config`` as ``base`` and
    the new file is applied on top of it.
    """

    def __init__(self, toml_file, base=None, remove=True):
        """Parse ``toml_file`` and apply it on top of ``base``.

        Args:
            toml_file: Open text file object handed to ``qtoml.load``.
            base: Optional ``Config`` whose (already sanitized) projects are
                copied in before ``toml_file`` is applied.
            remove: When true, an entry marked inactive in ``toml_file``
                deactivates the same entry inherited from ``base``; when
                false, an entry stays active once any layer activates it.
        """
        self.projects = self._empty_projects()
        config_data = qtoml.load(toml_file)
        self.title = config_data.get('title', '')
        self.base_url = config_data.get('base_url', '')
        # TODO blocked domains (but only read them from config_data if remove is True)
        blocked_domains = []
        blocked_domain_suffixes = []
        # FIXME remove duplicates and process invalid entries
        self.blocked_domains = tuple(sorted(blocked_domains))
        # MUST be a tuple: str.endswith() accepts a tuple of suffixes, not a list.
        self.blocked_domain_suffixes = tuple(sorted(blocked_domain_suffixes, key=lambda x: x[::-1]))
        # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$")
        if base:
            # Copying into an empty mapping: the value of ``remove`` cannot
            # change the outcome here, but the argument is required.
            self._update_projects(base.projects, remove=False, sanitize=False)  # already sanitized
        projects = config_data.get('projects', {})
        self._update_projects(projects, remove=remove)

    @staticmethod
    def _empty_projects():
        """Return a fresh, empty auto-vivifying ``projects`` mapping."""
        return defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict))))

    def _check_repo_url(self, repo_url):
        """Raise ``ValueError`` unless ``repo_url`` is an acceptable remote URL.

        Rejected: unparsable URLs, invalid ports, local (``file``) or
        scheme-less URLs, URLs without a host, and blocked hosts.
        """
        u = urlparse(repo_url)
        _ = u.port  # accessing .port raises ValueError if the port is invalid
        if u.scheme in ('file', ''):
            raise ValueError("local or scheme-less repo URL")
        hostname = u.hostname
        if hostname is None:
            # e.g. "mailto:x" - has a scheme but no network location.
            raise ValueError("repo URL has no host")
        if hostname in self.blocked_domains or hostname.endswith(self.blocked_domain_suffixes):
            raise ValueError("blocked domain")

    def _update_projects(self, projects, remove, sanitize=True):
        """Merge ``projects`` into ``self.projects``.

        Args:
            projects: Mapping shaped like ``self.projects`` (or the raw
                ``projects`` table of a TOML file).
            remove: Whether an inactive entry overrides an active one that is
                already present (see the truth table below).
            sanitize: When true, malformed entries are silently skipped and
                the branch name "HEAD" is normalized to ``None``.  When
                false, the input is expected to be clean already and any
                malformed entry raises ``ValueError``.

        Raises:
            ValueError: Only with ``sanitize=False``, on a malformed entry.
        """
        if sanitize and not isinstance(projects, dict):
            # TODO emit warnings?
            return
        for (project_commit, repos) in projects.items():
            if sanitize and not isinstance(repos, dict):
                # TODO emit warnings?
                continue
            if sanitize and not re.fullmatch("[0-9a-fA-F]{40}|[0-9a-fA-F]{64}", project_commit):  # future-proofing: sha256 support
                # TODO emit warnings?
                continue
            project = self.projects[project_commit]
            for (repo_url, branches) in repos.items():
                if sanitize and not isinstance(branches, dict):
                    # TODO emit warnings?
                    continue
                try:
                    self._check_repo_url(repo_url)
                except ValueError:
                    if sanitize:
                        # TODO emit warnings?
                        continue
                    raise
                repo = project[repo_url]
                for (branchname, options) in branches.items():
                    if sanitize and not isinstance(options, dict):
                        # TODO emit warnings?
                        continue
                    if branchname == "HEAD":
                        if not sanitize:
                            raise ValueError("unsanitized HEAD branch entry")
                        # feels weird, but generally makes things easier
                        # DO NOT emit warnings here. this is deliberate.
                        branchname = None
                    active = options.get('active', False)
                    if active not in (True, False):
                        if sanitize:
                            # TODO emit warnings?
                            continue
                        raise ValueError("'active' must be a boolean")
                    # Only touch the mapping once the entry is known to be
                    # valid, so every stored branch has an 'active' key.
                    branch = repo[branchname]
                    ## | remove | branch.active | options.active | result |
                    ## |    x   |     false     |     false      |  false |
                    ## |    x   |     false     |     true       |  true  |
                    ## |    x   |     true      |     true       |  true  |
                    ## |  false |     true      |     false      |  true  |
                    ## |  true  |     true      |     false      |  false |
                    branch['active'] = branch.get('active', False) or active
                    if remove and not active:
                        branch['active'] = False
@ganarchy.command()
@click.option('--skip-errors/--no-skip-errors', default=False)
@click.argument('files', type=click.File('r', encoding='utf-8'), nargs=-1)
def merge_configs(skip_errors, files):
    """Merges config files."""
    merged = None
    for toml_file in files:
        try:
            # Disable newline translation before the file is parsed.
            toml_file.reconfigure(newline='')
            # Each file is layered on top of the result so far; remove=False
            # means an entry stays active once any file activates it.
            merged = Config(toml_file, merged, remove=False)
        except (UnicodeDecodeError, qtoml.decoder.TOMLDecodeError):
            if skip_errors:
                continue
            raise
    if not merged:
        # No file was loaded successfully: print nothing.
        return
    rendered = get_env().get_template('index.toml').render(config=merged)
    click.echo(rendered)
@ganarchy.command()
@click.argument('project', required=False)
def cron_target(project):
"""Runs ganarchy as a cron target."""
conf = None
# reverse order is intentional
for d in reversed(config_dirs):
try:
conf = Config(open(d + "/config.toml", 'r', encoding='utf-8', newline=''), conf)
except (OSError, UnicodeDecodeError, qtoml.decoder.TOMLDecodeError):
pass
with open(config_home + "/config.toml", 'r', encoding='utf-8', newline='') as f:
conf = Config(f, conf)
env = get_env()
if project == "config":
# render the config
# doesn't have access to a GAnarchy object. this is deliberate.
template = env.get_template('index.toml')
click.echo(template.render(config = conf))
return
# make sure the cache dir exists
os.makedirs(cache_home, exist_ok=True)
# make sure it is a git repo
subprocess.call(["git", "-C", cache_home, "init", "-q"])
conn = sqlite3.connect(data_home + "/ganarchy.db")
instance = GAnarchy(conn, list_projects=project=="index")
env = jinja2.Environment(loader=get_template_loader(), autoescape=False)
instance = GAnarchy(conn, conf, list_projects=project in ["index", "config"])
if project == "index":
# render the index
template = env.get_template('index.html')
click.echo(template.render(ganarchy = instance))
return
project_commit = instance.project_commit
base_url = instance.base_url
if not base_url or not (project or project_commit):
if not instance.base_url or not project:
click.echo("No base URL or project commit specified", err=True)
return
if project_commit == project:
project = None
elif project is not None:
project_commit = project
entries = []
generate_html = []
c = conn.cursor()
p = Project(conn, instance, project_commit, list_repos=True)
# FIXME: this should be moved into Project.update()
for repo in p.repos:
result = repo.update()
if result is not None:
count, post_hash, msg = result
entries.append((repo.url, count, post_hash, repo.branch, project))
generate_html.append((repo.url, msg, count, repo.branch))
p.refresh_metadata()
p = Project(conn, project, list_repos=True)
results = p.update()
for (repo, count) in results:
if count is not None:
entries.append((repo.url, count, repo.hash, repo.branch, project))
generate_html.append((repo.url, repo.message, count, repo.branch))
# sort stuff twice because reasons
entries.sort(key=lambda x: x[1], reverse=True)
generate_html.sort(key=lambda x: x[2], reverse=True)
@ -547,7 +586,7 @@ def cron_target(project):
project_body = p.commit_body,
project_commit = p.commit,
repos = html_entries,
base_url = base_url,
base_url = instance.base_url,
# I don't think this thing supports deprecating the above?
project = p,
ganarchy = instance))

View File

@ -1,2 +1,3 @@
Click==7.0
Jinja2==2.10.1
qtoml==0.2.4