diff --git a/ganarchy.py b/ganarchy.py index bfb4c14..9278429 100755 --- a/ganarchy.py +++ b/ganarchy.py @@ -24,31 +24,37 @@ import hashlib import hmac import jinja2 import re +import qtoml +from collections import defaultdict +from urllib.parse import urlparse MIGRATIONS = { - "gen-index": ( + "toml-config": ( ( - """ALTER TABLE "config" ADD COLUMN "title" TEXT"""), - (), - "supports generating an index page" + '''UPDATE "repo_history" SET "project" = (SELECT "git_commit" FROM "config") WHERE "project" IS NULL''', + '''ALTER TABLE "repos" RENAME TO "repos_old"''',), + ( + '''UPDATE "repo_history" SET "project" = NULL WHERE "project" = (SELECT "git_commit" FROM "config")''', + '''ALTER TABLE "repos_old" RENAME TO "repos"''',), + "switches to toml config format. the old 'repos' table is preserved as 'repos_old'" ), "better-project-management": ( ( - """ALTER TABLE "repos" ADD COLUMN "branch" TEXT""", - """ALTER TABLE "repos" ADD COLUMN "project" TEXT""", - """CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")""", - """CREATE INDEX "repos_project" ON "repos" ("project")""", - """ALTER TABLE "repo_history" ADD COLUMN "branch" TEXT""", - """ALTER TABLE "repo_history" ADD COLUMN "project" TEXT""", - """CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")"""), + '''ALTER TABLE "repos" ADD COLUMN "branch" TEXT''', + '''ALTER TABLE "repos" ADD COLUMN "project" TEXT''', + '''CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")''', + '''CREATE INDEX "repos_project" ON "repos" ("project")''', + '''ALTER TABLE "repo_history" ADD COLUMN "branch" TEXT''', + '''ALTER TABLE "repo_history" ADD COLUMN "project" TEXT''', + '''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''',), ( - """DELETE FROM "repos" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL""", - """DELETE FROM "repo_history" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL"""), + '''DELETE FROM "repos" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''', + '''DELETE FROM "repo_history" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''',), "supports multiple projects, and allows choosing non-default branches" ), "test": ( - ("""-- apply""",), - ("""-- revert""",), + ('''-- apply''',), + ('''-- revert''',), "does nothing" ) } @@ -120,6 +126,17 @@ def get_template_loader():

+""", + ## index.toml + 'index.toml': """# Generated by GAnarchy + +{%- for project, repos in config.projects.items() %} +[projects.{{project}}] +{%- for repo_url, branches in repos.items() %}{% for branch, options in branches.items() %}{% if options.active %} +"{{repo_url|tomle}}".{% if branch %}"{{branch|tomle}}"{% else %}HEAD{% endif %} = { active=true } +{%- endif %}{% endfor %} +{%- endfor %} +{% endfor -%} """, ## project.html FIXME 'project.html': """ @@ -169,6 +186,24 @@ def get_template_loader(): }) ]) +tomletrans = str.maketrans({ + 0: '\\u0000', 1: '\\u0001', 2: '\\u0002', 3: '\\u0003', 4: '\\u0004', + 5: '\\u0005', 6: '\\u0006', 7: '\\u0007', 8: '\\b', 9: '\\t', 10: '\\n', + 11: '\\u000B', 12: '\\f', 13: '\\r', 14: '\\u000E', 15: '\\u000F', + 16: '\\u0010', 17: '\\u0011', 18: '\\u0012', 19: '\\u0013', 20: '\\u0014', + 21: '\\u0015', 22: '\\u0016', 23: '\\u0017', 24: '\\u0018', 25: '\\u0019', + 26: '\\u001A', 27: '\\u001B', 28: '\\u001C', 29: '\\u001D', 30: '\\u001E', + 31: '\\u001F', '"': '\\"', '\\': '\\\\' + }) +def tomlescape(value): + return value.translate(tomletrans) + +def get_env(): + env = jinja2.Environment(loader=get_template_loader(), autoescape=False) + env.filters['tomlescape'] = tomlescape + env.filters['tomle'] = env.filters['tomlescape'] + return env + @click.group() def ganarchy(): @@ -180,118 +215,8 @@ def initdb(): os.makedirs(data_home, exist_ok=True) conn = sqlite3.connect(data_home + "/ganarchy.db") c = conn.cursor() - c.execute('''CREATE TABLE "repos" ("url" TEXT PRIMARY KEY, "active" INT, "branch" TEXT, "project" TEXT)''') - c.execute('''CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")''') - c.execute('''CREATE INDEX "repos_project" ON "repos" ("project")''') - c.execute('''CREATE INDEX "repos_active" ON "repos" ("active")''') c.execute('''CREATE TABLE "repo_history" ("entry" INTEGER PRIMARY KEY ASC AUTOINCREMENT, "url" TEXT, "count" INTEGER, "head_commit" TEXT, "branch" TEXT, "project" TEXT)''') c.execute('''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''') - c.execute('''CREATE TABLE "config" ("git_commit" TEXT, "base_url" TEXT, "title" TEXT)''') - c.execute('''INSERT INTO "config" VALUES ('', '', '')''') - conn.commit() - conn.close() - -@ganarchy.command() -@click.argument('commit') -def set_commit(commit): - """Sets the commit that represents the project.""" - if not re.fullmatch("[a-fA-F0-9]{40}", commit): - raise click.BadArgumentUsage("COMMIT must be a git commit hash") - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - c.execute('''UPDATE "config" SET "git_commit"=?''', (commit,)) - conn.commit() - conn.close() - -@ganarchy.command() -@click.argument('base-url') -def set_base_url(base_url): - """Sets the GAnarchy instance's base URL. Used for the URI handler.""" - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - c.execute('''UPDATE "config" SET "base_url"=?''', (base_url,)) - conn.commit() - conn.close() - -@ganarchy.command() -@click.argument('title') -def set_title(title): - """Sets the GAnarchy instance's title. This title is displayed on the index.""" - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - c.execute('''UPDATE "config" SET "title"=?''', (title,)) - conn.commit() - conn.close() - -# TODO move --branch into here? -@ganarchy.group() -def repo(): - """Modifies repos to track.""" - -@repo.command() -@click.option('--branch', default=None, help="Sets the branch to be used for the repo") -@click.option('--project', default=None, help="Sets the project commit to be used for the repo") -@click.option('--disabled', default=False, is_flag=True, help="Mark the repo as disabled") -@click.argument('url') -def add(branch, project, disabled, url): - """Adds a repo to track.""" - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - c.execute('''SELECT "git_commit", "base_url" FROM "config"''') - (project_commit, base_url) = c.fetchone() - if project_commit == project: - project = None - c.execute('''INSERT INTO "repos" ("url", "active", "branch", "project") VALUES (?, ?, ?, ?)''', (url, int(not disabled), branch, project)) - conn.commit() - conn.close() - -@repo.command() -@click.option('--branch', default=None, help="Sets the branch to be used for the repo") -@click.option('--project', default=None, help="Sets the project commit to be used for the repo") -@click.argument('url') -def enable(branch, project, url): - """Enables tracking of a repo.""" - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - c.execute('''SELECT "git_commit", "base_url" FROM "config"''') - (project_commit, base_url) = c.fetchone() - if project_commit == project: - project = None - c.execute('''UPDATE "repos" SET "active"=1 WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project)) - conn.commit() - conn.close() - -@repo.command() -@click.option('--branch', default=None, help="Sets the branch to be used for the repo") -@click.option('--project', default=None, help="Sets the project commit to be used for the repo") -@click.argument('url') -def disable(branch, project, url): - """Disables tracking of a repo.""" - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - c.execute('''SELECT "git_commit", "base_url" FROM "config"''') - (project_commit, base_url) = c.fetchone() - if project_commit == project: - project = None - c.execute('''UPDATE repos SET "active"=0 WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project)) - conn.commit() - conn.close() - -@repo.command() -@click.option('--branch', default=None, help="Sets the branch to be used for the repo") -@click.option('--project', default=None, help="Sets the project commit to be used for the repo") -@click.argument('url') -def remove(branch, project, url): - """Stops tracking a repo.""" - click.confirm("WARNING: This operation does not delete the commits associated with the given repo! Are you sure you want to continue? This operation cannot be undone.") - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - c.execute('''SELECT "git_commit", "base_url" FROM "config"''') - (project_commit, base_url) = c.fetchone() - if project_commit == project: - project = None - c.execute('''DELETE FROM "repos" WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project)) - c.execute('''DELETE FROM "repo_history" WHERE "url"=? AND "branch" IS ? AND "project" IS ?''', (url, branch, project)) conn.commit() conn.close() @@ -363,10 +288,11 @@ class Git: GIT = Git(cache_home) class Repo: - def __init__(self, dbconn, project, url, branch, head_commit, list_metadata=False): + def __init__(self, dbconn, project_commit, url, branch, head_commit, list_metadata=False): self.url = url self.branch = branch - self.project_commit = project.commit + self.project_commit = project_commit + self.erroring = False if not branch: self.branchname = "gan" + hashlib.sha256(url.encode("utf-8")).hexdigest() @@ -378,9 +304,10 @@ class Repo: if head_commit: self.hash = head_commit else: - try: + try: # FIXME should we even do this? self.hash = GIT.get_hash(self.branchname) except GitError: + self.erroring = True self.hash = None self.message = None @@ -388,6 +315,7 @@ class Repo: try: self.update_metadata() except GitError: + self.erroring = True pass def update_metadata(self): @@ -402,14 +330,15 @@ class Repo: except subprocess.CalledProcessError as e: # This may error for various reasons, but some are important: dead links, etc click.echo(e.output, err=True) + self.erroring = True return None pre_hash = self.hash try: post_hash = GIT.get_hash(self.branchname) except GitError as e: # This should never happen, but maybe there's some edge cases? - # Can you force-push an empty branch? - # TODO + # TODO check + self.erroring = True return None self.hash = post_hash if not pre_hash: @@ -421,30 +350,31 @@ class Repo: try: subprocess.check_call(["git", "-C", cache_home, "merge-base", "--is-ancestor", self.project_commit, self.branchname], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) self.update_metadata() - return count, post_hash, self.message + return count except (subprocess.CalledProcessError, GitError) as e: click.echo(e, err=True) + self.erroring = True return None class Project: - def __init__(self, dbconn, ganarchy, project_commit, list_repos=False): + def __init__(self, dbconn, project_commit, list_repos=False): self.commit = project_commit - if ganarchy.project_commit == project_commit: - project_commit = None self.refresh_metadata() + self.repos = None if list_repos: - repos = [] - with dbconn: - for (e, url, branch, head_commit) in dbconn.execute('''SELECT "max"("e"), "url", "branch", "head_commit" FROM (SELECT "max"("T1"."entry") "e", "T1"."url", "T1"."branch", "T1"."head_commit" FROM "repo_history" "T1" - WHERE (SELECT "active" FROM "repos" "T2" WHERE "url" = "T1"."url" AND "branch" IS "T1"."branch" AND "project" IS ?1) - GROUP BY "T1"."url", "T1"."branch" - UNION - SELECT null, "T3"."url", "T3"."branch", null FROM "repos" "T3" WHERE "active" AND "project" IS ?1) - GROUP BY "url" ORDER BY "e"''', (project_commit,)): - repos.append(Repo(dbconn, self, url, branch, head_commit)) - self.repos = repos - else: - self.repos = None + self.list_repos(dbconn) + + def list_repos(self, dbconn): + repos = [] + with dbconn: + for (e, url, branch, head_commit) in dbconn.execute('''SELECT "max"("e"), "url", "branch", "head_commit" FROM (SELECT "max"("T1"."entry") "e", "T1"."url", "T1"."branch", "T1"."head_commit" FROM "repo_history" "T1" + WHERE (SELECT "active" FROM "repos" "T2" WHERE "url" = "T1"."url" AND "branch" IS "T1"."branch" AND "project" IS ?1) + GROUP BY "T1"."url", "T1"."branch" + UNION + SELECT null, "T3"."url", "T3"."branch", null FROM "repos" "T3" WHERE "active" AND "project" IS ?1) + GROUP BY "url" ORDER BY "e"''', (self.commit,)): + repos.append(Repo(dbconn, self.commit, url, branch, head_commit)) + self.repos = repos def refresh_metadata(self): try: @@ -452,8 +382,8 @@ class Project: project_title, project_desc = (lambda x: x.groups() if x is not None else ('', None))(re.fullmatch('^\\[Project\\]\s+(.+?)(?:\n\n(.+))?$', project, flags=re.ASCII|re.DOTALL|re.IGNORECASE)) if not project_title.strip(): # FIXME project_title, project_desc = ("Error parsing project commit",)*2 - if project_desc: # FIXME - project_desc = project_desc.strip() + # if project_desc: # FIXME + # project_desc = project_desc.strip() self.commit_body = project self.title = project_title self.description = project_desc @@ -463,74 +393,183 @@ class Project: self.description = None def update(self): - # TODO + # TODO? check if working correctly + results = [(repo, repo.update()) for repo in self.repos] self.refresh_metadata() + return results class GAnarchy: - def __init__(self, dbconn, list_projects=False): - with dbconn: - # TODO - #(project_commit, base_url, title) = dbconn.execute('''SELECT "git_commit", "base_url", "title" FROM "config"''').fetchone() - (project_commit, base_url) = dbconn.execute('''SELECT "git_commit", "base_url" FROM "config"''').fetchone() - title = None - self.project_commit = project_commit - self.base_url = base_url - if not base_url: - pass ## TODO - if not title: - from urllib.parse import urlparse - title = "GAnarchy on " + urlparse(base_url).hostname - self.title = title + def __init__(self, dbconn, config, list_projects=False, list_repos=False): + base_url = config.base_url + title = config.title + if not base_url: + # FIXME use a more appropriate error type + raise ValueError + if not title: + title = "GAnarchy on " + urlparse(base_url).hostname + self.title = title + self.base_url = base_url + # load config onto DB + c = dbconn.cursor() + c.execute('''CREATE TEMPORARY TABLE "repos" ("url" TEXT PRIMARY KEY, "active" INT, "branch" TEXT, "project" TEXT)''') + c.execute('''CREATE UNIQUE INDEX "temp"."repos_url_branch_project" ON "repos" ("url", "branch", "project")''') + c.execute('''CREATE INDEX "temp"."repos_project" ON "repos" ("project")''') + c.execute('''CREATE INDEX "temp"."repos_active" ON "repos" ("active")''') + for (project_commit, repos) in config.projects.items(): + for (repo_url, branches) in repos.items(): + for (branchname, options) in branches.items(): + if options['active']: # no need to insert inactive repos since they get ignored anyway + c.execute('''INSERT INTO "repos" VALUES (?, ?, ?, ?)''', (repo_url, 1, branchname, project_commit)) + dbconn.commit() if list_projects: projects = [] with dbconn: for (project,) in dbconn.execute('''SELECT DISTINCT "project" FROM "repos" '''): # FIXME? *maybe* sort by activity in the future - if project == None: - project = self.project_commit - projects.append(Project(dbconn, ganarchy, project)) - projects.sort(key=lambda project: project.title) + projects.append(Project(dbconn, project, list_repos=list_repos)) + projects.sort(key=lambda project: project.title) # sort projects by title self.projects = projects else: self.projects = None +class Config: + def __init__(self, toml_file, base=None, remove=True): + self.projects = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))) + config_data = qtoml.load(toml_file) + self.title = config_data.get('title', '') + self.base_url = config_data.get('base_url', '') + # TODO blocked domains (but only read them from config_data if remove is True) + self.blocked_domains = [] + self.blocked_domain_suffixes = [] + self.blocked_domains.sort() + self.blocked_domain_suffixes.sort(key=lambda x: x[::-1]) + # FIXME remove duplicates and process invalid entries + self.blocked_domains = tuple(self.blocked_domains) + self.blocked_domain_suffixes = tuple(self.blocked_domain_suffixes) # MUST be tuple + # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$") + if base: + self._update_projects(base.projects, sanitize=False) # already sanitized + projects = config_data.get('projects', {}) + self._update_projects(projects, remove=remove) + + def _update_projects(self, projects, remove, sanitize=True): + for (project_commit, repos) in projects.items(): + if sanitize and not isinstance(repos, dict): + # TODO emit warnings? + continue + if sanitize and not re.fullmatch("[0-9a-fA-F]{40}|[0-9a-fA-F]{64}", project_commit): # future-proofing: sha256 support + # TODO emit warnings? + continue + project = self.projects[project_commit] + for (repo_url, branches) in repos.items(): + if sanitize and not isinstance(branches, dict): + # TODO emit warnings? + continue + try: + u = urlparse(repo_url) + if not u: + raise ValueError + getattr(u, 'port') # raises ValueError if port is invalid + if u.scheme in ('file', ''): + raise ValueError + if (u.hostname in self.blocked_domains) or (u.hostname.endswith(self.blocked_domain_suffixes)): + raise ValueError + except ValueError: + if sanitize: + # TODO emit warnings? + continue + else: + raise + repo = project[repo_url] + for (branchname, options) in branches.items(): + if sanitize and not isinstance(options, dict): + # TODO emit warnings? + continue + if branchname == "HEAD": + if sanitize: + # feels weird, but generally makes things easier + # DO NOT emit warnings here. this is deliberate. + branchname = None + else: + raise ValueError + branch = repo[branchname] + active = options.get('active', False) + if active not in (True, False): + if sanitize: + # TODO emit warnings? + continue + else: + raise ValueError + ## | remove | branch.active | options.active | result | + ## | x | false | false | false | + ## | x | false | true | true | + ## | x | true | true | true | + ## | false | true | false | true | + ## | true | true | false | false | + branch['active'] = branch.get('active', False) or active + if remove and not active: + branch['active'] = False + +@ganarchy.command() +@click.option('--skip-errors/--no-skip-errors', default=False) +@click.argument('files', type=click.File('r', encoding='utf-8'), nargs=-1) +def merge_configs(skip_errors, files): + """Merges config files.""" + config = None + for f in files: + try: + f.reconfigure(newline='') + config = Config(f, config, remove=False) + except (UnicodeDecodeError, qtoml.decoder.TOMLDecodeError): + if not skip_errors: + raise + if config: + env = get_env() + template = env.get_template('index.toml') + click.echo(template.render(config=config)) @ganarchy.command() @click.argument('project', required=False) def cron_target(project): """Runs ganarchy as a cron target.""" + conf = None + # reverse order is intentional + for d in reversed(config_dirs): + try: + conf = Config(open(d + "/config.toml", 'r', encoding='utf-8', newline=''), conf) + except (OSError, UnicodeDecodeError, qtoml.decoder.TOMLDecodeError): + pass + with open(config_home + "/config.toml", 'r', encoding='utf-8', newline='') as f: + conf = Config(f, conf) + env = get_env() + if project == "config": + # render the config + # doesn't have access to a GAnarchy object. this is deliberate. + template = env.get_template('index.toml') + click.echo(template.render(config = conf)) + return # make sure the cache dir exists os.makedirs(cache_home, exist_ok=True) # make sure it is a git repo subprocess.call(["git", "-C", cache_home, "init", "-q"]) conn = sqlite3.connect(data_home + "/ganarchy.db") - instance = GAnarchy(conn, list_projects=project=="index") - env = jinja2.Environment(loader=get_template_loader(), autoescape=False) + instance = GAnarchy(conn, conf, list_projects=project in ["index", "config"]) if project == "index": # render the index template = env.get_template('index.html') click.echo(template.render(ganarchy = instance)) return - project_commit = instance.project_commit - base_url = instance.base_url - if not base_url or not (project or project_commit): + if not instance.base_url or not project: click.echo("No base URL or project commit specified", err=True) return - if project_commit == project: - project = None - elif project is not None: - project_commit = project entries = [] generate_html = [] c = conn.cursor() - p = Project(conn, instance, project_commit, list_repos=True) - # FIXME: this should be moved into Project.update() - for repo in p.repos: - result = repo.update() - if result is not None: - count, post_hash, msg = result - entries.append((repo.url, count, post_hash, repo.branch, project)) - generate_html.append((repo.url, msg, count, repo.branch)) - p.refresh_metadata() + p = Project(conn, project, list_repos=True) + results = p.update() + for (repo, count) in results: + if count is not None: + entries.append((repo.url, count, repo.hash, repo.branch, project)) + generate_html.append((repo.url, repo.message, count, repo.branch)) # sort stuff twice because reasons entries.sort(key=lambda x: x[1], reverse=True) generate_html.sort(key=lambda x: x[2], reverse=True) @@ -547,7 +586,7 @@ def cron_target(project): project_body = p.commit_body, project_commit = p.commit, repos = html_entries, - base_url = base_url, + base_url = instance.base_url, # I don't think this thing supports deprecating the above? project = p, ganarchy = instance)) diff --git a/requirements.txt b/requirements.txt index 729e97d..4128339 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ Click==7.0 Jinja2==2.10.1 +qtoml==0.2.4