From b26fdc7f55effd4654664363d7b7cfa6f3927da1 Mon Sep 17 00:00:00 2001 From: Nolan Lawson Date: Tue, 27 Feb 2018 23:38:33 -0800 Subject: [PATCH] correctly measure status length --- package-lock.json | 5 + package.json | 1 + routes/_store/LocalStorageStore.js | 3 + routes/_store/statusComputations.js | 3 +- routes/_utils/handleRegex.js | 3 + routes/_utils/measureText.js | 18 +++ routes/_utils/urlRegex.js | 202 ++++++++++++++++++++++++++++ 7 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 routes/_utils/handleRegex.js create mode 100644 routes/_utils/measureText.js create mode 100644 routes/_utils/urlRegex.js diff --git a/package-lock.json b/package-lock.json index ac4904e..ee29b48 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8937,6 +8937,11 @@ "resolved": "https://registry.npmjs.org/stringstream/-/stringstream-0.0.5.tgz", "integrity": "sha1-TkhM1N5aC7vuGORjB3EKioFiGHg=" }, + "stringz": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/stringz/-/stringz-0.4.0.tgz", + "integrity": "sha512-g4/wJlLo0qRUiEtc9TDst1RulMW6hsiTKwXxpcVNoc7E5P21i9vBti6oBrV9Tzk/P+5doxCULZVqgoocgHcRKQ==" + }, "strip-ansi": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", diff --git a/package.json b/package.json index af42229..6dd57e5 100644 --- a/package.json +++ b/package.json @@ -53,6 +53,7 @@ "sapper": "nolanlawson/sapper#fix-style-loader-built", "serve-static": "^1.13.1", "standard": "^10.0.3", + "stringz": "^0.4.0", "style-loader": "^0.19.1", "svelte": "^1.54.0", "svelte-extras": "^1.6.0", diff --git a/routes/_store/LocalStorageStore.js b/routes/_store/LocalStorageStore.js index 5e4eb70..ecaef04 100644 --- a/routes/_store/LocalStorageStore.js +++ b/routes/_store/LocalStorageStore.js @@ -30,6 +30,9 @@ export class LocalStorageStore extends Store { } }) }) + if (process.browser) { + window.addEventListener('beforeunload', () => this.save()) + } } save () { diff --git a/routes/_store/statusComputations.js b/routes/_store/statusComputations.js index 3d6ad07..d3fae67 100644 --- a/routes/_store/statusComputations.js +++ b/routes/_store/statusComputations.js @@ -1,9 +1,10 @@ import { CHAR_LIMIT } from '../_static/statuses' +import { measureText } from '../_utils/measureText' export function statusComputations (store) { store.compute('rawComposeTextLength', ['rawComposeText'], - (rawComposeText) => rawComposeText.length + (rawComposeText) => measureText(rawComposeText) ) store.compute('rawComposeTextOverLimit', diff --git a/routes/_utils/handleRegex.js b/routes/_utils/handleRegex.js new file mode 100644 index 0000000..a07ec3b --- /dev/null +++ b/routes/_utils/handleRegex.js @@ -0,0 +1,3 @@ +/* eslint-disable */ +export const handleRegex = /(^|[^\/\w])@(([a-z0-9_]+)@[a-z0-9\.\-]+[a-z0-9]+)/ig +/* eslint-enable */ diff --git a/routes/_utils/measureText.js b/routes/_utils/measureText.js new file mode 100644 index 0000000..f9ce555 --- /dev/null +++ b/routes/_utils/measureText.js @@ -0,0 +1,18 @@ +// via https://github.com/tootsuite/mastodon/blob/5d5c0f4/app/javascript/mastodon/features/compose/util/counter.js + +import { urlRegex } from './urlRegex' +import { handleRegex } from './handleRegex' +import { mark, stop } from './marks' +import { length } from 'stringz' + +const urlPlaceholder = 'xxxxxxxxxxxxxxxxxxxxxxx' + +export function measureText (inputText) { + mark('measureText()') + let normalizedText = inputText + .replace(urlRegex, urlPlaceholder) + .replace(handleRegex, '$1@$3') + let len = length(normalizedText) + stop('measureText()') + return len +} diff --git a/routes/_utils/urlRegex.js b/routes/_utils/urlRegex.js new file mode 100644 index 0000000..288e433 --- /dev/null +++ b/routes/_utils/urlRegex.js @@ -0,0 +1,202 @@ +// via https://github.com/tootsuite/mastodon/blob/5d5c0f4/app/javascript/mastodon/features/compose/util/url_regex.js + +/* eslint-disable */ + +const regexen = {} + +const regexSupplant = function (regex, flags) { + flags = flags || '' + if (typeof regex !== 'string') { + if (regex.global && flags.indexOf('g') < 0) { + flags += 'g' + } + if (regex.ignoreCase && flags.indexOf('i') < 0) { + flags += 'i' + } + if (regex.multiline && flags.indexOf('m') < 0) { + flags += 'm' + } + + regex = regex.source + } + return new RegExp(regex.replace(/#\{(\w+)\}/g, function (match, name) { + var newRegex = regexen[name] || '' + if (typeof newRegex !== 'string') { + newRegex = newRegex.source + } + return newRegex + }), flags) +} + +const stringSupplant = function (str, values) { + return str.replace(/#\{(\w+)\}/g, function (match, name) { + return values[name] || '' + }) +} + +export const urlRegex = (function () { + regexen.spaces_group = /\x09-\x0D\x20\x85\xA0\u1680\u180E\u2000-\u200A\u2028\u2029\u202F\u205F\u3000/ + regexen.invalid_chars_group = /\uFFFE\uFEFF\uFFFF\u202A-\u202E/ + regexen.punct = /\!'#%&'\(\)*\+,\\\-\.\/:;<=>\?@\[\]\^_{|}~\$/ + regexen.validUrlPrecedingChars = regexSupplant(/(?:[^A-Za-z0-9@@$###{invalid_chars_group}]|^)/) + regexen.invalidDomainChars = stringSupplant('#{punct}#{spaces_group}#{invalid_chars_group}', regexen) + regexen.validDomainChars = regexSupplant(/[^#{invalidDomainChars}]/) + regexen.validSubdomain = regexSupplant(/(?:(?:#{validDomainChars}(?:[_-]|#{validDomainChars})*)?#{validDomainChars}\.)/) + regexen.validDomainName = regexSupplant(/(?:(?:#{validDomainChars}(?:-|#{validDomainChars})*)?#{validDomainChars}\.)/) + regexen.validGTLD = regexSupplant(RegExp( + '(?:(?:' + + '삼성|닷컴|닷넷|香格里拉|餐厅|食品|飞利浦|電訊盈科|集团|通販|购物|谷歌|诺基亚|联通|网络|网站|网店|网址|组织机构|移动|珠宝|点看|游戏|淡马锡|机构|書籍|时尚|新闻|政府|' + + '政务|手表|手机|我爱你|慈善|微博|广东|工行|家電|娱乐|天主教|大拿|大众汽车|在线|嘉里大酒店|嘉里|商标|商店|商城|公益|公司|八卦|健康|信息|佛山|企业|中文网|中信|世界|' + + 'ポイント|ファッション|セール|ストア|コム|グーグル|クラウド|みんな|คอม|संगठन|नेट|कॉम|همراه|موقع|موبايلي|كوم|كاثوليك|عرب|شبكة|' + + 'بيتك|بازار|العليان|ارامكو|اتصالات|ابوظبي|קום|сайт|рус|орг|онлайн|москва|ком|католик|дети|' + + 'zuerich|zone|zippo|zip|zero|zara|zappos|yun|youtube|you|yokohama|yoga|yodobashi|yandex|yamaxun|' + + 'yahoo|yachts|xyz|xxx|xperia|xin|xihuan|xfinity|xerox|xbox|wtf|wtc|wow|world|works|work|woodside|' + + 'wolterskluwer|wme|winners|wine|windows|win|williamhill|wiki|wien|whoswho|weir|weibo|wedding|wed|' + + 'website|weber|webcam|weatherchannel|weather|watches|watch|warman|wanggou|wang|walter|walmart|' + + 'wales|vuelos|voyage|voto|voting|vote|volvo|volkswagen|vodka|vlaanderen|vivo|viva|vistaprint|' + + 'vista|vision|visa|virgin|vip|vin|villas|viking|vig|video|viajes|vet|versicherung|' + + 'vermögensberatung|vermögensberater|verisign|ventures|vegas|vanguard|vana|vacations|ups|uol|uno|' + + 'university|unicom|uconnect|ubs|ubank|tvs|tushu|tunes|tui|tube|trv|trust|travelersinsurance|' + + 'travelers|travelchannel|travel|training|trading|trade|toys|toyota|town|tours|total|toshiba|' + + 'toray|top|tools|tokyo|today|tmall|tkmaxx|tjx|tjmaxx|tirol|tires|tips|tiffany|tienda|tickets|' + + 'tiaa|theatre|theater|thd|teva|tennis|temasek|telefonica|telecity|tel|technology|tech|team|tdk|' + + 'tci|taxi|tax|tattoo|tatar|tatamotors|target|taobao|talk|taipei|tab|systems|symantec|sydney|' + + 'swiss|swiftcover|swatch|suzuki|surgery|surf|support|supply|supplies|sucks|style|study|studio|' + + 'stream|store|storage|stockholm|stcgroup|stc|statoil|statefarm|statebank|starhub|star|staples|' + + 'stada|srt|srl|spreadbetting|spot|spiegel|space|soy|sony|song|solutions|solar|sohu|software|' + + 'softbank|social|soccer|sncf|smile|smart|sling|skype|sky|skin|ski|site|singles|sina|silk|shriram|' + + 'showtime|show|shouji|shopping|shop|shoes|shiksha|shia|shell|shaw|sharp|shangrila|sfr|sexy|sex|' + + 'sew|seven|ses|services|sener|select|seek|security|secure|seat|search|scot|scor|scjohnson|' + + 'science|schwarz|schule|school|scholarships|schmidt|schaeffler|scb|sca|sbs|sbi|saxo|save|sas|' + + 'sarl|sapo|sap|sanofi|sandvikcoromant|sandvik|samsung|samsclub|salon|sale|sakura|safety|safe|' + + 'saarland|ryukyu|rwe|run|ruhr|rugby|rsvp|room|rogers|rodeo|rocks|rocher|rmit|rip|rio|ril|' + + 'rightathome|ricoh|richardli|rich|rexroth|reviews|review|restaurant|rest|republican|report|' + + 'repair|rentals|rent|ren|reliance|reit|reisen|reise|rehab|redumbrella|redstone|red|recipes|' + + 'realty|realtor|realestate|read|raid|radio|racing|qvc|quest|quebec|qpon|pwc|pub|prudential|pru|' + + 'protection|property|properties|promo|progressive|prof|productions|prod|pro|prime|press|praxi|' + + 'pramerica|post|porn|politie|poker|pohl|pnc|plus|plumbing|playstation|play|place|pizza|pioneer|' + + 'pink|ping|pin|pid|pictures|pictet|pics|piaget|physio|photos|photography|photo|phone|philips|phd|' + + 'pharmacy|pfizer|pet|pccw|pay|passagens|party|parts|partners|pars|paris|panerai|panasonic|' + + 'pamperedchef|page|ovh|ott|otsuka|osaka|origins|orientexpress|organic|org|orange|oracle|open|ooo|' + + 'onyourside|online|onl|ong|one|omega|ollo|oldnavy|olayangroup|olayan|okinawa|office|off|observer|' + + 'obi|nyc|ntt|nrw|nra|nowtv|nowruz|now|norton|northwesternmutual|nokia|nissay|nissan|ninja|nikon|' + + 'nike|nico|nhk|ngo|nfl|nexus|nextdirect|next|news|newholland|new|neustar|network|netflix|netbank|' + + 'net|nec|nba|navy|natura|nationwide|name|nagoya|nadex|nab|mutuelle|mutual|museum|mtr|mtpc|mtn|' + + 'msd|movistar|movie|mov|motorcycles|moto|moscow|mortgage|mormon|mopar|montblanc|monster|money|' + + 'monash|mom|moi|moe|moda|mobily|mobile|mobi|mma|mls|mlb|mitsubishi|mit|mint|mini|mil|microsoft|' + + 'miami|metlife|merckmsd|meo|menu|men|memorial|meme|melbourne|meet|media|med|mckinsey|mcdonalds|' + + 'mcd|mba|mattel|maserati|marshalls|marriott|markets|marketing|market|map|mango|management|man|' + + 'makeup|maison|maif|madrid|macys|luxury|luxe|lupin|lundbeck|ltda|ltd|lplfinancial|lpl|love|lotto|' + + 'lotte|london|lol|loft|locus|locker|loans|loan|lixil|living|live|lipsy|link|linde|lincoln|limo|' + + 'limited|lilly|like|lighting|lifestyle|lifeinsurance|life|lidl|liaison|lgbt|lexus|lego|legal|' + + 'lefrak|leclerc|lease|lds|lawyer|law|latrobe|latino|lat|lasalle|lanxess|landrover|land|lancome|' + + 'lancia|lancaster|lamer|lamborghini|ladbrokes|lacaixa|kyoto|kuokgroup|kred|krd|kpn|kpmg|kosher|' + + 'komatsu|koeln|kiwi|kitchen|kindle|kinder|kim|kia|kfh|kerryproperties|kerrylogistics|kerryhotels|' + + 'kddi|kaufen|juniper|juegos|jprs|jpmorgan|joy|jot|joburg|jobs|jnj|jmp|jll|jlc|jio|jewelry|jetzt|' + + 'jeep|jcp|jcb|java|jaguar|iwc|iveco|itv|itau|istanbul|ist|ismaili|iselect|irish|ipiranga|' + + 'investments|intuit|international|intel|int|insure|insurance|institute|ink|ing|info|infiniti|' + + 'industries|immobilien|immo|imdb|imamat|ikano|iinet|ifm|ieee|icu|ice|icbc|ibm|hyundai|hyatt|' + + 'hughes|htc|hsbc|how|house|hotmail|hotels|hoteles|hot|hosting|host|hospital|horse|honeywell|' + + 'honda|homesense|homes|homegoods|homedepot|holiday|holdings|hockey|hkt|hiv|hitachi|hisamitsu|' + + 'hiphop|hgtv|hermes|here|helsinki|help|healthcare|health|hdfcbank|hdfc|hbo|haus|hangout|hamburg|' + + 'hair|guru|guitars|guide|guge|gucci|guardian|group|grocery|gripe|green|gratis|graphics|grainger|' + + 'gov|got|gop|google|goog|goodyear|goodhands|goo|golf|goldpoint|gold|godaddy|gmx|gmo|gmbh|gmail|' + + 'globo|global|gle|glass|glade|giving|gives|gifts|gift|ggee|george|genting|gent|gea|gdn|gbiz|' + + 'garden|gap|games|game|gallup|gallo|gallery|gal|fyi|futbol|furniture|fund|fun|fujixerox|fujitsu|' + + 'ftr|frontier|frontdoor|frogans|frl|fresenius|free|fox|foundation|forum|forsale|forex|ford|' + + 'football|foodnetwork|food|foo|fly|flsmidth|flowers|florist|flir|flights|flickr|fitness|fit|' + + 'fishing|fish|firmdale|firestone|fire|financial|finance|final|film|fido|fidelity|fiat|ferrero|' + + 'ferrari|feedback|fedex|fast|fashion|farmers|farm|fans|fan|family|faith|fairwinds|fail|fage|' + + 'extraspace|express|exposed|expert|exchange|everbank|events|eus|eurovision|etisalat|esurance|' + + 'estate|esq|erni|ericsson|equipment|epson|epost|enterprises|engineering|engineer|energy|emerck|' + + 'email|education|edu|edeka|eco|eat|earth|dvr|dvag|durban|dupont|duns|dunlop|duck|dubai|dtv|drive|' + + 'download|dot|doosan|domains|doha|dog|dodge|doctor|docs|dnp|diy|dish|discover|discount|directory|' + + 'direct|digital|diet|diamonds|dhl|dev|design|desi|dentist|dental|democrat|delta|deloitte|dell|' + + 'delivery|degree|deals|dealer|deal|dds|dclk|day|datsun|dating|date|data|dance|dad|dabur|cyou|' + + 'cymru|cuisinella|csc|cruises|cruise|crs|crown|cricket|creditunion|creditcard|credit|courses|' + + 'coupons|coupon|country|corsica|coop|cool|cookingchannel|cooking|contractors|contact|consulting|' + + 'construction|condos|comsec|computer|compare|company|community|commbank|comcast|com|cologne|' + + 'college|coffee|codes|coach|clubmed|club|cloud|clothing|clinique|clinic|click|cleaning|claims|' + + 'cityeats|city|citic|citi|citadel|cisco|circle|cipriani|church|chrysler|chrome|christmas|chloe|' + + 'chintai|cheap|chat|chase|channel|chanel|cfd|cfa|cern|ceo|center|ceb|cbs|cbre|cbn|cba|catholic|' + + 'catering|cat|casino|cash|caseih|case|casa|cartier|cars|careers|career|care|cards|caravan|car|' + + 'capitalone|capital|capetown|canon|cancerresearch|camp|camera|cam|calvinklein|call|cal|cafe|cab|' + + 'bzh|buzz|buy|business|builders|build|bugatti|budapest|brussels|brother|broker|broadway|' + + 'bridgestone|bradesco|box|boutique|bot|boston|bostik|bosch|boots|booking|book|boo|bond|bom|bofa|' + + 'boehringer|boats|bnpparibas|bnl|bmw|bms|blue|bloomberg|blog|blockbuster|blanco|blackfriday|' + + 'black|biz|bio|bingo|bing|bike|bid|bible|bharti|bet|bestbuy|best|berlin|bentley|beer|beauty|' + + 'beats|bcn|bcg|bbva|bbt|bbc|bayern|bauhaus|basketball|baseball|bargains|barefoot|barclays|' + + 'barclaycard|barcelona|bar|bank|band|bananarepublic|banamex|baidu|baby|azure|axa|aws|avianca|' + + 'autos|auto|author|auspost|audio|audible|audi|auction|attorney|athleta|associates|asia|asda|arte|' + + 'art|arpa|army|archi|aramco|arab|aquarelle|apple|app|apartments|aol|anz|anquan|android|analytics|' + + 'amsterdam|amica|amfam|amex|americanfamily|americanexpress|alstom|alsace|ally|allstate|allfinanz|' + + 'alipay|alibaba|alfaromeo|akdn|airtel|airforce|airbus|aigo|aig|agency|agakhan|africa|afl|' + + 'afamilycompany|aetna|aero|aeg|adult|ads|adac|actor|active|aco|accountants|accountant|accenture|' + + 'academy|abudhabi|abogado|able|abc|abbvie|abbott|abb|abarth|aarp|aaa|onion' + + ')(?=[^0-9a-zA-Z@]|$))')) + regexen.validCCTLD = regexSupplant(RegExp( + '(?:(?:' + + '한국|香港|澳門|新加坡|台灣|台湾|中國|中国|გე|ไทย|ලංකා|ഭാരതം|ಭಾರತ|భారత్|சிங்கப்பூர்|இலங்கை|இந்தியா|ଭାରତ|ભારત|ਭਾਰਤ|' + + 'ভাৰত|ভারত|বাংলা|भारोत|भारतम्|भारत|ڀارت|پاکستان|مليسيا|مصر|قطر|فلسطين|عمان|عراق|سورية|سودان|تونس|' + + 'بھارت|بارت|ایران|امارات|المغرب|السعودية|الجزائر|الاردن|հայ|қаз|укр|срб|рф|мон|мкд|ею|бел|бг|ελ|' + + 'zw|zm|za|yt|ye|ws|wf|vu|vn|vi|vg|ve|vc|va|uz|uy|us|um|uk|ug|ua|tz|tw|tv|tt|tr|tp|to|tn|tm|tl|tk|' + + 'tj|th|tg|tf|td|tc|sz|sy|sx|sv|su|st|ss|sr|so|sn|sm|sl|sk|sj|si|sh|sg|se|sd|sc|sb|sa|rw|ru|rs|ro|' + + 're|qa|py|pw|pt|ps|pr|pn|pm|pl|pk|ph|pg|pf|pe|pa|om|nz|nu|nr|np|no|nl|ni|ng|nf|ne|nc|na|mz|my|mx|' + + 'mw|mv|mu|mt|ms|mr|mq|mp|mo|mn|mm|ml|mk|mh|mg|mf|me|md|mc|ma|ly|lv|lu|lt|ls|lr|lk|li|lc|lb|la|kz|' + + 'ky|kw|kr|kp|kn|km|ki|kh|kg|ke|jp|jo|jm|je|it|is|ir|iq|io|in|im|il|ie|id|hu|ht|hr|hn|hm|hk|gy|gw|' + + 'gu|gt|gs|gr|gq|gp|gn|gm|gl|gi|gh|gg|gf|ge|gd|gb|ga|fr|fo|fm|fk|fj|fi|eu|et|es|er|eh|eg|ee|ec|dz|' + + 'do|dm|dk|dj|de|cz|cy|cx|cw|cv|cu|cr|co|cn|cm|cl|ck|ci|ch|cg|cf|cd|cc|ca|bz|by|bw|bv|bt|bs|br|bq|' + + 'bo|bn|bm|bl|bj|bi|bh|bg|bf|be|bd|bb|ba|az|ax|aw|au|at|as|ar|aq|ao|an|am|al|ai|ag|af|ae|ad|ac' + + ')(?=[^0-9a-zA-Z@]|$))')) + regexen.validPunycode = /(?:xn--[0-9a-z]+)/ + regexen.validSpecialCCTLD = /(?:(?:co|tv)(?=[^0-9a-zA-Z@]|$))/ + regexen.validDomain = regexSupplant(/(?:#{validSubdomain}*#{validDomainName}(?:#{validGTLD}|#{validCCTLD}|#{validPunycode}))/) + regexen.validPortNumber = /[0-9]+/ + regexen.pd = /\u002d\u058a\u05be\u1400\u1806\u2010-\u2015\u2e17\u2e1a\u2e3a\u2e40\u301c\u3030\u30a0\ufe31\ufe58\ufe63\uff0d/ + regexen.validGeneralUrlPathChars = regexSupplant(/[^#{spaces_group}\(\)\?]/i) + // Allow URL paths to contain up to two nested levels of balanced parens + // 1. Used in Wikipedia URLs like /Primer_(film) + // 2. Used in IIS sessions like /S(dfd346)/ + // 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/ + regexen.validUrlBalancedParens = regexSupplant( + '\\(' + + '(?:' + + '#{validGeneralUrlPathChars}+' + + '|' + + // allow one nested level of balanced parentheses + '(?:' + + '#{validGeneralUrlPathChars}*' + + '\\(' + + '#{validGeneralUrlPathChars}+' + + '\\)' + + '#{validGeneralUrlPathChars}*' + + ')' + + ')' + + '\\)', + 'i') + // Valid end-of-path chracters (so /foo. does not gobble the period). + // 1. Allow =&# for empty URL parameters and other URL-join artifacts + regexen.validUrlPathEndingChars = regexSupplant(/[^#{spaces_group}\(\)\?!\*';:=\,\.\$%\[\]#{pd}~&\|@]|(?:#{validUrlBalancedParens})/i) + // Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/ + regexen.validUrlPath = regexSupplant('(?:' + + '(?:' + + '#{validGeneralUrlPathChars}*' + + '(?:#{validUrlBalancedParens}#{validGeneralUrlPathChars}*)*' + + '#{validUrlPathEndingChars}' + + ')|(?:@#{validGeneralUrlPathChars}+\/)' + + ')', 'i') + regexen.validUrlQueryChars = /[a-z0-9!?\*'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i + regexen.validUrlQueryEndingChars = /[a-z0-9_&=#\/]/i + regexen.validUrl = regexSupplant( + '(' + // $1 URL + '(https?:\\/\\/)' + // $2 Protocol + '(#{validDomain})' + // $3 Domain(s) + '(?::(#{validPortNumber}))?' + // $4 Port number (optional) + '(\\/#{validUrlPath}*)?' + // $5 URL Path + '(\\?#{validUrlQueryChars}*#{validUrlQueryEndingChars})?' + // $6 Query String + ')', + 'gi') + return regexen.validUrl +}()) + +/* eslint-enable */