add hunspell spell checks, fix bugs in generation of plaintext file

This commit is contained in:
Thibault Kruse 2016-05-13 23:14:52 +09:00
parent 0785e0b415
commit 4a4edb5807
4 changed files with 566 additions and 9 deletions

View File

@ -9,6 +9,11 @@ language: c++
# alternatives: gcc, clang, or both (as yaml list) # alternatives: gcc, clang, or both (as yaml list)
compiler: clang compiler: clang
# Install hunspell on the CI machine through the APT addon; the Makefile's
# hunspell-check target invokes the `hunspell` binary.
addons:
  apt:
    packages:
      - hunspell
install: install:
- -

View File

@ -17,6 +17,7 @@ all: \
check-markdown \ check-markdown \
check-references \ check-references \
check-notabs \ check-notabs \
hunspell-check \
cpplint-all \ cpplint-all \
check-badchars check-badchars
@ -78,6 +79,17 @@ check-badchars: $(SOURCEPATH) $(BUILD_DIR) Makefile
if [ -s $(BUILD_DIR)/CppCoreGuidelines.md.badchars ]; then echo 'Warning: Undesired chars (–’‘“”¸…¦) found, use straight quotes instead:'; cat $(BUILD_DIR)/CppCoreGuidelines.md.badchars; false; fi; if [ -s $(BUILD_DIR)/CppCoreGuidelines.md.badchars ]; then echo 'Warning: Undesired chars (–’‘“”¸…¦) found, use straight quotes instead:'; cat $(BUILD_DIR)/CppCoreGuidelines.md.badchars; false; fi;
.PHONY: hunspell-check
# Spell-check the HTML-stripped plain text using the personal dictionary
# hunspell/isocpp.dic. hunspell's output is captured in
# $(BUILD_DIR)/hunspell-report.txt; if that report is non-empty it is printed
# after a warning and the target fails.
# The input is taken from the prerequisite ($<) so the file that gets checked
# is always the one make just brought up to date, whatever BUILD_DIR is set to.
hunspell-check: $(BUILD_DIR)/plain-nohtml.txt
	hunspell -p hunspell/isocpp.dic -u < $< > $(BUILD_DIR)/hunspell-report.txt
	if [ -s $(BUILD_DIR)/hunspell-report.txt ]; then echo 'Warning: Spellcheck failed, fix words or add to dictionary:'; cat $(BUILD_DIR)/hunspell-report.txt; false; fi;
# Only list the words that are not in the dictionary.
# To accept all of them, append them to the bottom of hunspell/isocpp.dic, then
# re-sort and de-duplicate the dictionary with:
# cat hunspell/isocpp.dic | sort | uniq > hunspell/isocpp.dic2; mv hunspell/isocpp.dic2 hunspell/isocpp.dic
.PHONY: hunspell-list
# The prerequisite is the HTML-stripped text, i.e. the very file the recipe
# reads, so make regenerates it before hunspell runs.
hunspell-list: $(BUILD_DIR)/plain-nohtml.txt
	hunspell -p hunspell/isocpp.dic -l < $<
#### Cpplint #### Cpplint
@ -96,6 +108,9 @@ $(BUILD_DIR)/codeblocks: splitfile
$(BUILD_DIR)/plain.txt: splitfile $(BUILD_DIR)/plain.txt: splitfile
# Derive plain-nohtml.txt from plain.txt by deleting every span that runs from
# `<a name="` or `<a href="` through a closing `</a>` on the same line.
# $< is the plain.txt prerequisite, $@ is the plain-nohtml.txt target.
$(BUILD_DIR)/plain-nohtml.txt: $(BUILD_DIR)/plain.txt
	sed 's;<a \(name\|href\)=".*</a>;;g' $< > $@
.PHONY: splitfile .PHONY: splitfile
splitfile: $(SOURCEPATH) ./python/md-split.py splitfile: $(SOURCEPATH) ./python/md-split.py
python ./python/md-split.py $(SOURCEPATH) $(BUILD_DIR)/plain.txt $(BUILD_DIR)/codeblocks python ./python/md-split.py $(SOURCEPATH) $(BUILD_DIR)/plain.txt $(BUILD_DIR)/codeblocks

539
scripts/hunspell/isocpp.dic Normal file
View File

@ -0,0 +1,539 @@
'
0xFF0000
'14
2D
2K
2ndEdition
2RDU00001
3rdEdition
78e
86xWVb4XIyE
98's
à
a1
A1
a2
A2
aa
ABA
abi
ABI
ABIs
abstr
accessor
ack
addressof
adl
ADL
Adve
Alexandrescu
Alexandrescu01
algo
alloc
alloc0
ap
API
APIs
archetypical
arg
argh
args
arr2
arrayindex
ASIC
AST
async
BDE
behaviorless
BigPOD
Bjarne
Bloomberg
Boehm
bool
buf
bufmax
C1
C2
callees
callers'
call's
camelCase
CamelCase
CaMelcAse
CaMeLcAsEvArIaBlE
Cargill
Cargill92
CComPtr
cerr
chrono
cin
class'
clib
Cline99
ClosePort
CommonMark
composability
composable
conceptsTS
cond
const
Const
constcast
constexpr
constness
copy2
CORBA
cout
CP
cplusplus
Cplusplus
Cplusplus03
CplusplusCS
cpp
cpp98
CppCon
CRTP
cstdarg
cstring
cstylecast
ctor
ctors
cxx
cyclomatic
Cyclomatic
czstring
d1
D1
d1's
D2
d2's
dag
dcl
dd
de
Dechev
default0
default00
defop
del
derived1
derived2
destructors
Destructors
detatch
Dewhurst
Dewhurst03
disambiguator
draw2
dtor
dtors
dyn
dynarray
ECBS
endl
enum
Enum
enums
eq
EqualityComparable
errno
expr
f1
f2
f3
f4
fac
Facebook
fallthrough
fallthroughs
faq
fclose
fct
fib10
file1
file2
file3
flag1
fmt
fn
fo
foo
Foo
foobar
Foobar
FOOBAR
fopen
fs
func
func1
fx
g1
g2
GCC
Geosoft
getx
GFM
Girou
github
GitHub
gp
GPLv3
gsl
GSL
gx
handcoded
Henricson
Henricson97
hh
hier
hierclass
hnd
homebrew
HPL
href
Hyslop
IDE
IDEs
IEC
ifdef
iff
ifstream
impactful
Impl
incompleat
increment1
Incrementable
indices
ing
init
inkorrekt
inline
inlined
inlining
inout
InputIterator
int32
int64
ints
io
iostream
Iostream
iso
isocpp
ISORC
istream
Iter
Jiangang
join's
JSF
Juhl
knr
Koenig97
l
Lakos
Lakos96
Lavavej
LCSD05
lifecycle
llvm
lockfree
Lomow
LSP
lst
lvalue
lvalues
m1
m2
macros2
malloc
mallocfree
'many'
Mathematizing
maul2
md
memberinit
members'
memcmp
memmove
memoization
memoized
memset
metameta
metaprogram
metaprogramming
Metaprogramming
Meyers01
Meyers05
Meyers15
Meyers96
Meyers97
microbenchmarks
modify1
modify2
moredata
msgsl
mtx
Murray93
mutex
mutexes
myMap
MyMap
myset
myX
n'
namespace
namespaces
Namespaces
NaN
nargs
Naumann
ness
newdelete
nh
NL
noexcept
nondependent
nonexported
nongeneric
nonlocally
nonprivate
nonreusable
nonvirtual
nonvirtually
nothrow
NR
nullptr
NVI
ok
oo
OO
OOP
OOPSLA'09
oper
O'Reilly
org
ostream
overabstract
overconstrain
overconstrained
overridable
p1
p2
p3
pµÃoorly
Pardoe
parens
passthrough
pb
pb1
pb2
pc
performant
pessimization
PIMPL
Pirkelbauer
PL4
PLDI
Poco
PODs
poly
polymorphically
POPL
PortHandle
PostInitialize
PPP
pre
Pre
precomputation
prefetcher
printf
printf's
Proc
productinfo
Productinfo
proto
ps
ptr
Ptr
ptr2
ptr's
q2
qqq
R0
r2
raii
RAII
Rc
rcon
Rcon
Rconc
Rconst
Rcpl
Rec2
refactor
refactored
refcount
regex
Regex
RegularFunction
reimplement
reinterpretcast
Reis
Renum
reseat
reseating
reseats
resizable
retryable
reusability
Reusability
Ri
Rl
rnd
Rnr
Ro
Rouquette
Rp
Rper
Rr
RRconc
Rsl
RTTI
rvalue
rvalues
RVO
's
s1
s1's
s2
Sarkar
scanf
Sd
SEI
Semiregular
SemiRegular
Sergey
Sewell
SFINAE
sharedness
sharedptrparam
'sharedptrparam'
SignedIntegral
simpleFunc
'size'
sizeof
sl
SL
smartptrconcepts
smartptrget
smartptrparam
smartptrs
SMS
SomeLargeType
specialization2
spinlock
splonk
splunk
SScp
stdarg
stdlib
Stepanov
stl
STL
stmt
str
strdup
strlen
Stroustrup
Stroustrup00
Stroustrup05
Stroustrup13
Stroustrup14
Stroustrup's
struct
suboperations
subsetting
sum1
sum2
supertype
Susmit
SuttAlex05
Sutter
Sutter00
Sutter02
Sutter04
Sutter's
SuttHysl04b
sz
T0
Taligent94
Taligent's
TBD
templated
Templating
templatize
templatized
thread1
thread2
Tjark
tmp
TMP
TODO
toolchains
TotallyOrdered
TP
tradeoff
TSs
tt
typeid
typename
typesafe
UB
unaliased
uncompromised
unenforcable
uninit
uniqueptrparam
unnamed2
use1
users'
util
v1
va
ValueType
vararg
varargs
variables'
variadic
Variadic
vbase
vd1
vec
Vector0
Vector1
Vector2
vid
virtuality
virtuals
VLAs
volatile2
vr
vtbls
vv
w0
webby
Webcolor
webcolors
WG21
'widen'
x1
x2
xmax
xor
Xs
years'
yy
Zhuang
zstring
Zubkov
zz

View File

@ -63,7 +63,7 @@ def main():
code_block_index += 1 code_block_index += 1
# reach here either line was not code, or was code # reach here either line was not code, or was code
# and we dealt with n code lines # and we dealt with n code lines
if not is_code(line, indent_depth): if indent_depth < 4 or not is_code(line, indent_depth):
# store header id for codeblock # store header id for codeblock
section_id = get_marker(line) section_id = get_marker(line)
if section_id is not None: if section_id is not None:
@ -80,7 +80,7 @@ def process_code(read_filehandle, text_filehandle, line, linenum, sourcefile, co
try: try:
line = read_filehandle.next() line = read_filehandle.next()
linenum += 1 linenum += 1
text_filehandle.write('') text_filehandle.write('\n')
except StopIteration: except StopIteration:
return ('', linenum) return ('', linenum)
start_linenum = linenum start_linenum = linenum
@ -101,10 +101,9 @@ def process_code(read_filehandle, text_filehandle, line, linenum, sourcefile, co
has_actual_code = True has_actual_code = True
else: else:
# write empty line so line numbers stay stable # write empty line so line numbers stay stable
text_filehandle.write('') text_filehandle.write('\n')
if (not line.strip() == '```'): if (not line.strip() == '```'):
if ('???' in no_comment_line or '...' in no_comment_line): if ('???' in no_comment_line or '...' in no_comment_line):
has_question_marks = True has_question_marks = True
linebuffer.append(dedent(line) if not fenced else line) linebuffer.append(dedent(line) if not fenced else line)
@ -115,9 +114,8 @@ def process_code(read_filehandle, text_filehandle, line, linenum, sourcefile, co
line = '' line = ''
break break
codefile = os.path.join(codedir, '%s%s.cpp' % (name, index)) codefile = os.path.join(codedir, '%s%s.cpp' % (name, index))
if fenced: if fenced:
text_filehandle.write('') text_filehandle.write('\n')
if (has_actual_code and not has_question_marks): if (has_actual_code and not has_question_marks):
# add commonly used headers, so that lines can compile # add commonly used headers, so that lines can compile
@ -139,8 +137,8 @@ using namespace std; // by md-split
// %s : %s // %s : %s
''' % (sourcefile, start_linenum)) ''' % (sourcefile, start_linenum))
# TODO: if not toplevel code, wrap inside class # TODO: if not toplevel code, wrap inside class
for line in linebuffer: for codeline in linebuffer:
code_filehandle.write(line) code_filehandle.write(codeline)
return (line, linenum) return (line, linenum)