-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdiigoku.py
180 lines (139 loc) · 5.19 KB
/
diigoku.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# -*- coding: utf-8 -*-
# A buku importer for Diigo hacked together from bits of https://github.com/shanedabes/poku
#
# usage: python3 diigoku.py DIIGO_APPLICATION_KEY DIIGO_USER_NAME DIIGO_PASSWORD
#
# Get your diigo application key here: https://www.diigo.com/api_keys/new/
import sys
import logging
import buku
from buku import BukuDb, parse_tags, prompt
import requests
from requests.auth import HTTPBasicAuth
from dateutil import parser
import argparse
import itertools
# Log to 'diigoku.log'; filemode='w' starts a fresh log file on every run.
logging.basicConfig(filename='diigoku.log', encoding='utf-8', level=logging.INFO, filemode='w')

# Endless cycle of characters used to draw a progress spinner on stdout.
spinner = itertools.cycle(['-', '\\', '|', '/'])

# Command line: three required positional arguments.
argParser = argparse.ArgumentParser( description = 'Import Diigo bookmarks into Buku')
argParser.add_argument('key', metavar='key', type=str, help='Your Diigo application key')
argParser.add_argument('username', metavar='username', type=str, help='Your Diigo username')
argParser.add_argument('pw', metavar='pw', type=str, help='Your Diigo password')

# Parse the command line once and reuse the namespace instead of re-parsing
# sys.argv for every attribute.
_args = argParser.parse_args()
key = _args.key
user = _args.username
pw = _args.pw

# debugging variables
# limit: fetching stops once the start offset reaches this value (-1 = no cap)
# count: bookmarks requested per call (-1 or anything above 100 means 100)
limit = -1
count = -1
def buku_item_to_dict(b_item):
    """Build the importer's common bookmark dict from one buku record.

    The record is read by position: index 0 becomes ``timestamp``, 1 the url,
    2 the title, 3 the comma-delimited tag string and 4 the description.
    """
    # Drop the first and last pieces of the split tag string (empty strings
    # when the tag string is wrapped in commas), then order what is left.
    tag_names = b_item[3].split(',')[1:-1]
    tag_names.sort()
    return dict(
        url=b_item[1],
        title=b_item[2],
        tags=tag_names,
        timestamp=b_item[0],
        desc=b_item[4],
    )
def tags_to_tagstring(tag_list):
    """Join a list of tags into one tag string wrapped in commas.

    An empty list yields a lone ','.
    """
    if tag_list != []:
        return ',{},'.format(','.join(tag_list))
    return ','
def no_tag(var):
    """Return True unless *var* equals the literal 'no_tag'.

    Used as a filter predicate on the tag list of a Diigo item.
    """
    return not (var == 'no_tag')
def diigo_get_desc( item ):
    """Return the item's 'desc' value followed by a newline.

    Returns an empty string when 'desc' is missing or falsy.
    """
    text = item.get('desc')
    if not text:
        return ""
    return f"{text}\n"
def diigo_get_comm( item, sub ):
    """Render the entries under item['comments'] as quoted text lines.

    Each comment becomes a blank line followed by
    '"<content>" --<user>, <created_at>'; when *sub* is truthy the quote is
    additionally indented with a tab. Returns '' when there are no comments.
    """
    comments = item.get('comments')
    if not comments:
        return ""
    # Every comment starts on a new line; sub-comments get an extra tab.
    lead = '\n\t' if sub else '\n'
    return "".join(
        lead + f'\"{c.get("content")}\" --{c.get("user")}, {c.get("created_at")}\n'
        for c in comments
    )
def diigo_get_annot( item ):
    """Render the entries under item['annotations'] as quoted text.

    Each annotation contributes its quoted 'content' on its own line,
    followed by that annotation's own comments rendered as sub-comments.
    Returns '' when there are no annotations.
    """
    annotations = item.get('annotations')
    if not annotations:
        return ""
    pieces = []
    for note in annotations:
        pieces.append(f'\n\"{note.get("content")}\"\n')
        pieces.append(diigo_get_comm(note, sub=True))
    return "".join(pieces)
def diigo_make_desc( item ):
    """Assemble the description text for one Diigo item.

    Concatenates, in this order, the item's description, its annotations
    and its top-level comments.
    """
    sections = (
        diigo_get_desc(item),
        diigo_get_annot(item),
        diigo_get_comm(item, sub=False),
    )
    return "".join(sections)
def diigo_item_to_dict(p_item):
    """Build the importer's common bookmark dict from one Diigo item.

    The comma-separated 'tags' string is split, entries equal to 'no_tag'
    are dropped and the rest sorted; 'created_at' is parsed with dateutil
    into the 'timestamp' value; 'desc' is the text assembled by
    diigo_make_desc().
    """
    converted = {}
    converted['url'] = p_item.get('url')
    converted['title'] = p_item.get('title')
    kept_tags = [tag for tag in p_item.get('tags').split(',') if no_tag(tag)]
    kept_tags.sort()
    converted['tags'] = kept_tags
    converted['timestamp'] = parser.parse(p_item.get('created_at'))
    converted['desc'] = diigo_make_desc(p_item)
    return converted
def sort_dict_items(item_list):
    """Return a new list of the dict items ordered by their 'timestamp'."""
    ordered = list(item_list)
    ordered.sort(key=lambda entry: entry['timestamp'])
    return ordered
def dict_list_difference(l1, l2):
    """Return the items of l1 whose 'url' does not occur in l2.

    Args:
        l1: list of dicts, each with a hashable 'url' value.
        l2: list of dicts, each with a hashable 'url' value.

    Returns:
        A new list with the matching items of l1, in their original order.
    """
    # Collect the urls of l2 once; a set gives constant-time membership tests
    # instead of rebuilding and scanning a list for every item of l1.
    known_urls = {entry['url'] for entry in l2}
    return [entry for entry in l1 if entry['url'] not in known_urls]
def dict_list_ensure_unique(item_list):
    """Return the items with duplicate urls collapsed to a single entry.

    When several items share a url, the one appearing last in the list is
    kept, at the position where that url first appeared.
    """
    by_url = {}
    for entry in item_list:
        by_url[entry['url']] = entry
    return list(by_url.values())
# Offset of the first bookmark to request; passed to get_bookmarks() and
# advanced by the fetch loop further down.
start = 0
def get_bookmarks( start, count ):
    """Fetch one page of the user's bookmarks from the Diigo v2 API.

    Reads the module-level ``key``, ``user``, ``pw``, ``limit`` and
    ``spinner`` and advances the progress spinner on stdout once per call.

    Args:
        start: offset of the first bookmark to request.
        count: number of bookmarks to request; -1 or any value above 100 is
            replaced by 100 (presumably the API's maximum page size --
            confirm against the Diigo API documentation).

    Returns:
        The decoded JSON body of the response, or '' when ``limit`` is
        non-negative and ``start`` has reached it.

    Raises:
        requests.HTTPError: the API answered with an error status.
        requests.RequestException: the request failed or timed out.
    """
    if limit >= 0 and start >= limit:
        return ''
    if count == -1 or count > 100:
        count = 100
    # Let requests build the query string so the key and user name are
    # URL-encoded correctly.
    query = {
        'key': key,
        'user': user,
        'filter': 'all',
        'count': count,
        'start': start,
    }
    response = requests.get(
        'https://secure.diigo.com/api/v2/bookmarks',
        params=query,
        auth=HTTPBasicAuth(user, pw),
        timeout=60,  # never hang forever on a stalled connection
    )
    try:
        sys.stdout.write(next(spinner))  # write the next character
        sys.stdout.flush()               # flush stdout buffer (actual character display)
        sys.stdout.write('\b')           # erase the last written char
        # Fail loudly on an HTTP error instead of handing an error payload
        # to the caller as if it were a list of bookmarks.
        response.raise_for_status()
        return response.json()
    finally:
        # Release the connection only after the body has been decoded.
        response.close()
#--------------------------------------------------------------------------------------------------------
#
# Load every record buku already holds so that bookmarks whose url is already
# present can be skipped later.
bukudb = buku.BukuDb()
buku_items = [buku_item_to_dict(i) for i in bukudb.get_rec_all()]
logging.info(f'{len(buku_items)} buku items retrieved')
buku_items = sort_dict_items(buku_items)

# Page through the Diigo account until get_bookmarks() returns something
# falsy (an empty page, or '' once the debugging limit is reached).
sys.stdout.write( "Fetching bookmarks..." )
diigo_bookmarks = []
while bookmarks := get_bookmarks(start=start, count=count):
    for b in bookmarks:
        logging.info( f'Recieving -- {b.get("url")}' )
    diigo_bookmarks += bookmarks
    # Advance by the number of bookmarks actually received, so no bookmark is
    # skipped when a page holds fewer than 100 entries.
    start += len(bookmarks)
sys.stdout.write( "done!\n" )
sys.stdout.write( f'{len(diigo_bookmarks)} bookmarks fetched.\n' )

# diigo delivers bookmarks in reverse chrono order. We need to flip it
diigo_bookmarks.reverse()

# convert them to a generic object
diigoitems = [diigo_item_to_dict(i)
              for i in diigo_bookmarks]

# dedupe (currently disabled)
# diigoitems = dict_list_ensure_unique(diigoitems)

# sort the results
diigoitems = sort_dict_items(diigoitems)

# Add items to buku: only those whose url buku does not hold yet
new_buku_items = dict_list_difference(diigoitems, buku_items)
print(f'Adding {len(new_buku_items)} new items to buku')
for item in new_buku_items:
    bukudb.add_rec(
        item['url'],
        title_in = item['title'],
        tags_in = tags_to_tagstring(item['tags']),
        desc = item['desc'],
        delay_commit = True,
        fetch = False,
        immutable = True
    )

# All records were added with delay_commit=True; commit them in one go.
bukudb.conn.commit()