All current tweets have been processed.

Sets the counter to 1 when a new Member is created.

Unicode problems are temporarily solved, but some work remains to be done there.

We also have to create something to store problematic tweets, so that we can try to correct them later (and search them for patterns).
This commit is contained in:
Julien Lengrand-Lambert
2013-01-04 15:42:24 +01:00
parent 3c1fceb82c
commit b604a12bc3
3 changed files with 13 additions and 8 deletions

Binary file not shown.

View File

@@ -64,18 +64,19 @@ class Counter():
# Checking if we already have such a member
reslen = len(m_query.all())
if reslen == 1:
print "I found a member. I have to update it"
#print "I found a member. I have to update it"
self.update(m_query.first(), tweet)
elif reslen == 0:
print "I have to create a new member."
#print "I have to create a new member."
self.create(tweet)
else:
print "Error, can't get more than one member. Exiting"
#print "Error, can't get more than one member. Exiting"
raise ElementException # FIXME : Take care
self.flush()
except ElementException:
print "Exception on %s " % (tweet)
def update(self, member, tweet):
"""
Updates member values.
@@ -101,7 +102,7 @@ class Counter():
author/hashtag couple.
"""
if (tweet.has_author() and tweet.has_hashtag()):
member = Member(tweet.author, tweet.hashtag)
member = Member(tweet.author, tweet.hashtag, 1)
self.session.add(member)
# sets tweet to crawled state
@@ -148,4 +149,4 @@ class ElementException(Exception):
c = Counter(engine_url)
c.count()
c.member_show()
#c.member_show()

View File

@@ -30,12 +30,12 @@ class Member(Base):
updated = Column(DateTime) # date of last count update
count = Column(Integer) # Number of tweets for this couple author/hashtag
def __init__(self, author, hashtag):
def __init__(self, author, hashtag, count=0):
self.author = author
self.hashtag = hashtag
self.created = datetime.datetime.now()
self.updated = datetime.datetime.now()
self.count = 0
self.count = count
def increment(self):
"""
@@ -122,4 +122,8 @@ class Tweet(Base):
return (len(self.hashtag) != 0 and self.hashtag is not None)
def __repr__(self):
return "<%s('%s','%s', '%s')>" % (self.author.encode("UTF-8"), self.created, self.hashtag.encode("UTF-8"), self.text.encode("UTF-8"))
# FIXME: Solve this!
try:
return "<%s('%s','%s', '%s')>" % (self.author.encode("UTF-8"), self.created, self.hashtag.encode("UTF-8"), self.text.encode("UTF-8"))
except UnicodeDecodeError:
return "Contains Unicode!!"