Ja sam na TRIOS-u (Debian oldstable…) koji je omatorio odavno, pa su i verzije haskela i svega ostalog starije
View attachment 5485
View attachment 5485
Last edited:
import sys
import re
from collections import Counter
from string import punctuation
def count_words(text):
    """Return a Counter mapping each lower-cased word of ``text`` to its frequency.

    A word is a maximal run of regex word characters (letters, digits, underscore).
    """
    # Raw string keeps the regex escape from being read as a (deprecated) string escape.
    return Counter(re.findall(r'\w+', text.lower()))


def main(path):
    """Print the 20 most common words of the file at ``path`` as ``word<TAB>count`` lines."""
    # The context manager closes the file even if reading fails.
    with open(path) as handle:
        cnt = count_words(handle.read())
    for w, n in cnt.most_common(20):
        print(f'{w}\t{n}')


if __name__ == '__main__':
    main(sys.argv[1])
Dobro ti je resenje ;p
Code: import sys import re from collections import Counter from string import punctuation cnt = Counter() words = re.findall(’\w+’, open(sys.argv[1]).read().lower()) for word in words: cnt[word] += 1 top_words = cnt.most_common(20) for w, n in top_words: print(f’{w}\t{n}’)
Као почетник, мислим да је за први покушај сасвим читко решење.
Требало би овај проблем решити и у модерном C++-у.
Go је врло добар избор као језик који је једноставнији него C, а бржи него Python.
Evo grešaka:trebalo bi da moze i sa 7icom, koje greske ti javlja?
[dragan@trios-eudev][/media/dragan/Hg/TRIOS-SCRIPTS/Playground/TMP]$ ghc -O2 bbl-t1.hs
[1 of 1] Compiling Main ( bbl-t1.hs, bbl-t1.o )
bbl-t1.hs:15:22:
No instance for (hashable-1.2.1.0:Data.Hashable.Class.Hashable
B8.ByteString)
arising from a use of `updatefreq'
Possible fix:
add an instance declaration for
(hashable-1.2.1.0:Data.Hashable.Class.Hashable B8.ByteString)
In the first argument of `mapM_', namely `(updatefreq ht)'
In a stmt of a ‘do’ block: mapM_ (updatefreq ht) xs
In the expression:
do { ht <- H.new :: IO (HashTable B8.ByteString (IORef Int));
mapM_ (updatefreq ht) xs;
lst <- H.toList ht;
mapM
(\ (x, y)
-> do { v <- readIORef y;
… })
lst }
[dragan@trios-eudev][/media/dragan/Hg/TRIOS-SCRIPTS/Playground/TMP]$
# Print the 20 most frequent words of bible.txt (case-insensitive), most frequent first.
# The regex is single-quoted so the shell cannot glob-expand it before grep sees it.
time awk '{print tolower($0)}' bible.txt | grep -o '[a-zA-Z]*' | sort | uniq -c | sort -rn | head -n 20
Mea culpa… prevideo sam da je to u postavci zadatka u inicijalnoj poruci: prvih 20 reci sortirane reverzno po frekvenciji
{-# Language BangPatterns,DeriveGeneric #-}
import qualified Data.HashTable.IO as H
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Data.List
import Text.Printf
import Data.Char
import Data.IORef
import Data.Hashable
import GHC.Generics
-- | Alias for 'H.BasicHashTable' from "Data.HashTable.IO" (imported qualified as @H@).
type HashTable k v = H.BasicHashTable k v
-- | Wrapper around a strict 'B.ByteString' that carries its own 'Hashable'
-- instance, so it can be used as a hash-table key without relying on a
-- library-provided @Hashable ByteString@ instance.
newtype MyString = MyString B.ByteString
    deriving (Generic, Eq)

-- | Naive multiply-by-33 hash over the bytes of the wrapped string.
--
-- 'hashWithSalt' is defined explicitly: leaving it to the class default makes
-- GHC demand a @Hashable ByteString@ instance for the wrapped field.
instance Hashable MyString where
    -- The salt seeds the accumulator; the strict fold avoids building thunks.
    hashWithSalt salt (MyString s) =
        B.foldl' (\acc byte -> acc * 33 + fromIntegral byte) salt s
    -- Same value the original definition produced (seed 5381).
    hash = hashWithSalt 5381
-- | Count how often each word occurs in the given list.
--
-- Returns one @(word, count)@ pair per distinct word, in whatever order the
-- hash table enumerates its entries.
wordFreq :: [B.ByteString] -> IO [(B.ByteString, Int)]
wordFreq xs = do
    ht <- H.new :: IO (HashTable MyString (IORef Int))
    mapM_ (updatefreq ht) xs
    lst <- H.toList ht
    mapM readCount lst
  where
    -- Unwrap the key and read the final value of its counter.
    readCount (MyString x, y) = do
        v <- readIORef y
        return (x, v)

    -- First sighting inserts a fresh counter set to 1; later sightings bump
    -- the existing counter strictly so no chain of (+1) thunks accumulates.
    updatefreq ht word = do
        lu <- H.lookup ht (MyString word)
        case lu of
            Nothing -> do
                ref <- newIORef 1
                H.insert ht (MyString word) ref
            Just x -> modifyIORef' x (+1)
-- | Read @bible.txt@, strip punctuation, lower-case the text, and print the
-- 20 most frequent words as right-aligned count followed by the word.
main :: IO ()
main = do
    contents <- B.readFile "bible.txt"
    result <- wordFreq . B8.words $
                  B8.map toLower $ B8.filter (not . isPunct) contents
    -- Put the count first so the tuples order by frequency, then sort
    -- descending in a single pass instead of sorting and reversing.
    let sorted = sortBy (flip compare) $ map (\(x, y) -> (y, x)) result
    mapM_ (\(x, y) -> printf "%8d %s\n" x (B8.unpack y)) $ take 20 sorted
-- | True for the punctuation characters this program treats as noise:
-- @' . ; ( ) " ? - _ ! , : |@
isPunct :: Char -> Bool
isPunct c = c `elem` "'.;()\"?-_!,:|"
Nix, ne ide kompajliranje…evo verzija koja bi trebala i kod mene i kod tebe da kompajlira, sa naivnom implementacijom hasha
[dragan@trios-eudev][/media/dragan/Hg/TRIOS-SCRIPTS/Playground/TMP]$ cabal install bytestring
Resolving dependencies…
All the requested packages are already installed:
bytestring-0.10.8.2
Use --reinstall if you want to reinstall anyway.
[dragan@trios-eudev][/media/dragan/Hg/TRIOS-SCRIPTS/Playground/TMP]$ cabal install hashtables
Resolving dependencies…
All the requested packages are already installed:
hashtables-1.2.1.1
Use --reinstall if you want to reinstall anyway.
[dragan@trios-eudev][/media/dragan/Hg/TRIOS-SCRIPTS/Playground/TMP]$ ghc -O2 bbl-t3.hs
[1 of 1] Compiling Main ( bbl-t3.hs, bbl-t3.o )
bbl-t3.hs:14:10:
No instance for (Hashable B8.ByteString)
arising from a use of `hashable-1.2.1.0:Data.Hashable.Class.$gdmhashWithSalt'
Possible fix:
add an instance declaration for (Hashable B8.ByteString)
In the expression:
(hashable-1.2.1.0:Data.Hashable.Class.$gdmhashWithSalt)
In an equation for `hashWithSalt':
hashWithSalt
= (hashable-1.2.1.0:Data.Hashable.Class.$gdmhashWithSalt)
In the instance declaration for `Hashable MyString’
[dragan@trios-eudev][/media/dragan/Hg/TRIOS-SCRIPTS/Playground/TMP]$
{-# Language BangPatterns #-}
import qualified Data.HashTable.IO as H
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Data.List
import Text.Printf
import Data.Char
import Data.IORef
import Data.Hashable
import GHC.Generics
-- | Alias for 'H.BasicHashTable' from "Data.HashTable.IO" (imported qualified as @H@).
type HashTable k v = H.BasicHashTable k v
-- | Wrapper around a strict 'B.ByteString' that carries its own 'Hashable'
-- instance, so it can be used as a hash-table key without relying on a
-- library-provided @Hashable ByteString@ instance.
newtype MyString = MyString B.ByteString
    deriving (Eq)

-- | Naive multiply-by-33 hash over the bytes of the wrapped string.
instance Hashable MyString where
    -- The salt now seeds the accumulator instead of being ignored, so
    -- different salts yield different hashes; the strict fold avoids thunks.
    hashWithSalt salt (MyString s) =
        B.foldl' (\acc byte -> acc * 33 + fromIntegral byte) salt s
    -- Same value the original definition produced (seed 5381).
    hash = hashWithSalt 5381
-- | Count how often each word occurs in the given list.
--
-- Returns one @(word, count)@ pair per distinct word, in whatever order the
-- hash table enumerates its entries.
wordFreq :: [B.ByteString] -> IO [(B.ByteString, Int)]
wordFreq xs = do
    ht <- H.new :: IO (HashTable MyString (IORef Int))
    mapM_ (updatefreq ht) xs
    lst <- H.toList ht
    mapM readCount lst
  where
    -- Unwrap the key and read the final value of its counter.
    readCount (MyString x, y) = do
        v <- readIORef y
        return (x, v)

    -- First sighting inserts a fresh counter set to 1; later sightings bump
    -- the existing counter strictly so no chain of (+1) thunks accumulates.
    updatefreq ht word = do
        lu <- H.lookup ht (MyString word)
        case lu of
            Nothing -> do
                ref <- newIORef 1
                H.insert ht (MyString word) ref
            Just x -> modifyIORef' x (+1)
-- | Read @bible.txt@, strip punctuation, lower-case the text, and print the
-- 20 most frequent words as right-aligned count followed by the word.
main :: IO ()
main = do
    contents <- B.readFile "bible.txt"
    result <- wordFreq . B8.words $
                  B8.map toLower $ B8.filter (not . isPunct) contents
    -- Put the count first so the tuples order by frequency, then sort
    -- descending in a single pass instead of sorting and reversing.
    let sorted = sortBy (flip compare) $ map (\(x, y) -> (y, x)) result
    mapM_ (\(x, y) -> printf "%8d %s\n" x (B8.unpack y)) $ take 20 sorted
-- | True for the punctuation characters this program treats as noise:
-- @' . ; ( ) " ? - _ ! , : |@
isPunct :: Char -> Bool
isPunct c = c `elem` "'.;()\"?-_!,:|"
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace ConsoleAppBible
{
/// <summary>
/// Console program that prints the 20 most frequent purely alphabetic words of a
/// text file, followed by the time spent reading and counting.
/// </summary>
class Program
{
    /// <summary>A distinct word and the number of times it occurs in the text.</summary>
    public class Word
    {
        public string WordChars { get; set; }
        public int WordCount { get; set; } = 0;
    }

    // File read when no path is passed on the command line (the previously hard-coded location).
    private const string DefaultPath = @"E:\bible.txt";

    // Number of entries shown in the report.
    private const int TopCount = 20;

    // Tokenizer: runs of word characters and apostrophes between word boundaries.
    // NOTE(review): the scraped source showed a typographic apostrophe here, presumably
    // an extraction artifact; a plain ' is used - confirm against the author's intent.
    private static readonly Regex TokenPattern = new Regex(@"\b[\w']*\b");

    // Only tokens made up entirely of ASCII letters are counted.
    private static readonly Regex AlphabeticPattern = new Regex(@"^[a-zA-Z]+$");

    /// <summary>
    /// Entry point. Reads the file named by <c>args[0]</c> (or the default path when
    /// no argument is given), prints the top words, then waits for Enter.
    /// </summary>
    static void Main(string[] args)
    {
        Stopwatch sw = new Stopwatch();
        sw.Start();

        string path = args != null && args.Length > 0 ? args[0] : DefaultPath;
        string txt = System.IO.File.ReadAllText(path);

        List<Word> top = TopWords(CountWords(SeparateWords(txt)), TopCount);

        // Timing covers reading, counting and sorting, but not console output.
        sw.Stop();

        foreach (Word item in top)
        {
            WriteHighlighted($"{item.WordChars} ");
            Console.Write("appeared ");
            WriteHighlighted($"{item.WordCount} ");
            Console.Write(" times.");
            Console.WriteLine();
        }
        Console.WriteLine($"it took me {sw.ElapsedMilliseconds} miliseconds to do this");

        Console.ReadLine();
    }

    /// <summary>Splits <paramref name="input"/> into its non-empty tokens, in text order.</summary>
    internal static IEnumerable<string> SeparateWords(string input)
    {
        // The pattern also yields empty matches at word boundaries; those are dropped.
        return TokenPattern.Matches(input)
                           .Cast<Match>()
                           .Select(m => m.Value)
                           .Where(value => !string.IsNullOrEmpty(value));
    }

    /// <summary>
    /// Counts every purely alphabetic token. The result holds one <see cref="Word"/> per
    /// distinct word, in order of first appearance.
    /// </summary>
    /// <remarks>
    /// Comparison is case-sensitive. NOTE(review): lower-case the tokens first if
    /// case-insensitive counts are wanted.
    /// </remarks>
    internal static List<Word> CountWords(IEnumerable<string> tokens)
    {
        // One dictionary lookup per token replaces scanning the whole unique-word list
        // for every word of the text, and every token is visited: the previous
        // "Count() - 1" bounds skipped the last unique words and most of the text.
        var byText = new Dictionary<string, Word>();
        var ordered = new List<Word>();
        foreach (string token in tokens)
        {
            if (!AlphabeticPattern.IsMatch(token))
                continue;

            if (!byText.TryGetValue(token, out Word entry))
            {
                entry = new Word { WordChars = token };
                byText.Add(token, entry);
                ordered.Add(entry);
            }
            entry.WordCount++;
        }
        return ordered;
    }

    /// <summary>Returns the <paramref name="count"/> most frequent words, most frequent first.</summary>
    internal static List<Word> TopWords(IEnumerable<Word> words, int count)
    {
        return words.OrderByDescending(w => w.WordCount).Take(count).ToList();
    }

    // Writes text in red, then restores the console colours.
    private static void WriteHighlighted(string text)
    {
        Console.ForegroundColor = ConsoleColor.Red;
        Console.Write(text);
        Console.ResetColor();
    }
}